Rewrite frontend as single self-contained HTML file — all CSS/JS inline, no external files to fail loading

This commit is contained in:
2026-05-13 17:24:10 +02:00
parent 3432d362e2
commit ddb0f88257
116 changed files with 4240 additions and 921 deletions

13
dist/utils/configUtils.d.ts vendored Normal file
View File

@@ -0,0 +1,13 @@
import { Config } from '../config/config';
/**
 * Load configuration from a JSON file.
 *
 * The file is read synchronously as UTF-8 and parsed with `JSON.parse`; the
 * parsed value is returned as-is, without being validated against `Config`.
 * If reading or parsing fails, the error is logged and the process is
 * terminated with exit code 1 instead of an exception reaching the caller.
 *
 * @param filePath - Path to the configuration file
 * @returns The parsed configuration object
 */
export declare function loadConfigFromFile(filePath: string): Partial<Config>;
/**
 * Save configuration to a JSON file.
 *
 * A shallow copy of `config` is written as JSON with 2-space indentation.
 * The keys `_`, `$0`, `video_file_path`, `estimate`, `config`, `saveConfig`,
 * `help`, `version` and `h` are left out of the written file; the object
 * passed in is not modified. Write failures are logged and not rethrown.
 *
 * @param filePath - Path to save the configuration file
 * @param config - Configuration object to save
 */
export declare function saveConfigToFile(filePath: string, config: any): void;

44
dist/utils/configUtils.js vendored Normal file
View File

@@ -0,0 +1,44 @@
"use strict";
var __importDefault = (this && this.__importDefault) || function (mod) {
return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.loadConfigFromFile = loadConfigFromFile;
exports.saveConfigToFile = saveConfigToFile;
const fs_1 = __importDefault(require("fs"));
/**
 * Read a JSON configuration file from disk and return its parsed contents.
 *
 * Any failure (missing file, unreadable file, malformed JSON) is reported on
 * stderr and ends the process with exit code 1.
 *
 * @param filePath - Location of the JSON configuration file
 * @returns The value produced by parsing the file
 */
function loadConfigFromFile(filePath) {
    try {
        const parsedConfig = JSON.parse(fs_1.default.readFileSync(filePath, 'utf8'));
        console.log(`Loaded configuration from ${filePath}`);
        return parsedConfig;
    }
    catch (loadError) {
        console.error(`Error loading config from ${filePath}:`, loadError);
        process.exit(1);
    }
}
/**
 * Write a configuration object to disk as pretty-printed JSON.
 *
 * Properties that are not configuration settings (see the list below) are
 * dropped from the written copy; the object passed in is left untouched.
 * Any failure is logged to stderr and otherwise ignored.
 *
 * @param filePath - Destination path for the JSON file
 * @param config - Configuration object to persist
 */
function saveConfigToFile(filePath, config) {
    // Non-configuration properties that must not end up in the saved file
    const excludedKeys = new Set(['_', '$0', 'video_file_path', 'estimate', 'config', 'saveConfig', 'help', 'version', 'h']);
    try {
        const persistable = Object.fromEntries(Object.entries(config ?? {}).filter(([key]) => !excludedKeys.has(key)));
        const serialized = JSON.stringify(persistable, null, 2);
        fs_1.default.writeFileSync(filePath, serialized, 'utf8');
        console.log(`Configuration saved to ${filePath}`);
    }
    catch (saveError) {
        console.error(`Error saving config to ${filePath}:`, saveError);
    }
}
//# sourceMappingURL=configUtils.js.map

1
dist/utils/configUtils.js.map vendored Normal file
View File

@@ -0,0 +1 @@
{"version":3,"file":"configUtils.js","sourceRoot":"","sources":["../../src/utils/configUtils.ts"],"names":[],"mappings":";;;;;AAQA,gDAUC;AAOD,4CAYC;AArCD,4CAAoB;AAGpB;;;;GAIG;AACH,SAAgB,kBAAkB,CAAC,QAAgB;IACjD,IAAI,CAAC;QACH,MAAM,UAAU,GAAG,YAAE,CAAC,YAAY,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC;QACrD,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,UAAU,CAAC,CAAC;QACtC,OAAO,CAAC,GAAG,CAAC,6BAA6B,QAAQ,EAAE,CAAC,CAAC;QACrD,OAAO,MAAM,CAAC;IAChB,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,OAAO,CAAC,KAAK,CAAC,6BAA6B,QAAQ,GAAG,EAAE,KAAK,CAAC,CAAC;QAC/D,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;AACH,CAAC;AAED;;;;GAIG;AACH,SAAgB,gBAAgB,CAAC,QAAgB,EAAE,MAAW;IAC5D,IAAI,CAAC;QACH,0CAA0C;QAC1C,MAAM,YAAY,GAAG,EAAE,GAAG,MAAM,EAAE,CAAC;QACnC,MAAM,aAAa,GAAG,CAAC,GAAG,EAAE,IAAI,EAAE,iBAAiB,EAAE,UAAU,EAAE,QAAQ,EAAE,YAAY,EAAE,MAAM,EAAE,SAAS,EAAE,GAAG,CAAC,CAAC;QACjH,aAAa,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,OAAO,YAAY,CAAC,GAAG,CAAC,CAAC,CAAC;QAEvD,YAAE,CAAC,aAAa,CAAC,QAAQ,EAAE,IAAI,CAAC,SAAS,CAAC,YAAY,EAAE,IAAI,EAAE,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC;QAC1E,OAAO,CAAC,GAAG,CAAC,0BAA0B,QAAQ,EAAE,CAAC,CAAC;IACpD,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,OAAO,CAAC,KAAK,CAAC,0BAA0B,QAAQ,GAAG,EAAE,KAAK,CAAC,CAAC;IAC9D,CAAC;AACH,CAAC"}

9
dist/utils/costEstimator.d.ts vendored Normal file
View File

@@ -0,0 +1,9 @@
import { Config } from '../config/config';
import { CostBreakdown } from '../interfaces';
/**
 * Estimate the cost of generating audio descriptions for a video.
 *
 * The video's duration is read with `getVideoDuration` and divided into
 * frames (or batches when `batchTimeMode` is set); per-unit token and
 * character estimates are then priced with a built-in price table.
 * `options` is used as given — no defaults are merged in — so the provider
 * selection, provider model settings and interval settings must be present.
 * Provider/model pairs missing from the price table are priced at 0 and a
 * warning is logged.
 *
 * @param videoFilePath - Path to the input video file
 * @param options - Configuration used for the estimate
 * @returns Cost estimation breakdown
 */
export declare function estimateCost(videoFilePath: string, options?: Partial<Config>): Promise<CostBreakdown>;

151
dist/utils/costEstimator.js vendored Normal file
View File

@@ -0,0 +1,151 @@
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.estimateCost = estimateCost;
const mediaUtils_1 = require("./mediaUtils");
/**
 * Estimate the cost of generating audio descriptions for a video.
 *
 * The video duration is read via `getVideoDuration`, split into processing
 * units (single frames, or batches when `batchTimeMode` is set) and priced
 * with rough per-unit token/character estimates against the built-in price
 * table. Provider/model pairs missing from the table are priced at 0 and
 * produce a warning.
 *
 * `options` is used as given: no defaults are merged in here, so the caller
 * must supply `visionProvider`, `visionProviders`, `ttsProvider`,
 * `ttsProviders` and the interval settings for the selected mode.
 *
 * @param videoFilePath - Path to the input video file
 * @param options - Configuration used for the estimate
 * @returns Cost estimation breakdown; the `apiCosts` amounts are strings with four decimals
 */
async function estimateCost(videoFilePath, options = {}) {
    // Shallow copy of the caller's options (defaults are NOT applied here)
    const settings = { ...options };
    // Get video duration
    const videoDuration = (0, mediaUtils_1.getVideoDuration)(videoFilePath);
    console.log(`Video duration: ${videoDuration} seconds`);
    // Calculate the number of frames or batches to process
    let totalUnits;
    let unitCostMultiplier;
    let unitType;
    if (settings.batchTimeMode) {
        totalUnits = Math.floor(videoDuration / settings.batchWindowDuration);
        unitCostMultiplier = settings.framesInBatch;
        unitType = "batches";
    }
    else {
        totalUnits = Math.floor(videoDuration / settings.captureIntervalSeconds);
        unitCostMultiplier = 1;
        unitType = "frames";
    }
    console.log(`Will process ${totalUnits} ${unitType}`);
    // Pricing constants (per 1K units unless otherwise noted).
    // Object-valued TTS entries are per 1M tokens (see calculateTTSCost).
    const pricing = {
        vision: {
            openai: {
                'gpt-4o': { input: 0.0025, output: 0.01 },
                'gpt-5.4-mini': { input: 0.00015, output: 0.0006 },
                'gpt-4o-mini': { input: 0.00015, output: 0.0006 }
            },
            gemini: {
                'gemini-2.0-flash': { input: 0.0001, output: 0.0004 },
                'gemini-1.5-flash': { input: 0.000075, output: 0.0003 },
                'gemini-1.5-pro': { input: 0.00125, output: 0.005 }
            },
            openrouter: {
                'anthropic/claude-sonnet-4.5': { input: 0.003, output: 0.015 },
                'anthropic/claude-3.5-sonnet': { input: 0.003, output: 0.015 },
                'anthropic/claude-3-haiku': { input: 0.0008, output: 0.004 },
                'google/gemini-2.0-flash-001': { input: 0.0001, output: 0.0004 }
            }
        },
        tts: {
            openai: {
                'tts-1': 0.015,
                'tts-1-hd': 0.030,
                'gpt-4o-mini-tts': { inputTokens: 0.60, outputTokens: 12.00 }
            },
            elevenlabs: {
                'eleven_multilingual_v2': 0.30,
                'eleven_turbo_v2.5': 0.015
            },
            google: {
                'chirp-hd': 0.016,
                'wavenet': 0.016,
                'neural2': 0.016,
                'standard': 0.004
            }
        }
    };
    // Get the pricing for the selected providers
    const visionProvider = settings.visionProvider;
    const visionModel = settings.visionProviders[visionProvider].model;
    const ttsProvider = settings.ttsProvider;
    const ttsModel = settings.ttsProviders[ttsProvider].model;
    // Check if the pricing data exists
    const visionPricing = pricing.vision[visionProvider]?.[visionModel];
    const ttsPricing = pricing.tts[ttsProvider]?.[ttsModel];
    if (!visionPricing) {
        console.warn(`Warning: No pricing data for vision provider "${visionProvider}" and model "${visionModel}".`);
    }
    if (!ttsPricing) {
        console.warn(`Warning: No pricing data for TTS provider "${ttsProvider}" and model "${ttsModel}".`);
    }
    // Unknown models are priced at zero so the rest of the breakdown is still produced
    const visionInputRate = visionPricing?.input ?? 0;
    const visionOutputRate = visionPricing?.output ?? 0;
    // Estimated token counts
    const estimatedVisionInputTokens = 1000 * unitCostMultiplier;
    const estimatedPromptTokens = 100;
    const estimatedOutputTokensPerUnit = 75;
    // Estimated character counts for TTS
    const estimatedCharsPerDescription = 200;
    // Calculate estimated costs for first unit
    const firstUnitCost = {
        visionInput: (estimatedVisionInputTokens + estimatedPromptTokens) * visionInputRate / 1000,
        visionOutput: estimatedOutputTokensPerUnit * visionOutputRate / 1000,
        tts: calculateTTSCost(estimatedCharsPerDescription, ttsPricing)
    };
    // For subsequent units, we need context (e.g., previous frames)
    const contextMultiplier = settings.batchTimeMode ? 1.2 : 2;
    const subsequentUnitCost = {
        visionInput: (estimatedVisionInputTokens * contextMultiplier + estimatedPromptTokens) * visionInputRate / 1000,
        visionOutput: estimatedOutputTokensPerUnit * visionOutputRate / 1000,
        tts: calculateTTSCost(estimatedCharsPerDescription, ttsPricing)
    };
    // One unit is billed at the first-unit rate and the rest at the
    // with-context rate. A video shorter than one interval yields zero units;
    // clamp the counts so that case costs nothing instead of producing
    // negative totals from `(totalUnits - 1) * cost`.
    const firstUnitCount = totalUnits >= 1 ? 1 : 0;
    const followUpUnitCount = Math.max(totalUnits - 1, 0);
    // Calculate total costs
    const totalVisionInputCost = firstUnitCount * firstUnitCost.visionInput +
        followUpUnitCount * subsequentUnitCost.visionInput;
    const totalVisionOutputCost = firstUnitCount * firstUnitCost.visionOutput +
        followUpUnitCount * subsequentUnitCost.visionOutput;
    const totalTTSCost = firstUnitCount * firstUnitCost.tts +
        followUpUnitCount * subsequentUnitCost.tts;
    const totalCost = totalVisionInputCost + totalVisionOutputCost + totalTTSCost;
    // Create cost breakdown
    const costBreakdown = {
        videoInfo: {
            duration: videoDuration,
            totalUnits: totalUnits,
            unitType: unitType,
            processingInterval: settings.batchTimeMode ? settings.batchWindowDuration : settings.captureIntervalSeconds
        },
        providerInfo: {
            visionProvider: visionProvider,
            visionModel: visionModel,
            ttsProvider: ttsProvider,
            ttsModel: ttsModel
        },
        apiCosts: {
            visionInput: totalVisionInputCost.toFixed(4),
            visionOutput: totalVisionOutputCost.toFixed(4),
            tts: totalTTSCost.toFixed(4),
            total: totalCost.toFixed(4)
        },
        estimates: {
            // One vision call plus one TTS call per unit
            totalAPICallsToProviders: totalUnits * 2,
            // Assumes roughly 3 seconds of processing per unit
            estimatedProcessingTimeMinutes: (totalUnits * 3) / 60
        }
    };
    return costBreakdown;
}
/**
 * Price the speech synthesis of a text of `charCount` characters.
 *
 * @param charCount - Number of characters to synthesize
 * @param pricing - Either a number (price per 1000 characters), an object
 *   with `inputTokens` / `outputTokens` prices per 1M tokens, or a falsy
 *   value when no price is known
 * @returns Estimated cost; 0 when no pricing is available
 */
function calculateTTSCost(charCount, pricing) {
    if (!pricing) {
        return 0;
    }
    if (typeof pricing !== 'number') {
        // Per-token pricing (e.g., gpt-4o-mini-tts): cost per 1M tokens
        const { inputTokens, outputTokens } = pricing;
        // Rough estimate: 1 char ≈ 0.25 tokens for English text
        const inputTokenCount = charCount * 0.25;
        // Audio output is token-heavy
        const outputTokenCount = charCount * 3;
        const costPerMillionScale = inputTokenCount * inputTokens + outputTokenCount * outputTokens;
        return costPerMillionScale / 1000000;
    }
    // Per-character pricing: cost per 1000 characters
    return charCount * pricing / 1000;
}
//# sourceMappingURL=costEstimator.js.map

1
dist/utils/costEstimator.js.map vendored Normal file
View File

@@ -0,0 +1 @@
{"version":3,"file":"costEstimator.js","sourceRoot":"","sources":["../../src/utils/costEstimator.ts"],"names":[],"mappings":";;AAYA,oCA0JC;AApKD,6CAAgD;AAIhD;;;;;GAKG;AACI,KAAK,UAAU,YAAY,CAChC,aAAqB,EACrB,UAA2B,EAAE;IAE7B,uCAAuC;IACvC,MAAM,QAAQ,GAAG,EAAE,GAAG,OAAO,EAAY,CAAC;IAE1C,qBAAqB;IACrB,MAAM,aAAa,GAAG,IAAA,6BAAgB,EAAC,aAAa,CAAC,CAAC;IACtD,OAAO,CAAC,GAAG,CAAC,mBAAmB,aAAa,UAAU,CAAC,CAAC;IAExD,uDAAuD;IACvD,IAAI,UAAkB,CAAC;IACvB,IAAI,kBAA0B,CAAC;IAC/B,IAAI,QAAgB,CAAC;IAErB,IAAI,QAAQ,CAAC,aAAa,EAAE,CAAC;QAC3B,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC,aAAa,GAAG,QAAQ,CAAC,mBAAmB,CAAC,CAAC;QACtE,kBAAkB,GAAG,QAAQ,CAAC,aAAa,CAAC;QAC5C,QAAQ,GAAG,SAAS,CAAC;IACvB,CAAC;SAAM,CAAC;QACN,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC,aAAa,GAAG,QAAQ,CAAC,sBAAsB,CAAC,CAAC;QACzE,kBAAkB,GAAG,CAAC,CAAC;QACvB,QAAQ,GAAG,QAAQ,CAAC;IACtB,CAAC;IAED,OAAO,CAAC,GAAG,CAAC,gBAAgB,UAAU,IAAI,QAAQ,EAAE,CAAC,CAAC;IAEtD,0DAA0D;IAC1D,MAAM,OAAO,GAGT;QACF,MAAM,EAAE;YACN,MAAM,EAAE;gBACN,QAAQ,EAAE,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,IAAI,EAAE;gBACzC,cAAc,EAAE,EAAE,KAAK,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE;gBAClD,aAAa,EAAE,EAAE,KAAK,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE;aAClD;YACD,MAAM,EAAE;gBACN,kBAAkB,EAAE,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE;gBACrD,kBAAkB,EAAE,EAAE,KAAK,EAAE,QAAQ,EAAE,MAAM,EAAE,MAAM,EAAE;gBACvD,gBAAgB,EAAE,EAAE,KAAK,EAAE,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE;aACpD;YACD,UAAU,EAAE;gBACV,6BAA6B,EAAE,EAAE,KAAK,EAAE,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE;gBAC9D,6BAA6B,EAAE,EAAE,KAAK,EAAE,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE;gBAC9D,0BAA0B,EAAE,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,KAAK,EAAE;gBAC5D,6BAA6B,EAAE,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE;aACjE;SACF;QACD,GAAG,EAAE;YACH,MAAM,EAAE;gBACN,OAAO,EAAE,KAAK;gBACd,UAAU,EAAE,KAAK;gBACjB,iBAAiB,EAAE,EAAE,WAAW,EAAE,IAAI,EAAE,YAAY,EAAE,KAAK,EAAE;aAC9D;YACD,UAAU,EAAE;gBACV,wBAAwB,EAAE,IAAI;gBAC9B,mBAAmB,EAAE,KAAK;aAC3B;YACD,MAAM,EAAE;gBACN,UAAU,EAAE,KAAK;gBACjB,SAAS,EAAE,KAAK;gBAChB,SAAS,EAAE,KAAK;gBAChB,UAAU,EAAE,KAAK;aAClB;SACF;KACF,CAAC;IAEF,6CAA6C;IAC7C,MAAM,cAAc,GAAG,QAAQ,CAAC,cAAc,CAAC
;IAC/C,MAAM,WAAW,GAAG,QAAQ,CAAC,eAAe,CAAC,cAAc,CAAC,CAAC,KAAK,CAAC;IACnE,MAAM,WAAW,GAAG,QAAQ,CAAC,WAAW,CAAC;IACzC,MAAM,QAAQ,GAAG,QAAQ,CAAC,YAAY,CAAC,WAAW,CAAC,CAAC,KAAK,CAAC;IAE1D,mCAAmC;IACnC,MAAM,aAAa,GAAG,OAAO,CAAC,MAAM,CAAC,cAAc,CAAC,EAAE,CAAC,WAAW,CAAC,CAAC;IACpE,MAAM,UAAU,GAAG,OAAO,CAAC,GAAG,CAAC,WAAW,CAAC,EAAE,CAAC,QAAQ,CAAC,CAAC;IAExD,IAAI,CAAC,aAAa,EAAE,CAAC;QACnB,OAAO,CAAC,IAAI,CAAC,iDAAiD,cAAc,gBAAgB,WAAW,IAAI,CAAC,CAAC;IAC/G,CAAC;IAED,IAAI,CAAC,UAAU,EAAE,CAAC;QAChB,OAAO,CAAC,IAAI,CAAC,8CAA8C,WAAW,gBAAgB,QAAQ,IAAI,CAAC,CAAC;IACtG,CAAC;IAED,yBAAyB;IACzB,MAAM,0BAA0B,GAAG,IAAI,GAAG,kBAAkB,CAAC;IAC7D,MAAM,qBAAqB,GAAG,GAAG,CAAC;IAClC,MAAM,4BAA4B,GAAG,EAAE,CAAC;IAExC,qCAAqC;IACrC,MAAM,4BAA4B,GAAG,GAAG,CAAC;IAEzC,2CAA2C;IAC3C,MAAM,aAAa,GAAG;QACpB,WAAW,EAAE,CAAC,0BAA0B,GAAG,qBAAqB,CAAC,GAAG,CAAC,aAAa,EAAE,KAAK,IAAI,CAAC,CAAC,GAAG,IAAI;QACtG,YAAY,EAAE,4BAA4B,GAAG,CAAC,aAAa,EAAE,MAAM,IAAI,CAAC,CAAC,GAAG,IAAI;QAChF,GAAG,EAAE,gBAAgB,CAAC,4BAA4B,EAAE,UAAU,CAAC;KAChE,CAAC;IAEF,gEAAgE;IAChE,MAAM,iBAAiB,GAAG,QAAQ,CAAC,aAAa,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;IAE3D,MAAM,kBAAkB,GAAG;QACzB,WAAW,EAAE,CAAC,0BAA0B,GAAG,iBAAiB,GAAG,qBAAqB,CAAC,GAAG,CAAC,aAAa,EAAE,KAAK,IAAI,CAAC,CAAC,GAAG,IAAI;QAC1H,YAAY,EAAE,4BAA4B,GAAG,CAAC,aAAa,EAAE,MAAM,IAAI,CAAC,CAAC,GAAG,IAAI;QAChF,GAAG,EAAE,gBAAgB,CAAC,4BAA4B,EAAE,UAAU,CAAC;KAChE,CAAC;IAEF,wBAAwB;IACxB,MAAM,oBAAoB,GACxB,aAAa,CAAC,WAAW;QACzB,CAAC,UAAU,GAAG,CAAC,CAAC,GAAG,kBAAkB,CAAC,WAAW,CAAC;IAEpD,MAAM,qBAAqB,GACzB,aAAa,CAAC,YAAY;QAC1B,CAAC,UAAU,GAAG,CAAC,CAAC,GAAG,kBAAkB,CAAC,YAAY,CAAC;IAErD,MAAM,YAAY,GAChB,aAAa,CAAC,GAAG;QACjB,CAAC,UAAU,GAAG,CAAC,CAAC,GAAG,kBAAkB,CAAC,GAAG,CAAC;IAE5C,MAAM,SAAS,GAAG,oBAAoB,GAAG,qBAAqB,GAAG,YAAY,CAAC;IAE9E,wBAAwB;IACxB,MAAM,aAAa,GAAkB;QACnC,SAAS,EAAE;YACT,QAAQ,EAAE,aAAa;YACvB,UAAU,EAAE,UAAU;YACtB,QAAQ,EAAE,QAAQ;YAClB,kBAAkB,EAAE,QAAQ,CAAC,aAAa,CAAC,CAAC,CAAC,QAAQ,CAAC,mBAAmB,CAAC,CAAC,CAAC,QAAQ,CAAC,sBAAsB;SAC5G;QACD,YAAY,EAAE;YACZ,cAAc,EAAE,cAAc;YAC9B,WAAW,EAAE,WAAW;YACxB,WAAW,EAAE,WAAW;YACxB,QA
AQ,EAAE,QAAQ;SACnB;QACD,QAAQ,EAAE;YACR,WAAW,EAAE,oBAAoB,CAAC,OAAO,CAAC,CAAC,CAAC;YAC5C,YAAY,EAAE,qBAAqB,CAAC,OAAO,CAAC,CAAC,CAAC;YAC9C,GAAG,EAAE,YAAY,CAAC,OAAO,CAAC,CAAC,CAAC;YAC5B,KAAK,EAAE,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC;SAC5B;QACD,SAAS,EAAE;YACT,wBAAwB,EAAE,UAAU,GAAG,CAAC;YACxC,8BAA8B,EAAE,CAAC,UAAU,GAAG,CAAC,CAAC,GAAG,EAAE;SACtD;KACF,CAAC;IAEF,OAAO,aAAa,CAAC;AACvB,CAAC;AAED,SAAS,gBAAgB,CAAC,SAAiB,EAAE,OAAoC;IAC/E,IAAI,CAAC,OAAO;QAAE,OAAO,CAAC,CAAC;IAEvB,IAAI,OAAO,OAAO,KAAK,QAAQ,EAAE,CAAC;QAChC,kDAAkD;QAClD,OAAO,SAAS,GAAG,OAAO,GAAG,IAAI,CAAC;IACpC,CAAC;IAED,gEAAgE;IAChE,wDAAwD;IACxD,MAAM,oBAAoB,GAAG,SAAS,GAAG,IAAI,CAAC;IAC9C,MAAM,qBAAqB,GAAG,SAAS,GAAG,CAAC,CAAC,CAAC,8BAA8B;IAC3E,OAAO,CAAC,oBAAoB,GAAG,OAAO,CAAC,WAAW,GAAG,qBAAqB,GAAG,OAAO,CAAC,YAAY,CAAC,GAAG,OAAO,CAAC;AAC/G,CAAC"}

4
dist/utils/index.d.ts vendored Normal file
View File

@@ -0,0 +1,4 @@
export * from './mediaUtils';
export * from './processor';
export * from './costEstimator';
export * from './configUtils';

21
dist/utils/index.js vendored Normal file
View File

@@ -0,0 +1,21 @@
"use strict";
// Compiler-emitted helper: expose property `k` of module `m` on object `o`
// under the name `k2` (defaults to `k`). When Object.create is available the
// binding is installed with Object.defineProperty; a forwarding getter is
// used unless `m` already has a suitable accessor descriptor for `k`.
// Otherwise the value is copied with a plain assignment.
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
var desc = Object.getOwnPropertyDescriptor(m, k);
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
desc = { enumerable: true, get: function() { return m[k]; } };
}
Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
o[k2] = m[k];
}));
// Compiler-emitted helper for `export * from`: bind every enumerable key of
// `m` except "default" onto `exports`, skipping names `exports` already owns
// (so the first module to export a name wins).
var __exportStar = (this && this.__exportStar) || function(m, exports) {
for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
};
Object.defineProperty(exports, "__esModule", { value: true });
// Re-export the public API of each utility module from this barrel file.
__exportStar(require("./mediaUtils"), exports);
__exportStar(require("./processor"), exports);
__exportStar(require("./costEstimator"), exports);
__exportStar(require("./configUtils"), exports);
//# sourceMappingURL=index.js.map

1
dist/utils/index.js.map vendored Normal file
View File

@@ -0,0 +1 @@
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/utils/index.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;AAAA,+CAA6B;AAC7B,8CAA4B;AAC5B,kDAAgC;AAChC,gDAA8B"}

37
dist/utils/mediaUtils.d.ts vendored Normal file
View File

@@ -0,0 +1,37 @@
import { AudioSegment } from '../interfaces';
import { Config } from '../config/config';
/**
 * Get the duration of a video file in seconds.
 *
 * Runs `ffprobe` synchronously and parses the printed container duration
 * with `parseFloat`, so the result is `NaN` when ffprobe prints no number.
 * An exception is raised if the ffprobe process fails.
 *
 * @param videoFilePath - Path to the video file
 * @returns Duration in seconds
 */
export declare function getVideoDuration(videoFilePath: string): number;
/**
 * Capture a frame from a video at a specific time position.
 *
 * Runs `ffmpeg` synchronously to write a single frame to `outputPath`,
 * overwriting an existing file (`-y`). An exception is raised if the ffmpeg
 * process fails.
 *
 * @param videoFilePath - Path to the video file
 * @param timePosition - Time position in seconds
 * @param outputPath - Output path for the captured frame
 * @param lowQuality - If true (the default), scale the frame to a height of
 *   360 pixels, keeping the aspect ratio
 */
export declare function captureVideoFrame(videoFilePath: string, timePosition: number, outputPath: string, lowQuality?: boolean): void;
/**
 * Get the duration of an audio file in seconds.
 *
 * Runs `ffprobe` synchronously and parses the printed container duration
 * with `parseFloat`, so the result is `NaN` when ffprobe prints no number.
 * An exception is raised if the ffprobe process fails.
 *
 * @param audioFilePath - Path to the audio file
 * @returns Duration in seconds
 */
export declare function getAudioDuration(audioFilePath: string): number;
/**
 * Combine audio segments into a single audio track using lossless intermediates.
 *
 * Each segment is mixed onto a silent base track of `videoDuration` seconds
 * at its `startTime`. Intermediate files are WAV files inside
 * `settings.tempDir`; the result is encoded to MP3 only when `outputPath`
 * ends in `.mp3`, otherwise the WAV data is copied to `outputPath`.
 * If both ffmpeg-based strategies fail, a shell script containing the
 * equivalent commands is written next to `outputPath` instead.
 *
 * @param segments - Array of audio segment information
 * @param outputPath - Output path for the combined audio
 * @param videoDuration - Duration of the video in seconds
 * @param settings - Configuration settings
 * @returns `outputPath` when the track was created, or `{ commandFile }` with
 *   the path of the generated script when it could not be created
 */
export declare function combineAudioSegments(segments: AudioSegment[], outputPath: string, videoDuration: number, settings: Config): string | {
commandFile: string;
};
/**
 * Clean up temporary files.
 *
 * Deletes the files located directly inside `tempDir`. The directory itself
 * is kept and nothing is removed recursively.
 *
 * @param tempDir - Directory containing temporary files
 */
export declare function cleanupTempFiles(tempDir: string): void;

261
dist/utils/mediaUtils.js vendored Normal file
View File

@@ -0,0 +1,261 @@
"use strict";
var __importDefault = (this && this.__importDefault) || function (mod) {
return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.getVideoDuration = getVideoDuration;
exports.captureVideoFrame = captureVideoFrame;
exports.getAudioDuration = getAudioDuration;
exports.combineAudioSegments = combineAudioSegments;
exports.cleanupTempFiles = cleanupTempFiles;
const child_process_1 = require("child_process");
const fs_1 = __importDefault(require("fs"));
const path_1 = __importDefault(require("path"));
/**
 * Get the duration of a video file in seconds.
 *
 * Runs `ffprobe` synchronously and parses the duration it prints. ffprobe is
 * started directly with an argument array (no shell), so a path containing
 * quotes, `$`, backticks or other shell metacharacters is passed verbatim
 * instead of breaking or altering the command.
 *
 * @param videoFilePath - Path to the video file
 * @returns Duration in seconds (NaN if ffprobe prints no numeric duration)
 * @throws If ffprobe cannot be started or exits with a non-zero status
 */
function getVideoDuration(videoFilePath) {
    const probeArgs = [
        '-v', 'error',
        '-show_entries', 'format=duration',
        '-of', 'default=noprint_wrappers=1:nokey=1',
        videoFilePath
    ];
    const result = (0, child_process_1.execFileSync)('ffprobe', probeArgs);
    return parseFloat(result.toString());
}
/**
 * Capture a frame from a video at a specific time position.
 *
 * Runs `ffmpeg` synchronously to write a single frame. ffmpeg is started
 * directly with an argument array (no shell), so input and output paths
 * containing quotes, `$`, backticks or other shell metacharacters are passed
 * verbatim instead of breaking or altering the command.
 *
 * @param videoFilePath - Path to the video file
 * @param timePosition - Time position in seconds
 * @param outputPath - Output path for the captured frame (overwritten if present)
 * @param lowQuality - If true, save screenshot in 360p resolution
 * @throws If ffmpeg cannot be started or exits with a non-zero status
 */
function captureVideoFrame(videoFilePath, timePosition, outputPath, lowQuality = true) {
    const ffmpegArgs = [
        '-v', 'error',
        '-ss', String(timePosition),
        '-i', videoFilePath,
        '-vframes', '1',
        '-q:v', '2'
    ];
    // Add resolution scaling for low quality option
    if (lowQuality) {
        // Scale to 360p height while maintaining aspect ratio
        ffmpegArgs.push('-vf', 'scale=-1:360');
    }
    ffmpegArgs.push(outputPath, '-y');
    (0, child_process_1.execFileSync)('ffmpeg', ffmpegArgs);
}
/**
 * Get the duration of an audio file in seconds.
 *
 * Runs `ffprobe` synchronously and parses the duration it prints. ffprobe is
 * started directly with an argument array (no shell), so a path containing
 * quotes, `$`, backticks or other shell metacharacters is passed verbatim
 * instead of breaking or altering the command.
 *
 * @param audioFilePath - Path to the audio file
 * @returns Duration in seconds (NaN if ffprobe prints no numeric duration)
 * @throws If ffprobe cannot be started or exits with a non-zero status
 */
function getAudioDuration(audioFilePath) {
    const probeArgs = [
        '-v', 'error',
        '-show_entries', 'format=duration',
        '-of', 'default=noprint_wrappers=1:nokey=1',
        audioFilePath
    ];
    const result = (0, child_process_1.execFileSync)('ffprobe', probeArgs);
    return parseFloat(result.toString());
}
/**
 * Combine audio segments into a single audio track using lossless intermediates.
 *
 * Three strategies are tried in order:
 *  1. Incremental mixing: starting from a silent WAV of `videoDuration`
 *     seconds, each segment (sorted by start time) is mixed onto the running
 *     result, one ffmpeg call per segment.
 *  2. Single pass: if anything in step 1 throws, all segments are mixed onto
 *     the silent base in one ffmpeg call.
 *  3. Last resort: if step 2 also throws, a bash script with the equivalent
 *     commands is written next to `outputPath` for the user to run manually.
 *
 * All intermediates are 44.1 kHz stereo 16-bit PCM WAV files in
 * `settings.tempDir`. The final track is encoded to MP3 only when
 * `outputPath` ends in `.mp3`; otherwise the WAV file is copied as-is.
 *
 * NOTE(review): every path is interpolated into a double-quoted shell string;
 * paths containing `"` or shell metacharacters would break these commands.
 *
 * @param segments - Array of audio segment information; `audioFile` and
 *   `startTime` (seconds) are read from each entry
 * @param outputPath - Output path for the combined audio
 * @param videoDuration - Duration of the video in seconds
 * @param settings - Configuration settings (only `tempDir` is read here)
 * @returns `outputPath` on success, or `{ commandFile }` holding the path of
 *   the generated script when both ffmpeg strategies failed
 */
function combineAudioSegments(segments, outputPath, videoDuration, settings) {
console.log(`Combining ${segments.length} audio segments using lossless intermediates...`);
try {
// Create a silent base track with the full video duration (always WAV)
const silentBasePath = path_1.default.join(settings.tempDir, 'silent_base.wav');
(0, child_process_1.execSync)(`ffmpeg -v error -f lavfi -i anullsrc=r=44100:cl=stereo -t ${videoDuration} -c:a pcm_s16le "${silentBasePath}" -y`);
// Sort segments by start time to process them in order
// (on a copy, so the caller's array keeps its order)
const sortedSegments = [...segments].sort((a, b) => a.startTime - b.startTime);
// Process one segment at a time, building up the audio file
let currentAudioPath = silentBasePath;
for (let i = 0; i < sortedSegments.length; i++) {
const segment = sortedSegments[i];
const outputFile = path_1.default.join(settings.tempDir, `segment_${i}_output.wav`);
// Convert the segment to a standard WAV format first to avoid compatibility issues
// and ensure we're always working with lossless audio
const standardizedSegment = path_1.default.join(settings.tempDir, `segment_${i}_std.wav`);
(0, child_process_1.execSync)(`ffmpeg -v error -i "${segment.audioFile}" -ar 44100 -ac 2 -c:a pcm_s16le "${standardizedSegment}" -y`);
// Calculate the position for this segment (only used in the progress log below)
const timestamp = segment.startTime.toFixed(3);
// Create a filter script for this segment
const filterPath = path_1.default.join(settings.tempDir, `filter_${i}.txt`);
// Use a filter that preserves the audio quality and positions correctly:
// adelay shifts the segment by its start time in milliseconds (one value per
// stereo channel), and amix overlays it on the running track without volume
// normalization, keeping the length of the first input.
const filterContent = `[1:a]adelay=${Math.round(segment.startTime * 1000)}|${Math.round(segment.startTime * 1000)}[delayed];\n` +
`[0:a][delayed]amix=inputs=2:duration=first:dropout_transition=0:normalize=0[out]`;
fs_1.default.writeFileSync(filterPath, filterContent);
// Execute FFmpeg with the filter script
(0, child_process_1.execSync)(`ffmpeg -v error -i "${currentAudioPath}" -i "${standardizedSegment}" -filter_complex_script "${filterPath}" -map "[out]" -c:a pcm_s16le "${outputFile}" -y`);
// Clean up previous file if not the original
if (currentAudioPath !== silentBasePath) {
fs_1.default.unlinkSync(currentAudioPath);
}
// Clean up standardized segment and filter
fs_1.default.unlinkSync(standardizedSegment);
fs_1.default.unlinkSync(filterPath);
// Update current audio path for next iteration
currentAudioPath = outputFile;
console.log(`Added segment ${i + 1}/${sortedSegments.length} at position ${timestamp}s`);
}
// Only at the very end, convert to the requested output format
if (path_1.default.extname(outputPath).toLowerCase() === '.mp3') {
console.log(`Converting final lossless WAV to MP3: ${outputPath}`);
(0, child_process_1.execSync)(`ffmpeg -v error -i "${currentAudioPath}" -c:a libmp3lame -q:a 2 "${outputPath}" -y`);
}
else {
fs_1.default.copyFileSync(currentAudioPath, outputPath);
}
console.log(`Audio description track created: ${outputPath}`);
// Clean up the last temp file
if (currentAudioPath !== silentBasePath) {
fs_1.default.unlinkSync(currentAudioPath);
}
if (fs_1.default.existsSync(silentBasePath)) {
fs_1.default.unlinkSync(silentBasePath);
}
return outputPath;
}
catch (error) {
// NOTE(review): intermediate files left behind by the failed attempt above
// (segment_*_output.wav, segment_*_std.wav, filter_*.txt) are not removed here.
console.error("Error in lossless audio combination:", error.message);
try {
console.log("Trying alternative approach with single-step filter...");
// Create a silent base track (always WAV)
const silentBasePath = path_1.default.join(settings.tempDir, 'silent_base.wav');
(0, child_process_1.execSync)(`ffmpeg -v error -f lavfi -i anullsrc=r=44100:cl=stereo -t ${videoDuration} -c:a pcm_s16le "${silentBasePath}" -y`);
// Create a complex filter to overlay all audio files at their specific timestamps
const filterScriptPath = path_1.default.join(settings.tempDir, 'overlay_filter.txt');
let filterScript = '';
// Sort segments by start time
const sortedSegments = [...segments].sort((a, b) => a.startTime - b.startTime);
// Standardize all segments to WAV first
const standardizedSegments = [];
for (let i = 0; i < sortedSegments.length; i++) {
const segment = sortedSegments[i];
const stdPath = path_1.default.join(settings.tempDir, `std_${i}.wav`);
(0, child_process_1.execSync)(`ffmpeg -v error -i "${segment.audioFile}" -ar 44100 -ac 2 -c:a pcm_s16le "${stdPath}" -y`);
standardizedSegments.push({
path: stdPath,
startTime: segment.startTime
});
}
// Build the FFmpeg command with all standardized inputs
let ffmpegCmd = `ffmpeg -v error -i "${silentBasePath}" `;
// Add all standardized segments as inputs and create the filter script
for (let i = 0; i < standardizedSegments.length; i++) {
// Add as input
ffmpegCmd += `-i "${standardizedSegments[i].path}" `;
// Add to filter script - the input index starts at 1 because 0 is the silent base
const inputIndex = i + 1;
const delay = Math.round(standardizedSegments[i].startTime * 1000);
// Add this input to filter script with proper delay
filterScript += `[${inputIndex}:a]adelay=${delay}|${delay}[a${i}];\n`;
}
// Complete the filter script to merge all streams
filterScript += '[0:a]'; // Start with base
for (let i = 0; i < standardizedSegments.length; i++) {
filterScript += `[a${i}]`;
}
// Use amix with normalize=0 to preserve volumes
// (inputs = all segments plus the silent base)
filterScript += `amix=inputs=${standardizedSegments.length + 1}:normalize=0:duration=first[aout]`;
// Write the filter script
fs_1.default.writeFileSync(filterScriptPath, filterScript);
// Use an intermediate WAV for the output to maintain quality
const intermediatePath = path_1.default.join(settings.tempDir, 'intermediate_output.wav');
// Complete the FFmpeg command - always output to WAV first
ffmpegCmd += `-filter_complex_script "${filterScriptPath}" -map "[aout]" -c:a pcm_s16le "${intermediatePath}" -y`;
// Execute the command
(0, child_process_1.execSync)(ffmpegCmd);
// Convert to the requested format only at the end
if (path_1.default.extname(outputPath).toLowerCase() === '.mp3') {
console.log(`Converting final audio to MP3...`);
(0, child_process_1.execSync)(`ffmpeg -v error -i "${intermediatePath}" -c:a libmp3lame -q:a 2 "${outputPath}" -y`);
}
else {
fs_1.default.copyFileSync(intermediatePath, outputPath);
}
console.log(`Audio description track created with alternative method: ${outputPath}`);
// Clean up temp files
if (fs_1.default.existsSync(filterScriptPath)) {
fs_1.default.unlinkSync(filterScriptPath);
}
if (fs_1.default.existsSync(silentBasePath)) {
fs_1.default.unlinkSync(silentBasePath);
}
if (fs_1.default.existsSync(intermediatePath)) {
fs_1.default.unlinkSync(intermediatePath);
}
// Clean up standardized segments
standardizedSegments.forEach(seg => {
if (fs_1.default.existsSync(seg.path)) {
fs_1.default.unlinkSync(seg.path);
}
});
return outputPath;
}
catch (secondError) {
console.error("Alternative approach failed:", secondError.message);
// Last resort: Generate a command file with the proper syntax
// The script is named after outputPath with its extension replaced.
const cmdFilePath = outputPath.replace(/\.\w+$/, '_ffmpeg_cmd.sh');
let cmdContent = `#!/bin/bash\n\n# FFmpeg command to combine audio segments\n\n`;
// Add commands to convert all segments to WAV first
// (segments are used in their original order here; std_<i> indices are
// consistent across the conversion, filter and input lists below)
cmdContent += `# First convert all segments to standard WAV format\n`;
for (let i = 0; i < segments.length; i++) {
const segment = segments[i];
const stdPath = `"${settings.tempDir}/std_${i}.wav"`;
cmdContent += `ffmpeg -i "${segment.audioFile}" -ar 44100 -ac 2 -c:a pcm_s16le ${stdPath} -y\n`;
}
// Create silent base
cmdContent += `\n# Create silent base track\n`;
cmdContent += `ffmpeg -f lavfi -i anullsrc=r=44100:cl=stereo -t ${videoDuration} -c:a pcm_s16le "${settings.tempDir}/silent_base.wav" -y\n\n`;
// Create filter file
cmdContent += `# Create filter file\n`;
cmdContent += `cat > "${settings.tempDir}/filter.txt" << EOL\n`;
// Add delay filters for each segment
for (let i = 0; i < segments.length; i++) {
const segment = segments[i];
const delay = Math.round(segment.startTime * 1000);
cmdContent += `[${i + 1}:a]adelay=${delay}|${delay}[a${i}];\n`;
}
// Mix all streams
cmdContent += `[0:a]`;
for (let i = 0; i < segments.length; i++) {
cmdContent += `[a${i}]`;
}
cmdContent += `amix=inputs=${segments.length + 1}:normalize=0:duration=first[aout]\nEOL\n\n`;
// Final command
cmdContent += `# Run final FFmpeg command\n`;
cmdContent += `ffmpeg -i "${settings.tempDir}/silent_base.wav" `;
// Add all segments as inputs
for (let i = 0; i < segments.length; i++) {
cmdContent += `-i "${settings.tempDir}/std_${i}.wav" `;
}
// Complete command
cmdContent += `-filter_complex_script "${settings.tempDir}/filter.txt" -map "[aout]" `;
if (path_1.default.extname(outputPath).toLowerCase() === '.mp3') {
cmdContent += `-c:a libmp3lame -q:a 2 `;
}
else {
cmdContent += `-c:a pcm_s16le `;
}
cmdContent += `"${outputPath}" -y\n\n`;
// Add cleanup
cmdContent += `# Clean up temp files\n`;
cmdContent += `rm "${settings.tempDir}/silent_base.wav" "${settings.tempDir}/filter.txt"\n`;
for (let i = 0; i < segments.length; i++) {
cmdContent += `rm "${settings.tempDir}/std_${i}.wav"\n`;
}
// Make the file executable
// NOTE(review): `chmod` is a POSIX command; this call is not guarded, so a
// failure here propagates to the caller.
fs_1.default.writeFileSync(cmdFilePath, cmdContent);
(0, child_process_1.execSync)(`chmod +x "${cmdFilePath}"`);
console.log(`\nCreated executable script with proper FFmpeg commands: ${cmdFilePath}`);
console.log(`Run this script to generate the audio file.`);
return {
commandFile: cmdFilePath
};
}
}
}
/**
 * Clean up temporary files.
 *
 * Deletes every non-directory entry located directly inside `tempDir`.
 * Subdirectories are left in place (unlinking a directory would throw and
 * abort the cleanup half-way), and a `tempDir` that does not exist is treated
 * as already clean. The directory itself is never removed.
 *
 * @param tempDir - Directory containing temporary files
 */
function cleanupTempFiles(tempDir) {
    if (!fs_1.default.existsSync(tempDir)) {
        return;
    }
    const entries = fs_1.default.readdirSync(tempDir, { withFileTypes: true });
    for (const entry of entries) {
        if (entry.isDirectory()) {
            continue;
        }
        fs_1.default.unlinkSync(path_1.default.join(tempDir, entry.name));
    }
}
//# sourceMappingURL=mediaUtils.js.map

1
dist/utils/mediaUtils.js.map vendored Normal file

File diff suppressed because one or more lines are too long

21
dist/utils/processor.d.ts vendored Normal file
View File

@@ -0,0 +1,21 @@
import { VisionProvider, TTSProvider, Stats, ProcessingResult, ProcessingOptions } from '../interfaces';
import { Config } from '../config/config';
/**
 * High-level API: Generate audio description for a video with just options.
 * This internally creates providers and stats so callers don't need to.
 *
 * `options` is shallow-merged over the default configuration, the configured
 * temp and output directories are created if missing, and the work is then
 * delegated to `generateAudioDescription`.
 *
 * @param videoFilePath - Path to the input video file
 * @param options - Optional configuration overrides
 * @param processingOptions - Optional processing options, forwarded unchanged
 *   to `generateAudioDescription`
 * @returns Result of the operation
 */
export declare function generateAudioDescriptionFromOptions(videoFilePath: string, options?: Partial<Config>, processingOptions?: ProcessingOptions): Promise<ProcessingResult>;
/**
 * Generate audio description for a video (low-level API requiring pre-initialized providers).
 *
 * `options` is used as given (no defaults are merged in at this level); the
 * temp and output directories it names are created if they do not exist.
 *
 * @param videoFilePath - Path to the input video file
 * @param visionProvider - Vision provider instance
 * @param ttsProvider - TTS provider instance
 * @param options - Configuration to run with
 * @param stats - Stats object for tracking
 * @param processingOptions - Optional processing options
 *   (NOTE(review): their effect is not visible from this declaration — confirm against the implementation)
 * @returns Result of the operation
 */
export declare function generateAudioDescription(videoFilePath: string, visionProvider: VisionProvider, ttsProvider: TTSProvider, options: Partial<Config> | undefined, stats: Stats, processingOptions?: ProcessingOptions): Promise<ProcessingResult>;

295
dist/utils/processor.js vendored Normal file
View File

@@ -0,0 +1,295 @@
"use strict";
var __importDefault = (this && this.__importDefault) || function (mod) {
return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.generateAudioDescriptionFromOptions = generateAudioDescriptionFromOptions;
exports.generateAudioDescription = generateAudioDescription;
const fs_1 = __importDefault(require("fs"));
const path_1 = __importDefault(require("path"));
const config_1 = require("../config/config");
const stats_1 = require("../config/stats");
const visionProviderFactory_1 = require("../providers/vision/visionProviderFactory");
const ttsProviderFactory_1 = require("../providers/tts/ttsProviderFactory");
const mediaUtils_1 = require("./mediaUtils");
/**
 * Convenience entry point: produce an audio description for a video from plain options.
 * The effective configuration, both providers and a fresh stats object are built here and
 * then handed to generateAudioDescription, so the caller supplies nothing but settings.
 *
 * @param videoFilePath - Path to the input video file
 * @param options - Optional configuration overrides (take precedence over the defaults)
 * @param processingOptions - Optional resume/progress settings, passed through unchanged
 * @returns Result of the operation
 */
async function generateAudioDescriptionFromOptions(videoFilePath, options = {}, processingOptions = {}) {
    // Defaults are spread first so that any key present in `options` wins.
    const config = { ...(0, config_1.getDefaultConfig)(), ...options };
    // Create the working directories (temp first, then output) when they are missing.
    for (const directory of [config.tempDir, config.outputDir]) {
        if (!fs_1.default.existsSync(directory)) {
            fs_1.default.mkdirSync(directory, { recursive: true });
        }
    }
    // Providers are resolved from the merged configuration.
    const vision = visionProviderFactory_1.VisionProviderFactory.getProvider(config);
    const tts = ttsProviderFactory_1.TTSProviderFactory.getProvider(config);
    const runStats = (0, stats_1.createStats)();
    return generateAudioDescription(videoFilePath, vision, tts, config, runStats, processingOptions);
}
/**
 * Generate audio description for a video (low-level API requiring pre-initialized providers).
 *
 * In the default (frame) mode one frame is captured every `captureIntervalSeconds`. The first
 * frame in the context window is described on its own; every later frame is described as a
 * change relative to the previous frame. Each description is converted to speech and scheduled
 * so that it never starts before the preceding clip (plus a short gap) has finished. When
 * `batchTimeMode` is set, the work is delegated to generateAudioDescriptionBatch instead.
 *
 * @param videoFilePath - Path to the input video file
 * @param visionProvider - Vision provider instance
 * @param ttsProvider - TTS provider instance
 * @param options - Optional configuration overrides; keys that are absent fall back to
 *   the values returned by getDefaultConfig()
 * @param stats - Stats object for tracking (updated in place)
 * @param processingOptions - Optional resume/progress settings: `startIndex`,
 *   `currentTimePosition`, `existingSegments` and an `onProgress` callback
 * @returns Result of the operation: `{ videoFile, audioDescriptionFile, segments }`
 * @throws {Error} In frame mode, when `captureIntervalSeconds` is not a positive number
 */
async function generateAudioDescription(videoFilePath, visionProvider, ttsProvider, options = {}, stats, processingOptions = {}) {
    // Merge provided options with defaults. Without the defaults a direct caller passing a
    // partial (or no) options object would leave tempDir, captureIntervalSeconds, etc. undefined.
    const settings = { ...(0, config_1.getDefaultConfig)(), ...options };
    // Ensure temporary and output directories exist
    if (!fs_1.default.existsSync(settings.tempDir)) {
        fs_1.default.mkdirSync(settings.tempDir, { recursive: true });
    }
    if (!fs_1.default.existsSync(settings.outputDir)) {
        fs_1.default.mkdirSync(settings.outputDir, { recursive: true });
    }
    // Get video duration
    const videoDuration = (0, mediaUtils_1.getVideoDuration)(videoFilePath);
    // Number of frames to capture; also recorded in the stats (in batch mode as well)
    const totalFrames = Math.floor(videoDuration / settings.captureIntervalSeconds);
    stats.totalFrames = totalFrames;
    console.log(`Video duration: ${videoDuration} seconds`);
    // If batchTimeMode is enabled, use the new approach
    if (settings.batchTimeMode) {
        return await generateAudioDescriptionBatch(videoFilePath, videoDuration, settings, visionProvider, ttsProvider, stats, processingOptions);
    }
    // A zero interval makes totalFrames Infinity and the loop below would never terminate
    // (while still calling the providers); negative/NaN intervals silently produce nothing.
    if (!(settings.captureIntervalSeconds > 0)) {
        throw new Error(`captureIntervalSeconds must be a positive number, got ${settings.captureIntervalSeconds}`);
    }
    console.log(`Will capture ${totalFrames} frames at ${settings.captureIntervalSeconds} second intervals`);
    // Context window to store previous frames
    const frameContext = [];
    // Array to store audio segment information - preload with existing segments if resuming
    const audioSegments = processingOptions.existingSegments
        ? [...processingOptions.existingSegments]
        : [];
    // Track our current time position (will be adjusted for audio overlap)
    let currentTimePosition = processingOptions.currentTimePosition || 0;
    // Start from given index if resuming
    const startIndex = processingOptions.startIndex || 0;
    // Maximum drift from the original schedule before a warning is logged
    const maxAllowableDrift = settings.captureIntervalSeconds * 2;
    // Process each frame
    for (let i = startIndex; i < totalFrames; i++) {
        // Calculate the ideal time position based on the original schedule
        const idealTimePosition = i * settings.captureIntervalSeconds;
        // Use the adjusted time position that accounts for previous audio durations
        const timePosition = currentTimePosition;
        // Calculate drift from the original schedule
        const timelineDrift = timePosition - idealTimePosition;
        // Log if drift is becoming significant
        if (Math.abs(timelineDrift) > maxAllowableDrift) {
            console.warn(`WARNING: Timeline drift at frame ${i} is ${timelineDrift.toFixed(2)} seconds.`);
        }
        const frameFilePath = path_1.default.join(settings.tempDir, `frame_${i.toString().padStart(5, '0')}.jpg`);
        // Capture the frame at the ideal time; only the narration is shifted, not the picture
        (0, mediaUtils_1.captureVideoFrame)(videoFilePath, idealTimePosition, frameFilePath);
        console.log(`Captured frame at ${idealTimePosition} seconds (scheduled at ${timePosition.toFixed(2)} seconds)`);
        // Add current frame to context
        const currentFrame = {
            index: i,
            path: frameFilePath,
            timePosition
        };
        frameContext.push(currentFrame);
        // Keep context window at specified size
        if (frameContext.length > settings.contextWindowSize) {
            frameContext.shift();
        }
        // Generate description
        let description;
        let usageStats;
        if (frameContext.length === 1) {
            // Only frame in the window - just describe what's in it
            const result = await visionProvider.describeImage(frameFilePath, settings.defaultPrompt);
            description = result.description;
            usageStats = result.usage;
        }
        else {
            // Compare with previous frame
            const previousFrame = frameContext[frameContext.length - 2];
            const result = await visionProvider.compareImages(previousFrame.path, frameFilePath, settings.changePrompt);
            description = result.description;
            usageStats = result.usage;
        }
        // Update stats
        // NOTE(review): token counts are accumulated into fields named *Cost - confirm the intended units.
        stats.totalVisionInputCost += usageStats.inputTokens;
        stats.totalVisionOutputCost += usageStats.outputTokens;
        stats.totalCost += usageStats.totalTokens;
        console.log(`Description: ${description}`);
        // Generate speech from description
        const audioFilePath = path_1.default.join(settings.tempDir, `audio_${i.toString().padStart(5, '0')}.mp3`);
        const ttsResult = await ttsProvider.textToSpeech(description, audioFilePath, {
            voice: settings.ttsVoice,
            model: settings.ttsProviders[settings.ttsProvider].model,
            speedFactor: settings.ttsSpeedFactor,
            instructions: settings.ttsInstructions
        });
        const audioDuration = ttsResult.duration;
        stats.totalTTSCost += ttsResult.cost;
        console.log(`Audio duration: ${audioDuration} seconds`);
        // Store segment information
        const segment = {
            audioFile: audioFilePath,
            startTime: timePosition,
            duration: audioDuration,
            description
        };
        audioSegments.push(segment);
        // Notify progress callback
        if (processingOptions.onProgress) {
            processingOptions.onProgress({
                type: 'frame',
                index: i,
                total: totalFrames,
                segment
            });
        }
        // Update the time position for the next iteration
        // Add a small buffer (0.25 sec) between descriptions to prevent hard cuts
        const bufferTime = 0.25;
        currentTimePosition = timePosition + audioDuration + bufferTime;
        // If the audio ended before the next scheduled frame, snap forward to the schedule
        const nextIdealPosition = (i + 1) * settings.captureIntervalSeconds;
        if (currentTimePosition < nextIdealPosition) {
            console.log(`Audio finished before next scheduled frame. Catching up with timeline.`);
            currentTimePosition = nextIdealPosition;
        }
    }
    // Combine audio segments into final audio description track
    const outputAudioPath = path_1.default.join(settings.outputDir, `${path_1.default.basename(videoFilePath, path_1.default.extname(videoFilePath))}_description.mp3`);
    (0, mediaUtils_1.combineAudioSegments)(audioSegments, outputAudioPath, videoDuration, settings);
    // Clean up temporary files if desired
    // cleanupTempFiles(settings.tempDir);
    console.log(`\nAudio description generated: ${outputAudioPath}`);
    console.log(`To play with video, use: ffplay -i ${videoFilePath} -i ${outputAudioPath} -map 0:v -map 1:a`);
    (0, stats_1.printStats)(stats, settings);
    return {
        videoFile: videoFilePath,
        audioDescriptionFile: outputAudioPath,
        segments: audioSegments
    };
}
/**
 * Generate audio description using the "batch time" mode with overlap prevention.
 *
 * The video is cut into consecutive windows of `batchWindowDuration` seconds. For each window
 * `framesInBatch` evenly spaced frames are captured and described in one vision call, which
 * also receives the previous window's description and last frames as context. Each resulting
 * clip is scheduled no earlier than the end of the previous clip plus a short gap.
 *
 * @param videoFilePath - Path to the input video file
 * @param videoDuration - Duration of the video in seconds
 * @param settings - The merged config and user options
 * @param visionProvider - The vision provider instance
 * @param ttsProvider - The TTS provider instance
 * @param stats - Stats object for tracking (updated in place)
 * @param processingOptions - Optional resume/progress settings: `startIndex`,
 *   `currentTimePosition`, `existingSegments`, `lastContext` and an `onProgress` callback
 * @returns Result of the operation: `{ videoFile, audioDescriptionFile, segments }`
 * @throws {Error} When `batchWindowDuration` is not a positive number
 */
async function generateAudioDescriptionBatch(videoFilePath, videoDuration, settings, visionProvider, ttsProvider, stats, processingOptions = {}) {
    // A zero window makes totalBatches Infinity and the in-loop safety check never fires, so
    // the loop would run forever (calling the providers each time); reject it up front.
    if (!(settings.batchWindowDuration > 0)) {
        throw new Error(`batchWindowDuration must be a positive number, got ${settings.batchWindowDuration}`);
    }
    const totalBatches = Math.floor(videoDuration / settings.batchWindowDuration);
    console.log(`Using batchTimeMode. Total batches: ${totalBatches} (each covers ${settings.batchWindowDuration} sec)`);
    // We'll hold the last batch's frames or last batch's description for context
    let lastBatchContext = processingOptions.lastContext || {};
    // Preload with existing segments if resuming
    const audioSegments = processingOptions.existingSegments
        ? [...processingOptions.existingSegments]
        : [];
    // Track our current time position (will be adjusted for audio overlap)
    let currentTimePosition = processingOptions.currentTimePosition || 0;
    // Start from given index if resuming
    const startBatchIndex = processingOptions.startIndex || 0;
    // Maximum drift before a warning is logged: 50% of the batch window
    const maxAllowableDrift = settings.batchWindowDuration * 0.5;
    for (let batchIndex = startBatchIndex; batchIndex < totalBatches; batchIndex++) {
        // Calculate ideal batch timing based on configuration
        const idealBatchStart = batchIndex * settings.batchWindowDuration;
        // Use adjusted time position that accounts for previous audio durations
        const batchStart = currentTimePosition;
        // Calculate drift from the original schedule
        const timelineDrift = batchStart - idealBatchStart;
        // Log if drift is becoming significant
        if (Math.abs(timelineDrift) > maxAllowableDrift) {
            console.warn(`WARNING: Timeline drift at batch ${batchIndex} is ${timelineDrift.toFixed(2)} seconds.`);
        }
        const batchEnd = idealBatchStart + settings.batchWindowDuration;
        if (batchEnd > videoDuration)
            break; // Safety check
        console.log(`Processing batch #${batchIndex}: Original time window ${idealBatchStart}-${batchEnd} sec, scheduled at ${batchStart.toFixed(2)} sec`);
        // Capture frames for this batch - use the ideal timing for frame capture
        const framePaths = [];
        for (let i = 0; i < settings.framesInBatch; i++) {
            // Evenly spaced sample times inside the window, starting at its beginning
            const t = idealBatchStart + (i * settings.batchWindowDuration) / settings.framesInBatch;
            const frameFilePath = path_1.default.join(settings.tempDir, `batch_${batchIndex}_frame_${i}.jpg`);
            (0, mediaUtils_1.captureVideoFrame)(videoFilePath, t, frameFilePath);
            framePaths.push(frameFilePath);
        }
        // Use AI to describe this batch of frames, possibly providing some context
        const result = await visionProvider.describeBatch(framePaths, lastBatchContext, settings.batchPrompt);
        const description = result.description;
        const usageStats = result.usage;
        // Update stats
        // NOTE(review): token counts are accumulated into fields named *Cost - confirm the intended units.
        stats.totalVisionInputCost += usageStats.inputTokens;
        stats.totalVisionOutputCost += usageStats.outputTokens;
        stats.totalCost += usageStats.totalTokens;
        console.log(`Batch #${batchIndex} description:\n${description}\n`);
        // Convert description to TTS
        const audioFilePath = path_1.default.join(settings.tempDir, `batch_audio_${batchIndex}.mp3`);
        const ttsResult = await ttsProvider.textToSpeech(description, audioFilePath, {
            voice: settings.ttsVoice,
            model: settings.ttsProviders[settings.ttsProvider].model,
            speedFactor: settings.ttsSpeedFactor,
            instructions: settings.ttsInstructions
        });
        const audioDuration = ttsResult.duration;
        stats.totalTTSCost += ttsResult.cost;
        console.log(`Batch #${batchIndex} audio duration: ${audioDuration} seconds`);
        // Store segment info with the adjusted start time
        const segment = {
            audioFile: audioFilePath,
            startTime: batchStart,
            duration: audioDuration,
            description
        };
        audioSegments.push(segment);
        // Notify progress callback
        if (processingOptions.onProgress) {
            processingOptions.onProgress({
                type: 'batch',
                index: batchIndex,
                total: totalBatches,
                segment
            });
        }
        // Update the time position for the next iteration
        // Add a small buffer (0.5 sec) between descriptions
        const bufferTime = 0.5;
        currentTimePosition = batchStart + audioDuration + bufferTime;
        // If the audio ended before the next scheduled batch, snap forward to the schedule
        const nextIdealPosition = (batchIndex + 1) * settings.batchWindowDuration;
        if (currentTimePosition < nextIdealPosition) {
            console.log(`Batch audio finished before next scheduled batch. Catching up with timeline.`);
            currentTimePosition = nextIdealPosition;
        }
        // Update lastBatchContext so the next batch can keep track of what's previously seen
        lastBatchContext = {
            lastDescription: description,
            lastFramePaths: framePaths.slice(-2) // keep the last 2 frames from this batch
        };
    }
    // Combine all the audio segments into one track
    const outputAudioPath = path_1.default.join(settings.outputDir, `${path_1.default.basename(videoFilePath, path_1.default.extname(videoFilePath))}_description_batch.mp3`);
    (0, mediaUtils_1.combineAudioSegments)(audioSegments, outputAudioPath, videoDuration, settings);
    console.log(`\nBatch audio description generated: ${outputAudioPath}`);
    console.log(`To play with video, use: ffplay -i ${videoFilePath} -i ${outputAudioPath} -map 0:v -map 1:a`);
    (0, stats_1.printStats)(stats, settings);
    return {
        videoFile: videoFilePath,
        audioDescriptionFile: outputAudioPath,
        segments: audioSegments
    };
}
//# sourceMappingURL=processor.js.map

1
dist/utils/processor.js.map vendored Normal file

File diff suppressed because one or more lines are too long