Rewrite frontend as single self-contained HTML file — all CSS/JS inline, no external files to fail loading
This commit is contained in:
13
dist/utils/configUtils.d.ts
vendored
Normal file
13
dist/utils/configUtils.d.ts
vendored
Normal file
@@ -0,0 +1,13 @@
|
||||
import { Config } from '../config/config';
|
||||
/**
 * Load configuration from a JSON file.
 *
 * The compiled implementation reads the file synchronously as UTF-8 and parses
 * it with `JSON.parse`. If reading or parsing fails, the error is logged and
 * the process is terminated with exit code 1 rather than an exception being
 * thrown to the caller.
 *
 * @param filePath - Path to the configuration file
 * @returns Configuration object parsed from the file (not validated against `Config`)
 */
|
||||
export declare function loadConfigFromFile(filePath: string): Partial<Config>;
|
||||
/**
 * Save configuration to a JSON file.
 *
 * The compiled implementation writes a shallow copy of `config` as
 * 2-space-indented JSON, after removing the non-configuration keys
 * `_`, `$0`, `video_file_path`, `estimate`, `config`, `saveConfig`, `help`,
 * `version` and `h`. Write failures are logged and swallowed; nothing is
 * thrown to the caller.
 *
 * @param filePath - Path to save the configuration file
 * @param config - Configuration object to save
 */
|
||||
export declare function saveConfigToFile(filePath: string, config: any): void;
|
||||
44
dist/utils/configUtils.js
vendored
Normal file
44
dist/utils/configUtils.js
vendored
Normal file
@@ -0,0 +1,44 @@
|
||||
"use strict";
|
||||
var __importDefault = (this && this.__importDefault) || function (mod) {
|
||||
return (mod && mod.__esModule) ? mod : { "default": mod };
|
||||
};
|
||||
Object.defineProperty(exports, "__esModule", { value: true });
|
||||
exports.loadConfigFromFile = loadConfigFromFile;
|
||||
exports.saveConfigToFile = saveConfigToFile;
|
||||
const fs_1 = __importDefault(require("fs"));
|
||||
/**
|
||||
* Load configuration from a JSON file
|
||||
* @param filePath - Path to the configuration file
|
||||
* @returns Configuration object
|
||||
*/
|
||||
function loadConfigFromFile(filePath) {
|
||||
try {
|
||||
const configFile = fs_1.default.readFileSync(filePath, 'utf8');
|
||||
const config = JSON.parse(configFile);
|
||||
console.log(`Loaded configuration from ${filePath}`);
|
||||
return config;
|
||||
}
|
||||
catch (error) {
|
||||
console.error(`Error loading config from ${filePath}:`, error);
|
||||
process.exit(1);
|
||||
}
|
||||
}
|
||||
/**
 * Persist a configuration object to disk as pretty-printed JSON.
 *
 * A shallow copy of `config` is made, the non-configuration keys listed below
 * are stripped from the copy, and the remainder is written with 2-space
 * indentation. Failures are reported on stderr and are not rethrown.
 *
 * @param filePath - Path to save the configuration file
 * @param config - Configuration object to save
 */
function saveConfigToFile(filePath, config) {
    // Properties that are not part of the configuration and must not be saved
    const nonConfigKeys = ['_', '$0', 'video_file_path', 'estimate', 'config', 'saveConfig', 'help', 'version', 'h'];
    try {
        const persisted = { ...config };
        for (const nonConfigKey of nonConfigKeys) {
            delete persisted[nonConfigKey];
        }
        const serialized = JSON.stringify(persisted, null, 2);
        fs_1.default.writeFileSync(filePath, serialized, 'utf8');
        console.log(`Configuration saved to ${filePath}`);
    }
    catch (failure) {
        console.error(`Error saving config to ${filePath}:`, failure);
    }
}
|
||||
//# sourceMappingURL=configUtils.js.map
|
||||
1
dist/utils/configUtils.js.map
vendored
Normal file
1
dist/utils/configUtils.js.map
vendored
Normal file
@@ -0,0 +1 @@
|
||||
{"version":3,"file":"configUtils.js","sourceRoot":"","sources":["../../src/utils/configUtils.ts"],"names":[],"mappings":";;;;;AAQA,gDAUC;AAOD,4CAYC;AArCD,4CAAoB;AAGpB;;;;GAIG;AACH,SAAgB,kBAAkB,CAAC,QAAgB;IACjD,IAAI,CAAC;QACH,MAAM,UAAU,GAAG,YAAE,CAAC,YAAY,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC;QACrD,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,UAAU,CAAC,CAAC;QACtC,OAAO,CAAC,GAAG,CAAC,6BAA6B,QAAQ,EAAE,CAAC,CAAC;QACrD,OAAO,MAAM,CAAC;IAChB,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,OAAO,CAAC,KAAK,CAAC,6BAA6B,QAAQ,GAAG,EAAE,KAAK,CAAC,CAAC;QAC/D,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;AACH,CAAC;AAED;;;;GAIG;AACH,SAAgB,gBAAgB,CAAC,QAAgB,EAAE,MAAW;IAC5D,IAAI,CAAC;QACH,0CAA0C;QAC1C,MAAM,YAAY,GAAG,EAAE,GAAG,MAAM,EAAE,CAAC;QACnC,MAAM,aAAa,GAAG,CAAC,GAAG,EAAE,IAAI,EAAE,iBAAiB,EAAE,UAAU,EAAE,QAAQ,EAAE,YAAY,EAAE,MAAM,EAAE,SAAS,EAAE,GAAG,CAAC,CAAC;QACjH,aAAa,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,OAAO,YAAY,CAAC,GAAG,CAAC,CAAC,CAAC;QAEvD,YAAE,CAAC,aAAa,CAAC,QAAQ,EAAE,IAAI,CAAC,SAAS,CAAC,YAAY,EAAE,IAAI,EAAE,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC;QAC1E,OAAO,CAAC,GAAG,CAAC,0BAA0B,QAAQ,EAAE,CAAC,CAAC;IACpD,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,OAAO,CAAC,KAAK,CAAC,0BAA0B,QAAQ,GAAG,EAAE,KAAK,CAAC,CAAC;IAC9D,CAAC;AACH,CAAC"}
|
||||
9
dist/utils/costEstimator.d.ts
vendored
Normal file
9
dist/utils/costEstimator.d.ts
vendored
Normal file
@@ -0,0 +1,9 @@
|
||||
import { Config } from '../config/config';
|
||||
import { CostBreakdown } from '../interfaces';
|
||||
/**
 * Estimate the cost of generating audio descriptions for a video.
 *
 * The compiled implementation probes the video duration, derives the number of
 * frames (or batches, when `batchTimeMode` is set) to process, and prices them
 * with a built-in table keyed by provider and model. A warning is logged when
 * the table has no entry for the selected vision or TTS model; that part of
 * the estimate is then counted as zero.
 *
 * @param videoFilePath - Path to the input video file
 * @param options - Optional configuration overrides
 * @returns Cost estimation breakdown
 */
|
||||
export declare function estimateCost(videoFilePath: string, options?: Partial<Config>): Promise<CostBreakdown>;
|
||||
151
dist/utils/costEstimator.js
vendored
Normal file
151
dist/utils/costEstimator.js
vendored
Normal file
@@ -0,0 +1,151 @@
|
||||
"use strict";
|
||||
Object.defineProperty(exports, "__esModule", { value: true });
|
||||
exports.estimateCost = estimateCost;
|
||||
const mediaUtils_1 = require("./mediaUtils");
|
||||
/**
 * Estimate the cost of generating audio descriptions for a video.
 *
 * The video duration is divided by the processing interval to obtain the
 * number of units (frames, or batches when `batchTimeMode` is set). Each unit
 * is priced from a built-in table keyed by provider and model; units after the
 * first are charged extra vision input for context. When the table has no
 * entry for the selected model a warning is logged and that component is
 * counted as zero.
 *
 * No defaults are merged in here: fields missing from `options` stay
 * undefined. Missing provider/model settings produce the "no pricing data"
 * warning instead of a crash, and an unusable unit count (non-finite or
 * negative) is treated as zero units.
 *
 * @param videoFilePath - Path to the input video file
 * @param options - Optional configuration overrides
 * @returns Cost estimation breakdown (monetary amounts are 4-decimal strings)
 */
async function estimateCost(videoFilePath, options = {}) {
    // Work on a shallow copy so the caller's object is never touched
    const settings = { ...options };
    // Get video duration
    const videoDuration = (0, mediaUtils_1.getVideoDuration)(videoFilePath);
    console.log(`Video duration: ${videoDuration} seconds`);
    // Calculate the number of frames or batches to process
    let totalUnits;
    let unitCostMultiplier;
    let unitType;
    if (settings.batchTimeMode) {
        totalUnits = Math.floor(videoDuration / settings.batchWindowDuration);
        unitCostMultiplier = settings.framesInBatch;
        unitType = "batches";
    }
    else {
        totalUnits = Math.floor(videoDuration / settings.captureIntervalSeconds);
        unitCostMultiplier = 1;
        unitType = "frames";
    }
    // A missing/zero interval or an unreadable duration yields NaN or Infinity;
    // neither can be priced, so fall back to "nothing to process".
    if (!Number.isFinite(totalUnits) || totalUnits < 0) {
        console.warn(`Warning: Could not derive a unit count from duration "${videoDuration}" and the configured interval; assuming 0 ${unitType}.`);
        totalUnits = 0;
    }
    console.log(`Will process ${totalUnits} ${unitType}`);
    // Pricing constants (per 1K units unless otherwise noted)
    const pricing = {
        vision: {
            openai: {
                'gpt-4o': { input: 0.0025, output: 0.01 },
                'gpt-5.4-mini': { input: 0.00015, output: 0.0006 },
                'gpt-4o-mini': { input: 0.00015, output: 0.0006 }
            },
            gemini: {
                'gemini-2.0-flash': { input: 0.0001, output: 0.0004 },
                'gemini-1.5-flash': { input: 0.000075, output: 0.0003 },
                'gemini-1.5-pro': { input: 0.00125, output: 0.005 }
            },
            openrouter: {
                'anthropic/claude-sonnet-4.5': { input: 0.003, output: 0.015 },
                'anthropic/claude-3.5-sonnet': { input: 0.003, output: 0.015 },
                'anthropic/claude-3-haiku': { input: 0.0008, output: 0.004 },
                'google/gemini-2.0-flash-001': { input: 0.0001, output: 0.0004 }
            }
        },
        tts: {
            openai: {
                'tts-1': 0.015,
                'tts-1-hd': 0.030,
                // Object entries are token-priced rather than character-priced
                'gpt-4o-mini-tts': { inputTokens: 0.60, outputTokens: 12.00 }
            },
            elevenlabs: {
                'eleven_multilingual_v2': 0.30,
                'eleven_turbo_v2.5': 0.015
            },
            google: {
                'chirp-hd': 0.016,
                'wavenet': 0.016,
                'neural2': 0.016,
                'standard': 0.004
            }
        }
    };
    // Get the pricing for the selected providers. Optional chaining keeps a
    // partial `options` object (including the default `{}`) from throwing.
    const visionProvider = settings.visionProvider;
    const visionModel = settings.visionProviders?.[visionProvider]?.model;
    const ttsProvider = settings.ttsProvider;
    const ttsModel = settings.ttsProviders?.[ttsProvider]?.model;
    // Check if the pricing data exists
    const visionPricing = pricing.vision[visionProvider]?.[visionModel];
    const ttsPricing = pricing.tts[ttsProvider]?.[ttsModel];
    if (!visionPricing) {
        console.warn(`Warning: No pricing data for vision provider "${visionProvider}" and model "${visionModel}".`);
    }
    if (!ttsPricing) {
        console.warn(`Warning: No pricing data for TTS provider "${ttsProvider}" and model "${ttsModel}".`);
    }
    // Estimated token counts
    const estimatedVisionInputTokens = 1000 * unitCostMultiplier;
    const estimatedPromptTokens = 100;
    const estimatedOutputTokensPerUnit = 75;
    // Estimated character counts for TTS
    const estimatedCharsPerDescription = 200;
    // Calculate estimated costs for first unit
    const firstUnitCost = {
        visionInput: (estimatedVisionInputTokens + estimatedPromptTokens) * (visionPricing?.input || 0) / 1000,
        visionOutput: estimatedOutputTokensPerUnit * (visionPricing?.output || 0) / 1000,
        tts: calculateTTSCost(estimatedCharsPerDescription, ttsPricing)
    };
    // For subsequent units, we need context (e.g., previous frames)
    const contextMultiplier = settings.batchTimeMode ? 1.2 : 2;
    const subsequentUnitCost = {
        visionInput: (estimatedVisionInputTokens * contextMultiplier + estimatedPromptTokens) * (visionPricing?.input || 0) / 1000,
        visionOutput: estimatedOutputTokensPerUnit * (visionPricing?.output || 0) / 1000,
        tts: calculateTTSCost(estimatedCharsPerDescription, ttsPricing)
    };
    // Calculate total costs: one "first" unit plus (n - 1) "subsequent" units.
    // With zero units nothing is charged; without this guard the formula
    // would evaluate to first - subsequent, i.e. a negative cost.
    const totalFor = (first, subsequent) => totalUnits > 0 ? first + (totalUnits - 1) * subsequent : 0;
    const totalVisionInputCost = totalFor(firstUnitCost.visionInput, subsequentUnitCost.visionInput);
    const totalVisionOutputCost = totalFor(firstUnitCost.visionOutput, subsequentUnitCost.visionOutput);
    const totalTTSCost = totalFor(firstUnitCost.tts, subsequentUnitCost.tts);
    const totalCost = totalVisionInputCost + totalVisionOutputCost + totalTTSCost;
    // Create cost breakdown
    const costBreakdown = {
        videoInfo: {
            duration: videoDuration,
            totalUnits: totalUnits,
            unitType: unitType,
            processingInterval: settings.batchTimeMode ? settings.batchWindowDuration : settings.captureIntervalSeconds
        },
        providerInfo: {
            visionProvider: visionProvider,
            visionModel: visionModel,
            ttsProvider: ttsProvider,
            ttsModel: ttsModel
        },
        apiCosts: {
            visionInput: totalVisionInputCost.toFixed(4),
            visionOutput: totalVisionOutputCost.toFixed(4),
            tts: totalTTSCost.toFixed(4),
            total: totalCost.toFixed(4)
        },
        estimates: {
            // NOTE(review): presumably one vision call plus one TTS call per unit — confirm
            totalAPICallsToProviders: totalUnits * 2,
            // Assumes roughly 3 seconds of processing per unit
            estimatedProcessingTimeMinutes: (totalUnits * 3) / 60
        }
    };
    return costBreakdown;
}
|
||||
/**
 * Estimate the text-to-speech cost of synthesizing `charCount` characters.
 *
 * `pricing` takes one of three shapes:
 *  - falsy (no pricing data): the cost is 0;
 *  - a number: price per 1000 characters;
 *  - an object `{ inputTokens, outputTokens }`: prices per 1M tokens, with
 *    token counts roughly estimated from the character count.
 */
function calculateTTSCost(charCount, pricing) {
    const CHARS_PER_PRICE_UNIT = 1000;
    const TOKENS_PER_PRICE_UNIT = 1000000;
    if (pricing) {
        if (typeof pricing !== 'number') {
            // Per-token pricing (e.g., gpt-4o-mini-tts).
            // Rough estimate: 1 char ≈ 0.25 tokens for English text,
            // while the audio output is far more token-heavy.
            const inputTokenEstimate = charCount * 0.25;
            const outputTokenEstimate = charCount * 3;
            const tokenCost = inputTokenEstimate * pricing.inputTokens + outputTokenEstimate * pricing.outputTokens;
            return tokenCost / TOKENS_PER_PRICE_UNIT;
        }
        // Per-character pricing
        return charCount * pricing / CHARS_PER_PRICE_UNIT;
    }
    return 0;
}
|
||||
//# sourceMappingURL=costEstimator.js.map
|
||||
1
dist/utils/costEstimator.js.map
vendored
Normal file
1
dist/utils/costEstimator.js.map
vendored
Normal file
@@ -0,0 +1 @@
|
||||
{"version":3,"file":"costEstimator.js","sourceRoot":"","sources":["../../src/utils/costEstimator.ts"],"names":[],"mappings":";;AAYA,oCA0JC;AApKD,6CAAgD;AAIhD;;;;;GAKG;AACI,KAAK,UAAU,YAAY,CAChC,aAAqB,EACrB,UAA2B,EAAE;IAE7B,uCAAuC;IACvC,MAAM,QAAQ,GAAG,EAAE,GAAG,OAAO,EAAY,CAAC;IAE1C,qBAAqB;IACrB,MAAM,aAAa,GAAG,IAAA,6BAAgB,EAAC,aAAa,CAAC,CAAC;IACtD,OAAO,CAAC,GAAG,CAAC,mBAAmB,aAAa,UAAU,CAAC,CAAC;IAExD,uDAAuD;IACvD,IAAI,UAAkB,CAAC;IACvB,IAAI,kBAA0B,CAAC;IAC/B,IAAI,QAAgB,CAAC;IAErB,IAAI,QAAQ,CAAC,aAAa,EAAE,CAAC;QAC3B,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC,aAAa,GAAG,QAAQ,CAAC,mBAAmB,CAAC,CAAC;QACtE,kBAAkB,GAAG,QAAQ,CAAC,aAAa,CAAC;QAC5C,QAAQ,GAAG,SAAS,CAAC;IACvB,CAAC;SAAM,CAAC;QACN,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC,aAAa,GAAG,QAAQ,CAAC,sBAAsB,CAAC,CAAC;QACzE,kBAAkB,GAAG,CAAC,CAAC;QACvB,QAAQ,GAAG,QAAQ,CAAC;IACtB,CAAC;IAED,OAAO,CAAC,GAAG,CAAC,gBAAgB,UAAU,IAAI,QAAQ,EAAE,CAAC,CAAC;IAEtD,0DAA0D;IAC1D,MAAM,OAAO,GAGT;QACF,MAAM,EAAE;YACN,MAAM,EAAE;gBACN,QAAQ,EAAE,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,IAAI,EAAE;gBACzC,cAAc,EAAE,EAAE,KAAK,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE;gBAClD,aAAa,EAAE,EAAE,KAAK,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE;aAClD;YACD,MAAM,EAAE;gBACN,kBAAkB,EAAE,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE;gBACrD,kBAAkB,EAAE,EAAE,KAAK,EAAE,QAAQ,EAAE,MAAM,EAAE,MAAM,EAAE;gBACvD,gBAAgB,EAAE,EAAE,KAAK,EAAE,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE;aACpD;YACD,UAAU,EAAE;gBACV,6BAA6B,EAAE,EAAE,KAAK,EAAE,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE;gBAC9D,6BAA6B,EAAE,EAAE,KAAK,EAAE,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE;gBAC9D,0BAA0B,EAAE,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,KAAK,EAAE;gBAC5D,6BAA6B,EAAE,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE;aACjE;SACF;QACD,GAAG,EAAE;YACH,MAAM,EAAE;gBACN,OAAO,EAAE,KAAK;gBACd,UAAU,EAAE,KAAK;gBACjB,iBAAiB,EAAE,EAAE,WAAW,EAAE,IAAI,EAAE,YAAY,EAAE,KAAK,EAAE;aAC9D;YACD,UAAU,EAAE;gBACV,wBAAwB,EAAE,IAAI;gBAC9B,mBAAmB,EAAE,KAAK;aAC3B;YACD,MAAM,EAAE;gBACN,UAAU,EAAE,KAAK;gBACjB,SAAS,EAAE,KAAK;gBAChB,SAAS,EAAE,KAAK;gBAChB,UAAU,EAAE,KAAK;aAClB;SACF;KACF,CAAC;IAEF,6CAA6C;IAC7C,MAAM,cAAc,GAAG,QAAQ,CAAC,cAAc,CAAC
;IAC/C,MAAM,WAAW,GAAG,QAAQ,CAAC,eAAe,CAAC,cAAc,CAAC,CAAC,KAAK,CAAC;IACnE,MAAM,WAAW,GAAG,QAAQ,CAAC,WAAW,CAAC;IACzC,MAAM,QAAQ,GAAG,QAAQ,CAAC,YAAY,CAAC,WAAW,CAAC,CAAC,KAAK,CAAC;IAE1D,mCAAmC;IACnC,MAAM,aAAa,GAAG,OAAO,CAAC,MAAM,CAAC,cAAc,CAAC,EAAE,CAAC,WAAW,CAAC,CAAC;IACpE,MAAM,UAAU,GAAG,OAAO,CAAC,GAAG,CAAC,WAAW,CAAC,EAAE,CAAC,QAAQ,CAAC,CAAC;IAExD,IAAI,CAAC,aAAa,EAAE,CAAC;QACnB,OAAO,CAAC,IAAI,CAAC,iDAAiD,cAAc,gBAAgB,WAAW,IAAI,CAAC,CAAC;IAC/G,CAAC;IAED,IAAI,CAAC,UAAU,EAAE,CAAC;QAChB,OAAO,CAAC,IAAI,CAAC,8CAA8C,WAAW,gBAAgB,QAAQ,IAAI,CAAC,CAAC;IACtG,CAAC;IAED,yBAAyB;IACzB,MAAM,0BAA0B,GAAG,IAAI,GAAG,kBAAkB,CAAC;IAC7D,MAAM,qBAAqB,GAAG,GAAG,CAAC;IAClC,MAAM,4BAA4B,GAAG,EAAE,CAAC;IAExC,qCAAqC;IACrC,MAAM,4BAA4B,GAAG,GAAG,CAAC;IAEzC,2CAA2C;IAC3C,MAAM,aAAa,GAAG;QACpB,WAAW,EAAE,CAAC,0BAA0B,GAAG,qBAAqB,CAAC,GAAG,CAAC,aAAa,EAAE,KAAK,IAAI,CAAC,CAAC,GAAG,IAAI;QACtG,YAAY,EAAE,4BAA4B,GAAG,CAAC,aAAa,EAAE,MAAM,IAAI,CAAC,CAAC,GAAG,IAAI;QAChF,GAAG,EAAE,gBAAgB,CAAC,4BAA4B,EAAE,UAAU,CAAC;KAChE,CAAC;IAEF,gEAAgE;IAChE,MAAM,iBAAiB,GAAG,QAAQ,CAAC,aAAa,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;IAE3D,MAAM,kBAAkB,GAAG;QACzB,WAAW,EAAE,CAAC,0BAA0B,GAAG,iBAAiB,GAAG,qBAAqB,CAAC,GAAG,CAAC,aAAa,EAAE,KAAK,IAAI,CAAC,CAAC,GAAG,IAAI;QAC1H,YAAY,EAAE,4BAA4B,GAAG,CAAC,aAAa,EAAE,MAAM,IAAI,CAAC,CAAC,GAAG,IAAI;QAChF,GAAG,EAAE,gBAAgB,CAAC,4BAA4B,EAAE,UAAU,CAAC;KAChE,CAAC;IAEF,wBAAwB;IACxB,MAAM,oBAAoB,GACxB,aAAa,CAAC,WAAW;QACzB,CAAC,UAAU,GAAG,CAAC,CAAC,GAAG,kBAAkB,CAAC,WAAW,CAAC;IAEpD,MAAM,qBAAqB,GACzB,aAAa,CAAC,YAAY;QAC1B,CAAC,UAAU,GAAG,CAAC,CAAC,GAAG,kBAAkB,CAAC,YAAY,CAAC;IAErD,MAAM,YAAY,GAChB,aAAa,CAAC,GAAG;QACjB,CAAC,UAAU,GAAG,CAAC,CAAC,GAAG,kBAAkB,CAAC,GAAG,CAAC;IAE5C,MAAM,SAAS,GAAG,oBAAoB,GAAG,qBAAqB,GAAG,YAAY,CAAC;IAE9E,wBAAwB;IACxB,MAAM,aAAa,GAAkB;QACnC,SAAS,EAAE;YACT,QAAQ,EAAE,aAAa;YACvB,UAAU,EAAE,UAAU;YACtB,QAAQ,EAAE,QAAQ;YAClB,kBAAkB,EAAE,QAAQ,CAAC,aAAa,CAAC,CAAC,CAAC,QAAQ,CAAC,mBAAmB,CAAC,CAAC,CAAC,QAAQ,CAAC,sBAAsB;SAC5G;QACD,YAAY,EAAE;YACZ,cAAc,EAAE,cAAc;YAC9B,WAAW,EAAE,WAAW;YACxB,WAAW,EAAE,WAAW;YACxB,QA
AQ,EAAE,QAAQ;SACnB;QACD,QAAQ,EAAE;YACR,WAAW,EAAE,oBAAoB,CAAC,OAAO,CAAC,CAAC,CAAC;YAC5C,YAAY,EAAE,qBAAqB,CAAC,OAAO,CAAC,CAAC,CAAC;YAC9C,GAAG,EAAE,YAAY,CAAC,OAAO,CAAC,CAAC,CAAC;YAC5B,KAAK,EAAE,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC;SAC5B;QACD,SAAS,EAAE;YACT,wBAAwB,EAAE,UAAU,GAAG,CAAC;YACxC,8BAA8B,EAAE,CAAC,UAAU,GAAG,CAAC,CAAC,GAAG,EAAE;SACtD;KACF,CAAC;IAEF,OAAO,aAAa,CAAC;AACvB,CAAC;AAED,SAAS,gBAAgB,CAAC,SAAiB,EAAE,OAAoC;IAC/E,IAAI,CAAC,OAAO;QAAE,OAAO,CAAC,CAAC;IAEvB,IAAI,OAAO,OAAO,KAAK,QAAQ,EAAE,CAAC;QAChC,kDAAkD;QAClD,OAAO,SAAS,GAAG,OAAO,GAAG,IAAI,CAAC;IACpC,CAAC;IAED,gEAAgE;IAChE,wDAAwD;IACxD,MAAM,oBAAoB,GAAG,SAAS,GAAG,IAAI,CAAC;IAC9C,MAAM,qBAAqB,GAAG,SAAS,GAAG,CAAC,CAAC,CAAC,8BAA8B;IAC3E,OAAO,CAAC,oBAAoB,GAAG,OAAO,CAAC,WAAW,GAAG,qBAAqB,GAAG,OAAO,CAAC,YAAY,CAAC,GAAG,OAAO,CAAC;AAC/G,CAAC"}
|
||||
4
dist/utils/index.d.ts
vendored
Normal file
4
dist/utils/index.d.ts
vendored
Normal file
@@ -0,0 +1,4 @@
|
||||
export * from './mediaUtils';
|
||||
export * from './processor';
|
||||
export * from './costEstimator';
|
||||
export * from './configUtils';
|
||||
21
dist/utils/index.js
vendored
Normal file
21
dist/utils/index.js
vendored
Normal file
@@ -0,0 +1,21 @@
|
||||
"use strict";
|
||||
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
||||
if (k2 === undefined) k2 = k;
|
||||
var desc = Object.getOwnPropertyDescriptor(m, k);
|
||||
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
||||
desc = { enumerable: true, get: function() { return m[k]; } };
|
||||
}
|
||||
Object.defineProperty(o, k2, desc);
|
||||
}) : (function(o, m, k, k2) {
|
||||
if (k2 === undefined) k2 = k;
|
||||
o[k2] = m[k];
|
||||
}));
|
||||
var __exportStar = (this && this.__exportStar) || function(m, exports) {
|
||||
for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
|
||||
};
|
||||
Object.defineProperty(exports, "__esModule", { value: true });
|
||||
__exportStar(require("./mediaUtils"), exports);
|
||||
__exportStar(require("./processor"), exports);
|
||||
__exportStar(require("./costEstimator"), exports);
|
||||
__exportStar(require("./configUtils"), exports);
|
||||
//# sourceMappingURL=index.js.map
|
||||
1
dist/utils/index.js.map
vendored
Normal file
1
dist/utils/index.js.map
vendored
Normal file
@@ -0,0 +1 @@
|
||||
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/utils/index.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;AAAA,+CAA6B;AAC7B,8CAA4B;AAC5B,kDAAgC;AAChC,gDAA8B"}
|
||||
37
dist/utils/mediaUtils.d.ts
vendored
Normal file
37
dist/utils/mediaUtils.d.ts
vendored
Normal file
@@ -0,0 +1,37 @@
|
||||
import { AudioSegment } from '../interfaces';
|
||||
import { Config } from '../config/config';
|
||||
/**
 * Get the duration of a video file in seconds.
 *
 * The compiled implementation runs `ffprobe` synchronously and applies
 * `parseFloat` to its output, so the result is `NaN` when ffprobe prints
 * something that is not a number.
 *
 * @param videoFilePath - Path to the video file
 * @returns Duration in seconds
 */
|
||||
export declare function getVideoDuration(videoFilePath: string): number;
|
||||
/**
 * Capture a frame from a video at a specific time position.
 *
 * The compiled implementation runs `ffmpeg` synchronously and overwrites
 * `outputPath` if it already exists.
 *
 * @param videoFilePath - Path to the video file
 * @param timePosition - Time position in seconds
 * @param outputPath - Output path for the captured frame
 * @param lowQuality - If true, save screenshot in 360p resolution (the implementation defaults this to `true` when omitted)
 */
|
||||
export declare function captureVideoFrame(videoFilePath: string, timePosition: number, outputPath: string, lowQuality?: boolean): void;
|
||||
/**
 * Get the duration of an audio file in seconds.
 *
 * The compiled implementation runs `ffprobe` synchronously and applies
 * `parseFloat` to its output, so the result is `NaN` when ffprobe prints
 * something that is not a number.
 *
 * @param audioFilePath - Path to the audio file
 * @returns Duration in seconds
 */
|
||||
export declare function getAudioDuration(audioFilePath: string): number;
|
||||
/**
 * Combine audio segments into a single audio track using lossless intermediates.
 *
 * Each segment is placed at its `startTime` on top of a silent track that
 * spans the whole video. Intermediate files are WAV files created under
 * `settings.tempDir`; MP3 encoding happens only at the end, when `outputPath`
 * ends in `.mp3`.
 *
 * @param segments - Array of audio segment information
 * @param outputPath - Output path for the combined audio
 * @param videoDuration - Duration of the video in seconds
 * @param settings - Configuration settings
 * @returns `outputPath` when the track was produced; otherwise an object whose
 *          `commandFile` is the path of a generated shell script that must be
 *          run manually to build the track
 */
|
||||
export declare function combineAudioSegments(segments: AudioSegment[], outputPath: string, videoDuration: number, settings: Config): string | {
|
||||
commandFile: string;
|
||||
};
|
||||
/**
 * Clean up temporary files.
 *
 * The compiled implementation deletes the entries found directly inside
 * `tempDir`; the directory itself is left in place.
 *
 * @param tempDir - Directory containing temporary files
 */
|
||||
export declare function cleanupTempFiles(tempDir: string): void;
|
||||
261
dist/utils/mediaUtils.js
vendored
Normal file
261
dist/utils/mediaUtils.js
vendored
Normal file
@@ -0,0 +1,261 @@
|
||||
"use strict";
|
||||
var __importDefault = (this && this.__importDefault) || function (mod) {
|
||||
return (mod && mod.__esModule) ? mod : { "default": mod };
|
||||
};
|
||||
Object.defineProperty(exports, "__esModule", { value: true });
|
||||
exports.getVideoDuration = getVideoDuration;
|
||||
exports.captureVideoFrame = captureVideoFrame;
|
||||
exports.getAudioDuration = getAudioDuration;
|
||||
exports.combineAudioSegments = combineAudioSegments;
|
||||
exports.cleanupTempFiles = cleanupTempFiles;
|
||||
const child_process_1 = require("child_process");
|
||||
const fs_1 = __importDefault(require("fs"));
|
||||
const path_1 = __importDefault(require("path"));
|
||||
/**
 * Get the duration of a video file in seconds.
 *
 * Runs `ffprobe` synchronously. The path is passed as a separate argument
 * rather than being interpolated into a shell command line, so file names
 * containing quotes, `$`, backticks or spaces are handled safely.
 *
 * @param videoFilePath - Path to the video file
 * @returns Duration in seconds (`NaN` if ffprobe's output is not numeric)
 * @throws If ffprobe cannot be started or exits with a non-zero status
 */
function getVideoDuration(videoFilePath) {
    const result = (0, child_process_1.execFileSync)('ffprobe', [
        '-v', 'error',
        '-show_entries', 'format=duration',
        '-of', 'default=noprint_wrappers=1:nokey=1',
        videoFilePath
    ]);
    return parseFloat(result.toString());
}
|
||||
/**
 * Capture a frame from a video at a specific time position.
 *
 * Runs `ffmpeg` synchronously and overwrites `outputPath` if it exists. Paths
 * are passed as separate arguments instead of being interpolated into a shell
 * command line, so file names containing quotes, `$`, backticks or spaces are
 * handled safely.
 *
 * @param videoFilePath - Path to the video file
 * @param timePosition - Time position in seconds
 * @param outputPath - Output path for the captured frame
 * @param lowQuality - If true (the default), save screenshot in 360p resolution
 * @throws If ffmpeg cannot be started or exits with a non-zero status
 */
function captureVideoFrame(videoFilePath, timePosition, outputPath, lowQuality = true) {
    const args = [
        '-y', // overwrite the output file without prompting
        '-v', 'error',
        '-ss', String(timePosition),
        '-i', videoFilePath,
        '-vframes', '1',
        '-q:v', '2'
    ];
    // Add resolution scaling for low quality option
    if (lowQuality) {
        // Scale to 360p height while maintaining aspect ratio
        args.push('-vf', 'scale=-1:360');
    }
    args.push(outputPath);
    (0, child_process_1.execFileSync)('ffmpeg', args);
}
|
||||
/**
 * Get the duration of an audio file in seconds.
 *
 * Runs `ffprobe` synchronously. The path is passed as a separate argument
 * rather than being interpolated into a shell command line, so file names
 * containing quotes, `$`, backticks or spaces are handled safely.
 *
 * @param audioFilePath - Path to the audio file
 * @returns Duration in seconds (`NaN` if ffprobe's output is not numeric)
 * @throws If ffprobe cannot be started or exits with a non-zero status
 */
function getAudioDuration(audioFilePath) {
    const result = (0, child_process_1.execFileSync)('ffprobe', [
        '-v', 'error',
        '-show_entries', 'format=duration',
        '-of', 'default=noprint_wrappers=1:nokey=1',
        audioFilePath
    ]);
    return parseFloat(result.toString());
}
|
||||
/**
 * Combine audio segments into a single audio track using lossless intermediates.
 *
 * Three strategies are tried in order:
 *  1. mix the segments one at a time onto a silent base track;
 *  2. mix all segments in a single ffmpeg invocation;
 *  3. write an executable shell script containing the equivalent ffmpeg
 *     commands, to be run manually.
 *
 * All intermediates are 44.1 kHz stereo PCM WAV files under
 * `settings.tempDir`; MP3 encoding happens only once, at the very end, when
 * `outputPath` ends in `.mp3`. ffmpeg is invoked with argument arrays (no
 * shell), so paths containing quotes, `$` or spaces are safe.
 *
 * @param segments - Array of audio segment information (`audioFile`, `startTime`)
 * @param outputPath - Output path for the combined audio
 * @param videoDuration - Duration of the video in seconds
 * @param settings - Configuration settings (`tempDir` is used)
 * @returns `outputPath` on success, or `{ commandFile }` pointing at the
 *          generated script when both automatic strategies failed
 */
function combineAudioSegments(segments, outputPath, videoDuration, settings) {
    console.log(`Combining ${segments.length} audio segments using lossless intermediates...`);
    try {
        return mixSegmentsSequentially(segments, outputPath, videoDuration, settings);
    }
    catch (error) {
        console.error("Error in lossless audio combination:", describeCombineError(error));
        try {
            console.log("Trying alternative approach with single-step filter...");
            return mixSegmentsInSinglePass(segments, outputPath, videoDuration, settings);
        }
        catch (secondError) {
            console.error("Alternative approach failed:", describeCombineError(secondError));
            // Last resort: Generate a command file with the proper syntax
            return writeCombineFallbackScript(segments, outputPath, videoDuration, settings);
        }
    }
}
/** Human-readable message for anything that may have been thrown. */
function describeCombineError(error) {
    return error instanceof Error ? error.message : String(error);
}
/** Run ffmpeg without a shell, overwriting outputs and printing errors only. */
function runFfmpegForCombine(args) {
    (0, child_process_1.execFileSync)('ffmpeg', ['-y', '-v', 'error', ...args]);
}
/** Best-effort removal of a temporary file; failures are reported, never thrown. */
function removeCombineTempFile(filePath) {
    try {
        if (fs_1.default.existsSync(filePath)) {
            fs_1.default.unlinkSync(filePath);
        }
    }
    catch (cleanupError) {
        console.warn(`Could not remove temporary file ${filePath}:`, describeCombineError(cleanupError));
    }
}
/** Create a silent 44.1 kHz stereo WAV lasting `videoDuration` seconds. */
function createSilentBaseTrack(silentBasePath, videoDuration) {
    runFfmpegForCombine([
        '-f', 'lavfi', '-i', 'anullsrc=r=44100:cl=stereo',
        '-t', String(videoDuration),
        '-c:a', 'pcm_s16le', silentBasePath
    ]);
}
/** Convert one segment to the standard 44.1 kHz stereo PCM WAV format. */
function standardizeSegmentAudio(inputPath, wavPath) {
    runFfmpegForCombine(['-i', inputPath, '-ar', '44100', '-ac', '2', '-c:a', 'pcm_s16le', wavPath]);
}
/** Produce the final file: encode to MP3 for `.mp3` outputs, otherwise copy the WAV. */
function writeFinalCombinedAudio(wavPath, outputPath, mp3LogMessage) {
    if (path_1.default.extname(outputPath).toLowerCase() === '.mp3') {
        console.log(mp3LogMessage);
        runFfmpegForCombine(['-i', wavPath, '-c:a', 'libmp3lame', '-q:a', '2', outputPath]);
    }
    else {
        fs_1.default.copyFileSync(wavPath, outputPath);
    }
}
/** Strategy 1: add one segment at a time onto the running mix. */
function mixSegmentsSequentially(segments, outputPath, videoDuration, settings) {
    // Every temp file is registered here so it is removed even when a step fails
    const tempFiles = new Set();
    const tempPath = (name) => {
        const fullPath = path_1.default.join(settings.tempDir, name);
        tempFiles.add(fullPath);
        return fullPath;
    };
    try {
        // Create a silent base track with the full video duration (always WAV)
        const silentBasePath = tempPath('silent_base.wav');
        createSilentBaseTrack(silentBasePath, videoDuration);
        // Sort segments by start time to process them in order
        const sortedSegments = [...segments].sort((a, b) => a.startTime - b.startTime);
        let currentAudioPath = silentBasePath;
        for (let i = 0; i < sortedSegments.length; i++) {
            const segment = sortedSegments[i];
            const outputFile = tempPath(`segment_${i}_output.wav`);
            const standardizedSegment = tempPath(`segment_${i}_std.wav`);
            standardizeSegmentAudio(segment.audioFile, standardizedSegment);
            // adelay takes one delay per channel, in milliseconds
            const delayMs = Math.round(segment.startTime * 1000);
            const filterPath = tempPath(`filter_${i}.txt`);
            const filterContent = `[1:a]adelay=${delayMs}|${delayMs}[delayed];\n` +
                `[0:a][delayed]amix=inputs=2:duration=first:dropout_transition=0:normalize=0[out]`;
            fs_1.default.writeFileSync(filterPath, filterContent);
            runFfmpegForCombine([
                '-i', currentAudioPath,
                '-i', standardizedSegment,
                '-filter_complex_script', filterPath,
                '-map', '[out]',
                '-c:a', 'pcm_s16le', outputFile
            ]);
            // The previous mix and this segment's intermediates are no longer needed
            if (currentAudioPath !== silentBasePath) {
                removeCombineTempFile(currentAudioPath);
            }
            removeCombineTempFile(standardizedSegment);
            removeCombineTempFile(filterPath);
            currentAudioPath = outputFile;
            console.log(`Added segment ${i + 1}/${sortedSegments.length} at position ${segment.startTime.toFixed(3)}s`);
        }
        // Only at the very end, convert to the requested output format
        writeFinalCombinedAudio(currentAudioPath, outputPath, `Converting final lossless WAV to MP3: ${outputPath}`);
        console.log(`Audio description track created: ${outputPath}`);
        return outputPath;
    }
    finally {
        tempFiles.forEach(removeCombineTempFile);
    }
}
/** Strategy 2: delay every segment and mix them all in one ffmpeg invocation. */
function mixSegmentsInSinglePass(segments, outputPath, videoDuration, settings) {
    const tempFiles = [];
    const tempPath = (name) => {
        const fullPath = path_1.default.join(settings.tempDir, name);
        tempFiles.push(fullPath);
        return fullPath;
    };
    try {
        // Create a silent base track (always WAV)
        const silentBasePath = tempPath('silent_base.wav');
        createSilentBaseTrack(silentBasePath, videoDuration);
        const filterScriptPath = tempPath('overlay_filter.txt');
        const sortedSegments = [...segments].sort((a, b) => a.startTime - b.startTime);
        // Input 0 is the silent base; segment i becomes input i + 1
        const inputArgs = ['-i', silentBasePath];
        const delayFilters = [];
        const mixLabels = ['[0:a]'];
        sortedSegments.forEach((segment, i) => {
            const stdPath = tempPath(`std_${i}.wav`);
            standardizeSegmentAudio(segment.audioFile, stdPath);
            inputArgs.push('-i', stdPath);
            const delay = Math.round(segment.startTime * 1000);
            delayFilters.push(`[${i + 1}:a]adelay=${delay}|${delay}[a${i}];\n`);
            mixLabels.push(`[a${i}]`);
        });
        // Use amix with normalize=0 to preserve volumes
        const filterScript = delayFilters.join('') + mixLabels.join('') +
            `amix=inputs=${sortedSegments.length + 1}:normalize=0:duration=first[aout]`;
        fs_1.default.writeFileSync(filterScriptPath, filterScript);
        // Always mix to an intermediate WAV first to maintain quality
        const intermediatePath = tempPath('intermediate_output.wav');
        runFfmpegForCombine([
            ...inputArgs,
            '-filter_complex_script', filterScriptPath,
            '-map', '[aout]',
            '-c:a', 'pcm_s16le', intermediatePath
        ]);
        // Convert to the requested format only at the end
        writeFinalCombinedAudio(intermediatePath, outputPath, `Converting final audio to MP3...`);
        console.log(`Audio description track created with alternative method: ${outputPath}`);
        return outputPath;
    }
    finally {
        tempFiles.forEach(removeCombineTempFile);
    }
}
/** Quote a value for safe use as a single word in a POSIX shell script. */
function shellQuoteForCombineScript(value) {
    return "'" + String(value).replace(/'/g, "'\\''") + "'";
}
/** Strategy 3: write an executable script that performs the single-pass mix. */
function writeCombineFallbackScript(segments, outputPath, videoDuration, settings) {
    const quote = shellQuoteForCombineScript;
    const tempFile = (name) => quote(`${settings.tempDir}/${name}`);
    // Swap the extension for the script suffix; when there is no extension,
    // append instead so the script can never overwrite the audio output path.
    const extensionPattern = /\.\w+$/;
    const cmdFilePath = extensionPattern.test(outputPath)
        ? outputPath.replace(extensionPattern, '_ffmpeg_cmd.sh')
        : `${outputPath}_ffmpeg_cmd.sh`;
    let cmdContent = `#!/bin/bash\n\n# FFmpeg command to combine audio segments\n\n`;
    // Add commands to convert all segments to WAV first
    cmdContent += `# First convert all segments to standard WAV format\n`;
    segments.forEach((segment, i) => {
        cmdContent += `ffmpeg -i ${quote(segment.audioFile)} -ar 44100 -ac 2 -c:a pcm_s16le ${tempFile(`std_${i}.wav`)} -y\n`;
    });
    // Create silent base
    cmdContent += `\n# Create silent base track\n`;
    cmdContent += `ffmpeg -f lavfi -i anullsrc=r=44100:cl=stereo -t ${videoDuration} -c:a pcm_s16le ${tempFile('silent_base.wav')} -y\n\n`;
    // Create filter file; the quoted delimiter disables expansion inside the here-document
    cmdContent += `# Create filter file\n`;
    cmdContent += `cat > ${tempFile('filter.txt')} << 'EOL'\n`;
    segments.forEach((segment, i) => {
        const delay = Math.round(segment.startTime * 1000);
        cmdContent += `[${i + 1}:a]adelay=${delay}|${delay}[a${i}];\n`;
    });
    // Mix all streams
    cmdContent += `[0:a]`;
    cmdContent += segments.map((_segment, i) => `[a${i}]`).join('');
    cmdContent += `amix=inputs=${segments.length + 1}:normalize=0:duration=first[aout]\nEOL\n\n`;
    // Final command
    cmdContent += `# Run final FFmpeg command\n`;
    cmdContent += `ffmpeg -i ${tempFile('silent_base.wav')} `;
    segments.forEach((_segment, i) => {
        cmdContent += `-i ${tempFile(`std_${i}.wav`)} `;
    });
    cmdContent += `-filter_complex_script ${tempFile('filter.txt')} -map "[aout]" `;
    if (path_1.default.extname(outputPath).toLowerCase() === '.mp3') {
        cmdContent += `-c:a libmp3lame -q:a 2 `;
    }
    else {
        cmdContent += `-c:a pcm_s16le `;
    }
    cmdContent += `${quote(outputPath)} -y\n\n`;
    // Add cleanup
    cmdContent += `# Clean up temp files\n`;
    cmdContent += `rm ${tempFile('silent_base.wav')} ${tempFile('filter.txt')}\n`;
    segments.forEach((_segment, i) => {
        cmdContent += `rm ${tempFile(`std_${i}.wav`)}\n`;
    });
    // Write the script and make it executable (no shell involved)
    fs_1.default.writeFileSync(cmdFilePath, cmdContent);
    fs_1.default.chmodSync(cmdFilePath, 0o755);
    console.log(`\nCreated executable script with proper FFmpeg commands: ${cmdFilePath}`);
    console.log(`Run this script to generate the audio file.`);
    return {
        commandFile: cmdFilePath
    };
}
|
||||
/**
 * Clean up temporary files
 *
 * Deletes every non-directory entry found directly inside `tempDir`; the
 * directory itself is left in place. A missing directory is treated as
 * "nothing to clean", and sub-directories are skipped, because `unlinkSync`
 * cannot remove them and would otherwise abort the cleanup half-way through.
 *
 * @param tempDir - Directory containing temporary files
 */
function cleanupTempFiles(tempDir) {
    // Nothing to do when the directory was never created or is already gone.
    if (!fs_1.default.existsSync(tempDir)) {
        return;
    }
    const entries = fs_1.default.readdirSync(tempDir, { withFileTypes: true });
    for (const entry of entries) {
        // unlinkSync throws on directories; leave them untouched.
        if (entry.isDirectory()) {
            continue;
        }
        fs_1.default.unlinkSync(path_1.default.join(tempDir, entry.name));
    }
}
|
||||
//# sourceMappingURL=mediaUtils.js.map
|
||||
1
dist/utils/mediaUtils.js.map
vendored
Normal file
1
dist/utils/mediaUtils.js.map
vendored
Normal file
File diff suppressed because one or more lines are too long
21
dist/utils/processor.d.ts
vendored
Normal file
21
dist/utils/processor.d.ts
vendored
Normal file
@@ -0,0 +1,21 @@
|
||||
import { VisionProvider, TTSProvider, Stats, ProcessingResult, ProcessingOptions } from '../interfaces';
|
||||
import { Config } from '../config/config';
|
||||
/**
|
||||
* High-level API: Generate audio description for a video with just options.
|
||||
* This internally creates providers and stats so callers don't need to.
|
||||
*
|
||||
* @param videoFilePath - Path to the input video file
|
||||
* @param options - Optional configuration overrides
* @param processingOptions - Optional resume state (startIndex, currentTimePosition, existingSegments, lastContext) and an onProgress callback
|
||||
* @returns Result of the operation
|
||||
*/
|
||||
export declare function generateAudioDescriptionFromOptions(videoFilePath: string, options?: Partial<Config>, processingOptions?: ProcessingOptions): Promise<ProcessingResult>;
|
||||
/**
|
||||
* Generate audio description for a video (low-level API requiring pre-initialized providers).
|
||||
* @param videoFilePath - Path to the input video file
|
||||
* @param visionProvider - Vision provider instance
|
||||
* @param ttsProvider - TTS provider instance
|
||||
* @param options - Optional configuration overrides
|
||||
* @param stats - Stats object for tracking
* @param processingOptions - Optional resume state (startIndex, currentTimePosition, existingSegments, lastContext) and an onProgress callback
|
||||
* @returns Result of the operation
|
||||
*/
|
||||
export declare function generateAudioDescription(videoFilePath: string, visionProvider: VisionProvider, ttsProvider: TTSProvider, options: Partial<Config> | undefined, stats: Stats, processingOptions?: ProcessingOptions): Promise<ProcessingResult>;
|
||||
295
dist/utils/processor.js
vendored
Normal file
295
dist/utils/processor.js
vendored
Normal file
@@ -0,0 +1,295 @@
|
||||
"use strict";
|
||||
var __importDefault = (this && this.__importDefault) || function (mod) {
|
||||
return (mod && mod.__esModule) ? mod : { "default": mod };
|
||||
};
|
||||
Object.defineProperty(exports, "__esModule", { value: true });
|
||||
exports.generateAudioDescriptionFromOptions = generateAudioDescriptionFromOptions;
|
||||
exports.generateAudioDescription = generateAudioDescription;
|
||||
const fs_1 = __importDefault(require("fs"));
|
||||
const path_1 = __importDefault(require("path"));
|
||||
const config_1 = require("../config/config");
|
||||
const stats_1 = require("../config/stats");
|
||||
const visionProviderFactory_1 = require("../providers/vision/visionProviderFactory");
|
||||
const ttsProviderFactory_1 = require("../providers/tts/ttsProviderFactory");
|
||||
const mediaUtils_1 = require("./mediaUtils");
|
||||
/**
 * High-level entry point: describe a video given nothing but option overrides.
 *
 * The overrides are layered on top of the default configuration, the temp and
 * output directories are created when absent, and the vision provider, TTS
 * provider and stats object are built here before delegating to
 * generateAudioDescription.
 *
 * @param videoFilePath - Path to the input video file
 * @param options - Optional configuration overrides
 * @param processingOptions - Optional resume/progress options, forwarded unchanged
 * @returns Result of the operation
 */
async function generateAudioDescriptionFromOptions(videoFilePath, options = {}, processingOptions = {}) {
    const mergedConfig = { ...(0, config_1.getDefaultConfig)(), ...options };
    // Working directories: temp first, then output.
    for (const directory of [mergedConfig.tempDir, mergedConfig.outputDir]) {
        if (fs_1.default.existsSync(directory)) {
            continue;
        }
        fs_1.default.mkdirSync(directory, { recursive: true });
    }
    const vision = visionProviderFactory_1.VisionProviderFactory.getProvider(mergedConfig);
    const tts = ttsProviderFactory_1.TTSProviderFactory.getProvider(mergedConfig);
    const runStats = (0, stats_1.createStats)();
    return generateAudioDescription(videoFilePath, vision, tts, mergedConfig, runStats, processingOptions);
}
|
||||
/**
 * Generate audio description for a video (low-level API requiring pre-initialized providers).
 *
 * Frames are captured on the fixed `captureIntervalSeconds` schedule. Each
 * spoken description is placed at the earliest position that does not overlap
 * the previous one (plus a 0.25 s pause), so narration may drift behind the
 * capture schedule; a warning is logged once the drift exceeds two intervals.
 * When `batchTimeMode` is set the work is delegated to
 * generateAudioDescriptionBatch.
 *
 * Note: when resuming with `processingOptions.startIndex > 0` the frame
 * context starts empty, so the first processed frame is described on its own
 * (`defaultPrompt`) rather than compared with its predecessor.
 *
 * @param videoFilePath - Path to the input video file
 * @param visionProvider - Vision provider instance
 * @param ttsProvider - TTS provider instance
 * @param options - Optional configuration overrides, merged over the default configuration
 * @param stats - Stats object for tracking
 * @param processingOptions - Optional resume state (`startIndex`, `currentTimePosition`, `existingSegments`) and `onProgress` callback
 * @returns Result of the operation
 */
async function generateAudioDescription(videoFilePath, visionProvider, ttsProvider, options = {}, stats, processingOptions = {}) {
    // Merge provided options with defaults. Without the defaults a partial (or
    // omitted) options object leaves tempDir/outputDir/captureIntervalSeconds
    // undefined and the function fails on the first mkdir.
    const settings = { ...(0, config_1.getDefaultConfig)(), ...options };
    // Ensure temporary and output directories exist
    if (!fs_1.default.existsSync(settings.tempDir)) {
        fs_1.default.mkdirSync(settings.tempDir, { recursive: true });
    }
    if (!fs_1.default.existsSync(settings.outputDir)) {
        fs_1.default.mkdirSync(settings.outputDir, { recursive: true });
    }
    // Get video duration and derive the number of frames to capture
    const videoDuration = (0, mediaUtils_1.getVideoDuration)(videoFilePath);
    const totalFrames = Math.floor(videoDuration / settings.captureIntervalSeconds);
    stats.totalFrames = totalFrames;
    console.log(`Video duration: ${videoDuration} seconds`);
    // If batchTimeMode is enabled, use the new approach
    if (settings.batchTimeMode) {
        return await generateAudioDescriptionBatch(videoFilePath, videoDuration, settings, visionProvider, ttsProvider, stats, processingOptions);
    }
    console.log(`Will capture ${totalFrames} frames at ${settings.captureIntervalSeconds} second intervals`);
    // Context window to store previous frames
    const frameContext = [];
    // Array to store audio segment information - preload with existing segments if resuming
    const audioSegments = processingOptions.existingSegments
        ? [...processingOptions.existingSegments]
        : [];
    // Track our current time position (will be adjusted for audio overlap)
    let currentTimePosition = processingOptions.currentTimePosition || 0;
    // Start from given index if resuming
    const startIndex = processingOptions.startIndex || 0;
    // Maximum drift from the original schedule before warning
    const maxAllowableDrift = settings.captureIntervalSeconds * 2;
    // Small pause between descriptions to prevent hard cuts
    const bufferTime = 0.25;
    // Process each frame
    for (let i = startIndex; i < totalFrames; i++) {
        // Ideal position on the original schedule vs. the position adjusted
        // for the durations of the previous descriptions
        const idealTimePosition = i * settings.captureIntervalSeconds;
        const timePosition = currentTimePosition;
        // Log if drift from the original schedule is becoming significant
        const timelineDrift = timePosition - idealTimePosition;
        if (Math.abs(timelineDrift) > maxAllowableDrift) {
            console.warn(`WARNING: Timeline drift at frame ${i} is ${timelineDrift.toFixed(2)} seconds.`);
        }
        const frameFilePath = path_1.default.join(settings.tempDir, `frame_${i.toString().padStart(5, '0')}.jpg`);
        // The frame is always captured at the ideal time; only the narration is shifted
        (0, mediaUtils_1.captureVideoFrame)(videoFilePath, idealTimePosition, frameFilePath);
        console.log(`Captured frame at ${idealTimePosition} seconds (scheduled at ${timePosition.toFixed(2)} seconds)`);
        // Add current frame to context and keep the window at the configured size
        frameContext.push({
            index: i,
            path: frameFilePath,
            timePosition
        });
        if (frameContext.length > settings.contextWindowSize) {
            frameContext.shift();
        }
        // Generate description
        let description;
        let usageStats;
        if (frameContext.length === 1) {
            // First frame - just describe what's in it
            const result = await visionProvider.describeImage(frameFilePath, settings.defaultPrompt);
            description = result.description;
            usageStats = result.usage;
        }
        else {
            // Compare with previous frame
            const previousFrame = frameContext[frameContext.length - 2];
            const result = await visionProvider.compareImages(previousFrame.path, frameFilePath, settings.changePrompt);
            description = result.description;
            usageStats = result.usage;
        }
        // Update stats
        stats.totalVisionInputCost += usageStats.inputTokens;
        stats.totalVisionOutputCost += usageStats.outputTokens;
        stats.totalCost += usageStats.totalTokens;
        console.log(`Description: ${description}`);
        // Generate speech from description
        const audioFilePath = path_1.default.join(settings.tempDir, `audio_${i.toString().padStart(5, '0')}.mp3`);
        const ttsResult = await ttsProvider.textToSpeech(description, audioFilePath, {
            voice: settings.ttsVoice,
            model: settings.ttsProviders[settings.ttsProvider].model,
            speedFactor: settings.ttsSpeedFactor,
            instructions: settings.ttsInstructions
        });
        const audioDuration = ttsResult.duration;
        stats.totalTTSCost += ttsResult.cost;
        console.log(`Audio duration: ${audioDuration} seconds`);
        // Store segment information
        const segment = {
            audioFile: audioFilePath,
            startTime: timePosition,
            duration: audioDuration,
            description
        };
        audioSegments.push(segment);
        // Notify progress callback
        if (processingOptions.onProgress) {
            processingOptions.onProgress({
                type: 'frame',
                index: i,
                total: totalFrames,
                segment
            });
        }
        // Next narration may start once this one (plus the pause) has finished
        currentTimePosition = timePosition + audioDuration + bufferTime;
        // If the audio ended before the next scheduled frame, snap back to the schedule
        const nextIdealPosition = (i + 1) * settings.captureIntervalSeconds;
        if (currentTimePosition < nextIdealPosition) {
            console.log(`Audio finished before next scheduled frame. Catching up with timeline.`);
            currentTimePosition = nextIdealPosition;
        }
    }
    // Combine audio segments into final audio description track
    const outputAudioPath = path_1.default.join(settings.outputDir, `${path_1.default.basename(videoFilePath, path_1.default.extname(videoFilePath))}_description.mp3`);
    (0, mediaUtils_1.combineAudioSegments)(audioSegments, outputAudioPath, videoDuration, settings);
    // Clean up temporary files if desired
    // cleanupTempFiles(settings.tempDir);
    console.log(`\nAudio description generated: ${outputAudioPath}`);
    console.log(`To play with video, use: ffplay -i ${videoFilePath} -i ${outputAudioPath} -map 0:v -map 1:a`);
    (0, stats_1.printStats)(stats, settings);
    return {
        videoFile: videoFilePath,
        audioDescriptionFile: outputAudioPath,
        segments: audioSegments
    };
}
|
||||
/**
 * Batch-mode pipeline: describe the video one time window at a time.
 *
 * For every window of `settings.batchWindowDuration` seconds,
 * `settings.framesInBatch` evenly spaced frames are captured and described in
 * a single vision request, and the description is spoken via the TTS provider.
 * Frames are always taken from the window's nominal position; the narration
 * start is pushed back when the previous narration (plus a 0.5 s pause) has
 * not finished yet.
 *
 * @param videoFilePath - Path to the input video file
 * @param videoDuration - Duration of the video in seconds
 * @param settings - The merged config and user options
 * @param visionProvider - The vision provider instance
 * @param ttsProvider - The TTS provider instance
 * @param stats - Stats object for tracking
 * @param processingOptions - Optional resume state (`startIndex`, `currentTimePosition`, `existingSegments`, `lastContext`) and `onProgress` callback
 * @returns Object with `videoFile`, `audioDescriptionFile` and `segments`
 */
async function generateAudioDescriptionBatch(videoFilePath, videoDuration, settings, visionProvider, ttsProvider, stats, processingOptions = {}) {
    const windowSeconds = settings.batchWindowDuration;
    const batchCount = Math.floor(videoDuration / windowSeconds);
    console.log(`Using batchTimeMode. Total batches: ${batchCount} (each covers ${windowSeconds} sec)`);
    // Context passed to the vision provider: what the previous batch saw and said
    let previousContext = processingOptions.lastContext || {};
    // Segments collected so far (seeded with earlier ones when resuming)
    const audioSegments = [];
    if (processingOptions.existingSegments) {
        for (const earlier of processingOptions.existingSegments) {
            audioSegments.push(earlier);
        }
    }
    // Where the next narration may start, and which batch to begin with
    let narrationCursor = processingOptions.currentTimePosition || 0;
    const firstBatch = processingOptions.startIndex || 0;
    // Warn once narration lags the schedule by more than half a window
    const driftLimit = windowSeconds * 0.5;
    // Pause inserted between consecutive narrations
    const pauseSeconds = 0.5;
    for (let batchIndex = firstBatch; batchIndex < batchCount; batchIndex++) {
        // Nominal window on the original schedule
        const windowStart = batchIndex * windowSeconds;
        const windowEnd = windowStart + windowSeconds;
        // Actual narration start, shifted by earlier audio durations
        const narrationStart = narrationCursor;
        const drift = narrationStart - windowStart;
        if (Math.abs(drift) > driftLimit) {
            console.warn(`WARNING: Timeline drift at batch ${batchIndex} is ${drift.toFixed(2)} seconds.`);
        }
        // Safety check: never read past the end of the video
        if (windowEnd > videoDuration) {
            break;
        }
        console.log(`Processing batch #${batchIndex}: Original time window ${windowStart}-${windowEnd} sec, scheduled at ${narrationStart.toFixed(2)} sec`);
        // Grab the frames of this window at their nominal timestamps
        const framePaths = [];
        for (let frameNo = 0; frameNo < settings.framesInBatch; frameNo++) {
            const captureAt = windowStart + (frameNo * windowSeconds) / settings.framesInBatch;
            const framePath = path_1.default.join(settings.tempDir, `batch_${batchIndex}_frame_${frameNo}.jpg`);
            (0, mediaUtils_1.captureVideoFrame)(videoFilePath, captureAt, framePath);
            framePaths.push(framePath);
        }
        // One vision request per batch, with the previous batch as context
        const { description, usage } = await visionProvider.describeBatch(framePaths, previousContext, settings.batchPrompt);
        stats.totalVisionInputCost += usage.inputTokens;
        stats.totalVisionOutputCost += usage.outputTokens;
        stats.totalCost += usage.totalTokens;
        console.log(`Batch #${batchIndex} description:\n${description}\n`);
        // Speak the description
        const audioFile = path_1.default.join(settings.tempDir, `batch_audio_${batchIndex}.mp3`);
        const speech = await ttsProvider.textToSpeech(description, audioFile, {
            voice: settings.ttsVoice,
            model: settings.ttsProviders[settings.ttsProvider].model,
            speedFactor: settings.ttsSpeedFactor,
            instructions: settings.ttsInstructions
        });
        const audioDuration = speech.duration;
        stats.totalTTSCost += speech.cost;
        console.log(`Batch #${batchIndex} audio duration: ${audioDuration} seconds`);
        // Record the segment at its adjusted start time
        const segment = {
            audioFile: audioFile,
            startTime: narrationStart,
            duration: audioDuration,
            description: description
        };
        audioSegments.push(segment);
        // Report progress to the caller, if requested
        if (processingOptions.onProgress) {
            processingOptions.onProgress({
                type: 'batch',
                index: batchIndex,
                total: batchCount,
                segment: segment
            });
        }
        // Earliest start for the next narration; snap forward to the schedule
        // when this narration ended before the next window begins
        const narrationEnd = narrationStart + audioDuration + pauseSeconds;
        const nextWindowStart = (batchIndex + 1) * windowSeconds;
        if (narrationEnd < nextWindowStart) {
            console.log(`Batch audio finished before next scheduled batch. Catching up with timeline.`);
            narrationCursor = nextWindowStart;
        }
        else {
            narrationCursor = narrationEnd;
        }
        // Remember this batch for the next one (description + its last two frames)
        previousContext = {
            lastDescription: description,
            lastFramePaths: framePaths.slice(-2)
        };
    }
    // Mix all narration segments into a single track
    const videoBaseName = path_1.default.basename(videoFilePath, path_1.default.extname(videoFilePath));
    const outputAudioPath = path_1.default.join(settings.outputDir, `${videoBaseName}_description_batch.mp3`);
    (0, mediaUtils_1.combineAudioSegments)(audioSegments, outputAudioPath, videoDuration, settings);
    console.log(`\nBatch audio description generated: ${outputAudioPath}`);
    console.log(`To play with video, use: ffplay -i ${videoFilePath} -i ${outputAudioPath} -map 0:v -map 1:a`);
    (0, stats_1.printStats)(stats, settings);
    return {
        videoFile: videoFilePath,
        audioDescriptionFile: outputAudioPath,
        segments: audioSegments
    };
}
|
||||
//# sourceMappingURL=processor.js.map
|
||||
1
dist/utils/processor.js.map
vendored
Normal file
1
dist/utils/processor.js.map
vendored
Normal file
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user