import { Config } from '../config/config';
import { CostBreakdown } from '../interfaces';
import { getVideoDuration } from './mediaUtils';

/**
 * Estimate the cost of generating audio descriptions for a video.
 *
 * The estimate is built from a hard-coded pricing table and fixed per-unit
 * token/character assumptions; it never calls a provider API. Progress and
 * missing-pricing warnings are written to the console.
 *
 * @param videoFilePath - Path to the input video file
 * @param options - Configuration values used for the estimate
 * @returns Cost estimation breakdown; monetary amounts are strings with 4 decimals
 */
export async function estimateCost(
  videoFilePath: string,
  options: Partial<Config> = {}
): Promise<CostBreakdown> {
  // NOTE(review): only the caller-supplied options are copied here; no default
  // configuration is merged in, so callers presumably pass a complete Config.
  // Missing fields surface later as NaN counts or a TypeError on provider lookup.
  const settings = { ...options } as Config;

  // Get video duration
  const videoDuration = getVideoDuration(videoFilePath);
  console.log(`Video duration: ${videoDuration} seconds`);

  // Length in seconds of one processing unit (a batch window or a single capture interval)
  const processingInterval = settings.batchTimeMode
    ? settings.batchWindowDuration
    : settings.captureIntervalSeconds;

  // Number of whole units in the video, never below zero. NaN (missing interval)
  // and Infinity (zero interval) are deliberately left to propagate unchanged.
  const totalUnits = Math.max(0, Math.floor(videoDuration / processingInterval));

  // In batch mode each unit carries several frames, which scales the vision input size
  const unitCostMultiplier = settings.batchTimeMode ? settings.framesInBatch : 1;
  const unitType = settings.batchTimeMode ? "batches" : "frames";

  console.log(`Will process ${totalUnits} ${unitType}`);

  // Pricing constants (as of March 2025, update as needed).
  // Vision rates are applied per 1000 tokens, TTS rates per 1000 characters.
  const pricing: {
    vision: Record<string, Record<string, { input: number; output: number }>>;
    tts: Record<string, Record<string, number>>;
  } = {
    vision: {
      openai: {
        'gpt-4o': { input: 0.0025, output: 0.01 }
      },
      gemini: {
        'gemini-pro-vision': { input: 0.0025, output: 0.0025 }
      }
    },
    tts: {
      openai: {
        'tts-1': 0.015,
        'tts-1-hd': 0.030
      }
    }
  };

  // Resolve the selected providers and their models
  const visionProvider = settings.visionProvider;
  const visionModel = settings.visionProviders[visionProvider].model;
  const ttsProvider = settings.ttsProvider;
  const ttsModel = settings.ttsProviders[ttsProvider].model;

  // Look up pricing; an unknown provider/model only warns and is then costed at 0
  const visionPricing = pricing.vision[visionProvider]?.[visionModel];
  const ttsPricing = pricing.tts[ttsProvider]?.[ttsModel];

  if (!visionPricing) {
    console.warn(`Warning: No pricing data for vision provider "${visionProvider}" and model "${visionModel}".`);
  }

  if (!ttsPricing) {
    console.warn(`Warning: No pricing data for TTS provider "${ttsProvider}" and model "${ttsModel}".`);
  }

  const visionInputRate = visionPricing?.input ?? 0;
  const visionOutputRate = visionPricing?.output ?? 0;
  const ttsRate = ttsPricing ?? 0;

  // Estimated token counts
  const estimatedVisionInputTokens = 1000 * unitCostMultiplier; // Base tokens for the vision input
  const estimatedPromptTokens = 100; // Tokens for the prompt text
  const estimatedOutputTokensPerUnit = 75; // Average tokens for description output

  // Estimated character counts for TTS
  const estimatedCharsPerDescription = 200; // Average characters per description

  // Cost of a single unit given the size of its vision input; output and TTS
  // costs do not depend on the input size.
  const costForUnit = (visionInputTokens: number) => ({
    visionInput: (visionInputTokens + estimatedPromptTokens) * visionInputRate / 1000,
    visionOutput: estimatedOutputTokensPerUnit * visionOutputRate / 1000,
    tts: estimatedCharsPerDescription * ttsRate / 1000
  });

  // The first unit has no prior context
  const firstUnitCost = costForUnit(estimatedVisionInputTokens);

  // For subsequent units, we need context (e.g., previous frames)
  const contextMultiplier = settings.batchTimeMode ? 1.2 : 2; // Less overhead in batch mode
  const subsequentUnitCost = costForUnit(estimatedVisionInputTokens * contextMultiplier);

  // One first unit plus (totalUnits - 1) subsequent units. With zero units there
  // is no first unit either, so the total is 0 rather than "first - subsequent"
  // (which would report a negative cost for videos shorter than one interval).
  const totalFor = (first: number, subsequent: number): number =>
    totalUnits === 0 ? 0 : first + (totalUnits - 1) * subsequent;

  const totalVisionInputCost = totalFor(firstUnitCost.visionInput, subsequentUnitCost.visionInput);
  const totalVisionOutputCost = totalFor(firstUnitCost.visionOutput, subsequentUnitCost.visionOutput);
  const totalTTSCost = totalFor(firstUnitCost.tts, subsequentUnitCost.tts);
  const totalCost = totalVisionInputCost + totalVisionOutputCost + totalTTSCost;

  // Create cost breakdown
  const costBreakdown: CostBreakdown = {
    videoInfo: {
      duration: videoDuration,
      totalUnits: totalUnits,
      unitType: unitType,
      processingInterval: processingInterval
    },
    providerInfo: {
      visionProvider: visionProvider,
      visionModel: visionModel,
      ttsProvider: ttsProvider,
      ttsModel: ttsModel
    },
    apiCosts: {
      visionInput: totalVisionInputCost.toFixed(4),
      visionOutput: totalVisionOutputCost.toFixed(4),
      tts: totalTTSCost.toFixed(4),
      total: totalCost.toFixed(4)
    },
    estimates: {
      totalAPICallsToProviders: totalUnits * 2, // Vision + TTS for each unit
      estimatedProcessingTimeMinutes: (totalUnits * 3) / 60 // rough estimate, 3 seconds per unit
    }
  };

  return costBreakdown;
}