"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.estimateCost = estimateCost; const mediaUtils_1 = require("./mediaUtils"); /** * Estimate the cost of generating audio descriptions for a video * @param videoFilePath - Path to the input video file * @param options - Optional configuration overrides * @returns Cost estimation breakdown */ async function estimateCost(videoFilePath, options = {}) { // Merge provided options with defaults const settings = { ...options }; // Get video duration const videoDuration = (0, mediaUtils_1.getVideoDuration)(videoFilePath); console.log(`Video duration: ${videoDuration} seconds`); // Calculate the number of frames or batches to process let totalUnits; let unitCostMultiplier; let unitType; if (settings.batchTimeMode) { totalUnits = Math.floor(videoDuration / settings.batchWindowDuration); unitCostMultiplier = settings.framesInBatch; unitType = "batches"; } else { totalUnits = Math.floor(videoDuration / settings.captureIntervalSeconds); unitCostMultiplier = 1; unitType = "frames"; } console.log(`Will process ${totalUnits} ${unitType}`); // Pricing constants (per 1K units unless otherwise noted) const pricing = { vision: { openai: { 'gpt-4o': { input: 0.0025, output: 0.01 }, 'gpt-5.4-mini': { input: 0.00015, output: 0.0006 }, 'gpt-4o-mini': { input: 0.00015, output: 0.0006 } }, gemini: { 'gemini-2.0-flash': { input: 0.0001, output: 0.0004 }, 'gemini-1.5-flash': { input: 0.000075, output: 0.0003 }, 'gemini-1.5-pro': { input: 0.00125, output: 0.005 } }, openrouter: { 'anthropic/claude-sonnet-4.5': { input: 0.003, output: 0.015 }, 'anthropic/claude-3.5-sonnet': { input: 0.003, output: 0.015 }, 'anthropic/claude-3-haiku': { input: 0.0008, output: 0.004 }, 'google/gemini-2.0-flash-001': { input: 0.0001, output: 0.0004 } } }, tts: { openai: { 'tts-1': 0.015, 'tts-1-hd': 0.030, 'gpt-4o-mini-tts': { inputTokens: 0.60, outputTokens: 12.00 } }, elevenlabs: { 'eleven_multilingual_v2': 0.30, 'eleven_turbo_v2.5': 0.015 }, google: { 'chirp-hd': 0.016, 'wavenet': 0.016, 'neural2': 0.016, 'standard': 0.004 } } }; // Get the pricing for the selected providers const visionProvider = settings.visionProvider; const visionModel = settings.visionProviders[visionProvider].model; const ttsProvider = settings.ttsProvider; const ttsModel = settings.ttsProviders[ttsProvider].model; // Check if the pricing data exists const visionPricing = pricing.vision[visionProvider]?.[visionModel]; const ttsPricing = pricing.tts[ttsProvider]?.[ttsModel]; if (!visionPricing) { console.warn(`Warning: No pricing data for vision provider "${visionProvider}" and model "${visionModel}".`); } if (!ttsPricing) { console.warn(`Warning: No pricing data for TTS provider "${ttsProvider}" and model "${ttsModel}".`); } // Estimated token counts const estimatedVisionInputTokens = 1000 * unitCostMultiplier; const estimatedPromptTokens = 100; const estimatedOutputTokensPerUnit = 75; // Estimated character counts for TTS const estimatedCharsPerDescription = 200; // Calculate estimated costs for first unit const firstUnitCost = { visionInput: (estimatedVisionInputTokens + estimatedPromptTokens) * (visionPricing?.input || 0) / 1000, visionOutput: estimatedOutputTokensPerUnit * (visionPricing?.output || 0) / 1000, tts: calculateTTSCost(estimatedCharsPerDescription, ttsPricing) }; // For subsequent units, we need context (e.g., previous frames) const contextMultiplier = settings.batchTimeMode ? 1.2 : 2; const subsequentUnitCost = { visionInput: (estimatedVisionInputTokens * contextMultiplier + estimatedPromptTokens) * (visionPricing?.input || 0) / 1000, visionOutput: estimatedOutputTokensPerUnit * (visionPricing?.output || 0) / 1000, tts: calculateTTSCost(estimatedCharsPerDescription, ttsPricing) }; // Calculate total costs const totalVisionInputCost = firstUnitCost.visionInput + (totalUnits - 1) * subsequentUnitCost.visionInput; const totalVisionOutputCost = firstUnitCost.visionOutput + (totalUnits - 1) * subsequentUnitCost.visionOutput; const totalTTSCost = firstUnitCost.tts + (totalUnits - 1) * subsequentUnitCost.tts; const totalCost = totalVisionInputCost + totalVisionOutputCost + totalTTSCost; // Create cost breakdown const costBreakdown = { videoInfo: { duration: videoDuration, totalUnits: totalUnits, unitType: unitType, processingInterval: settings.batchTimeMode ? settings.batchWindowDuration : settings.captureIntervalSeconds }, providerInfo: { visionProvider: visionProvider, visionModel: visionModel, ttsProvider: ttsProvider, ttsModel: ttsModel }, apiCosts: { visionInput: totalVisionInputCost.toFixed(4), visionOutput: totalVisionOutputCost.toFixed(4), tts: totalTTSCost.toFixed(4), total: totalCost.toFixed(4) }, estimates: { totalAPICallsToProviders: totalUnits * 2, estimatedProcessingTimeMinutes: (totalUnits * 3) / 60 } }; return costBreakdown; } function calculateTTSCost(charCount, pricing) { if (!pricing) return 0; if (typeof pricing === 'number') { // Per-character pricing: cost per 1000 characters return charCount * pricing / 1000; } // Per-token pricing (e.g., gpt-4o-mini-tts): cost per 1M tokens // Rough estimate: 1 char ≈ 0.25 tokens for English text const estimatedInputTokens = charCount * 0.25; const estimatedOutputTokens = charCount * 3; // audio output is token-heavy return (estimatedInputTokens * pricing.inputTokens + estimatedOutputTokens * pricing.outputTokens) / 1000000; } //# sourceMappingURL=costEstimator.js.map