Update cost estimator with all new models and per-token TTS pricing support

2026-05-13 02:44:30 +02:00
parent f05e57493c
commit 3a198d7d50
1 changed files with 182 additions and 150 deletions
--- a/src/utils/costEstimator.ts
+++ b/src/utils/costEstimator.ts
@@ -1,150 +1,182 @@
-import { Config } from '../config/config';
+import { Config } from '../config/config';
-import { CostBreakdown } from '../interfaces';
+import { CostBreakdown } from '../interfaces';
-import { getVideoDuration } from './mediaUtils';
+import { getVideoDuration } from './mediaUtils';
-
+
-/**
+type TTSPricingModel = number | { inputTokens: number; outputTokens: number };
- * Estimate the cost of generating audio descriptions for a video
+
- * @param videoFilePath - Path to the input video file
+/**
- * @param options - Optional configuration overrides
+ * Estimate the cost of generating audio descriptions for a video
- * @returns Cost estimation breakdown
+ * @param videoFilePath - Path to the input video file
- */
+ * @param options - Optional configuration overrides
-export async function estimateCost(
+ * @returns Cost estimation breakdown
-  videoFilePath: string, 
+ */
-  options: Partial<Config> = {}
+export async function estimateCost(
-): Promise<CostBreakdown> {
+  videoFilePath: string, 
-  // Merge provided options with defaults
+  options: Partial<Config> = {}
-  const settings = { ...options } as Config;
+): Promise<CostBreakdown> {
-
+  // Shallow-copy the provided options and treat them as a full Config (no defaults are merged in here)
-  // Get video duration
+  const settings = { ...options } as Config;
-  const videoDuration = getVideoDuration(videoFilePath);
+
-  console.log(`Video duration: ${videoDuration} seconds`);
+  // Get video duration
-
+  const videoDuration = getVideoDuration(videoFilePath);
-  // Calculate the number of frames or batches to process
+  console.log(`Video duration: ${videoDuration} seconds`);
-  let totalUnits: number;
+
-  let unitCostMultiplier: number;
+  // Calculate the number of frames or batches to process
-  let unitType: string;
+  let totalUnits: number;
-  
+  let unitCostMultiplier: number;
-  if (settings.batchTimeMode) {
+  let unitType: string;
-    totalUnits = Math.floor(videoDuration / settings.batchWindowDuration);
+  
-    unitCostMultiplier = settings.framesInBatch; // Cost multiplier for batch mode
+  if (settings.batchTimeMode) {
-    unitType = "batches";
+    totalUnits = Math.floor(videoDuration / settings.batchWindowDuration);
-  } else {
+    unitCostMultiplier = settings.framesInBatch;
-    totalUnits = Math.floor(videoDuration / settings.captureIntervalSeconds);
+    unitType = "batches";
-    unitCostMultiplier = 1; // No multiplier for normal mode
+  } else {
-    unitType = "frames";
+    totalUnits = Math.floor(videoDuration / settings.captureIntervalSeconds);
-  }
+    unitCostMultiplier = 1;
-  
+    unitType = "frames";
-  console.log(`Will process ${totalUnits} ${unitType}`);
+  }
-
+  
-  // Pricing constants (as of March 2025, update as needed)
+  console.log(`Will process ${totalUnits} ${unitType}`);
-  const pricing: {
+
-    vision: Record<string, Record<string, { input: number; output: number }>>;
+  // Pricing constants: per 1K tokens (vision) or per 1K characters (numeric TTS entries); object-form TTS entries are per 1M tokens
-    tts: Record<string, Record<string, number>>;
+  const pricing: {
-  } = {
+    vision: Record<string, Record<string, { input: number; output: number }>>;
-    vision: {
+    tts: Record<string, Record<string, TTSPricingModel>>;
-      openai: {
+  } = {
-        'gpt-4o': {
+    vision: {
-          input: 0.0025,
+      openai: {
-          output: 0.01
+        'gpt-4o': { input: 0.0025, output: 0.01 },
-        }
+        'gpt-5.4-mini': { input: 0.00015, output: 0.0006 },
-      },
+        'gpt-4o-mini': { input: 0.00015, output: 0.0006 }
-      gemini: {
+      },
-        'gemini-pro-vision': {
+      gemini: {
-          input: 0.0025,
+        'gemini-2.0-flash': { input: 0.0001, output: 0.0004 },
-          output: 0.0025
+        'gemini-1.5-flash': { input: 0.000075, output: 0.0003 },
-        }
+        'gemini-1.5-pro': { input: 0.00125, output: 0.005 }
-      }
+      },
-    },
+      openrouter: {
-    tts: {
+        'anthropic/claude-sonnet-4.5': { input: 0.003, output: 0.015 },
-      openai: {
+        'anthropic/claude-3.5-sonnet': { input: 0.003, output: 0.015 },
-        'tts-1': 0.015,
+        'anthropic/claude-3-haiku': { input: 0.0008, output: 0.004 },
-        'tts-1-hd': 0.030
+        'google/gemini-2.0-flash-001': { input: 0.0001, output: 0.0004 }
-      }
+      }
-    }
+    },
-  };
+    tts: {
-
+      openai: {
-  // Get the pricing for the selected providers
+        'tts-1': 0.015,
-  const visionProvider = settings.visionProvider;
+        'tts-1-hd': 0.030,
-  const visionModel = settings.visionProviders[visionProvider].model;
+        'gpt-4o-mini-tts': { inputTokens: 0.60, outputTokens: 12.00 }
-  const ttsProvider = settings.ttsProvider;
+      },
-  const ttsModel = settings.ttsProviders[ttsProvider].model;
+      elevenlabs: {
-
+        'eleven_multilingual_v2': 0.30,
-  // Check if the pricing data exists
+        'eleven_turbo_v2.5': 0.015
-  const visionPricing = pricing.vision[visionProvider]?.[visionModel];
+      },
-  const ttsPricing = pricing.tts[ttsProvider]?.[ttsModel];
+      google: {
-
+        'chirp-hd': 0.016,
-  if (!visionPricing) {
+        'wavenet': 0.016,
-    console.warn(`Warning: No pricing data for vision provider "${visionProvider}" and model "${visionModel}".`);
+        'neural2': 0.016,
-  }
+        'standard': 0.004
-
+      }
-  if (!ttsPricing) {
+    }
-    console.warn(`Warning: No pricing data for TTS provider "${ttsProvider}" and model "${ttsModel}".`);
+  };
-  }
+
-
+  // Get the pricing for the selected providers
-  // Estimated token counts
+  const visionProvider = settings.visionProvider;
-  const estimatedVisionInputTokens = 1000 * unitCostMultiplier; // Base tokens for the vision input
+  const visionModel = settings.visionProviders[visionProvider].model;
-  const estimatedPromptTokens = 100; // Tokens for the prompt text
+  const ttsProvider = settings.ttsProvider;
-  const estimatedOutputTokensPerUnit = 75; // Average tokens for description output
+  const ttsModel = settings.ttsProviders[ttsProvider].model;
-
+
-  // Estimated character counts for TTS
+  // Check if the pricing data exists
-  const estimatedCharsPerDescription = 200; // Average characters per description
+  const visionPricing = pricing.vision[visionProvider]?.[visionModel];
-
+  const ttsPricing = pricing.tts[ttsProvider]?.[ttsModel];
-  // Calculate estimated costs for first unit
+
-  const firstUnitCost = {
+  if (!visionPricing) {
-    visionInput: (estimatedVisionInputTokens + estimatedPromptTokens) * (visionPricing?.input || 0) / 1000,
+    console.warn(`Warning: No pricing data for vision provider "${visionProvider}" and model "${visionModel}".`);
-    visionOutput: estimatedOutputTokensPerUnit * (visionPricing?.output || 0) / 1000,
+  }
-    tts: estimatedCharsPerDescription * (ttsPricing || 0) / 1000
+
-  };
+  if (!ttsPricing) {
-
+    console.warn(`Warning: No pricing data for TTS provider "${ttsProvider}" and model "${ttsModel}".`);
-  // For subsequent units, we need context (e.g., previous frames)
+  }
-  const contextMultiplier = settings.batchTimeMode ? 1.2 : 2; // Less overhead in batch mode
+
-  
+  // Estimated token counts
-  const subsequentUnitCost = {
+  const estimatedVisionInputTokens = 1000 * unitCostMultiplier;
-    visionInput: (estimatedVisionInputTokens * contextMultiplier + estimatedPromptTokens) * (visionPricing?.input || 0) / 1000,
+  const estimatedPromptTokens = 100;
-    visionOutput: estimatedOutputTokensPerUnit * (visionPricing?.output || 0) / 1000,
+  const estimatedOutputTokensPerUnit = 75;
-    tts: estimatedCharsPerDescription * (ttsPricing || 0) / 1000
+
-  };
+  // Estimated character counts for TTS
-
+  const estimatedCharsPerDescription = 200;
-  // Calculate total costs
+
-  const totalVisionInputCost =
+  // Calculate estimated costs for first unit
-    firstUnitCost.visionInput +
+  const firstUnitCost = {
-    (totalUnits - 1) * subsequentUnitCost.visionInput;
+    visionInput: (estimatedVisionInputTokens + estimatedPromptTokens) * (visionPricing?.input || 0) / 1000,
-
+    visionOutput: estimatedOutputTokensPerUnit * (visionPricing?.output || 0) / 1000,
-  const totalVisionOutputCost =
+    tts: calculateTTSCost(estimatedCharsPerDescription, ttsPricing)
-    firstUnitCost.visionOutput +
+  };
-    (totalUnits - 1) * subsequentUnitCost.visionOutput;
+
-
+  // For subsequent units, we need context (e.g., previous frames)
-  const totalTTSCost =
+  const contextMultiplier = settings.batchTimeMode ? 1.2 : 2;
-    firstUnitCost.tts +
+  
-    (totalUnits - 1) * subsequentUnitCost.tts;
+  const subsequentUnitCost = {
-
+    visionInput: (estimatedVisionInputTokens * contextMultiplier + estimatedPromptTokens) * (visionPricing?.input || 0) / 1000,
-  const totalCost = totalVisionInputCost + totalVisionOutputCost + totalTTSCost;
+    visionOutput: estimatedOutputTokensPerUnit * (visionPricing?.output || 0) / 1000,
-
+    tts: calculateTTSCost(estimatedCharsPerDescription, ttsPricing)
-  // Create cost breakdown
+  };
-  const costBreakdown: CostBreakdown = {
+
-    videoInfo: {
+  // Calculate total costs
-      duration: videoDuration,
+  const totalVisionInputCost =
-      totalUnits: totalUnits,
+    firstUnitCost.visionInput +
-      unitType: unitType,
+    (totalUnits - 1) * subsequentUnitCost.visionInput;
-      processingInterval: settings.batchTimeMode ? settings.batchWindowDuration : settings.captureIntervalSeconds
+
-    },
+  const totalVisionOutputCost =
-    providerInfo: {
+    firstUnitCost.visionOutput +
-      visionProvider: visionProvider,
+    (totalUnits - 1) * subsequentUnitCost.visionOutput;
-      visionModel: visionModel,
+
-      ttsProvider: ttsProvider,
+  const totalTTSCost =
-      ttsModel: ttsModel
+    firstUnitCost.tts +
-    },
+    (totalUnits - 1) * subsequentUnitCost.tts;
-    apiCosts: {
+
-      visionInput: totalVisionInputCost.toFixed(4),
+  const totalCost = totalVisionInputCost + totalVisionOutputCost + totalTTSCost;
-      visionOutput: totalVisionOutputCost.toFixed(4),
+
-      tts: totalTTSCost.toFixed(4),
+  // Create cost breakdown
-      total: totalCost.toFixed(4)
+  const costBreakdown: CostBreakdown = {
-    },
+    videoInfo: {
-    estimates: {
+      duration: videoDuration,
-      totalAPICallsToProviders: totalUnits * 2, // Vision + TTS for each unit
+      totalUnits: totalUnits,
-      estimatedProcessingTimeMinutes: (totalUnits * 3) / 60 // rough estimate, 3 seconds per unit
+      unitType: unitType,
-    }
+      processingInterval: settings.batchTimeMode ? settings.batchWindowDuration : settings.captureIntervalSeconds
-  };
+    },
-
+    providerInfo: {
-  return costBreakdown;
+      visionProvider: visionProvider,
-}
+      visionModel: visionModel,
      ttsProvider: ttsProvider,
      ttsModel: ttsModel
    },
    apiCosts: {
      visionInput: totalVisionInputCost.toFixed(4),
      visionOutput: totalVisionOutputCost.toFixed(4),
      tts: totalTTSCost.toFixed(4),
      total: totalCost.toFixed(4)
    },
    estimates: {
      totalAPICallsToProviders: totalUnits * 2,
      estimatedProcessingTimeMinutes: (totalUnits * 3) / 60
    }
  };
  return costBreakdown;
 }
 /**
  * Estimate the TTS cost of synthesizing a piece of text.
  *
  * Two pricing shapes are supported:
  *  - a plain number, read as the price per 1000 characters;
  *  - an object with `inputTokens` / `outputTokens`, read as prices per
  *    1,000,000 tokens, with token counts approximated from the character count.
  *
  * @param charCount - Number of characters to be synthesized
  * @param pricing - Pricing entry for the selected model, or undefined when unknown
  * @returns Estimated cost; 0 when no (or zero) pricing is available
  */
 function calculateTTSCost(charCount: number, pricing: TTSPricingModel | undefined): number {
  // Unknown or zero pricing contributes nothing to the estimate.
  if (!pricing) {
    return 0;
  }

  if (typeof pricing !== 'number') {
    // Token-based pricing (e.g., gpt-4o-mini-tts).
    const { inputTokens: inputRate, outputTokens: outputRate } = pricing;
    // Heuristic: roughly 0.25 text tokens per character of English input.
    const textTokens = charCount * 0.25;
    // Heuristic: audio output is token-heavy, about 3 tokens per character.
    const audioTokens = charCount * 3;
    const tokenCost = textTokens * inputRate + audioTokens * outputRate;
    return tokenCost / 1000000;
  }

  // Character-based pricing: the rate is quoted per 1000 characters.
  return (charCount * pricing) / 1000;
 }