WIP typescript conversion

2025-06-10 19:24:13 +02:00
parent 9425b4b256
commit 507d4f6474
26 changed files with 2128 additions and 27 deletions
--- a/src/utils/costEstimator.ts
+++ b/src/utils/costEstimator.ts
@@ -0,0 +1,152 @@
+import { Config } from '../config/config';
+import { CostBreakdown } from '../interfaces';
+import { getVideoDuration } from './mediaUtils';
+
+/** Per-1K-token prices for a single vision model. */
+type VisionModelPricing = { input: number; output: number };
+
+/** Price lookup keyed by provider name, then by model name. */
+type PricingTable<T> = Partial<Record<string, Partial<Record<string, T>>>>;
+
+// Pricing constants (as of March 2025, update as needed)
+const VISION_PRICING: PricingTable<VisionModelPricing> = {
+  openai: {
+    'gpt-4o': {
+      input: 0.0025,  // per 1K input tokens
+      output: 0.01    // per 1K output tokens
+    }
+    // Add other OpenAI models here
+  },
+  gemini: {
+    'gemini-pro-vision': {
+      input: 0.0025,  // per 1K input tokens
+      output: 0.0025  // per 1K output tokens
+    }
+  }
+  // Add other vision providers here
+};
+
+const TTS_PRICING: PricingTable<number> = {
+  openai: {
+    'tts-1': 0.015,      // per 1K characters
+    'tts-1-hd': 0.030    // per 1K characters
+  }
+  // Add other TTS providers here
+};
+
+/**
+ * Estimate the cost of generating audio descriptions for a video.
+ *
+ * The video is split into units (single frames, or batches when
+ * `batchTimeMode` is set); each unit is assumed to need one vision call and
+ * one TTS call. Token and character counts are fixed rough averages, so the
+ * result is an order-of-magnitude estimate only. Providers/models without an
+ * entry in the pricing tables are logged as a warning and priced at 0.
+ *
+ * @param videoFilePath - Path to the input video file
+ * @param options - Configuration values used for the estimate. No defaults are
+ *   applied here, so the caller must supply every setting that is read
+ *   (interval, provider names and the matching provider entries).
+ * @returns Cost estimation breakdown; monetary values are strings with four
+ *   decimal places
+ * @throws Error if the selected vision or TTS provider has no configured model
+ */
+export async function estimateCost(
+  videoFilePath: string,
+  options: Partial<Config> = {}
+): Promise<CostBreakdown> {
+  // NOTE(review): this only copies the provided options; nothing is merged
+  // with project defaults, so missing settings stay undefined.
+  const settings = { ...options } as Config;
+
+  // Get video duration
+  const videoDuration = getVideoDuration(videoFilePath);
+  console.log(`Video duration: ${videoDuration} seconds`);
+
+  // Calculate the number of frames or batches to process
+  const processingInterval = settings.batchTimeMode
+    ? settings.batchWindowDuration
+    : settings.captureIntervalSeconds;
+  const unitCostMultiplier = settings.batchTimeMode ? settings.framesInBatch : 1;
+  const unitType = settings.batchTimeMode ? 'batches' : 'frames';
+
+  // A missing/zero interval (or an unreadable duration) yields NaN/Infinity;
+  // fall back to 0 units instead of letting that leak into every figure.
+  const rawUnits = Math.floor(videoDuration / processingInterval);
+  if (!Number.isFinite(rawUnits)) {
+    console.warn(
+      `Warning: Could not derive a unit count from duration "${videoDuration}" and interval "${processingInterval}"; assuming 0 ${unitType}.`
+    );
+  }
+  const totalUnits = Number.isFinite(rawUnits) && rawUnits > 0 ? rawUnits : 0;
+
+  console.log(`Will process ${totalUnits} ${unitType}`);
+
+  // Get the models for the selected providers
+  const visionProvider = settings.visionProvider;
+  const visionModel = settings.visionProviders?.[visionProvider]?.model;
+  if (visionModel == null) {
+    throw new Error(`No model configured for vision provider "${visionProvider}".`);
+  }
+
+  const ttsProvider = settings.ttsProvider;
+  const ttsModel = settings.ttsProviders?.[ttsProvider]?.model;
+  if (ttsModel == null) {
+    throw new Error(`No model configured for TTS provider "${ttsProvider}".`);
+  }
+
+  // Check if the pricing data exists
+  const visionPricing = VISION_PRICING[visionProvider]?.[visionModel];
+  const ttsPricing = TTS_PRICING[ttsProvider]?.[ttsModel];
+
+  if (!visionPricing) {
+    console.warn(`Warning: No pricing data for vision provider "${visionProvider}" and model "${visionModel}".`);
+  }
+
+  if (!ttsPricing) {
+    console.warn(`Warning: No pricing data for TTS provider "${ttsProvider}" and model "${ttsModel}".`);
+  }
+
+  // Unknown pricing is treated as free so the rest of the breakdown is still usable
+  const inputRate = visionPricing?.input ?? 0;
+  const outputRate = visionPricing?.output ?? 0;
+  const ttsRate = ttsPricing ?? 0;
+
+  // Estimated token counts
+  const estimatedVisionInputTokens = 1000 * unitCostMultiplier; // Base tokens for the vision input
+  const estimatedPromptTokens = 100; // Tokens for the prompt text
+  const estimatedOutputTokensPerUnit = 75; // Average tokens for description output
+
+  // Estimated character counts for TTS
+  const estimatedCharsPerDescription = 200; // Average characters per description
+
+  // For subsequent units, we need context (e.g., previous frames)
+  const contextMultiplier = settings.batchTimeMode ? 1.2 : 2; // Less overhead in batch mode
+
+  // Per-unit costs; only the vision input differs between first and later units
+  const firstVisionInput =
+    (estimatedVisionInputTokens + estimatedPromptTokens) * inputRate / 1000;
+  const subsequentVisionInput =
+    (estimatedVisionInputTokens * contextMultiplier + estimatedPromptTokens) * inputRate / 1000;
+  const visionOutputPerUnit = estimatedOutputTokensPerUnit * outputRate / 1000;
+  const ttsPerUnit = estimatedCharsPerDescription * ttsRate / 1000;
+
+  // With zero units there is no "first" unit either; without this split the
+  // totals would be first + (-1) * subsequent, i.e. a negative cost.
+  const firstUnits = totalUnits > 0 ? 1 : 0;
+  const subsequentUnits = totalUnits - firstUnits;
+  const sumUnits = (first: number, subsequent: number): number =>
+    firstUnits * first + subsequentUnits * subsequent;
+
+  // Calculate total costs
+  const totalVisionInputCost = sumUnits(firstVisionInput, subsequentVisionInput);
+  const totalVisionOutputCost = sumUnits(visionOutputPerUnit, visionOutputPerUnit);
+  const totalTTSCost = sumUnits(ttsPerUnit, ttsPerUnit);
+  const totalCost = totalVisionInputCost + totalVisionOutputCost + totalTTSCost;
+
+  // Create cost breakdown
+  const costBreakdown: CostBreakdown = {
+    videoInfo: {
+      duration: videoDuration,
+      totalUnits: totalUnits,
+      unitType: unitType,
+      processingInterval: processingInterval
+    },
+    providerInfo: {
+      visionProvider: visionProvider,
+      visionModel: visionModel,
+      ttsProvider: ttsProvider,
+      ttsModel: ttsModel
+    },
+    apiCosts: {
+      visionInput: totalVisionInputCost.toFixed(4),
+      visionOutput: totalVisionOutputCost.toFixed(4),
+      tts: totalTTSCost.toFixed(4),
+      total: totalCost.toFixed(4)
+    },
+    estimates: {
+      totalAPICallsToProviders: totalUnits * 2, // Vision + TTS for each unit
+      estimatedProcessingTimeMinutes: (totalUnits * 3) / 60 // rough estimate, 3 seconds per unit
+    }
+  };
+
+  return costBreakdown;
+}