Files
aidio-description/src/utils/costEstimator.ts

150 lines
5.1 KiB
TypeScript
Raw Normal View History

2025-06-10 19:24:13 +02:00
import { Config } from '../config/config';
import { CostBreakdown } from '../interfaces';
import { getVideoDuration } from './mediaUtils';
/**
 * Estimate the API cost of generating audio descriptions for a video.
 *
 * The video is divided into processing units: fixed-duration batches when
 * `batchTimeMode` is set, otherwise single frames captured every
 * `captureIntervalSeconds`. A rough per-unit token / character budget is then
 * priced against the built-in pricing table below.
 *
 * Note: `options` is used as the complete configuration. Nothing in this
 * function merges it with project defaults, so callers must pass every field
 * that is read here (mode, interval, selected providers and their models).
 *
 * @param videoFilePath - Path to the input video file
 * @param options - Configuration values used for the estimate
 * @returns Cost estimation breakdown; the `apiCosts` fields are strings
 *          formatted with four decimal places
 * @throws TypeError if `visionProviders` / `ttsProviders` has no entry for the
 *         selected provider (the `.model` lookup fails)
 */
export async function estimateCost(
  videoFilePath: string,
  options: Partial<Config> = {}
): Promise<CostBreakdown> {
  // Shallow copy of the caller's options; no defaults are applied here.
  const settings = { ...options } as Config;

  // Get video duration
  const videoDuration = getVideoDuration(videoFilePath);
  console.log(`Video duration: ${videoDuration} seconds`);

  // Pick the unit of work (batch or single frame) and its interval once, so
  // the unit count and the reported processing interval cannot drift apart.
  const processingInterval = settings.batchTimeMode
    ? settings.batchWindowDuration
    : settings.captureIntervalSeconds;
  // In batch mode every unit carries `framesInBatch` frames worth of input.
  const unitCostMultiplier = settings.batchTimeMode ? settings.framesInBatch : 1;
  const unitType: string = settings.batchTimeMode ? 'batches' : 'frames';

  // A missing, zero or negative interval (or an unreadable duration) would
  // yield NaN / Infinity / a negative count; treat all of those as "nothing to
  // process" instead of letting them leak into the cost figures.
  const rawUnits = Math.floor(videoDuration / processingInterval);
  const totalUnits = Number.isFinite(rawUnits) && rawUnits > 0 ? rawUnits : 0;
  console.log(`Will process ${totalUnits} ${unitType}`);

  // Pricing constants (as of March 2025, update as needed)
  const pricing: {
    vision: Record<string, Record<string, { input: number; output: number }>>;
    tts: Record<string, Record<string, number>>;
  } = {
    vision: {
      openai: {
        'gpt-4o': {
          input: 0.0025,
          output: 0.01
        }
      },
      gemini: {
        'gemini-pro-vision': {
          input: 0.0025,
          output: 0.0025
        }
      }
    },
    tts: {
      openai: {
        'tts-1': 0.015,
        'tts-1-hd': 0.030
      }
    }
  };

  // Resolve the selected providers and their models
  const visionProvider = settings.visionProvider;
  const visionModel = settings.visionProviders[visionProvider].model;
  const ttsProvider = settings.ttsProvider;
  const ttsModel = settings.ttsProviders[ttsProvider].model;

  // Look up pricing; an unknown provider/model only warns and is priced at 0
  const visionPricing = pricing.vision[visionProvider]?.[visionModel];
  const ttsPricing = pricing.tts[ttsProvider]?.[ttsModel];
  if (!visionPricing) {
    console.warn(`Warning: No pricing data for vision provider "${visionProvider}" and model "${visionModel}".`);
  }
  if (!ttsPricing) {
    console.warn(`Warning: No pricing data for TTS provider "${ttsProvider}" and model "${ttsModel}".`);
  }
  const visionInputRate = visionPricing?.input ?? 0;
  const visionOutputRate = visionPricing?.output ?? 0;
  const ttsRate = ttsPricing ?? 0;

  // Estimated token counts
  const estimatedVisionInputTokens = 1000 * unitCostMultiplier; // Base tokens for the vision input
  const estimatedPromptTokens = 100; // Tokens for the prompt text
  const estimatedOutputTokensPerUnit = 75; // Average tokens for description output
  // Estimated character counts for TTS
  const estimatedCharsPerDescription = 200; // Average characters per description

  // The first unit is sent without any preceding context
  const firstUnitCost = {
    visionInput: (estimatedVisionInputTokens + estimatedPromptTokens) * visionInputRate / 1000,
    visionOutput: estimatedOutputTokensPerUnit * visionOutputRate / 1000,
    tts: estimatedCharsPerDescription * ttsRate / 1000
  };

  // For subsequent units, we need context (e.g., previous frames)
  const contextMultiplier = settings.batchTimeMode ? 1.2 : 2; // Less overhead in batch mode
  const subsequentUnitCost = {
    visionInput: (estimatedVisionInputTokens * contextMultiplier + estimatedPromptTokens) * visionInputRate / 1000,
    visionOutput: estimatedOutputTokensPerUnit * visionOutputRate / 1000,
    tts: estimatedCharsPerDescription * ttsRate / 1000
  };

  // Split the units into "first" (0 or 1) and "subsequent" (never negative).
  // With zero units both counts are 0, so the totals are 0 rather than
  // `first - subsequent`, which used to produce a negative estimate.
  const firstUnits = totalUnits > 0 ? 1 : 0;
  const subsequentUnits = Math.max(totalUnits - 1, 0);

  const totalVisionInputCost =
    firstUnits * firstUnitCost.visionInput +
    subsequentUnits * subsequentUnitCost.visionInput;
  const totalVisionOutputCost =
    firstUnits * firstUnitCost.visionOutput +
    subsequentUnits * subsequentUnitCost.visionOutput;
  const totalTTSCost =
    firstUnits * firstUnitCost.tts +
    subsequentUnits * subsequentUnitCost.tts;
  const totalCost = totalVisionInputCost + totalVisionOutputCost + totalTTSCost;

  // Create cost breakdown
  const costBreakdown: CostBreakdown = {
    videoInfo: {
      duration: videoDuration,
      totalUnits: totalUnits,
      unitType: unitType,
      processingInterval: processingInterval
    },
    providerInfo: {
      visionProvider: visionProvider,
      visionModel: visionModel,
      ttsProvider: ttsProvider,
      ttsModel: ttsModel
    },
    apiCosts: {
      visionInput: totalVisionInputCost.toFixed(4),
      visionOutput: totalVisionOutputCost.toFixed(4),
      tts: totalTTSCost.toFixed(4),
      total: totalCost.toFixed(4)
    },
    estimates: {
      totalAPICallsToProviders: totalUnits * 2, // Vision + TTS for each unit
      estimatedProcessingTimeMinutes: (totalUnits * 3) / 60 // rough estimate, 3 seconds per unit
    }
  };
  return costBreakdown;
}