152 lines
5.4 KiB
TypeScript
152 lines
5.4 KiB
TypeScript
|
|
import { Config } from '../config/config';
|
||
|
|
import { CostBreakdown } from '../interfaces';
|
||
|
|
import { getVideoDuration } from './mediaUtils';
|
||
|
|
|
||
|
|
/**
 * Estimate the API cost of generating audio descriptions for a video.
 *
 * The estimate is heuristic: it assumes fixed token/character counts per
 * processed unit (a single frame, or a batch of frames in batch-time mode)
 * and multiplies them by a built-in pricing table. Providers or models that
 * are missing from the table are reported with a warning and priced at 0.
 *
 * @param videoFilePath - Path to the input video file
 * @param options - Configuration used for the estimate. No defaults are
 *   applied inside this function, so the interval settings and the selected
 *   vision/TTS provider entries must be present in the object passed in.
 * @returns Cost estimation breakdown; the `apiCosts` values are strings
 *   formatted with four decimal places
 * @throws RangeError if the video duration, the processing interval, or (in
 *   batch mode) the frames-per-batch value is not a usable number
 * @throws TypeError if the selected vision or TTS provider has no entry in
 *   `visionProviders` / `ttsProviders`
 */
export async function estimateCost(
  videoFilePath: string,
  options: Partial<Config> = {}
): Promise<CostBreakdown> {
  // Shallow copy only: nothing is merged with defaults here, so the caller is
  // responsible for passing a fully populated configuration.
  const settings = { ...options } as Config;

  // Get video duration
  const videoDuration = getVideoDuration(videoFilePath);
  console.log(`Video duration: ${videoDuration} seconds`);
  if (!Number.isFinite(videoDuration) || videoDuration < 0) {
    throw new RangeError(`Invalid video duration: ${videoDuration}`);
  }

  // Work out how many units (frames or batches) will be processed.
  const batchMode = Boolean(settings.batchTimeMode);
  const processingInterval = batchMode
    ? settings.batchWindowDuration
    : settings.captureIntervalSeconds;

  // A missing, zero or negative interval would otherwise yield NaN, Infinity
  // or a negative unit count and poison every figure derived from it.
  if (!Number.isFinite(processingInterval) || processingInterval <= 0) {
    const settingName = batchMode ? 'batchWindowDuration' : 'captureIntervalSeconds';
    throw new RangeError(
      `Invalid ${settingName}: ${processingInterval}. It must be a positive number of seconds.`
    );
  }

  // In batch mode every request carries several frames, so the vision input
  // tokens scale with the number of frames per batch.
  const unitCostMultiplier = batchMode ? settings.framesInBatch : 1;
  if (!Number.isFinite(unitCostMultiplier) || unitCostMultiplier < 0) {
    throw new RangeError(`Invalid framesInBatch: ${unitCostMultiplier}.`);
  }

  const unitType = batchMode ? "batches" : "frames";
  const totalUnits = Math.floor(videoDuration / processingInterval);

  console.log(`Will process ${totalUnits} ${unitType}`);

  // Pricing constants (as of March 2025, update as needed).
  // Typed as string-keyed lookups so unknown providers/models resolve to
  // `undefined` instead of being an indexing error.
  const pricing: {
    vision: Record<string, Record<string, { input: number; output: number }>>;
    tts: Record<string, Record<string, number>>;
  } = {
    vision: {
      openai: {
        'gpt-4o': {
          input: 0.0025, // per 1K input tokens
          output: 0.01 // per 1K output tokens
        }
        // Add other OpenAI models here
      },
      gemini: {
        'gemini-pro-vision': {
          input: 0.0025, // per 1K input tokens
          output: 0.0025 // per 1K output tokens
        }
      }
      // Add other vision providers here
    },
    tts: {
      openai: {
        'tts-1': 0.015, // per 1K characters
        'tts-1-hd': 0.030 // per 1K characters
      }
      // Add other TTS providers here
    }
  };

  // Resolve the selected providers and their models.
  const visionProvider = settings.visionProvider;
  const visionProviderConfig = settings.visionProviders?.[visionProvider];
  if (!visionProviderConfig) {
    throw new TypeError(`No configuration found for vision provider "${visionProvider}".`);
  }
  const visionModel = visionProviderConfig.model;

  const ttsProvider = settings.ttsProvider;
  const ttsProviderConfig = settings.ttsProviders?.[ttsProvider];
  if (!ttsProviderConfig) {
    throw new TypeError(`No configuration found for TTS provider "${ttsProvider}".`);
  }
  const ttsModel = ttsProviderConfig.model;

  // Look up pricing; unknown combinations are warned about and priced at 0.
  const visionPricing = pricing.vision[visionProvider]?.[visionModel];
  const ttsPricing = pricing.tts[ttsProvider]?.[ttsModel];

  if (!visionPricing) {
    console.warn(`Warning: No pricing data for vision provider "${visionProvider}" and model "${visionModel}".`);
  }

  if (!ttsPricing) {
    console.warn(`Warning: No pricing data for TTS provider "${ttsProvider}" and model "${ttsModel}".`);
  }

  const visionInputRate = visionPricing?.input ?? 0;
  const visionOutputRate = visionPricing?.output ?? 0;
  const ttsRate = ttsPricing ?? 0;

  // Estimated token counts
  const estimatedVisionInputTokens = 1000 * unitCostMultiplier; // Base tokens for the vision input
  const estimatedPromptTokens = 100; // Tokens for the prompt text
  const estimatedOutputTokensPerUnit = 75; // Average tokens for description output

  // Estimated character counts for TTS
  const estimatedCharsPerDescription = 200; // Average characters per description

  // Output and TTS cost per unit do not depend on the unit's position.
  const visionOutputPerUnit = estimatedOutputTokensPerUnit * visionOutputRate / 1000;
  const ttsPerUnit = estimatedCharsPerDescription * ttsRate / 1000;

  // The first unit is sent without any preceding context.
  const firstUnitCost = {
    visionInput: (estimatedVisionInputTokens + estimatedPromptTokens) * visionInputRate / 1000,
    visionOutput: visionOutputPerUnit,
    tts: ttsPerUnit
  };

  // Subsequent units also carry context (e.g. previous frames), which
  // inflates the vision input; the overhead is smaller in batch mode.
  const contextMultiplier = batchMode ? 1.2 : 2;

  const subsequentUnitCost = {
    visionInput: (estimatedVisionInputTokens * contextMultiplier + estimatedPromptTokens) * visionInputRate / 1000,
    visionOutput: visionOutputPerUnit,
    tts: ttsPerUnit
  };

  // One "first" unit plus (n - 1) "subsequent" units. With zero units nothing
  // is sent at all, so the total must be 0 rather than first - subsequent.
  const totalFor = (first: number, subsequent: number): number =>
    totalUnits > 0 ? first + (totalUnits - 1) * subsequent : 0;

  const totalVisionInputCost = totalFor(firstUnitCost.visionInput, subsequentUnitCost.visionInput);
  const totalVisionOutputCost = totalFor(firstUnitCost.visionOutput, subsequentUnitCost.visionOutput);
  const totalTTSCost = totalFor(firstUnitCost.tts, subsequentUnitCost.tts);

  const totalCost = totalVisionInputCost + totalVisionOutputCost + totalTTSCost;

  // Create cost breakdown
  const costBreakdown: CostBreakdown = {
    videoInfo: {
      duration: videoDuration,
      totalUnits: totalUnits,
      unitType: unitType,
      processingInterval: processingInterval
    },
    providerInfo: {
      visionProvider: visionProvider,
      visionModel: visionModel,
      ttsProvider: ttsProvider,
      ttsModel: ttsModel
    },
    apiCosts: {
      visionInput: totalVisionInputCost.toFixed(4),
      visionOutput: totalVisionOutputCost.toFixed(4),
      tts: totalTTSCost.toFixed(4),
      total: totalCost.toFixed(4)
    },
    estimates: {
      totalAPICallsToProviders: totalUnits * 2, // Vision + TTS for each unit
      estimatedProcessingTimeMinutes: (totalUnits * 3) / 60 // rough estimate, 3 seconds per unit
    }
  };

  return costBreakdown;
}
|