// Source listing metadata: TypeScript, 100 lines, 2.5 KiB.
/**
 * Token counts attached to a vision result (see the `usage` field of a
 * vision result).
 */
export interface VisionUsage {
  /** Number of input tokens. */
  inputTokens: number;
  /** Number of output tokens. */
  outputTokens: number;
  /**
   * Total token count.
   * NOTE(review): presumably `inputTokens + outputTokens` — confirm with the
   * code that populates it.
   */
  totalTokens: number;
}
/**
 * Value resolved by every `VisionProvider` method: a textual description
 * together with the usage counters for producing it.
 */
export interface VisionResult {
  /** Description text returned by the provider. */
  description: string;
  /** Token usage associated with this result. */
  usage: VisionUsage;
}
/**
 * Configuration values for a vision provider. Only `model` is required.
 */
export interface VisionProviderConfig {
  /** Optional API key. */
  apiKey?: string;
  /** Model identifier (required). */
  model: string;
  /**
   * Optional token limit.
   * NOTE(review): presumably caps the generated output — confirm with the
   * provider implementations.
   */
  maxTokens?: number;
  /**
   * Optional base URL.
   * NOTE(review): presumably overrides the provider's default endpoint —
   * confirm.
   */
  baseUrl?: string;
}
/**
 * Contract for a vision backend. Every method is asynchronous and resolves
 * to a `VisionResult`.
 */
export interface VisionProvider {
  /**
   * Describes a single image.
   *
   * @param imagePath - Path of the image.
   * @param prompt - Prompt text passed along with the image.
   * @returns The description and its usage counters.
   */
  describeImage(imagePath: string, prompt: string): Promise<VisionResult>;

  /**
   * Produces one result for a pair of images.
   *
   * @param image1Path - Path of the first image.
   * @param image2Path - Path of the second image.
   * @param prompt - Prompt text passed along with the images.
   * @returns The description and its usage counters.
   */
  compareImages(image1Path: string, image2Path: string, prompt: string): Promise<VisionResult>;

  /**
   * Produces one result for a batch of images.
   *
   * @param imagePaths - Paths of the images in the batch.
   * @param lastBatchContext - Context carried over from a previous batch.
   *   Typed `unknown` rather than `any` so implementations must narrow it
   *   before use; callers may still pass any value.
   *   NOTE(review): presumably a `BatchContext` — confirm against the
   *   implementers before narrowing the type further.
   * @param prompt - Prompt text passed along with the images.
   * @returns The description and its usage counters.
   */
  describeBatch(imagePaths: string[], lastBatchContext: unknown, prompt: string): Promise<VisionResult>;
}
/**
 * Value resolved by `TTSProvider.textToSpeech`.
 */
export interface TTSResult {
  /**
   * Duration of the result.
   * NOTE(review): unit is not stated here (presumably seconds) — confirm.
   */
  duration: number;
  /**
   * Cost of the request.
   * NOTE(review): currency/unit is not stated here — confirm.
   */
  cost: number;
}
/**
 * Optional per-call settings accepted by `TTSProvider.textToSpeech`.
 * Every field may be omitted.
 */
export interface TTSOptions {
  /** Voice identifier. */
  voice?: string;
  /** Model identifier. */
  model?: string;
  /**
   * Speed factor.
   * NOTE(review): presumably a playback-rate multiplier where 1 is
   * unchanged — confirm.
   */
  speedFactor?: number;
  /** Free-form instructions text. */
  instructions?: string;
}
/**
 * Configuration values for a text-to-speech provider. Only `model` is
 * required.
 */
export interface TTSProviderConfig {
  /** Optional API key. */
  apiKey?: string;
  /** Model identifier (required). */
  model: string;
  /** Optional voice identifier. */
  voice?: string;
  /**
   * Optional key file name.
   * NOTE(review): presumably the path to a credentials file used instead of
   * `apiKey` — confirm.
   */
  keyFilename?: string;
}
/**
 * Contract for a text-to-speech backend.
 */
export interface TTSProvider {
  /**
   * Converts text to speech.
   *
   * @param text - Text to convert.
   * @param outputPath - Output path (presumably where the audio is written —
   *   confirm with the implementations).
   * @param options - Optional per-call settings.
   * @returns The duration and cost of the request.
   */
  textToSpeech(text: string, outputPath: string, options?: TTSOptions): Promise<TTSResult>;
}
/**
 * One audio segment: an audio file, its timing, and its description text.
 */
export interface AudioSegment {
  /** Audio file for this segment (name or path). */
  audioFile: string;
  /**
   * Start time of the segment.
   * NOTE(review): unit and reference point are not stated here (presumably
   * seconds from the start of the video) — confirm.
   */
  startTime: number;
  /**
   * Duration of the segment.
   * NOTE(review): unit is not stated here (presumably seconds) — confirm.
   */
  duration: number;
  /** Description text associated with this segment. */
  description: string;
}
/**
 * Running totals: frame and batch counts plus cost figures.
 */
export interface Stats {
  /** Total number of frames. */
  totalFrames: number;
  /** Total number of batches. */
  totalBatches: number;
  /** Total vision input cost. */
  totalVisionInputCost: number;
  /** Total vision output cost. */
  totalVisionOutputCost: number;
  /** Total text-to-speech cost. */
  totalTTSCost: number;
  /**
   * Overall total cost.
   * NOTE(review): presumably the sum of the three cost fields above —
   * confirm with the code that populates it.
   */
  totalCost: number;
}
/**
 * Context carried between batches (see `ProcessingOptions.lastContext`).
 * Both fields may be omitted.
 */
export interface BatchContext {
  /** Description from the last batch, if any. */
  lastDescription?: string;
  /** Frame paths from the last batch, if any. */
  lastFramePaths?: string[];
}
/**
 * Payload passed to the `onProgress` callback of `ProcessingOptions`.
 */
export interface ProgressInfo {
  /** Kind of unit this update refers to. */
  type: 'frame' | 'batch';
  /**
   * Index of the unit.
   * NOTE(review): zero- vs one-based is not stated here — confirm.
   */
  index: number;
  /** Total number of units. */
  total: number;
  /** Audio segment associated with this update. */
  segment: AudioSegment;
}
/**
 * Optional inputs for a processing run. Every field may be omitted.
 * NOTE(review): the first four fields look like state for resuming an
 * interrupted run — confirm against the processing code.
 */
export interface ProcessingOptions {
  /** Index to start from. */
  startIndex?: number;
  /** Audio segments that already exist. */
  existingSegments?: AudioSegment[];
  /** Batch context from the previous step. */
  lastContext?: BatchContext;
  /**
   * Current time position.
   * NOTE(review): unit is not stated here (presumably seconds) — confirm.
   */
  currentTimePosition?: number;
  /** Callback invoked with progress information; its return value is ignored. */
  onProgress?: (info: ProgressInfo) => void;
}
/**
 * Outcome of a processing run: the files involved and the audio segments.
 */
export interface ProcessingResult {
  /** Video file (name or path). */
  videoFile: string;
  /** Audio description file (name or path). */
  audioDescriptionFile: string;
  /** Audio segments belonging to this result. */
  segments: AudioSegment[];
}
/**
 * Cost report grouped into four sections: video details, provider details,
 * API costs, and estimates.
 */
export interface CostBreakdown {
  /** Details about the video. */
  videoInfo: {
    /**
     * Video duration.
     * NOTE(review): unit is not stated here (presumably seconds) — confirm.
     */
    duration: number;
    /** Total number of units; see `unitType` for what a unit is. */
    totalUnits: number;
    /**
     * Name of the unit counted by `totalUnits`.
     * NOTE(review): presumably values such as "frames" or "batches" — confirm.
     */
    unitType: string;
    /**
     * Processing interval.
     * NOTE(review): unit is not stated here — confirm.
     */
    processingInterval: number;
  };
  /** Names of the providers and models. */
  providerInfo: {
    /** Vision provider name. */
    visionProvider: string;
    /** Vision model name. */
    visionModel: string;
    /** Text-to-speech provider name. */
    ttsProvider: string;
    /** Text-to-speech model name. */
    ttsModel: string;
  };
  /**
   * Cost figures, stored as strings.
   * NOTE(review): presumably pre-formatted amounts for display — confirm the
   * format with the code that builds this object.
   */
  apiCosts: {
    /** Vision input cost. */
    visionInput: string;
    /** Vision output cost. */
    visionOutput: string;
    /** Text-to-speech cost. */
    tts: string;
    /** Total cost. */
    total: string;
  };
  /** Estimated figures. */
  estimates: {
    /** Total number of API calls to providers. */
    totalAPICallsToProviders: number;
    /** Estimated processing time, in minutes. */
    estimatedProcessingTimeMinutes: number;
  };
}