aidio-description/dist/interfaces/index.d.ts

/**
 * Token counters reported alongside a vision result (see `VisionResult.usage`).
 *
 * All three counters are required plain numbers.
 */
export interface VisionUsage {
    /** Tokens counted on the input side of the request. */
    inputTokens: number;

    /** Tokens counted on the output side of the request. */
    outputTokens: number;

    /**
     * Overall token count.
     * NOTE(review): presumably `inputTokens + outputTokens`; the type does not enforce it.
     */
    totalTokens: number;
}
/**
 * Value resolved by every `VisionProvider` method: the generated text plus
 * the token usage that came with it.
 */
export interface VisionResult {
    /** Text description returned for the submitted image(s). */
    description: string;

    /** Token counters associated with this result. */
    usage: VisionUsage;
}
/**
 * Settings object for a vision provider.
 *
 * Only `model` is mandatory; every other member may be omitted.
 * How a provider interprets these values is not visible from this declaration.
 */
export interface VisionProviderConfig {
    /** Identifier of the model to use (required). */
    model: string;

    /** Optional API key. */
    apiKey?: string;

    /**
     * Optional token limit.
     * NOTE(review): presumably caps the generated output — confirm against the provider code.
     */
    maxTokens?: number;

    /**
     * Optional base URL.
     * NOTE(review): presumably overrides the provider's default endpoint — confirm.
     */
    baseUrl?: string;
}
/**
 * Contract implemented by image-description backends.
 *
 * Every method is asynchronous and resolves to a `VisionResult`. What an
 * implementation does with the paths and the prompt is not visible from this
 * declaration.
 */
export interface VisionProvider {
    /**
     * Describe a single image.
     *
     * @param imagePath - Path of the image (presumably a local file path — confirm against implementations).
     * @param prompt - Prompt text supplied with the image.
     * @returns The description and its token usage.
     */
    describeImage(imagePath: string, prompt: string): Promise<VisionResult>;

    /**
     * Produce one result from a pair of images.
     *
     * @param image1Path - Path of the first image.
     * @param image2Path - Path of the second image.
     * @param prompt - Prompt text supplied with the images.
     * @returns The description and its token usage.
     */
    compareImages(image1Path: string, image2Path: string, prompt: string): Promise<VisionResult>;

    /**
     * Produce one result from a list of images.
     *
     * @param imagePaths - Paths of the images in the batch.
     * @param lastBatchContext - Context value handed in by the caller. Typed
     *   `unknown` rather than `any` so that code reading it through this
     *   interface must narrow first; callers may still pass any value and
     *   implementers may still declare a more specific parameter type.
     *   NOTE(review): presumably a `BatchContext` from the previous batch —
     *   confirm and tighten the type if so.
     * @param prompt - Prompt text supplied with the images.
     * @returns The description and its token usage.
     */
    describeBatch(imagePaths: string[], lastBatchContext: unknown, prompt: string): Promise<VisionResult>;
}
/**
 * Value resolved by `TTSProvider.textToSpeech`.
 *
 * Units are not stated in this declaration.
 */
export interface TTSResult {
    /**
     * Length of the produced audio.
     * NOTE(review): presumably seconds — confirm against the provider code.
     */
    duration: number;

    /**
     * Cost attributed to the call.
     * NOTE(review): currency/unit not specified here — confirm.
     */
    cost: number;
}
/**
 * Per-call options accepted by `TTSProvider.textToSpeech`.
 *
 * Every member is optional; the fallback used when one is omitted is decided
 * by the provider and is not visible from this declaration.
 */
export interface TTSOptions {
    /** Voice identifier to use for this call. */
    voice?: string;

    /** Model identifier to use for this call. */
    model?: string;

    /**
     * Speed factor.
     * NOTE(review): presumably a playback-rate multiplier where 1 is normal speed — confirm.
     */
    speedFactor?: number;

    /**
     * Free-form instructions string.
     * NOTE(review): presumably forwarded to the speech model — confirm.
     */
    instructions?: string;
}
/**
 * Settings object for a text-to-speech provider.
 *
 * Only `model` is mandatory; every other member may be omitted.
 */
export interface TTSProviderConfig {
    /** Identifier of the model to use (required). */
    model: string;

    /** Optional API key. */
    apiKey?: string;

    /** Optional voice identifier. */
    voice?: string;

    /**
     * Optional key file name.
     * NOTE(review): presumably a path to a credentials file used instead of `apiKey` — confirm.
     */
    keyFilename?: string;
}
/**
 * Contract implemented by text-to-speech backends.
 */
export interface TTSProvider {
    /**
     * Convert text to speech.
     *
     * @param text - Text to synthesize.
     * @param outputPath - Output location for the audio (presumably a file path
     *   the implementation writes to — confirm against implementations).
     * @param options - Optional per-call settings.
     * @returns The duration and cost reported for the call.
     */
    textToSpeech(text: string, outputPath: string, options?: TTSOptions): Promise<TTSResult>;
}
/**
 * One described piece of audio: the text, the audio file reference and its
 * timing.
 *
 * Time units are not stated in this declaration.
 */
export interface AudioSegment {
    /** Text that this segment speaks. */
    description: string;

    /** Reference to the audio file (presumably a file path — confirm). */
    audioFile: string;

    /**
     * Start position of the segment.
     * NOTE(review): presumably seconds from the start of the video — confirm.
     */
    startTime: number;

    /**
     * Length of the segment.
     * NOTE(review): presumably seconds — confirm.
     */
    duration: number;
}
/**
 * Aggregate counters and cost totals.
 *
 * All members are required numbers. Cost units are not stated in this
 * declaration.
 */
export interface Stats {
    /** Number of frames counted. */
    totalFrames: number;

    /** Number of batches counted. */
    totalBatches: number;

    /** Accumulated cost of vision input. */
    totalVisionInputCost: number;

    /** Accumulated cost of vision output. */
    totalVisionOutputCost: number;

    /** Accumulated text-to-speech cost. */
    totalTTSCost: number;

    /**
     * Overall cost.
     * NOTE(review): presumably the sum of the three cost members above; the type does not enforce it.
     */
    totalCost: number;
}
/**
 * Context carried between batches (see `ProcessingOptions.lastContext`).
 *
 * Both members are optional, so an empty object is a valid context.
 */
export interface BatchContext {
    /** Description text from the preceding step, when there is one. */
    lastDescription?: string;

    /** Frame paths from the preceding step, when there are any. */
    lastFramePaths?: string[];
}
/**
 * Payload handed to the `ProcessingOptions.onProgress` callback.
 */
export interface ProgressInfo {
    /** Kind of unit being reported: a single frame or a batch. */
    type: 'frame' | 'batch';

    /**
     * Position of the reported unit.
     * NOTE(review): whether this is zero- or one-based is not visible here — confirm.
     */
    index: number;

    /** Total number of units. */
    total: number;

    /** Audio segment associated with the reported unit. */
    segment: AudioSegment;
}
/**
 * Optional inputs for a processing run.
 *
 * Every member may be omitted. NOTE(review): the first four members look
 * like resume state for continuing an interrupted run — confirm against the
 * code that consumes them.
 */
export interface ProcessingOptions {
    /** Index at which to start. */
    startIndex?: number;

    /** Segments that already exist before this run. */
    existingSegments?: AudioSegment[];

    /** Context from the previously processed step. */
    lastContext?: BatchContext;

    /** Current position on the timeline (unit not stated here). */
    currentTimePosition?: number;

    /** Callback invoked with a `ProgressInfo`; its return value is ignored. */
    onProgress?: (info: ProgressInfo) => void;
}
/**
 * Outcome of a processing run: the two file references and the segments.
 */
export interface ProcessingResult {
    /** Reference to the video file (presumably a path — confirm). */
    videoFile: string;

    /** Reference to the audio-description file (presumably a path — confirm). */
    audioDescriptionFile: string;

    /** Audio segments belonging to this result. */
    segments: AudioSegment[];
}
/**
 * Cost report made of four required sections: video facts, provider
 * identification, API costs and estimates.
 */
export interface CostBreakdown {
    /** Facts about the video and how it is divided into units. */
    videoInfo: {
        /** Length of the video (unit not stated here; presumably seconds — confirm). */
        duration: number;
        /** Number of units to process. */
        totalUnits: number;
        /** Name of the unit kind; a free-form string at the type level. */
        unitType: string;
        /** Interval between processed units (unit not stated here). */
        processingInterval: number;
    };

    /** Names of the providers and models in use. */
    providerInfo: {
        visionProvider: string;
        visionModel: string;
        ttsProvider: string;
        ttsModel: string;
    };

    /**
     * Cost figures. These are strings, not numbers.
     * NOTE(review): presumably pre-formatted amounts — confirm before parsing them.
     */
    apiCosts: {
        visionInput: string;
        visionOutput: string;
        tts: string;
        total: string;
    };

    /** Estimated workload figures. */
    estimates: {
        /** Estimated number of API calls made to the providers. */
        totalAPICallsToProviders: number;
        /** Estimated processing time, in minutes. */
        estimatedProcessingTimeMinutes: number;
    };
}
Rewrite frontend as single self-contained HTML file — all CSS/JS inline, no external files to fail loading 2026-05-13 17:24:10 +02:00			`export interface VisionUsage {`
			`inputTokens: number;`
			`outputTokens: number;`
			`totalTokens: number;`
			`}`
			`export interface VisionResult {`
			`description: string;`
			`usage: VisionUsage;`
			`}`
			`export interface VisionProviderConfig {`
			`apiKey?: string;`
			`model: string;`
			`maxTokens?: number;`
			`baseUrl?: string;`
			`}`
			`export interface VisionProvider {`
			`describeImage(imagePath: string, prompt: string): Promise<VisionResult>;`
			`compareImages(image1Path: string, image2Path: string, prompt: string): Promise<VisionResult>;`
			`describeBatch(imagePaths: string[], lastBatchContext: any, prompt: string): Promise<VisionResult>;`
			`}`
			`export interface TTSResult {`
			`duration: number;`
			`cost: number;`
			`}`
			`export interface TTSOptions {`
			`voice?: string;`
			`model?: string;`
			`speedFactor?: number;`
			`instructions?: string;`
			`}`
			`export interface TTSProviderConfig {`
			`apiKey?: string;`
			`model: string;`
			`voice?: string;`
			`keyFilename?: string;`
			`}`
			`export interface TTSProvider {`
			`textToSpeech(text: string, outputPath: string, options?: TTSOptions): Promise<TTSResult>;`
			`}`
			`export interface AudioSegment {`
			`audioFile: string;`
			`startTime: number;`
			`duration: number;`
			`description: string;`
			`}`
			`export interface Stats {`
			`totalFrames: number;`
			`totalBatches: number;`
			`totalVisionInputCost: number;`
			`totalVisionOutputCost: number;`
			`totalTTSCost: number;`
			`totalCost: number;`
			`}`
			`export interface BatchContext {`
			`lastDescription?: string;`
			`lastFramePaths?: string[];`
			`}`
			`export interface ProgressInfo {`
			`type: 'frame' \| 'batch';`
			`index: number;`
			`total: number;`
			`segment: AudioSegment;`
			`}`
			`export interface ProcessingOptions {`
			`startIndex?: number;`
			`existingSegments?: AudioSegment[];`
			`lastContext?: BatchContext;`
			`currentTimePosition?: number;`
			`onProgress?: (info: ProgressInfo) => void;`
			`}`
			`export interface ProcessingResult {`
			`videoFile: string;`
			`audioDescriptionFile: string;`
			`segments: AudioSegment[];`
			`}`
			`export interface CostBreakdown {`
			`videoInfo: {`
			`duration: number;`
			`totalUnits: number;`
			`unitType: string;`
			`processingInterval: number;`
			`};`
			`providerInfo: {`
			`visionProvider: string;`
			`visionModel: string;`
			`ttsProvider: string;`
			`ttsModel: string;`
			`};`
			`apiCosts: {`
			`visionInput: string;`
			`visionOutput: string;`
			`tts: string;`
			`total: string;`
			`};`
			`estimates: {`
			`totalAPICallsToProviders: number;`
			`estimatedProcessingTimeMinutes: number;`
			`};`
			`}`