aidio-description/dist/utils/processor.js

"use strict";
// Interop helper for default imports: reuses `this.__importDefault` when one is already defined;
// otherwise returns `mod` unchanged if it is flagged `__esModule`, or wraps it as `{ default: mod }`
// so call sites can always read `.default`.
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.generateAudioDescriptionFromOptions = generateAudioDescriptionFromOptions;
exports.generateAudioDescription = generateAudioDescription;
const fs_1 = __importDefault(require("fs"));
const path_1 = __importDefault(require("path"));
const config_1 = require("../config/config");
const stats_1 = require("../config/stats");
const visionProviderFactory_1 = require("../providers/vision/visionProviderFactory");
const ttsProviderFactory_1 = require("../providers/tts/ttsProviderFactory");
const mediaUtils_1 = require("./mediaUtils");
/**
 * High-level entry point: produce an audio description for a video from plain options.
 *
 * Builds the effective configuration (defaults overlaid with `options`), makes sure the
 * temp and output directories exist, creates the vision/TTS providers and a fresh stats
 * object, then hands everything to generateAudioDescription.
 *
 * @param videoFilePath - Path to the input video file
 * @param options - Optional configuration overrides applied on top of getDefaultConfig()
 * @param processingOptions - Optional processing controls, forwarded unchanged to generateAudioDescription
 * @returns Whatever generateAudioDescription resolves to
 */
async function generateAudioDescriptionFromOptions(videoFilePath, options = {}, processingOptions = {}) {
    const defaults = (0, config_1.getDefaultConfig)();
    const config = { ...defaults, ...options };
    // Create the working directories (temp first, then output) when they are missing
    for (const directory of [config.tempDir, config.outputDir]) {
        if (fs_1.default.existsSync(directory)) {
            continue;
        }
        fs_1.default.mkdirSync(directory, { recursive: true });
    }
    const visionProvider = visionProviderFactory_1.VisionProviderFactory.getProvider(config);
    const ttsProvider = ttsProviderFactory_1.TTSProviderFactory.getProvider(config);
    const stats = (0, stats_1.createStats)();
    return generateAudioDescription(videoFilePath, visionProvider, ttsProvider, config, stats, processingOptions);
}
/**
 * Generate audio description for a video (low-level API requiring pre-initialized providers).
 *
 * In the default (frame) mode one frame is captured every `captureIntervalSeconds`,
 * described by the vision provider, converted to speech by the TTS provider, and
 * scheduled so that each new segment starts only after the previous one (plus a
 * 0.25 s gap) has finished. When `batchTimeMode` is set, the work is delegated to
 * generateAudioDescriptionBatch instead.
 *
 * @param videoFilePath - Path to the input video file
 * @param visionProvider - Vision provider instance (describeImage / compareImages are called here)
 * @param ttsProvider - TTS provider instance (textToSpeech is called here)
 * @param options - Optional configuration overrides, applied on top of getDefaultConfig()
 * @param stats - Stats object for tracking; updated in place
 * @param processingOptions - Optional resume/progress controls read here:
 *   `existingSegments` (segments to keep in front of the new ones),
 *   `currentTimePosition` (start time for the first new segment),
 *   `startIndex` (first frame index to process) and
 *   `onProgress` (callback invoked after every frame)
 * @returns `{ videoFile, audioDescriptionFile, segments }`
 * @throws RangeError in frame mode when `captureIntervalSeconds` is not a positive finite number
 */
async function generateAudioDescription(videoFilePath, visionProvider, ttsProvider, options = {}, stats, processingOptions = {}) {
    // Merge provided options with defaults so an omitted or partial `options` still yields a usable config
    const settings = { ...(0, config_1.getDefaultConfig)(), ...options };
    // Ensure temporary and output directories exist. With `recursive: true` mkdirSync does not
    // fail for a directory that already exists, so no racy existsSync pre-check is needed.
    fs_1.default.mkdirSync(settings.tempDir, { recursive: true });
    fs_1.default.mkdirSync(settings.outputDir, { recursive: true });
    // Get video duration
    const videoDuration = (0, mediaUtils_1.getVideoDuration)(videoFilePath);
    stats.totalFrames = Math.floor(videoDuration / settings.captureIntervalSeconds);
    console.log(`Video duration: ${videoDuration} seconds`);
    // If batchTimeMode is enabled, use the batch approach
    if (settings.batchTimeMode) {
        return await generateAudioDescriptionBatch(videoFilePath, videoDuration, settings, visionProvider, ttsProvider, stats, processingOptions);
    }
    // A zero interval would make the frame count infinite and the loop below would call the
    // (paid) providers forever; reject anything that is not a positive finite number up front.
    const captureInterval = Number(settings.captureIntervalSeconds);
    if (!Number.isFinite(captureInterval) || captureInterval <= 0) {
        throw new RangeError(`captureIntervalSeconds must be a positive number, got ${settings.captureIntervalSeconds}`);
    }
    // Calculate the number of frames to capture
    const totalFrames = Math.floor(videoDuration / captureInterval);
    console.log(`Will capture ${totalFrames} frames at ${settings.captureIntervalSeconds} second intervals`);
    // Context window to store previous frames
    const frameContext = [];
    // Array to store audio segment information - preload with existing segments if resuming
    const audioSegments = processingOptions.existingSegments
        ? [...processingOptions.existingSegments]
        : [];
    // Track our current time position (will be adjusted for audio overlap)
    let currentTimePosition = processingOptions.currentTimePosition || 0;
    // Start from given index if resuming
    const startIndex = processingOptions.startIndex || 0;
    // Drift beyond this many seconds from the original schedule triggers a warning
    const maxAllowableDrift = captureInterval * 2;
    // Small gap between descriptions to prevent hard cuts
    const bufferTime = 0.25;
    // Process each frame
    for (let i = startIndex; i < totalFrames; i++) {
        // Ideal position on the original schedule vs. the position adjusted for earlier audio
        const idealTimePosition = i * captureInterval;
        const timePosition = currentTimePosition;
        const timelineDrift = timePosition - idealTimePosition;
        // Log if drift is becoming significant
        if (Math.abs(timelineDrift) > maxAllowableDrift) {
            console.warn(`WARNING: Timeline drift at frame ${i} is ${timelineDrift.toFixed(2)} seconds.`);
        }
        const frameFilePath = path_1.default.join(settings.tempDir, `frame_${i.toString().padStart(5, '0')}.jpg`);
        // The frame itself is always grabbed at the ideal time; only the audio is shifted
        (0, mediaUtils_1.captureVideoFrame)(videoFilePath, idealTimePosition, frameFilePath);
        console.log(`Captured frame at ${idealTimePosition} seconds (scheduled at ${timePosition.toFixed(2)} seconds)`);
        // Add current frame to context and keep the window at the configured size
        frameContext.push({
            index: i,
            path: frameFilePath,
            timePosition
        });
        if (frameContext.length > settings.contextWindowSize) {
            frameContext.shift();
        }
        // Describe the frame on its own when there is no earlier frame in the window
        // (first frame, or a window too small to hold two frames); otherwise describe
        // what changed relative to the previous frame.
        const previousFrame = frameContext.length >= 2
            ? frameContext[frameContext.length - 2]
            : undefined;
        const result = previousFrame
            ? await visionProvider.compareImages(previousFrame.path, frameFilePath, settings.changePrompt)
            : await visionProvider.describeImage(frameFilePath, settings.defaultPrompt);
        const description = result.description;
        const usageStats = result.usage;
        // Update stats
        // NOTE(review): token counts are accumulated into fields named *Cost - confirm intended units.
        stats.totalVisionInputCost += usageStats.inputTokens;
        stats.totalVisionOutputCost += usageStats.outputTokens;
        stats.totalCost += usageStats.totalTokens;
        console.log(`Description: ${description}`);
        // Generate speech from description
        const audioFilePath = path_1.default.join(settings.tempDir, `audio_${i.toString().padStart(5, '0')}.mp3`);
        const ttsResult = await ttsProvider.textToSpeech(description, audioFilePath, {
            voice: settings.ttsVoice,
            model: settings.ttsProviders[settings.ttsProvider].model,
            speedFactor: settings.ttsSpeedFactor,
            instructions: settings.ttsInstructions
        });
        const audioDuration = ttsResult.duration;
        stats.totalTTSCost += ttsResult.cost;
        console.log(`Audio duration: ${audioDuration} seconds`);
        // Store segment information
        const segment = {
            audioFile: audioFilePath,
            startTime: timePosition,
            duration: audioDuration,
            description
        };
        audioSegments.push(segment);
        // Notify progress callback
        if (processingOptions.onProgress) {
            processingOptions.onProgress({
                type: 'frame',
                index: i,
                total: totalFrames,
                segment
            });
        }
        // The next description may start once this one (plus the gap) has finished...
        currentTimePosition = timePosition + audioDuration + bufferTime;
        // ...but never earlier than the next frame's scheduled time
        const nextIdealPosition = (i + 1) * captureInterval;
        if (currentTimePosition < nextIdealPosition) {
            console.log(`Audio finished before next scheduled frame. Catching up with timeline.`);
            currentTimePosition = nextIdealPosition;
        }
    }
    // Combine audio segments into final audio description track
    const outputAudioPath = path_1.default.join(settings.outputDir, `${path_1.default.basename(videoFilePath, path_1.default.extname(videoFilePath))}_description.mp3`);
    (0, mediaUtils_1.combineAudioSegments)(audioSegments, outputAudioPath, videoDuration, settings);
    console.log(`\nAudio description generated: ${outputAudioPath}`);
    console.log(`To play with video, use: ffplay -i ${videoFilePath} -i ${outputAudioPath} -map 0:v -map 1:a`);
    (0, stats_1.printStats)(stats, settings);
    return {
        videoFile: videoFilePath,
        audioDescriptionFile: outputAudioPath,
        segments: audioSegments
    };
}
/**
 * Generate audio description using the "batch time" mode with overlap prevention.
 *
 * The video is cut into consecutive windows of `batchWindowDuration` seconds. For every
 * window `framesInBatch` evenly spaced frames are captured and described together, the
 * description is converted to speech, and the segment is scheduled after the previous
 * one (plus a 0.5 s gap) has finished. Only whole windows are processed: the batch count
 * is floor(videoDuration / batchWindowDuration), so a trailing partial window is skipped.
 *
 * @param videoFilePath - Path to the input video file
 * @param videoDuration - Duration of the video in seconds
 * @param settings - The merged config and user options
 * @param visionProvider - The vision provider instance (describeBatch is called here)
 * @param ttsProvider - The TTS provider instance (textToSpeech is called here)
 * @param stats - Stats object for tracking; updated in place
 * @param processingOptions - Optional resume/progress controls read here:
 *   `lastContext` (context handed to the first describeBatch call),
 *   `existingSegments`, `currentTimePosition`, `startIndex` and `onProgress`
 * @returns `{ videoFile, audioDescriptionFile, segments }`
 * @throws RangeError when `batchWindowDuration` is not a positive finite number
 */
async function generateAudioDescriptionBatch(videoFilePath, videoDuration, settings, visionProvider, ttsProvider, stats, processingOptions = {}) {
    // A zero window would make the batch count infinite (and the end-of-video check below
    // could never fire), so reject anything that is not a positive finite number. Coercing
    // once also keeps `idealBatchStart + batchWindow` numeric when the setting is a string.
    const batchWindow = Number(settings.batchWindowDuration);
    if (!Number.isFinite(batchWindow) || batchWindow <= 0) {
        throw new RangeError(`batchWindowDuration must be a positive number, got ${settings.batchWindowDuration}`);
    }
    const totalBatches = Math.floor(videoDuration / batchWindow);
    console.log(`Using batchTimeMode. Total batches: ${totalBatches} (each covers ${settings.batchWindowDuration} sec)`);
    // We'll hold the last batch's frames and description for context
    let lastBatchContext = processingOptions.lastContext || {};
    // Preload with existing segments if resuming
    const audioSegments = processingOptions.existingSegments
        ? [...processingOptions.existingSegments]
        : [];
    // Track our current time position (will be adjusted for audio overlap)
    let currentTimePosition = processingOptions.currentTimePosition || 0;
    // Start from given index if resuming
    const startBatchIndex = processingOptions.startIndex || 0;
    // Drift beyond 50% of the batch window triggers a warning
    const maxAllowableDrift = batchWindow * 0.5;
    // Small gap between descriptions
    const bufferTime = 0.5;
    for (let batchIndex = startBatchIndex; batchIndex < totalBatches; batchIndex++) {
        // Ideal position on the original schedule vs. the position adjusted for earlier audio
        const idealBatchStart = batchIndex * batchWindow;
        const batchStart = currentTimePosition;
        const timelineDrift = batchStart - idealBatchStart;
        // Log if drift is becoming significant
        if (Math.abs(timelineDrift) > maxAllowableDrift) {
            console.warn(`WARNING: Timeline drift at batch ${batchIndex} is ${timelineDrift.toFixed(2)} seconds.`);
        }
        const batchEnd = idealBatchStart + batchWindow;
        if (batchEnd > videoDuration) {
            break; // Safety check
        }
        console.log(`Processing batch #${batchIndex}: Original time window ${idealBatchStart}-${batchEnd} sec, scheduled at ${batchStart.toFixed(2)} sec`);
        // Capture frames for this batch - evenly spaced across the ideal window
        const framePaths = [];
        for (let i = 0; i < settings.framesInBatch; i++) {
            const t = idealBatchStart + (i * batchWindow) / settings.framesInBatch;
            const frameFilePath = path_1.default.join(settings.tempDir, `batch_${batchIndex}_frame_${i}.jpg`);
            (0, mediaUtils_1.captureVideoFrame)(videoFilePath, t, frameFilePath);
            framePaths.push(frameFilePath);
        }
        // Describe this batch of frames, passing along the previous batch's context
        const result = await visionProvider.describeBatch(framePaths, lastBatchContext, settings.batchPrompt);
        const description = result.description;
        const usageStats = result.usage;
        // Update stats
        // NOTE(review): token counts are accumulated into fields named *Cost - confirm intended units.
        stats.totalVisionInputCost += usageStats.inputTokens;
        stats.totalVisionOutputCost += usageStats.outputTokens;
        stats.totalCost += usageStats.totalTokens;
        console.log(`Batch #${batchIndex} description:\n${description}\n`);
        // Convert description to TTS
        const audioFilePath = path_1.default.join(settings.tempDir, `batch_audio_${batchIndex}.mp3`);
        const ttsResult = await ttsProvider.textToSpeech(description, audioFilePath, {
            voice: settings.ttsVoice,
            model: settings.ttsProviders[settings.ttsProvider].model,
            speedFactor: settings.ttsSpeedFactor,
            instructions: settings.ttsInstructions
        });
        const audioDuration = ttsResult.duration;
        stats.totalTTSCost += ttsResult.cost;
        console.log(`Batch #${batchIndex} audio duration: ${audioDuration} seconds`);
        // Store segment info with the adjusted start time
        const segment = {
            audioFile: audioFilePath,
            startTime: batchStart,
            duration: audioDuration,
            description
        };
        audioSegments.push(segment);
        // Notify progress callback
        if (processingOptions.onProgress) {
            processingOptions.onProgress({
                type: 'batch',
                index: batchIndex,
                total: totalBatches,
                segment
            });
        }
        // The next description may start once this one (plus the gap) has finished...
        currentTimePosition = batchStart + audioDuration + bufferTime;
        // ...but never earlier than the next batch's scheduled time
        const nextIdealPosition = (batchIndex + 1) * batchWindow;
        if (currentTimePosition < nextIdealPosition) {
            console.log(`Batch audio finished before next scheduled batch. Catching up with timeline.`);
            currentTimePosition = nextIdealPosition;
        }
        // Update lastBatchContext so the next batch can keep track of what's previously seen
        lastBatchContext = {
            lastDescription: description,
            lastFramePaths: framePaths.slice(-2) // keep the last 2 frames from this batch
        };
    }
    // Combine all the audio segments into one track
    const outputAudioPath = path_1.default.join(settings.outputDir, `${path_1.default.basename(videoFilePath, path_1.default.extname(videoFilePath))}_description_batch.mp3`);
    (0, mediaUtils_1.combineAudioSegments)(audioSegments, outputAudioPath, videoDuration, settings);
    console.log(`\nBatch audio description generated: ${outputAudioPath}`);
    console.log(`To play with video, use: ffplay -i ${videoFilePath} -i ${outputAudioPath} -map 0:v -map 1:a`);
    (0, stats_1.printStats)(stats, settings);
    return {
        videoFile: videoFilePath,
        audioDescriptionFile: outputAudioPath,
        segments: audioSegments
    };
}
//# sourceMappingURL=processor.js.map
Rewrite frontend as single self-contained HTML file — all CSS/JS inline, no external files to fail loading 2026-05-13 17:24:10 +02:00			`"use strict";`
			`var __importDefault = (this && this.__importDefault) \|\| function (mod) {`
			`return (mod && mod.__esModule) ? mod : { "default": mod };`
			`};`
			`Object.defineProperty(exports, "__esModule", { value: true });`
			`exports.generateAudioDescriptionFromOptions = generateAudioDescriptionFromOptions;`
			`exports.generateAudioDescription = generateAudioDescription;`
			`const fs_1 = __importDefault(require("fs"));`
			`const path_1 = __importDefault(require("path"));`
			`const config_1 = require("../config/config");`
			`const stats_1 = require("../config/stats");`
			`const visionProviderFactory_1 = require("../providers/vision/visionProviderFactory");`
			`const ttsProviderFactory_1 = require("../providers/tts/ttsProviderFactory");`
			`const mediaUtils_1 = require("./mediaUtils");`
			`/**`
			`* High-level API: Generate audio description for a video with just options.`
			`* This internally creates providers and stats so callers don't need to.`
			`*`
			`* @param videoFilePath - Path to the input video file`
			`* @param options - Optional configuration overrides`
			`* @returns Result of the operation`
			`*/`
			`async function generateAudioDescriptionFromOptions(videoFilePath, options = {}, processingOptions = {}) {`
			`const config = { ...(0, config_1.getDefaultConfig)(), ...options };`
			`if (!fs_1.default.existsSync(config.tempDir)) {`
			`fs_1.default.mkdirSync(config.tempDir, { recursive: true });`
			`}`
			`if (!fs_1.default.existsSync(config.outputDir)) {`
			`fs_1.default.mkdirSync(config.outputDir, { recursive: true });`
			`}`
			`const visionProvider = visionProviderFactory_1.VisionProviderFactory.getProvider(config);`
			`const ttsProvider = ttsProviderFactory_1.TTSProviderFactory.getProvider(config);`
			`const stats = (0, stats_1.createStats)();`
			`return generateAudioDescription(videoFilePath, visionProvider, ttsProvider, config, stats, processingOptions);`
			`}`
			`/**`
			`* Generate audio description for a video (low-level API requiring pre-initialized providers).`
			`* @param videoFilePath - Path to the input video file`
			`* @param visionProvider - Vision provider instance`
			`* @param ttsProvider - TTS provider instance`
			`* @param options - Optional configuration overrides`
			`* @param stats - Stats object for tracking`
			`* @returns Result of the operation`
			`*/`
			`async function generateAudioDescription(videoFilePath, visionProvider, ttsProvider, options = {}, stats, processingOptions = {}) {`
			`// Merge provided options with defaults`
			`const settings = { ...options };`
			`// Ensure temporary and output directories exist`
			`if (!fs_1.default.existsSync(settings.tempDir)) {`
			`fs_1.default.mkdirSync(settings.tempDir, { recursive: true });`
			`}`
			`if (!fs_1.default.existsSync(settings.outputDir)) {`
			`fs_1.default.mkdirSync(settings.outputDir, { recursive: true });`
			`}`
			`// Get video duration`
			`const videoDuration = (0, mediaUtils_1.getVideoDuration)(videoFilePath);`
			`stats.totalFrames = Math.floor(videoDuration / settings.captureIntervalSeconds);`
			console.log(`Video duration: ${videoDuration} seconds`);
			`// If batchTimeMode is enabled, use the new approach`
			`if (settings.batchTimeMode) {`
			`return await generateAudioDescriptionBatch(videoFilePath, videoDuration, settings, visionProvider, ttsProvider, stats, processingOptions);`
			`}`
			`// Calculate the number of frames to capture`
			`const totalFrames = Math.floor(videoDuration / settings.captureIntervalSeconds);`
			console.log(`Will capture ${totalFrames} frames at ${settings.captureIntervalSeconds} second intervals`);
			`// Context window to store previous frames`
			`const frameContext = [];`
			`// Array to store audio segment information - preload with existing segments if resuming`
			`const audioSegments = processingOptions.existingSegments`
			`? [...processingOptions.existingSegments]`
			`: [];`
			`// Track our current time position (will be adjusted for audio overlap)`
			`let currentTimePosition = processingOptions.currentTimePosition \|\| 0;`
			`// Start from given index if resuming`
			`const startIndex = processingOptions.startIndex \|\| 0;`
			`// Track drift from the original schedule`
			`let timelineDrift = 0;`
			`const maxAllowableDrift = settings.captureIntervalSeconds * 2; // Maximum drift before warning`
			`// Process each frame`
			`for (let i = startIndex; i < totalFrames; i++) {`
			`// Calculate the ideal time position based on the original schedule`
			`const idealTimePosition = i * settings.captureIntervalSeconds;`
			`// Use the adjusted time position that accounts for previous audio durations`
			`const timePosition = currentTimePosition;`
			`// Calculate drift from the original schedule`
			`timelineDrift = timePosition - idealTimePosition;`
			`// Log if drift is becoming significant`
			`if (Math.abs(timelineDrift) > maxAllowableDrift) {`
			console.warn(`WARNING: Timeline drift at frame ${i} is ${timelineDrift.toFixed(2)} seconds.`);
			`}`
			const frameFilePath = path_1.default.join(settings.tempDir, `frame_${i.toString().padStart(5, '0')}.jpg`);
			`// Capture frame at current time position (use the ideal time to capture the frame)`
			`(0, mediaUtils_1.captureVideoFrame)(videoFilePath, idealTimePosition, frameFilePath);`
			console.log(`Captured frame at ${idealTimePosition} seconds (scheduled at ${timePosition.toFixed(2)} seconds)`);
			`// Add current frame to context`
			`const currentFrame = {`
			`index: i,`
			`path: frameFilePath,`
			`timePosition`
			`};`
			`frameContext.push(currentFrame);`
			`// Keep context window at specified size`
			`if (frameContext.length > settings.contextWindowSize) {`
			`frameContext.shift();`
			`}`
			`// Generate description`
			`let description;`
			`let usageStats;`
			`if (frameContext.length === 1) {`
			`// First frame - just describe what's in it`
			`const result = await visionProvider.describeImage(frameFilePath, settings.defaultPrompt);`
			`description = result.description;`
			`usageStats = result.usage;`
			`}`
			`else {`
			`// Compare with previous frame`
			`const previousFrame = frameContext[frameContext.length - 2];`
			`const result = await visionProvider.compareImages(previousFrame.path, frameFilePath, settings.changePrompt);`
			`description = result.description;`
			`usageStats = result.usage;`
			`}`
			`// Update stats`
			`stats.totalVisionInputCost += usageStats.inputTokens;`
			`stats.totalVisionOutputCost += usageStats.outputTokens;`
			`stats.totalCost += usageStats.totalTokens;`
			console.log(`Description: ${description}`);
			`// Generate speech from description`
			const audioFilePath = path_1.default.join(settings.tempDir, `audio_${i.toString().padStart(5, '0')}.mp3`);
			`const ttsResult = await ttsProvider.textToSpeech(description, audioFilePath, {`
			`voice: settings.ttsVoice,`
			`model: settings.ttsProviders[settings.ttsProvider].model,`
			`speedFactor: settings.ttsSpeedFactor,`
			`instructions: settings.ttsInstructions`
			`});`
			`const audioDuration = ttsResult.duration;`
			`stats.totalTTSCost += ttsResult.cost;`
			console.log(`Audio duration: ${audioDuration} seconds`);
			`// Store segment information`
			`const segment = {`
			`audioFile: audioFilePath,`
			`startTime: timePosition,`
			`duration: audioDuration,`
			`description`
			`};`
			`audioSegments.push(segment);`
			`// Notify progress callback`
			`if (processingOptions.onProgress) {`
			`processingOptions.onProgress({`
			`type: 'frame',`
			`index: i,`
			`total: totalFrames,`
			`segment`
			`});`
			`}`
			`// Update the time position for the next iteration`
			`// Add a small buffer (0.25 sec) between descriptions to prevent hard cuts`
			`const bufferTime = 0.25;`
			`currentTimePosition = timePosition + audioDuration + bufferTime;`
			`// If we've fallen behind schedule, try to catch up (but don't skip content)`
			`const nextIdealPosition = (i + 1) * settings.captureIntervalSeconds;`
			`if (currentTimePosition < nextIdealPosition) {`
			console.log(`Audio finished before next scheduled frame. Catching up with timeline.`);
			`currentTimePosition = nextIdealPosition;`
			`timelineDrift = 0; // Reset drift since we've caught up`
			`}`
			`}`
			`// Combine audio segments into final audio description track`
			const outputAudioPath = path_1.default.join(settings.outputDir, `${path_1.default.basename(videoFilePath, path_1.default.extname(videoFilePath))}_description.mp3`);
			`(0, mediaUtils_1.combineAudioSegments)(audioSegments, outputAudioPath, videoDuration, settings);`
			`// Clean up temporary files if desired`
			`// cleanupTempFiles(settings.tempDir);`
			console.log(`\nAudio description generated: ${outputAudioPath}`);
			console.log(`To play with video, use: ffplay -i ${videoFilePath} -i ${outputAudioPath} -map 0:v -map 1:a`);
			`(0, stats_1.printStats)(stats, settings);`
			`return {`
			`videoFile: videoFilePath,`
			`audioDescriptionFile: outputAudioPath,`
			`segments: audioSegments`
			`};`
			`}`
			`/**`
			`* Generate audio description using the "batch time" mode with overlap prevention.`
			`* @param videoFilePath - Path to the input video file`
			`* @param videoDuration - Duration of the video in seconds`
			`* @param settings - The merged config and user options`
			`* @param visionProvider - The vision provider instance`
			`* @param ttsProvider - The TTS provider instance`
			`* @param stats - Stats object for tracking`
			`*/`
			`async function generateAudioDescriptionBatch(videoFilePath, videoDuration, settings, visionProvider, ttsProvider, stats, processingOptions = {}) {`
			`const totalBatches = Math.floor(videoDuration / settings.batchWindowDuration);`
			console.log(`Using batchTimeMode. Total batches: ${totalBatches} (each covers ${settings.batchWindowDuration} sec)`);
			`// We'll hold the last batch's frames or last batch's description for context`
			`let lastBatchContext = processingOptions.lastContext \|\| {};`
			`// Preload with existing segments if resuming`
			`const audioSegments = processingOptions.existingSegments`
			`? [...processingOptions.existingSegments]`
			`: [];`
			`// Track our current time position (will be adjusted for audio overlap)`
			`let currentTimePosition = processingOptions.currentTimePosition \|\| 0;`
			`// Start from given index if resuming`
			`const startBatchIndex = processingOptions.startIndex \|\| 0;`
			`// Track drift from the original schedule`
			`let timelineDrift = 0;`
			`const maxAllowableDrift = settings.batchWindowDuration * 0.5; // Maximum drift of 50% of batch window`
			`for (let batchIndex = startBatchIndex; batchIndex < totalBatches; batchIndex++) {`
			`// Calculate ideal batch timing based on configuration`
			`const idealBatchStart = batchIndex * settings.batchWindowDuration;`
			`// Use adjusted time position that accounts for previous audio durations`
			`const batchStart = currentTimePosition;`
			`// Calculate drift from the original schedule`
			`timelineDrift = batchStart - idealBatchStart;`
			`// Log if drift is becoming significant`
			`if (Math.abs(timelineDrift) > maxAllowableDrift) {`
			console.warn(`WARNING: Timeline drift at batch ${batchIndex} is ${timelineDrift.toFixed(2)} seconds.`);
			`}`
			`const batchEnd = idealBatchStart + settings.batchWindowDuration;`
			`if (batchEnd > videoDuration)`
			`break; // Safety check`
			console.log(`Processing batch #${batchIndex}: Original time window ${idealBatchStart}-${batchEnd} sec, scheduled at ${batchStart.toFixed(2)} sec`);
			`// Capture frames for this batch - use the ideal timing for frame capture`
			`const framePaths = [];`
			`for (let i = 0; i < settings.framesInBatch; i++) {`
			`const t = idealBatchStart + (i * settings.batchWindowDuration) / settings.framesInBatch;`
			const frameFilePath = path_1.default.join(settings.tempDir, `batch_${batchIndex}_frame_${i}.jpg`);
			`(0, mediaUtils_1.captureVideoFrame)(videoFilePath, t, frameFilePath);`
			`framePaths.push(frameFilePath);`
			`}`
			`// Use AI to describe this batch of frames, possibly providing some context`
			`const result = await visionProvider.describeBatch(framePaths, lastBatchContext, settings.batchPrompt);`
			`const description = result.description;`
			`const usageStats = result.usage;`
			`// Update stats`
			`stats.totalVisionInputCost += usageStats.inputTokens;`
			`stats.totalVisionOutputCost += usageStats.outputTokens;`
			`stats.totalCost += usageStats.totalTokens;`
			console.log(`Batch #${batchIndex} description:\n${description}\n`);
			`// Convert description to TTS`
			const audioFilePath = path_1.default.join(settings.tempDir, `batch_audio_${batchIndex}.mp3`);
			`const ttsResult = await ttsProvider.textToSpeech(description, audioFilePath, {`
			`voice: settings.ttsVoice,`
			`model: settings.ttsProviders[settings.ttsProvider].model,`
			`speedFactor: settings.ttsSpeedFactor,`
			`instructions: settings.ttsInstructions`
			`});`
			`const audioDuration = ttsResult.duration;`
			`stats.totalTTSCost += ttsResult.cost;`
			console.log(`Batch #${batchIndex} audio duration: ${audioDuration} seconds`);
			`// Store segment info with the adjusted start time`
			`const segment = {`
			`audioFile: audioFilePath,`
			`startTime: batchStart,`
			`duration: audioDuration,`
			`description`
			`};`
			`audioSegments.push(segment);`
			`// Notify progress callback`
			`if (processingOptions.onProgress) {`
			`processingOptions.onProgress({`
			`type: 'batch',`
			`index: batchIndex,`
			`total: totalBatches,`
			`segment`
			`});`
			`}`
			`// Update the time position for the next iteration`
			`// Add a small buffer (0.5 sec) between descriptions`
			`const bufferTime = 0.5;`
			`currentTimePosition = batchStart + audioDuration + bufferTime;`
			`// If we've fallen behind schedule, try to catch up (but don't skip content)`
			`const nextIdealPosition = (batchIndex + 1) * settings.batchWindowDuration;`
			`if (currentTimePosition < nextIdealPosition) {`
			console.log(`Batch audio finished before next scheduled batch. Catching up with timeline.`);
			`currentTimePosition = nextIdealPosition;`
			`timelineDrift = 0; // Reset drift since we've caught up`
			`}`
			`// Update lastBatchContext so the next batch can keep track of what's previously seen`
			`lastBatchContext = {`
			`lastDescription: description,`
			`lastFramePaths: framePaths.slice(-2) // keep the last 2 frames from this batch`
			`};`
			`}`
			`// Combine all the audio segments into one track`
			const outputAudioPath = path_1.default.join(settings.outputDir, `${path_1.default.basename(videoFilePath, path_1.default.extname(videoFilePath))}_description_batch.mp3`);
			`(0, mediaUtils_1.combineAudioSegments)(audioSegments, outputAudioPath, videoDuration, settings);`
			console.log(`\nBatch audio description generated: ${outputAudioPath}`);
			console.log(`To play with video, use: ffplay -i ${videoFilePath} -i ${outputAudioPath} -map 0:v -map 1:a`);
			`(0, stats_1.printStats)(stats, settings);`
			`return {`
			`videoFile: videoFilePath,`
			`audioDescriptionFile: outputAudioPath,`
			`segments: audioSegments`
			`};`
			`}`
			`//# sourceMappingURL=processor.js.map`