aidio-description/dist/config/config.js

"use strict";
// Set the `__esModule` flag on the CommonJS exports object.
// NOTE(review): this is the interop marker conventionally emitted by
// ES-module-to-CommonJS transpilers; presumably this file is compiler output.
Object.defineProperty(exports, "__esModule", { value: true });
// Declare the `defaultConfig` export up front as undefined; it receives its
// real value at the bottom of the file.
exports.defaultConfig = void 0;
// Safe to reference here even though the declaration appears further down:
// function declarations are hoisted.
exports.getDefaultConfig = getDefaultConfig;
/**
 * Build a fresh default configuration object.
 *
 * This is a function rather than a constant so that every `process.env`
 * lookup happens when the function is called (after dotenv has been
 * loaded), not when the module is imported.
 *
 * @returns {object} A newly created configuration object; each call returns
 *   a separate instance.
 */
function getDefaultConfig() {
    const env = process.env;

    // Prompt templates sent to the vision model.
    const singleFramePrompt = "Describe this frame from a video in 1-2 sentences for someone who cannot see it. Focus on key visual elements. Avoid using terms like 'in this frame', simply describe the actual frame. Keep sentences short and concise, as this will be used to generate an audio track which is overlayed on the video.";
    const frameDiffPrompt = "Describe what has changed between these frames in 1-2 sentences for someone who cannot see the video. Focus on significant visual changes only. Avoid talking about meta information such as 'in this frame', or 'the significant change is', and merely describe the actual change taking place. Only describe the changes relevant to the last frame. The previous frames are attached for you to build context and build situational awareness. Keep it short and concise, as your text will be used to generate audio description tracks to be played with the video.";
    const frameBatchPrompt = "Describe the sequence of frames in this batch over time for someone who cannot see it. Focus on what happens, changes, or stands out visually during these seconds. Keep it to 1-3 concise sentences, avoiding words like 'in these frames'—just describe what's happening. Use context from the previous batch if relevant. Keep sentences short and concise. Avoid speculation or overly verbose or unnecessary sentences. Try not to use nested sentences and keep sentences short to help flow. This will be used for audio description and mixed back in with the video file later, so we need to maintain consistency and quick pacing. Avoid using phrases such as 'as evidenced by' or 'suggesting'. Only focus on describing the visual scene. Do not repeat information given in the previous prompt, and focus only on what has changed since that description. Avoid talking about the scene or sequence, simply focus on the action within these frames. The listener knows that this is a video, so we do not need to remind them. Also avoid overusing phrases such as 'the scene shifts', the shifting or perspective change should be evident from the description of the sequence itself.";
    const narrationInstructions = "Speak in a calm, narrating tone suitable for audio descriptions. Keep a steady pace and clear enunciation.";

    // Per-provider vision settings; API keys come from the environment.
    const visionProviderTable = {
        openai: {
            apiKey: env.OPENAI_API_KEY,
            model: "gpt-5.4-mini",
            maxTokens: 300,
        },
        gemini: {
            apiKey: env.GOOGLE_API_KEY,
            model: "gemini-2.0-flash",
            maxTokens: 300,
        },
        ollama: {
            baseUrl: "http://localhost:11434",
            model: "gemma3:12b",
            maxTokens: 3000,
        },
        openrouter: {
            apiKey: env.OPENROUTER_API_KEY,
            model: "anthropic/claude-sonnet-4.5",
            baseUrl: "https://openrouter.ai/api/v1",
            maxTokens: 300,
        },
    };

    // Per-provider text-to-speech settings; credentials come from the environment.
    const ttsProviderTable = {
        openai: {
            apiKey: env.OPENAI_API_KEY,
            model: "gpt-4o-mini-tts",
            voice: "shimmer",
        },
        elevenlabs: {
            apiKey: env.ELEVENLABS_API_KEY,
            model: "eleven_multilingual_v2",
            voice: "JBFqnCBsd6RMkjVDRZzb",
        },
        google: {
            apiKey: env.GOOGLE_CLOUD_TTS_KEY,
            keyFilename: env.GOOGLE_CLOUD_TTS_KEYFILE,
            model: "chirp-hd",
            voice: "en-US-Chirp-HD-F",
        },
    };

    return {
        captureIntervalSeconds: 10,
        contextWindowSize: 5,
        defaultPrompt: singleFramePrompt,
        changePrompt: frameDiffPrompt,
        batchPrompt: frameBatchPrompt,
        // Vision AI settings
        visionProvider: "openai",
        visionModel: "gpt-5.4-mini",
        visionProviders: visionProviderTable,
        // TTS settings
        ttsProvider: "openai",
        ttsVoice: "alloy",
        ttsSpeedFactor: 1.5,
        ttsInstructions: narrationInstructions,
        ttsProviders: ttsProviderTable,
        // Video processing settings
        outputDir: "./desc/output/",
        tempDir: "./desc/tmp/",
        batchTimeMode: true,
        batchWindowDuration: 15,
        framesInBatch: 10,
    };
}
// Keep a static export alias for backward compatibility.
// NOTE: this object is built once, when the module is first loaded, so its
// apiKey/keyFilename fields hold whatever process.env contained at that moment.
// Callers should prefer getDefaultConfig() so the environment is read at call time.
exports.defaultConfig = getDefaultConfig();
//# sourceMappingURL=config.js.map
Rewrite frontend as single self-contained HTML file — all CSS/JS inline, no external files to fail loading 2026-05-13 17:24:10 +02:00			`"use strict";`
			`Object.defineProperty(exports, "__esModule", { value: true });`
			`exports.defaultConfig = void 0;`
			`exports.getDefaultConfig = getDefaultConfig;`
			`/**`
			`* Get default configuration options.`
			`* Uses a function so that process.env is read at call time`
			`* (after dotenv has been loaded), not at module import time.`
			`*/`
			`function getDefaultConfig() {`
			`return {`
			`captureIntervalSeconds: 10,`
			`contextWindowSize: 5,`
			`defaultPrompt: "Describe this frame from a video in 1-2 sentences for someone who cannot see it. Focus on key visual elements. Avoid using terms like 'in this frame', simply describe the actual frame. Keep sentences short and concise, as this will be used to generate an audio track which is overlayed on the video.",`
			changePrompt: "Describe what has changed between these frames in 1-2 sentences for someone who cannot see the video. Focus on significant visual changes only. Avoid talking about meta information such as 'in this frame', or 'the significant change is', and merely describe the actual change taking place. Only describe the changes relevant to the last frame. The previous frames are attached for you to build context and build situational awareness. Keep it short and concise, as your text will be used to generate audio description tracks to be played with the video.",
			batchPrompt: "Describe the sequence of frames in this batch over time for someone who cannot see it. Focus on what happens, changes, or stands out visually during these seconds. Keep it to 1-3 concise sentences, avoiding words like 'in these frames'—just describe what's happening. Use context from the previous batch if relevant. Keep sentences short and concise. Avoid speculation or overly verbose or unnecessary sentences. Try not to use nested sentences and keep sentences short to help flow. This will be used for audio description and mixed back in with the video file later, so we need to maintain consistency and quick pacing. Avoid using phrases such as 'as evidenced by' or 'suggesting'. Only focus on describing the visual scene. Do not repeat information given in the previous prompt, and focus only on what has changed since that description. Avoid talking about the scene or sequence, simply focus on the action within these frames. The listener knows that this is a video, so we do not need to remind them. Also avoid overusing phrases such as 'the scene shifts', the shifting or perspective change should be evident from the description of the sequence itself.",
			`// Vision AI settings`
			`visionProvider: "openai",`
			`visionModel: "gpt-5.4-mini",`
			`visionProviders: {`
			`openai: {`
			`apiKey: process.env.OPENAI_API_KEY,`
			`model: "gpt-5.4-mini",`
			`maxTokens: 300`
			`},`
			`gemini: {`
			`apiKey: process.env.GOOGLE_API_KEY,`
			`model: "gemini-2.0-flash",`
			`maxTokens: 300`
			`},`
			`ollama: {`
			`baseUrl: "http://localhost:11434",`
			`model: "gemma3:12b",`
			`maxTokens: 3000`
			`},`
			`openrouter: {`
			`apiKey: process.env.OPENROUTER_API_KEY,`
			`model: "anthropic/claude-sonnet-4.5",`
			`baseUrl: "https://openrouter.ai/api/v1",`
			`maxTokens: 300`
			`}`
			`},`
			`// TTS settings`
			`ttsProvider: "openai",`
			`ttsVoice: "alloy",`
			`ttsSpeedFactor: 1.5,`
			`ttsInstructions: "Speak in a calm, narrating tone suitable for audio descriptions. Keep a steady pace and clear enunciation.",`
			`ttsProviders: {`
			`openai: {`
			`apiKey: process.env.OPENAI_API_KEY,`
			`model: "gpt-4o-mini-tts",`
			`voice: "shimmer"`
			`},`
			`elevenlabs: {`
			`apiKey: process.env.ELEVENLABS_API_KEY,`
			`model: "eleven_multilingual_v2",`
			`voice: "JBFqnCBsd6RMkjVDRZzb"`
			`},`
			`google: {`
			`apiKey: process.env.GOOGLE_CLOUD_TTS_KEY,`
			`keyFilename: process.env.GOOGLE_CLOUD_TTS_KEYFILE,`
			`model: "chirp-hd",`
			`voice: "en-US-Chirp-HD-F"`
			`}`
			`},`
			`// Video processing settings`
			`outputDir: "./desc/output/",`
			`tempDir: "./desc/tmp/",`
			`batchTimeMode: true,`
			`batchWindowDuration: 15,`
			`framesInBatch: 10,`
			`};`
			`}`
			`// Keep a static export alias for backward compatibility`
			`// (but callers should prefer getDefaultConfig() for correct env loading)`
			`exports.defaultConfig = getDefaultConfig();`
			`//# sourceMappingURL=config.js.map`