Add server: Express web UI + API for remote audio description generation with job queue, basic auth, resumable processing, subtitles, and muxing

This commit is contained in:
2026-05-13 16:23:43 +02:00
parent ce22dadd80
commit 7d1a0029bc
22 changed files with 1904 additions and 22 deletions

4
.gitignore vendored
View File

@@ -1,3 +1,5 @@
.env .env
node_modules node_modules
desc/ desc/
data/
uploads/

View File

@@ -14,6 +14,8 @@
"build": "tsc", "build": "tsc",
"start": "node dist/cli/index.js", "start": "node dist/cli/index.js",
"dev": "ts-node src/cli/index.ts", "dev": "ts-node src/cli/index.ts",
"server": "ts-node src/server/index.ts",
"server:build": "node dist/server/index.js",
"test": "jest", "test": "jest",
"lint": "eslint src/**/*.ts", "lint": "eslint src/**/*.ts",
"prepublishOnly": "npm run build" "prepublishOnly": "npm run build"

View File

@@ -80,7 +80,7 @@ export function getDefaultConfig(): Config {
openai: { openai: {
apiKey: process.env.OPENAI_API_KEY, apiKey: process.env.OPENAI_API_KEY,
model: "gpt-4o-mini-tts", model: "gpt-4o-mini-tts",
voice: "alloy" voice: "shimmer"
}, },
elevenlabs: { elevenlabs: {
apiKey: process.env.ELEVENLABS_API_KEY, apiKey: process.env.ELEVENLABS_API_KEY,

View File

@@ -19,6 +19,8 @@ export { loadConfigFromFile, saveConfigToFile } from './utils/configUtils';
export type { Config } from './config/config'; export type { Config } from './config/config';
export type { export type {
ProcessingResult, ProcessingResult,
ProcessingOptions,
ProgressInfo,
CostBreakdown, CostBreakdown,
Stats, Stats,
VisionProvider, VisionProvider,

View File

@@ -81,10 +81,28 @@ export interface BatchContext {
lastFramePaths?: string[]; lastFramePaths?: string[];
} }
// Progress callback for real-time tracking
/**
 * Payload handed to `ProcessingOptions.onProgress`.
 * NOTE(review): field meanings are inferred from their names; confirm against the code that emits them.
 */
export interface ProgressInfo {
// Which kind of unit this update describes.
type: 'frame' | 'batch';
// presumably the position of the unit within the run — TODO confirm whether 0- or 1-based
index: number;
// presumably the number of units in the whole run — TODO confirm
total: number;
// The audio segment associated with this update.
segment: AudioSegment;
}
// Extended processing options for resumability
/**
 * Optional inputs for continuing a previous run and for observing progress.
 * Every field is optional.
 * NOTE(review): how each field is consumed is not visible here; the notes below are assumptions to confirm.
 */
export interface ProcessingOptions {
// presumably the unit index to resume from — TODO confirm
startIndex?: number;
// presumably segments already produced by an earlier run — TODO confirm
existingSegments?: AudioSegment[];
// presumably the batch context saved at the last checkpoint — TODO confirm
lastContext?: BatchContext;
// presumably the timeline position reached so far; unit not shown here — TODO confirm
currentTimePosition?: number;
// Callback that receives a ProgressInfo object.
onProgress?: (info: ProgressInfo) => void;
}
// Result interfaces // Result interfaces
export interface ProcessingResult { export interface ProcessingResult {
videoFile: string; videoFile: string;
audioDescriptionFile: string; audioDescriptionFile: string;
segments: AudioSegment[];
} }
export interface CostBreakdown { export interface CostBreakdown {

32
src/server/app.ts Normal file
View File

@@ -0,0 +1,32 @@
import express from 'express';
import cors from 'cors';
import path from 'path';
import fs from 'fs';
import { basicAuth } from './middleware/auth';
import authRoutes from './routes/auth';
import configRoutes from './routes/config';
import filesRoutes from './routes/files';
import { createJobsRouter } from './routes/jobs';
import { JobManager } from './services/jobManager';
/**
 * Assembles the Express application.
 *
 * Registration order: CORS, JSON body parsing (50mb limit), Basic auth, the
 * `/api/*` routers, then the static frontend.
 *
 * @param jobManager Job manager passed to the jobs router factory.
 * @returns The configured Express application (not yet listening).
 */
export function createApp(jobManager: JobManager): express.Application {
  // Serve static frontend from src/server/public (works with ts-node and compiled)
  const staticRoot = path.resolve(__dirname, '..', '..', 'src', 'server', 'public');

  const server = express();

  // Request parsing
  server.use(cors());
  server.use(express.json({ limit: '50mb' }));

  // Auth middleware
  server.use(basicAuth);

  // API routes
  server.use('/api/auth', authRoutes);
  server.use('/api/config', configRoutes);
  server.use('/api/files', filesRoutes);
  server.use('/api/jobs', createJobsRouter(jobManager));

  // Frontend assets
  server.use(express.static(staticRoot));

  return server;
}

59
src/server/db/index.ts Normal file
View File

@@ -0,0 +1,59 @@
import Database from 'better-sqlite3';
import path from 'path';
import fs from 'fs';
// Database file location; a relative path, so it resolves against the process working directory.
const DB_PATH = path.resolve('./data/server.db');

// Shared connection, assigned on the first getDb() call.
let db: Database.Database;

/**
 * Returns the shared SQLite connection, opening it when needed.
 *
 * On open it creates the data directory if missing, enables WAL journaling and
 * foreign-key enforcement, and runs the schema migration.
 *
 * A connection that has been closed (for example by closeDb()) is reopened
 * rather than returned, so callers never receive a dead handle.
 *
 * @returns An open better-sqlite3 database handle.
 */
export function getDb(): Database.Database {
  // `open` turns false after close(); the cached object would otherwise still look usable.
  if (!db || !db.open) {
    // `recursive: true` makes this a no-op when the directory already exists.
    fs.mkdirSync(path.dirname(DB_PATH), { recursive: true });
    db = new Database(DB_PATH);
    db.pragma('journal_mode = WAL');
    db.pragma('foreign_keys = ON');
    migrate();
  }
  return db;
}
/**
 * Creates the `jobs` and `config` tables when they do not exist yet.
 *
 * Operates on the module-level `db` handle, so it must run after `db` has been
 * assigned (getDb calls it right after opening the connection). Only
 * `CREATE TABLE IF NOT EXISTS` is issued, so an existing table is left as it is.
 */
function migrate(): void {
db.exec(`
CREATE TABLE IF NOT EXISTS jobs (
id TEXT PRIMARY KEY,
video_path TEXT NOT NULL,
video_filename TEXT NOT NULL,
status TEXT NOT NULL DEFAULT 'pending',
config TEXT NOT NULL,
progress REAL DEFAULT 0,
current_index INTEGER DEFAULT 0,
total_units INTEGER DEFAULT 0,
segments TEXT DEFAULT '[]',
last_context TEXT DEFAULT '{}',
current_time_position REAL DEFAULT 0,
error TEXT,
created_at TEXT NOT NULL,
updated_at TEXT NOT NULL,
completed_at TEXT,
output_audio TEXT,
output_subtitles_srt TEXT,
output_subtitles_vtt TEXT,
output_muxed TEXT,
output_options TEXT DEFAULT '{}'
);
CREATE TABLE IF NOT EXISTS config (
key TEXT PRIMARY KEY,
value TEXT NOT NULL
);
`);
}
/**
 * Closes the shared database connection if one has been created.
 * Does nothing when getDb() was never called.
 */
export function closeDb(): void {
  if (!db) {
    return;
  }
  db.close();
}

123
src/server/db/jobStore.ts Normal file
View File

@@ -0,0 +1,123 @@
import { getDb } from '../db';
import { v4 as uuidv4 } from 'uuid';
/**
 * Flags selecting which artifacts a job should produce.
 * Stored on the job row as a JSON string (see createJob).
 */
export interface OutputOptions {
// Produce the audio description track.
audio: boolean;
// Produce subtitle files.
subtitles: boolean;
// Produce a muxed video file.
muxed: boolean;
}
/**
 * One row of the `jobs` table, exactly as read from SQLite.
 * Structured fields (`config`, `segments`, `last_context`, `output_options`)
 * are strings here; createJob writes `config` and `output_options` with JSON.stringify.
 */
export interface Job {
// Primary key; createJob generates it with uuidv4().
id: string;
video_path: string;
video_filename: string;
// Lifecycle state; the table default is 'pending'.
status: 'pending' | 'queued' | 'processing' | 'paused' | 'completed' | 'failed' | 'cancelled';
// JSON-serialized job configuration.
config: string;
// Progress value; the web UI renders it as a percentage.
progress: number;
// Checkpoint fields written together by saveCheckpoint.
current_index: number;
total_units: number;
// Serialized segment list; the table default is '[]'.
segments: string;
// Serialized context; the table default is '{}'.
last_context: string;
current_time_position: number;
// Error text, or null; updateJobStatus overwrites it on every status change.
error: string | null;
// ISO-8601 timestamps produced with Date#toISOString.
created_at: string;
updated_at: string;
// Set by updateJobStatus only when the status is 'completed'; null otherwise.
completed_at: string | null;
// Output locations written by saveJobOutputs; null when not provided.
output_audio: string | null;
output_subtitles_srt: string | null;
output_subtitles_vtt: string | null;
output_muxed: string | null;
// JSON-serialized OutputOptions.
output_options: string;
}
/**
 * Lists every job, newest first (ordered by `created_at` descending).
 *
 * @returns All rows of the `jobs` table.
 */
export function getAllJobs(): Job[] {
  const listStatement = getDb().prepare('SELECT * FROM jobs ORDER BY created_at DESC');
  const rows = listStatement.all();
  return rows as Job[];
}
/**
 * Looks up a single job by its id.
 *
 * @param id Job identifier.
 * @returns The matching row, or `undefined` when no job has that id.
 */
export function getJob(id: string): Job | undefined {
  const lookup = getDb().prepare('SELECT * FROM jobs WHERE id = ?');
  const row = lookup.get(id);
  return row as Job | undefined;
}
/**
 * Inserts a new job row and returns it as stored.
 *
 * The id is a fresh UUID v4; `created_at` and `updated_at` share one timestamp.
 * `config` and `outputOptions` are persisted as JSON strings. Columns not listed
 * in the INSERT take their table defaults.
 *
 * @param videoPath     Path of the source video.
 * @param filename      Display name of the video.
 * @param config        Job configuration object.
 * @param outputOptions Which outputs the job should produce.
 * @returns The freshly inserted job.
 */
export function createJob(videoPath: string, filename: string, config: object, outputOptions: OutputOptions): Job {
  const connection = getDb();
  const jobId = uuidv4();
  const timestamp = new Date().toISOString();

  const insert = connection.prepare(`
INSERT INTO jobs (id, video_path, video_filename, config, output_options, created_at, updated_at)
VALUES (?, ?, ?, ?, ?, ?, ?)
`);
  insert.run(
    jobId,
    videoPath,
    filename,
    JSON.stringify(config),
    JSON.stringify(outputOptions),
    timestamp,
    timestamp
  );

  // The row was just inserted, so the lookup cannot miss.
  return getJob(jobId)!;
}
/**
 * Sets a job's status and refreshes `updated_at`.
 *
 * `error` is stored as NULL when omitted or empty. `completed_at` is set to the
 * current time only for status 'completed' and is written as NULL for every
 * other status.
 *
 * @param id     Job identifier.
 * @param status New lifecycle state.
 * @param error  Optional error text to record.
 */
export function updateJobStatus(id: string, status: Job['status'], error?: string): void {
  const connection = getDb();
  const timestamp = new Date().toISOString();

  let completedAt: string | null = null;
  if (status === 'completed') {
    completedAt = timestamp;
  }

  const update = connection.prepare(`
UPDATE jobs SET status = ?, error = ?, updated_at = ?, completed_at = ? WHERE id = ?
`);
  update.run(status, error || null, timestamp, completedAt, id);
}
/**
 * Persists a job's resumable state in a single UPDATE and refreshes `updated_at`.
 *
 * @param id                  Job identifier.
 * @param segments            Segment list, already serialized to a string by the caller.
 * @param currentIndex        Value for `current_index`.
 * @param totalUnits          Value for `total_units`.
 * @param currentTimePosition Value for `current_time_position`.
 * @param lastContext         Context, already serialized to a string by the caller.
 * @param progress            Value for `progress`.
 */
export function saveCheckpoint(
  id: string,
  segments: string,
  currentIndex: number,
  totalUnits: number,
  currentTimePosition: number,
  lastContext: string,
  progress: number
): void {
  const connection = getDb();
  const timestamp = new Date().toISOString();

  const update = connection.prepare(`
UPDATE jobs SET segments = ?, current_index = ?, total_units = ?, current_time_position = ?, last_context = ?, progress = ?, updated_at = ? WHERE id = ?
`);
  update.run(
    segments,
    currentIndex,
    totalUnits,
    currentTimePosition,
    lastContext,
    progress,
    timestamp,
    id
  );
}
/**
 * Records the output locations of a job and refreshes `updated_at`.
 *
 * All four output columns are written on every call; any output that is
 * missing or empty in `outputs` is stored as NULL.
 *
 * @param id      Job identifier.
 * @param outputs Output locations keyed by kind.
 */
export function saveJobOutputs(
  id: string,
  outputs: { audio?: string; subtitlesSrt?: string; subtitlesVtt?: string; muxed?: string }
): void {
  const connection = getDb();
  const timestamp = new Date().toISOString();
  const { audio, subtitlesSrt, subtitlesVtt, muxed } = outputs;

  const update = connection.prepare(`
UPDATE jobs SET output_audio = ?, output_subtitles_srt = ?, output_subtitles_vtt = ?, output_muxed = ?, updated_at = ? WHERE id = ?
`);
  update.run(audio || null, subtitlesSrt || null, subtitlesVtt || null, muxed || null, timestamp, id);
}
/**
 * Removes the job row with the given id; a no-op when no such row exists.
 *
 * @param id Job identifier.
 */
export function deleteJob(id: string): void {
  const removal = getDb().prepare('DELETE FROM jobs WHERE id = ?');
  removal.run(id);
}
/**
 * Reads one value from the `config` table.
 *
 * @param key Config key.
 * @returns The stored value, or `undefined` when the key is absent.
 */
export function getConfigValue(key: string): string | undefined {
  const lookup = getDb().prepare('SELECT value FROM config WHERE key = ?');
  const match = lookup.get(key) as { value: string } | undefined;
  if (match == null) {
    return undefined;
  }
  return match.value;
}
/**
 * Stores a config value, replacing any existing entry for the same key.
 *
 * @param key   Config key.
 * @param value Value to store.
 */
export function setConfigValue(key: string, value: string): void {
  const upsert = getDb().prepare('INSERT OR REPLACE INTO config (key, value) VALUES (?, ?)');
  upsert.run(key, value);
}
/**
 * Loads the whole `config` table into a plain key → value object.
 *
 * @returns Every stored config entry; an empty object when the table is empty.
 */
export function getAllConfig(): Record<string, string> {
  const entries = getDb()
    .prepare('SELECT key, value FROM config')
    .all() as { key: string; value: string }[];

  const collected: Record<string, string> = {};
  entries.forEach(({ key, value }) => {
    collected[key] = value;
  });
  return collected;
}

41
src/server/index.ts Normal file
View File

@@ -0,0 +1,41 @@
import 'dotenv/config';
import { createApp } from './app';
import { JobManager } from './services/jobManager';
import { getDb, closeDb } from './db';
const DEFAULT_PORT = 3000;

/**
 * Parses SERVER_PORT, falling back to the default (with a warning) when the
 * value is not a valid TCP port, instead of handing NaN to listen().
 */
function resolvePort(raw: string | undefined): number {
  if (!raw) {
    return DEFAULT_PORT;
  }
  const parsed = parseInt(raw, 10);
  if (Number.isInteger(parsed) && parsed >= 0 && parsed <= 65535) {
    return parsed;
  }
  console.warn(`Invalid SERVER_PORT "${raw}", falling back to ${DEFAULT_PORT}`);
  return DEFAULT_PORT;
}

const PORT = resolvePort(process.env.SERVER_PORT);
const USERNAME = process.env.SERVER_USERNAME || 'admin';
const PASSWORD = process.env.SERVER_PASSWORD || 'aidio2024';
// True when the built-in fallback password is in effect.
const usingDefaultPassword = !process.env.SERVER_PASSWORD;

// Initialize database
getDb();

// Create job manager
const jobManager = new JobManager();

// Create app
const app = createApp(jobManager);

// Width of the banner between the two vertical borders.
const BANNER_INNER_WIDTH = 54;

/** Formats one banner row, padded so the right border lines up. */
function bannerRow(text: string): string {
  return `║ ${text.padEnd(BANNER_INNER_WIDTH - 2)} ║`;
}

app.listen(PORT, () => {
  // The password is never written to stdout: startup logs are often persisted or shared.
  const passwordNote = usingDefaultPassword
    ? 'built-in default (set SERVER_PASSWORD)'
    : '*'.repeat(Math.min(PASSWORD.length, 8));

  const banner = [
    '',
    `╔${'═'.repeat(BANNER_INNER_WIDTH)}╗`,
    bannerRow('Audio Description Server v1.0'),
    bannerRow(`http://localhost:${PORT}`),
    bannerRow(''),
    bannerRow(`Username: ${USERNAME}`),
    bannerRow(`Password: ${passwordNote}`),
    `╚${'═'.repeat(BANNER_INNER_WIDTH)}╝`,
    ''
  ].join('\n');
  console.log(banner);

  if (usingDefaultPassword) {
    console.warn('WARNING: SERVER_PASSWORD is not set; the server is using its built-in default password.');
  }
});

/** Closes the database and exits; shared by every termination signal. */
function shutdown(): void {
  console.log('\nShutting down...');
  closeDb();
  process.exit(0);
}

// Graceful shutdown
process.on('SIGINT', shutdown);
process.on('SIGTERM', shutdown);

View File

@@ -0,0 +1,29 @@
import { Request, Response, NextFunction } from 'express';
// Expected credentials; read once at module load, with built-in fallbacks.
const AUTH_USERNAME = process.env.SERVER_USERNAME || 'admin';
const AUTH_PASSWORD = process.env.SERVER_PASSWORD || 'aidio2024';

/** Sends the 401 challenge response with the given error message. */
function rejectUnauthorized(res: Response, message: string): void {
  res.setHeader('WWW-Authenticate', 'Basic realm="Audio Description Server"');
  res.status(401).json({ error: message });
}

/**
 * Express middleware enforcing HTTP Basic authentication.
 *
 * `/api/auth/login` and `/api/auth/check` pass through unauthenticated. Every
 * other request must carry `Authorization: Basic <base64(user:password)>`
 * matching the configured credentials; otherwise a 401 with a
 * `WWW-Authenticate` challenge is sent and `next` is not called.
 *
 * The decoded credentials are split at the FIRST colon only (RFC 7617): the
 * user-id cannot contain a colon but the password may. Splitting on every
 * colon would discard the tail of the password, which both rejects valid
 * passwords containing ':' and accepts `user:password:anything`.
 *
 * @param req  Incoming request.
 * @param res  Response used for the 401 challenge.
 * @param next Called when the request is allowed through.
 */
export function basicAuth(req: Request, res: Response, next: NextFunction): void {
  if (req.path === '/api/auth/login' || req.path === '/api/auth/check') {
    next();
    return;
  }

  const authHeader = req.headers.authorization;
  if (!authHeader || !authHeader.startsWith('Basic ')) {
    rejectUnauthorized(res, 'Authentication required');
    return;
  }

  // slice(6) drops the "Basic " prefix.
  const credentials = Buffer.from(authHeader.slice(6), 'base64').toString('utf-8');
  const separator = credentials.indexOf(':');

  // No colon at all means there is no password part; treat it as invalid.
  if (separator !== -1) {
    const username = credentials.slice(0, separator);
    const password = credentials.slice(separator + 1);
    if (username === AUTH_USERNAME && password === AUTH_PASSWORD) {
      next();
      return;
    }
  }

  rejectUnauthorized(res, 'Invalid credentials');
}

519
src/server/public/app.js Normal file
View File

@@ -0,0 +1,519 @@
// Token returned by POST /api/auth/login; sent as `Authorization: Basic <token>` and
// mirrored in sessionStorage so a page reload can restore the session.
let authToken = sessionStorage.getItem('authToken');
// Server-side path of the video chosen for the next job; '' or null when none is chosen yet.
let selectedFilePath = '';
// Config object most recently returned by GET /api/config (assigned in loadSettings).
let currentConfig = {};
/**
 * Builds the headers for a JSON API request, adding the Basic
 * Authorization header when a token is available.
 */
function apiHeaders() {
  const headers = { 'Content-Type': 'application/json' };
  if (!authToken) {
    return headers;
  }
  headers['Authorization'] = `Basic ${authToken}`;
  return headers;
}
/**
 * Sends an authenticated request and returns the raw Response.
 *
 * A 401 clears the stored token, shows the login screen and throws
 * `Error('Unauthorized')`. Other statuses are returned to the caller unchecked.
 *
 * @param method HTTP method.
 * @param url    Request URL.
 * @param body   Optional value; JSON-serialized when truthy.
 */
async function api(method, url, body) {
  const request = { method, headers: apiHeaders(), body: undefined };
  if (body) {
    request.body = JSON.stringify(body);
  }

  const response = await fetch(url, request);
  if (response.status !== 401) {
    return response;
  }

  // Session is no longer valid: drop the token and send the user back to login.
  sessionStorage.removeItem('authToken');
  authToken = null;
  showLogin();
  throw new Error('Unauthorized');
}
/**
 * Like api(), but parses the JSON body and throws on a non-2xx status,
 * using the body's `error` field as the message when present.
 */
async function apiJson(method, url, body) {
  const response = await api(method, url, body);
  const payload = await response.json();
  if (response.ok) {
    return payload;
  }
  throw new Error(payload.error || 'Request failed');
}
// Login
/** Shows the login screen, hides the main screen and clears any login error. */
function showLogin() {
  const byId = (id) => document.getElementById(id);
  byId('login-screen').classList.remove('hidden');
  byId('main-screen').classList.add('hidden');
  byId('login-error').classList.add('hidden');
}
/** Hides the login screen and reveals the main screen. */
function showMain() {
  const loginScreen = document.getElementById('login-screen');
  loginScreen.classList.add('hidden');
  const mainScreen = document.getElementById('main-screen');
  mainScreen.classList.remove('hidden');
}
// Login form: posts the credentials, stores the returned token on success and
// starts the app; otherwise shows the server's error (or a connection error).
document.getElementById('login-form').addEventListener('submit', async (e) => {
  e.preventDefault();

  const fieldValue = (id) => document.getElementById(id).value;
  const showError = (message) => {
    document.getElementById('login-error').textContent = message;
    document.getElementById('login-error').classList.remove('hidden');
  };

  const username = fieldValue('login-username');
  const password = fieldValue('login-password');

  try {
    const response = await fetch('/api/auth/login', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ username, password })
    });
    const result = await response.json();

    if (!result.authenticated) {
      showError(result.error);
      return;
    }

    authToken = result.token;
    sessionStorage.setItem('authToken', authToken);
    showMain();
    initApp();
  } catch (err) {
    showError('Connection failed');
  }
});
// Logout: stop background polling, forget the token and return to the login screen.
document.getElementById('logout-btn').addEventListener('click', () => {
  // Without this the 5-second job poll keeps firing unauthenticated requests after
  // logout; each 401 re-runs showLogin() and hides any login error being displayed.
  stopJobsPolling();
  sessionStorage.removeItem('authToken');
  authToken = null;
  showLogin();
});
// Tab navigation
// Only buttons that name a panel take part. The logout button also carries class
// "tab" but has no data-tab; including it made this handler throw on a null panel
// and strip the "active" state from every tab.
document.querySelectorAll('button.tab[data-tab]').forEach(btn => {
  btn.addEventListener('click', () => {
    const tabName = btn.dataset.tab;
    const panel = document.getElementById(tabName);
    if (!panel) return;

    document.querySelectorAll('button.tab[data-tab]').forEach(b => b.classList.remove('active'));
    btn.classList.add('active');

    // Panels start with class "hidden" in the markup, and `.hidden` is
    // `display: none !important`, so toggling "active" alone can never reveal them.
    document.querySelectorAll('.tab-content').forEach(c => {
      const isTarget = c === panel;
      c.classList.toggle('active', isTarget);
      c.classList.toggle('hidden', !isTarget);
    });

    // Refresh the data behind the tab that was just opened.
    if (tabName === 'dashboard') loadJobs();
    if (tabName === 'settings') loadSettings();
    if (tabName === 'files') loadFilesList();
  });
});
// Mini tabs (video source)
document.querySelectorAll('button.tab-mini').forEach(btn => {
  btn.addEventListener('click', () => {
    const panel = document.getElementById('src-' + btn.dataset.src);
    if (!panel) return;

    document.querySelectorAll('button.tab-mini').forEach(b => b.classList.remove('active'));
    btn.classList.add('active');

    // The browse and YouTube panels start with class "hidden" (`display: none !important`),
    // so that class has to be toggled together with "active" for them to appear.
    document.querySelectorAll('.src-panel').forEach(p => {
      const isTarget = p === panel;
      p.classList.toggle('active', isTarget);
      p.classList.toggle('hidden', !isTarget);
    });
  });
});
// File upload
// Choosing a local file clears any previously selected server-side path, so the
// submit handler uploads the file instead.
document.getElementById('video-upload').addEventListener('change', () => {
  const uploadInput = document.getElementById('video-upload');
  const chosen = uploadInput.files[0];
  if (!chosen) {
    return;
  }
  selectedFilePath = null;
});
// Refresh browse files
document.getElementById('refresh-files').addEventListener('click', loadBrowseFiles);

/**
 * Fills the "Browse Files" dropdown from GET /api/files.
 * Failures are logged to the console and leave the dropdown unchanged.
 */
async function loadBrowseFiles() {
  try {
    const data = await apiJson('GET', '/api/files');
    // File names come from uploads and downloads, so they are escaped before being
    // placed in markup. The list is built once rather than with `innerHTML +=`,
    // which re-parses the whole <select> on every iteration.
    const options = data.files.map(f =>
      `<option value="${escapeHtml(f.filePath)}">${escapeHtml(f.filename)} (${formatSize(f.size)})</option>`
    );
    const sel = document.getElementById('video-select');
    sel.innerHTML = '<option value="">-- Select file --</option>' + options.join('');
  } catch (err) {
    console.error(err);
  }
}
// Remember the server-side path picked in the dropdown; the empty placeholder is ignored.
document.getElementById('video-select').addEventListener('change', (e) => {
  const picked = e.target.value;
  if (!picked) {
    return;
  }
  selectedFilePath = picked;
});
// YouTube download
// Asks the server to download the URL, then selects the resulting file for the new job.
document.getElementById('download-url').addEventListener('click', async () => {
  const url = document.getElementById('youtube-url').value;
  if (!url) return;

  const status = document.getElementById('download-status');
  status.textContent = 'Downloading...';
  status.className = 'status';

  try {
    const data = await apiJson('POST', '/api/files/youtube', { url });
    status.textContent = `Downloaded: ${data.filename}`;
    status.className = 'status success';
    selectedFilePath = data.filePath;

    // Build the option through the DOM: the file name is derived from remote content
    // and must not be parsed as HTML, and `innerHTML +=` would also rebuild every
    // existing option. Arguments: text, value, defaultSelected, selected.
    const option = new Option(data.filename, data.filePath, true, true);
    document.getElementById('video-select').add(option);
  } catch (err) {
    status.textContent = `Error: ${err.message}`;
    status.className = 'status error';
  }
});
// New job form
// Submit flow: (1) make sure a server-side video path exists, uploading the chosen
// local file if necessary; (2) turn the form fields into a config object; (3) create
// the job and start it; (4) reset the form and switch to the dashboard.
document.getElementById('new-job-form').addEventListener('submit', async (e) => {
e.preventDefault();
// No server-side path yet: fall back to uploading the file from the Upload panel.
if (!selectedFilePath) {
const fileEl = document.getElementById('video-upload');
if (fileEl.files.length > 0) {
const formData = new FormData();
formData.append('video', fileEl.files[0]);
try {
// Plain fetch instead of api(): the browser must set the multipart Content-Type itself.
// NOTE(review): a 401 here is not routed through showLogin(), unlike api().
const res = await fetch('/api/files/upload', { method: 'POST', headers: { Authorization: `Basic ${authToken}` }, body: formData });
const data = await res.json();
if (!res.ok) throw new Error(data.error || 'Upload failed');
selectedFilePath = data.filePath;
} catch (err) {
alert('Upload error: ' + err.message);
return;
}
} else {
alert('Please select a video file or source');
return;
}
}
const fd = new FormData(e.target);
const config = {};
// Coerce form values: 'on' -> true, 'off' -> false, numeric strings -> numbers, everything else as-is.
// NOTE(review): unchecked checkboxes are omitted from FormData, so the 'off' branch looks
// unreachable and an unchecked "Batch Mode" sends no value rather than false — confirm intent.
// NOTE(review): empty inputs are kept as '' in config — confirm the server ignores them.
for (const [key, val] of fd.entries()) {
if (key === '') continue;
if (val === 'on') config[key] = true;
else if (val === 'off') config[key] = false;
else if (!isNaN(val) && val !== '') config[key] = parseFloat(val);
else config[key] = val;
}
// Output checkboxes are read straight from the form; a checked box reports 'on'.
const outputOptions = {
audio: fd.get('output-audio') === 'on',
subtitles: fd.get('output-subtitles') === 'on',
muxed: fd.get('output-muxed') === 'on'
};
// Build config with vision/tts providers
if (config.visionProvider) {
config.visionProviders = {};
config.visionProviders[config.visionProvider] = {
model: config.visionModel || 'gpt-4o',
maxTokens: config.visionMaxTokens ? parseInt(config.visionMaxTokens) : 300
};
}
if (config.ttsProvider) {
config.ttsProviders = {};
config.ttsProviders[config.ttsProvider] = {
model: config.ttsModel || 'tts-1',
voice: config.ttsVoice || 'alloy'
};
}
// Drop the flat fields that were folded into the provider objects, plus the output flags.
// NOTE(review): ttsVoice, visionProvider and ttsProvider stay in config — confirm that is intended.
delete config.visionModel;
delete config.visionMaxTokens;
delete config.ttsModel;
delete config['output-audio'];
delete config['output-subtitles'];
delete config['output-muxed'];
try {
// Create the job, then start it right away.
const data = await apiJson('POST', '/api/jobs', { videoPath: selectedFilePath, config, outputOptions });
await apiJson('POST', `/api/jobs/${data.job.id}/start`);
// Clear the selection and form, then jump to the dashboard to show the new job.
selectedFilePath = '';
document.getElementById('video-upload').value = '';
document.getElementById('new-job-form').reset();
document.querySelector('.tab[data-tab="dashboard"]').click();
loadJobs();
} catch (err) {
alert('Error creating job: ' + err.message);
}
});
// Load jobs
/** Fetches the job list and re-renders the dashboard; failures are only logged. */
async function loadJobs() {
  try {
    const response = await apiJson('GET', '/api/jobs');
    const jobList = response.jobs;
    renderJobs(jobList);
  } catch (err) {
    console.error(err);
  }
}
/**
 * Renders the job list into #jobs-list and wires up the per-job buttons.
 *
 * Each job becomes a card with a status badge, progress bar, meta line, optional
 * error text, download links (completed jobs only) and a collapsible segment log.
 * The container's markup is replaced wholesale on every call.
 *
 * @param jobs Job rows as returned by GET /api/jobs; `segments` is a JSON string.
 */
function renderJobs(jobs) {
const container = document.getElementById('jobs-list');
if (!jobs.length) {
container.innerHTML = '<p class="empty">No jobs yet. Create one from the "New Job" tab.</p>';
return;
}
container.innerHTML = jobs.map(j => {
// NOTE(review): JSON.parse throws on malformed segments, which would abort the whole render.
const segs = JSON.parse(j.segments || '[]');
const progressClass = j.status === 'completed' ? 'completed' : j.status === 'failed' ? 'failed' : '';
// Download links appear only once the job is completed and the matching output exists.
const downloads = [];
if (j.status === 'completed') {
if (j.output_audio) downloads.push(`<a href="/api/jobs/${j.id}/download/audio" download>Audio</a>`);
if (j.output_subtitles_srt) downloads.push(`<a href="/api/jobs/${j.id}/download/subtitles?format=srt" download>SRT</a>`);
if (j.output_subtitles_vtt) downloads.push(`<a href="/api/jobs/${j.id}/download/subtitles?format=vtt" download>VTT</a>`);
if (j.output_muxed) downloads.push(`<a href="/api/jobs/${j.id}/download/muxed" download>Muxed</a>`);
}
// Action buttons depend on the job's status; Delete is offered for everything except a running job.
let actions = '';
if (j.status === 'pending' || j.status === 'queued') {
actions += `<button class="start-job" data-id="${j.id}">Start</button>`;
}
if (j.status === 'processing') {
actions += `<button class="pause-job" data-id="${j.id}">Pause</button>`;
}
if (j.status === 'failed' || j.status === 'paused' || j.status === 'cancelled') {
actions += `<button class="restart-job" data-id="${j.id}">Restart</button>`;
}
if (j.status !== 'processing') {
actions += `<button class="delete-job danger" data-id="${j.id}">Delete</button>`;
}
// The file name, error text and segment descriptions are escaped; id and status are interpolated as-is.
return `
<div class="job-card" data-id="${j.id}">
<div class="job-card-header">
<h3>${escapeHtml(j.video_filename)}</h3>
<div class="job-actions">${actions}</div>
</div>
<span class="status-badge status-${j.status}">${j.status}</span>
<div class="progress-bar"><div class="progress-fill ${progressClass}" style="width:${j.progress}%"></div></div>
<div class="job-meta">
<span>${Math.round(j.progress)}%</span>
<span>Index: ${j.current_index}/${j.total_units}</span>
<span>${new Date(j.created_at).toLocaleString()}</span>
</div>
${j.error ? `<div class="error-msg">${escapeHtml(j.error)}</div>` : ''}
${downloads.length ? `<div class="download-links">${downloads.join('')}</div>` : ''}
<div class="job-detail" data-id="${j.id}">
<div class="segment-log">${segs.map((s, i) => `<div class="segment-entry"><span class="segment-time">[${s.startTime.toFixed(1)}s]</span> ${escapeHtml(s.description)}</div>`).join('')}</div>
</div>
<button class="toggle-detail" data-id="${j.id}">${segs.length} segments</button>
</div>`;
}).join('');
// Wire up buttons
// Listeners are attached after every render because innerHTML replaced the previous elements.
container.querySelectorAll('.start-job').forEach(b => b.addEventListener('click', () => handleJobAction(b.dataset.id, 'start')));
container.querySelectorAll('.pause-job').forEach(b => b.addEventListener('click', () => handleJobAction(b.dataset.id, 'pause')));
container.querySelectorAll('.restart-job').forEach(b => b.addEventListener('click', () => handleJobAction(b.dataset.id, 'restart')));
container.querySelectorAll('.delete-job').forEach(b => b.addEventListener('click', () => handleJobAction(b.dataset.id, 'delete')));
// Toggle the segment log; the button label switches between "Hide segments" and the segment count.
container.querySelectorAll('.toggle-detail').forEach(b => b.addEventListener('click', () => {
const detail = container.querySelector(`.job-detail[data-id="${b.dataset.id}"]`);
detail.classList.toggle('open');
b.textContent = detail.classList.contains('open') ? 'Hide segments' : `${JSON.parse((jobs.find(j => j.id === b.dataset.id) || {}).segments || '[]').length} segments`;
}));
}
/**
 * Performs a job action and refreshes the list.
 *
 * 'delete' issues `DELETE /api/jobs/:id`; any other action issues
 * `POST /api/jobs/:id/:action`. A non-2xx response is reported to the user
 * instead of being silently ignored.
 *
 * @param id     Job identifier.
 * @param action 'start' | 'pause' | 'restart' | 'delete'.
 */
async function handleJobAction(id, action) {
  const isDelete = action === 'delete';
  const method = isDelete ? 'DELETE' : 'POST';
  const url = `/api/jobs/${id}${isDelete ? '' : '/' + action}`;
  try {
    const res = await api(method, url);
    if (!res.ok) {
      // The error body may be empty or not JSON; fall back to the status code.
      const data = await res.json().catch(() => null);
      throw new Error((data && data.error) || `Request failed (${res.status})`);
    }
    loadJobs();
  } catch (err) {
    alert(`Error: ${err.message}`);
  }
}
// Jobs refresh
document.getElementById('refresh-jobs').addEventListener('click', loadJobs);

// Auto-refresh jobs
// Handle of the active polling timer, or undefined when polling is stopped.
let jobsInterval;

/**
 * Starts refreshing the job list every 5 seconds.
 * Any timer already running is cleared first, so repeated calls (for example
 * logging in again after a logout) cannot stack several pollers.
 */
function startJobsPolling() {
  stopJobsPolling();
  jobsInterval = setInterval(loadJobs, 5000);
}

/** Stops the periodic refresh; safe to call when polling is not running. */
function stopJobsPolling() {
  clearInterval(jobsInterval);
  jobsInterval = undefined;
}
// Settings
/**
 * Loads the stored server configuration and renders one text input per key
 * into #settings-fields. Failures are only logged to the console.
 */
async function loadSettings() {
  try {
    const data = await apiJson('GET', '/api/config');
    const container = document.getElementById('settings-fields');
    const config = data.config || {};
    currentConfig = config;

    const entries = Object.entries(config);
    if (!entries.length) {
      container.innerHTML = '<p class="empty">No custom settings yet. Settings from .env are used as defaults.</p>';
      return;
    }

    // Keys are stored data just like values, so both are escaped before going into markup.
    container.innerHTML = entries.map(([key, value]) => {
      const safeKey = escapeHtml(key);
      return `<label>${safeKey} <input type="text" name="${safeKey}" value="${escapeHtml(String(value))}"></label>`;
    }).join('');
  } catch (err) {
    console.error(err);
  }
}
// Settings form: sends every field as a key/value pair to PUT /api/config.
document.getElementById('settings-form').addEventListener('submit', async (e) => {
  e.preventDefault();

  const payload = {};
  new FormData(e.target).forEach((val, key) => {
    payload[key] = val;
  });

  try {
    await apiJson('PUT', '/api/config', payload);
    alert('Settings saved');
  } catch (err) {
    alert('Error: ' + err.message);
  }
});
// Files list
// Paths of the files currently ticked in the Files table.
let selectedFiles = new Set();

/**
 * Fetches the file list and rebuilds the Files table body, then hooks each
 * row's checkbox up to the selection tracker. Failures are only logged.
 */
async function loadFilesList() {
  try {
    const { files } = await apiJson('GET', '/api/files');

    const toRow = (f) => `
<tr>
<td><input type="checkbox" class="file-checkbox" data-path="${escapeHtml(f.filePath)}"></td>
<td>${escapeHtml(f.filename)}</td>
<td>${formatSize(f.size)}</td>
</tr>
`;
    const body = document.querySelector('#files-table tbody');
    body.innerHTML = files.map(f => toRow(f)).join('');

    for (const box of document.querySelectorAll('.file-checkbox')) {
      box.addEventListener('change', () => updateFileSelection());
    }
  } catch (err) {
    console.error(err);
  }
}
/**
 * Rebuilds `selectedFiles` from the ticked checkboxes and enables the
 * "Delete Selected" button only when at least one file is ticked.
 */
function updateFileSelection() {
  selectedFiles.clear();
  for (const box of document.querySelectorAll('.file-checkbox:checked')) {
    selectedFiles.add(box.dataset.path);
  }
  const nothingSelected = selectedFiles.size === 0;
  document.getElementById('delete-selected-files').disabled = nothingSelected;
}
// "Select all" mirrors its own checked state onto every row checkbox.
document.getElementById('select-all-files').addEventListener('change', (e) => {
  const shouldCheck = e.target.checked;
  for (const box of document.querySelectorAll('.file-checkbox')) {
    box.checked = shouldCheck;
  }
  updateFileSelection();
});
// "Delete Selected": asks for confirmation, then reports that deletion is unavailable.
document.getElementById('delete-selected-files').addEventListener('click', async () => {
  const confirmed = confirm(`Delete ${selectedFiles.size} file(s)?`);
  if (!confirmed) return;
  // Files are served from uploads dir, delete via fs on server...
  // Not implementing server-side file deletion for now
  alert('File deletion not yet implemented');
});
document.getElementById('refresh-files-list').addEventListener('click', loadFilesList);
// Pre-fill new job form with config defaults
/**
 * Copies stored server config values into the matching new-job form fields.
 *
 * A field is touched only when the stored value is truthy. The two provider
 * dropdowns additionally get their option lists filled in before the stored
 * value is selected. Failures are only logged.
 */
async function loadConfigDefaults() {
  // Fields in the order they are applied; a second entry is the option markup for a <select>.
  const fields = [
    ['visionProvider', '<option value="openai">OpenAI</option><option value="gemini">Gemini</option><option value="ollama">Ollama</option><option value="openrouter">OpenRouter</option>'],
    ['visionModel'],
    ['ttsProvider', '<option value="openai">OpenAI</option><option value="elevenlabs">ElevenLabs</option><option value="google">Google Cloud</option>'],
    ['ttsModel'],
    ['ttsVoice'],
    ['ttsSpeedFactor'],
    ['ttsInstructions'],
    ['batchWindowDuration'],
    ['framesInBatch'],
    ['captureIntervalSeconds'],
    ['contextWindowSize'],
    ['defaultPrompt'],
    ['changePrompt'],
    ['batchPrompt']
  ];

  try {
    const response = await apiJson('GET', '/api/config');
    const defaults = response.config || {};

    for (const [name, optionsHtml] of fields) {
      const stored = defaults[name];
      if (!stored) continue;

      const control = document.querySelector(`[name="${name}"]`);
      if (optionsHtml) {
        control.innerHTML = optionsHtml;
      }
      control.value = stored;
    }
  } catch (err) {
    console.error(err);
  }
}
// Setup SSE for live progress
// Open EventSource objects keyed by job id.
const sseConnections = {};

/**
 * Opens a progress stream for a job unless one is already open.
 * Each message updates the job's card; the stream is closed and forgotten when
 * the job reaches 'completed', 'failed' or 'cancelled', or on any stream error.
 */
function connectSSE(jobId) {
  if (sseConnections[jobId]) return;

  const stream = new EventSource(`/api/jobs/${jobId}/progress`);
  const disconnect = () => {
    stream.close();
    delete sseConnections[jobId];
  };
  const finalStates = ['completed', 'failed', 'cancelled'];

  stream.onmessage = (event) => {
    const update = JSON.parse(event.data);
    updateJobCard(jobId, update);
    if (finalStates.includes(update.status)) {
      disconnect();
    }
  };
  stream.onerror = () => disconnect();

  sseConnections[jobId] = stream;
}
/**
 * Applies a progress update to an already rendered job card.
 * Updates the status badge, progress bar, percentage and index text and, when
 * the update carries segments, the segment log and the segment-count button.
 * Does nothing when the card is not on the page.
 *
 * @param jobId Job identifier.
 * @param data  Update with `status`, `progress`, `currentIndex`, `totalUnits` and optional `segments`.
 */
function updateJobCard(jobId, data) {
  const card = document.querySelector(`.job-card[data-id="${jobId}"]`);
  if (!card) return;

  const { status, progress, segments } = data;

  const badge = card.querySelector('.status-badge');
  badge.className = `status-badge status-${status}`;
  badge.textContent = status;

  const bar = card.querySelector('.progress-fill');
  bar.style.width = progress + '%';
  bar.className = 'progress-fill';
  if (status === 'completed' || status === 'failed') {
    bar.classList.add(status);
  }

  const [percentSpan, indexSpan] = card.querySelectorAll('.job-meta span');
  if (percentSpan) percentSpan.textContent = Math.round(progress) + '%';
  if (indexSpan) indexSpan.textContent = `Index: ${data.currentIndex}/${data.totalUnits}`;

  // Update segments
  const segmentLog = card.querySelector('.segment-log');
  if (segments && segmentLog) {
    const entries = segments.map(s => `<div class="segment-entry"><span class="segment-time">[${s.startTime.toFixed(1)}s]</span> ${escapeHtml(s.description)}</div>`);
    segmentLog.innerHTML = entries.join('');
  }

  // Update segment count button
  const countButton = card.querySelector('.toggle-detail');
  if (countButton && segments) {
    countButton.textContent = `${segments.length} segments`;
  }
}
// Initialize
/** Kicks off the initial data loads and starts the periodic job refresh. */
function initApp() {
  const startupSteps = [loadJobs, loadBrowseFiles, loadConfigDefaults, startJobsPolling];
  startupSteps.forEach(step => step());
}
// Escape HTML for safe rendering
/**
 * Escapes a value for insertion into HTML text or a quoted attribute.
 *
 * Only null and undefined map to ''; other falsy values such as 0 or false are
 * rendered as text instead of disappearing. Single quotes are escaped too, so
 * the result is safe inside single-quoted attributes as well.
 *
 * @param str Value to escape; converted with String().
 * @returns The escaped string.
 */
function escapeHtml(str) {
  if (str === null || str === undefined) return '';
  return String(str)
    .replace(/&/g, '&amp;') // must run first so the entities added below are not re-escaped
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
}
/**
 * Formats a byte count with one decimal and a unit from B up to GB.
 * Falsy input (0, null, undefined) yields '0 B'; values beyond GB stay in GB.
 *
 * @param bytes Size in bytes.
 * @returns For example '1.5 KB'.
 */
function formatSize(bytes) {
  if (!bytes) return '0 B';

  const labels = ['B', 'KB', 'MB', 'GB'];
  const lastIndex = labels.length - 1;

  let scaled = bytes;
  let unitIndex = 0;
  for (; scaled >= 1024 && unitIndex < lastIndex; unitIndex++) {
    scaled /= 1024;
  }
  return `${scaled.toFixed(1)} ${labels[unitIndex]}`;
}
// Check if already authenticated
// On page load: when a token is stored, ask the server whether it is still valid and go
// straight to the main screen if so; in every other case show the login screen.
(async function restoreSession() {
  if (!authToken) {
    showLogin();
    return;
  }

  try {
    const response = await fetch('/api/auth/check', { headers: { Authorization: `Basic ${authToken}` } });
    const result = await response.json();
    if (result.authenticated) {
      showMain();
      initApp();
      return;
    }
  } catch (e) {}

  showLogin();
})();

View File

@@ -0,0 +1,145 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Audio Description Server</title>
<link rel="stylesheet" href="/style.css">
</head>
<body>
<div id="app">
<div id="login-screen" class="screen">
<div class="login-card">
<h1>Audio Description Server</h1>
<p>Please log in to continue</p>
<form id="login-form">
<label>Username <input type="text" id="login-username" required autocomplete="username"></label>
<label>Password <input type="password" id="login-password" required autocomplete="current-password"></label>
<button type="submit">Login</button>
</form>
<p id="login-error" class="error hidden"></p>
</div>
</div>
<div id="main-screen" class="screen hidden">
<header>
<h1>Audio Description Server</h1>
<nav>
<button class="tab active" data-tab="dashboard">Dashboard</button>
<button class="tab" data-tab="new-job">New Job</button>
<button class="tab" data-tab="settings">Settings</button>
<button class="tab" data-tab="files">Files</button>
<button id="logout-btn" class="tab danger">Logout</button>
</nav>
</header>
<div id="dashboard" class="tab-content active">
<div class="toolbar">
<h2>Jobs</h2>
<button id="refresh-jobs">Refresh</button>
</div>
<div id="jobs-list" class="jobs-list">
<p class="empty">No jobs yet. Create one from the "New Job" tab.</p>
</div>
</div>
<div id="new-job" class="tab-content hidden">
<h2>Create New Job</h2>
<form id="new-job-form">
<fieldset>
<legend>Video Source</legend>
<div class="tabs-mini">
<button type="button" class="tab-mini active" data-src="upload">Upload</button>
<button type="button" class="tab-mini" data-src="browse">Browse Files</button>
<button type="button" class="tab-mini" data-src="youtube">YouTube / URL</button>
</div>
<div id="src-upload" class="src-panel active">
<input type="file" id="video-upload" accept="video/*">
</div>
<div id="src-browse" class="src-panel hidden">
<select id="video-select"><option value="">-- Select file --</option></select>
<button type="button" id="refresh-files">Refresh</button>
</div>
<div id="src-youtube" class="src-panel hidden">
<input type="url" id="youtube-url" placeholder="https://www.youtube.com/watch?v=...">
<button type="button" id="download-url">Download</button>
<p id="download-status" class="status"></p>
</div>
</fieldset>
<fieldset>
<legend>Output Options</legend>
<label><input type="checkbox" name="output-audio" checked> Audio Description Track</label>
<label><input type="checkbox" name="output-subtitles" checked> Subtitles (SRT + VTT)</label>
<label><input type="checkbox" name="output-muxed"> Muxed Video (MKV with 2nd audio track)</label>
</fieldset>
<details>
<summary>Vision Settings</summary>
<div class="form-grid">
<label>Provider <select name="visionProvider"></select></label>
<label>Model <input type="text" name="visionModel"></label>
<label>Max Tokens <input type="number" name="visionMaxTokens" min="10" max="10000"></label>
</div>
</details>
<details>
<summary>TTS Settings</summary>
<div class="form-grid">
<label>Provider <select name="ttsProvider"></select></label>
<label>Model <input type="text" name="ttsModel"></label>
<label>Voice <input type="text" name="ttsVoice"></label>
<label>Speed Factor <input type="number" name="ttsSpeedFactor" min="0.5" max="3" step="0.1"></label>
<label class="full">Instructions <textarea name="ttsInstructions" rows="2"></textarea></label>
</div>
</details>
<details>
<summary>Processing Settings</summary>
<div class="form-grid">
<label>Batch Mode <input type="checkbox" name="batchTimeMode" checked></label>
<label>Batch Window (sec) <input type="number" name="batchWindowDuration" min="1" max="120"></label>
<label>Frames Per Batch <input type="number" name="framesInBatch" min="1" max="60"></label>
<label>Capture Interval (sec) <input type="number" name="captureIntervalSeconds" min="1" max="120"></label>
<label>Context Window Size <input type="number" name="contextWindowSize" min="1" max="20"></label>
</div>
</details>
<details>
<summary>Prompts</summary>
<div class="form-grid">
<label class="full">Default Prompt <textarea name="defaultPrompt" rows="3"></textarea></label>
<label class="full">Change Prompt <textarea name="changePrompt" rows="3"></textarea></label>
<label class="full">Batch Prompt <textarea name="batchPrompt" rows="3"></textarea></label>
</div>
</details>
<button type="submit" class="btn-primary">Create & Start Job</button>
</form>
</div>
<div id="settings" class="tab-content hidden">
<h2>Server Configuration</h2>
<p class="hint">These settings are stored on the server and used as defaults for new jobs.</p>
<form id="settings-form">
<div id="settings-fields" class="form-grid"></div>
<button type="submit" class="btn-primary">Save Settings</button>
</form>
</div>
<div id="files" class="tab-content hidden">
<h2>Uploaded Files</h2>
<div class="toolbar">
<button id="refresh-files-list">Refresh</button>
<button id="delete-selected-files" class="danger" disabled>Delete Selected</button>
</div>
<div id="files-table-container">
<table id="files-table"><thead><tr><th><input type="checkbox" id="select-all-files"></th><th>Filename</th><th>Size</th></tr></thead><tbody></tbody></table>
</div>
</div>
</div>
</div>
<script src="/app.js"></script>
</body>
</html>

111
src/server/public/style.css Normal file
View File

@@ -0,0 +1,111 @@
/* Stylesheet for the server web UI: dark theme, login card, tabbed layout, forms, job cards and the files table. */

/* Base reset and utility classes */
*, *::before, *::after { box-sizing: border-box; }
body { font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif; margin: 0; background: #0d1117; color: #c9d1d9; }
/* !important so .hidden wins over any component-level display rule */
.hidden { display: none !important; }
.error { color: #f85149; }
.success { color: #3fb950; }
.status { font-size: 0.85rem; margin: 4px 0; }
.screen { min-height: 100vh; }
/* Login screen */
#login-screen { display: flex; align-items: center; justify-content: center; }
.login-card { background: #161b22; border: 1px solid #30363d; border-radius: 8px; padding: 32px; width: 360px; text-align: center; }
.login-card h1 { margin: 0 0 8px; font-size: 1.4rem; }
.login-card p { margin: 0 0 20px; color: #8b949e; }
.login-card label { display: block; text-align: left; font-size: 0.85rem; margin-bottom: 12px; color: #8b949e; }
.login-card input { width: 100%; margin-top: 4px; padding: 8px 12px; background: #0d1117; border: 1px solid #30363d; border-radius: 6px; color: #c9d1d9; font-size: 1rem; }
.login-card button { width: 100%; padding: 10px; background: #238636; color: #fff; border: none; border-radius: 6px; font-size: 1rem; cursor: pointer; margin-top: 8px; }
.login-card button:hover { background: #2ea043; }
/* Header and main tab navigation */
header { display: flex; align-items: center; justify-content: space-between; padding: 12px 24px; background: #161b22; border-bottom: 1px solid #30363d; }
header h1 { font-size: 1.1rem; margin: 0; }
nav { display: flex; gap: 4px; }
button.tab { background: transparent; color: #8b949e; border: none; padding: 8px 16px; cursor: pointer; border-radius: 6px; font-size: 0.9rem; }
button.tab:hover { background: #21262d; color: #c9d1d9; }
button.tab.active { background: #1f6feb; color: #fff; }
button.tab.danger:hover { background: #da3633; color: #fff; }
/* Tab panels are hidden unless they carry the .active class */
.tab-content { padding: 24px; display: none; }
.tab-content.active { display: block; }
.toolbar { display: flex; align-items: center; justify-content: space-between; margin-bottom: 16px; }
.toolbar h2 { margin: 0; font-size: 1.2rem; }
/* Generic buttons and their variants */
button { padding: 8px 16px; background: #21262d; color: #c9d1d9; border: 1px solid #30363d; border-radius: 6px; cursor: pointer; font-size: 0.9rem; }
button:hover { background: #30363d; }
button.btn-primary { background: #238636; border-color: #238636; color: #fff; }
button.btn-primary:hover { background: #2ea043; }
button.danger { background: transparent; color: #f85149; }
button.danger:hover { background: #da3633; color: #fff; border-color: #da3633; }
button:disabled { opacity: 0.5; cursor: not-allowed; }
.empty { color: #8b949e; font-style: italic; text-align: center; padding: 40px; }
/* Form layout: fieldsets, source sub-tabs and the two-column grid */
fieldset { border: 1px solid #30363d; border-radius: 8px; padding: 16px; margin-bottom: 16px; }
legend { font-weight: 600; padding: 0 8px; }
.tabs-mini { display: flex; gap: 4px; margin-bottom: 12px; }
button.tab-mini { background: transparent; color: #8b949e; border: 1px solid #30363d; padding: 6px 12px; cursor: pointer; border-radius: 4px; font-size: 0.85rem; }
button.tab-mini.active { background: #1f6feb; color: #fff; border-color: #1f6feb; }
.src-panel { display: none; }
.src-panel.active { display: block; }
.form-grid { display: grid; grid-template-columns: 1fr 1fr; gap: 12px; }
/* .full labels span every grid column */
.form-grid label.full { grid-column: 1 / -1; }
.form-grid label { display: flex; flex-direction: column; font-size: 0.85rem; color: #8b949e; gap: 4px; }
.form-grid input, .form-grid select, .form-grid textarea { padding: 8px 12px; background: #0d1117; border: 1px solid #30363d; border-radius: 6px; color: #c9d1d9; font-size: 0.9rem; }
.form-grid textarea { resize: vertical; min-height: 60px; }
.form-grid input[type="checkbox"] { width: auto; }
details { margin-bottom: 12px; border: 1px solid #30363d; border-radius: 8px; padding: 12px 16px; }
details summary { cursor: pointer; font-weight: 600; padding: 4px 0; }
details .form-grid { margin-top: 12px; }
.hint { color: #8b949e; font-size: 0.85rem; margin-top: -12px; margin-bottom: 16px; }
select, input[type="file"], input[type="url"] { padding: 8px 12px; background: #0d1117; border: 1px solid #30363d; border-radius: 6px; color: #c9d1d9; font-size: 0.9rem; }
/* Job cards */
.jobs-list { display: flex; flex-direction: column; gap: 8px; }
.job-card { background: #161b22; border: 1px solid #30363d; border-radius: 8px; padding: 16px; }
.job-card-header { display: flex; align-items: center; justify-content: space-between; margin-bottom: 8px; }
.job-card-header h3 { margin: 0; font-size: 1rem; word-break: break-all; }
.job-actions { display: flex; gap: 4px; }
.job-actions button { font-size: 0.8rem; padding: 4px 10px; }
/* Status badges: one colour pair per .status-<name> class */
.status-badge { display: inline-block; padding: 2px 10px; border-radius: 12px; font-size: 0.75rem; font-weight: 600; text-transform: uppercase; }
.status-pending { background: #21262d; color: #8b949e; }
.status-queued { background: #1a2332; color: #58a6ff; }
.status-processing { background: #1a2332; color: #58a6ff; }
.status-completed { background: #172f1e; color: #3fb950; }
.status-failed { background: #2d1518; color: #f85149; }
.status-paused { background: #2d2400; color: #d29922; }
.status-cancelled { background: #21262d; color: #8b949e; }
/* Progress bar: the fill's width is animated via the transition below */
.progress-bar { height: 6px; background: #21262d; border-radius: 3px; margin: 8px 0; overflow: hidden; }
.progress-fill { height: 100%; background: #1f6feb; border-radius: 3px; transition: width 0.5s ease; }
.progress-fill.completed { background: #3fb950; }
.progress-fill.failed { background: #f85149; }
.job-meta { display: flex; gap: 16px; font-size: 0.8rem; color: #8b949e; margin-bottom: 8px; }
/* Job detail panel is collapsed unless it carries the .open class */
.job-detail { margin-top: 12px; padding-top: 12px; border-top: 1px solid #30363d; display: none; }
.job-detail.open { display: block; }
.segment-log { max-height: 200px; overflow-y: auto; font-size: 0.8rem; color: #8b949e; background: #0d1117; padding: 8px; border-radius: 4px; margin-bottom: 8px; }
.segment-entry { padding: 4px 0; border-bottom: 1px solid #1c2128; }
.segment-entry:last-child { border-bottom: none; }
.segment-time { color: #58a6ff; }
.download-links { display: flex; gap: 8px; flex-wrap: wrap; }
.download-links a { padding: 6px 12px; background: #21262d; color: #58a6ff; text-decoration: none; border-radius: 4px; font-size: 0.85rem; border: 1px solid #30363d; }
.download-links a:hover { background: #30363d; }
.error-msg { color: #f85149; font-size: 0.85rem; background: #2d1518; padding: 8px; border-radius: 4px; margin: 8px 0; }
/* Files table */
#files-table { width: 100%; border-collapse: collapse; }
#files-table th, #files-table td { text-align: left; padding: 8px 12px; border-bottom: 1px solid #30363d; }
#files-table th { font-size: 0.85rem; color: #8b949e; }
#files-table tbody tr:hover { background: #161b22; }
/* Messages */
#login-error { margin-top: 12px; }

33
src/server/routes/auth.ts Normal file
View File

@@ -0,0 +1,33 @@
import { Router, Request, Response } from 'express';
const router = Router();
/**
 * POST /login — checks the submitted username/password against the
 * SERVER_USERNAME / SERVER_PASSWORD environment variables (falling back to
 * built-in defaults) and, on success, returns a Basic-auth token.
 *
 * Responds 401 with `authenticated: false` when the credentials do not match.
 *
 * NOTE(review): the fallback password is hard-coded and the returned token is
 * just base64("user:pass"); deployments should always set SERVER_PASSWORD.
 */
router.post('/login', (req: Request, res: Response) => {
  // req.body is undefined when no body parser ran or the request carried no
  // body; destructuring it directly would throw and surface as a 500.
  const { username, password } = req.body ?? {};
  const serverUser = process.env.SERVER_USERNAME || 'admin';
  const serverPass = process.env.SERVER_PASSWORD || 'aidio2024';

  if (username === serverUser && password === serverPass) {
    const token = Buffer.from(`${username}:${password}`).toString('base64');
    res.json({ authenticated: true, token, username });
  } else {
    res.status(401).json({ authenticated: false, error: 'Invalid credentials' });
  }
});
/**
 * GET /check — reports whether the request's `Authorization: Basic …` header
 * carries the configured server credentials.
 *
 * Always answers 200; `authenticated` is false when the header is missing,
 * is not a Basic header, or the credentials do not match.
 */
router.get('/check', (req: Request, res: Response) => {
  const authHeader = req.headers.authorization;
  if (!authHeader || !authHeader.startsWith('Basic ')) {
    res.json({ authenticated: false });
    return;
  }

  const credentials = Buffer.from(authHeader.slice(6), 'base64').toString('utf-8');

  // Split on the FIRST colon only: the password itself may contain ':'
  // characters, and a plain split(':') would silently truncate it.
  const separatorIndex = credentials.indexOf(':');
  const username = separatorIndex === -1 ? credentials : credentials.slice(0, separatorIndex);
  const password = separatorIndex === -1 ? undefined : credentials.slice(separatorIndex + 1);

  const serverUser = process.env.SERVER_USERNAME || 'admin';
  const serverPass = process.env.SERVER_PASSWORD || 'aidio2024';
  res.json({ authenticated: username === serverUser && password === serverPass, username });
});
export default router;

View File

@@ -0,0 +1,24 @@
import { Router, Request, Response } from 'express';
import { getAllConfig, setConfigValue } from '../db/jobStore';
const router = Router();
/** GET / — returns every stored configuration entry as `{ config }`. */
router.get('/', (_req: Request, res: Response) => {
  res.json({ config: getAllConfig() });
});
/**
 * PUT / — stores each key/value pair of the JSON body as a configuration
 * entry and returns the resulting configuration as `{ config }`.
 *
 * Responds 400 when the body is not a plain JSON object.
 */
router.put('/', (req: Request, res: Response) => {
  const updates: unknown = req.body;
  // Arrays are typeof 'object' too; without this check their indices would be
  // written as configuration keys.
  if (typeof updates !== 'object' || updates === null || Array.isArray(updates)) {
    res.status(400).json({ error: 'Body must be a JSON object of key-value pairs' });
    return;
  }

  for (const [key, value] of Object.entries(updates)) {
    // String() on a nested object/array yields "[object Object]"; keep such
    // values recoverable by storing their JSON form instead.
    const serialised = typeof value === 'object' && value !== null
      ? JSON.stringify(value)
      : String(value);
    setConfigValue(key, serialised);
  }

  res.json({ config: getAllConfig() });
});
export default router;

View File

@@ -0,0 +1,89 @@
import { Router, Request, Response } from 'express';
import multer from 'multer';
import path from 'path';
import fs from 'fs';
import { downloadVideo, isYtDlpAvailable } from '../services/ytDlp';
/** Absolute directory where uploaded and downloaded videos are kept. */
const UPLOADS_DIR = path.resolve('./uploads');

/**
 * Multer disk storage: files land in UPLOADS_DIR (created on demand) under a
 * generated "<timestamp>-<random>" name that keeps the original extension.
 */
const storage = multer.diskStorage({
  destination(_req, _file, done) {
    const uploadsDirMissing = !fs.existsSync(UPLOADS_DIR);
    if (uploadsDirMissing) {
      fs.mkdirSync(UPLOADS_DIR, { recursive: true });
    }
    done(null, UPLOADS_DIR);
  },
  filename(_req, file, done) {
    const randomPart = Math.round(Math.random() * 1e9);
    const extension = path.extname(file.originalname);
    done(null, `${Date.now()}-${randomPart}${extension}`);
  }
});
/**
 * Multer instance for video uploads. A file is accepted when either its MIME
 * type or its file extension identifies it as a video; size is capped at 10GB.
 */
const upload = multer({
  storage,
  fileFilter: (_req, file, done) => {
    const videoMimeTypes = [
      'video/mp4', 'video/webm', 'video/x-matroska', 'video/quicktime',
      'video/x-msvideo', 'video/mpeg', 'video/x-ms-wmv', 'video/x-flv'
    ];
    const videoExtension = /\.(mp4|mkv|webm|mov|avi|mpg|mpeg|wmv|flv)$/i;

    const looksLikeVideo =
      videoMimeTypes.includes(file.mimetype) || videoExtension.test(file.originalname);
    if (!looksLikeVideo) {
      done(new Error('Invalid file type. Only video files are allowed.'));
      return;
    }
    done(null, true);
  },
  limits: { fileSize: 10 * 1024 * 1024 * 1024 } // 10GB
});
const router = Router();
/**
 * POST /upload — accepts a single multipart file in the "video" field and
 * answers with its stored path, original name and size. Responds 400 when
 * no file was received.
 */
router.post('/upload', upload.single('video'), (req: Request, res: Response) => {
  const uploaded = req.file;
  if (!uploaded) {
    res.status(400).json({ error: 'No video file uploaded' });
    return;
  }

  const { path: filePath, originalname: filename, size } = uploaded;
  res.json({ filePath, filename, size });
});
/**
 * GET / — lists the regular files in UPLOADS_DIR with their path and size,
 * sorted by path in descending order. An absent directory yields an empty list.
 */
router.get('/', (_req: Request, res: Response) => {
  if (!fs.existsSync(UPLOADS_DIR)) {
    res.json({ files: [] });
    return;
  }

  const files = fs
    .readdirSync(UPLOADS_DIR, { withFileTypes: true })
    .filter(entry => entry.isFile())
    .map(entry => {
      const filePath = path.join(UPLOADS_DIR, entry.name);
      return { filename: entry.name, filePath, size: fs.statSync(filePath).size };
    })
    .sort((left, right) => right.filePath.localeCompare(left.filePath));

  res.json({ files });
});
/**
 * POST /youtube — downloads the video at `body.url` into UPLOADS_DIR via
 * yt-dlp and answers with the download result.
 *
 * Responds 400 when yt-dlp is unavailable or the URL is missing/invalid, and
 * 500 when the download itself fails.
 */
router.post('/youtube', (req: Request, res: Response) => {
  if (!isYtDlpAvailable()) {
    res.status(400).json({ error: 'yt-dlp is not installed or not in PATH' });
    return;
  }

  const url: unknown = req.body?.url;
  if (!url) {
    res.status(400).json({ error: 'URL is required' });
    return;
  }

  // The URL comes straight from the client and ends up on a command line, so
  // only well-formed http(s) URLs are let through.
  let parsedUrl: URL | null = null;
  if (typeof url === 'string') {
    try {
      parsedUrl = new URL(url);
    } catch {
      parsedUrl = null;
    }
  }
  if (!parsedUrl || (parsedUrl.protocol !== 'http:' && parsedUrl.protocol !== 'https:')) {
    res.status(400).json({ error: 'URL must be a valid http(s) URL' });
    return;
  }

  try {
    const result = downloadVideo(parsedUrl.toString(), UPLOADS_DIR);
    res.json(result);
  } catch (err: unknown) {
    const message = err instanceof Error ? err.message : String(err);
    res.status(500).json({ error: `Failed to download: ${message}` });
  }
});
export default router;

174
src/server/routes/jobs.ts Normal file
View File

@@ -0,0 +1,174 @@
import { Router, Request, Response } from 'express';
import path from 'path';
import fs from 'fs';
import { JobManager } from '../services/jobManager';
import { getJob } from '../db/jobStore';
/**
 * Reads a route parameter from the request, collapsing an array-valued
 * parameter to its first element.
 */
function getParam(req: Request, name: string): string {
  const raw = req.params[name];
  if (Array.isArray(raw)) {
    return raw[0];
  }
  return raw;
}
/** Extracts a human-readable message from an arbitrary thrown value. */
function describeError(err: unknown): string {
  return err instanceof Error ? err.message : String(err);
}

/**
 * Builds the Express router for job management.
 *
 * Routes:
 * - `GET /`                    list all jobs
 * - `POST /`                   create a job for an existing video file
 * - `GET /:id`                 fetch one job
 * - `POST /:id/start|pause|restart|cancel`  job lifecycle actions
 * - `DELETE /:id`              delete a job
 * - `GET /:id/progress`        Server-Sent Events stream of progress snapshots
 * - `GET /:id/download/:type`  download an output (`audio`, `subtitles`, `muxed`)
 *
 * @param jobManager manager that owns the job queue and progress events
 * @returns the configured router
 */
export function createJobsRouter(jobManager: JobManager): Router {
  const router = Router();

  /**
   * Wraps a job lifecycle action in the shared response contract:
   * `{ success: true }` on success, 400 with the error message on failure.
   */
  const lifecycleHandler = (action: (jobId: string) => Promise<void>) =>
    async (req: Request, res: Response): Promise<void> => {
      try {
        await action(getParam(req, 'id'));
        res.json({ success: true });
      } catch (err: unknown) {
        res.status(400).json({ error: describeError(err) });
      }
    };

  router.get('/', (_req: Request, res: Response) => {
    const jobs = jobManager.listJobs();
    res.json({ jobs });
  });

  router.post('/', (req: Request, res: Response) => {
    const { videoPath, config, outputOptions } = req.body ?? {};
    if (!videoPath) {
      res.status(400).json({ error: 'videoPath is required' });
      return;
    }
    if (!fs.existsSync(videoPath)) {
      res.status(400).json({ error: `Video file not found: ${videoPath}` });
      return;
    }
    try {
      const job = jobManager.createJob(videoPath, config || {}, outputOptions || {});
      res.status(201).json({ job });
    } catch (err: unknown) {
      res.status(500).json({ error: describeError(err) });
    }
  });

  router.get('/:id', (req: Request, res: Response) => {
    const job = getJob(getParam(req, 'id'));
    if (!job) {
      res.status(404).json({ error: 'Job not found' });
      return;
    }
    res.json({ job });
  });

  router.post('/:id/start', lifecycleHandler((id) => jobManager.startJob(id)));
  router.post('/:id/pause', lifecycleHandler((id) => jobManager.pauseJob(id)));
  router.post('/:id/restart', lifecycleHandler((id) => jobManager.restartJob(id)));
  router.post('/:id/cancel', lifecycleHandler((id) => jobManager.cancelJob(id)));

  router.delete('/:id', (req: Request, res: Response) => {
    try {
      jobManager.deleteJob(getParam(req, 'id'));
      res.json({ success: true });
    } catch (err: unknown) {
      res.status(400).json({ error: describeError(err) });
    }
  });

  router.get('/:id/progress', (req: Request, res: Response) => {
    const jobId = getParam(req, 'id');
    const initialJob = getJob(jobId);
    // Without this check an unknown id would open a stream that never
    // receives an event and never ends.
    if (!initialJob) {
      res.status(404).json({ error: 'Job not found' });
      return;
    }

    res.setHeader('Content-Type', 'text/event-stream');
    res.setHeader('Cache-Control', 'no-cache');
    res.setHeader('Connection', 'keep-alive');
    // Ask nginx-style reverse proxies not to buffer the event stream.
    res.setHeader('X-Accel-Buffering', 'no');

    const sendProgress = (data: Record<string, unknown>): void => {
      res.write(`data: ${JSON.stringify(data)}\n\n`);
    };

    // Send the current state immediately so the client does not have to wait
    // for the next progress event.
    sendProgress({
      id: initialJob.id,
      status: initialJob.status,
      progress: initialJob.progress,
      currentIndex: initialJob.current_index,
      totalUnits: initialJob.total_units,
      segments: JSON.parse(initialJob.segments || '[]'),
      error: initialJob.error,
      output_audio: initialJob.output_audio,
      output_subtitles_srt: initialJob.output_subtitles_srt,
      output_subtitles_vtt: initialJob.output_subtitles_vtt,
      output_muxed: initialJob.output_muxed
    });

    const unsubscribe = jobManager.onJobProgress(jobId, (data) => {
      sendProgress(data as unknown as Record<string, unknown>);
      // Terminal states close the stream after the final snapshot is sent.
      if (data.status === 'completed' || data.status === 'failed' || data.status === 'cancelled') {
        res.end();
        unsubscribe();
      }
    });

    req.on('close', () => {
      unsubscribe();
    });
  });

  router.get('/:id/download/:type', (req: Request, res: Response) => {
    const job = getJob(getParam(req, 'id'));
    if (!job) {
      res.status(404).json({ error: 'Job not found' });
      return;
    }

    const type = getParam(req, 'type');
    const baseName = path.basename(job.video_filename, path.extname(job.video_filename));
    let filePath: string | null = null;
    let filename = '';

    switch (type) {
      case 'audio':
        filePath = job.output_audio;
        filename = `${baseName}_description.mp3`;
        break;
      case 'subtitles': {
        // Only two formats exist; anything else falls back to SRT so the
        // query string can never dictate the download's file extension.
        const format = req.query.format === 'vtt' ? 'vtt' : 'srt';
        filePath = format === 'vtt' ? job.output_subtitles_vtt : job.output_subtitles_srt;
        filename = `${baseName}_description.${format}`;
        break;
      }
      case 'muxed':
        filePath = job.output_muxed;
        filename = `${baseName}_described.mkv`;
        break;
      default:
        res.status(400).json({ error: 'Invalid download type' });
        return;
    }

    if (!filePath || !fs.existsSync(filePath)) {
      res.status(404).json({ error: 'Output file not found' });
      return;
    }
    res.download(filePath, filename);
  });

  return router;
}

View File

@@ -0,0 +1,293 @@
import path from 'path';
import fs from 'fs';
import {
getAllJobs, getJob, createJob, updateJobStatus, saveCheckpoint,
saveJobOutputs, deleteJob as deleteJobFromDb, Job, OutputOptions
} from '../db/jobStore';
import { generateAudioDescriptionFromOptions } from '../../utils/processor';
import { generateSRT, generateVTT } from './subtitleGenerator';
import { muxAudioDescription } from './muxer';
import { getDefaultConfig, Config } from '../../config/config';
import { AudioSegment, BatchContext } from '../../interfaces';
import { getVideoDuration } from '../../utils/mediaUtils';
import { EventEmitter } from 'events';
/**
 * Snapshot of a job's state as delivered to progress subscribers.
 * Every field is copied from the stored job record when the event is emitted.
 */
interface ProgressData {
// Job identifier.
id: string;
// Current job status (e.g. 'queued', 'processing', 'paused', 'completed').
status: string;
// Completion percentage; capped at 99 while running and set to 100 on completion.
progress: number;
// Index of the next frame/batch to process.
currentIndex: number;
// Total number of frames/batches derived from the video duration.
totalUnits: number;
// Segments generated so far, parsed from the job's stored JSON.
segments: AudioSegment[];
// Failure message, or null when the job has not failed.
error: string | null;
// Output file paths; null until the corresponding output has been saved.
output_audio: string | null;
output_subtitles_srt: string | null;
output_subtitles_vtt: string | null;
output_muxed: string | null;
}
/**
 * Runs audio-description jobs one at a time from an in-memory FIFO queue.
 *
 * Job state lives in the job store; this class drives status transitions,
 * checkpoints progress after every generated segment so that jobs can be
 * resumed, and publishes progress snapshots to subscribers.
 */
export class JobManager {
  /** Ids of jobs waiting to be processed, in submission order. */
  private queue: string[] = [];
  /** True while the queue-draining loop is running. */
  private processing = false;
  /** Ids of running jobs that were asked to stop (pause or cancel). */
  private pausedJobs = new Set<string>();
  private emitter = new EventEmitter();
  /** Timer that periodically re-emits progress while anyone is subscribed. */
  private pollInterval: ReturnType<typeof setInterval> | null = null;

  constructor() {
    this.recoverStuckJobs();
    this.emitter.setMaxListeners(100);
  }

  /**
   * Marks jobs left in 'processing' by a previous server run as failed so
   * they can be restarted from their last checkpoint.
   */
  private recoverStuckJobs(): void {
    for (const job of getAllJobs()) {
      if (job.status === 'processing') {
        updateJobStatus(job.id, 'failed', 'Server restarted while job was in progress. Click Restart to resume from the last checkpoint.');
      }
    }
  }

  /**
   * Creates (but does not start) a job for the given video.
   *
   * @param videoPath path of the source video
   * @param configOverride top-level config keys overriding the defaults
   *   (merged shallowly)
   * @param outputOptions which outputs to produce; audio and subtitles
   *   default to on, muxed to off
   * @returns the stored job
   */
  createJob(videoPath: string, configOverride: Partial<Config> = {}, outputOptions: Partial<OutputOptions> = {}): Job {
    const baseConfig = getDefaultConfig();
    const mergedConfig: Config = { ...baseConfig, ...configOverride };
    const filename = path.basename(videoPath);
    const opts: OutputOptions = {
      audio: outputOptions.audio !== false,
      subtitles: outputOptions.subtitles !== false,
      muxed: outputOptions.muxed || false
    };
    return createJob(videoPath, filename, mergedConfig, opts);
  }

  /**
   * Queues a job for processing.
   * @throws Error when the job is unknown, already processing or completed
   */
  async startJob(jobId: string): Promise<void> {
    const job = getJob(jobId);
    if (!job) throw new Error('Job not found');
    if (job.status === 'processing') throw new Error('Job is already processing');
    if (job.status === 'completed') throw new Error('Job is already completed');
    // A stale stop request would make the job pause again on its first segment.
    this.pausedJobs.delete(jobId);
    updateJobStatus(jobId, 'queued');
    this.queue.push(jobId);
    void this.processNext();
  }

  /**
   * Requests that a running job stop after the segment currently in flight.
   * @throws Error when the job is unknown or not processing
   */
  async pauseJob(jobId: string): Promise<void> {
    const job = getJob(jobId);
    if (!job) throw new Error('Job not found');
    if (job.status !== 'processing') throw new Error('Only processing jobs can be paused');
    this.pausedJobs.add(jobId);
    updateJobStatus(jobId, 'paused');
    this.emitProgress(jobId);
  }

  /**
   * Re-queues a failed, paused or cancelled job; it resumes from its last
   * checkpoint.
   * @throws Error when the job is unknown or in any other state
   */
  async restartJob(jobId: string): Promise<void> {
    const job = getJob(jobId);
    if (!job) throw new Error('Job not found');
    if (job.status !== 'failed' && job.status !== 'paused' && job.status !== 'cancelled') {
      throw new Error('Only failed, paused, or cancelled jobs can be restarted');
    }
    this.pausedJobs.delete(jobId);
    updateJobStatus(jobId, 'queued');
    this.queue.push(jobId);
    void this.processNext();
  }

  /**
   * Cancels a job. A running job is additionally asked to stop at its next
   * progress callback.
   * @throws Error when the job is unknown
   */
  async cancelJob(jobId: string): Promise<void> {
    const job = getJob(jobId);
    if (!job) throw new Error('Job not found');
    if (job.status === 'processing') {
      this.pausedJobs.add(jobId);
    }
    updateJobStatus(jobId, 'cancelled');
    this.emitProgress(jobId);
  }

  /**
   * Deletes a job from the store.
   * @throws Error when the job is unknown or currently running
   */
  deleteJob(jobId: string): void {
    const job = getJob(jobId);
    if (!job) throw new Error('Job not found');
    if (job.status === 'processing') throw new Error('Cannot delete a running job');
    deleteJobFromDb(jobId);
  }

  /** Returns every stored job. */
  listJobs(): Job[] {
    return getAllJobs();
  }

  /**
   * Subscribes to progress snapshots of one job. While at least one
   * subscription exists, snapshots are also re-emitted every two seconds.
   *
   * @returns an idempotent function that removes the subscription
   */
  onJobProgress(jobId: string, callback: (data: ProgressData) => void): () => void {
    const eventName = `progress:${jobId}`;
    this.emitter.on(eventName, callback);

    if (!this.pollInterval) {
      this.pollInterval = setInterval(() => {
        for (const name of this.emitter.eventNames()) {
          const subscribed = String(name);
          if (subscribed.startsWith('progress:')) {
            this.emitProgress(subscribed.slice('progress:'.length));
          }
        }
      }, 2000);
    }

    return () => {
      this.emitter.off(eventName, callback);
      // Stop polling once the last subscriber is gone; otherwise the timer
      // would keep firing (and keep the process alive) forever.
      const hasSubscribers = this.emitter
        .eventNames()
        .some((name) => String(name).startsWith('progress:'));
      if (!hasSubscribers && this.pollInterval) {
        clearInterval(this.pollInterval);
        this.pollInterval = null;
      }
    };
  }

  /** Emits the job's current stored state to its subscribers, if it exists. */
  private emitProgress(jobId: string): void {
    const job = getJob(jobId);
    if (!job) return;
    const data: ProgressData = {
      id: job.id,
      status: job.status,
      progress: job.progress,
      currentIndex: job.current_index,
      totalUnits: job.total_units,
      segments: JSON.parse(job.segments || '[]'),
      error: job.error,
      output_audio: job.output_audio,
      output_subtitles_srt: job.output_subtitles_srt,
      output_subtitles_vtt: job.output_subtitles_vtt,
      output_muxed: job.output_muxed
    };
    this.emitter.emit(`progress:${jobId}`, data);
  }

  /**
   * Drains the queue sequentially. Re-entrant calls return immediately while
   * a drain is in progress; entries whose job is no longer 'queued' are skipped.
   */
  private async processNext(): Promise<void> {
    if (this.processing) return;
    this.processing = true;
    try {
      while (this.queue.length > 0) {
        const jobId = this.queue.shift();
        if (jobId === undefined) break;
        const job = getJob(jobId);
        if (!job || job.status !== 'queued') continue;
        try {
          await this.processJob(job);
        } catch (err: unknown) {
          console.error(`Job ${jobId} failed:`, err instanceof Error ? err.message : err);
        }
      }
    } finally {
      this.processing = false;
    }
  }

  /**
   * Processes one job from its last checkpoint: generates the description
   * audio, then the requested subtitle and muxed outputs, and records the
   * final status. Any failure is recorded on the job as 'failed'; a stop
   * request is recorded as 'paused' or 'cancelled'.
   */
  private async processJob(job: Job): Promise<void> {
    updateJobStatus(job.id, 'processing');
    this.emitProgress(job.id);

    try {
      // Setup runs inside the try block so that a corrupt stored config or an
      // unreadable video marks the job as failed instead of leaving it stuck
      // in 'processing'.
      const config: Config = JSON.parse(job.config);
      const outputOptions: OutputOptions = JSON.parse(job.output_options);
      const existingSegments: AudioSegment[] = JSON.parse(job.segments || '[]');
      const lastContext: BatchContext = JSON.parse(job.last_context || '{}');
      // The stored index is only trusted when segments were actually saved.
      const startIndex = existingSegments.length > 0 ? job.current_index : 0;
      const startTimePosition = job.current_time_position || 0;
      const videoDuration = getVideoDuration(job.video_path);
      const totalUnits = config.batchTimeMode
        ? Math.floor(videoDuration / config.batchWindowDuration)
        : Math.floor(videoDuration / config.captureIntervalSeconds);

      saveCheckpoint(job.id, JSON.stringify(existingSegments), startIndex, totalUnits, startTimePosition, JSON.stringify(lastContext), 0);
      this.emitProgress(job.id);

      const result = await generateAudioDescriptionFromOptions(
        job.video_path,
        config,
        {
          startIndex,
          existingSegments,
          lastContext,
          currentTimePosition: startTimePosition,
          onProgress: (info) => {
            // Pause/cancel is cooperative: throwing here aborts the run and is
            // translated into a status in the catch block below.
            if (this.pausedJobs.has(job.id)) {
              throw new Error('JOB_PAUSED');
            }

            const allSegments = existingSegments.length > 0 && info.index === startIndex
              ? [...existingSegments, info.segment]
              : (() => {
                const currentJob = getJob(job.id);
                if (!currentJob) return [info.segment];
                const segs = JSON.parse(currentJob.segments || '[]');
                segs.push(info.segment);
                return segs;
              })();

            // Capped at 99 so that 100 is only reported once outputs are saved.
            const progress = totalUnits > 0 ? Math.min(((info.index + 1) / totalUnits) * 100, 99) : 50;
            saveCheckpoint(
              job.id,
              JSON.stringify(allSegments),
              info.index + 1,
              totalUnits,
              info.segment.startTime + info.segment.duration + (config.batchTimeMode ? 0.5 : 0.25),
              JSON.stringify(lastContext),
              progress
            );
            this.emitProgress(job.id);
          }
        }
      );

      const segments = result.segments || [];
      // The processor already combined the segments into a single audio file.
      const outputAudio = result.audioDescriptionFile;
      let outputSubtitlesSrt: string | null = null;
      let outputSubtitlesVtt: string | null = null;
      let outputMuxed: string | null = null;
      const baseName = path.basename(job.video_path, path.extname(job.video_path));
      const outputDir = config.outputDir;

      if (outputOptions.subtitles && segments.length > 0) {
        const srtPath = path.join(outputDir, `${baseName}_description.srt`);
        const vttPath = path.join(outputDir, `${baseName}_description.vtt`);
        fs.writeFileSync(srtPath, generateSRT(segments, videoDuration));
        fs.writeFileSync(vttPath, generateVTT(segments, videoDuration));
        outputSubtitlesSrt = srtPath;
        outputSubtitlesVtt = vttPath;
      }

      if (outputOptions.muxed && fs.existsSync(outputAudio)) {
        const muxedPath = path.join(outputDir, `${baseName}_described.mkv`);
        muxAudioDescription(job.video_path, outputAudio, muxedPath);
        outputMuxed = muxedPath;
      }

      saveJobOutputs(job.id, {
        audio: outputAudio,
        subtitlesSrt: outputSubtitlesSrt || undefined,
        subtitlesVtt: outputSubtitlesVtt || undefined,
        muxed: outputMuxed || undefined
      });
      saveCheckpoint(job.id, JSON.stringify(segments), totalUnits, totalUnits, 0, '{}', 100);
      updateJobStatus(job.id, 'completed');
      this.emitProgress(job.id);
    } catch (err: unknown) {
      const message = err instanceof Error ? err.message : String(err);
      if (message === 'JOB_PAUSED') {
        // Pause and cancel share the same stop flag; keep 'cancelled' when
        // that is what the user asked for instead of overwriting it.
        const latest = getJob(job.id);
        updateJobStatus(job.id, latest?.status === 'cancelled' ? 'cancelled' : 'paused');
        this.emitProgress(job.id);
        return;
      }
      updateJobStatus(job.id, 'failed', message || 'Unknown error');
      this.emitProgress(job.id);
    } finally {
      // The stop request has been honoured (or the run ended); a later start
      // must not trip over it.
      this.pausedJobs.delete(job.id);
    }
  }
}

View File

@@ -0,0 +1,29 @@
import { execFileSync, execSync } from 'child_process';
import path from 'path';
/**
 * Muxes an audio-description track into a copy of the video using ffmpeg.
 *
 * The output keeps the video stream and any original audio streams, and
 * appends the description audio as an extra track. All streams are copied
 * without re-encoding, and an existing output file is overwritten.
 *
 * @param videoPath path of the source video
 * @param audioPath path of the audio-description file
 * @param outputPath path of the file to write
 * @throws Error when ffmpeg cannot be started or exits with a non-zero status
 */
export function muxAudioDescription(
  videoPath: string,
  audioPath: string,
  outputPath: string
): void {
  // Arguments are passed as an array and ffmpeg is executed without a shell,
  // so paths containing quotes, spaces, `$` or backticks are handled verbatim
  // instead of being interpreted by the shell.
  const args = [
    '-v', 'error',
    '-y',
    '-i', videoPath,
    '-i', audioPath,
    '-map', '0:v',
    '-map', '0:a?', // original audio is optional: '?' tolerates videos without any
    '-map', '1:a',
    '-c:v', 'copy',
    '-c:a', 'copy',
    // NOTE(review): a:1 is the description track only when the source has
    // exactly one audio stream — confirm for silent/multi-audio sources.
    '-metadata:s:a:1', 'title=Audio Description',
    outputPath
  ];
  execFileSync('ffmpeg', args);
}

View File

@@ -0,0 +1,70 @@
import { AudioSegment } from '../../interfaces';
/**
 * Formats a time offset as an SRT timestamp (`HH:MM:SS,mmm`).
 *
 * @param seconds offset in seconds; negative or non-finite values are
 *   treated as 0
 * @returns the timestamp, rounded to the nearest millisecond
 */
function formatSrtTime(seconds: number): string {
  // Work in whole milliseconds: taking `(seconds % 1) * 1000` and flooring
  // loses a millisecond to floating-point error (1.001 would yield ",000").
  const totalMs = Number.isFinite(seconds) ? Math.max(0, Math.round(seconds * 1000)) : 0;
  const h = Math.floor(totalMs / 3600000);
  const m = Math.floor((totalMs % 3600000) / 60000);
  const s = Math.floor((totalMs % 60000) / 1000);
  const ms = totalMs % 1000;
  const pad = (value: number, width: number): string => value.toString().padStart(width, '0');
  return `${pad(h, 2)}:${pad(m, 2)}:${pad(s, 2)},${pad(ms, 3)}`;
}
/**
 * Formats a time offset as a WebVTT timestamp (`HH:MM:SS.mmm`).
 *
 * @param seconds offset in seconds; negative or non-finite values are
 *   treated as 0
 * @returns the timestamp, rounded to the nearest millisecond
 */
function formatVttTime(seconds: number): string {
  // Work in whole milliseconds: taking `(seconds % 1) * 1000` and flooring
  // loses a millisecond to floating-point error (1.001 would yield ".000").
  const totalMs = Number.isFinite(seconds) ? Math.max(0, Math.round(seconds * 1000)) : 0;
  const h = Math.floor(totalMs / 3600000);
  const m = Math.floor((totalMs % 3600000) / 60000);
  const s = Math.floor((totalMs % 60000) / 1000);
  const ms = totalMs % 1000;
  const pad = (value: number, width: number): string => value.toString().padStart(width, '0');
  return `${pad(h, 2)}:${pad(m, 2)}:${pad(s, 2)}.${pad(ms, 3)}`;
}
/**
 * Flattens a description onto one line: every run of whitespace (including
 * line breaks) becomes a single space and the ends are trimmed.
 */
function cleanDescription(text: string): string {
  const words = text.split(/\s+/).filter((word) => word.length > 0);
  return words.join(' ');
}
/**
 * Renders segments as SRT subtitle text.
 *
 * Segments are ordered by start time and numbered from 1. Each cue stays on
 * screen until the next segment starts; the last cue ends half a second after
 * its audio, clamped to the video duration.
 *
 * @param segments described segments (not mutated)
 * @param videoDuration video length in seconds
 * @returns the SRT document, or an empty string when there are no segments
 */
export function generateSRT(segments: AudioSegment[], videoDuration: number): string {
  if (!segments.length) {
    return '';
  }

  const ordered = segments.slice().sort((first, second) => first.startTime - second.startTime);
  const lastIndex = ordered.length - 1;

  const cues = ordered.map((current, index) => {
    const cueEnd = index < lastIndex
      ? ordered[index + 1].startTime
      : Math.min(current.startTime + current.duration + 0.5, videoDuration);
    return [
      String(index + 1),
      `${formatSrtTime(current.startTime)} --> ${formatSrtTime(cueEnd)}`,
      cleanDescription(current.description),
      ''
    ].join('\n');
  });

  return cues.join('\n');
}
/**
 * Renders segments as WebVTT subtitle text.
 *
 * Segments are ordered by start time. Each cue stays on screen until the next
 * segment starts; the last cue ends half a second after its audio, clamped to
 * the video duration.
 *
 * @param segments described segments (not mutated)
 * @param videoDuration video length in seconds
 * @returns the WebVTT document, or an empty string when there are no segments
 */
export function generateVTT(segments: AudioSegment[], videoDuration: number): string {
  if (!segments.length) {
    return '';
  }

  const ordered = segments.slice().sort((first, second) => first.startTime - second.startTime);
  const lastIndex = ordered.length - 1;

  const cues = ordered.map((current, index) => {
    const cueEnd = index < lastIndex
      ? ordered[index + 1].startTime
      : Math.min(current.startTime + current.duration + 0.5, videoDuration);
    return [
      `${formatVttTime(current.startTime)} --> ${formatVttTime(cueEnd)}`,
      cleanDescription(current.description),
      ''
    ].join('\n');
  });

  // "WEBVTT" header, a blank line, then the cues.
  return ['WEBVTT', '', ...cues].join('\n');
}

View File

@@ -0,0 +1,47 @@
import { execFileSync, execSync } from 'child_process';
import fs from 'fs';
import path from 'path';
/** Outcome of a successful yt-dlp download. */
export interface YtDlpResult {
// Resolved path of the downloaded file on disk.
filePath: string;
// File name reported by yt-dlp for the download.
filename: string;
// Video title reported by yt-dlp; falls back to the file name when absent.
title: string;
}
/**
 * Checks whether the `yt-dlp` executable can be run, by invoking
 * `yt-dlp --version` and treating any failure as "not available".
 */
export function isYtDlpAvailable(): boolean {
  let available = true;
  try {
    execSync('yt-dlp --version', { stdio: 'pipe' });
  } catch {
    available = false;
  }
  return available;
}
/**
 * Downloads a video with yt-dlp into `outputDir` (created if missing),
 * preferring a single-file MP4 format.
 *
 * @param url address of the video to download
 * @param outputDir directory the file is written to
 * @returns the downloaded file's path, file name and title
 * @throws Error when yt-dlp fails or times out (10 minutes), when its output
 *   cannot be parsed, or when the reported file does not exist
 */
export function downloadVideo(url: string, outputDir: string): YtDlpResult {
  if (!fs.existsSync(outputDir)) {
    fs.mkdirSync(outputDir, { recursive: true });
  }

  const outputTemplate = path.join(outputDir, '%(title)s.%(ext)s');

  // yt-dlp is executed without a shell and with an argument array, so the
  // caller-supplied URL can never be interpreted as shell syntax.
  const output = execFileSync(
    'yt-dlp',
    [
      '-f', 'best[ext=mp4]/best',
      '-o', outputTemplate,
      // A plain `--print` implies `--simulate`, i.e. nothing is downloaded.
      // Downloading is requested explicitly and the path is printed only
      // after the file has reached its final location.
      '--no-simulate',
      '--print', 'after_move:filepath',
      '--print', 'after_move:title',
      // End of options: a URL starting with '-' must not be read as a flag.
      '--', url
    ],
    { encoding: 'utf-8', timeout: 600000 }
  );

  const [printedPath, printedTitle] = output.trim().split(/\r?\n/);
  const reportedPath = printedPath?.trim();
  if (!reportedPath) {
    throw new Error('yt-dlp: Failed to parse downloaded filename');
  }

  const filePath = path.resolve(outputDir, reportedPath);
  if (!fs.existsSync(filePath)) {
    throw new Error(`yt-dlp: Downloaded file not found at ${filePath}`);
  }

  const filename = path.basename(filePath);
  const title = printedTitle?.trim() || filename;
  return { filePath, filename, title };
}

View File

@@ -6,7 +6,8 @@ import {
AudioSegment, AudioSegment,
Stats, Stats,
BatchContext, BatchContext,
ProcessingResult ProcessingResult,
ProcessingOptions
} from '../interfaces'; } from '../interfaces';
import { Config, getDefaultConfig } from '../config/config'; import { Config, getDefaultConfig } from '../config/config';
import { printStats, createStats } from '../config/stats'; import { printStats, createStats } from '../config/stats';
@@ -28,7 +29,8 @@ import {
*/ */
export async function generateAudioDescriptionFromOptions( export async function generateAudioDescriptionFromOptions(
videoFilePath: string, videoFilePath: string,
options: Partial<Config> = {} options: Partial<Config> = {},
processingOptions: ProcessingOptions = {}
): Promise<ProcessingResult> { ): Promise<ProcessingResult> {
const config = { ...getDefaultConfig(), ...options }; const config = { ...getDefaultConfig(), ...options };
@@ -43,7 +45,7 @@ export async function generateAudioDescriptionFromOptions(
const ttsProvider = TTSProviderFactory.getProvider(config); const ttsProvider = TTSProviderFactory.getProvider(config);
const stats = createStats(); const stats = createStats();
return generateAudioDescription(videoFilePath, visionProvider, ttsProvider, config, stats); return generateAudioDescription(videoFilePath, visionProvider, ttsProvider, config, stats, processingOptions);
} }
/** /**
@@ -60,7 +62,8 @@ export async function generateAudioDescription(
visionProvider: VisionProvider, visionProvider: VisionProvider,
ttsProvider: TTSProvider, ttsProvider: TTSProvider,
options: Partial<Config> = {}, options: Partial<Config> = {},
stats: Stats stats: Stats,
processingOptions: ProcessingOptions = {}
): Promise<ProcessingResult> { ): Promise<ProcessingResult> {
// Merge provided options with defaults // Merge provided options with defaults
const settings = { ...options } as Config; const settings = { ...options } as Config;
@@ -86,7 +89,8 @@ export async function generateAudioDescription(
settings, settings,
visionProvider, visionProvider,
ttsProvider, ttsProvider,
stats stats,
processingOptions
); );
} }
@@ -97,18 +101,23 @@ export async function generateAudioDescription(
// Context window to store previous frames // Context window to store previous frames
const frameContext: { index: number; path: string; timePosition: number }[] = []; const frameContext: { index: number; path: string; timePosition: number }[] = [];
// Array to store audio segment information // Array to store audio segment information - preload with existing segments if resuming
const audioSegments: AudioSegment[] = []; const audioSegments: AudioSegment[] = processingOptions.existingSegments
? [...processingOptions.existingSegments]
: [];
// Track our current time position (will be adjusted for audio overlap) // Track our current time position (will be adjusted for audio overlap)
let currentTimePosition = 0; let currentTimePosition = processingOptions.currentTimePosition || 0;
// Start from given index if resuming
const startIndex = processingOptions.startIndex || 0;
// Track drift from the original schedule // Track drift from the original schedule
let timelineDrift = 0; let timelineDrift = 0;
const maxAllowableDrift = settings.captureIntervalSeconds * 2; // Maximum drift before warning const maxAllowableDrift = settings.captureIntervalSeconds * 2; // Maximum drift before warning
// Process each frame // Process each frame
for (let i = 0; i < totalFrames; i++) { for (let i = startIndex; i < totalFrames; i++) {
// Calculate the ideal time position based on the original schedule // Calculate the ideal time position based on the original schedule
const idealTimePosition = i * settings.captureIntervalSeconds; const idealTimePosition = i * settings.captureIntervalSeconds;
@@ -183,12 +192,23 @@ export async function generateAudioDescription(
console.log(`Audio duration: ${audioDuration} seconds`); console.log(`Audio duration: ${audioDuration} seconds`);
// Store segment information // Store segment information
audioSegments.push({ const segment: AudioSegment = {
audioFile: audioFilePath, audioFile: audioFilePath,
startTime: timePosition, startTime: timePosition,
duration: audioDuration, duration: audioDuration,
description description
}); };
audioSegments.push(segment);
// Notify progress callback
if (processingOptions.onProgress) {
processingOptions.onProgress({
type: 'frame',
index: i,
total: totalFrames,
segment
});
}
// Update the time position for the next iteration // Update the time position for the next iteration
// Add a small buffer (0.25 sec) between descriptions to prevent hard cuts // Add a small buffer (0.25 sec) between descriptions to prevent hard cuts
@@ -217,7 +237,8 @@ export async function generateAudioDescription(
return { return {
videoFile: videoFilePath, videoFile: videoFilePath,
audioDescriptionFile: outputAudioPath audioDescriptionFile: outputAudioPath,
segments: audioSegments
}; };
} }
@@ -236,24 +257,31 @@ async function generateAudioDescriptionBatch(
settings: Config, settings: Config,
visionProvider: VisionProvider, visionProvider: VisionProvider,
ttsProvider: TTSProvider, ttsProvider: TTSProvider,
stats: Stats stats: Stats,
processingOptions: ProcessingOptions = {}
): Promise<ProcessingResult> { ): Promise<ProcessingResult> {
const totalBatches = Math.floor(videoDuration / settings.batchWindowDuration); const totalBatches = Math.floor(videoDuration / settings.batchWindowDuration);
console.log(`Using batchTimeMode. Total batches: ${totalBatches} (each covers ${settings.batchWindowDuration} sec)`); console.log(`Using batchTimeMode. Total batches: ${totalBatches} (each covers ${settings.batchWindowDuration} sec)`);
// We'll hold the last batch's frames or last batch's description for context // We'll hold the last batch's frames or last batch's description for context
let lastBatchContext: BatchContext = {}; let lastBatchContext: BatchContext = processingOptions.lastContext || {};
const audioSegments: AudioSegment[] = []; // Preload with existing segments if resuming
const audioSegments: AudioSegment[] = processingOptions.existingSegments
? [...processingOptions.existingSegments]
: [];
// Track our current time position (will be adjusted for audio overlap) // Track our current time position (will be adjusted for audio overlap)
let currentTimePosition = 0; let currentTimePosition = processingOptions.currentTimePosition || 0;
// Start from given index if resuming
const startBatchIndex = processingOptions.startIndex || 0;
// Track drift from the original schedule // Track drift from the original schedule
let timelineDrift = 0; let timelineDrift = 0;
const maxAllowableDrift = settings.batchWindowDuration * 0.5; // Maximum drift of 50% of batch window const maxAllowableDrift = settings.batchWindowDuration * 0.5; // Maximum drift of 50% of batch window
for (let batchIndex = 0; batchIndex < totalBatches; batchIndex++) { for (let batchIndex = startBatchIndex; batchIndex < totalBatches; batchIndex++) {
// Calculate ideal batch timing based on configuration // Calculate ideal batch timing based on configuration
const idealBatchStart = batchIndex * settings.batchWindowDuration; const idealBatchStart = batchIndex * settings.batchWindowDuration;
@@ -315,12 +343,23 @@ async function generateAudioDescriptionBatch(
console.log(`Batch #${batchIndex} audio duration: ${audioDuration} seconds`); console.log(`Batch #${batchIndex} audio duration: ${audioDuration} seconds`);
// Store segment info with the adjusted start time // Store segment info with the adjusted start time
audioSegments.push({ const segment: AudioSegment = {
audioFile: audioFilePath, audioFile: audioFilePath,
startTime: batchStart, startTime: batchStart,
duration: audioDuration, duration: audioDuration,
description description
}); };
audioSegments.push(segment);
// Notify progress callback
if (processingOptions.onProgress) {
processingOptions.onProgress({
type: 'batch',
index: batchIndex,
total: totalBatches,
segment
});
}
// Update the time position for the next iteration // Update the time position for the next iteration
// Add a small buffer (0.5 sec) between descriptions // Add a small buffer (0.5 sec) between descriptions
@@ -355,6 +394,7 @@ async function generateAudioDescriptionBatch(
return { return {
videoFile: videoFilePath, videoFile: videoFilePath,
audioDescriptionFile: outputAudioPath audioDescriptionFile: outputAudioPath,
segments: audioSegments
}; };
} }