From 7d1a0029bc4c2670376a390b001a1620b70f2630 Mon Sep 17 00:00:00 2001 From: Talon Date: Wed, 13 May 2026 16:23:43 +0200 Subject: [PATCH] Add server: Express web UI + API for remote audio description generation with job queue, basic auth, resumable processing, subtitles, and muxing --- .gitignore | 4 +- package.json | 2 + src/config/config.ts | 2 +- src/index.ts | 2 + src/interfaces/index.ts | 18 + src/server/app.ts | 32 ++ src/server/db/index.ts | 59 +++ src/server/db/jobStore.ts | 123 ++++++ src/server/index.ts | 41 ++ src/server/middleware/auth.ts | 29 ++ src/server/public/app.js | 519 +++++++++++++++++++++++ src/server/public/index.html | 145 +++++++ src/server/public/style.css | 111 +++++ src/server/routes/auth.ts | 33 ++ src/server/routes/config.ts | 24 ++ src/server/routes/files.ts | 89 ++++ src/server/routes/jobs.ts | 174 ++++++++ src/server/services/jobManager.ts | 293 +++++++++++++ src/server/services/muxer.ts | 29 ++ src/server/services/subtitleGenerator.ts | 70 +++ src/server/services/ytDlp.ts | 47 ++ src/utils/processor.ts | 80 +++- 22 files changed, 1904 insertions(+), 22 deletions(-) create mode 100644 src/server/app.ts create mode 100644 src/server/db/index.ts create mode 100644 src/server/db/jobStore.ts create mode 100644 src/server/index.ts create mode 100644 src/server/middleware/auth.ts create mode 100644 src/server/public/app.js create mode 100644 src/server/public/index.html create mode 100644 src/server/public/style.css create mode 100644 src/server/routes/auth.ts create mode 100644 src/server/routes/config.ts create mode 100644 src/server/routes/files.ts create mode 100644 src/server/routes/jobs.ts create mode 100644 src/server/services/jobManager.ts create mode 100644 src/server/services/muxer.ts create mode 100644 src/server/services/subtitleGenerator.ts create mode 100644 src/server/services/ytDlp.ts diff --git a/.gitignore b/.gitignore index 5c5d877..7d1a791 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ .env node_modules 
-desc/ \ No newline at end of file +desc/ +data/ +uploads/ \ No newline at end of file diff --git a/package.json b/package.json index 420f1fe..f3ea4c9 100644 --- a/package.json +++ b/package.json @@ -14,6 +14,8 @@ "build": "tsc", "start": "node dist/cli/index.js", "dev": "ts-node src/cli/index.ts", + "server": "ts-node src/server/index.ts", + "server:build": "node dist/server/index.js", "test": "jest", "lint": "eslint src/**/*.ts", "prepublishOnly": "npm run build" diff --git a/src/config/config.ts b/src/config/config.ts index ff41c8d..2b38266 100644 --- a/src/config/config.ts +++ b/src/config/config.ts @@ -80,7 +80,7 @@ export function getDefaultConfig(): Config { openai: { apiKey: process.env.OPENAI_API_KEY, model: "gpt-4o-mini-tts", - voice: "alloy" + voice: "shimmer" }, elevenlabs: { apiKey: process.env.ELEVENLABS_API_KEY, diff --git a/src/index.ts b/src/index.ts index 9ff2ec3..a9a1d32 100644 --- a/src/index.ts +++ b/src/index.ts @@ -19,6 +19,8 @@ export { loadConfigFromFile, saveConfigToFile } from './utils/configUtils'; export type { Config } from './config/config'; export type { ProcessingResult, + ProcessingOptions, + ProgressInfo, CostBreakdown, Stats, VisionProvider, diff --git a/src/interfaces/index.ts b/src/interfaces/index.ts index dc27749..f143418 100644 --- a/src/interfaces/index.ts +++ b/src/interfaces/index.ts @@ -81,10 +81,28 @@ export interface BatchContext { lastFramePaths?: string[]; } +// Progress callback for real-time tracking +export interface ProgressInfo { + type: 'frame' | 'batch'; + index: number; + total: number; + segment: AudioSegment; +} + +// Extended processing options for resumability +export interface ProcessingOptions { + startIndex?: number; + existingSegments?: AudioSegment[]; + lastContext?: BatchContext; + currentTimePosition?: number; + onProgress?: (info: ProgressInfo) => void; +} + // Result interfaces export interface ProcessingResult { videoFile: string; audioDescriptionFile: string; + segments: AudioSegment[]; } 
export interface CostBreakdown {
diff --git a/src/server/app.ts b/src/server/app.ts
new file mode 100644
index 0000000..7aedc85
--- /dev/null
+++ b/src/server/app.ts
@@ -0,0 +1,44 @@
+import express from 'express';
+import cors from 'cors';
+import path from 'path';
+import fs from 'fs';
+import { basicAuth } from './middleware/auth';
+import authRoutes from './routes/auth';
+import configRoutes from './routes/config';
+import filesRoutes from './routes/files';
+import { createJobsRouter } from './routes/jobs';
+import { JobManager } from './services/jobManager';
+
+/**
+ * Build the Express application for the audio description server.
+ *
+ * Middleware order matters: CORS and JSON body parsing come first, then the
+ * static frontend, then Basic auth, then the `/api/*` routers.
+ *
+ * @param jobManager - Job manager instance passed to the jobs router.
+ * @returns The configured Express application (not yet listening).
+ */
+export function createApp(jobManager: JobManager): express.Application {
+  const app = express();
+
+  app.use(cors());
+  app.use(express.json({ limit: '50mb' }));
+
+  // Serve static frontend from src/server/public (works with ts-node and compiled).
+  // Registered before basicAuth so the login page and its assets load without
+  // credentials; express.static falls through to the next middleware for paths
+  // it has no file for, so every /api route still passes through basicAuth.
+  const publicDir = path.resolve(__dirname, '..', '..', 'src', 'server', 'public');
+  app.use(express.static(publicDir));
+
+  // Auth middleware
+  app.use(basicAuth);
+
+  // API routes
+  app.use('/api/auth', authRoutes);
+  app.use('/api/config', configRoutes);
+  app.use('/api/files', filesRoutes);
+  app.use('/api/jobs', createJobsRouter(jobManager));
+
+  return app;
+}
diff --git a/src/server/db/index.ts b/src/server/db/index.ts new file mode 100644 index 0000000..7ceff4f --- /dev/null +++ b/src/server/db/index.ts @@ -0,0 +1,59 @@ +import Database from 'better-sqlite3'; +import path from 'path'; +import fs from 'fs'; + +const DB_PATH = path.resolve('./data/server.db'); + +let db: Database.Database; + +export function getDb(): Database.Database { + if (!db) { + const dir = path.dirname(DB_PATH); + if (!fs.existsSync(dir)) { + fs.mkdirSync(dir, { recursive: true }); + } + db = new Database(DB_PATH); + db.pragma('journal_mode = WAL'); + db.pragma('foreign_keys = ON'); + migrate(); + } + return db; +} + +function migrate(): void { + db.exec(` + CREATE TABLE IF NOT EXISTS jobs ( + id TEXT PRIMARY KEY, + video_path TEXT NOT NULL, + video_filename TEXT NOT NULL, 
status TEXT NOT NULL DEFAULT 'pending', + config TEXT NOT NULL, + progress REAL DEFAULT 0, + current_index INTEGER DEFAULT 0, + total_units INTEGER DEFAULT 0, + segments TEXT DEFAULT '[]', + last_context TEXT DEFAULT '{}', + current_time_position REAL DEFAULT 0, + error TEXT, + created_at TEXT NOT NULL, + updated_at TEXT NOT NULL, + completed_at TEXT, + output_audio TEXT, + output_subtitles_srt TEXT, + output_subtitles_vtt TEXT, + output_muxed TEXT, + output_options TEXT DEFAULT '{}' + ); + + CREATE TABLE IF NOT EXISTS config ( + key TEXT PRIMARY KEY, + value TEXT NOT NULL + ); + `); +} + +export function closeDb(): void { + if (db) { + db.close(); + } +} diff --git a/src/server/db/jobStore.ts b/src/server/db/jobStore.ts new file mode 100644 index 0000000..b706be3 --- /dev/null +++ b/src/server/db/jobStore.ts @@ -0,0 +1,123 @@ +import { getDb } from '../db'; +import { v4 as uuidv4 } from 'uuid'; + +export interface OutputOptions { + audio: boolean; + subtitles: boolean; + muxed: boolean; +} + +export interface Job { + id: string; + video_path: string; + video_filename: string; + status: 'pending' | 'queued' | 'processing' | 'paused' | 'completed' | 'failed' | 'cancelled'; + config: string; + progress: number; + current_index: number; + total_units: number; + segments: string; + last_context: string; + current_time_position: number; + error: string | null; + created_at: string; + updated_at: string; + completed_at: string | null; + output_audio: string | null; + output_subtitles_srt: string | null; + output_subtitles_vtt: string | null; + output_muxed: string | null; + output_options: string; +} + +export function getAllJobs(): Job[] { + const db = getDb(); + return db.prepare('SELECT * FROM jobs ORDER BY created_at DESC').all() as Job[]; +} + +export function getJob(id: string): Job | undefined { + const db = getDb(); + return db.prepare('SELECT * FROM jobs WHERE id = ?').get(id) as Job | undefined; +} + +export function createJob(videoPath: string, filename: 
string, config: object, outputOptions: OutputOptions): Job { + const db = getDb(); + const id = uuidv4(); + const now = new Date().toISOString(); + + db.prepare(` + INSERT INTO jobs (id, video_path, video_filename, config, output_options, created_at, updated_at) + VALUES (?, ?, ?, ?, ?, ?, ?) + `).run(id, videoPath, filename, JSON.stringify(config), JSON.stringify(outputOptions), now, now); + + return getJob(id)!; +} + +export function updateJobStatus(id: string, status: Job['status'], error?: string): void { + const db = getDb(); + const now = new Date().toISOString(); + const completedAt = status === 'completed' ? now : null; + db.prepare(` + UPDATE jobs SET status = ?, error = ?, updated_at = ?, completed_at = ? WHERE id = ? + `).run(status, error || null, now, completedAt, id); +} + +export function saveCheckpoint( + id: string, + segments: string, + currentIndex: number, + totalUnits: number, + currentTimePosition: number, + lastContext: string, + progress: number +): void { + const db = getDb(); + const now = new Date().toISOString(); + db.prepare(` + UPDATE jobs SET segments = ?, current_index = ?, total_units = ?, current_time_position = ?, last_context = ?, progress = ?, updated_at = ? WHERE id = ? + `).run(segments, currentIndex, totalUnits, currentTimePosition, lastContext, progress, now, id); +} + +export function saveJobOutputs( + id: string, + outputs: { audio?: string; subtitlesSrt?: string; subtitlesVtt?: string; muxed?: string } +): void { + const db = getDb(); + const now = new Date().toISOString(); + db.prepare(` + UPDATE jobs SET output_audio = ?, output_subtitles_srt = ?, output_subtitles_vtt = ?, output_muxed = ?, updated_at = ? WHERE id = ? 
+ `).run( + outputs.audio || null, + outputs.subtitlesSrt || null, + outputs.subtitlesVtt || null, + outputs.muxed || null, + now, + id + ); +} + +export function deleteJob(id: string): void { + const db = getDb(); + db.prepare('DELETE FROM jobs WHERE id = ?').run(id); +} + +export function getConfigValue(key: string): string | undefined { + const db = getDb(); + const row = db.prepare('SELECT value FROM config WHERE key = ?').get(key) as { value: string } | undefined; + return row?.value; +} + +export function setConfigValue(key: string, value: string): void { + const db = getDb(); + db.prepare('INSERT OR REPLACE INTO config (key, value) VALUES (?, ?)').run(key, value); +} +
+export function getAllConfig(): Record<string, string> { // every row of the config table as a key -> value map
+  const db = getDb();
+  const rows = db.prepare('SELECT key, value FROM config').all() as { key: string; value: string }[];
+  const config: Record<string, string> = {}; // Record needs both type arguments to compile
+  for (const row of rows) {
+    config[row.key] = row.value;
+  }
+  return config;
+}
diff --git a/src/server/index.ts b/src/server/index.ts new file mode 100644 index 0000000..c596a37 --- /dev/null +++ b/src/server/index.ts @@ -0,0 +1,41 @@ +import 'dotenv/config'; +import { createApp } from './app'; +import { JobManager } from './services/jobManager'; +import { getDb, closeDb } from './db'; + +const PORT = parseInt(process.env.SERVER_PORT || '3000', 10); +const USERNAME = process.env.SERVER_USERNAME || 'admin'; +const PASSWORD = process.env.SERVER_PASSWORD || 'aidio2024'; + +// Initialize database +getDb(); + +// Create job manager +const jobManager = new JobManager(); + +// Create app +const app = createApp(jobManager); + +app.listen(PORT, () => { + console.log(` +╔══════════════════════════════════════════════════════╗ +║ Audio Description Server v1.0 ║ +║ http://localhost:${PORT} ║ +║ ║ +║ Username: ${USERNAME.padEnd(41)}║ +║ Password: ${PASSWORD.padEnd(41)}║ +╚══════════════════════════════════════════════════════╝ + `); +}); + +// Graceful shutdown +process.on('SIGINT', () => { 
+ console.log('\nShutting down...'); + closeDb(); + process.exit(0); +}); + +process.on('SIGTERM', () => { + closeDb(); + process.exit(0); +});
diff --git a/src/server/middleware/auth.ts b/src/server/middleware/auth.ts
new file mode 100644
index 0000000..5e20574
--- /dev/null
+++ b/src/server/middleware/auth.ts
@@ -0,0 +1,41 @@
+import { Request, Response, NextFunction } from 'express';
+
+const AUTH_USERNAME = process.env.SERVER_USERNAME || 'admin';
+const AUTH_PASSWORD = process.env.SERVER_PASSWORD || 'aidio2024';
+
+/**
+ * Express middleware enforcing HTTP Basic authentication.
+ *
+ * `/api/auth/login` and `/api/auth/check` pass through unauthenticated. Every
+ * other request must carry an `Authorization: Basic base64(user:pass)` header
+ * matching AUTH_USERNAME / AUTH_PASSWORD; otherwise a 401 JSON response with a
+ * `WWW-Authenticate` challenge is sent and `next()` is not called.
+ */
+export function basicAuth(req: Request, res: Response, next: NextFunction): void {
+  if (req.path === '/api/auth/login' || req.path === '/api/auth/check') {
+    next();
+    return;
+  }
+
+  const authHeader = req.headers.authorization;
+  if (!authHeader || !authHeader.startsWith('Basic ')) {
+    res.setHeader('WWW-Authenticate', 'Basic realm="Audio Description Server"');
+    res.status(401).json({ error: 'Authentication required' });
+    return;
+  }
+
+  const credentials = Buffer.from(authHeader.slice(6), 'base64').toString('utf-8');
+  // Split on the FIRST colon only: the user-id cannot contain ':' but the
+  // password may (RFC 7617), so split(':') would truncate such passwords.
+  const separator = credentials.indexOf(':');
+  const username = separator === -1 ? credentials : credentials.slice(0, separator);
+  const password = separator === -1 ? undefined : credentials.slice(separator + 1);
+
+  if (username === AUTH_USERNAME && password === AUTH_PASSWORD) {
+    next();
+    return;
+  }
+
+  res.setHeader('WWW-Authenticate', 'Basic realm="Audio Description Server"');
+  res.status(401).json({ error: 'Invalid credentials' });
+}
diff --git a/src/server/public/app.js b/src/server/public/app.js new file mode 100644 index 0000000..aad6075 --- /dev/null +++ b/src/server/public/app.js @@ -0,0 +1,519 @@ +let authToken = sessionStorage.getItem('authToken'); +let selectedFilePath = ''; +let currentConfig = {}; + +function apiHeaders() { + const h = { 'Content-Type': 'application/json' }; + if (authToken) h['Authorization'] = `Basic ${authToken}`; + return h; +} + +async function api(method, url, body) { + const res = await fetch(url, { + method, + headers: apiHeaders(), + body: body ? 
JSON.stringify(body) : undefined + }); + if (res.status === 401) { + sessionStorage.removeItem('authToken'); + authToken = null; + showLogin(); + throw new Error('Unauthorized'); + } + return res; +} + +async function apiJson(method, url, body) { + const res = await api(method, url, body); + const data = await res.json(); + if (!res.ok) throw new Error(data.error || 'Request failed'); + return data; +} + +// Login +function showLogin() { + document.getElementById('login-screen').classList.remove('hidden'); + document.getElementById('main-screen').classList.add('hidden'); + document.getElementById('login-error').classList.add('hidden'); +} + +function showMain() { + document.getElementById('login-screen').classList.add('hidden'); + document.getElementById('main-screen').classList.remove('hidden'); +} + +document.getElementById('login-form').addEventListener('submit', async (e) => { + e.preventDefault(); + const username = document.getElementById('login-username').value; + const password = document.getElementById('login-password').value; + try { + const res = await fetch('/api/auth/login', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ username, password }) + }); + const data = await res.json(); + if (data.authenticated) { + authToken = data.token; + sessionStorage.setItem('authToken', authToken); + showMain(); + initApp(); + } else { + document.getElementById('login-error').textContent = data.error; + document.getElementById('login-error').classList.remove('hidden'); + } + } catch (err) { + document.getElementById('login-error').textContent = 'Connection failed'; + document.getElementById('login-error').classList.remove('hidden'); + } +}); + +document.getElementById('logout-btn').addEventListener('click', () => { + sessionStorage.removeItem('authToken'); + authToken = null; + showLogin(); +}); + +// Tab navigation +document.querySelectorAll('button.tab').forEach(btn => { + btn.addEventListener('click', () => { + 
document.querySelectorAll('button.tab').forEach(b => b.classList.remove('active')); + btn.classList.add('active'); + document.querySelectorAll('.tab-content').forEach(c => c.classList.remove('active')); + document.getElementById(btn.dataset.tab).classList.add('active'); + if (btn.dataset.tab === 'dashboard') loadJobs(); + if (btn.dataset.tab === 'files') loadFilesList(); + }); +}); + +// Mini tabs (video source) +document.querySelectorAll('button.tab-mini').forEach(btn => { + btn.addEventListener('click', () => { + document.querySelectorAll('button.tab-mini').forEach(b => b.classList.remove('active')); + btn.classList.add('active'); + document.querySelectorAll('.src-panel').forEach(p => p.classList.remove('active')); + document.getElementById('src-' + btn.dataset.src).classList.add('active'); + }); +}); + +// File upload +document.getElementById('video-upload').addEventListener('change', () => { + const file = document.getElementById('video-upload').files[0]; + if (file) selectedFilePath = null; +}); + +// Refresh browse files +document.getElementById('refresh-files').addEventListener('click', loadBrowseFiles); +async function loadBrowseFiles() { + try { + const data = await apiJson('GET', '/api/files'); + const sel = document.getElementById('video-select'); + sel.innerHTML = ''; + data.files.forEach(f => { + sel.innerHTML += ``; + }); + } catch (err) { + console.error(err); + } +} + +document.getElementById('video-select').addEventListener('change', (e) => { + if (e.target.value) selectedFilePath = e.target.value; +}); + +// YouTube download +document.getElementById('download-url').addEventListener('click', async () => { + const url = document.getElementById('youtube-url').value; + if (!url) return; + const status = document.getElementById('download-status'); + status.textContent = 'Downloading...'; + status.className = 'status'; + try { + const data = await apiJson('POST', '/api/files/youtube', { url }); + status.textContent = `Downloaded: ${data.filename}`; + 
status.className = 'status success'; + selectedFilePath = data.filePath; + document.getElementById('video-select').innerHTML += ``; + } catch (err) { + status.textContent = `Error: ${err.message}`; + status.className = 'status error'; + } +}); + +// New job form +document.getElementById('new-job-form').addEventListener('submit', async (e) => { + e.preventDefault(); + if (!selectedFilePath) { + const fileEl = document.getElementById('video-upload'); + if (fileEl.files.length > 0) { + const formData = new FormData(); + formData.append('video', fileEl.files[0]); + try { + const res = await fetch('/api/files/upload', { method: 'POST', headers: { Authorization: `Basic ${authToken}` }, body: formData }); + const data = await res.json(); + if (!res.ok) throw new Error(data.error || 'Upload failed'); + selectedFilePath = data.filePath; + } catch (err) { + alert('Upload error: ' + err.message); + return; + } + } else { + alert('Please select a video file or source'); + return; + } + } + + const fd = new FormData(e.target); + const config = {}; + for (const [key, val] of fd.entries()) { + if (key === '') continue; + if (val === 'on') config[key] = true; + else if (val === 'off') config[key] = false; + else if (!isNaN(val) && val !== '') config[key] = parseFloat(val); + else config[key] = val; + } + + const outputOptions = { + audio: fd.get('output-audio') === 'on', + subtitles: fd.get('output-subtitles') === 'on', + muxed: fd.get('output-muxed') === 'on' + }; + + // Build config with vision/tts providers + if (config.visionProvider) { + config.visionProviders = {}; + config.visionProviders[config.visionProvider] = { + model: config.visionModel || 'gpt-4o', + maxTokens: config.visionMaxTokens ? 
parseInt(config.visionMaxTokens) : 300 + }; + } + if (config.ttsProvider) { + config.ttsProviders = {}; + config.ttsProviders[config.ttsProvider] = { + model: config.ttsModel || 'tts-1', + voice: config.ttsVoice || 'alloy' + }; + } + + delete config.visionModel; + delete config.visionMaxTokens; + delete config.ttsModel; + delete config['output-audio']; + delete config['output-subtitles']; + delete config['output-muxed']; + + try { + const data = await apiJson('POST', '/api/jobs', { videoPath: selectedFilePath, config, outputOptions }); + await apiJson('POST', `/api/jobs/${data.job.id}/start`); + selectedFilePath = ''; + document.getElementById('video-upload').value = ''; + document.getElementById('new-job-form').reset(); + document.querySelector('.tab[data-tab="dashboard"]').click(); + loadJobs(); + } catch (err) { + alert('Error creating job: ' + err.message); + } +}); + +// Load jobs +async function loadJobs() { + try { + const data = await apiJson('GET', '/api/jobs'); + renderJobs(data.jobs); + } catch (err) { + console.error(err); + } +} + +function renderJobs(jobs) { + const container = document.getElementById('jobs-list'); + if (!jobs.length) { + container.innerHTML = '

No jobs yet. Create one from the "New Job" tab.

'; + return; + } + + container.innerHTML = jobs.map(j => { + const segs = JSON.parse(j.segments || '[]'); + const progressClass = j.status === 'completed' ? 'completed' : j.status === 'failed' ? 'failed' : ''; + const downloads = []; + if (j.status === 'completed') { + if (j.output_audio) downloads.push(`Audio`); + if (j.output_subtitles_srt) downloads.push(`SRT`); + if (j.output_subtitles_vtt) downloads.push(`VTT`); + if (j.output_muxed) downloads.push(`Muxed`); + } + + let actions = ''; + if (j.status === 'pending' || j.status === 'queued') { + actions += ``; + } + if (j.status === 'processing') { + actions += ``; + } + if (j.status === 'failed' || j.status === 'paused' || j.status === 'cancelled') { + actions += ``; + } + if (j.status !== 'processing') { + actions += ``; + } + + return ` +
+
+

${escapeHtml(j.video_filename)}

+
${actions}
+
+ ${j.status} +
+
+ ${Math.round(j.progress)}% + Index: ${j.current_index}/${j.total_units} + ${new Date(j.created_at).toLocaleString()} +
+ ${j.error ? `
${escapeHtml(j.error)}
` : ''} + ${downloads.length ? `` : ''} +
+
${segs.map((s, i) => `
[${s.startTime.toFixed(1)}s] ${escapeHtml(s.description)}
`).join('')}
+
+ +
`; + }).join(''); + + // Wire up buttons + container.querySelectorAll('.start-job').forEach(b => b.addEventListener('click', () => handleJobAction(b.dataset.id, 'start'))); + container.querySelectorAll('.pause-job').forEach(b => b.addEventListener('click', () => handleJobAction(b.dataset.id, 'pause'))); + container.querySelectorAll('.restart-job').forEach(b => b.addEventListener('click', () => handleJobAction(b.dataset.id, 'restart'))); + container.querySelectorAll('.delete-job').forEach(b => b.addEventListener('click', () => handleJobAction(b.dataset.id, 'delete'))); + container.querySelectorAll('.toggle-detail').forEach(b => b.addEventListener('click', () => { + const detail = container.querySelector(`.job-detail[data-id="${b.dataset.id}"]`); + detail.classList.toggle('open'); + b.textContent = detail.classList.contains('open') ? 'Hide segments' : `${JSON.parse((jobs.find(j => j.id === b.dataset.id) || {}).segments || '[]').length} segments`; + })); +} + +async function handleJobAction(id, action) { + const method = action === 'delete' ? 'DELETE' : 'POST'; + const url = `/api/jobs/${id}${action === 'delete' ? '' : '/' + action}`; + try { + await api(method, url); + loadJobs(); + } catch (err) { + alert(`Error: ${err.message}`); + } +} + +// Jobs refresh +document.getElementById('refresh-jobs').addEventListener('click', loadJobs); + +// Auto-refresh jobs +let jobsInterval; +function startJobsPolling() { + jobsInterval = setInterval(loadJobs, 5000); +} +function stopJobsPolling() { + clearInterval(jobsInterval); +} + +// Settings +async function loadSettings() { + try { + const data = await apiJson('GET', '/api/config'); + const container = document.getElementById('settings-fields'); + const config = data.config || {}; + currentConfig = config; + let html = ''; + for (const [key, value] of Object.entries(config)) { + html += ``; + } + if (!Object.keys(config).length) { + html = '

No custom settings yet. Settings from .env are used as defaults.

'; + } + container.innerHTML = html; + } catch (err) { + console.error(err); + } +} + +document.getElementById('settings-form').addEventListener('submit', async (e) => { + e.preventDefault(); + const fd = new FormData(e.target); + const config = {}; + for (const [key, val] of fd.entries()) { + config[key] = val; + } + try { + await apiJson('PUT', '/api/config', config); + alert('Settings saved'); + } catch (err) { + alert('Error: ' + err.message); + } +}); + +// Files list +let selectedFiles = new Set(); +async function loadFilesList() { + try { + const data = await apiJson('GET', '/api/files'); + const tbody = document.querySelector('#files-table tbody'); + tbody.innerHTML = data.files.map(f => ` + + + ${escapeHtml(f.filename)} + ${formatSize(f.size)} + + `).join(''); + + document.querySelectorAll('.file-checkbox').forEach(cb => { + cb.addEventListener('change', () => updateFileSelection()); + }); + } catch (err) { + console.error(err); + } +} + +function updateFileSelection() { + selectedFiles.clear(); + document.querySelectorAll('.file-checkbox:checked').forEach(cb => { + selectedFiles.add(cb.dataset.path); + }); + document.getElementById('delete-selected-files').disabled = selectedFiles.size === 0; +} + +document.getElementById('select-all-files').addEventListener('change', (e) => { + document.querySelectorAll('.file-checkbox').forEach(cb => { cb.checked = e.target.checked; }); + updateFileSelection(); +}); + +document.getElementById('delete-selected-files').addEventListener('click', async () => { + if (!confirm(`Delete ${selectedFiles.size} file(s)?`)) return; + for (const path of selectedFiles) { + // Files are served from uploads dir, delete via fs on server... 
+ // Not implementing server-side file deletion for now + } + alert('File deletion not yet implemented'); +}); + +document.getElementById('refresh-files-list').addEventListener('click', loadFilesList); + +// Pre-fill new job form with config defaults +async function loadConfigDefaults() { + try { + const data = await apiJson('GET', '/api/config'); + const config = data.config || {}; + + if (config.visionProvider) { + const sel = document.querySelector('[name="visionProvider"]'); + sel.innerHTML = ''; + sel.value = config.visionProvider; + } + if (config.visionModel) document.querySelector('[name="visionModel"]').value = config.visionModel; + if (config.ttsProvider) { + const sel = document.querySelector('[name="ttsProvider"]'); + sel.innerHTML = ''; + sel.value = config.ttsProvider; + } + if (config.ttsModel) document.querySelector('[name="ttsModel"]').value = config.ttsModel; + if (config.ttsVoice) document.querySelector('[name="ttsVoice"]').value = config.ttsVoice; + if (config.ttsSpeedFactor) document.querySelector('[name="ttsSpeedFactor"]').value = config.ttsSpeedFactor; + if (config.ttsInstructions) document.querySelector('[name="ttsInstructions"]').value = config.ttsInstructions; + if (config.batchWindowDuration) document.querySelector('[name="batchWindowDuration"]').value = config.batchWindowDuration; + if (config.framesInBatch) document.querySelector('[name="framesInBatch"]').value = config.framesInBatch; + if (config.captureIntervalSeconds) document.querySelector('[name="captureIntervalSeconds"]').value = config.captureIntervalSeconds; + if (config.contextWindowSize) document.querySelector('[name="contextWindowSize"]').value = config.contextWindowSize; + if (config.defaultPrompt) document.querySelector('[name="defaultPrompt"]').value = config.defaultPrompt; + if (config.changePrompt) document.querySelector('[name="changePrompt"]').value = config.changePrompt; + if (config.batchPrompt) document.querySelector('[name="batchPrompt"]').value = 
config.batchPrompt; + } catch (err) { + console.error(err); + } +} + +// Setup SSE for live progress +const sseConnections = {}; +function connectSSE(jobId) { + if (sseConnections[jobId]) return; + const source = new EventSource(`/api/jobs/${jobId}/progress`); + source.onmessage = (event) => { + const data = JSON.parse(event.data); + updateJobCard(jobId, data); + if (data.status === 'completed' || data.status === 'failed' || data.status === 'cancelled') { + source.close(); + delete sseConnections[jobId]; + } + }; + source.onerror = () => { + source.close(); + delete sseConnections[jobId]; + }; + sseConnections[jobId] = source; +} + +function updateJobCard(jobId, data) { + const card = document.querySelector(`.job-card[data-id="${jobId}"]`); + if (!card) return; + + const badge = card.querySelector('.status-badge'); + badge.className = `status-badge status-${data.status}`; + badge.textContent = data.status; + + const fill = card.querySelector('.progress-fill'); + fill.style.width = data.progress + '%'; + fill.className = 'progress-fill'; + if (data.status === 'completed') fill.classList.add('completed'); + if (data.status === 'failed') fill.classList.add('failed'); + + const metaSpans = card.querySelectorAll('.job-meta span'); + if (metaSpans[0]) metaSpans[0].textContent = Math.round(data.progress) + '%'; + if (metaSpans[1]) metaSpans[1].textContent = `Index: ${data.currentIndex}/${data.totalUnits}`; + + // Update segments + const log = card.querySelector('.segment-log'); + if (data.segments && log) { + log.innerHTML = data.segments.map((s, i) => `
[${s.startTime.toFixed(1)}s] ${escapeHtml(s.description)}
`).join(''); + } + + // Update segment count button + const toggleBtn = card.querySelector('.toggle-detail'); + if (toggleBtn && data.segments) { + toggleBtn.textContent = `${data.segments.length} segments`; + } +} + +// Initialize +function initApp() { + loadJobs(); + loadBrowseFiles(); + loadConfigDefaults(); + startJobsPolling(); +} +
+// Escape HTML for safe rendering: &, <, > and " become entity references ('&' first so later entities are not double-escaped)
+function escapeHtml(str) {
+  if (!str) return '';
+  return String(str).replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;').replace(/"/g, '&quot;');
+}
+ +function formatSize(bytes) { + if (!bytes) return '0 B'; + const units = ['B', 'KB', 'MB', 'GB']; + let i = 0; + let size = bytes; + while (size >= 1024 && i < units.length - 1) { size /= 1024; i++; } + return `${size.toFixed(1)} ${units[i]}`; +} + +// Check if already authenticated +(async () => { + if (authToken) { + try { + const res = await fetch('/api/auth/check', { headers: { Authorization: `Basic ${authToken}` } }); + const data = await res.json(); + if (data.authenticated) { + showMain(); + initApp(); + return; + } + } catch (e) {} + } + showLogin(); +})(); diff --git a/src/server/public/index.html b/src/server/public/index.html new file mode 100644 index 0000000..f1a9421 --- /dev/null +++ b/src/server/public/index.html @@ -0,0 +1,145 @@ + + + + + +Audio Description Server + + + +
+
+ +
+ + +
+ + + + diff --git a/src/server/public/style.css b/src/server/public/style.css new file mode 100644 index 0000000..a10953c --- /dev/null +++ b/src/server/public/style.css @@ -0,0 +1,111 @@ +*, *::before, *::after { box-sizing: border-box; } +body { font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif; margin: 0; background: #0d1117; color: #c9d1d9; } +.hidden { display: none !important; } +.error { color: #f85149; } +.success { color: #3fb950; } +.status { font-size: 0.85rem; margin: 4px 0; } + +.screen { min-height: 100vh; } +#login-screen { display: flex; align-items: center; justify-content: center; } +.login-card { background: #161b22; border: 1px solid #30363d; border-radius: 8px; padding: 32px; width: 360px; text-align: center; } +.login-card h1 { margin: 0 0 8px; font-size: 1.4rem; } +.login-card p { margin: 0 0 20px; color: #8b949e; } +.login-card label { display: block; text-align: left; font-size: 0.85rem; margin-bottom: 12px; color: #8b949e; } +.login-card input { width: 100%; margin-top: 4px; padding: 8px 12px; background: #0d1117; border: 1px solid #30363d; border-radius: 6px; color: #c9d1d9; font-size: 1rem; } +.login-card button { width: 100%; padding: 10px; background: #238636; color: #fff; border: none; border-radius: 6px; font-size: 1rem; cursor: pointer; margin-top: 8px; } +.login-card button:hover { background: #2ea043; } + +header { display: flex; align-items: center; justify-content: space-between; padding: 12px 24px; background: #161b22; border-bottom: 1px solid #30363d; } +header h1 { font-size: 1.1rem; margin: 0; } +nav { display: flex; gap: 4px; } + +button.tab { background: transparent; color: #8b949e; border: none; padding: 8px 16px; cursor: pointer; border-radius: 6px; font-size: 0.9rem; } +button.tab:hover { background: #21262d; color: #c9d1d9; } +button.tab.active { background: #1f6feb; color: #fff; } +button.tab.danger:hover { background: #da3633; color: #fff; } + +.tab-content { padding: 24px; display: 
none; } +.tab-content.active { display: block; } + +.toolbar { display: flex; align-items: center; justify-content: space-between; margin-bottom: 16px; } +.toolbar h2 { margin: 0; font-size: 1.2rem; } + +button { padding: 8px 16px; background: #21262d; color: #c9d1d9; border: 1px solid #30363d; border-radius: 6px; cursor: pointer; font-size: 0.9rem; } +button:hover { background: #30363d; } +button.btn-primary { background: #238636; border-color: #238636; color: #fff; } +button.btn-primary:hover { background: #2ea043; } +button.danger { background: transparent; color: #f85149; } +button.danger:hover { background: #da3633; color: #fff; border-color: #da3633; } +button:disabled { opacity: 0.5; cursor: not-allowed; } + +.empty { color: #8b949e; font-style: italic; text-align: center; padding: 40px; } + +fieldset { border: 1px solid #30363d; border-radius: 8px; padding: 16px; margin-bottom: 16px; } +legend { font-weight: 600; padding: 0 8px; } + +.tabs-mini { display: flex; gap: 4px; margin-bottom: 12px; } +button.tab-mini { background: transparent; color: #8b949e; border: 1px solid #30363d; padding: 6px 12px; cursor: pointer; border-radius: 4px; font-size: 0.85rem; } +button.tab-mini.active { background: #1f6feb; color: #fff; border-color: #1f6feb; } +.src-panel { display: none; } +.src-panel.active { display: block; } + +.form-grid { display: grid; grid-template-columns: 1fr 1fr; gap: 12px; } +.form-grid label.full { grid-column: 1 / -1; } +.form-grid label { display: flex; flex-direction: column; font-size: 0.85rem; color: #8b949e; gap: 4px; } +.form-grid input, .form-grid select, .form-grid textarea { padding: 8px 12px; background: #0d1117; border: 1px solid #30363d; border-radius: 6px; color: #c9d1d9; font-size: 0.9rem; } +.form-grid textarea { resize: vertical; min-height: 60px; } +.form-grid input[type="checkbox"] { width: auto; } + +details { margin-bottom: 12px; border: 1px solid #30363d; border-radius: 8px; padding: 12px 16px; } +details summary { cursor: 
pointer; font-weight: 600; padding: 4px 0; } +details .form-grid { margin-top: 12px; } + +.hint { color: #8b949e; font-size: 0.85rem; margin-top: -12px; margin-bottom: 16px; } + +select, input[type="file"], input[type="url"] { padding: 8px 12px; background: #0d1117; border: 1px solid #30363d; border-radius: 6px; color: #c9d1d9; font-size: 0.9rem; } + +/* Job cards */ +.jobs-list { display: flex; flex-direction: column; gap: 8px; } +.job-card { background: #161b22; border: 1px solid #30363d; border-radius: 8px; padding: 16px; } +.job-card-header { display: flex; align-items: center; justify-content: space-between; margin-bottom: 8px; } +.job-card-header h3 { margin: 0; font-size: 1rem; word-break: break-all; } +.job-actions { display: flex; gap: 4px; } +.job-actions button { font-size: 0.8rem; padding: 4px 10px; } + +.status-badge { display: inline-block; padding: 2px 10px; border-radius: 12px; font-size: 0.75rem; font-weight: 600; text-transform: uppercase; } +.status-pending { background: #21262d; color: #8b949e; } +.status-queued { background: #1a2332; color: #58a6ff; } +.status-processing { background: #1a2332; color: #58a6ff; } +.status-completed { background: #172f1e; color: #3fb950; } +.status-failed { background: #2d1518; color: #f85149; } +.status-paused { background: #2d2400; color: #d29922; } +.status-cancelled { background: #21262d; color: #8b949e; } + +.progress-bar { height: 6px; background: #21262d; border-radius: 3px; margin: 8px 0; overflow: hidden; } +.progress-fill { height: 100%; background: #1f6feb; border-radius: 3px; transition: width 0.5s ease; } +.progress-fill.completed { background: #3fb950; } +.progress-fill.failed { background: #f85149; } + +.job-meta { display: flex; gap: 16px; font-size: 0.8rem; color: #8b949e; margin-bottom: 8px; } + +.job-detail { margin-top: 12px; padding-top: 12px; border-top: 1px solid #30363d; display: none; } +.job-detail.open { display: block; } +.segment-log { max-height: 200px; overflow-y: auto; font-size: 
0.8rem; color: #8b949e; background: #0d1117; padding: 8px; border-radius: 4px; margin-bottom: 8px; } +.segment-entry { padding: 4px 0; border-bottom: 1px solid #1c2128; } +.segment-entry:last-child { border-bottom: none; } +.segment-time { color: #58a6ff; } + +.download-links { display: flex; gap: 8px; flex-wrap: wrap; } +.download-links a { padding: 6px 12px; background: #21262d; color: #58a6ff; text-decoration: none; border-radius: 4px; font-size: 0.85rem; border: 1px solid #30363d; } +.download-links a:hover { background: #30363d; } + +.error-msg { color: #f85149; font-size: 0.85rem; background: #2d1518; padding: 8px; border-radius: 4px; margin: 8px 0; } + +/* Files table */ +#files-table { width: 100%; border-collapse: collapse; } +#files-table th, #files-table td { text-align: left; padding: 8px 12px; border-bottom: 1px solid #30363d; } +#files-table th { font-size: 0.85rem; color: #8b949e; } +#files-table tbody tr:hover { background: #161b22; } + +/* Messages */ +#login-error { margin-top: 12px; } diff --git a/src/server/routes/auth.ts b/src/server/routes/auth.ts new file mode 100644 index 0000000..c166501 --- /dev/null +++ b/src/server/routes/auth.ts @@ -0,0 +1,33 @@ +import { Router, Request, Response } from 'express'; + +const router = Router(); + +router.post('/login', (req: Request, res: Response) => { + const { username, password } = req.body; + const serverUser = process.env.SERVER_USERNAME || 'admin'; + const serverPass = process.env.SERVER_PASSWORD || 'aidio2024'; + + if (username === serverUser && password === serverPass) { + const token = Buffer.from(`${username}:${password}`).toString('base64'); + res.json({ authenticated: true, token, username }); + } else { + res.status(401).json({ authenticated: false, error: 'Invalid credentials' }); + } +}); + +router.get('/check', (req: Request, res: Response) => { + const authHeader = req.headers.authorization; + if (!authHeader || !authHeader.startsWith('Basic ')) { + res.json({ authenticated: false }); + 
return; + } + + const credentials = Buffer.from(authHeader.slice(6), 'base64').toString('utf-8'); + const [username, password] = credentials.split(':'); + const serverUser = process.env.SERVER_USERNAME || 'admin'; + const serverPass = process.env.SERVER_PASSWORD || 'aidio2024'; + + res.json({ authenticated: username === serverUser && password === serverPass, username }); +}); + +export default router; diff --git a/src/server/routes/config.ts b/src/server/routes/config.ts new file mode 100644 index 0000000..e8b5181 --- /dev/null +++ b/src/server/routes/config.ts @@ -0,0 +1,24 @@ +import { Router, Request, Response } from 'express'; +import { getAllConfig, setConfigValue } from '../db/jobStore'; + +const router = Router(); + +router.get('/', (_req: Request, res: Response) => { + const config = getAllConfig(); + res.json({ config }); +}); + +router.put('/', (req: Request, res: Response) => { + const updates = req.body; + if (typeof updates !== 'object' || updates === null) { + res.status(400).json({ error: 'Body must be a JSON object of key-value pairs' }); + return; + } + for (const [key, value] of Object.entries(updates)) { + setConfigValue(key, String(value)); + } + const config = getAllConfig(); + res.json({ config }); +}); + +export default router; diff --git a/src/server/routes/files.ts b/src/server/routes/files.ts new file mode 100644 index 0000000..820168e --- /dev/null +++ b/src/server/routes/files.ts @@ -0,0 +1,89 @@ +import { Router, Request, Response } from 'express'; +import multer from 'multer'; +import path from 'path'; +import fs from 'fs'; +import { downloadVideo, isYtDlpAvailable } from '../services/ytDlp'; + +const UPLOADS_DIR = path.resolve('./uploads'); + +const storage = multer.diskStorage({ + destination: (_req, _file, cb) => { + if (!fs.existsSync(UPLOADS_DIR)) { + fs.mkdirSync(UPLOADS_DIR, { recursive: true }); + } + cb(null, UPLOADS_DIR); + }, + filename: (_req, file, cb) => { + const uniqueSuffix = Date.now() + '-' + Math.round(Math.random() 
* 1e9); + cb(null, uniqueSuffix + path.extname(file.originalname)); + } +}); + +const upload = multer({ + storage, + fileFilter: (_req, file, cb) => { + const allowedMimes = [ + 'video/mp4', 'video/webm', 'video/x-matroska', 'video/quicktime', + 'video/x-msvideo', 'video/mpeg', 'video/x-ms-wmv', 'video/x-flv' + ]; + if (allowedMimes.includes(file.mimetype) || file.originalname.match(/\.(mp4|mkv|webm|mov|avi|mpg|mpeg|wmv|flv)$/i)) { + cb(null, true); + } else { + cb(new Error('Invalid file type. Only video files are allowed.')); + } + }, + limits: { fileSize: 10 * 1024 * 1024 * 1024 } // 10GB +}); + +const router = Router(); + +router.post('/upload', upload.single('video'), (req: Request, res: Response) => { + if (!req.file) { + res.status(400).json({ error: 'No video file uploaded' }); + return; + } + res.json({ + filePath: req.file.path, + filename: req.file.originalname, + size: req.file.size + }); +}); + +router.get('/', (_req: Request, res: Response) => { + if (!fs.existsSync(UPLOADS_DIR)) { + res.json({ files: [] }); + return; + } + const entries = fs.readdirSync(UPLOADS_DIR, { withFileTypes: true }); + const files = entries + .filter(e => e.isFile()) + .map(e => ({ + filename: e.name, + filePath: path.join(UPLOADS_DIR, e.name), + size: fs.statSync(path.join(UPLOADS_DIR, e.name)).size + })) + .sort((a, b) => b.filePath.localeCompare(a.filePath)); + res.json({ files }); +}); + +router.post('/youtube', (req: Request, res: Response) => { + if (!isYtDlpAvailable()) { + res.status(400).json({ error: 'yt-dlp is not installed or not in PATH' }); + return; + } + + const { url } = req.body; + if (!url) { + res.status(400).json({ error: 'URL is required' }); + return; + } + + try { + const result = downloadVideo(url, UPLOADS_DIR); + res.json(result); + } catch (err: any) { + res.status(500).json({ error: `Failed to download: ${err.message}` }); + } +}); + +export default router; diff --git a/src/server/routes/jobs.ts b/src/server/routes/jobs.ts new file mode 100644 
index 0000000..0074766 --- /dev/null +++ b/src/server/routes/jobs.ts @@ -0,0 +1,174 @@ +import { Router, Request, Response } from 'express'; +import path from 'path'; +import fs from 'fs'; +import { JobManager } from '../services/jobManager'; +import { getJob } from '../db/jobStore'; + +function getParam(req: Request, name: string): string { + const val = req.params[name]; + return Array.isArray(val) ? val[0] : val; +} + +export function createJobsRouter(jobManager: JobManager): Router { + const router = Router(); + + router.get('/', (_req: Request, res: Response) => { + const jobs = jobManager.listJobs(); + res.json({ jobs }); + }); + + router.post('/', (req: Request, res: Response) => { + const { videoPath, config, outputOptions } = req.body; + if (!videoPath) { + res.status(400).json({ error: 'videoPath is required' }); + return; + } + if (!fs.existsSync(videoPath)) { + res.status(400).json({ error: `Video file not found: ${videoPath}` }); + return; + } + + try { + const job = jobManager.createJob(videoPath, config || {}, outputOptions || {}); + res.status(201).json({ job }); + } catch (err: any) { + res.status(500).json({ error: err.message }); + } + }); + + router.get('/:id', (req: Request, res: Response) => { + const job = getJob(getParam(req, 'id')); + if (!job) { + res.status(404).json({ error: 'Job not found' }); + return; + } + res.json({ job }); + }); + + router.post('/:id/start', async (req: Request, res: Response) => { + try { + await jobManager.startJob(getParam(req, 'id')); + res.json({ success: true }); + } catch (err: any) { + res.status(400).json({ error: err.message }); + } + }); + + router.post('/:id/pause', async (req: Request, res: Response) => { + try { + await jobManager.pauseJob(getParam(req, 'id')); + res.json({ success: true }); + } catch (err: any) { + res.status(400).json({ error: err.message }); + } + }); + + router.post('/:id/restart', async (req: Request, res: Response) => { + try { + await jobManager.restartJob(getParam(req, 'id')); 
+ res.json({ success: true }); + } catch (err: any) { + res.status(400).json({ error: err.message }); + } + }); + + router.post('/:id/cancel', async (req: Request, res: Response) => { + try { + await jobManager.cancelJob(getParam(req, 'id')); + res.json({ success: true }); + } catch (err: any) { + res.status(400).json({ error: err.message }); + } + }); + + router.delete('/:id', (req: Request, res: Response) => { + try { + jobManager.deleteJob(getParam(req, 'id')); + res.json({ success: true }); + } catch (err: any) { + res.status(400).json({ error: err.message }); + } + }); + + router.get('/:id/progress', (req: Request, res: Response) => { + res.setHeader('Content-Type', 'text/event-stream'); + res.setHeader('Cache-Control', 'no-cache'); + res.setHeader('Connection', 'keep-alive'); + res.setHeader('X-Accel-Buffering', 'no'); + + const sendProgress = (data: Record) => { + res.write(`data: ${JSON.stringify(data)}\n\n`); + }; + + const initialJob = getJob(getParam(req, 'id')); + if (initialJob) { + sendProgress({ + id: initialJob.id, + status: initialJob.status, + progress: initialJob.progress, + currentIndex: initialJob.current_index, + totalUnits: initialJob.total_units, + segments: JSON.parse(initialJob.segments), + error: initialJob.error, + output_audio: initialJob.output_audio, + output_subtitles_srt: initialJob.output_subtitles_srt, + output_subtitles_vtt: initialJob.output_subtitles_vtt, + output_muxed: initialJob.output_muxed + }); + } + + const unsubscribe = jobManager.onJobProgress(getParam(req, 'id'), (data) => { + if (data.status === 'completed' || data.status === 'failed' || data.status === 'cancelled') { + sendProgress(data as unknown as Record); + res.end(); + unsubscribe(); + return; + } + sendProgress(data as unknown as Record); + }); + + req.on('close', () => { + unsubscribe(); + }); + }); + + router.get('/:id/download/:type', (req: Request, res: Response) => { + const job = getJob(getParam(req, 'id')); + if (!job) { + res.status(404).json({ error: 
'Job not found' }); + return; + } + + const type = getParam(req, 'type'); + let filePath: string | null = null; + let filename: string = ''; + + switch (type) { + case 'audio': + filePath = job.output_audio; + filename = `${path.basename(job.video_filename, path.extname(job.video_filename))}_description.mp3`; + break; + case 'subtitles': + const format = (req.query.format as string) || 'srt'; + filePath = format === 'vtt' ? job.output_subtitles_vtt : job.output_subtitles_srt; + filename = `${path.basename(job.video_filename, path.extname(job.video_filename))}_description.${format}`; + break; + case 'muxed': + filePath = job.output_muxed; + filename = `${path.basename(job.video_filename, path.extname(job.video_filename))}_described.mkv`; + break; + default: + res.status(400).json({ error: 'Invalid download type' }); + return; + } + + if (!filePath || !fs.existsSync(filePath)) { + res.status(404).json({ error: 'Output file not found' }); + return; + } + + res.download(filePath, filename); + }); + + return router; +} diff --git a/src/server/services/jobManager.ts b/src/server/services/jobManager.ts new file mode 100644 index 0000000..c5c15b0 --- /dev/null +++ b/src/server/services/jobManager.ts @@ -0,0 +1,293 @@ +import path from 'path'; +import fs from 'fs'; +import { + getAllJobs, getJob, createJob, updateJobStatus, saveCheckpoint, + saveJobOutputs, deleteJob as deleteJobFromDb, Job, OutputOptions +} from '../db/jobStore'; +import { generateAudioDescriptionFromOptions } from '../../utils/processor'; +import { generateSRT, generateVTT } from './subtitleGenerator'; +import { muxAudioDescription } from './muxer'; +import { getDefaultConfig, Config } from '../../config/config'; +import { AudioSegment, BatchContext } from '../../interfaces'; +import { getVideoDuration } from '../../utils/mediaUtils'; +import { EventEmitter } from 'events'; + +interface ProgressData { + id: string; + status: string; + progress: number; + currentIndex: number; + totalUnits: number; + 
segments: AudioSegment[]; + error: string | null; + output_audio: string | null; + output_subtitles_srt: string | null; + output_subtitles_vtt: string | null; + output_muxed: string | null; +} + +export class JobManager { + private queue: string[] = []; + private processing = false; + private pausedJobs = new Set(); + private emitter = new EventEmitter(); + private pollInterval: ReturnType | null = null; + + constructor() { + this.recoverStuckJobs(); + this.emitter.setMaxListeners(100); + } + + private recoverStuckJobs(): void { + const jobs = getAllJobs(); + for (const job of jobs) { + if (job.status === 'processing') { + updateJobStatus(job.id, 'failed', 'Server restarted while job was in progress. Click Restart to resume from the last checkpoint.'); + } + } + } + + createJob(videoPath: string, configOverride: Partial = {}, outputOptions: Partial = {}): Job { + const baseConfig = getDefaultConfig(); + const mergedConfig: Config = { ...baseConfig, ...configOverride }; + + const filename = path.basename(videoPath); + const opts: OutputOptions = { + audio: outputOptions.audio !== false, + subtitles: outputOptions.subtitles !== false, + muxed: outputOptions.muxed || false + }; + + return createJob(videoPath, filename, mergedConfig, opts); + } + + async startJob(jobId: string): Promise { + const job = getJob(jobId); + if (!job) throw new Error('Job not found'); + if (job.status === 'processing') throw new Error('Job is already processing'); + if (job.status === 'completed') throw new Error('Job is already completed'); + + updateJobStatus(jobId, 'queued'); + this.queue.push(jobId); + this.processNext(); + } + + async pauseJob(jobId: string): Promise { + const job = getJob(jobId); + if (!job) throw new Error('Job not found'); + if (job.status !== 'processing') throw new Error('Only processing jobs can be paused'); + + this.pausedJobs.add(jobId); + updateJobStatus(jobId, 'paused'); + this.emitProgress(jobId); + } + + async restartJob(jobId: string): Promise { + const job 
= getJob(jobId); + if (!job) throw new Error('Job not found'); + if (job.status !== 'failed' && job.status !== 'paused' && job.status !== 'cancelled') { + throw new Error('Only failed, paused, or cancelled jobs can be restarted'); + } + + this.pausedJobs.delete(jobId); + updateJobStatus(jobId, 'queued'); + this.queue.push(jobId); + this.processNext(); + } + + async cancelJob(jobId: string): Promise { + const job = getJob(jobId); + if (!job) throw new Error('Job not found'); + + if (job.status === 'processing') { + this.pausedJobs.add(jobId); + } + updateJobStatus(jobId, 'cancelled'); + this.emitProgress(jobId); + } + + deleteJob(jobId: string): void { + const job = getJob(jobId); + if (!job) throw new Error('Job not found'); + if (job.status === 'processing') throw new Error('Cannot delete a running job'); + + deleteJobFromDb(jobId); + } + + listJobs(): Job[] { + return getAllJobs(); + } + + onJobProgress(jobId: string, callback: (data: ProgressData) => void): () => void { + this.emitter.on(`progress:${jobId}`, callback); + + if (!this.pollInterval) { + this.pollInterval = setInterval(() => { + for (const id of this.emitter.eventNames()) { + const eventName = String(id); + if (eventName.startsWith('progress:')) { + const jId = eventName.replace('progress:', ''); + this.emitProgress(jId); + } + } + }, 2000); + } + + return () => { + this.emitter.off(`progress:${jobId}`, callback); + }; + } + + private emitProgress(jobId: string): void { + const job = getJob(jobId); + if (!job) return; + + const data: ProgressData = { + id: job.id, + status: job.status, + progress: job.progress, + currentIndex: job.current_index, + totalUnits: job.total_units, + segments: JSON.parse(job.segments || '[]'), + error: job.error, + output_audio: job.output_audio, + output_subtitles_srt: job.output_subtitles_srt, + output_subtitles_vtt: job.output_subtitles_vtt, + output_muxed: job.output_muxed + }; + this.emitter.emit(`progress:${jobId}`, data); + } + + private async processNext(): 
Promise { + if (this.processing) return; + + while (this.queue.length > 0) { + this.processing = true; + const jobId = this.queue.shift()!; + + const job = getJob(jobId); + if (!job || job.status !== 'queued') continue; + + try { + await this.processJob(job); + } catch (err: any) { + console.error(`Job ${jobId} failed:`, err.message); + } + } + this.processing = false; + } + + private async processJob(job: Job): Promise { + updateJobStatus(job.id, 'processing'); + this.emitProgress(job.id); + + const config: Config = JSON.parse(job.config); + const outputOptions: OutputOptions = JSON.parse(job.output_options); + + const existingSegments: AudioSegment[] = JSON.parse(job.segments || '[]'); + const lastContext: BatchContext = JSON.parse(job.last_context || '{}'); + + const startIndex = existingSegments.length > 0 ? job.current_index : 0; + const startTimePosition = job.current_time_position || 0; + + const videoDuration = getVideoDuration(job.video_path); + const totalUnits = config.batchTimeMode + ? Math.floor(videoDuration / config.batchWindowDuration) + : Math.floor(videoDuration / config.captureIntervalSeconds); + + saveCheckpoint(job.id, JSON.stringify(existingSegments), startIndex, totalUnits, startTimePosition, JSON.stringify(lastContext), 0); + this.emitProgress(job.id); + + try { + const result = await generateAudioDescriptionFromOptions( + job.video_path, + config, + { + startIndex, + existingSegments, + lastContext, + currentTimePosition: startTimePosition, + onProgress: (info) => { + if (this.pausedJobs.has(job.id)) { + throw new Error('JOB_PAUSED'); + } + + const allSegments = existingSegments.length > 0 && info.index === startIndex + ? [...existingSegments, info.segment] + : (() => { + const currentJob = getJob(job.id); + if (!currentJob) return [info.segment]; + const segs = JSON.parse(currentJob.segments || '[]'); + segs.push(info.segment); + return segs; + })(); + + const progress = totalUnits > 0 ? 
Math.min(((info.index + 1) / totalUnits) * 100, 99) : 50; + + saveCheckpoint( + job.id, + JSON.stringify(allSegments), + info.index + 1, + totalUnits, + info.segment.startTime + info.segment.duration + (config.batchTimeMode ? 0.5 : 0.25), + JSON.stringify(lastContext), + progress + ); + + this.emitProgress(job.id); + } + } + ); + + // All segments from the result + const segments = result.segments || []; + + // Combine audio segments into final audio (use the result's pre-combined file) + const outputAudio = result.audioDescriptionFile; + + let outputSubtitlesSrt: string | null = null; + let outputSubtitlesVtt: string | null = null; + let outputMuxed: string | null = null; + + const baseName = path.basename(job.video_path, path.extname(job.video_path)); + const outputDir = config.outputDir; + + if (outputOptions.subtitles && segments.length > 0) { + const srtPath = path.join(outputDir, `${baseName}_description.srt`); + const vttPath = path.join(outputDir, `${baseName}_description.vtt`); + fs.writeFileSync(srtPath, generateSRT(segments, videoDuration)); + fs.writeFileSync(vttPath, generateVTT(segments, videoDuration)); + outputSubtitlesSrt = srtPath; + outputSubtitlesVtt = vttPath; + } + + if (outputOptions.muxed && fs.existsSync(outputAudio)) { + const muxedPath = path.join(outputDir, `${baseName}_described.mkv`); + muxAudioDescription(job.video_path, outputAudio, muxedPath); + outputMuxed = muxedPath; + } + + saveJobOutputs(job.id, { + audio: outputAudio, + subtitlesSrt: outputSubtitlesSrt || undefined, + subtitlesVtt: outputSubtitlesVtt || undefined, + muxed: outputMuxed || undefined + }); + + saveCheckpoint(job.id, JSON.stringify(segments), totalUnits, totalUnits, 0, '{}', 100); + updateJobStatus(job.id, 'completed'); + this.emitProgress(job.id); + + } catch (err: any) { + if (err.message === 'JOB_PAUSED') { + updateJobStatus(job.id, 'paused'); + this.emitProgress(job.id); + return; + } + + const errorMsg = err.message || 'Unknown error'; + 
updateJobStatus(job.id, 'failed', errorMsg); + this.emitProgress(job.id); + } + } +} diff --git a/src/server/services/muxer.ts b/src/server/services/muxer.ts new file mode 100644 index 0000000..0ad0b8c --- /dev/null +++ b/src/server/services/muxer.ts @@ -0,0 +1,29 @@ +import { execSync } from 'child_process'; +import path from 'path'; + +export function muxAudioDescription( + videoPath: string, + audioPath: string, + outputPath: string +): void { + const ext = path.extname(outputPath).toLowerCase(); + const isMkv = ext === '.mkv'; + + const cmd = [ + 'ffmpeg -v error', + `-i "${videoPath}"`, + `-i "${audioPath}"`, + '-map 0:v', + '-map 0:a?', + '-map 1:a', + '-c:v copy', + '-c:a copy', + isMkv + ? '-metadata:s:a:1 title="Audio Description"' + : '-metadata:s:a:1 title="Audio Description"', + `"${outputPath}"`, + '-y' + ].join(' '); + + execSync(cmd); +} diff --git a/src/server/services/subtitleGenerator.ts b/src/server/services/subtitleGenerator.ts new file mode 100644 index 0000000..c45ae78 --- /dev/null +++ b/src/server/services/subtitleGenerator.ts @@ -0,0 +1,70 @@ +import { AudioSegment } from '../../interfaces'; + +function formatSrtTime(seconds: number): string { + const h = Math.floor(seconds / 3600); + const m = Math.floor((seconds % 3600) / 60); + const s = Math.floor(seconds % 60); + const ms = Math.floor((seconds % 1) * 1000); + return `${h.toString().padStart(2, '0')}:${m.toString().padStart(2, '0')}:${s.toString().padStart(2, '0')},${ms.toString().padStart(3, '0')}`; +} + +function formatVttTime(seconds: number): string { + const h = Math.floor(seconds / 3600); + const m = Math.floor((seconds % 3600) / 60); + const s = Math.floor(seconds % 60); + const ms = Math.floor((seconds % 1) * 1000); + return `${h.toString().padStart(2, '0')}:${m.toString().padStart(2, '0')}:${s.toString().padStart(2, '0')}.${ms.toString().padStart(3, '0')}`; +} + +function cleanDescription(text: string): string { + return text.replace(/\n+/g, ' ').replace(/\s+/g, ' ').trim(); +} 
+ +export function generateSRT(segments: AudioSegment[], videoDuration: number): string { + if (segments.length === 0) return ''; + + const sorted = [...segments].sort((a, b) => a.startTime - b.startTime); + const lines: string[] = []; + + for (let i = 0; i < sorted.length; i++) { + const seg = sorted[i]; + const startTime = seg.startTime; + let endTime: number; + if (i < sorted.length - 1) { + endTime = sorted[i + 1].startTime; + } else { + endTime = Math.min(seg.startTime + seg.duration + 0.5, videoDuration); + } + + lines.push((i + 1).toString()); + lines.push(`${formatSrtTime(startTime)} --> ${formatSrtTime(endTime)}`); + lines.push(cleanDescription(seg.description)); + lines.push(''); + } + + return lines.join('\n'); +} + +export function generateVTT(segments: AudioSegment[], videoDuration: number): string { + if (segments.length === 0) return ''; + + const sorted = [...segments].sort((a, b) => a.startTime - b.startTime); + const lines: string[] = ['WEBVTT', '']; + + for (let i = 0; i < sorted.length; i++) { + const seg = sorted[i]; + const startTime = seg.startTime; + let endTime: number; + if (i < sorted.length - 1) { + endTime = sorted[i + 1].startTime; + } else { + endTime = Math.min(seg.startTime + seg.duration + 0.5, videoDuration); + } + + lines.push(`${formatVttTime(startTime)} --> ${formatVttTime(endTime)}`); + lines.push(cleanDescription(seg.description)); + lines.push(''); + } + + return lines.join('\n'); +} diff --git a/src/server/services/ytDlp.ts b/src/server/services/ytDlp.ts new file mode 100644 index 0000000..c06c9fd --- /dev/null +++ b/src/server/services/ytDlp.ts @@ -0,0 +1,47 @@ +import { execSync } from 'child_process'; +import path from 'path'; +import fs from 'fs'; + +export interface YtDlpResult { + filePath: string; + filename: string; + title: string; +} + +export function isYtDlpAvailable(): boolean { + try { + execSync('yt-dlp --version', { stdio: 'pipe' }); + return true; + } catch { + return false; + } +} + +export function 
downloadVideo(url: string, outputDir: string): YtDlpResult { + if (!fs.existsSync(outputDir)) { + fs.mkdirSync(outputDir, { recursive: true }); + } + + const outputTemplate = path.join(outputDir, '%(title)s.%(ext)s'); + + const result = execSync( + `yt-dlp -f "best[ext=mp4]/best" -o "${outputTemplate}" --print filename --print title "${url}"`, + { encoding: 'utf-8', timeout: 600000 } + ); + + const lines = result.trim().split('\n'); + const filename = lines[0]?.trim(); + const title = lines[1]?.trim() || filename; + + if (!filename) { + throw new Error('yt-dlp: Failed to parse downloaded filename'); + } + + const filePath = path.resolve(outputDir, filename); + + if (!fs.existsSync(filePath)) { + throw new Error(`yt-dlp: Downloaded file not found at ${filePath}`); + } + + return { filePath, filename, title }; +} diff --git a/src/utils/processor.ts b/src/utils/processor.ts index 46b3b7a..34db9c7 100644 --- a/src/utils/processor.ts +++ b/src/utils/processor.ts @@ -6,7 +6,8 @@ import { AudioSegment, Stats, BatchContext, - ProcessingResult + ProcessingResult, + ProcessingOptions } from '../interfaces'; import { Config, getDefaultConfig } from '../config/config'; import { printStats, createStats } from '../config/stats'; @@ -28,7 +29,8 @@ import { */ export async function generateAudioDescriptionFromOptions( videoFilePath: string, - options: Partial = {} + options: Partial = {}, + processingOptions: ProcessingOptions = {} ): Promise { const config = { ...getDefaultConfig(), ...options }; @@ -43,7 +45,7 @@ export async function generateAudioDescriptionFromOptions( const ttsProvider = TTSProviderFactory.getProvider(config); const stats = createStats(); - return generateAudioDescription(videoFilePath, visionProvider, ttsProvider, config, stats); + return generateAudioDescription(videoFilePath, visionProvider, ttsProvider, config, stats, processingOptions); } /** @@ -60,7 +62,8 @@ export async function generateAudioDescription( visionProvider: VisionProvider, ttsProvider: 
TTSProvider, options: Partial = {}, - stats: Stats + stats: Stats, + processingOptions: ProcessingOptions = {} ): Promise { // Merge provided options with defaults const settings = { ...options } as Config; @@ -86,7 +89,8 @@ export async function generateAudioDescription( settings, visionProvider, ttsProvider, - stats + stats, + processingOptions ); } @@ -97,18 +101,23 @@ export async function generateAudioDescription( // Context window to store previous frames const frameContext: { index: number; path: string; timePosition: number }[] = []; - // Array to store audio segment information - const audioSegments: AudioSegment[] = []; + // Array to store audio segment information - preload with existing segments if resuming + const audioSegments: AudioSegment[] = processingOptions.existingSegments + ? [...processingOptions.existingSegments] + : []; // Track our current time position (will be adjusted for audio overlap) - let currentTimePosition = 0; + let currentTimePosition = processingOptions.currentTimePosition || 0; + + // Start from given index if resuming + const startIndex = processingOptions.startIndex || 0; // Track drift from the original schedule let timelineDrift = 0; const maxAllowableDrift = settings.captureIntervalSeconds * 2; // Maximum drift before warning // Process each frame - for (let i = 0; i < totalFrames; i++) { + for (let i = startIndex; i < totalFrames; i++) { // Calculate the ideal time position based on the original schedule const idealTimePosition = i * settings.captureIntervalSeconds; @@ -183,12 +192,23 @@ export async function generateAudioDescription( console.log(`Audio duration: ${audioDuration} seconds`); // Store segment information - audioSegments.push({ + const segment: AudioSegment = { audioFile: audioFilePath, startTime: timePosition, duration: audioDuration, description - }); + }; + audioSegments.push(segment); + + // Notify progress callback + if (processingOptions.onProgress) { + processingOptions.onProgress({ + type: 'frame', + 
index: i, + total: totalFrames, + segment + }); + } // Update the time position for the next iteration // Add a small buffer (0.25 sec) between descriptions to prevent hard cuts @@ -217,7 +237,8 @@ export async function generateAudioDescription( return { videoFile: videoFilePath, - audioDescriptionFile: outputAudioPath + audioDescriptionFile: outputAudioPath, + segments: audioSegments }; } @@ -236,24 +257,31 @@ async function generateAudioDescriptionBatch( settings: Config, visionProvider: VisionProvider, ttsProvider: TTSProvider, - stats: Stats + stats: Stats, + processingOptions: ProcessingOptions = {} ): Promise { const totalBatches = Math.floor(videoDuration / settings.batchWindowDuration); console.log(`Using batchTimeMode. Total batches: ${totalBatches} (each covers ${settings.batchWindowDuration} sec)`); // We'll hold the last batch's frames or last batch's description for context - let lastBatchContext: BatchContext = {}; + let lastBatchContext: BatchContext = processingOptions.lastContext || {}; - const audioSegments: AudioSegment[] = []; + // Preload with existing segments if resuming + const audioSegments: AudioSegment[] = processingOptions.existingSegments + ? 
[...processingOptions.existingSegments] + : []; // Track our current time position (will be adjusted for audio overlap) - let currentTimePosition = 0; + let currentTimePosition = processingOptions.currentTimePosition || 0; + + // Start from given index if resuming + const startBatchIndex = processingOptions.startIndex || 0; // Track drift from the original schedule let timelineDrift = 0; const maxAllowableDrift = settings.batchWindowDuration * 0.5; // Maximum drift of 50% of batch window - for (let batchIndex = 0; batchIndex < totalBatches; batchIndex++) { + for (let batchIndex = startBatchIndex; batchIndex < totalBatches; batchIndex++) { // Calculate ideal batch timing based on configuration const idealBatchStart = batchIndex * settings.batchWindowDuration; @@ -315,12 +343,23 @@ async function generateAudioDescriptionBatch( console.log(`Batch #${batchIndex} audio duration: ${audioDuration} seconds`); // Store segment info with the adjusted start time - audioSegments.push({ + const segment: AudioSegment = { audioFile: audioFilePath, startTime: batchStart, duration: audioDuration, description - }); + }; + audioSegments.push(segment); + + // Notify progress callback + if (processingOptions.onProgress) { + processingOptions.onProgress({ + type: 'batch', + index: batchIndex, + total: totalBatches, + segment + }); + } // Update the time position for the next iteration // Add a small buffer (0.5 sec) between descriptions @@ -355,6 +394,7 @@ async function generateAudioDescriptionBatch( return { videoFile: videoFilePath, - audioDescriptionFile: outputAudioPath + audioDescriptionFile: outputAudioPath, + segments: audioSegments }; } \ No newline at end of file