import fs from 'fs';
import { execFileSync, execSync } from 'child_process';
import { OpenAI } from 'openai';
import { TTSProvider, TTSProviderConfig, TTSOptions, TTSResult } from '../../interfaces';
import { getAudioDuration } from '../../utils/mediaUtils';

/**
 * OpenAI TTS Provider Implementation
 *
 * Synthesizes speech through the OpenAI audio API and post-processes the
 * result with the `ffmpeg` command-line tool, which is invoked by bare name
 * and therefore has to be resolvable on the PATH.
 */
export class OpenAITTSProvider implements TTSProvider {
  private readonly config: TTSProviderConfig;
  private readonly openai: OpenAI;

  /**
   * @param config - Provider configuration; `apiKey` is handed to the OpenAI
   *   client, `voice` and `model` serve as per-call defaults.
   */
  constructor(config: TTSProviderConfig) {
    this.config = config;
    this.openai = new OpenAI({
      apiKey: config.apiKey,
    });
  }

  /**
   * Convert text to speech
   *
   * On any failure the error is logged and a one-second silent MP3 is written
   * to `outputPath` instead, so the method resolves with
   * `{ duration: 1, cost: 0 }` rather than rejecting. It only rejects when
   * writing that silent fallback fails as well.
   *
   * @param text - Text to convert to speech
   * @param outputPath - Output path for the audio file
   * @param options - Additional options (`voice`, `model`, `speedFactor`,
   *   `instructions`); unset values fall back to the provider config
   * @returns Duration of the generated audio in seconds and the cost, which is
   *   the character count of `text`
   */
  async textToSpeech(
    text: string,
    outputPath: string,
    options: TTSOptions = {}
  ): Promise<TTSResult> {
    // Get the options, with defaults from config.
    const voice = options.voice || this.config.voice;
    const model = options.model || this.config.model;
    // `||` is deliberate: a speed factor of 0 falls back to normal speed.
    const speedFactor = options.speedFactor || 1.0;

    // The raw TTS output is staged next to the final file. A path without an
    // extension gets a plain suffix so the staging file never aliases
    // `outputPath` (ffmpeg cannot read and write the same file).
    const extensionPattern = /\.\w+$/;
    const tempOutputPath = extensionPattern.test(outputPath)
      ? outputPath.replace(extensionPattern, '_temp$&')
      : `${outputPath}_temp`;

    try {
      // Generate the initial TTS output.
      const mp3 = await this.openai.audio.speech.create({
        model: model,
        // NOTE(review): cast kept from the original; the declared type of
        // `voice` is not visible here - TODO narrow it and drop the cast.
        // eslint-disable-next-line @typescript-eslint/no-explicit-any
        voice: voice as any,
        input: text,
        ...(options.instructions ? { instructions: options.instructions } : {}),
      });

      // Cost calculation is based on character count.
      const cost = text.length;

      const buffer = Buffer.from(await mp3.arrayBuffer());
      fs.writeFileSync(tempOutputPath, buffer);

      if (speedFactor !== 1.0) {
        // Speed up the audio using FFmpeg, re-encoding to MP3.
        this.runFfmpeg([
          '-i', tempOutputPath,
          '-filter:a', `atempo=${speedFactor}`,
          '-c:a', 'libmp3lame',
          '-q:a', '2',
          outputPath,
        ]);
        // Clean up temporary file.
        fs.unlinkSync(tempOutputPath);
      } else {
        // Just use the file as is.
        fs.renameSync(tempOutputPath, outputPath);
      }

      // Get actual audio duration for accurate timing.
      const audioDuration = getAudioDuration(outputPath);

      return {
        duration: audioDuration,
        cost: cost,
      };
    } catch (error: unknown) {
      console.error("Error generating speech:", error);

      // A failure after the staging write would otherwise leave the temp file
      // behind.
      this.removeQuietly(tempOutputPath);

      // Create a silent audio file if TTS fails.
      this.runFfmpeg([
        '-f', 'lavfi',
        '-i', 'anullsrc=r=24000:cl=mono',
        '-t', '1',
        '-q:a', '9',
        '-acodec', 'libmp3lame',
        outputPath,
      ]);

      return {
        duration: 1,
        cost: 0,
      };
    }
  }

  /**
   * Runs ffmpeg synchronously, overwriting existing outputs (`-y`) and logging
   * errors only (`-v error`). Arguments are passed as an array without a
   * shell, so paths containing quotes, `$` or backticks are taken literally.
   */
  private runFfmpeg(args: string[]): void {
    execFileSync('ffmpeg', ['-y', '-v', 'error', ...args]);
  }

  /** Best-effort file removal; a missing file or failed unlink is ignored. */
  private removeQuietly(filePath: string): void {
    try {
      fs.unlinkSync(filePath);
    } catch {
      // Nothing to clean up, or cleanup is not possible; the caller proceeds.
    }
  }
}