82 lines
2.6 KiB
TypeScript
82 lines
2.6 KiB
TypeScript
import fs from 'fs';
import { execFileSync, execSync } from 'child_process';
import { OpenAI } from 'openai';
import { TTSProvider, TTSProviderConfig, TTSOptions, TTSResult } from '../../interfaces';
import { getAudioDuration } from '../../utils/mediaUtils';
|
|
|
|
/**
 * OpenAI TTS Provider Implementation
 *
 * Synthesizes speech through the OpenAI audio API, optionally re-times the
 * result with the `ffmpeg` command-line tool, and reports the duration of the
 * file that ends up at the requested output path.
 */
export class OpenAITTSProvider implements TTSProvider {
  private readonly config: TTSProviderConfig;
  private readonly openai: OpenAI;

  /**
   * @param config - Provider configuration; `apiKey` is passed to the OpenAI
   *   client, `voice` and `model` act as defaults for `textToSpeech`.
   */
  constructor(config: TTSProviderConfig) {
    this.config = config;
    this.openai = new OpenAI({
      apiKey: config.apiKey,
    });
  }

  /**
   * Derive the scratch file path used while the final output is produced.
   *
   * `_temp` is inserted before the trailing extension (`out.mp3` ->
   * `out_temp.mp3`). When the path has no extension the suffix is appended
   * instead, so the scratch path can never be equal to `outputPath`;
   * otherwise ffmpeg would read and write the same file and the cleanup step
   * would delete the final output.
   */
  private static tempPathFor(outputPath: string): string {
    const withSuffix = outputPath.replace(/\.\w+$/, '_temp$&');
    return withSuffix === outputPath ? `${outputPath}_temp` : withSuffix;
  }

  /**
   * Convert text to speech
   * @param text - Text to convert to speech
   * @param outputPath - Output path for the audio file
   * @param options - Additional options (`voice`, `model`, `speedFactor`,
   *   `instructions`); `voice` and `model` fall back to the provider config
   * @returns Duration of the generated audio in seconds and cost (the
   *   character count of `text`). If any step fails, the error is logged, a
   *   1-second silent file is written to `outputPath`, and
   *   `{ duration: 1, cost: 0 }` is returned instead of throwing.
   */
  async textToSpeech(
    text: string,
    outputPath: string,
    options: TTSOptions = {}
  ): Promise<TTSResult> {
    try {
      // Get the options, with defaults from config
      const voice = options.voice || this.config.voice;
      const model = options.model || this.config.model;
      // `||` is deliberate: a missing, zero or NaN factor means "no speed change".
      const speedFactor = options.speedFactor || 1.0;

      const tempOutputPath = OpenAITTSProvider.tempPathFor(outputPath);

      const speech = await this.openai.audio.speech.create({
        model,
        // The configured voice is not typed as the SDK's voice union, hence the cast.
        voice: voice as any,
        input: text,
        ...(options.instructions ? { instructions: options.instructions } : {})
      });

      // Cost calculation is based on character count
      const cost = text.length;

      try {
        fs.writeFileSync(tempOutputPath, Buffer.from(await speech.arrayBuffer()));

        if (speedFactor !== 1.0) {
          // Arguments are passed as an array (no shell), so quotes, spaces or
          // `$` in the paths cannot break or inject into the command line.
          execFileSync('ffmpeg', [
            '-y',
            '-v', 'error',
            '-i', tempOutputPath,
            '-filter:a', `atempo=${speedFactor}`,
            '-c:a', 'libmp3lame',
            '-q:a', '2',
            outputPath,
          ]);
        } else {
          // Just use the file as is
          fs.renameSync(tempOutputPath, outputPath);
        }
      } finally {
        // Remove the scratch file on success and on failure alike. After a
        // successful rename it no longer exists and there is nothing to do.
        if (fs.existsSync(tempOutputPath)) {
          fs.unlinkSync(tempOutputPath);
        }
      }

      // Get actual audio duration for accurate timing
      const audioDuration = getAudioDuration(outputPath);

      return {
        duration: audioDuration,
        cost
      };
    } catch (error) {
      console.error("Error generating speech:", error);
      // Create a silent audio file if TTS fails, so a file still exists at
      // outputPath. If ffmpeg itself is unavailable this call throws and the
      // error propagates to the caller.
      execFileSync('ffmpeg', [
        '-y',
        '-v', 'error',
        '-f', 'lavfi',
        '-i', 'anullsrc=r=24000:cl=mono',
        '-t', '1',
        '-q:a', '9',
        '-acodec', 'libmp3lame',
        outputPath,
      ]);
      return {
        duration: 1,
        cost: 0
      };
    }
  }
}