Files
aidio-description/src/providers/tts/openAITTSProvider.ts

81 lines
2.6 KiB
TypeScript
Raw Normal View History

2025-06-10 19:24:13 +02:00
import fs from 'fs';
import { execFileSync, execSync } from 'child_process';
import { OpenAI } from 'openai';
import { TTSProvider, TTSProviderConfig, TTSOptions, TTSResult } from '../../interfaces';
import { getAudioDuration } from '../../utils/mediaUtils';
/**
 * OpenAI TTS Provider Implementation.
 *
 * Generates speech through the OpenAI audio API, optionally changes its
 * tempo with FFmpeg, and writes the result to disk. The `ffmpeg` executable
 * must be resolvable on the PATH.
 */
export class OpenAITTSProvider implements TTSProvider {
  private readonly config: TTSProviderConfig;
  private readonly openai: OpenAI;

  constructor(config: TTSProviderConfig) {
    this.config = config;
    this.openai = new OpenAI({
      apiKey: config.apiKey,
    });
  }

  /**
   * Derive the path of the intermediate file that holds the raw TTS output.
   *
   * `_temp` is inserted before the extension (`out.mp3` -> `out_temp.mp3`).
   * When the path has no extension, `_temp` is appended instead so the
   * temporary path can never be identical to the final output path.
   */
  private buildTempPath(outputPath: string): string {
    const extensionPattern = /\.\w+$/;
    return extensionPattern.test(outputPath)
      ? outputPath.replace(extensionPattern, '_temp$&')
      : `${outputPath}_temp`;
  }

  /**
   * Convert text to speech
   * @param text - Text to convert to speech
   * @param outputPath - Output path for the audio file
   * @param options - Additional options (`voice`, `model`, `speedFactor`);
   *   voice and model fall back to the provider config, speedFactor to 1.0
   * @returns Duration of the generated audio (as reported by
   *   `getAudioDuration`) and the cost, which is the character count of
   *   `text`. If any step fails, the error is logged, a 1-second silent
   *   audio file is written to `outputPath`, and `{ duration: 1, cost: 0 }`
   *   is returned.
   * @throws If writing the silent fallback file with FFmpeg fails as well.
   */
  async textToSpeech(
    text: string,
    outputPath: string,
    options: TTSOptions = {}
  ): Promise<TTSResult> {
    const tempOutputPath = this.buildTempPath(outputPath);

    try {
      // Get the options, with defaults from config
      const voice = options.voice || this.config.voice;
      const model = options.model || this.config.model;
      // `||` (not `??`) so that 0, which is not a usable tempo, also falls back to 1.0
      const speedFactor = options.speedFactor || 1.0;

      // Generate the initial TTS output
      const mp3 = await this.openai.audio.speech.create({
        model: model,
        voice: voice as any, // Type casting to any to avoid type issues
        input: text
      });

      // Cost calculation is based on character count
      const cost = text.length;

      const buffer = Buffer.from(await mp3.arrayBuffer());
      fs.writeFileSync(tempOutputPath, buffer);

      if (speedFactor !== 1.0) {
        // Speed up the audio using FFmpeg. Arguments are passed as an array
        // (no shell), so quotes or shell metacharacters in the paths cannot
        // break or inject into the command.
        execFileSync('ffmpeg', [
          '-v', 'error',
          '-i', tempOutputPath,
          '-filter:a', `atempo=${speedFactor}`,
          '-c:a', 'libmp3lame',
          '-q:a', '2',
          outputPath,
          '-y'
        ]);
      } else {
        // Just use the file as is
        fs.renameSync(tempOutputPath, outputPath);
      }

      // Get actual audio duration for accurate timing
      const audioDuration = getAudioDuration(outputPath);

      return {
        duration: audioDuration,
        cost: cost
      };
    } catch (error) {
      console.error("Error generating speech:", error);

      // Create a silent audio file (1 second, mono, 24 kHz) if TTS fails
      execFileSync('ffmpeg', [
        '-v', 'error',
        '-f', 'lavfi',
        '-i', 'anullsrc=r=24000:cl=mono',
        '-t', '1',
        '-q:a', '9',
        '-acodec', 'libmp3lame',
        outputPath,
        '-y'
      ]);

      return {
        duration: 1,
        cost: 0
      };
    } finally {
      // Clean up the temporary file on every path, including an FFmpeg
      // failure. After a successful rename it no longer exists, hence the
      // existence check. Cleanup is best-effort and must not mask the result.
      try {
        if (fs.existsSync(tempOutputPath)) {
          fs.unlinkSync(tempOutputPath);
        }
      } catch (cleanupError) {
        console.warn("Could not remove temporary TTS file:", cleanupError);
      }
    }
  }
}