aidio-description/src/providers/tts/openAITTSProvider.ts

import fs from 'fs';
import { execFileSync, execSync } from 'child_process';
import { OpenAI } from 'openai';
import { TTSProvider, TTSProviderConfig, TTSOptions, TTSResult } from '../../interfaces';
import { getAudioDuration } from '../../utils/mediaUtils';

/**
 * OpenAI TTS Provider Implementation.
 *
 * Generates speech through the OpenAI audio API, optionally re-times it with
 * FFmpeg's `atempo` filter, and writes the result to the requested path.
 * FFmpeg is always invoked with an argument array (no shell), so file paths
 * containing quotes, `$`, backticks or other shell metacharacters are passed
 * through verbatim instead of being interpreted.
 */
export class OpenAITTSProvider implements TTSProvider {
  private readonly config: TTSProviderConfig;
  private readonly openai: OpenAI;

  /**
   * @param config - Provider configuration; `apiKey` is handed to the OpenAI
   *   client, `voice` and `model` serve as per-call defaults.
   */
  constructor(config: TTSProviderConfig) {
    this.config = config;
    this.openai = new OpenAI({
      apiKey: config.apiKey,
    });
  }

  /**
   * Convert text to speech.
   *
   * On any failure (API error, file-system error, FFmpeg error, duration
   * probe error) the error is logged, the temporary file is removed, and a
   * one-second silent MP3 is written to `outputPath` instead.
   *
   * @param text - Text to convert to speech
   * @param outputPath - Output path for the audio file
   * @param options - Optional `voice`, `model` and `speedFactor` overrides
   * @returns Duration of the generated audio in seconds, and the cost
   *   (the character count of `text`); `{ duration: 1, cost: 0 }` when the
   *   silent fallback was produced
   * @throws If generating the silent fallback file itself fails
   */
  async textToSpeech(
    text: string,
    outputPath: string,
    options: TTSOptions = {}
  ): Promise<TTSResult> {
    // Insert "_temp" before the extension; when there is no extension, append
    // it, so the temporary file can never be the same path as the final output
    // (FFmpeg must not read and overwrite one file in the same run).
    const extensionPattern = /\.\w+$/;
    const tempOutputPath = extensionPattern.test(outputPath)
      ? outputPath.replace(extensionPattern, '_temp$&')
      : `${outputPath}_temp`;

    try {
      // Per-call options win; fall back to the provider configuration.
      const voice = options.voice || this.config.voice;
      const model = options.model || this.config.model;
      // `||` is deliberate: a speed factor of 0 is not usable and maps to 1.0.
      const speedFactor = options.speedFactor || 1.0;

      const speech = await this.openai.audio.speech.create({
        model: model,
        voice: voice as any, // Type casting to any to avoid type issues
        input: text
      });

      // Cost calculation is based on character count
      const cost = text.length;

      fs.writeFileSync(tempOutputPath, Buffer.from(await speech.arrayBuffer()));

      if (speedFactor !== 1.0) {
        // Re-time the audio with FFmpeg, then drop the intermediate file.
        execFileSync('ffmpeg', [
          '-y',
          '-v', 'error',
          '-i', tempOutputPath,
          '-filter:a', `atempo=${speedFactor}`,
          '-c:a', 'libmp3lame',
          '-q:a', '2',
          outputPath,
        ]);
        fs.unlinkSync(tempOutputPath);
      } else {
        // No speed change requested: the raw TTS output is the final file.
        fs.renameSync(tempOutputPath, outputPath);
      }

      // Get actual audio duration for accurate timing
      return {
        duration: getAudioDuration(outputPath),
        cost: cost
      };
    } catch (error: unknown) {
      console.error("Error generating speech:", error);

      // Do not leave a partially processed temporary file behind.
      // `force` makes this a no-op when the file was never created.
      fs.rmSync(tempOutputPath, { force: true });

      // Create a silent audio file if TTS fails
      execFileSync('ffmpeg', [
        '-y',
        '-v', 'error',
        '-f', 'lavfi',
        '-i', 'anullsrc=r=24000:cl=mono',
        '-t', '1',
        '-q:a', '9',
        '-acodec', 'libmp3lame',
        outputPath,
      ]);
      return {
        duration: 1,
        cost: 0
      };
    }
  }
}
WIP typescript conversion 2025-06-10 19:24:13 +02:00			`import fs from 'fs';`
			`import { execSync } from 'child_process';`
			`import { OpenAI } from 'openai';`
			`import { TTSProvider, TTSProviderConfig, TTSOptions, TTSResult } from '../../interfaces';`
			`import { getAudioDuration } from '../../utils/mediaUtils';`

			`/**`
			`* OpenAI TTS Provider Implementation`
			`*/`
			`export class OpenAITTSProvider implements TTSProvider {`
			`private config: TTSProviderConfig;`
			`private openai: OpenAI;`

			`constructor(config: TTSProviderConfig) {`
			`this.config = config;`
			`this.openai = new OpenAI({`
			`apiKey: config.apiKey,`
			`});`
			`}`

			`/**`
			`* Convert text to speech`
			`* @param text - Text to convert to speech`
			`* @param outputPath - Output path for the audio file`
			`* @param options - Additional options`
			`* @returns Duration of the generated audio in seconds and cost`
			`*/`
			`async textToSpeech(`
			`text: string,`
			`outputPath: string,`
			`options: TTSOptions = {}`
			`): Promise<TTSResult> {`
			`try {`
			`// Get the options, with defaults from config`
			`const voice = options.voice \|\| this.config.voice;`
			`const model = options.model \|\| this.config.model;`
			`const speedFactor = options.speedFactor \|\| 1.0;`

			`// Generate the initial TTS output`
			`const tempOutputPath = outputPath.replace(/\.\w+$/, '_temp$&');`

			`const mp3 = await this.openai.audio.speech.create({`
			`model: model,`
			`voice: voice as any, // Type casting to any to avoid type issues`
			`input: text`
			`});`

			`// Cost calculation is based on character count`
			`const cost = text.length;`

			`const buffer = Buffer.from(await mp3.arrayBuffer());`
			`fs.writeFileSync(tempOutputPath, buffer);`

			`// Speed up the audio using FFmpeg if needed`
			`if (speedFactor !== 1.0) {`
			execSync(`ffmpeg -v error -i "${tempOutputPath}" -filter:a "atempo=${speedFactor}" -c:a libmp3lame -q:a 2 "${outputPath}" -y`);
			`// Clean up temporary file`
			`fs.unlinkSync(tempOutputPath);`
			`} else {`
			`// Just use the file as is`
			`fs.renameSync(tempOutputPath, outputPath);`
			`}`

			`// Get actual audio duration for accurate timing`
			`const audioDuration = getAudioDuration(outputPath);`

			`return {`
			`duration: audioDuration,`
			`cost: cost`
			`};`
			`} catch (error) {`
			`console.error("Error generating speech:", error);`
			`// Create a silent audio file if TTS fails`
			execSync(`ffmpeg -v error -f lavfi -i anullsrc=r=24000:cl=mono -t 1 -q:a 9 -acodec libmp3lame "${outputPath}" -y`);
			`return {`
			`duration: 1,`
			`cost: 0`
			`};`
			`}`
			`}`
			`}`