WIP typescript conversion

2025-06-10 19:24:13 +02:00
parent 9425b4b256
commit 507d4f6474
26 changed files with 2128 additions and 27 deletions
--- a/src/providers/tts/openAITTSProvider.ts
+++ b/src/providers/tts/openAITTSProvider.ts
@@ -0,0 +1,81 @@
+import fs from 'fs';
+import { execFileSync, execSync } from 'child_process';
+import { OpenAI } from 'openai';
+import { TTSProvider, TTSProviderConfig, TTSOptions, TTSResult } from '../../interfaces';
+import { getAudioDuration } from '../../utils/mediaUtils';
+
+/**
+ * OpenAI TTS provider: synthesizes speech through the OpenAI audio API and
+ * post-processes the result with the `ffmpeg` executable resolved from PATH.
+ */
+export class OpenAITTSProvider implements TTSProvider {
+  private config: TTSProviderConfig;
+  private openai: OpenAI;
+
+  constructor(config: TTSProviderConfig) {
+    this.config = config;
+    this.openai = new OpenAI({ apiKey: config.apiKey });
+  }
+
+  /**
+   * Convert text to speech and write the audio to `outputPath`.
+   *
+   * If any step fails, the error is logged and a one-second silent MP3 is
+   * written to `outputPath` instead; only a failure of that fallback rejects.
+   *
+   * @param text - Text to convert to speech
+   * @param outputPath - Output path for the audio file
+   * @param options - Optional `voice`, `model` and `speedFactor` overrides
+   * @returns Duration in seconds and cost (`text.length`; 0 on the fallback)
+   */
+  async textToSpeech(
+    text: string,
+    outputPath: string,
+    options: TTSOptions = {}
+  ): Promise<TTSResult> {
+    // Stage the raw download in a sibling file. Paths without an extension get
+    // a plain suffix so the staging file can never alias the final output.
+    const tempOutputPath = /\.\w+$/.test(outputPath)
+      ? outputPath.replace(/\.\w+$/, '_temp$&')
+      : `${outputPath}_temp`;
+
+    try {
+      const voice = options.voice || this.config.voice;
+      const model = options.model || this.config.model;
+      const speedFactor = options.speedFactor || 1.0;
+
+      const mp3 = await this.openai.audio.speech.create({
+        model: model,
+        voice: voice as any, // Type casting to any to avoid type issues
+        input: text
+      });
+      fs.writeFileSync(tempOutputPath, Buffer.from(await mp3.arrayBuffer()));
+
+      if (speedFactor !== 1.0) {
+        // Argument array instead of a shell string: paths containing quotes,
+        // `$` or backticks can no longer break or inject into the command.
+        execFileSync('ffmpeg', [
+          '-y', '-v', 'error', '-i', tempOutputPath,
+          '-filter:a', `atempo=${speedFactor}`,
+          '-c:a', 'libmp3lame', '-q:a', '2', outputPath,
+        ]);
+      } else {
+        fs.renameSync(tempOutputPath, outputPath);
+      }
+
+      // Measure the final file so the duration reflects any tempo change.
+      return { duration: getAudioDuration(outputPath), cost: text.length };
+    } catch (error: unknown) {
+      console.error("Error generating speech:", error);
+      // Create a silent audio file if TTS fails
+      execFileSync('ffmpeg', [
+        '-y', '-v', 'error', '-f', 'lavfi', '-i', 'anullsrc=r=24000:cl=mono',
+        '-t', '1', '-q:a', '9', '-acodec', 'libmp3lame', outputPath,
+      ]);
+      return { duration: 1, cost: 0 };
+    } finally {
+      // Remove the staging file on every path (already gone after a rename).
+      fs.rmSync(tempOutputPath, { force: true });
+    }
+  }
+}