Add ElevenLabs TTS provider with segment continuity support

2026-05-13 02:38:16 +02:00
parent 19975917c5
commit eb15af3a36
4 changed files with 103 additions and 1 deletions
--- a/src/config/config.ts
+++ b/src/config/config.ts
@@ -75,6 +75,11 @@ export function getDefaultConfig(): Config {
        apiKey: process.env.OPENAI_API_KEY,
        model: "gpt-4o-mini-tts",
        voice: "alloy"
+      },
+      elevenlabs: {
+        apiKey: process.env.ELEVENLABS_API_KEY,
+        model: "eleven_multilingual_v2",
+        voice: "JBFqnCBsd6RMkjVDRZzb"
      }
    },
    
--- a/src/providers/tts/elevenLabsTTSProvider.ts
+++ b/src/providers/tts/elevenLabsTTSProvider.ts
@@ -0,0 +1,93 @@
+import fs from 'fs';
+import { execSync } from 'child_process';
+import axios, { AxiosInstance } from 'axios';
+import { TTSProvider, TTSProviderConfig, TTSOptions, TTSResult } from '../../interfaces';
+import { getAudioDuration } from '../../utils/mediaUtils';
+
+/**
+ * TTS provider backed by the ElevenLabs text-to-speech REST API.
+ *
+ * The `request-id` header of each successful call is remembered and sent as
+ * `previous_request_ids` on the next call, which is how this class asks the
+ * API for continuity between segments. Reuse one instance per narration.
+ */
+export class ElevenLabsTTSProvider implements TTSProvider {
+  private config: TTSProviderConfig;
+  private axiosInstance: AxiosInstance;
+  /** Request id of the last successful synthesis, or null before the first one. */
+  private lastRequestId: string | null = null;
+
+  constructor(config: TTSProviderConfig) {
+    this.config = config;
+    this.axiosInstance = axios.create({
+      baseURL: 'https://api.elevenlabs.io/v1',
+      headers: { 'xi-api-key': config.apiKey, 'Content-Type': 'application/json' }
+    });
+  }
+
+  /**
+   * Synthesizes `text` into an MP3 file at `outputPath`.
+   *
+   * On failure the error is logged and one second of silence is written
+   * to `outputPath` instead.
+   * @param text Text to speak; its length is reported as the cost.
+   * @param outputPath Destination audio file (overwritten if present).
+   * @param options Per-call overrides for voice, model and speed factor.
+   * @returns Duration of the written file and the character cost (0 on failure).
+   */
+  async textToSpeech(
+    text: string,
+    outputPath: string,
+    options: TTSOptions = {}
+  ): Promise<TTSResult> {
+    // Scratch file next to the target, e.g. "a.mp3" -> "a_temp.mp3".
+    const tempOutputPath = outputPath.replace(/\.\w+$/, '_temp$&');
+    try {
+      const voice = options.voice || this.config.voice || 'JBFqnCBsd6RMkjVDRZzb';
+      const model = options.model || this.config.model || 'eleven_multilingual_v2';
+      const speedFactor = options.speedFactor || 1.0;
+
+      // No `speed` in voice_settings: tempo is applied once, by ffmpeg below.
+      // Sending it as well would apply the factor twice (factor squared).
+      const requestBody = {
+        text,
+        model_id: model,
+        voice_settings: { stability: 0.5, similarity_boost: 0.75, use_speaker_boost: true },
+        ...(this.lastRequestId ? { previous_request_ids: [this.lastRequestId] } : {})
+      };
+
+      const response = await this.axiosInstance.post(
+        `/text-to-speech/${voice}`,
+        requestBody,
+        {
+          params: { output_format: 'mp3_44100_128' },
+          responseType: 'arraybuffer'
+        }
+      );
+
+      this.lastRequestId = response.headers['request-id'] || null;
+      fs.writeFileSync(tempOutputPath, Buffer.from(response.data));
+
+      if (speedFactor !== 1.0) {
+        execSync(`ffmpeg -v error -i "${tempOutputPath}" -filter:a "atempo=${speedFactor}" -c:a libmp3lame -q:a 2 "${outputPath}" -y`);
+        fs.unlinkSync(tempOutputPath);
+      } else {
+        fs.renameSync(tempOutputPath, outputPath);
+      }
+
+      return { duration: getAudioDuration(outputPath), cost: text.length };
+    } catch (error: any) {
+      if (error.response) {
+        console.error(`ElevenLabs TTS error (${error.response.status}):`,
+          Buffer.from(error.response.data).toString());
+      } else {
+        console.error('ElevenLabs TTS error:', error.message);
+      }
+      // Drop a scratch file left behind by a failed ffmpeg run.
+      if (tempOutputPath !== outputPath) fs.rmSync(tempOutputPath, { force: true });
+      // Fallback: one second of silence (not guarded; an ffmpeg failure here propagates).
+      execSync(`ffmpeg -v error -f lavfi -i anullsrc=r=24000:cl=mono -t 1 -q:a 9 -acodec libmp3lame "${outputPath}" -y`);
+      return { duration: 1, cost: 0 };
+    }
+  }
+}
--- a/src/providers/tts/index.ts
+++ b/src/providers/tts/index.ts
@@ -1,2 +1,3 @@
 export * from './ttsProviderFactory';
 export * from './openAITTSProvider';
+export * from './elevenLabsTTSProvider';
--- a/src/providers/tts/ttsProviderFactory.ts
+++ b/src/providers/tts/ttsProviderFactory.ts
@@ -1,6 +1,7 @@
 import { TTSProvider } from '../../interfaces';
 import { Config } from '../../config/config';
 import { OpenAITTSProvider } from './openAITTSProvider';
+import { ElevenLabsTTSProvider } from './elevenLabsTTSProvider';

 /**
 * Factory for creating TTS providers
@@ -17,6 +18,8 @@ export class TTSProviderFactory {
    switch (providerName) {
      case 'openai':
        return new OpenAITTSProvider(providerConfig);
+      case 'elevenlabs':
+        return new ElevenLabsTTSProvider(providerConfig);
      // Add other providers here
      default:
        throw new Error(`TTS provider "${providerName}" not implemented.`);