From eb15af3a36f8b5181a213e139e93d3ddd8bcd869 Mon Sep 17 00:00:00 2001
From: Talon
Date: Wed, 13 May 2026 02:38:16 +0200
Subject: [PATCH] Add ElevenLabs TTS provider with segment continuity support

---
Review notes (text in this section is ignored by `git am`):
 * Speed is now applied exactly once, by ffmpeg `atempo`. The request no
   longer also sends `voice_settings.speed`, which compounded the factor.
 * ffmpeg is started with execFileSync and an argument array instead of a
   shell command string built from outputPath.
 * On failure the temp file is removed and the stored request id is cleared.
 * The line breaks and leading whitespace of this patch were reconstructed;
   if a hunk is rejected, retry with `git apply --ignore-whitespace`.
 * The blob `index` line of the new file was dropped because its content
   changed.

 src/config/config.ts                       |   5 ++
 src/providers/tts/elevenLabsTTSProvider.ts | 135 +++++++++++++++++++++
 src/providers/tts/index.ts                 |   3 +-
 src/providers/tts/ttsProviderFactory.ts    |   3 +
 4 files changed, 145 insertions(+), 1 deletion(-)
 create mode 100644 src/providers/tts/elevenLabsTTSProvider.ts

diff --git a/src/config/config.ts b/src/config/config.ts
index 050af99..5ca2a7a 100644
--- a/src/config/config.ts
+++ b/src/config/config.ts
@@ -75,6 +75,11 @@ export function getDefaultConfig(): Config {
         apiKey: process.env.OPENAI_API_KEY,
         model: "gpt-4o-mini-tts",
         voice: "alloy"
+      },
+      elevenlabs: {
+        apiKey: process.env.ELEVENLABS_API_KEY,
+        model: "eleven_multilingual_v2",
+        voice: "JBFqnCBsd6RMkjVDRZzb"
       }
     },
 
diff --git a/src/providers/tts/elevenLabsTTSProvider.ts b/src/providers/tts/elevenLabsTTSProvider.ts
new file mode 100644
--- /dev/null
+++ b/src/providers/tts/elevenLabsTTSProvider.ts
@@ -0,0 +1,135 @@
+import fs from 'fs';
+import { execFileSync } from 'child_process';
+import axios, { AxiosInstance } from 'axios';
+import { TTSProvider, TTSProviderConfig, TTSOptions, TTSResult } from '../../interfaces';
+import { getAudioDuration } from '../../utils/mediaUtils';
+
+const DEFAULT_VOICE_ID = 'JBFqnCBsd6RMkjVDRZzb';
+const DEFAULT_MODEL_ID = 'eleven_multilingual_v2';
+
+/**
+ * TTS provider that synthesizes speech through the ElevenLabs HTTP API.
+ *
+ * The `request-id` response header of the last successful call is sent as
+ * `previous_request_ids` on the next call so that consecutive segments are
+ * generated with continuity between them.
+ */
+export class ElevenLabsTTSProvider implements TTSProvider {
+  private readonly config: TTSProviderConfig;
+  private readonly axiosInstance: AxiosInstance;
+  private lastRequestId: string | null = null;
+
+  /**
+   * @param config Provider settings; `apiKey` is sent as the `xi-api-key`
+   *   header, `voice` and `model` act as defaults for every call.
+   */
+  constructor(config: TTSProviderConfig) {
+    this.config = config;
+    this.axiosInstance = axios.create({
+      baseURL: 'https://api.elevenlabs.io/v1',
+      headers: {
+        'xi-api-key': config.apiKey,
+        'Content-Type': 'application/json'
+      }
+    });
+  }
+
+  /**
+   * Synthesizes `text` to an MP3 file at `outputPath`.
+   *
+   * Failures are logged rather than thrown: one second of silence is written
+   * to `outputPath` and `{ duration: 1, cost: 0 }` is returned instead.
+   *
+   * @param text Text to speak.
+   * @param outputPath Destination path of the MP3 file.
+   * @param options Per-call overrides for `voice`, `model` and `speedFactor`.
+   * @returns Duration as reported by `getAudioDuration` and the cost,
+   *   counted as the number of characters in `text`.
+   */
+  async textToSpeech(
+    text: string,
+    outputPath: string,
+    options: TTSOptions = {}
+  ): Promise<TTSResult> {
+    // Keep the scratch file distinct from the target even without an extension.
+    const tempOutputPath = /\.\w+$/.test(outputPath)
+      ? outputPath.replace(/\.\w+$/, '_temp$&')
+      : `${outputPath}_temp`;
+
+    try {
+      const voice = options.voice || this.config.voice || DEFAULT_VOICE_ID;
+      const model = options.model || this.config.model || DEFAULT_MODEL_ID;
+      // `||` on purpose: 0 and NaN are not usable tempo factors.
+      const speedFactor = options.speedFactor || 1.0;
+
+      // No `speed` voice setting: the factor is applied once, by ffmpeg below.
+      const requestBody = {
+        text,
+        model_id: model,
+        voice_settings: {
+          stability: 0.5,
+          similarity_boost: 0.75,
+          use_speaker_boost: true
+        },
+        ...(this.lastRequestId ? { previous_request_ids: [this.lastRequestId] } : {})
+      };
+
+      const response = await this.axiosInstance.post(
+        `/text-to-speech/${voice}`,
+        requestBody,
+        {
+          params: { output_format: 'mp3_44100_128' },
+          responseType: 'arraybuffer'
+        }
+      );
+
+      const requestId: unknown = response.headers['request-id'];
+      this.lastRequestId = typeof requestId === 'string' && requestId ? requestId : null;
+
+      fs.writeFileSync(tempOutputPath, Buffer.from(response.data));
+
+      if (speedFactor !== 1.0) {
+        // Argument array, no shell: paths are never interpreted as shell syntax.
+        execFileSync('ffmpeg', [
+          '-y', '-v', 'error',
+          '-i', tempOutputPath,
+          '-filter:a', `atempo=${speedFactor}`,
+          '-c:a', 'libmp3lame', '-q:a', '2',
+          outputPath
+        ]);
+        fs.unlinkSync(tempOutputPath);
+      } else {
+        fs.renameSync(tempOutputPath, outputPath);
+      }
+
+      return {
+        duration: getAudioDuration(outputPath),
+        cost: text.length
+      };
+    } catch (error: unknown) {
+      if (axios.isAxiosError(error) && error.response) {
+        const detail = error.response.data ? Buffer.from(error.response.data).toString() : '';
+        console.error(`ElevenLabs TTS error (${error.response.status}):`, detail);
+      } else {
+        console.error('ElevenLabs TTS error:', error instanceof Error ? error.message : error);
+      }
+
+      // The segment chain is broken; do not stitch the next call to a stale id.
+      this.lastRequestId = null;
+      if (fs.existsSync(tempOutputPath)) {
+        fs.unlinkSync(tempOutputPath);
+      }
+
+      execFileSync('ffmpeg', [
+        '-y', '-v', 'error',
+        '-f', 'lavfi', '-i', 'anullsrc=r=24000:cl=mono',
+        '-t', '1', '-q:a', '9', '-acodec', 'libmp3lame',
+        outputPath
+      ]);
+      return {
+        duration: 1,
+        cost: 0
+      };
+    }
+  }
+}
diff --git a/src/providers/tts/index.ts b/src/providers/tts/index.ts
index 2fa7939..f17f0d1 100644
--- a/src/providers/tts/index.ts
+++ b/src/providers/tts/index.ts
@@ -1,2 +1,3 @@
 export * from './ttsProviderFactory';
-export * from './openAITTSProvider';
\ No newline at end of file
+export * from './openAITTSProvider';
+export * from './elevenLabsTTSProvider';
\ No newline at end of file
diff --git a/src/providers/tts/ttsProviderFactory.ts b/src/providers/tts/ttsProviderFactory.ts
index 03151d4..13db2ab 100644
--- a/src/providers/tts/ttsProviderFactory.ts
+++ b/src/providers/tts/ttsProviderFactory.ts
@@ -1,6 +1,7 @@
 import { TTSProvider } from '../../interfaces';
 import { Config } from '../../config/config';
 import { OpenAITTSProvider } from './openAITTSProvider';
+import { ElevenLabsTTSProvider } from './elevenLabsTTSProvider';
 
 /**
  * Factory for creating TTS providers
@@ -17,6 +18,8 @@ export class TTSProviderFactory {
     switch (providerName) {
       case 'openai':
         return new OpenAITTSProvider(providerConfig);
+      case 'elevenlabs':
+        return new ElevenLabsTTSProvider(providerConfig);
       // Add other providers here
       default:
         throw new Error(`TTS provider "${providerName}" not implemented.`);