Add ElevenLabs TTS provider with segment continuity support

This commit is contained in:
2026-05-13 02:38:16 +02:00
parent 19975917c5
commit eb15af3a36
4 changed files with 103 additions and 1 deletions

View File

@@ -75,6 +75,11 @@ export function getDefaultConfig(): Config {
apiKey: process.env.OPENAI_API_KEY,
model: "gpt-4o-mini-tts",
voice: "alloy"
},
elevenlabs: {
apiKey: process.env.ELEVENLABS_API_KEY,
model: "eleven_multilingual_v2",
voice: "JBFqnCBsd6RMkjVDRZzb"
}
},

View File

@@ -0,0 +1,93 @@
import fs from 'fs';
import { execSync } from 'child_process';
import axios, { AxiosInstance } from 'axios';
import { TTSProvider, TTSProviderConfig, TTSOptions, TTSResult } from '../../interfaces';
import { getAudioDuration } from '../../utils/mediaUtils';
/**
 * Text-to-speech provider backed by the ElevenLabs HTTP API.
 *
 * Consecutive calls on the same instance are chained for continuity: the
 * `request-id` response header of the latest successful request is sent as
 * `previous_request_ids` on the next one.
 */
export class ElevenLabsTTSProvider implements TTSProvider {
  /** Voice used when neither the call options nor the provider config name one. */
  private static readonly DEFAULT_VOICE = 'JBFqnCBsd6RMkjVDRZzb';
  /** Model used when neither the call options nor the provider config name one. */
  private static readonly DEFAULT_MODEL = 'eleven_multilingual_v2';

  private readonly config: TTSProviderConfig;
  private readonly axiosInstance: AxiosInstance;
  /** `request-id` header of the most recent successful synthesis, if any. */
  private lastRequestId: string | null = null;

  /**
   * @param config Provider settings; `apiKey` is sent as the `xi-api-key`
   *   header, `voice` and `model` act as per-instance defaults.
   */
  constructor(config: TTSProviderConfig) {
    this.config = config;
    this.axiosInstance = axios.create({
      baseURL: 'https://api.elevenlabs.io/v1',
      headers: {
        'xi-api-key': config.apiKey,
        'Content-Type': 'application/json'
      }
    });
  }

  /**
   * Synthesizes `text` into an MP3 file at `outputPath`.
   *
   * The speed factor is applied exactly once, locally, through ffmpeg's
   * `atempo` filter. It is intentionally NOT forwarded to the API as
   * `voice_settings.speed`: doing both would change the tempo by the square of
   * the factor, and the API only accepts a narrow speed range, so a factor
   * outside it would fail the whole request.
   *
   * On any failure the error is logged and one second of silence is written
   * to `outputPath` instead, so callers always get a playable file.
   *
   * NOTE: `outputPath` is interpolated into shell commands (inside double
   * quotes); callers must pass trusted paths.
   *
   * @param text Text to synthesize.
   * @param outputPath Destination audio file path.
   * @param options Optional per-call `voice`, `model` and `speedFactor`
   *   overrides; a missing or zero `speedFactor` means 1.0.
   * @returns The measured duration of the written file and the cost, counted
   *   as the number of characters in `text`; `{ duration: 1, cost: 0 }` when
   *   the silent fallback was written.
   * @throws If writing the silent fallback via ffmpeg fails as well.
   */
  async textToSpeech(
    text: string,
    outputPath: string,
    options: TTSOptions = {}
  ): Promise<TTSResult> {
    const tempOutputPath = ElevenLabsTTSProvider.tempPathFor(outputPath);
    try {
      const voice = options.voice || this.config.voice || ElevenLabsTTSProvider.DEFAULT_VOICE;
      const model = options.model || this.config.model || ElevenLabsTTSProvider.DEFAULT_MODEL;
      const speedFactor = options.speedFactor || 1.0;

      const requestBody: Record<string, unknown> = {
        text,
        model_id: model,
        voice_settings: {
          stability: 0.5,
          similarity_boost: 0.75,
          use_speaker_boost: true
        }
      };
      if (this.lastRequestId) {
        // Let the API condition this segment on the previous one.
        requestBody['previous_request_ids'] = [this.lastRequestId];
      }

      const response = await this.axiosInstance.post(
        `/text-to-speech/${voice}`,
        requestBody,
        {
          params: { output_format: 'mp3_44100_128' },
          responseType: 'arraybuffer'
        }
      );
      this.lastRequestId = response.headers['request-id'] || null;

      fs.writeFileSync(tempOutputPath, Buffer.from(response.data));
      const cost = text.length;

      if (speedFactor !== 1.0) {
        execSync(`ffmpeg -v error -i "${tempOutputPath}" -filter:a "atempo=${speedFactor}" -c:a libmp3lame -q:a 2 "${outputPath}" -y`);
        fs.unlinkSync(tempOutputPath);
      } else {
        fs.renameSync(tempOutputPath, outputPath);
      }

      const audioDuration = getAudioDuration(outputPath);
      return {
        duration: audioDuration,
        cost: cost
      };
    } catch (error: any) {
      ElevenLabsTTSProvider.logFailure(error);
      // Do not leave a half-processed temp file behind.
      ElevenLabsTTSProvider.removeQuietly(tempOutputPath);
      // Fallback: one second of silence so downstream steps still have a file.
      execSync(`ffmpeg -v error -f lavfi -i anullsrc=r=24000:cl=mono -t 1 -q:a 9 -acodec libmp3lame "${outputPath}" -y`);
      return {
        duration: 1,
        cost: 0
      };
    }
  }

  /**
   * Derives a scratch path next to `outputPath` that is guaranteed to differ
   * from it: `_temp` goes before the extension, or at the end when the path
   * has no extension (where the plain regex replace would be a no-op).
   */
  private static tempPathFor(outputPath: string): string {
    const withSuffix = outputPath.replace(/\.\w+$/, '_temp$&');
    return withSuffix === outputPath ? `${outputPath}_temp` : withSuffix;
  }

  /** Logs a synthesis failure, including the HTTP status and body when present. */
  private static logFailure(error: any): void {
    if (error && error.response) {
      // The request uses responseType 'arraybuffer', so the body needs decoding.
      const data = error.response.data;
      console.error(`ElevenLabs TTS error (${error.response.status}):`,
        data ? Buffer.from(data).toString() : '');
    } else {
      console.error('ElevenLabs TTS error:', error && error.message);
    }
  }

  /** Deletes `filePath` if it exists; cleanup errors are deliberately ignored. */
  private static removeQuietly(filePath: string): void {
    try {
      if (fs.existsSync(filePath)) {
        fs.unlinkSync(filePath);
      }
    } catch {
      // Best-effort cleanup only; the original failure is what matters.
    }
  }
}

View File

@@ -1,2 +1,3 @@
// Barrel module: re-exports the TTS provider factory and every provider implementation.
export * from './ttsProviderFactory';
export * from './openAITTSProvider';
export * from './elevenLabsTTSProvider';

View File

@@ -1,6 +1,7 @@
import { TTSProvider } from '../../interfaces';
import { Config } from '../../config/config';
import { OpenAITTSProvider } from './openAITTSProvider';
import { ElevenLabsTTSProvider } from './elevenLabsTTSProvider';
/**
* Factory for creating TTS providers
@@ -17,6 +18,8 @@ export class TTSProviderFactory {
switch (providerName) {
case 'openai':
return new OpenAITTSProvider(providerConfig);
case 'elevenlabs':
return new ElevenLabsTTSProvider(providerConfig);
// Add other providers here
default:
throw new Error(`TTS provider "${providerName}" not implemented.`);