Add ElevenLabs TTS provider with segment continuity support
This commit is contained in:
@@ -75,6 +75,11 @@ export function getDefaultConfig(): Config {
|
||||
apiKey: process.env.OPENAI_API_KEY,
|
||||
model: "gpt-4o-mini-tts",
|
||||
voice: "alloy"
|
||||
},
|
||||
elevenlabs: {
|
||||
apiKey: process.env.ELEVENLABS_API_KEY,
|
||||
model: "eleven_multilingual_v2",
|
||||
voice: "JBFqnCBsd6RMkjVDRZzb"
|
||||
}
|
||||
},
|
||||
|
||||
|
||||
93
src/providers/tts/elevenLabsTTSProvider.ts
Normal file
93
src/providers/tts/elevenLabsTTSProvider.ts
Normal file
@@ -0,0 +1,93 @@
|
||||
import fs from 'fs';
import { execFileSync, execSync } from 'child_process';
import axios, { AxiosInstance } from 'axios';
import { TTSProvider, TTSProviderConfig, TTSOptions, TTSResult } from '../../interfaces';
import { getAudioDuration } from '../../utils/mediaUtils';
|
||||
|
||||
/**
 * Text-to-speech provider backed by the ElevenLabs HTTP API.
 *
 * Each successful request's `request-id` response header is remembered and
 * sent as `previous_request_ids` on the next call so that consecutive
 * segments are generated with continuity between them.
 */
export class ElevenLabsTTSProvider implements TTSProvider {
  /** Voice used when neither the call options nor the provider config name one. */
  private static readonly DEFAULT_VOICE = 'JBFqnCBsd6RMkjVDRZzb';
  /** Model used when neither the call options nor the provider config name one. */
  private static readonly DEFAULT_MODEL = 'eleven_multilingual_v2';
  /**
   * Bounds applied to `voice_settings.speed` before it is sent to the API.
   * NOTE(review): 0.7-1.2 is the range given in the ElevenLabs voice-settings
   * documentation; confirm against the current API reference.
   */
  private static readonly MIN_API_SPEED = 0.7;
  private static readonly MAX_API_SPEED = 1.2;
  /** Tolerance below which a residual tempo change is treated as "none". */
  private static readonly SPEED_EPSILON = 1e-6;

  private readonly config: TTSProviderConfig;
  private readonly axiosInstance: AxiosInstance;
  /** `request-id` of the most recent successful synthesis, or null if none. */
  private lastRequestId: string | null = null;

  /**
   * @param config Provider configuration; `apiKey` is sent as the
   *   `xi-api-key` header, `voice` and `model` act as per-provider defaults.
   */
  constructor(config: TTSProviderConfig) {
    this.config = config;
    this.axiosInstance = axios.create({
      baseURL: 'https://api.elevenlabs.io/v1',
      headers: {
        'xi-api-key': config.apiKey,
        'Content-Type': 'application/json'
      }
    });
  }

  /**
   * Synthesizes `text` into an MP3 file at `outputPath`.
   *
   * The requested speed factor is applied exactly once: the part that fits
   * the API's speed range is requested from ElevenLabs, and only the
   * remaining ratio (if any) is applied locally with ffmpeg's `atempo`.
   *
   * On any failure the error is logged, one second of silence is written to
   * `outputPath` instead, and `{ duration: 1, cost: 0 }` is returned.
   *
   * @param text Text to synthesize.
   * @param outputPath Destination audio file path.
   * @param options Optional per-call `voice`, `model` and `speedFactor`.
   * @returns Duration of the written file (as reported by `getAudioDuration`)
   *   and the cost, measured as the number of characters in `text`.
   * @throws If writing the fallback silence file with ffmpeg also fails.
   */
  async textToSpeech(
    text: string,
    outputPath: string,
    options: TTSOptions = {}
  ): Promise<TTSResult> {
    try {
      const voice = options.voice || this.config.voice || ElevenLabsTTSProvider.DEFAULT_VOICE;
      const model = options.model || this.config.model || ElevenLabsTTSProvider.DEFAULT_MODEL;
      const speedFactor = ElevenLabsTTSProvider.normalizeSpeed(options.speedFactor);

      // Split the factor so it is not applied twice (once by the API and
      // once more by ffmpeg): the API gets the in-range part, ffmpeg the rest.
      const apiSpeed = Math.min(
        ElevenLabsTTSProvider.MAX_API_SPEED,
        Math.max(ElevenLabsTTSProvider.MIN_API_SPEED, speedFactor)
      );
      const residualSpeed = speedFactor / apiSpeed;

      const requestBody: Record<string, unknown> = {
        text,
        model_id: model,
        voice_settings: {
          stability: 0.5,
          similarity_boost: 0.75,
          speed: apiSpeed,
          use_speaker_boost: true
        }
      };

      if (this.lastRequestId) {
        requestBody.previous_request_ids = [this.lastRequestId];
      }

      const response = await this.axiosInstance.post(
        `/text-to-speech/${voice}`,
        requestBody,
        {
          params: { output_format: 'mp3_44100_128' },
          responseType: 'arraybuffer'
        }
      );

      const requestId: unknown = response.headers['request-id'];
      this.lastRequestId = typeof requestId === 'string' && requestId !== '' ? requestId : null;

      const tempOutputPath = ElevenLabsTTSProvider.deriveTempPath(outputPath);
      fs.writeFileSync(tempOutputPath, Buffer.from(response.data));

      const cost = text.length;

      if (Math.abs(residualSpeed - 1) > ElevenLabsTTSProvider.SPEED_EPSILON) {
        try {
          // Argument array (no shell) so paths with quotes/spaces are safe.
          execFileSync('ffmpeg', [
            '-y',
            '-v', 'error',
            '-i', tempOutputPath,
            '-filter:a', ElevenLabsTTSProvider.buildAtempoFilter(residualSpeed),
            '-c:a', 'libmp3lame',
            '-q:a', '2',
            outputPath
          ]);
        } finally {
          // Remove the intermediate file even when ffmpeg fails.
          if (fs.existsSync(tempOutputPath)) {
            fs.unlinkSync(tempOutputPath);
          }
        }
      } else {
        fs.renameSync(tempOutputPath, outputPath);
      }

      return {
        duration: getAudioDuration(outputPath),
        cost
      };
    } catch (error: unknown) {
      ElevenLabsTTSProvider.logError(error);

      // Drop the continuity id so that a rejected or expired id cannot make
      // every following request fail the same way.
      this.lastRequestId = null;

      // Best-effort fallback: one second of silence in place of the segment.
      execFileSync('ffmpeg', [
        '-y',
        '-v', 'error',
        '-f', 'lavfi',
        '-i', 'anullsrc=r=24000:cl=mono',
        '-t', '1',
        '-q:a', '9',
        '-acodec', 'libmp3lame',
        outputPath
      ]);
      return {
        duration: 1,
        cost: 0
      };
    }
  }

  /** Returns `value` if it is a positive finite number, otherwise 1.0. */
  private static normalizeSpeed(value: unknown): number {
    return typeof value === 'number' && Number.isFinite(value) && value > 0 ? value : 1.0;
  }

  /**
   * Builds a temp path that always differs from `outputPath`: `_temp` is
   * inserted before the extension, or appended when there is no extension.
   */
  private static deriveTempPath(outputPath: string): string {
    const extensionPattern = /\.\w+$/;
    return extensionPattern.test(outputPath)
      ? outputPath.replace(extensionPattern, '_temp$&')
      : `${outputPath}_temp`;
  }

  /**
   * Builds an ffmpeg audio filter for a positive tempo `factor`, chaining
   * `atempo` stages so that each single stage stays within 0.5-2.0.
   */
  private static buildAtempoFilter(factor: number): string {
    const stages: string[] = [];
    let remaining = factor;
    while (remaining > 2.0) {
      stages.push('atempo=2.0');
      remaining /= 2.0;
    }
    while (remaining < 0.5) {
      stages.push('atempo=0.5');
      remaining /= 0.5;
    }
    stages.push(`atempo=${remaining}`);
    return stages.join(',');
  }

  /** Logs an HTTP error response (status and decoded body) or a plain error message. */
  private static logError(error: unknown): void {
    const response = (error as { response?: { status?: unknown; data?: unknown } } | null | undefined)
      ?.response;

    if (response) {
      const { status, data } = response;
      let body: string;
      if (typeof data === 'string') {
        body = data;
      } else if (Buffer.isBuffer(data)) {
        body = data.toString();
      } else if (data instanceof ArrayBuffer) {
        body = Buffer.from(data).toString();
      } else {
        // Missing or unexpected payloads must not throw inside the handler.
        body = data == null ? '' : String(data);
      }
      console.error(`ElevenLabs TTS error (${String(status)}):`, body);
    } else {
      const message = error instanceof Error ? error.message : String(error);
      console.error('ElevenLabs TTS error:', message);
    }
  }
}
|
||||
@@ -1,2 +1,3 @@
|
||||
export * from './ttsProviderFactory';
|
||||
export * from './openAITTSProvider';
|
||||
export * from './openAITTSProvider';
|
||||
export * from './elevenLabsTTSProvider';
|
||||
@@ -1,6 +1,7 @@
|
||||
import { TTSProvider } from '../../interfaces';
|
||||
import { Config } from '../../config/config';
|
||||
import { OpenAITTSProvider } from './openAITTSProvider';
|
||||
import { ElevenLabsTTSProvider } from './elevenLabsTTSProvider';
|
||||
|
||||
/**
|
||||
* Factory for creating TTS providers
|
||||
@@ -17,6 +18,8 @@ export class TTSProviderFactory {
|
||||
switch (providerName) {
|
||||
case 'openai':
|
||||
return new OpenAITTSProvider(providerConfig);
|
||||
case 'elevenlabs':
|
||||
return new ElevenLabsTTSProvider(providerConfig);
|
||||
// Add other providers here
|
||||
default:
|
||||
throw new Error(`TTS provider "${providerName}" not implemented.`);
|
||||
|
||||
Reference in New Issue
Block a user