Add ElevenLabs TTS provider with segment continuity support
This commit is contained in:
@@ -75,6 +75,11 @@ export function getDefaultConfig(): Config {
|
|||||||
apiKey: process.env.OPENAI_API_KEY,
|
apiKey: process.env.OPENAI_API_KEY,
|
||||||
model: "gpt-4o-mini-tts",
|
model: "gpt-4o-mini-tts",
|
||||||
voice: "alloy"
|
voice: "alloy"
|
||||||
|
},
|
||||||
|
elevenlabs: {
|
||||||
|
apiKey: process.env.ELEVENLABS_API_KEY,
|
||||||
|
model: "eleven_multilingual_v2",
|
||||||
|
voice: "JBFqnCBsd6RMkjVDRZzb"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
|
||||||
|
|||||||
93
src/providers/tts/elevenLabsTTSProvider.ts
Normal file
93
src/providers/tts/elevenLabsTTSProvider.ts
Normal file
@@ -0,0 +1,93 @@
|
|||||||
|
import fs from 'fs';
|
||||||
|
import { execSync } from 'child_process';
|
||||||
|
import axios, { AxiosInstance } from 'axios';
|
||||||
|
import { TTSProvider, TTSProviderConfig, TTSOptions, TTSResult } from '../../interfaces';
|
||||||
|
import { getAudioDuration } from '../../utils/mediaUtils';
|
||||||
|
|
||||||
|
/**
 * TTS provider backed by the ElevenLabs `/v1/text-to-speech/{voice}` endpoint.
 *
 * The provider remembers the `request-id` response header of the most recent
 * successful call and sends it as `previous_request_ids` on the next call so
 * consecutive segments can be stitched with consistent prosody.
 */
export class ElevenLabsTTSProvider implements TTSProvider {
  // Bounds for the native `voice_settings.speed` field. ElevenLabs documents
  // 0.7–1.2 — NOTE(review): confirm against the current API reference.
  private static readonly MIN_NATIVE_SPEED = 0.7;
  private static readonly MAX_NATIVE_SPEED = 1.2;

  private config: TTSProviderConfig;
  private axiosInstance: AxiosInstance;
  private lastRequestId: string | null = null;

  /**
   * @param config Provider configuration; `apiKey` is sent as the `xi-api-key`
   *   header, `voice` and `model` act as defaults for each request.
   */
  constructor(config: TTSProviderConfig) {
    this.config = config;
    this.axiosInstance = axios.create({
      baseURL: 'https://api.elevenlabs.io/v1',
      headers: {
        'xi-api-key': config.apiKey,
        'Content-Type': 'application/json'
      }
    });
  }

  /**
   * Synthesizes `text` to an MP3 file at `outputPath`.
   *
   * The requested speed is applied exactly once: the part that fits the
   * native speed range is sent to the API, and only the remaining factor (if
   * any) is applied afterwards with ffmpeg's `atempo` filter.
   *
   * On any failure the error is logged, a one-second silent MP3 is written to
   * `outputPath` instead, and `{ duration: 1, cost: 0 }` is returned.
   *
   * @param text Text to synthesize.
   * @param outputPath Destination path of the final audio file.
   * @param options Optional per-call overrides (`voice`, `model`, `speedFactor`).
   * @returns Duration of the written audio and the cost, counted as the
   *   number of characters in `text`.
   * @throws If generating the silent fallback file with ffmpeg also fails.
   */
  async textToSpeech(
    text: string,
    outputPath: string,
    options: TTSOptions = {}
  ): Promise<TTSResult> {
    const tempOutputPath = ElevenLabsTTSProvider.tempPathFor(outputPath);

    try {
      const voice = options.voice || this.config.voice || 'JBFqnCBsd6RMkjVDRZzb';
      const model = options.model || this.config.model || 'eleven_multilingual_v2';
      const speedFactor = options.speedFactor || 1.0;

      // Split the requested speed into a native part and a post-processing
      // part. Sending `speedFactor` to the API *and* to atempo (as before)
      // produced an effective speed of speedFactor squared.
      const nativeSpeed = Math.min(
        ElevenLabsTTSProvider.MAX_NATIVE_SPEED,
        Math.max(ElevenLabsTTSProvider.MIN_NATIVE_SPEED, speedFactor)
      );
      // Exactly 1 whenever speedFactor is inside the native range.
      const residualSpeed = speedFactor / nativeSpeed;

      const requestBody: Record<string, unknown> = {
        text,
        model_id: model,
        voice_settings: {
          stability: 0.5,
          similarity_boost: 0.75,
          speed: nativeSpeed,
          use_speaker_boost: true
        }
      };

      if (this.lastRequestId) {
        requestBody.previous_request_ids = [this.lastRequestId];
      }

      const response = await this.axiosInstance.post(
        `/text-to-speech/${voice}`,
        requestBody,
        {
          params: { output_format: 'mp3_44100_128' },
          responseType: 'arraybuffer'
        }
      );

      this.lastRequestId = response.headers['request-id'] || null;

      fs.writeFileSync(tempOutputPath, Buffer.from(response.data));

      const cost = text.length;

      if (residualSpeed !== 1.0) {
        execSync(`ffmpeg -v error -i "${tempOutputPath}" -filter:a "atempo=${residualSpeed}" -c:a libmp3lame -q:a 2 "${outputPath}" -y`);
        fs.unlinkSync(tempOutputPath);
      } else {
        fs.renameSync(tempOutputPath, outputPath);
      }

      const audioDuration = getAudioDuration(outputPath);

      return {
        duration: audioDuration,
        cost: cost
      };
    } catch (error: unknown) {
      ElevenLabsTTSProvider.logFailure(error);

      // Do not leave a half-processed temp file behind.
      ElevenLabsTTSProvider.removeIfExists(tempOutputPath);

      // This segment is replaced by silence, so there is nothing meaningful to
      // chain the next segment to; clearing also stops a rejected ID from
      // being resent on every following request.
      this.lastRequestId = null;

      execSync(`ffmpeg -v error -f lavfi -i anullsrc=r=24000:cl=mono -t 1 -q:a 9 -acodec libmp3lame "${outputPath}" -y`);
      return {
        duration: 1,
        cost: 0
      };
    }
  }

  /**
   * Derives a temp path that is guaranteed to differ from `outputPath`:
   * `_temp` is inserted before the extension, or appended when the path has
   * no extension (previously both paths were identical in that case).
   */
  private static tempPathFor(outputPath: string): string {
    const withSuffix = outputPath.replace(/\.\w+$/, '_temp$&');
    return withSuffix === outputPath ? `${outputPath}_temp` : withSuffix;
  }

  /** Best-effort delete; cleanup problems must not mask the original error. */
  private static removeIfExists(filePath: string): void {
    try {
      if (fs.existsSync(filePath)) {
        fs.unlinkSync(filePath);
      }
    } catch {
      // Ignored on purpose: this only runs on the failure path.
    }
  }

  /** Logs an HTTP error (status + decoded body) or a plain error message. */
  private static logFailure(error: unknown): void {
    const response =
      typeof error === 'object' && error !== null
        ? (error as { response?: { status?: unknown; data?: unknown } }).response
        : undefined;

    if (response) {
      // The request uses responseType 'arraybuffer', so the body is binary.
      const data = response.data;
      let detail: string;
      if (Buffer.isBuffer(data)) {
        detail = data.toString();
      } else if (data instanceof ArrayBuffer) {
        detail = Buffer.from(data).toString();
      } else {
        detail = String(data);
      }
      console.error(`ElevenLabs TTS error (${response.status}):`, detail);
    } else {
      console.error('ElevenLabs TTS error:', error instanceof Error ? error.message : error);
    }
  }
}
|
||||||
@@ -1,2 +1,3 @@
|
|||||||
export * from './ttsProviderFactory';
|
export * from './ttsProviderFactory';
|
||||||
export * from './openAITTSProvider';
|
export * from './openAITTSProvider';
|
||||||
|
export * from './elevenLabsTTSProvider';
|
||||||
@@ -1,6 +1,7 @@
|
|||||||
import { TTSProvider } from '../../interfaces';
|
import { TTSProvider } from '../../interfaces';
|
||||||
import { Config } from '../../config/config';
|
import { Config } from '../../config/config';
|
||||||
import { OpenAITTSProvider } from './openAITTSProvider';
|
import { OpenAITTSProvider } from './openAITTSProvider';
|
||||||
|
import { ElevenLabsTTSProvider } from './elevenLabsTTSProvider';
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Factory for creating TTS providers
|
* Factory for creating TTS providers
|
||||||
@@ -17,6 +18,8 @@ export class TTSProviderFactory {
|
|||||||
switch (providerName) {
|
switch (providerName) {
|
||||||
case 'openai':
|
case 'openai':
|
||||||
return new OpenAITTSProvider(providerConfig);
|
return new OpenAITTSProvider(providerConfig);
|
||||||
|
case 'elevenlabs':
|
||||||
|
return new ElevenLabsTTSProvider(providerConfig);
|
||||||
// Add other providers here
|
// Add other providers here
|
||||||
default:
|
default:
|
||||||
throw new Error(`TTS provider "${providerName}" not implemented.`);
|
throw new Error(`TTS provider "${providerName}" not implemented.`);
|
||||||
|
|||||||
Reference in New Issue
Block a user