// elevenLabsTTSProvider.js - compiled JavaScript output (source map: elevenLabsTTSProvider.js.map)
"use strict";
|
||
|
|
/**
 * Interop helper for default imports of CommonJS modules.
 *
 * Reuses an `__importDefault` already present on `this` when there is one;
 * otherwise defines it here.
 *
 * @param {*} mod - value returned by `require(...)`.
 * @returns {*} `mod` itself when it is flagged with `__esModule`, otherwise
 *   a wrapper object `{ default: mod }`.
 */
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
|
||
|
|
// Flag the CommonJS exports object with `__esModule`.
Object.defineProperty(exports, "__esModule", { value: true });
// Declare the export slot up front; the class is assigned to it at the end of the file.
exports.ElevenLabsTTSProvider = void 0;
|
||
|
|
const fs_1 = __importDefault(require("fs"));
|
||
|
|
const child_process_1 = require("child_process");
|
||
|
|
const axios_1 = __importDefault(require("axios"));
|
||
|
|
const mediaUtils_1 = require("../../utils/mediaUtils");
|
||
|
|
/**
 * Text-to-speech provider that calls the ElevenLabs HTTP API and writes the
 * resulting audio to disk.
 *
 * Each instance owns an axios client configured with `config.apiKey` and
 * remembers the `request-id` response header of the last successful call so
 * the next request can send it as `previous_request_ids`.
 */
class ElevenLabsTTSProvider {
    /**
     * @param {object} config
     * @param {string} config.apiKey - sent as the `xi-api-key` request header.
     * @param {string} [config.voice] - voice id used when `options.voice` is not given.
     * @param {string} [config.model] - model id used when `options.model` is not given.
     */
    constructor(config) {
        this.lastRequestId = null;
        this.config = config;
        this.axiosInstance = axios_1.default.create({
            baseURL: 'https://api.elevenlabs.io/v1',
            headers: {
                'xi-api-key': config.apiKey,
                'Content-Type': 'application/json'
            }
        });
    }
    /**
     * Internal: derive the scratch-file path used while the audio is being written.
     *
     * `_temp` is inserted before the trailing extension. When the path has no
     * extension, `_temp` is appended instead, so the scratch file can never be
     * the same file as `outputPath`.
     *
     * @param {string} outputPath
     * @returns {string}
     */
    static buildTempPath(outputPath) {
        const extensionPattern = /\.\w+$/;
        if (extensionPattern.test(outputPath)) {
            return outputPath.replace(extensionPattern, '_temp$&');
        }
        return `${outputPath}_temp`;
    }
    /**
     * Internal: turn an error response body into text for logging without ever
     * throwing (a throw here would skip the silence fallback).
     *
     * @param {*} data - `error.response.data`; may be missing.
     * @returns {string}
     */
    static describeErrorBody(data) {
        if (data === undefined || data === null) {
            return '';
        }
        try {
            return Buffer.from(data).toString();
        }
        catch (conversionError) {
            return String(data);
        }
    }
    /**
     * Synthesize `text` and write the audio to `outputPath`.
     *
     * On any failure (HTTP error, file-system error, ffmpeg error) the error is
     * logged and a 1-second silent MP3 is written to `outputPath` instead, so
     * callers always get a file. Only a failure of that fallback itself rejects.
     *
     * @param {string} text - text to synthesize.
     * @param {string} outputPath - destination audio file.
     * @param {object} [options]
     * @param {string} [options.voice] - overrides `config.voice`.
     * @param {string} [options.model] - overrides `config.model`.
     * @param {number} [options.speedFactor] - falls back to 1.0 when falsy.
     * @returns {Promise<{duration: *, cost: number}>} `duration` is whatever
     *   `getAudioDuration(outputPath)` returns and `cost` is `text.length`; the
     *   fallback path returns `{ duration: 1, cost: 0 }`.
     */
    async textToSpeech(text, outputPath, options = {}) {
        try {
            const voice = options.voice || this.config.voice || 'JBFqnCBsd6RMkjVDRZzb';
            const model = options.model || this.config.model || 'eleven_multilingual_v2';
            const speedFactor = options.speedFactor || 1.0;
            // NOTE(review): speedFactor is sent to the API here AND applied again
            // below through ffmpeg's atempo filter, so a non-1.0 factor appears to
            // be applied twice - confirm which of the two is intended.
            const requestBody = {
                text,
                model_id: model,
                voice_settings: {
                    stability: 0.5,
                    similarity_boost: 0.75,
                    speed: speedFactor,
                    use_speaker_boost: true
                }
            };
            if (this.lastRequestId) {
                requestBody.previous_request_ids = [this.lastRequestId];
            }
            const tempOutputPath = ElevenLabsTTSProvider.buildTempPath(outputPath);
            const response = await this.axiosInstance.post(`/text-to-speech/${voice}`, requestBody, {
                params: { output_format: 'mp3_44100_128' },
                responseType: 'arraybuffer'
            });
            this.lastRequestId = response.headers['request-id'] || null;
            const audioBuffer = Buffer.from(response.data);
            const cost = text.length;
            try {
                fs_1.default.writeFileSync(tempOutputPath, audioBuffer);
                if (speedFactor !== 1.0) {
                    // Argument array + execFileSync: no shell is involved, so quotes,
                    // `$` or backticks in the paths cannot break or inject commands.
                    (0, child_process_1.execFileSync)('ffmpeg', [
                        '-v', 'error',
                        '-i', tempOutputPath,
                        '-filter:a', `atempo=${speedFactor}`,
                        '-c:a', 'libmp3lame',
                        '-q:a', '2',
                        outputPath,
                        '-y'
                    ]);
                }
                else {
                    fs_1.default.renameSync(tempOutputPath, outputPath);
                }
            }
            finally {
                // Remove the scratch file even when ffmpeg fails; after a
                // successful rename it no longer exists and this is a no-op.
                if (fs_1.default.existsSync(tempOutputPath)) {
                    fs_1.default.unlinkSync(tempOutputPath);
                }
            }
            const audioDuration = (0, mediaUtils_1.getAudioDuration)(outputPath);
            return {
                duration: audioDuration,
                cost: cost
            };
        }
        catch (error) {
            if (error.response) {
                console.error(`ElevenLabs TTS error (${error.response.status}):`, ElevenLabsTTSProvider.describeErrorBody(error.response.data));
            }
            else {
                console.error('ElevenLabs TTS error:', error.message);
            }
            // Best-effort fallback: write one second of silence to outputPath.
            (0, child_process_1.execFileSync)('ffmpeg', [
                '-v', 'error',
                '-f', 'lavfi',
                '-i', 'anullsrc=r=24000:cl=mono',
                '-t', '1',
                '-q:a', '9',
                '-acodec', 'libmp3lame',
                outputPath,
                '-y'
            ]);
            return {
                duration: 1,
                cost: 0
            };
        }
    }
}
|
||
|
|
// Publish the class on the CommonJS exports object.
exports.ElevenLabsTTSProvider = ElevenLabsTTSProvider;
|
||
|
|
//# sourceMappingURL=elevenLabsTTSProvider.js.map
|