80 lines
3.2 KiB
JavaScript
80 lines
3.2 KiB
JavaScript
|
|
"use strict";
|
||
|
|
// CommonJS/ES-module interop helper: reuse a pre-existing `__importDefault`
// found on `this` when there is one, otherwise define it here. The helper
// returns modules flagged `__esModule` untouched and wraps anything else in an
// object whose `default` property is the original value.
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
|
||
|
|
Object.defineProperty(exports, "__esModule", { value: true });
|
||
|
|
exports.GoogleCloudTTSProvider = void 0;
|
||
|
|
const fs_1 = __importDefault(require("fs"));
|
||
|
|
const child_process_1 = require("child_process");
|
||
|
|
const text_to_speech_1 = require("@google-cloud/text-to-speech");
|
||
|
|
const mediaUtils_1 = require("../../utils/mediaUtils");
|
||
|
|
/**
 * Text-to-speech provider built on the Google Cloud Text-to-Speech client.
 *
 * Requests MP3 audio for a piece of text, writes it to disk, optionally
 * re-times it with ffmpeg, and reports the clip duration plus a cost figure.
 */
class GoogleCloudTTSProvider {
    /**
     * @param {object} config - Provider settings; kept on `this.config`.
     * @param {string} [config.apiKey] - Forwarded to the client as `apiKey`.
     * @param {string} [config.keyFilename] - Forwarded to the client as
     *   `keyFilename` when truthy.
     * @param {string} [config.voice] - Voice name used when a call does not
     *   pass `options.voice`.
     */
    constructor(config) {
        this.config = config;
        const clientConfig = {
            apiKey: config.apiKey,
            fallback: true
        };
        if (config.keyFilename) {
            clientConfig.keyFilename = config.keyFilename;
        }
        this.client = new text_to_speech_1.TextToSpeechClient(clientConfig);
    }

    /**
     * Synthesizes `text` and writes the resulting MP3 to `outputPath`.
     *
     * Any failure (API error, empty response, file or ffmpeg error) is logged
     * and replaced by a one-second silent MP3 generated with ffmpeg, so the
     * returned promise only rejects when that fallback itself fails.
     *
     * @param {string} text - Text to synthesize.
     * @param {string} outputPath - Destination file for the MP3.
     * @param {object} [options]
     * @param {string} [options.voice] - Voice name; falls back to
     *   `config.voice`, then `'en-US-Chirp-HD-F'`.
     * @param {number} [options.speedFactor] - Playback speed; any falsy value
     *   is treated as `1.0`.
     * @returns {Promise<{duration: number, cost: number}>} `duration` is
     *   whatever `getAudioDuration` reports for the output file and `cost` is
     *   `text.length`; on failure the result is `{ duration: 1, cost: 0 }`.
     */
    async textToSpeech(text, outputPath, options = {}) {
        try {
            const voice = options.voice || this.config.voice || 'en-US-Chirp-HD-F';
            const speedFactor = options.speedFactor || 1.0;
            // NOTE(review): speedFactor is sent to the service as speakingRate
            // AND applied again below through ffmpeg's atempo filter. If the
            // selected voice honours speakingRate the speed change compounds;
            // confirm which of the two is intended.
            const request = {
                input: { text },
                voice: {
                    languageCode: this.extractLanguageCode(voice),
                    name: voice
                },
                audioConfig: {
                    audioEncoding: 'MP3',
                    speakingRate: speedFactor
                }
            };
            const [response] = await this.client.synthesizeSpeech(request);
            if (!response.audioContent) {
                throw new Error('No audio content returned from Google Cloud TTS');
            }
            const audioBuffer = Buffer.from(response.audioContent);

            // Insert "_temp" before the extension. A path without an extension
            // would otherwise map onto itself, making ffmpeg read and write the
            // same file and the cleanup step delete the final output.
            let tempOutputPath = outputPath.replace(/\.\w+$/, '_temp$&');
            if (tempOutputPath === outputPath) {
                tempOutputPath = `${outputPath}_temp`;
            }
            fs_1.default.writeFileSync(tempOutputPath, audioBuffer);
            const cost = text.length;
            try {
                if (speedFactor !== 1.0) {
                    // Argument array instead of a shell string: paths with
                    // quotes, spaces or shell metacharacters are passed verbatim.
                    child_process_1.execFileSync('ffmpeg', [
                        '-v', 'error',
                        '-i', tempOutputPath,
                        '-filter:a', `atempo=${speedFactor}`,
                        '-c:a', 'libmp3lame',
                        '-q:a', '2',
                        outputPath,
                        '-y'
                    ]);
                }
                else {
                    fs_1.default.renameSync(tempOutputPath, outputPath);
                }
            }
            finally {
                // Remove the intermediate file even when ffmpeg or the rename
                // failed; after a successful rename it no longer exists.
                if (fs_1.default.existsSync(tempOutputPath)) {
                    fs_1.default.unlinkSync(tempOutputPath);
                }
            }
            const audioDuration = (0, mediaUtils_1.getAudioDuration)(outputPath);
            return {
                duration: audioDuration,
                cost: cost
            };
        }
        catch (error) {
            // Guard against non-Error throwables (e.g. `throw undefined`).
            const reason = error && error.message ? error.message : error;
            console.error('Google Cloud TTS error:', reason);
            // Best-effort fallback: one second of silence at the requested path.
            child_process_1.execFileSync('ffmpeg', [
                '-v', 'error',
                '-f', 'lavfi',
                '-i', 'anullsrc=r=24000:cl=mono',
                '-t', '1',
                '-q:a', '9',
                '-acodec', 'libmp3lame',
                outputPath,
                '-y'
            ]);
            return {
                duration: 1,
                cost: 0
            };
        }
    }

    /**
     * Derives a language code from a voice name by joining its first two
     * hyphen-separated parts (e.g. `'en-US-Chirp-HD-F'` -> `'en-US'`).
     *
     * @param {string} voiceName - Voice identifier.
     * @returns {string} The derived code, or `'en-US'` when the name contains
     *   no hyphen.
     */
    extractLanguageCode(voiceName) {
        const parts = voiceName.split('-');
        if (parts.length >= 2) {
            return `${parts[0]}-${parts[1]}`;
        }
        return 'en-US';
    }
}
|
||
|
|
exports.GoogleCloudTTSProvider = GoogleCloudTTSProvider;
|
||
|
|
//# sourceMappingURL=googleCloudTTSProvider.js.map
|