Add Google Cloud Chirp 3 TTS provider with service account support

This commit is contained in:
2026-05-13 02:42:54 +02:00
parent 6e9a26557f
commit f05e57493c
7 changed files with 992 additions and 13 deletions

895
package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@@ -19,6 +19,7 @@
"prepublishOnly": "npm run build"
},
"dependencies": {
"@google-cloud/text-to-speech": "^6.4.1",
"@google/generative-ai": "^0.24.0",
"axios": "^1.6.2",
"dotenv": "^16.3.1",
@@ -51,4 +52,4 @@
],
"author": "",
"license": "MIT"
}
}

View File

@@ -86,6 +86,12 @@ export function getDefaultConfig(): Config {
apiKey: process.env.ELEVENLABS_API_KEY,
model: "eleven_multilingual_v2",
voice: "JBFqnCBsd6RMkjVDRZzb"
},
google: {
apiKey: process.env.GOOGLE_CLOUD_TTS_KEY,
keyFilename: process.env.GOOGLE_CLOUD_TTS_KEYFILE,
model: "chirp-hd",
voice: "en-US-Chirp-HD-F"
}
},

View File

@@ -46,6 +46,7 @@ export interface TTSProviderConfig {
apiKey?: string;
model: string;
voice?: string;
keyFilename?: string;
}
export interface TTSProvider {

View File

@@ -0,0 +1,94 @@
import fs from 'fs';
import { execFileSync, execSync } from 'child_process';
import { TextToSpeechClient } from '@google-cloud/text-to-speech';
import { google } from '@google-cloud/text-to-speech/build/protos/protos';
import { TTSProvider, TTSProviderConfig, TTSOptions, TTSResult } from '../../interfaces';
import { getAudioDuration } from '../../utils/mediaUtils';
/**
 * TTS provider that synthesizes speech through Google Cloud Text-to-Speech
 * and writes the result as an MP3 file.
 *
 * Authentication is taken from the provider config: an API key and/or the
 * path to a service-account key file (`keyFilename`).
 */
export class GoogleCloudTTSProvider implements TTSProvider {
  private config: TTSProviderConfig;
  private client: TextToSpeechClient;

  /**
   * @param config Provider settings; `apiKey` and `keyFilename` are forwarded
   *               to the Google client, `voice` is the default voice name.
   */
  constructor(config: TTSProviderConfig) {
    this.config = config;
    // NOTE(review): `fallback: true` presumably selects the client's non-gRPC
    // transport — confirm against the google-gax client options documentation.
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    const clientConfig: any = {
      apiKey: config.apiKey,
      fallback: true
    };
    if (config.keyFilename) {
      clientConfig.keyFilename = config.keyFilename;
    }
    this.client = new TextToSpeechClient(clientConfig);
  }

  /**
   * Synthesizes `text` and writes the audio to `outputPath`.
   *
   * The speed factor is applied exactly once, by ffmpeg's `atempo` filter
   * after synthesis. It is deliberately NOT also sent to the API as
   * `speakingRate`: doing both would compound the tempo change.
   *
   * On any failure the error is logged and a 1-second silent MP3 is written
   * to `outputPath` instead (best-effort fallback); the result then reports
   * `duration: 1` and `cost: 0`.
   *
   * @param text        Text to synthesize.
   * @param outputPath  Destination file for the audio.
   * @param options     Optional per-call overrides (`voice`, `speedFactor`).
   * @returns Duration of the written file (via `getAudioDuration`) and the
   *          cost, counted as the number of characters in `text`.
   * @throws If the silent-audio fallback itself cannot be produced
   *         (e.g. ffmpeg is not available).
   */
  async textToSpeech(
    text: string,
    outputPath: string,
    options: TTSOptions = {}
  ): Promise<TTSResult> {
    const tempOutputPath = this.buildTempPath(outputPath);
    try {
      const voice = options.voice || this.config.voice || 'en-US-Chirp-HD-F';
      // `||` on purpose: a missing or zero speed factor means "normal speed".
      const speedFactor = options.speedFactor || 1.0;

      const request: google.cloud.texttospeech.v1.ISynthesizeSpeechRequest = {
        input: { text },
        voice: {
          languageCode: this.extractLanguageCode(voice),
          name: voice
        },
        audioConfig: {
          audioEncoding: 'MP3'
        }
      };

      const [response] = await this.client.synthesizeSpeech(request);
      if (!response.audioContent) {
        throw new Error('No audio content returned from Google Cloud TTS');
      }

      // The generated type allows the bytes field to be a string; assumed to
      // be base64 per the protobufjs convention for bytes — TODO confirm.
      const audioBuffer =
        typeof response.audioContent === 'string'
          ? Buffer.from(response.audioContent, 'base64')
          : Buffer.from(response.audioContent);

      fs.writeFileSync(tempOutputPath, audioBuffer);

      if (speedFactor !== 1.0) {
        // Argument array + execFileSync: no shell is involved, so quotes or
        // shell metacharacters in the paths cannot alter the command.
        execFileSync('ffmpeg', [
          '-y',
          '-v', 'error',
          '-i', tempOutputPath,
          '-filter:a', `atempo=${speedFactor}`,
          '-c:a', 'libmp3lame',
          '-q:a', '2',
          outputPath
        ]);
        fs.unlinkSync(tempOutputPath);
      } else {
        fs.renameSync(tempOutputPath, outputPath);
      }

      return {
        duration: getAudioDuration(outputPath),
        cost: text.length
      };
    } catch (error: unknown) {
      const message = error instanceof Error ? error.message : String(error);
      console.error('Google Cloud TTS error:', message);

      // Do not leave a stray temp file behind when a later step failed.
      this.removeIfExists(tempOutputPath);

      // Best-effort fallback: one second of silence so downstream steps
      // still find a file at outputPath.
      execFileSync('ffmpeg', [
        '-y',
        '-v', 'error',
        '-f', 'lavfi',
        '-i', 'anullsrc=r=24000:cl=mono',
        '-t', '1',
        '-q:a', '9',
        '-acodec', 'libmp3lame',
        outputPath
      ]);
      return {
        duration: 1,
        cost: 0
      };
    }
  }

  /**
   * Derives the language code from a voice name by joining its first two
   * dash-separated parts (e.g. `en-US-Chirp-HD-F` -> `en-US`).
   * Falls back to `en-US` when the name has fewer than two parts.
   */
  private extractLanguageCode(voiceName: string): string {
    const parts = voiceName.split('-');
    if (parts.length >= 2) {
      return `${parts[0]}-${parts[1]}`;
    }
    return 'en-US';
  }

  /**
   * Returns a temp path next to `outputPath`, guaranteed to differ from it:
   * `_temp` is inserted before the extension, or appended when the path has
   * no extension (otherwise ffmpeg would read and write the same file).
   */
  private buildTempPath(outputPath: string): string {
    const withSuffix = outputPath.replace(/\.\w+$/, '_temp$&');
    return withSuffix === outputPath ? `${outputPath}_temp` : withSuffix;
  }

  /** Deletes `filePath` if present; cleanup failures are ignored on purpose. */
  private removeIfExists(filePath: string): void {
    try {
      if (fs.existsSync(filePath)) {
        fs.unlinkSync(filePath);
      }
    } catch {
      // Cleanup is best-effort; the original error has already been logged.
    }
  }
}

View File

@@ -1,3 +1,4 @@
export * from './ttsProviderFactory';
export * from './openAITTSProvider';
export * from './elevenLabsTTSProvider';
export * from './elevenLabsTTSProvider';
export * from './googleCloudTTSProvider';

View File

@@ -2,6 +2,7 @@ import { TTSProvider } from '../../interfaces';
import { Config } from '../../config/config';
import { OpenAITTSProvider } from './openAITTSProvider';
import { ElevenLabsTTSProvider } from './elevenLabsTTSProvider';
import { GoogleCloudTTSProvider } from './googleCloudTTSProvider';
/**
* Factory for creating TTS providers
@@ -20,6 +21,8 @@ export class TTSProviderFactory {
return new OpenAITTSProvider(providerConfig);
case 'elevenlabs':
return new ElevenLabsTTSProvider(providerConfig);
case 'google':
return new GoogleCloudTTSProvider(providerConfig);
// Add other providers here
default:
throw new Error(`TTS provider "${providerName}" not implemented.`);