Add Google Cloud Chirp 3 TTS provider with service account support
This commit is contained in:
895
package-lock.json
generated
895
package-lock.json
generated
File diff suppressed because it is too large
Load Diff
@@ -19,6 +19,7 @@
|
||||
"prepublishOnly": "npm run build"
|
||||
},
|
||||
"dependencies": {
|
||||
"@google-cloud/text-to-speech": "^6.4.1",
|
||||
"@google/generative-ai": "^0.24.0",
|
||||
"axios": "^1.6.2",
|
||||
"dotenv": "^16.3.1",
|
||||
@@ -51,4 +52,4 @@
|
||||
],
|
||||
"author": "",
|
||||
"license": "MIT"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -86,6 +86,12 @@ export function getDefaultConfig(): Config {
|
||||
apiKey: process.env.ELEVENLABS_API_KEY,
|
||||
model: "eleven_multilingual_v2",
|
||||
voice: "JBFqnCBsd6RMkjVDRZzb"
|
||||
},
|
||||
google: {
|
||||
apiKey: process.env.GOOGLE_CLOUD_TTS_KEY,
|
||||
keyFilename: process.env.GOOGLE_CLOUD_TTS_KEYFILE,
|
||||
model: "chirp-hd",
|
||||
voice: "en-US-Chirp-HD-F"
|
||||
}
|
||||
},
|
||||
|
||||
|
||||
@@ -46,6 +46,7 @@ export interface TTSProviderConfig {
|
||||
apiKey?: string;
|
||||
model: string;
|
||||
voice?: string;
|
||||
keyFilename?: string;
|
||||
}
|
||||
|
||||
export interface TTSProvider {
|
||||
|
||||
94
src/providers/tts/googleCloudTTSProvider.ts
Normal file
94
src/providers/tts/googleCloudTTSProvider.ts
Normal file
@@ -0,0 +1,94 @@
|
||||
import fs from 'fs';
import { execFileSync, execSync } from 'child_process';
import { TextToSpeechClient } from '@google-cloud/text-to-speech';
import { google } from '@google-cloud/text-to-speech/build/protos/protos';
import { TTSProvider, TTSProviderConfig, TTSOptions, TTSResult } from '../../interfaces';
import { getAudioDuration } from '../../utils/mediaUtils';
|
||||
|
||||
/**
 * TTS provider that synthesizes speech through the Google Cloud
 * Text-to-Speech client and writes the result to an MP3 file.
 *
 * Authentication options are taken from the provider config: `apiKey` is
 * always forwarded, and `keyFilename` (service-account key file) is forwarded
 * when it is set.
 */
export class GoogleCloudTTSProvider implements TTSProvider {
  private config: TTSProviderConfig;
  private client: TextToSpeechClient;

  /**
   * @param config Provider settings; `apiKey`, `keyFilename` and `voice` are read here
   *               or later in `textToSpeech`.
   */
  constructor(config: TTSProviderConfig) {
    this.config = config;

    // The options object is kept loosely typed because it is handed to the
    // client constructor verbatim.
    // NOTE(review): `fallback: true` presumably selects the client's non-gRPC
    // transport - confirm against the client library documentation.
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    const clientConfig: any = {
      apiKey: config.apiKey,
      fallback: true
    };

    if (config.keyFilename) {
      clientConfig.keyFilename = config.keyFilename;
    }

    this.client = new TextToSpeechClient(clientConfig);
  }

  /**
   * Synthesizes `text` and writes the audio to `outputPath` as MP3.
   *
   * The voice is resolved from `options.voice`, then the configured voice,
   * then `'en-US-Chirp-HD-F'`. The request carries only the voice name and a
   * language code derived from it; no separate model field is sent.
   *
   * Speed changes are applied exactly once, locally, with ffmpeg's `atempo`
   * filter. The speed factor is deliberately NOT also sent to the API as
   * `speakingRate`, which would apply it a second time whenever the service
   * honors that field.
   *
   * On any failure the error message is logged and a 1-second silent MP3 is
   * written to `outputPath` instead (best-effort fallback).
   *
   * @param text Text to synthesize.
   * @param outputPath Destination path of the MP3 file.
   * @param options Optional per-call overrides (`voice`, `speedFactor`).
   * @returns The audio duration reported by `getAudioDuration` and a cost equal
   *          to the number of characters in `text`; `{ duration: 1, cost: 0 }`
   *          when the silent fallback was written.
   * @throws If the fallback ffmpeg invocation itself fails.
   */
  async textToSpeech(
    text: string,
    outputPath: string,
    options: TTSOptions = {}
  ): Promise<TTSResult> {
    const tempOutputPath = this.buildTempPath(outputPath);

    try {
      // `||` is intentional: an empty voice or a speed factor of 0 is not usable.
      const voice = options.voice || this.config.voice || 'en-US-Chirp-HD-F';
      const speedFactor = options.speedFactor || 1.0;

      const request: google.cloud.texttospeech.v1.ISynthesizeSpeechRequest = {
        input: { text },
        voice: {
          languageCode: this.extractLanguageCode(voice),
          name: voice
        },
        audioConfig: {
          audioEncoding: 'MP3'
        }
      };

      const [response] = await this.client.synthesizeSpeech(request);

      const audioContent = response.audioContent;
      if (!audioContent || audioContent.length === 0) {
        throw new Error('No audio content returned from Google Cloud TTS');
      }

      // Bytes fields may arrive as a base64 string instead of raw bytes;
      // decode that form as base64 rather than as UTF-8 text.
      const audioBuffer =
        typeof audioContent === 'string'
          ? Buffer.from(audioContent, 'base64')
          : Buffer.from(audioContent);

      fs.writeFileSync(tempOutputPath, audioBuffer);

      if (speedFactor !== 1.0) {
        // Argument array instead of a shell string: paths containing quotes,
        // `$` or backticks can neither break nor inject into the command.
        execFileSync('ffmpeg', [
          '-v', 'error',
          '-i', tempOutputPath,
          '-filter:a', `atempo=${speedFactor}`,
          '-c:a', 'libmp3lame',
          '-q:a', '2',
          outputPath,
          '-y'
        ]);
      } else {
        fs.renameSync(tempOutputPath, outputPath);
      }

      return {
        duration: getAudioDuration(outputPath),
        cost: text.length
      };
    } catch (error: unknown) {
      const message = error instanceof Error ? error.message : String(error);
      console.error('Google Cloud TTS error:', message);

      // Best-effort fallback: one second of silence so downstream steps still
      // find a file at outputPath.
      execFileSync('ffmpeg', [
        '-v', 'error',
        '-f', 'lavfi',
        '-i', 'anullsrc=r=24000:cl=mono',
        '-t', '1',
        '-q:a', '9',
        '-acodec', 'libmp3lame',
        outputPath,
        '-y'
      ]);

      return {
        duration: 1,
        cost: 0
      };
    } finally {
      // Covers every exit path, including failures after the temp file was
      // written; a no-op when the file was renamed or never created.
      this.removeIfPresent(tempOutputPath);
    }
  }

  /**
   * Builds the scratch-file path used before the final output is produced:
   * `_temp` is inserted before the extension, or appended when the path has no
   * extension, so the result never equals `outputPath`.
   */
  private buildTempPath(outputPath: string): string {
    const withSuffix = outputPath.replace(/\.\w+$/, '_temp$&');
    return withSuffix !== outputPath ? withSuffix : `${outputPath}_temp`;
  }

  /** Deletes `filePath` if it exists; cleanup failures are ignored on purpose. */
  private removeIfPresent(filePath: string): void {
    try {
      fs.rmSync(filePath, { force: true });
    } catch {
      // Cleanup is best-effort and must not mask the synthesis result.
    }
  }

  /**
   * Derives the language code from a voice name by joining its first two
   * dash-separated segments (e.g. `en-US-Chirp-HD-F` -> `en-US`); returns
   * `'en-US'` when the name has fewer than two segments.
   */
  private extractLanguageCode(voiceName: string): string {
    const parts = voiceName.split('-');
    if (parts.length >= 2) {
      return `${parts[0]}-${parts[1]}`;
    }
    return 'en-US';
  }
}
|
||||
@@ -1,3 +1,4 @@
|
||||
export * from './ttsProviderFactory';
|
||||
export * from './openAITTSProvider';
|
||||
export * from './elevenLabsTTSProvider';
|
||||
export * from './elevenLabsTTSProvider';
|
||||
export * from './googleCloudTTSProvider';
|
||||
@@ -2,6 +2,7 @@ import { TTSProvider } from '../../interfaces';
|
||||
import { Config } from '../../config/config';
|
||||
import { OpenAITTSProvider } from './openAITTSProvider';
|
||||
import { ElevenLabsTTSProvider } from './elevenLabsTTSProvider';
|
||||
import { GoogleCloudTTSProvider } from './googleCloudTTSProvider';
|
||||
|
||||
/**
|
||||
* Factory for creating TTS providers
|
||||
@@ -20,6 +21,8 @@ export class TTSProviderFactory {
|
||||
return new OpenAITTSProvider(providerConfig);
|
||||
case 'elevenlabs':
|
||||
return new ElevenLabsTTSProvider(providerConfig);
|
||||
case 'google':
|
||||
return new GoogleCloudTTSProvider(providerConfig);
|
||||
// Add other providers here
|
||||
default:
|
||||
throw new Error(`TTS provider "${providerName}" not implemented.`);
|
||||
|
||||
Reference in New Issue
Block a user