Add Google Cloud Chirp 3 TTS provider with service account support
This commit is contained in:
895
package-lock.json
generated
895
package-lock.json
generated
File diff suppressed because it is too large
Load Diff
@@ -19,6 +19,7 @@
|
|||||||
"prepublishOnly": "npm run build"
|
"prepublishOnly": "npm run build"
|
||||||
},
|
},
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
|
"@google-cloud/text-to-speech": "^6.4.1",
|
||||||
"@google/generative-ai": "^0.24.0",
|
"@google/generative-ai": "^0.24.0",
|
||||||
"axios": "^1.6.2",
|
"axios": "^1.6.2",
|
||||||
"dotenv": "^16.3.1",
|
"dotenv": "^16.3.1",
|
||||||
|
|||||||
@@ -86,6 +86,12 @@ export function getDefaultConfig(): Config {
|
|||||||
apiKey: process.env.ELEVENLABS_API_KEY,
|
apiKey: process.env.ELEVENLABS_API_KEY,
|
||||||
model: "eleven_multilingual_v2",
|
model: "eleven_multilingual_v2",
|
||||||
voice: "JBFqnCBsd6RMkjVDRZzb"
|
voice: "JBFqnCBsd6RMkjVDRZzb"
|
||||||
|
},
|
||||||
|
google: {
|
||||||
|
apiKey: process.env.GOOGLE_CLOUD_TTS_KEY,
|
||||||
|
keyFilename: process.env.GOOGLE_CLOUD_TTS_KEYFILE,
|
||||||
|
model: "chirp-hd",
|
||||||
|
voice: "en-US-Chirp-HD-F"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
|
||||||
|
|||||||
@@ -46,6 +46,7 @@ export interface TTSProviderConfig {
|
|||||||
apiKey?: string;
|
apiKey?: string;
|
||||||
model: string;
|
model: string;
|
||||||
voice?: string;
|
voice?: string;
|
||||||
|
keyFilename?: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface TTSProvider {
|
export interface TTSProvider {
|
||||||
|
|||||||
94
src/providers/tts/googleCloudTTSProvider.ts
Normal file
94
src/providers/tts/googleCloudTTSProvider.ts
Normal file
@@ -0,0 +1,94 @@
|
|||||||
|
import fs from 'fs';
|
||||||
|
import { execSync } from 'child_process';
|
||||||
|
import { TextToSpeechClient } from '@google-cloud/text-to-speech';
|
||||||
|
import { google } from '@google-cloud/text-to-speech/build/protos/protos';
|
||||||
|
import { TTSProvider, TTSProviderConfig, TTSOptions, TTSResult } from '../../interfaces';
|
||||||
|
import { getAudioDuration } from '../../utils/mediaUtils';
|
||||||
|
|
||||||
|
/**
 * Text-to-speech provider backed by the Google Cloud Text-to-Speech API.
 *
 * Authentication uses `config.apiKey` and/or a service-account key file
 * (`config.keyFilename`); both values are handed to `TextToSpeechClient` as-is.
 * Synthesized audio is requested as MP3 and written to the caller's output path.
 */
export class GoogleCloudTTSProvider implements TTSProvider {
  private config: TTSProviderConfig;
  private client: TextToSpeechClient;

  /**
   * @param config Provider settings. `apiKey` and `keyFilename` are forwarded to
   *   the client; `voice` is the default voice name used by `textToSpeech`.
   */
  constructor(config: TTSProviderConfig) {
    this.config = config;

    // Kept loosely typed, as in the original, because the exact client option
    // typings are not visible from this file.
    // NOTE(review): `fallback: true` presumably selects the client's HTTP
    // fallback transport instead of gRPC — confirm against the google-gax docs.
    const clientConfig: any = {
      apiKey: config.apiKey,
      fallback: true
    };

    if (config.keyFilename) {
      clientConfig.keyFilename = config.keyFilename;
    }

    this.client = new TextToSpeechClient(clientConfig);
  }

  /**
   * Synthesizes `text` to an MP3 file at `outputPath`.
   *
   * The speed factor is applied exactly once, locally, via ffmpeg's `atempo`
   * filter. It is deliberately NOT also sent as `audioConfig.speakingRate`:
   * doing both would compound the change (effective speed = speedFactor²)
   * whenever the selected voice honors `speakingRate`.
   *
   * `options.model` / `config.model` are not forwarded: the request built here
   * identifies the voice by name and language code only.
   *
   * On any failure the error is logged and a 1-second silent MP3 is written to
   * `outputPath` instead (best-effort fallback), reported with zero cost.
   *
   * @param text Plain text to synthesize.
   * @param outputPath Destination file for the final audio.
   * @param options Optional per-call overrides (`voice`, `speedFactor`).
   * @returns The duration reported by `getAudioDuration` for the written file
   *   and the cost, measured here as the number of input characters.
   */
  async textToSpeech(
    text: string,
    outputPath: string,
    options: TTSOptions = {}
  ): Promise<TTSResult> {
    try {
      const voice = options.voice || this.config.voice || 'en-US-Chirp-HD-F';
      // `||` (not `??`) on purpose: a speed factor of 0 is meaningless and
      // falls back to normal speed, exactly as before.
      const speedFactor = options.speedFactor || 1.0;

      const request: google.cloud.texttospeech.v1.ISynthesizeSpeechRequest = {
        input: { text },
        voice: {
          languageCode: this.extractLanguageCode(voice),
          name: voice
        },
        audioConfig: {
          audioEncoding: 'MP3'
        }
      };

      const [response] = await this.client.synthesizeSpeech(request);

      if (!response.audioContent) {
        throw new Error('No audio content returned from Google Cloud TTS');
      }

      const audioBuffer = this.toAudioBuffer(response.audioContent);

      const tempOutputPath = this.buildTempPath(outputPath);
      fs.writeFileSync(tempOutputPath, audioBuffer);

      const cost = text.length;

      if (speedFactor !== 1.0) {
        try {
          // NOTE(review): paths are interpolated into a shell command; a path
          // containing `"`, `$` or backticks will break (or alter) the command.
          // Consider execFileSync with an argument array.
          execSync(`ffmpeg -v error -i "${tempOutputPath}" -filter:a "atempo=${speedFactor}" -c:a libmp3lame -q:a 2 "${outputPath}" -y`);
        } finally {
          // Remove the intermediate file even when ffmpeg fails.
          if (fs.existsSync(tempOutputPath)) {
            fs.unlinkSync(tempOutputPath);
          }
        }
      } else {
        fs.renameSync(tempOutputPath, outputPath);
      }

      const audioDuration = getAudioDuration(outputPath);

      return {
        duration: audioDuration,
        cost: cost
      };
    } catch (error: unknown) {
      console.error('Google Cloud TTS error:', error instanceof Error ? error.message : error);
      // Best-effort fallback: emit one second of silence so downstream steps
      // still find a file at outputPath.
      execSync(`ffmpeg -v error -f lavfi -i anullsrc=r=24000:cl=mono -t 1 -q:a 9 -acodec libmp3lame "${outputPath}" -y`);
      return {
        duration: 1,
        cost: 0
      };
    }
  }

  /**
   * Converts the API's `audioContent` payload to a Buffer.
   * Binary payloads are copied as-is; string payloads are treated as base64,
   * which is how protobuf.js represents `bytes` fields in string form.
   */
  private toAudioBuffer(audioContent: Uint8Array | string): Buffer {
    if (typeof audioContent === 'string') {
      return Buffer.from(audioContent, 'base64');
    }
    return Buffer.from(audioContent);
  }

  /**
   * Derives the intermediate file path by inserting `_temp` before the file
   * extension. When the path has no extension, `_temp` is appended so the
   * intermediate file never collides with the final output.
   */
  private buildTempPath(outputPath: string): string {
    const withSuffix = outputPath.replace(/\.\w+$/, '_temp$&');
    return withSuffix === outputPath ? `${outputPath}_temp` : withSuffix;
  }

  /**
   * Returns the first two dash-separated segments of a voice name as the
   * language code (e.g. `en-US-Chirp-HD-F` → `en-US`), or `en-US` when the
   * name has fewer than two segments.
   */
  private extractLanguageCode(voiceName: string): string {
    const parts = voiceName.split('-');
    if (parts.length >= 2) {
      return `${parts[0]}-${parts[1]}`;
    }
    return 'en-US';
  }
}
|
||||||
@@ -1,3 +1,4 @@
|
|||||||
export * from './ttsProviderFactory';
|
export * from './ttsProviderFactory';
|
||||||
export * from './openAITTSProvider';
|
export * from './openAITTSProvider';
|
||||||
export * from './elevenLabsTTSProvider';
|
export * from './elevenLabsTTSProvider';
|
||||||
|
export * from './googleCloudTTSProvider';
|
||||||
@@ -2,6 +2,7 @@ import { TTSProvider } from '../../interfaces';
|
|||||||
import { Config } from '../../config/config';
|
import { Config } from '../../config/config';
|
||||||
import { OpenAITTSProvider } from './openAITTSProvider';
|
import { OpenAITTSProvider } from './openAITTSProvider';
|
||||||
import { ElevenLabsTTSProvider } from './elevenLabsTTSProvider';
|
import { ElevenLabsTTSProvider } from './elevenLabsTTSProvider';
|
||||||
|
import { GoogleCloudTTSProvider } from './googleCloudTTSProvider';
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Factory for creating TTS providers
|
* Factory for creating TTS providers
|
||||||
@@ -20,6 +21,8 @@ export class TTSProviderFactory {
|
|||||||
return new OpenAITTSProvider(providerConfig);
|
return new OpenAITTSProvider(providerConfig);
|
||||||
case 'elevenlabs':
|
case 'elevenlabs':
|
||||||
return new ElevenLabsTTSProvider(providerConfig);
|
return new ElevenLabsTTSProvider(providerConfig);
|
||||||
|
case 'google':
|
||||||
|
return new GoogleCloudTTSProvider(providerConfig);
|
||||||
// Add other providers here
|
// Add other providers here
|
||||||
default:
|
default:
|
||||||
throw new Error(`TTS provider "${providerName}" not implemented.`);
|
throw new Error(`TTS provider "${providerName}" not implemented.`);
|
||||||
|
|||||||
Reference in New Issue
Block a user