Update code to typescript

2026-05-14 20:06:15 +02:00
parent fdb4b2d50f
commit f2ce38c176
68 changed files with 7647 additions and 5121 deletions
--- a/src/tts/BaseEngine.ts
+++ b/src/tts/BaseEngine.ts
@@ -0,0 +1,56 @@
+import { writeFile } from "node:fs/promises";
+
+/** Free-form, provider-specific synthesis options passed through to an engine. */
+export type VoiceParams = Record<string, unknown>;
+
+/**
+ * Reads `key` from `table` only when it is an own property, so names inherited
+ * from `Object.prototype` ("constructor", "toString", ...) never match.
+ */
+function ownEntry<T>(table: Record<string, T>, key: string): T | undefined {
+  return Object.prototype.hasOwnProperty.call(table, key) ? table[key] : undefined;
+}
+
+/**
+ * Common contract for every TTS provider. Subclasses override either
+ * `getSpeech` (returning a fetch-like Response) or `getSpeechFile` (writing
+ * directly to disk) — the default `getSpeechFile` pipes `getSpeech` to a file.
+ */
+export abstract class BaseEngine {
+  /** Short ID used in env / commands (e.g. "azure"). */
+  readonly shortName: string;
+  /** Human-readable name shown in messages. */
+  readonly longName: string;
+  /** Output file extension without the dot (e.g. "mp3"). */
+  readonly fileExtension: string;
+
+  /**
+   * User-facing voice name -> provider identifier (or a `{ name, lang }` record).
+   * An empty table means "no known list", in which case every voice validates.
+   */
+  protected voices: Record<string, string | { name: string; lang: string }> = {};
+
+  constructor(shortName: string, longName: string, fileExtension: string) {
+    this.shortName = shortName;
+    this.longName = longName;
+    this.fileExtension = fileExtension;
+  }
+
+  /**
+   * Maps a user-friendly voice name to the provider's internal identifier.
+   *
+   * @param str - Voice name as typed by the user.
+   * @returns The mapped identifier, or `str` unchanged when no mapping exists.
+   */
+  getInternalVoiceName(str: string): string {
+    const entry = ownEntry(this.voices, str);
+    if (entry == null) return str;
+    return typeof entry === "string" ? entry : entry.name;
+  }
+
+  /** Voice used when the caller does not pick one. */
+  abstract getDefaultVoice(): string;
+
+  /**
+   * Checks whether `voice` is usable with this engine.
+   *
+   * @returns `true` when the voice table is empty (nothing to validate against)
+   *   or when `voice` is one of its own keys; `false` otherwise.
+   */
+  validateVoice(voice: string): boolean {
+    if (Object.keys(this.voices).length === 0) return true;
+    return ownEntry(this.voices, voice) != null;
+  }
+
+  /** Default implementation: subclass should override either this or getSpeechFile. */
+  async getSpeech(_text: string, _voice?: string, _params?: VoiceParams): Promise<Response> {
+    throw new Error(`${this.shortName}: getSpeech not implemented`);
+  }
+
+  /**
+   * Synthesizes `text` via `getSpeech` and writes the response body to `filepath`.
+   *
+   * @param text - Text to speak.
+   * @param filepath - Destination file; overwritten if it exists.
+   * @param voice - Voice name; defaults to `getDefaultVoice()`.
+   * @param params - Provider-specific options forwarded to `getSpeech`.
+   * @returns `filepath`, once the file has been written.
+   * @throws Error when `getSpeech` fails or answers with a non-2xx status.
+   */
+  async getSpeechFile(
+    text: string,
+    filepath: string,
+    voice: string = this.getDefaultVoice(),
+    params: VoiceParams = {},
+  ): Promise<string> {
+    const data = await this.getSpeech(text, voice, params);
+    // A failed request carries an error payload, not audio; never save it as a sound file.
+    if (!data.ok) {
+      throw new Error(
+        `${this.shortName}: speech request failed with HTTP ${data.status} ${data.statusText}`.trimEnd(),
+      );
+    }
+    const buf = Buffer.from(await data.arrayBuffer());
+    await writeFile(filepath, buf);
+    return filepath;
+  }
+}
--- a/src/tts/azure.ts
+++ b/src/tts/azure.ts
@@ -0,0 +1,77 @@
+import * as sdk from "microsoft-cognitiveservices-speech-sdk";
+import { config } from "../config.js";
+import { BaseEngine, type VoiceParams } from "./BaseEngine.js";
+
+/** Fields read from each entry of the Azure voice-list response. */
+interface AzureVoiceMeta {
+  DisplayName: string;
+  ShortName: string;
+  Name: string;
+}
+
+/** TTS engine backed by the Microsoft Cognitive Services Speech SDK. */
+export class AzureEngine extends BaseEngine {
+  /** Lowercased display name -> Azure `ShortName`, filled in by `populateVoiceList`. */
+  protected override voices: Record<string, string> = {};
+
+  constructor() {
+    super("azure", "Microsoft Azure TTS", "wav");
+    // Fire-and-forget: the list fills in the background and failures are logged, not thrown.
+    void this.populateVoiceList();
+  }
+
+  override getDefaultVoice(): string {
+    return "aria";
+  }
+
+  /**
+   * Resolves a user-supplied voice name to an Azure voice identifier.
+   * Keys are stored lowercased, so the lookup is case-insensitive; only own
+   * properties match, so names like "constructor" are not treated as voices.
+   */
+  private lookupVoice(voice: string): string | undefined {
+    const key = voice.toLowerCase();
+    return Object.prototype.hasOwnProperty.call(this.voices, key) ? this.voices[key] : undefined;
+  }
+
+  /**
+   * Synthesizes `text` straight into `filepath` using the Speech SDK.
+   *
+   * @param text - Text to speak.
+   * @param filepath - Destination audio file.
+   * @param voice - Voice display name; unknown names fall back to the default
+   *   voice, and if that is unknown too no voice name is set on the request.
+   * @param _params - Unused.
+   * @returns `filepath` once synthesis has completed.
+   * @throws Error when credentials are missing or the SDK reports anything
+   *   other than a completed synthesis.
+   */
+  override getSpeechFile(
+    text: string,
+    filepath: string,
+    voice: string = this.getDefaultVoice(),
+    _params: VoiceParams = {},
+  ): Promise<string> {
+    return new Promise((resolve, reject) => {
+      if (!config.AZURE_API_KEY || !config.AZURE_REGION) {
+        reject(new Error("AZURE_API_KEY and AZURE_REGION must be set"));
+        return;
+      }
+      const speechConfig = sdk.SpeechConfig.fromSubscription(
+        config.AZURE_API_KEY,
+        config.AZURE_REGION,
+      );
+      speechConfig.speechSynthesisOutputFormat = sdk.SpeechSynthesisOutputFormat.Riff24Khz16BitMonoPcm;
+      const internal = this.lookupVoice(voice) ?? this.lookupVoice(this.getDefaultVoice());
+      if (internal) speechConfig.speechSynthesisVoiceName = internal;
+      const audioConfig = sdk.AudioConfig.fromAudioFileOutput(filepath);
+      const synthesizer = new sdk.SpeechSynthesizer(speechConfig, audioConfig);
+      synthesizer.speakTextAsync(
+        text,
+        (result) => {
+          synthesizer.close();
+          if (!result) {
+            reject(new Error("Azure TTS returned no result"));
+            return;
+          }
+          // The success callback also fires for cancelled requests (bad key,
+          // unknown voice, ...); only a completed synthesis counts as success.
+          if (result.reason === sdk.ResultReason.SynthesizingAudioCompleted) {
+            resolve(filepath);
+          } else {
+            reject(
+              new Error(`Azure TTS synthesis failed: ${result.errorDetails || "no error details"}`),
+            );
+          }
+        },
+        (error: unknown) => {
+          synthesizer.close();
+          reject(error instanceof Error ? error : new Error(String(error)));
+        },
+      );
+    });
+  }
+
+  /**
+   * Loads the voice list from `AZURE_LIST_ENDPOINT` into `voices`.
+   * When two entries share a display name, the one whose `Name` contains
+   * "Neural" wins. Best-effort: any failure is logged and leaves the table as is.
+   */
+  private async populateVoiceList(): Promise<void> {
+    if (!config.AZURE_LIST_ENDPOINT || !config.AZURE_API_KEY) return;
+    try {
+      const res = await fetch(config.AZURE_LIST_ENDPOINT, {
+        headers: { "Ocp-Apim-Subscription-Key": config.AZURE_API_KEY },
+      });
+      if (!res.ok) {
+        throw new Error(`voice list request failed with HTTP ${res.status}`);
+      }
+      const json: unknown = await res.json();
+      if (!Array.isArray(json)) {
+        throw new Error("voice list response is not an array");
+      }
+      for (const voice of json as Partial<AzureVoiceMeta>[]) {
+        // Skip malformed entries instead of aborting the whole list.
+        if (typeof voice?.DisplayName !== "string" || typeof voice.ShortName !== "string") continue;
+        const key = voice.DisplayName.toLowerCase();
+        const known = Object.prototype.hasOwnProperty.call(this.voices, key);
+        if (!known || voice.Name?.includes("Neural")) {
+          this.voices[key] = voice.ShortName;
+        }
+      }
+    } catch (err) {
+      console.error("Azure: failed to populate voice list:", err);
+    }
+  }
+}
--- a/src/tts/eleven.ts
+++ b/src/tts/eleven.ts
@@ -0,0 +1,63 @@
+import { config } from "../config.js";
+import { BaseEngine, type VoiceParams } from "./BaseEngine.js";
+
+/** Fields read from each voice in the ElevenLabs `/v1/voices` response. */
+interface ElevenVoice {
+  name: string;
+  voice_id: string;
+}
+
+/** Shape read from the ElevenLabs `/v1/voices` response. */
+interface ElevenVoicesResponse {
+  voices: ElevenVoice[];
+}
+
+/** TTS engine that calls the ElevenLabs HTTP API. */
+export class ElevenEngine extends BaseEngine {
+  /** Lowercased voice name -> ElevenLabs `voice_id`, filled in by `populateVoiceList`. */
+  protected override voices: Record<string, string> = {};
+
+  constructor() {
+    super("eleven", "Eleven Labs TTS", "mp3");
+    // Fire-and-forget: the list fills in the background and failures are logged, not thrown.
+    void this.populateVoiceList();
+  }
+
+  override getDefaultVoice(): string {
+    return "guillem";
+  }
+
+  /**
+   * Loads the account's voices into `voices`.
+   * Best-effort: any failure is logged and leaves the table as is.
+   */
+  private async populateVoiceList(): Promise<void> {
+    if (!config.XI_API_KEY) return;
+    try {
+      const res = await fetch("https://api.elevenlabs.io/v1/voices", {
+        method: "GET",
+        headers: { "xi-api-key": config.XI_API_KEY },
+      });
+      if (!res.ok) {
+        throw new Error(`voice list request failed with HTTP ${res.status}`);
+      }
+      const json = (await res.json()) as Partial<ElevenVoicesResponse>;
+      if (!Array.isArray(json.voices)) {
+        throw new Error("voice list response has no `voices` array");
+      }
+      for (const v of json.voices) {
+        // Skip malformed entries instead of aborting the whole list.
+        if (typeof v?.name !== "string" || typeof v.voice_id !== "string") continue;
+        this.voices[v.name.toLowerCase()] = v.voice_id;
+      }
+    } catch (err) {
+      console.error("Eleven: failed to populate voice list:", err);
+    }
+  }
+
+  /**
+   * Requests speech for `text` from the ElevenLabs text-to-speech endpoint.
+   *
+   * @param text - Text to speak.
+   * @param voice - Voice name; mapped through `getInternalVoiceName`, which
+   *   passes unknown names through unchanged.
+   * @param _params - Unused.
+   * @returns The raw HTTP response; the caller is responsible for checking its status.
+   * @throws Error when `XI_API_KEY` is not configured.
+   */
+  override async getSpeech(
+    text: string,
+    voice: string = this.getDefaultVoice(),
+    _params: VoiceParams = {},
+  ): Promise<Response> {
+    if (!config.XI_API_KEY) {
+      throw new Error("XI_API_KEY must be set");
+    }
+    // Unknown voices pass through verbatim, so the value may be raw user input:
+    // encode it so it cannot alter the request path or query.
+    const voiceId = encodeURIComponent(this.getInternalVoiceName(voice));
+    const url = `https://api.elevenlabs.io/v1/text-to-speech/${voiceId}`;
+    return fetch(url, {
+      method: "POST",
+      headers: {
+        "Content-Type": "application/json",
+        "xi-api-key": config.XI_API_KEY,
+      },
+      body: JSON.stringify({
+        model_id: "eleven_multilingual_v2",
+        text,
+      }),
+    });
+  }
+}
--- a/src/tts/espeak.ts
+++ b/src/tts/espeak.ts
@@ -0,0 +1,33 @@
+import { spawn } from "node:child_process";
+import { BaseEngine, type VoiceParams } from "./BaseEngine.js";
+
+/** TTS engine that shells out to a locally installed `espeak` binary. */
+export class EspeakEngine extends BaseEngine {
+  constructor() {
+    super("espeak", "ESpeak", "wav");
+  }
+
+  override getDefaultVoice(): string {
+    return "en";
+  }
+
+  /** No voice table is kept for espeak; every name is accepted and handed to the binary. */
+  override validateVoice(_voice: string): boolean {
+    return true;
+  }
+
+  /**
+   * Runs `espeak -v <voice> -w <filepath> --stdin`, feeding `text` on stdin.
+   *
+   * @param text - Text to speak.
+   * @param filepath - Destination audio file, passed to `-w`.
+   * @param voice - Value for `-v`; defaults to `getDefaultVoice()`.
+   * @param _params - Unused.
+   * @returns `filepath` once espeak exits with code 0.
+   * @throws Error when the process cannot be spawned, exits non-zero, or is
+   *   killed by a signal; the message includes anything espeak wrote to stderr.
+   */
+  override async getSpeechFile(
+    text: string,
+    filepath: string,
+    voice: string = this.getDefaultVoice(),
+    _params: VoiceParams = {},
+  ): Promise<string> {
+    return new Promise((resolve, reject) => {
+      const proc = spawn("espeak", ["-v", voice, "-w", filepath, "--stdin"]);
+      let stderr = "";
+      proc.stderr.setEncoding("utf8");
+      proc.stderr.on("data", (chunk: string) => {
+        stderr += chunk;
+      });
+      // stdout is piped by default but never needed; drain it so a chatty
+      // child cannot stall on a full pipe.
+      proc.stdout.resume();
+      // If espeak fails to start or exits early, writing the text raises EPIPE
+      // on stdin. The real failure is reported through "error"/"close", so the
+      // stream error is swallowed here rather than left unhandled.
+      proc.stdin.on("error", () => {});
+      proc.on("error", reject);
+      proc.on("close", (code, signal) => {
+        if (code === 0) {
+          resolve(filepath);
+          return;
+        }
+        const how =
+          code === null ? `was terminated by signal ${signal}` : `exited with code ${code}`;
+        const detail = stderr.trim();
+        reject(new Error(detail ? `espeak ${how}: ${detail}` : `espeak ${how}`));
+      });
+      proc.stdin.end(text);
+    });
+  }
+}
--- a/src/tts/google.ts
+++ b/src/tts/google.ts
@@ -0,0 +1,78 @@
+import { existsSync } from "node:fs";
+import { writeFile } from "node:fs/promises";
+import textToSpeech from "@google-cloud/text-to-speech";
+import type { TextToSpeechClient } from "@google-cloud/text-to-speech";
+import { config } from "../config.js";
+import { BaseEngine, type VoiceParams } from "./BaseEngine.js";
+
+/** Voice name and language code sent with each synthesis request. */
+interface GoogleVoiceMeta {
+  name: string;
+  lang: string;
+}
+
+/** TTS engine backed by the Google Cloud Text-to-Speech client library. */
+export class GoogleEngine extends BaseEngine {
+  /** Lazily created API client; see `getClient`. */
+  private client: TextToSpeechClient | undefined;
+  /** Lowercased voice name -> exact name and first language code from `listVoices`. */
+  protected override voices: Record<string, GoogleVoiceMeta> = {};
+  /** Settles once the initial voice-list load has finished (it never rejects). */
+  private readonly voicesReady: Promise<void>;
+
+  constructor() {
+    super("google", "Google Cloud TTS", "wav");
+    // populateVoiceList handles its own errors, so this promise cannot reject.
+    this.voicesReady = this.populateVoiceList();
+  }
+
+  override getDefaultVoice(): string {
+    return "en-us-wavenet-a";
+  }
+
+  /** True when `GOOGLE_APPLICATION_CREDENTIALS` is set and names an existing path. */
+  private credentialsAvailable(): boolean {
+    return (
+      !!config.GOOGLE_APPLICATION_CREDENTIALS &&
+      existsSync(config.GOOGLE_APPLICATION_CREDENTIALS)
+    );
+  }
+
+  /**
+   * Returns the shared client, creating it on first use.
+   *
+   * @throws Error when credentials are not available.
+   */
+  private getClient(): TextToSpeechClient {
+    if (!this.credentialsAvailable()) {
+      throw new Error(
+        "Google Cloud TTS unavailable: GOOGLE_APPLICATION_CREDENTIALS must point to a readable file",
+      );
+    }
+    this.client ??= new textToSpeech.TextToSpeechClient();
+    return this.client;
+  }
+
+  /**
+   * Loads the available voices into `voices`, skipping entries without a name
+   * or language code. Best-effort: any failure is logged and leaves the table as is.
+   */
+  private async populateVoiceList(): Promise<void> {
+    if (!this.credentialsAvailable()) return;
+    try {
+      const [result] = await this.getClient().listVoices({});
+      for (const voice of result.voices ?? []) {
+        if (!voice.name || !voice.languageCodes?.[0]) continue;
+        this.voices[voice.name.toLowerCase()] = {
+          name: voice.name,
+          lang: voice.languageCodes[0],
+        };
+      }
+    } catch (err) {
+      console.error("Google Cloud TTS: failed to populate voice list:", err);
+    }
+  }
+
+  /**
+   * Synthesizes `text` with the given voice and writes the audio to `filepath`.
+   *
+   * @param text - Text to speak.
+   * @param filepath - Destination audio file.
+   * @param voice - Voice name, matched case-insensitively against the loaded list.
+   * @param _params - Unused.
+   * @returns `filepath` once the file has been written.
+   * @throws Error when credentials are unavailable, the voice is unknown, or
+   *   the API returns no audio.
+   */
+  override async getSpeechFile(
+    text: string,
+    filepath: string,
+    voice: string = this.getDefaultVoice(),
+    _params: VoiceParams = {},
+  ): Promise<string> {
+    // Resolve the client first so a credentials problem is reported as such
+    // rather than as an "unknown voice" caused by the empty voice table.
+    const client = this.getClient();
+    // The list loads in the background from the constructor; wait for it so a
+    // request made right after start-up does not see an empty table.
+    await this.voicesReady;
+    const key = voice.toLowerCase();
+    const meta = Object.prototype.hasOwnProperty.call(this.voices, key)
+      ? this.voices[key]
+      : undefined;
+    if (!meta) throw new Error(`Google: unknown voice "${voice}"`);
+    const [response] = await client.synthesizeSpeech({
+      input: { text },
+      voice: { name: meta.name, languageCode: meta.lang },
+      audioConfig: { audioEncoding: "LINEAR16" },
+    });
+    const audio = response.audioContent;
+    if (!audio) {
+      throw new Error("Google: synthesizeSpeech returned no audioContent");
+    }
+    // audioContent is typed as bytes or string; a string is assumed to be the
+    // base64 form of the bytes (TODO confirm) and decoded rather than written as text.
+    await writeFile(filepath, typeof audio === "string" ? Buffer.from(audio, "base64") : audio);
+    return filepath;
+  }
+}
--- a/src/tts/gtranslate.ts
+++ b/src/tts/gtranslate.ts
@@ -0,0 +1,25 @@
+import { getAudioUrl } from "@sefinek/google-tts-api";
+import { BaseEngine, type VoiceParams } from "./BaseEngine.js";
+
+/** TTS engine that downloads audio from the URL built by `@sefinek/google-tts-api`. */
+export class GtranslateEngine extends BaseEngine {
+  constructor() {
+    super("gtranslate", "Google Translate TTS", "mp3");
+  }
+
+  /** Language code used when the caller does not choose a voice. */
+  override getDefaultVoice(): string {
+    return "en-us";
+  }
+
+  /** No voice table is kept for this engine, so any value is accepted. */
+  override validateVoice(_voice: string): boolean {
+    return true;
+  }
+
+  /**
+   * Fetches the spoken form of `text`.
+   *
+   * @param text - Text to speak.
+   * @param voice - Passed to `getAudioUrl` as its `lang` option.
+   * @param _params - Unused.
+   * @returns The raw HTTP response for the generated audio URL.
+   */
+  override async getSpeech(
+    text: string,
+    voice: string = this.getDefaultVoice(),
+    _params: VoiceParams = {},
+  ): Promise<Response> {
+    const options = { lang: voice };
+    return fetch(getAudioUrl(text, options));
+  }
+}
--- a/src/tts/openai.ts
+++ b/src/tts/openai.ts
@@ -0,0 +1,40 @@
+import { config } from "../config.js";
+import { BaseEngine, type VoiceParams } from "./BaseEngine.js";
+
+/** Voice names this engine accepts; each one is sent to the API unchanged. */
+const OPENAI_VOICES = ["alloy", "echo", "fable", "onyx", "nova", "shimmer"] as const;
+
+/** TTS engine that calls the OpenAI `/v1/audio/speech` endpoint. */
+export class OpenAIEngine extends BaseEngine {
+  /** Identity map: every supported voice name maps to itself. */
+  protected override voices: Record<string, string> = Object.fromEntries(
+    OPENAI_VOICES.map((name) => [name, name]),
+  );
+
+  constructor() {
+    super("openai", "OpenAI TTS", "mp3");
+  }
+
+  override getDefaultVoice(): string {
+    return "alloy";
+  }
+
+  /**
+   * Requests speech for `text` using the "tts-1-hd" model.
+   *
+   * @param text - Text to speak.
+   * @param voice - Voice name, sent as-is in the request body.
+   * @param _params - Unused.
+   * @returns The raw HTTP response from the API.
+   * @throws Error when `OPENAI_API_KEY` is not configured.
+   */
+  override async getSpeech(
+    text: string,
+    voice: string = this.getDefaultVoice(),
+    _params: VoiceParams = {},
+  ): Promise<Response> {
+    const apiKey = config.OPENAI_API_KEY;
+    if (!apiKey) {
+      throw new Error("OPENAI_API_KEY must be set");
+    }
+    const payload = JSON.stringify({
+      model: "tts-1-hd",
+      input: text,
+      voice,
+    });
+    return fetch("https://api.openai.com/v1/audio/speech", {
+      method: "POST",
+      headers: {
+        "Content-Type": "application/json",
+        Authorization: `Bearer ${apiKey}`,
+      },
+      body: payload,
+    });
+  }
+}
--- a/src/tts/registry.ts
+++ b/src/tts/registry.ts
@@ -0,0 +1,69 @@
+import type { BaseEngine } from "./BaseEngine.js";
+import { AzureEngine } from "./azure.js";
+import { ElevenEngine } from "./eleven.js";
+import { EspeakEngine } from "./espeak.js";
+import { GoogleEngine } from "./google.js";
+import { GtranslateEngine } from "./gtranslate.js";
+import { OpenAIEngine } from "./openai.js";
+import { SamEngine } from "./sam.js";
+import { UnrealEngine } from "./unreal.js";
+import { WatsonEngine } from "./watson.js";
+
+/**
+ * Holds one instance of every TTS engine, keyed by short name, plus the engine
+ * and voice currently selected for announcements.
+ */
+export class TTSRegistry {
+  private engines: Record<string, BaseEngine>;
+  private _announcement: BaseEngine;
+  private _announcementVoice: string;
+
+  /**
+   * Instantiates all engines and selects the announcement engine.
+   *
+   * @param initialAnnouncementEngineName - Short name of a registered engine.
+   * @param initialAnnouncementVoice - Voice to use for announcements; stored as given.
+   * @throws Error when `initialAnnouncementEngineName` is not a registered engine.
+   */
+  constructor(initialAnnouncementEngineName: string, initialAnnouncementVoice: string) {
+    this.engines = {
+      azure: new AzureEngine(),
+      eleven: new ElevenEngine(),
+      espeak: new EspeakEngine(),
+      google: new GoogleEngine(),
+      gtranslate: new GtranslateEngine(),
+      openai: new OpenAIEngine(),
+      sam: new SamEngine(),
+      unreal: new UnrealEngine(),
+      watson: new WatsonEngine(),
+    };
+    for (const name of Object.keys(this.engines)) {
+      console.log(`Loaded TTS engine: ${name}`);
+    }
+    // Go through get() so inherited keys such as "constructor" are rejected
+    // here instead of being stored as the announcement engine.
+    const ann = this.get(initialAnnouncementEngineName);
+    if (!ann) {
+      throw new Error(
+        `ANNOUNCEMENT_ENGINE "${initialAnnouncementEngineName}" is not a registered TTS engine`,
+      );
+    }
+    this._announcement = ann;
+    this._announcementVoice = initialAnnouncementVoice;
+  }
+
+  /** Returns the engine registered under `name`, or `undefined` if there is none. */
+  get(name: string): BaseEngine | undefined {
+    return this.has(name) ? this.engines[name] : undefined;
+  }
+
+  /**
+   * Reports whether an engine is registered under `name`.
+   * Only own keys count; `in` would also match names inherited from
+   * `Object.prototype` ("toString", "constructor", ...).
+   */
+  has(name: string): boolean {
+    return Object.prototype.hasOwnProperty.call(this.engines, name);
+  }
+
+  /** Short names of all registered engines. */
+  list(): string[] {
+    return Object.keys(this.engines);
+  }
+
+  /** Engine currently used for announcements. */
+  get announcement(): BaseEngine {
+    return this._announcement;
+  }
+
+  set announcement(engine: BaseEngine) {
+    this._announcement = engine;
+  }
+
+  /** Voice currently used for announcements. */
+  get announcementVoice(): string {
+    return this._announcementVoice;
+  }
+
+  set announcementVoice(voice: string) {
+    this._announcementVoice = voice;
+  }
+}
--- a/src/tts/sam.ts
+++ b/src/tts/sam.ts
@@ -0,0 +1,41 @@
+import { writeFile } from "node:fs/promises";
+import Sam from "sam-js";
+import pkg from "wavefile";
+import { BaseEngine, type VoiceParams } from "./BaseEngine.js";
+
+// `wavefile` is imported through its default export; pull the class off it.
+const { WaveFile } = pkg;
+
+/** TTS engine that renders speech locally with `sam-js` and saves it as a WAV file. */
+export class SamEngine extends BaseEngine {
+  constructor() {
+    super("sam", "Software Automatic Mouth", "wav");
+  }
+
+  override getDefaultVoice(): string {
+    return "sam";
+  }
+
+  /** The voice argument is ignored by this engine, so any value is accepted. */
+  override validateVoice(_voice: string): boolean {
+    return true;
+  }
+
+  /**
+   * Renders `text` and writes it to `filepath` as 8-bit mono audio at 22050 Hz.
+   * A leading "$" is stripped and switches on the flag passed as the second
+   * argument of `buf8` (named `phonetic` here).
+   *
+   * @param text - Text to speak, optionally prefixed with "$".
+   * @param filepath - Destination WAV file.
+   * @param _voice - Unused.
+   * @param _params - Unused.
+   * @returns `filepath` once the file has been written.
+   * @throws Error when `buf8` does not return a `Uint8Array`.
+   */
+  override async getSpeechFile(
+    text: string,
+    filepath: string,
+    _voice: string = this.getDefaultVoice(),
+    _params: VoiceParams = {},
+  ): Promise<string> {
+    const phonetic = text.startsWith("$");
+    const input = phonetic ? text.slice(1) : text;
+    const samples = new Sam().buf8(input, phonetic);
+    if (!(samples instanceof Uint8Array)) throw new Error("SAM produced no audio");
+    const wav = new WaveFile();
+    wav.fromScratch(1, 22050, "8", samples);
+    await writeFile(filepath, wav.toBuffer());
+    return filepath;
+  }
+}
--- a/src/tts/unreal.ts
+++ b/src/tts/unreal.ts
@@ -0,0 +1,52 @@
+import { writeFile } from "node:fs/promises";
+import { config } from "../config.js";
+import { BaseEngine, type VoiceParams } from "./BaseEngine.js";
+
+/** Field read from the Unreal Speech `/speech` response. */
+interface UnrealResponse {
+  OutputUri: string;
+}
+
+/** TTS engine that calls the Unreal Speech HTTP API. */
+export class UnrealEngine extends BaseEngine {
+  /** Lowercase voice name -> `VoiceId` sent to the API. */
+  protected override voices: Record<string, string> = {
+    scarlett: "Scarlett",
+    liv: "Liv",
+    dan: "Dan",
+    will: "Will",
+    amy: "Amy",
+  };
+
+  constructor() {
+    super("unreal", "Unreal Speech TTS", "mp3");
+  }
+
+  override getDefaultVoice(): string {
+    return "liv";
+  }
+
+  /**
+   * Requests synthesis, then downloads the audio from the returned `OutputUri`
+   * and writes it to `filepath`.
+   *
+   * @param text - Text to speak.
+   * @param filepath - Destination mp3 file.
+   * @param voice - Voice name; mapped through `getInternalVoiceName`.
+   * @param _params - Unused.
+   * @returns `filepath` once the file has been written.
+   * @throws Error when `UNREAL_API_KEY` is missing, either HTTP request fails,
+   *   or the synthesis response carries no `OutputUri`.
+   */
+  override async getSpeechFile(
+    text: string,
+    filepath: string,
+    voice: string = this.getDefaultVoice(),
+    _params: VoiceParams = {},
+  ): Promise<string> {
+    if (!config.UNREAL_API_KEY) throw new Error("UNREAL_API_KEY must be set");
+    const res = await fetch("https://api.v6.unrealspeech.com/speech", {
+      method: "POST",
+      headers: {
+        "Content-Type": "application/json",
+        Authorization: `Bearer ${config.UNREAL_API_KEY}`,
+      },
+      body: JSON.stringify({
+        Bitrate: "320k",
+        Temperature: 0.1,
+        VoiceId: this.getInternalVoiceName(voice),
+        Text: text,
+        AudioFormat: "mp3",
+      }),
+    });
+    if (!res.ok) {
+      throw new Error(`Unreal: speech request failed with HTTP ${res.status}`);
+    }
+    const json = (await res.json()) as Partial<UnrealResponse>;
+    // Without this check a missing URI surfaces as an opaque "invalid URL" error from fetch.
+    if (typeof json.OutputUri !== "string" || json.OutputUri === "") {
+      throw new Error("Unreal: response did not include an OutputUri");
+    }
+    const audio = await fetch(json.OutputUri);
+    // An error page must not be saved to disk as if it were audio.
+    if (!audio.ok) {
+      throw new Error(`Unreal: audio download failed with HTTP ${audio.status}`);
+    }
+    await writeFile(filepath, Buffer.from(await audio.arrayBuffer()));
+    return filepath;
+  }
+}
--- a/src/tts/watson.ts
+++ b/src/tts/watson.ts
@@ -0,0 +1,64 @@
+import { config } from "../config.js";
+import { BaseEngine, type VoiceParams } from "./BaseEngine.js";
+
+/** Fields read from each voice in the Watson `/v1/voices` response. */
+interface WatsonVoice {
+  name: string;
+  description: string;
+}
+
+/** Shape read from the Watson `/v1/voices` response. */
+interface WatsonVoicesResponse {
+  voices: WatsonVoice[];
+}
+
+/** TTS engine that calls the IBM Watson Text to Speech HTTP API. */
+export class WatsonEngine extends BaseEngine {
+  /** Lowercased short voice label -> Watson voice `name`, filled in by `populateVoiceList`. */
+  protected override voices: Record<string, string> = {};
+
+  constructor() {
+    super("watson", "IBM Watson TTS", "ogg");
+    // Fire-and-forget: the list fills in the background and failures are logged, not thrown.
+    void this.populateVoiceList();
+  }
+
+  override getDefaultVoice(): string {
+    return "michael";
+  }
+
+  /**
+   * Builds the HTTP Basic `Authorization` header value for `apikey:<key>`.
+   *
+   * @throws Error when `watsonAPIKey` is not configured.
+   */
+  private authString(): string {
+    if (!config.watsonAPIKey) throw new Error("watsonAPIKey must be set");
+    const b64 = Buffer.from(`apikey:${config.watsonAPIKey}`).toString("base64");
+    return `Basic ${b64}`;
+  }
+
+  /**
+   * Loads the voice list into `voices`. The key is the part of the description
+   * before the first ":"; entries without a ":" are keyed by their own name.
+   * Best-effort: any failure is logged and leaves the table as is.
+   */
+  private async populateVoiceList(): Promise<void> {
+    if (!config.watsonURL || !config.watsonAPIKey) return;
+    try {
+      const res = await fetch(`${config.watsonURL}/v1/voices`, {
+        method: "GET",
+        headers: { Authorization: this.authString() },
+      });
+      if (!res.ok) {
+        throw new Error(`voice list request failed with HTTP ${res.status}`);
+      }
+      const json = (await res.json()) as Partial<WatsonVoicesResponse>;
+      if (!Array.isArray(json.voices)) {
+        throw new Error("voice list response has no `voices` array");
+      }
+      for (const v of json.voices) {
+        // Skip malformed entries instead of aborting the whole list.
+        if (typeof v?.name !== "string" || typeof v.description !== "string") continue;
+        const colon = v.description.indexOf(":");
+        // With no ":" (index -1) or a leading ":" the label would be empty and
+        // every such voice would collide on the "" key; use the name instead.
+        const label = colon > 0 ? v.description.substring(0, colon) : v.name;
+        this.voices[label.toLowerCase()] = v.name;
+      }
+    } catch (err) {
+      console.error("Watson: failed to populate voice list:", err);
+    }
+  }
+
+  /**
+   * Requests speech for `text` from the Watson `/v1/synthesize` endpoint.
+   *
+   * @param text - Text to speak.
+   * @param voice - Voice name; mapped through `getInternalVoiceName`, which
+   *   passes unknown names through unchanged.
+   * @param _params - Unused.
+   * @returns The raw HTTP response; the caller is responsible for checking its status.
+   * @throws Error when `watsonURL` or `watsonAPIKey` is not configured.
+   */
+  override async getSpeech(
+    text: string,
+    voice: string = this.getDefaultVoice(),
+    _params: VoiceParams = {},
+  ): Promise<Response> {
+    if (!config.watsonURL) throw new Error("watsonURL must be set");
+    // Unknown voices pass through verbatim, so the value may be raw user input:
+    // encode it so it cannot inject extra query parameters.
+    const voiceName = encodeURIComponent(this.getInternalVoiceName(voice));
+    const url = `${config.watsonURL}/v1/synthesize?voice=${voiceName}`;
+    return fetch(url, {
+      method: "POST",
+      headers: {
+        "Content-Type": "application/json",
+        Authorization: this.authString(),
+      },
+      body: JSON.stringify({ text }),
+    });
+  }
+}