import { writeFile } from "node:fs/promises";

/**
 * Provider-specific synthesis options. This module only forwards them from
 * `getSpeechFile` to `getSpeech` and never inspects the values.
 * NOTE(review): the value type is assumed to be unconstrained — confirm
 * against the concrete engines and narrow it if they agree on a shape.
 */
export type VoiceParams = Record<string, unknown>;

/**
 * One entry of an engine's voice table: either the provider's internal
 * identifier itself, or an object carrying that identifier in `name`.
 */
export type VoiceEntry = string | { name: string };

/**
 * Outcome of resolving a user-typed voice string.
 * - `exact`: the normalized input is itself a voice key (or the engine is freeform).
 * - `fuzzy`: exactly one key matched by token prefix.
 * - `ambiguous`: several keys matched; `candidates` lists them sorted.
 * - `none`: nothing matched.
 */
export type VoiceResolution =
  | { kind: "exact"; voice: string }
  | { kind: "fuzzy"; voice: string }
  | { kind: "ambiguous"; candidates: string[] }
  | { kind: "none" };

/**
 * Joins up to `max` candidate voice names for a user-facing ambiguity
 * message, summarizing the rest as "(+N more)".
 *
 * @param candidates Names to display, in the order they should appear.
 * @param max Maximum number of names to show. Negative, fractional or NaN
 *   values are clamped to a non-negative integer so the "(+N more)" count
 *   always equals the number of names actually omitted.
 * @returns The joined names, followed by "(+N more)" when some were omitted.
 */
export function formatCandidates(candidates: string[], max = 5): string {
  // `slice(0, -1)` would keep all-but-last while the count below used the raw
  // value, so normalize first. `|| 0` maps NaN to 0; Infinity passes through.
  const limit = Math.max(0, Math.floor(max) || 0);
  const shown = candidates.slice(0, limit).join(", ");
  const extra = candidates.length - limit;
  if (extra <= 0) return shown;
  const suffix = `(+${extra} more)`;
  // With nothing shown there is no list to separate the suffix from.
  return limit === 0 ? suffix : `${shown} ${suffix}`;
}

/**
 * Looks up `key` among the table's OWN properties only. Plain indexing would
 * also find inherited members such as `constructor` or `toString`, which must
 * never be treated as voices.
 */
function ownVoiceEntry(table: Record<string, VoiceEntry>, key: string): VoiceEntry | undefined {
  return Object.prototype.hasOwnProperty.call(table, key) ? table[key] : undefined;
}

/**
 * Common contract for every TTS provider. Subclasses override either
 * `getSpeech` (returning a fetch-like Response) or `getSpeechFile` (writing
 * directly to disk) — the default `getSpeechFile` pipes `getSpeech` to a file.
 */
export abstract class BaseEngine {
  /** Short ID used in env / commands (e.g. "azure"). */
  readonly shortName: string;
  /** Human-readable name shown in messages. */
  readonly longName: string;
  /** Output file extension without the dot (e.g. "mp3"). */
  readonly fileExtension: string;
  /**
   * User-facing voice name → provider identifier (or an object holding it in
   * `name`). Left empty by engines that accept any voice string.
   */
  protected voices: Record<string, VoiceEntry> = {};

  constructor(shortName: string, longName: string, fileExtension: string) {
    this.shortName = shortName;
    this.longName = longName;
    this.fileExtension = fileExtension;
  }

  /**
   * Maps a user-friendly voice name to the provider's internal identifier.
   *
   * @param str Voice name as it appears in the voice table.
   * @returns The mapped identifier, or `str` unchanged when the table has no
   *   own entry for it.
   */
  getInternalVoiceName(str: string): string {
    const entry = ownVoiceEntry(this.voices, str);
    if (entry == null) return str;
    return typeof entry === "string" ? entry : entry.name;
  }

  /** Voice used by `getSpeechFile` when the caller does not pass one. */
  abstract getDefaultVoice(): string;

  /**
   * Reports whether `voice` is acceptable for this engine: always true when
   * the voice table is empty, otherwise true only for the table's own keys.
   */
  validateVoice(voice: string): boolean {
    if (Object.keys(this.voices).length === 0) return true;
    return ownVoiceEntry(this.voices, voice) != null;
  }

  /** Returns the voice-table keys, sorted. Empty if the engine accepts any voice. */
  listVoices(): string[] {
    return Object.keys(this.voices).sort();
  }

  /** True when this engine has no static voice table and accepts any voice string. */
  isFreeformVoice(): boolean {
    return Object.keys(this.voices).length === 0;
  }

  /**
   * Resolves a user-typed voice string against this engine's voice table using
   * exact match first, then token-prefix matching: each whitespace-separated
   * input token must be a prefix of some alphanumeric token of a key.
   * Freeform engines always succeed with the normalized input.
   *
   * @param input Raw user input; it is trimmed and lowercased before matching.
   * @returns A `VoiceResolution` describing the match outcome.
   */
  resolveVoice(input: string): VoiceResolution {
    const norm = input.trim().toLowerCase();
    if (this.isFreeformVoice()) return { kind: "exact", voice: norm };
    if (ownVoiceEntry(this.voices, norm) != null) return { kind: "exact", voice: norm };

    const inputTokens = norm.split(/\s+/).filter(Boolean);
    if (inputTokens.length === 0) return { kind: "none" };

    const matches = Object.keys(this.voices).filter((key) => {
      // Input tokens are already lowercase, so compare against lowercased key
      // tokens; otherwise a key containing capitals could never prefix-match.
      const keyTokens = key
        .toLowerCase()
        .split(/[^a-z0-9]+/)
        .filter(Boolean);
      return inputTokens.every((it) => keyTokens.some((kt) => kt.startsWith(it)));
    });

    const [only] = matches;
    if (matches.length === 1 && only !== undefined) return { kind: "fuzzy", voice: only };
    // `matches` is a fresh array from `filter`, so sorting in place is safe.
    if (matches.length > 1) return { kind: "ambiguous", candidates: matches.sort() };
    return { kind: "none" };
  }

  /**
   * Default implementation: subclass should override either this or getSpeechFile.
   *
   * @throws Error always, naming this engine's `shortName`.
   */
  async getSpeech(_text: string, _voice?: string, _params?: VoiceParams): Promise<Response> {
    throw new Error(`${this.shortName}: getSpeech not implemented`);
  }

  /**
   * Synthesizes `text` and writes the audio to `filepath`.
   *
   * The default implementation awaits `getSpeech`, reads the whole response
   * body into memory and writes it with `writeFile`.
   *
   * @param text Text to synthesize.
   * @param filepath Destination path for the audio file.
   * @param voice Voice to use; defaults to `getDefaultVoice()`.
   * @param params Provider-specific options forwarded to `getSpeech`.
   * @returns `filepath`, once the file has been written.
   */
  async getSpeechFile(
    text: string,
    filepath: string,
    voice: string = this.getDefaultVoice(),
    params: VoiceParams = {},
  ): Promise<string> {
    const data = await this.getSpeech(text, voice, params);
    const buf = Buffer.from(await data.arrayBuffer());
    await writeFile(filepath, buf);
    return filepath;
  }
}