101 lines
3.8 KiB
TypeScript
101 lines
3.8 KiB
TypeScript
import { writeFile } from "node:fs/promises";
|
|
|
|
/**
 * Bag of named options handed to `getSpeech` / `getSpeechFile` as `params`.
 * Values are typed `unknown`, so nothing about their shape is checked here.
 */
export type VoiceParams = Record<string, unknown>;
|
|
|
|
/**
 * Outcome of matching a user-typed voice string against an engine's voice
 * table. Discriminated on `kind`, so callers can `switch` on it exhaustively.
 */
export type VoiceResolution =
  /** The normalized input is itself the voice to use. */
  | { kind: "exact"; voice: string }
  /** Exactly one table key matched by token prefix; `voice` is that key. */
  | { kind: "fuzzy"; voice: string }
  /** Several table keys matched; `candidates` lists them. */
  | { kind: "ambiguous"; candidates: string[] }
  /** Nothing matched (or the input had no tokens). */
  | { kind: "none" };
|
|
|
|
/**
 * Joins up to `max` candidate voice names for a user-facing ambiguity message,
 * summarizing the rest as "(+N more)".
 *
 * @param candidates Names to display, in the order they should appear.
 * @param max Maximum number of names to spell out. Negative, fractional and
 *   NaN values are clamped to a non-negative integer so the "(+N more)" count
 *   always equals the number of names actually omitted.
 * @returns The comma-separated names, followed by " (+N more)" when some were
 *   omitted; just "(+N more)" when none were shown; "" for an empty list.
 */
export function formatCandidates(candidates: readonly string[], max = 5): string {
  // `Math.trunc(NaN) || 0` maps NaN to 0; Infinity is preserved and shows everything.
  const limit = Math.max(0, Math.trunc(max) || 0);
  const shown = candidates.slice(0, limit);
  // Derive the remainder from what was really shown rather than from `max`,
  // so the two can never disagree.
  const extra = candidates.length - shown.length;
  const shownText = shown.join(", ");
  if (extra <= 0) return shownText;
  const suffix = `(+${extra} more)`;
  return shownText === "" ? suffix : `${shownText} ${suffix}`;
}
|
|
|
|
/**
 * Common contract for every TTS provider. Subclasses override either
 * `getSpeech` (returning a fetch-like Response) or `getSpeechFile` (writing
 * directly to disk) — the default `getSpeechFile` pipes `getSpeech` to a file.
 */
export abstract class BaseEngine {
  /** Short ID used in env / commands (e.g. "azure"). */
  readonly shortName: string;
  /** Human-readable name shown in messages. */
  readonly longName: string;
  /** Output file extension without the dot (e.g. "mp3"). */
  readonly fileExtension: string;

  /**
   * Voice table: user-friendly name -> provider identifier (either the bare
   * identifier or an object carrying it in `name`). An empty table means the
   * engine accepts any voice string (see `isFreeformVoice`).
   */
  protected voices: Record<string, string | { name: string; lang: string }> = {};

  constructor(shortName: string, longName: string, fileExtension: string) {
    this.shortName = shortName;
    this.longName = longName;
    this.fileExtension = fileExtension;
  }

  /**
   * Maps a user-friendly voice name to the provider's internal identifier.
   *
   * @param str Voice name as it appears in the voice table.
   * @returns The table's identifier for `str`, or `str` unchanged when the
   *   table has no entry of its own under that name.
   */
  getInternalVoiceName(str: string): string {
    const entry = ownEntry(this.voices, str);
    if (entry == null) return str;
    return typeof entry === "string" ? entry : entry.name;
  }

  /** Voice used by `getSpeechFile` when the caller does not pass one. */
  abstract getDefaultVoice(): string;

  /**
   * Checks whether `voice` is usable with this engine.
   *
   * @returns `true` for every input when the voice table is empty; otherwise
   *   `true` only when `voice` is one of the table's own keys.
   */
  validateVoice(voice: string): boolean {
    if (this.isFreeformVoice()) return true;
    return ownEntry(this.voices, voice) != null;
  }

  /** Returns the voice-table keys in sorted order. Empty if the engine accepts any voice. */
  listVoices(): string[] {
    return Object.keys(this.voices).sort();
  }

  /** True when this engine has no static voice table and accepts any voice string. */
  isFreeformVoice(): boolean {
    return Object.keys(this.voices).length === 0;
  }

  /**
   * Resolves a user-typed voice string against this engine's voice table using
   * exact match first, then token-prefix matching: each whitespace-separated
   * input token must be a prefix of some alphanumeric token of a key.
   * Matching is case-insensitive on both sides.
   * Freeform engines always succeed with the normalized input.
   *
   * @param input Raw user input; it is trimmed and lowercased before matching.
   * @returns `exact` for a freeform engine or a direct key hit, `fuzzy` when
   *   exactly one key matches by token prefix, `ambiguous` (candidates sorted)
   *   when several do, and `none` otherwise.
   */
  resolveVoice(input: string): VoiceResolution {
    const norm = input.trim().toLowerCase();
    if (this.isFreeformVoice()) return { kind: "exact", voice: norm };
    // Own-property lookup: names such as "constructor" or "toString" must not
    // resolve through Object.prototype.
    if (ownEntry(this.voices, norm) != null) return { kind: "exact", voice: norm };

    const inputTokens = norm.split(/\s+/).filter(Boolean);
    if (inputTokens.length === 0) return { kind: "none" };

    const matches = Object.keys(this.voices).filter((key) => {
      // Lowercase the key so it is compared on the same footing as the
      // (already lowercased) input tokens.
      const keyTokens = key
        .toLowerCase()
        .split(/[^a-z0-9]+/)
        .filter(Boolean);
      return inputTokens.every((it) => keyTokens.some((kt) => kt.startsWith(it)));
    });

    const [only] = matches;
    if (matches.length === 1 && only !== undefined) return { kind: "fuzzy", voice: only };
    // `matches` is a fresh array produced by `filter`, so sorting in place is safe.
    if (matches.length > 1) return { kind: "ambiguous", candidates: matches.sort() };
    return { kind: "none" };
  }

  /**
   * Default implementation: subclass should override either this or getSpeechFile.
   *
   * @throws Error always, naming the engine via `shortName`.
   */
  async getSpeech(_text: string, _voice?: string, _params?: VoiceParams): Promise<Response> {
    throw new Error(`${this.shortName}: getSpeech not implemented`);
  }

  /**
   * Synthesizes `text` via `getSpeech` and writes the response body to `filepath`.
   *
   * @param text Text to synthesize.
   * @param filepath Destination path for the audio data.
   * @param voice Voice to use; defaults to `getDefaultVoice()`.
   * @param params Extra options forwarded unchanged to `getSpeech`.
   * @returns `filepath`, once the file has been written.
   * @throws Error when `getSpeech` yields a response explicitly marked as
   *   failed (`ok === false`), so an error payload is never saved as audio.
   *   Errors from `getSpeech` and from the file write propagate as-is.
   */
  async getSpeechFile(
    text: string,
    filepath: string,
    voice: string = this.getDefaultVoice(),
    params: VoiceParams = {},
  ): Promise<string> {
    const response = await this.getSpeech(text, voice, params);
    // Compare against `false` rather than testing falsiness so that duck-typed
    // responses which omit `ok` keep working.
    if (response.ok === false) {
      throw new Error(`${this.shortName}: speech request failed with HTTP status ${response.status}`);
    }
    const audio = Buffer.from(await response.arrayBuffer());
    await writeFile(filepath, audio);
    return filepath;
  }
}

/** Returns `table[key]` only when `key` is an own property of `table`, ignoring inherited members. */
function ownEntry<T>(table: Record<string, T>, key: string): T | undefined {
  return Object.prototype.hasOwnProperty.call(table, key) ? table[key] : undefined;
}
|