Fuzzy voice matching

2026-05-14 20:33:31 +02:00
parent 456ec8c83f
commit afeb05447d
5 changed files with 63 additions and 17 deletions
--- a/src/tts/BaseEngine.ts
+++ b/src/tts/BaseEngine.ts
@@ -2,6 +2,19 @@ import { writeFile } from "node:fs/promises";

 export type VoiceParams = Record<string, unknown>;

+export type VoiceResolution = // outcome of BaseEngine.resolveVoice, discriminated by `kind`
+  | { kind: "exact"; voice: string } // normalized input is itself a voice key, or the engine is freeform
+  | { kind: "fuzzy"; voice: string } // exactly one voice key matched by token prefix
+  | { kind: "ambiguous"; candidates: string[] } // several keys matched; candidates are sorted
+  | { kind: "none" }; // empty input or no matching key
+
+/** Joins the first `max` candidate voice names with ", " for a user-facing ambiguity message and summarizes the rest as "(+N more)"; `max` is truncated to a whole number and clamped to zero or more, so a zero or negative limit yields only the summary. */
+export function formatCandidates(candidates: readonly string[], max = 5): string {
+  const limit = Math.max(0, Math.trunc(max) || 0); // NaN, fractions and negatives would otherwise skew slice() and the count
+  const shown = candidates.slice(0, limit).join(", ");
+  return candidates.length > limit ? `${shown}${limit > 0 ? " " : ""}(+${candidates.length - limit} more)` : shown;
+}
+
 /**
 * Common contract for every TTS provider. Subclasses override either
 * `getSpeech` (returning a fetch-like Response) or `getSpeechFile` (writing
@@ -47,6 +60,27 @@ export abstract class BaseEngine {
    return Object.keys(this.voices).length === 0;
  }

+  /**
+   * Resolves a user-typed voice string against this engine's voice table: an exact
+   * own-key match on the trimmed, lower-cased input wins; otherwise each whitespace-separated
+   * input token must be a prefix of some alphanumeric token of a key (compared case-insensitively).
+   * Freeform engines always succeed with the normalized input. Several fuzzy hits are returned sorted.
+   */
+  resolveVoice(input: string): VoiceResolution {
+    const norm = input.trim().toLowerCase();
+    if (this.isFreeformVoice()) return { kind: "exact", voice: norm };
+    const isOwnKey = Object.prototype.hasOwnProperty.call(this.voices, norm); // skip inherited names, e.g. "constructor"
+    if (isOwnKey && this.voices[norm] != null) return { kind: "exact", voice: norm };
+    const inputTokens = norm.split(/\s+/).filter(Boolean);
+    if (inputTokens.length === 0) return { kind: "none" };
+    const matches = Object.keys(this.voices).filter((key) => {
+      const keyTokens = key.toLowerCase().split(/[^a-z0-9]+/).filter(Boolean); // input is lower-cased, so keys must be too
+      return inputTokens.every((it) => keyTokens.some((kt) => kt.startsWith(it)));
+    });
+    if (matches.length === 1) return { kind: "fuzzy", voice: matches[0]! };
+    return matches.length > 1 ? { kind: "ambiguous", candidates: matches.sort() } : { kind: "none" };
+  }
+
  /** Default implementation: subclass should override either this or getSpeechFile. */
  async getSpeech(_text: string, _voice?: string, _params?: VoiceParams): Promise<Response> {
    throw new Error(`${this.shortName}: getSpeech not implemented`);