From afeb05447d2ae8b42e0403d322f42da1d6d1da71 Mon Sep 17 00:00:00 2001
From: Talon <talon@iamtalon.me>
Date: Thu, 14 May 2026 20:33:31 +0200
Subject: [PATCH] Fuzzy voice matching

---
 src/i18n/strings.ts        |  1 +
 src/modules/canttalk.ts    | 21 +++++++++++++--------
 src/modules/ttsSettings.ts | 23 ++++++++++++++---------
 src/tts/BaseEngine.ts      | 34 ++++++++++++++++++++++++++++++++++
 strings/en.json            |  1 +
 5 files changed, 63 insertions(+), 17 deletions(-)

diff --git a/src/i18n/strings.ts b/src/i18n/strings.ts
index ccd96d2..3e3c78b 100644
--- a/src/i18n/strings.ts
+++ b/src/i18n/strings.ts
@@ -11,6 +11,7 @@ export type StringKey =
   | "USER_VOICE_CHANGED"
   | "INVALID_ENGINE"
   | "INVALID_VOICE"
+  | "AMBIGUOUS_VOICE"
   | "TOO_MANY_ARGUMENTS"
   | "CURRENT_STORY"
   | "NO_STORY"
diff --git a/src/modules/canttalk.ts b/src/modules/canttalk.ts
index 297f11e..0e352b7 100644
--- a/src/modules/canttalk.ts
+++ b/src/modules/canttalk.ts
@@ -2,6 +2,7 @@ import { readdirSync } from "node:fs";
 import { join } from "node:path";
 import { respond } from "../audio/AudioService.js";
 import type { TTSPreferencesRow } from "../db/schema.js";
+import { formatCandidates } from "../tts/BaseEngine.js";
 import type { Module } from "./types.js";
 
 export const canttalk: Module = ({ client, audio, commands, tts, db, t, config, rootDir }) => {
@@ -38,23 +39,27 @@ export const canttalk: Module = ({ client, audio, commands, tts, db, t, config,
   });
 
   commands.register("myvoice", async (args, message) => {
-    if (args.length > 3) {
-      return respond(audio, sysmsg, message, t("TOO_MANY_ARGUMENTS"));
-    }
     const engineName = args[1];
-    const voiceArg = args[2];
-    if (!engineName || !voiceArg) {
+    if (!engineName || args.length < 3) {
       return respond(audio, sysmsg, message, t("TOO_MANY_ARGUMENTS"));
     }
     const engine = tts.get(engineName);
     if (!engine) {
       return respond(audio, sysmsg, message, t("INVALID_ENGINE", engineName));
     }
-    const userVoice = voiceArg.toLowerCase();
+    const voiceInput = args.slice(2).join(" ");
+    const res = engine.resolveVoice(voiceInput);
     let chosenVoice: string;
-    if (engine.validateVoice(userVoice)) {
-      chosenVoice = userVoice;
+    if (res.kind === "exact" || res.kind === "fuzzy") {
+      chosenVoice = res.voice;
       respond(audio, sysmsg, message, t("USER_VOICE_CHANGED", chosenVoice, engine.longName));
+    } else if (res.kind === "ambiguous") {
+      return respond(
+        audio,
+        sysmsg,
+        message,
+        t("AMBIGUOUS_VOICE", voiceInput, formatCandidates(res.candidates)),
+      );
     } else {
       chosenVoice = engine.getDefaultVoice();
       respond(audio, sysmsg, message, t("INVALID_VOICE", chosenVoice, engine.longName));
diff --git a/src/modules/ttsSettings.ts b/src/modules/ttsSettings.ts
index 2dd6991..e385a5c 100644
--- a/src/modules/ttsSettings.ts
+++ b/src/modules/ttsSettings.ts
@@ -1,19 +1,15 @@
 import { join } from "node:path";
 import { AttachmentBuilder } from "discord.js";
 import { respond } from "../audio/AudioService.js";
+import { formatCandidates } from "../tts/BaseEngine.js";
 import type { Module } from "./types.js";
 
 export const ttsSettings: Module = ({ audio, commands, tts, t, rootDir }) => {
   const sysmsg = join(rootDir, "sysmsg.wav");
 
   commands.register("announcevoice", (args, message) => {
-    if (args.length > 3) {
-      respond(audio, sysmsg, message, t("TOO_MANY_ARGUMENTS"));
-      return;
-    }
     const engineName = args[1];
-    const voiceArg = args[2];
-    if (!engineName || !voiceArg) {
+    if (!engineName || args.length < 3) {
       respond(audio, sysmsg, message, t("TOO_MANY_ARGUMENTS"));
       return;
     }
@@ -22,10 +18,19 @@ export const ttsSettings: Module = ({ audio, commands, tts, t, rootDir }) => {
       respond(audio, sysmsg, message, t("INVALID_ENGINE", engineName));
       return;
     }
+    const voiceInput = args.slice(2).join(" ");
+    const res = engine.resolveVoice(voiceInput);
+    if (res.kind === "ambiguous") {
+      // Bail out before touching tts.announcement: switching the engine while keeping
+      // the previously selected voice would leave the announcement settings mismatched.
+      const candidates = formatCandidates(res.candidates);
+      respond(audio, sysmsg, message, t("AMBIGUOUS_VOICE", voiceInput, candidates));
+      return;
+    }
     tts.announcement = engine;
-    if (engine.validateVoice(voiceArg)) {
-      tts.announcementVoice = voiceArg;
-      respond(audio, sysmsg, message, t("SYSTEM_VOICE_CHANGED", voiceArg, engine.longName));
+    if (res.kind === "exact" || res.kind === "fuzzy") {
+      tts.announcementVoice = res.voice;
+      respond(audio, sysmsg, message, t("SYSTEM_VOICE_CHANGED", res.voice, engine.longName));
     } else {
       tts.announcementVoice = engine.getDefaultVoice();
       respond(
diff --git a/src/tts/BaseEngine.ts b/src/tts/BaseEngine.ts
index f435289..b12757f 100644
--- a/src/tts/BaseEngine.ts
+++ b/src/tts/BaseEngine.ts
@@ -2,6 +2,19 @@ import { writeFile } from "node:fs/promises";
 
 export type VoiceParams = Record<string, unknown>;
 
+export type VoiceResolution =
+  | { kind: "exact"; voice: string } // normalized input is a voice key, or the engine is freeform
+  | { kind: "fuzzy"; voice: string } // exactly one voice key matched by token prefix
+  | { kind: "ambiguous"; candidates: string[] } // several keys matched; candidates are sorted
+  | { kind: "none" }; // input had no tokens, or no voice key matched
+
+/** Joins the first `max` candidate voice names with ", " for a user-facing ambiguity message; if more than `max` exist, appends " (+N more)" where N is the number left out. */
+export function formatCandidates(candidates: string[], max = 5): string {
+  const shown = candidates.slice(0, max).join(", ");
+  const extra = candidates.length - max;
+  return extra > 0 ? `${shown} (+${extra} more)` : shown;
+}
+
 /**
  * Common contract for every TTS provider. Subclasses override either
  * `getSpeech` (returning a fetch-like Response) or `getSpeechFile` (writing
@@ -47,6 +60,27 @@ export abstract class BaseEngine {
     return Object.keys(this.voices).length === 0;
   }
 
+  /**
+   * Resolves a user-typed voice string against this engine's voice table: exact match on an
+   * own key first (inherited names such as "constructor" are not voices), then token-prefix
+   * matching, where each whitespace-separated input token must be a case-insensitive prefix of
+   * some alphanumeric token of a key. Freeform engines always succeed with the normalized input.
+   */
+  resolveVoice(input: string): VoiceResolution {
+    const norm = input.trim().toLowerCase();
+    if (this.isFreeformVoice()) return { kind: "exact", voice: norm };
+    const isOwnKey = Object.prototype.hasOwnProperty.call(this.voices, norm);
+    if (isOwnKey && this.voices[norm] != null) return { kind: "exact", voice: norm };
+    const inputTokens = norm.split(/\s+/).filter(Boolean);
+    if (inputTokens.length === 0) return { kind: "none" };
+    const matches = Object.keys(this.voices).filter((key) => {
+      const keyTokens = key.toLowerCase().split(/[^a-z0-9]+/).filter(Boolean);
+      return inputTokens.every((it) => keyTokens.some((kt) => kt.startsWith(it)));
+    });
+    if (matches.length > 1) return { kind: "ambiguous", candidates: matches.sort() };
+    return matches.length === 1 ? { kind: "fuzzy", voice: matches[0]! } : { kind: "none" };
+  }
+
   /** Default implementation: subclass should override either this or getSpeechFile. */
   async getSpeech(_text: string, _voice?: string, _params?: VoiceParams): Promise<Response> {
     throw new Error(`${this.shortName}: getSpeech not implemented`);
diff --git a/strings/en.json b/strings/en.json
index 5d0b450..df1f9f1 100644
--- a/strings/en.json
+++ b/strings/en.json
@@ -6,6 +6,7 @@
 	"USER_VOICE_CHANGED": "Your new voice is %s from %s",
 	"INVALID_ENGINE": "%s is not a valid engine name.",
 	"INVALID_VOICE": "invalid voice name. Using default voice %s for %s instead.",
+	"AMBIGUOUS_VOICE": "voice name \"%s\" is ambiguous. Candidates: %s",
 	"TOO_MANY_ARGUMENTS": "too many arguments for command.",
 	"CURRENT_STORY": "Here's the current story: %s",
 	"NO_STORY": "No story in progress at the moment.",