From afeb05447d2ae8b42e0403d322f42da1d6d1da71 Mon Sep 17 00:00:00 2001
From: Talon
Date: Thu, 14 May 2026 20:33:31 +0200
Subject: [PATCH] Fuzzy voice matching

---
 src/i18n/strings.ts        |  1 +
 src/modules/canttalk.ts    | 21 ++++++++-----
 src/modules/ttsSettings.ts | 28 +++++++++++------
 src/tts/BaseEngine.ts      | 64 ++++++++++++++++++++++++++++++++++++++
 strings/en.json            |  1 +
 5 files changed, 97 insertions(+), 18 deletions(-)

diff --git a/src/i18n/strings.ts b/src/i18n/strings.ts
index ccd96d2..3e3c78b 100644
--- a/src/i18n/strings.ts
+++ b/src/i18n/strings.ts
@@ -11,6 +11,7 @@ export type StringKey =
   | "USER_VOICE_CHANGED"
   | "INVALID_ENGINE"
   | "INVALID_VOICE"
+  | "AMBIGUOUS_VOICE"
   | "TOO_MANY_ARGUMENTS"
   | "CURRENT_STORY"
   | "NO_STORY"
diff --git a/src/modules/canttalk.ts b/src/modules/canttalk.ts
index 297f11e..0e352b7 100644
--- a/src/modules/canttalk.ts
+++ b/src/modules/canttalk.ts
@@ -2,6 +2,7 @@ import { readdirSync } from "node:fs";
 import { join } from "node:path";
 import { respond } from "../audio/AudioService.js";
 import type { TTSPreferencesRow } from "../db/schema.js";
+import { formatCandidates } from "../tts/BaseEngine.js";
 import type { Module } from "./types.js";
 
 export const canttalk: Module = ({ client, audio, commands, tts, db, t, config, rootDir }) => {
@@ -38,23 +39,27 @@ export const canttalk: Module = ({ client, audio, commands, tts, db, t, config,
   });
 
   commands.register("myvoice", async (args, message) => {
-    if (args.length > 3) {
-      return respond(audio, sysmsg, message, t("TOO_MANY_ARGUMENTS"));
-    }
     const engineName = args[1];
-    const voiceArg = args[2];
-    if (!engineName || !voiceArg) {
+    if (!engineName || args.length < 3) {
       return respond(audio, sysmsg, message, t("TOO_MANY_ARGUMENTS"));
     }
     const engine = tts.get(engineName);
     if (!engine) {
       return respond(audio, sysmsg, message, t("INVALID_ENGINE", engineName));
     }
-    const userVoice = voiceArg.toLowerCase();
+    const voiceInput = args.slice(2).join(" ");
+    const res = engine.resolveVoice(voiceInput);
     let chosenVoice: string;
-    if (engine.validateVoice(userVoice)) {
-      chosenVoice = userVoice;
+    if (res.kind === "exact" || res.kind === "fuzzy") {
+      chosenVoice = res.voice;
       respond(audio, sysmsg, message, t("USER_VOICE_CHANGED", chosenVoice, engine.longName));
+    } else if (res.kind === "ambiguous") {
+      return respond(
+        audio,
+        sysmsg,
+        message,
+        t("AMBIGUOUS_VOICE", voiceInput, formatCandidates(res.candidates)),
+      );
     } else {
       chosenVoice = engine.getDefaultVoice();
       respond(audio, sysmsg, message, t("INVALID_VOICE", chosenVoice, engine.longName));
diff --git a/src/modules/ttsSettings.ts b/src/modules/ttsSettings.ts
index 2dd6991..e385a5c 100644
--- a/src/modules/ttsSettings.ts
+++ b/src/modules/ttsSettings.ts
@@ -1,19 +1,15 @@
 import { join } from "node:path";
 import { AttachmentBuilder } from "discord.js";
 import { respond } from "../audio/AudioService.js";
+import { formatCandidates } from "../tts/BaseEngine.js";
 import type { Module } from "./types.js";
 
 export const ttsSettings: Module = ({ audio, commands, tts, t, rootDir }) => {
   const sysmsg = join(rootDir, "sysmsg.wav");
 
   commands.register("announcevoice", (args, message) => {
-    if (args.length > 3) {
-      respond(audio, sysmsg, message, t("TOO_MANY_ARGUMENTS"));
-      return;
-    }
     const engineName = args[1];
-    const voiceArg = args[2];
-    if (!engineName || !voiceArg) {
+    if (!engineName || args.length < 3) {
       respond(audio, sysmsg, message, t("TOO_MANY_ARGUMENTS"));
       return;
     }
@@ -22,10 +18,22 @@ export const ttsSettings: Module = ({ audio, commands, tts, t, rootDir }) => {
       respond(audio, sysmsg, message, t("INVALID_ENGINE", engineName));
       return;
     }
-    tts.announcement = engine;
-    if (engine.validateVoice(voiceArg)) {
-      tts.announcementVoice = voiceArg;
-      respond(audio, sysmsg, message, t("SYSTEM_VOICE_CHANGED", voiceArg, engine.longName));
+    const voiceInput = args.slice(2).join(" ");
+    const res = engine.resolveVoice(voiceInput);
+    if (res.kind === "exact" || res.kind === "fuzzy") {
+      tts.announcement = engine;
+      tts.announcementVoice = res.voice;
+      respond(audio, sysmsg, message, t("SYSTEM_VOICE_CHANGED", res.voice, engine.longName));
+    } else if (res.kind === "ambiguous") {
+      // Leave the announcement engine untouched: switching it here would pair
+      // the new engine with a voice that was chosen for the previous one.
+      respond(
+        audio,
+        sysmsg,
+        message,
+        t("AMBIGUOUS_VOICE", voiceInput, formatCandidates(res.candidates)),
+      );
     } else {
+      tts.announcement = engine;
       tts.announcementVoice = engine.getDefaultVoice();
       respond(
diff --git a/src/tts/BaseEngine.ts b/src/tts/BaseEngine.ts
index f435289..b12757f 100644
--- a/src/tts/BaseEngine.ts
+++ b/src/tts/BaseEngine.ts
@@ -2,6 +2,31 @@ import { writeFile } from "node:fs/promises";
 
 export type VoiceParams = Record;
 
+/** Outcome of matching user-typed voice text against an engine's voice table. */
+export type VoiceResolution =
+  | { kind: "exact"; voice: string }
+  | { kind: "fuzzy"; voice: string }
+  | { kind: "ambiguous"; candidates: string[] }
+  | { kind: "none" };
+
+/**
+ * Joins up to `max` candidate voice names for a user-facing ambiguity message,
+ * summarizing the rest as "(+N more)".
+ *
+ * @param candidates - Voice names to list; the array is not modified.
+ * @param max - Maximum number of names to spell out. Fractions are truncated
+ *   and negative or NaN limits count as zero, so the "(+N more)" tally always
+ *   equals the number of names actually left out.
+ * @returns The comma-separated names, followed by "(+N more)" when truncated.
+ */
+export function formatCandidates(candidates: readonly string[], max = 5): string {
+  const limit = Number.isNaN(max) ? 0 : Math.max(0, Math.trunc(max));
+  const shown = candidates.slice(0, limit).join(", ");
+  const extra = candidates.length - limit;
+  if (extra <= 0) return shown;
+  return shown ? `${shown} (+${extra} more)` : `(+${extra} more)`;
+}
+
 /**
  * Common contract for every TTS provider. Subclasses override either
  * `getSpeech` (returning a fetch-like Response) or `getSpeechFile` (writing
@@ -47,6 +72,45 @@ export abstract class BaseEngine {
     return Object.keys(this.voices).length === 0;
   }
 
+  /**
+   * Resolves a user-typed voice string against this engine's voice table.
+   *
+   * Matching ignores case and is tried in this order:
+   * 1. Freeform engines (see `isFreeformVoice`) always succeed with the
+   *    normalized (trimmed, lowercased) input.
+   * 2. A voice-table key equal to the normalized input is an exact match.
+   * 3. Token-prefix matching: each whitespace-separated input token must be a
+   *    prefix of some alphanumeric token of a key.
+   *
+   * Only the table's own keys are candidates, so input such as "constructor"
+   * cannot match a member inherited from `Object.prototype`.
+   *
+   * @param input - Raw voice text as typed by the user.
+   * @returns `exact` or `fuzzy` with the chosen voice, `ambiguous` with the
+   *   sorted matching keys, or `none` when nothing matches.
+   */
+  resolveVoice(input: string): VoiceResolution {
+    const norm = input.trim().toLowerCase();
+    if (this.isFreeformVoice()) return { kind: "exact", voice: norm };
+    // Object.keys lists own properties only, unlike a bare `this.voices[norm]`
+    // lookup, which would also find inherited members.
+    const keys = Object.keys(this.voices);
+    const exact = keys.includes(norm) ? norm : keys.find((key) => key.toLowerCase() === norm);
+    if (exact !== undefined) return { kind: "exact", voice: exact };
+    const inputTokens = norm.split(/\s+/).filter(Boolean);
+    if (inputTokens.length === 0) return { kind: "none" };
+    const matches = keys.filter((key) => {
+      // The input is already lowercased, so the key must be too or a key
+      // containing capitals could never satisfy the prefix test.
+      const keyTokens = key.toLowerCase().split(/[^a-z0-9]+/).filter(Boolean);
+      return inputTokens.every((it) => keyTokens.some((kt) => kt.startsWith(it)));
+    });
+    const [first] = matches;
+    if (first === undefined) return { kind: "none" };
+    if (matches.length === 1) return { kind: "fuzzy", voice: first };
+    return { kind: "ambiguous", candidates: matches.sort() };
+  }
+
   /** Default implementation: subclass should override either this or getSpeechFile. */
   async getSpeech(_text: string, _voice?: string, _params?: VoiceParams): Promise {
     throw new Error(`${this.shortName}: getSpeech not implemented`);
diff --git a/strings/en.json b/strings/en.json
index 5d0b450..df1f9f1 100644
--- a/strings/en.json
+++ b/strings/en.json
@@ -6,6 +6,7 @@
   "USER_VOICE_CHANGED": "Your new voice is %s from %s",
   "INVALID_ENGINE": "%s is not a valid engine name.",
   "INVALID_VOICE": "invalid voice name. Using default voice %s for %s instead.",
+  "AMBIGUOUS_VOICE": "voice name \"%s\" is ambiguous. Candidates: %s",
   "TOO_MANY_ARGUMENTS": "too many arguments for command.",
   "CURRENT_STORY": "Here's the current story: %s",
   "NO_STORY": "No story in progress at the moment.",