Fuzzy voice matching
This commit is contained in:
@@ -11,6 +11,7 @@ export type StringKey =
|
||||
| "USER_VOICE_CHANGED"
|
||||
| "INVALID_ENGINE"
|
||||
| "INVALID_VOICE"
|
||||
| "AMBIGUOUS_VOICE"
|
||||
| "TOO_MANY_ARGUMENTS"
|
||||
| "CURRENT_STORY"
|
||||
| "NO_STORY"
|
||||
|
||||
@@ -2,6 +2,7 @@ import { readdirSync } from "node:fs";
|
||||
import { join } from "node:path";
|
||||
import { respond } from "../audio/AudioService.js";
|
||||
import type { TTSPreferencesRow } from "../db/schema.js";
|
||||
import { formatCandidates } from "../tts/BaseEngine.js";
|
||||
import type { Module } from "./types.js";
|
||||
|
||||
export const canttalk: Module = ({ client, audio, commands, tts, db, t, config, rootDir }) => {
|
||||
@@ -38,23 +39,27 @@ export const canttalk: Module = ({ client, audio, commands, tts, db, t, config,
|
||||
});
|
||||
|
||||
commands.register("myvoice", async (args, message) => {
|
||||
if (args.length > 3) {
|
||||
return respond(audio, sysmsg, message, t("TOO_MANY_ARGUMENTS"));
|
||||
}
|
||||
const engineName = args[1];
|
||||
const voiceArg = args[2];
|
||||
if (!engineName || !voiceArg) {
|
||||
if (!engineName || args.length < 3) {
|
||||
return respond(audio, sysmsg, message, t("TOO_MANY_ARGUMENTS"));
|
||||
}
|
||||
const engine = tts.get(engineName);
|
||||
if (!engine) {
|
||||
return respond(audio, sysmsg, message, t("INVALID_ENGINE", engineName));
|
||||
}
|
||||
const userVoice = voiceArg.toLowerCase();
|
||||
const voiceInput = args.slice(2).join(" ");
|
||||
const res = engine.resolveVoice(voiceInput);
|
||||
let chosenVoice: string;
|
||||
if (engine.validateVoice(userVoice)) {
|
||||
chosenVoice = userVoice;
|
||||
if (res.kind === "exact" || res.kind === "fuzzy") {
|
||||
chosenVoice = res.voice;
|
||||
respond(audio, sysmsg, message, t("USER_VOICE_CHANGED", chosenVoice, engine.longName));
|
||||
} else if (res.kind === "ambiguous") {
|
||||
return respond(
|
||||
audio,
|
||||
sysmsg,
|
||||
message,
|
||||
t("AMBIGUOUS_VOICE", voiceInput, formatCandidates(res.candidates)),
|
||||
);
|
||||
} else {
|
||||
chosenVoice = engine.getDefaultVoice();
|
||||
respond(audio, sysmsg, message, t("INVALID_VOICE", chosenVoice, engine.longName));
|
||||
|
||||
@@ -1,19 +1,15 @@
|
||||
import { join } from "node:path";
|
||||
import { AttachmentBuilder } from "discord.js";
|
||||
import { respond } from "../audio/AudioService.js";
|
||||
import { formatCandidates } from "../tts/BaseEngine.js";
|
||||
import type { Module } from "./types.js";
|
||||
|
||||
export const ttsSettings: Module = ({ audio, commands, tts, t, rootDir }) => {
|
||||
const sysmsg = join(rootDir, "sysmsg.wav");
|
||||
|
||||
commands.register("announcevoice", (args, message) => {
|
||||
if (args.length > 3) {
|
||||
respond(audio, sysmsg, message, t("TOO_MANY_ARGUMENTS"));
|
||||
return;
|
||||
}
|
||||
const engineName = args[1];
|
||||
const voiceArg = args[2];
|
||||
if (!engineName || !voiceArg) {
|
||||
if (!engineName || args.length < 3) {
|
||||
respond(audio, sysmsg, message, t("TOO_MANY_ARGUMENTS"));
|
||||
return;
|
||||
}
|
||||
@@ -22,10 +18,19 @@ export const ttsSettings: Module = ({ audio, commands, tts, t, rootDir }) => {
|
||||
respond(audio, sysmsg, message, t("INVALID_ENGINE", engineName));
|
||||
return;
|
||||
}
|
||||
const voiceInput = args.slice(2).join(" ");
|
||||
const res = engine.resolveVoice(voiceInput);
|
||||
tts.announcement = engine;
|
||||
if (engine.validateVoice(voiceArg)) {
|
||||
tts.announcementVoice = voiceArg;
|
||||
respond(audio, sysmsg, message, t("SYSTEM_VOICE_CHANGED", voiceArg, engine.longName));
|
||||
if (res.kind === "exact" || res.kind === "fuzzy") {
|
||||
tts.announcementVoice = res.voice;
|
||||
respond(audio, sysmsg, message, t("SYSTEM_VOICE_CHANGED", res.voice, engine.longName));
|
||||
} else if (res.kind === "ambiguous") {
|
||||
respond(
|
||||
audio,
|
||||
sysmsg,
|
||||
message,
|
||||
t("AMBIGUOUS_VOICE", voiceInput, formatCandidates(res.candidates)),
|
||||
);
|
||||
} else {
|
||||
tts.announcementVoice = engine.getDefaultVoice();
|
||||
respond(
|
||||
|
||||
@@ -2,6 +2,19 @@ import { writeFile } from "node:fs/promises";
|
||||
|
||||
/**
 * String-keyed bag of values whose types are not known statically; consumers
 * must narrow each value before use.
 * NOTE(review): presumably per-engine synthesis options — confirm against the
 * engines that read it.
 */
export type VoiceParams = Record<string, unknown>;
|
||||
|
||||
/**
 * Outcome of resolving a user-typed voice name, discriminated on `kind`.
 *
 * Callers are expected to branch on `kind` before reading `voice` or
 * `candidates`, since each field exists only on the variants listed below.
 */
export type VoiceResolution =
  /** The input identified a single voice directly. */
  | { kind: "exact"; voice: string }
  /** The input did not match directly but narrowed down to a single voice. */
  | { kind: "fuzzy"; voice: string }
  /** The input matched several voices; `candidates` lists all of them. */
  | { kind: "ambiguous"; candidates: Array<string> }
  /** The input matched nothing. */
  | { kind: "none" };
|
||||
|
||||
/**
 * Builds the candidate list shown in a user-facing "ambiguous voice" message.
 *
 * At most `max` names are spelled out, comma-separated and in the order given;
 * any remainder is summarized as "(+N more)".
 *
 * @param candidates - Candidate voice names, already in display order.
 * @param max - Maximum number of names to spell out (default 5). Negative or
 *   NaN values are treated as 0 and fractional values are truncated, so the
 *   "(+N more)" count always equals the number of names actually omitted.
 * @returns The joined names, followed by "(+N more)" when some were omitted.
 *   When no name is spelled out at all, only the "(+N more)" summary is
 *   returned (or the empty string for an empty list).
 */
export function formatCandidates(candidates: readonly string[], max = 5): string {
  // Normalize the limit once; using the raw `max` in both slice() and the
  // subtraction let negative/fractional values produce a wrong remainder.
  const limit = Number.isNaN(max) ? 0 : Math.max(0, Math.trunc(max));
  const shown = candidates.slice(0, limit);
  const omitted = candidates.length - shown.length;
  const listed = shown.join(", ");
  if (omitted <= 0) return listed;

  const summary = `(+${omitted} more)`;
  // Avoid a dangling leading space when nothing is listed before the summary.
  return shown.length === 0 ? summary : `${listed} ${summary}`;
}
|
||||
|
||||
/**
|
||||
* Common contract for every TTS provider. Subclasses override either
|
||||
* `getSpeech` (returning a fetch-like Response) or `getSpeechFile` (writing
|
||||
@@ -47,6 +60,27 @@ export abstract class BaseEngine {
|
||||
return Object.keys(this.voices).length === 0;
|
||||
}
|
||||
|
||||
/**
 * Resolves a user-typed voice string against this engine's voice table.
 *
 * Resolution order:
 * 1. Freeform engines always succeed: the trimmed, lower-cased input is
 *    returned as an `exact` match.
 * 2. If the normalized input is an own key of `this.voices` with a non-null
 *    value, it is an `exact` match.
 * 3. Otherwise token-prefix matching is applied: the input is split on
 *    whitespace, each key is split on runs of non-alphanumeric characters,
 *    and a key matches when every input token is a prefix of at least one of
 *    its tokens (compared case-insensitively).
 *
 * @param input - Voice name as typed by the user; may contain several
 *   whitespace-separated words.
 * @returns `exact` or `fuzzy` with the resolved voice key, `ambiguous` with
 *   the alphabetically sorted matching keys when more than one key matches,
 *   or `none` when the input is blank or matches nothing.
 */
resolveVoice(input: string): VoiceResolution {
  const norm = input.trim().toLowerCase();
  if (this.isFreeformVoice()) return { kind: "exact", voice: norm };

  // Require an *own* property: a bare `this.voices[norm] != null` also accepts
  // inherited members such as "constructor" when the table is a plain object,
  // which would report a non-existent voice as an exact match.
  const isOwnKey = Object.prototype.hasOwnProperty.call(this.voices, norm);
  if (isOwnKey && this.voices[norm] != null) return { kind: "exact", voice: norm };

  const inputTokens = norm.split(/\s+/).filter(Boolean);
  if (inputTokens.length === 0) return { kind: "none" };

  const matches = Object.keys(this.voices).filter((key) => {
    // Input tokens are already lower-cased, so lower-case the key as well;
    // otherwise keys containing capitals could never prefix-match.
    const keyTokens = key
      .toLowerCase()
      .split(/[^a-z0-9]+/)
      .filter(Boolean);
    return inputTokens.every((token) => keyTokens.some((keyToken) => keyToken.startsWith(token)));
  });

  const [only] = matches;
  if (matches.length === 1 && only !== undefined) return { kind: "fuzzy", voice: only };
  // `matches` is a fresh array produced by filter(), so sorting in place is safe.
  if (matches.length > 1) return { kind: "ambiguous", candidates: matches.sort() };
  return { kind: "none" };
}
|
||||
|
||||
/** Default implementation: subclass should override either this or getSpeechFile. */
|
||||
async getSpeech(_text: string, _voice?: string, _params?: VoiceParams): Promise<Response> {
|
||||
throw new Error(`${this.shortName}: getSpeech not implemented`);
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
"USER_VOICE_CHANGED": "Your new voice is %s from %s",
|
||||
"INVALID_ENGINE": "%s is not a valid engine name.",
|
||||
"INVALID_VOICE": "invalid voice name. Using default voice %s for %s instead.",
|
||||
"AMBIGUOUS_VOICE": "voice name \"%s\" is ambiguous. Candidates: %s",
|
||||
"TOO_MANY_ARGUMENTS": "too many arguments for command.",
|
||||
"CURRENT_STORY": "Here's the current story: %s",
|
||||
"NO_STORY": "No story in progress at the moment.",
|
||||
|
||||
Reference in New Issue
Block a user