Fall back to Whisper when SpeechRecognition is not available

main
Cogent Apps 2023-03-20 21:03:12 +00:00
parent 1917bc19e0
commit 6359c9f50d
3 changed files with 29 additions and 14 deletions

View File

@ -8,8 +8,8 @@ import { useAppContext } from '../context';
import { useAppDispatch, useAppSelector } from '../store'; import { useAppDispatch, useAppSelector } from '../store';
import { selectMessage, setMessage } from '../store/message'; import { selectMessage, setMessage } from '../store/message';
import { selectTemperature } from '../store/parameters'; import { selectTemperature } from '../store/parameters';
import { openSystemPromptPanel, openTemperaturePanel } from '../store/settings-ui'; import { openOpenAIApiKeyPanel, openSystemPromptPanel, openTemperaturePanel } from '../store/settings-ui';
import { speechRecognition } from '../speech-recognition-types.d' import { speechRecognition, supportsSpeechRecognition } from '../speech-recognition-types'
import MicRecorder from 'mic-recorder-to-mp3'; import MicRecorder from 'mic-recorder-to-mp3';
import { selectUseOpenAIWhisper, selectOpenAIApiKey } from '../store/api-keys'; import { selectUseOpenAIWhisper, selectOpenAIApiKey } from '../store/api-keys';
import { Mp3Encoder } from 'lamejs'; import { Mp3Encoder } from 'lamejs';
@ -109,7 +109,7 @@ export default function MessageInput(props: MessageInputProps) {
console.error('speech recognition error', e); console.error('speech recognition error', e);
try { try {
speechRecognition.stop(); speechRecognition?.stop();
} catch (e) { } catch (e) {
} }
@ -122,14 +122,19 @@ export default function MessageInput(props: MessageInputProps) {
}, [recorder]); }, [recorder]);
const onSpeechStart = useCallback(() => { const onSpeechStart = useCallback(() => {
if (!openAIApiKey) {
dispatch(openOpenAIApiKeyPanel());
return false;
}
try { try {
if (!recording) { if (!recording) {
setRecording(true); setRecording(true);
// if we are using whisper, then we will just record with the browser and send it to the API when done // if we are using whisper, then we will just record with the browser and send it to the API when done
if (useOpenAIWhisper) { if (useOpenAIWhisper || !supportsSpeechRecognition) {
recorder.start().catch(onSpeechError); recorder.start().catch(onSpeechError);
} else { } else if (speechRecognition) {
const initialMessage = message; const initialMessage = message;
speechRecognition.continuous = true; speechRecognition.continuous = true;
@ -146,10 +151,12 @@ export default function MessageInput(props: MessageInputProps) {
}; };
speechRecognition.start(); speechRecognition.start();
} else {
onSpeechError(new Error('not supported'));
} }
} else { } else {
setRecording(false); setRecording(false);
if (useOpenAIWhisper) { if (useOpenAIWhisper || !supportsSpeechRecognition) {
setTranscribing(true); setTranscribing(true);
const mp3 = recorder.stop().getMp3(); const mp3 = recorder.stop().getMp3();
@ -185,14 +192,16 @@ export default function MessageInput(props: MessageInputProps) {
} }
}).catch(onSpeechError); }).catch(onSpeechError);
} else { } else if (speechRecognition) {
speechRecognition.stop(); speechRecognition.stop();
} else {
onSpeechError(new Error('not supported'));
} }
} }
} catch (e) { } catch (e) {
onSpeechError(e); onSpeechError(e);
} }
}, [recording, message, dispatch]); }, [recording, message, dispatch, onSpeechError, openAIApiKey]);
const onKeyDown = useCallback((e: React.KeyboardEvent<HTMLTextAreaElement>) => { const onKeyDown = useCallback((e: React.KeyboardEvent<HTMLTextAreaElement>) => {

View File

@ -6,6 +6,7 @@ import { useAppDispatch, useAppSelector } from "../../store";
import { selectOpenAIApiKey, setOpenAIApiKeyFromEvent, selectUseOpenAIWhisper, setUseOpenAIWhisperFromEvent } from "../../store/api-keys"; import { selectOpenAIApiKey, setOpenAIApiKeyFromEvent, selectUseOpenAIWhisper, setUseOpenAIWhisperFromEvent } from "../../store/api-keys";
import { selectSettingsOption } from "../../store/settings-ui"; import { selectSettingsOption } from "../../store/settings-ui";
import { FormattedMessage, useIntl } from "react-intl"; import { FormattedMessage, useIntl } from "react-intl";
import { supportsSpeechRecognition } from "../../speech-recognition-types";
export default function UserOptionsTab(props: any) { export default function UserOptionsTab(props: any) {
const option = useAppSelector(selectSettingsOption); const option = useAppSelector(selectSettingsOption);
@ -31,11 +32,11 @@ export default function UserOptionsTab(props: any) {
</a> </a>
</p> </p>
<Checkbox {supportsSpeechRecognition && <Checkbox
style={{ marginTop: '1rem' }} style={{ marginTop: '1rem' }}
id="use-openai-whisper-api" checked={useOpenAIWhisper!} onChange={onUseOpenAIWhisperChange} id="use-openai-whisper-api" checked={useOpenAIWhisper!} onChange={onUseOpenAIWhisperChange}
label="Use the OpenAI Whisper API for speech recognition." label="Use the OpenAI Whisper API for speech recognition."
/> />}
<p> <p>
<FormattedMessage defaultMessage="Your API key is stored only on this device and never transmitted to anyone except OpenAI." /> <FormattedMessage defaultMessage="Your API key is stored only on this device and never transmitted to anyone except OpenAI." />

View File

@ -122,12 +122,17 @@ declare global {
} }
} }
let speechRecognition: SpeechRecognition let speechRecognition: SpeechRecognition | null = null;
if (window.SpeechRecognition) { if (window.SpeechRecognition) {
speechRecognition = new SpeechRecognition() speechRecognition = new SpeechRecognition()
} else { } else if ((window as any).webkitSpeechRecognition) {
speechRecognition = new webkitSpeechRecognition() speechRecognition = new (window as any).webkitSpeechRecognition() as SpeechRecognition;
} }
export { speechRecognition } const supportsSpeechRecognition = speechRecognition !== null;
export {
speechRecognition,
supportsSpeechRecognition,
}