speech error handling & tweaks

2023-03-20 14:46:01 +00:00
parent f0b22eff64
commit ce63613bdf
1 changed files with 77 additions and 50 deletions
--- a/app/src/components/input.tsx
+++ b/app/src/components/input.tsx
@@ -81,6 +81,7 @@ export default function MessageInput(props: MessageInputProps) {
    const temperature = useAppSelector(selectTemperature);
    const message = useAppSelector(selectMessage);
    const [recording, setRecording] = useState(false);
    const [transcribing, setTranscribing] = useState(false);
    const hasVerticalSpace = useMediaQuery('(min-height: 1000px)');
    const recorder = useMemo(() => new MicRecorder({ bitRate: 128 }), []);
    const useOpenAIWhisper = useAppSelector(selectUseOpenAIWhisper);
@@ -104,65 +105,90 @@ export default function MessageInput(props: MessageInputProps) {
        }
    }, [context, message, dispatch]);
    const onSpeechError = useCallback((e: any) => {
        console.error('speech recognition error', e);
        try {
            speechRecognition.stop();
        } catch (e) {
        }
        try {
            recorder.stop();
        } catch (e) { }
        setRecording(false);
        setTranscribing(false);
    }, [recorder]);
    const onSpeechStart = useCallback(() => {
        try {
            if (!recording) {
                setRecording(true);
-        if (!recording) {
+                // if we are using whisper, the we will just record with the browser and send the api when done 
-            setRecording(true);
+                if (useOpenAIWhisper) {
                    recorder.start().catch(onSpeechError);
                } else {
                    const initialMessage = message;
-            // if we are using whisper, the we will just record with the browser and send the api when done 
+                    speechRecognition.continuous = true;
-            if (useOpenAIWhisper) {
+                    speechRecognition.interimResults = true;
-                recorder.start().catch((e: any) => console.error(e));
+
                    speechRecognition.onresult = (event) => {
                        let transcript = '';
                        for (let i = 0; i < event.results.length; ++i) {
                            transcript += event.results[i][0].transcript;
                        }
                        dispatch(setMessage(initialMessage + ' ' + transcript));
                    };
                    speechRecognition.start();
                }
            } else {
-                speechRecognition.continuous = true;
+                setRecording(false);
-                speechRecognition.interimResults = true;
+                if (useOpenAIWhisper) {
                    setTranscribing(true);
                    const mp3 = recorder.stop().getMp3();
-                speechRecognition.onresult = (event) => {
+                    mp3.then(async ([buffer, blob]) => {
-                    const transcript = event.results[event.results.length - 1][0].transcript;
+                        const file = new File(buffer, 'chat.mp3', {
-                    dispatch(setMessage(transcript));
+                            type: blob.type,
-                };
+                            lastModified: Date.now()
                speechRecognition.start();
            }
        } else {
            setRecording(false);
            if (useOpenAIWhisper) {
                const mp3 = recorder.stop().getMp3();
                mp3.then(async ([buffer, blob]) => {
                    const file = new File(buffer, 'chat.mp3', {
                        type: blob.type,
                        lastModified: Date.now()
                    });
                    // TODO: cut in chunks
                    var data = new FormData()
                    data.append('file', file);
                    data.append('model', 'whisper-1')
                    try {
                        const response = await fetch("https://api.openai.com/v1/audio/transcriptions", {
                            method: "POST",
                            headers: {
                                'Authorization': `Bearer ${openAIApiKey}`,
                            },
                            body: data,
                        });
-                        const json = await response.json()
+                        // TODO: cut in chunks
-                        if (json.text) {
+                        var data = new FormData()
-                            dispatch(setMessage(json.text));
+                        data.append('file', file);
                        data.append('model', 'whisper-1')
                        try {
                            const response = await fetch("https://api.openai.com/v1/audio/transcriptions", {
                                method: "POST",
                                headers: {
                                    'Authorization': `Bearer ${openAIApiKey}`,
                                },
                                body: data,
                            });
                            const json = await response.json()
                            if (json.text) {
                                dispatch(setMessage(message + ' ' + json.text));
                                setTranscribing(false);
                            }
                        } catch (e) {
                            onSpeechError(e);
                        }
                    } catch (e) {
                        console.log(e)
                    }
-                }).catch((e: any) => console.error(e));
+                    }).catch(onSpeechError);
-            } else {
+                } else {
-                speechRecognition.stop();
+                    speechRecognition.stop();
                }
            }
        } catch (e) {
            onSpeechError(e);
        }
    }, [recording, message, dispatch]);
@@ -197,7 +223,8 @@ export default function MessageInput(props: MessageInputProps) {
                    <>
                        <ActionIcon size="xl"
                            onClick={onSpeechStart}>
-                            <i className="fa fa-microphone" style={{ fontSize: '90%', color: recording ? 'red' : 'inherit' }} />
+                            {transcribing && <Loader size="xs" />}
                            {!transcribing && <i className="fa fa-microphone" style={{ fontSize: '90%', color: recording ? 'red' : 'inherit' }} />}
                        </ActionIcon>
                        <ActionIcon size="xl"
                            onClick={onSubmit}>
@@ -207,7 +234,7 @@ export default function MessageInput(props: MessageInputProps) {
                )}
            </div>
        );
-    }, [recording, onSubmit, props.disabled, context.generating]);
+    }, [recording, transcribing, onSubmit, props.disabled, context.generating]);
    const disabled = context.generating;