speech error handling & tweaks

main
Cogent Apps 2023-03-20 14:46:01 +00:00
parent f0b22eff64
commit ce63613bdf
1 changed file with 77 additions and 50 deletions

View File

@ -81,6 +81,7 @@ export default function MessageInput(props: MessageInputProps) {
const temperature = useAppSelector(selectTemperature); const temperature = useAppSelector(selectTemperature);
const message = useAppSelector(selectMessage); const message = useAppSelector(selectMessage);
const [recording, setRecording] = useState(false); const [recording, setRecording] = useState(false);
const [transcribing, setTranscribing] = useState(false);
const hasVerticalSpace = useMediaQuery('(min-height: 1000px)'); const hasVerticalSpace = useMediaQuery('(min-height: 1000px)');
const recorder = useMemo(() => new MicRecorder({ bitRate: 128 }), []); const recorder = useMemo(() => new MicRecorder({ bitRate: 128 }), []);
const useOpenAIWhisper = useAppSelector(selectUseOpenAIWhisper); const useOpenAIWhisper = useAppSelector(selectUseOpenAIWhisper);
@ -104,65 +105,90 @@ export default function MessageInput(props: MessageInputProps) {
} }
}, [context, message, dispatch]); }, [context, message, dispatch]);
/**
 * Shared failure handler for the speech-input flow.
 *
 * Logs the error, makes a best-effort attempt to stop both the browser
 * speech recognizer and the microphone recorder, and then clears the
 * `recording` and `transcribing` flags.
 *
 * @param error - Whatever was thrown or rejected. It is only logged, never
 *   inspected, so it is typed `unknown` rather than `any`.
 */
const onSpeechError = useCallback((error: unknown) => {
    console.error('speech recognition error', error);

    // This handler is not told which capture path failed, so it stops both.
    // Each stop is isolated in its own try/catch so that a throw from one
    // cannot prevent the other from running or skip the state reset below.
    try {
        speechRecognition.stop();
    } catch (stopError) {
        // Intentionally ignored: this is cleanup after a failure that has
        // already been logged above.
    }

    try {
        recorder.stop();
    } catch (stopError) {
        // Intentionally ignored, for the same reason as above.
    }

    // Always clear both flags, whatever happened while stopping.
    setRecording(false);
    setTranscribing(false);
}, [recorder]);
const onSpeechStart = useCallback(() => { const onSpeechStart = useCallback(() => {
try {
if (!recording) {
setRecording(true);
if (!recording) { // if we are using whisper, then we will just record with the browser and send the api when done
setRecording(true); if (useOpenAIWhisper) {
recorder.start().catch(onSpeechError);
} else {
const initialMessage = message;
// if we are using whisper, then we will just record with the browser and send the api when done speechRecognition.continuous = true;
if (useOpenAIWhisper) { speechRecognition.interimResults = true;
recorder.start().catch((e: any) => console.error(e));
speechRecognition.onresult = (event) => {
let transcript = '';
for (let i = 0; i < event.results.length; ++i) {
transcript += event.results[i][0].transcript;
}
dispatch(setMessage(initialMessage + ' ' + transcript));
};
speechRecognition.start();
}
} else { } else {
speechRecognition.continuous = true; setRecording(false);
speechRecognition.interimResults = true; if (useOpenAIWhisper) {
setTranscribing(true);
const mp3 = recorder.stop().getMp3();
speechRecognition.onresult = (event) => { mp3.then(async ([buffer, blob]) => {
const transcript = event.results[event.results.length - 1][0].transcript; const file = new File(buffer, 'chat.mp3', {
dispatch(setMessage(transcript)); type: blob.type,
}; lastModified: Date.now()
speechRecognition.start();
}
} else {
setRecording(false);
if (useOpenAIWhisper) {
const mp3 = recorder.stop().getMp3();
mp3.then(async ([buffer, blob]) => {
const file = new File(buffer, 'chat.mp3', {
type: blob.type,
lastModified: Date.now()
});
// TODO: cut in chunks
var data = new FormData()
data.append('file', file);
data.append('model', 'whisper-1')
try {
const response = await fetch("https://api.openai.com/v1/audio/transcriptions", {
method: "POST",
headers: {
'Authorization': `Bearer ${openAIApiKey}`,
},
body: data,
}); });
const json = await response.json() // TODO: cut in chunks
if (json.text) { var data = new FormData()
dispatch(setMessage(json.text)); data.append('file', file);
data.append('model', 'whisper-1')
try {
const response = await fetch("https://api.openai.com/v1/audio/transcriptions", {
method: "POST",
headers: {
'Authorization': `Bearer ${openAIApiKey}`,
},
body: data,
});
const json = await response.json()
if (json.text) {
dispatch(setMessage(message + ' ' + json.text));
setTranscribing(false);
}
} catch (e) {
onSpeechError(e);
} }
} catch (e) {
console.log(e)
}
}).catch((e: any) => console.error(e)); }).catch(onSpeechError);
} else { } else {
speechRecognition.stop(); speechRecognition.stop();
}
} }
} catch (e) {
onSpeechError(e);
} }
}, [recording, message, dispatch]); }, [recording, message, dispatch]);
@ -197,7 +223,8 @@ export default function MessageInput(props: MessageInputProps) {
<> <>
<ActionIcon size="xl" <ActionIcon size="xl"
onClick={onSpeechStart}> onClick={onSpeechStart}>
<i className="fa fa-microphone" style={{ fontSize: '90%', color: recording ? 'red' : 'inherit' }} /> {transcribing && <Loader size="xs" />}
{!transcribing && <i className="fa fa-microphone" style={{ fontSize: '90%', color: recording ? 'red' : 'inherit' }} />}
</ActionIcon> </ActionIcon>
<ActionIcon size="xl" <ActionIcon size="xl"
onClick={onSubmit}> onClick={onSubmit}>
@ -207,7 +234,7 @@ export default function MessageInput(props: MessageInputProps) {
)} )}
</div> </div>
); );
}, [recording, onSubmit, props.disabled, context.generating]); }, [recording, transcribing, onSubmit, props.disabled, context.generating]);
const disabled = context.generating; const disabled = context.generating;