From 7e9f67c7b82129f4e4a1eaa307efcc2b18dae93f Mon Sep 17 00:00:00 2001 From: Tycho Luyben Date: Sat, 18 Mar 2023 12:49:30 +0000 Subject: [PATCH] added the browser stt api --- app/package.json | 1 + app/src/components/input.tsx | 44 +++++++-- app/src/index.tsx | 2 +- app/src/speech-recognition-types.d.ts | 133 ++++++++++++++++++++++++++ app/src/store/index.ts | 2 + 5 files changed, 173 insertions(+), 9 deletions(-) create mode 100644 app/src/speech-recognition-types.d.ts diff --git a/app/package.json b/app/package.json index 7f6aba0..b39d8c1 100644 --- a/app/package.json +++ b/app/package.json @@ -4,6 +4,7 @@ "dependencies": { "@auth0/auth0-spa-js": "^2.0.4", "@emotion/css": "^11.10.6", + "@emotion/react": "^11.10.6", "@emotion/styled": "^11.10.6", "@mantine/core": "^5.10.5", "@mantine/hooks": "^5.10.5", diff --git a/app/src/components/input.tsx b/app/src/components/input.tsx index 18da4a1..b157369 100644 --- a/app/src/components/input.tsx +++ b/app/src/components/input.tsx @@ -1,7 +1,7 @@ import styled from '@emotion/styled'; import { Button, ActionIcon, Textarea, Loader } from '@mantine/core'; import { useMediaQuery } from '@mantine/hooks'; -import { useCallback, useMemo } from 'react'; +import { useCallback, useMemo, useState } from 'react'; import { FormattedMessage, useIntl } from 'react-intl'; import { useLocation } from 'react-router-dom'; import { useAppContext } from '../context'; @@ -9,6 +9,7 @@ import { useAppDispatch, useAppSelector } from '../store'; import { selectMessage, setMessage } from '../store/message'; import { selectTemperature } from '../store/parameters'; import { openSystemPromptPanel, openTemperaturePanel } from '../store/settings-ui'; +import { speechRecognition } from '../speech-recognition-types.d' const Container = styled.div` background: #292933; @@ -37,9 +38,9 @@ export interface MessageInputProps { export default function MessageInput(props: MessageInputProps) { const temperature = useAppSelector(selectTemperature); const message = useAppSelector(selectMessage); - + const [recording, setRecording] = useState(false); const hasVerticalSpace = useMediaQuery('(min-height: 1000px)'); - + const context = useAppContext(); const dispatch = useAppDispatch(); const intl = useIntl(); @@ -58,6 +59,26 @@ export default function MessageInput(props: MessageInputProps) { } }, [context, message, dispatch]); + const onSpeechStart = () => { + console.log("onSpeechStart", recording) + if (!recording) { + setRecording(true); + speechRecognition.continuous = true; + speechRecognition.interimResults = true; + + speechRecognition.onresult = (event) => { + const transcript = event.results[event.results.length - 1][0].transcript; + dispatch(setMessage(transcript)); + }; + + speechRecognition.start(); + } else { + setRecording(false); + speechRecognition.stop(); + } + } + + const onKeyDown = useCallback((e: React.KeyboardEvent) => { if (e.key === 'Enter' && e.shiftKey === false && !props.disabled) { e.preventDefault(); @@ -66,6 +87,7 @@ export default function MessageInput(props: MessageInputProps) { }, [onSubmit, props.disabled]); const rightSection = useMemo(() => { + return (
)} {!context.generating && ( - - - + <> + + + + + + + )}
); - }, [onSubmit, props.disabled, context.generating]); + }, [recording, onSubmit, props.disabled, context.generating]); const disabled = context.generating; diff --git a/app/src/index.tsx b/app/src/index.tsx index 877a99f..209c75b 100644 --- a/app/src/index.tsx +++ b/app/src/index.tsx @@ -72,7 +72,7 @@ async function bootstrapApplication() { root.render( - + diff --git a/app/src/speech-recognition-types.d.ts b/app/src/speech-recognition-types.d.ts new file mode 100644 index 0000000..8e205a5 --- /dev/null +++ b/app/src/speech-recognition-types.d.ts @@ -0,0 +1,133 @@ +declare global { + interface Window { + SpeechRecognition: SpeechRecognition + } + interface SpeechGrammar { + src: string + weight: number + } + + const SpeechGrammar: { + prototype: SpeechGrammar + new(): SpeechGrammar + } + + interface SpeechGrammarList { + readonly length: number + addFromString(string: string, weight?: number): void + addFromURI(src: string, weight?: number): void + item(index: number): SpeechGrammar + [index: number]: SpeechGrammar + } + + const SpeechGrammarList: { + prototype: SpeechGrammarList + new(): SpeechGrammarList + } + + interface SpeechRecognitionEventMap { + audioend: Event + audiostart: Event + end: Event + error: SpeechRecognitionError + nomatch: SpeechRecognitionEvent + result: SpeechRecognitionEvent + soundend: Event + soundstart: Event + speechend: Event + speechstart: Event + start: Event + } + + interface SpeechRecognition { + continuous: boolean + grammars: SpeechGrammarList + interimResults: boolean + lang: string + maxAlternatives: number + onaudioend: ((this: SpeechRecognition, ev: Event) => any) | null + onaudiostart: ((this: SpeechRecognition, ev: Event) => any) | null + onend: ((this: SpeechRecognition, ev: Event) => any) | null + onerror: + | ((this: SpeechRecognition, ev: SpeechRecognitionError) => any) + | null + onnomatch: + | ((this: SpeechRecognition, ev: SpeechRecognitionEvent) => any) + | null + onresult: + | ((this: SpeechRecognition, ev: SpeechRecognitionEvent) => any) + | null + onsoundend: ((this: SpeechRecognition, ev: Event) => any) | null + onsoundstart: ((this: SpeechRecognition, ev: Event) => any) | null + onspeechend: ((this: SpeechRecognition, ev: Event) => any) | null + onspeechstart: ((this: SpeechRecognition, ev: Event) => any) | null + onstart: ((this: SpeechRecognition, ev: Event) => any) | null + serviceURI: string + abort(): void + start(): void + stop(): void + addEventListener( + type: K, + listener: ( + this: SpeechRecognition, + ev: SpeechRecognitionEventMap[K] + ) => any, + options?: boolean | AddEventListenerOptions + ): void + addEventListener( + type: string, + listener: EventListenerOrEventListenerObject, + options?: boolean | AddEventListenerOptions + ): void + removeEventListener( + type: K, + listener: ( + this: SpeechRecognition, + ev: SpeechRecognitionEventMap[K] + ) => any, + options?: boolean | EventListenerOptions + ): void + removeEventListener( + type: string, + listener: EventListenerOrEventListenerObject, + options?: boolean | EventListenerOptions + ): void + } + + const SpeechRecognition: { + prototype: SpeechRecognition + new(): SpeechRecognition + } + + interface SpeechRecognitionError extends Event { + // readonly error: SpeechRecognitionErrorCode; + readonly message: string + } + + const SpeechRecognitionError: { + prototype: SpeechRecognitionError + new(): SpeechRecognitionError + } + + interface SpeechRecognitionEvent extends Event { + readonly emma: Document | null + readonly interpretation: any + readonly resultIndex: number + readonly results: SpeechRecognitionResultList + } + + const SpeechRecognitionEvent: { + prototype: SpeechRecognitionEvent + new(): SpeechRecognitionEvent + } +} + +let speechRecognition: SpeechRecognition + +if (window.SpeechRecognition) { + speechRecognition = new SpeechRecognition() +} else { + speechRecognition = new webkitSpeechRecognition() +} + +export { speechRecognition } \ No newline at end of file diff --git a/app/src/store/index.ts b/app/src/store/index.ts index fa5be68..be1fdde 100644 --- a/app/src/store/index.ts +++ b/app/src/store/index.ts @@ -25,6 +25,8 @@ const persistMessageConfig = { storage, } + + const store = configureStore({ reducer: { // auth: authReducer,