Added the browser STT (speech-to-text) API

main
Tycho Luyben 2023-03-18 12:49:30 +00:00
parent 1d0c334f22
commit 7e9f67c7b8
5 changed files with 173 additions and 9 deletions

View File

@@ -4,6 +4,7 @@
"dependencies": {
"@auth0/auth0-spa-js": "^2.0.4",
"@emotion/css": "^11.10.6",
"@emotion/react": "^11.10.6",
"@emotion/styled": "^11.10.6",
"@mantine/core": "^5.10.5",
"@mantine/hooks": "^5.10.5",

View File

@@ -1,7 +1,7 @@
import styled from '@emotion/styled';
import { Button, ActionIcon, Textarea, Loader } from '@mantine/core';
import { useMediaQuery } from '@mantine/hooks';
import { useCallback, useMemo } from 'react';
import { useCallback, useMemo, useState } from 'react';
import { FormattedMessage, useIntl } from 'react-intl';
import { useLocation } from 'react-router-dom';
import { useAppContext } from '../context';
@@ -9,6 +9,7 @@ import { useAppDispatch, useAppSelector } from '../store';
import { selectMessage, setMessage } from '../store/message';
import { selectTemperature } from '../store/parameters';
import { openSystemPromptPanel, openTemperaturePanel } from '../store/settings-ui';
import { speechRecognition } from '../speech-recognition-types.d'
const Container = styled.div`
background: #292933;
@@ -37,7 +38,7 @@ export interface MessageInputProps {
export default function MessageInput(props: MessageInputProps) {
const temperature = useAppSelector(selectTemperature);
const message = useAppSelector(selectMessage);
const [recording, setRecording] = useState(false);
const hasVerticalSpace = useMediaQuery('(min-height: 1000px)');
const context = useAppContext();
@@ -58,6 +59,26 @@ export default function MessageInput(props: MessageInputProps) {
}
}, [context, message, dispatch]);
const onSpeechStart = () => {
console.log("onSpeechStart", recording)
if (!recording) {
setRecording(true);
speechRecognition.continuous = true;
speechRecognition.interimResults = true;
speechRecognition.onresult = (event) => {
const transcript = event.results[event.results.length - 1][0].transcript;
dispatch(setMessage(transcript));
};
speechRecognition.start();
} else {
setRecording(false);
speechRecognition.stop();
}
}
const onKeyDown = useCallback((e: React.KeyboardEvent<HTMLTextAreaElement>) => {
if (e.key === 'Enter' && e.shiftKey === false && !props.disabled) {
e.preventDefault();
@@ -66,6 +87,7 @@ export default function MessageInput(props: MessageInputProps) {
}, [onSubmit, props.disabled]);
const rightSection = useMemo(() => {
return (
<div style={{
opacity: '0.8',
@@ -84,14 +106,20 @@ export default function MessageInput(props: MessageInputProps) {
<Loader size="xs" style={{ padding: '0 0.8rem 0 0.5rem' }} />
</>)}
{!context.generating && (
<ActionIcon size="xl"
onClick={onSubmit}>
<i className="fa fa-paper-plane" style={{ fontSize: '90%' }} />
</ActionIcon>
<>
<ActionIcon size="xl"
onClick={onSubmit}>
<i className="fa fa-paper-plane" style={{ fontSize: '90%' }} />
</ActionIcon>
<ActionIcon size="xl"
onClick={onSpeechStart}>
<i className="fa fa-microphone" style={{ fontSize: '90%', color: recording ? 'red' : 'inherit' }} />
</ActionIcon>
</>
)}
</div>
);
}, [onSubmit, props.disabled, context.generating]);
}, [recording, onSubmit, props.disabled, context.generating]);
const disabled = context.generating;
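For readers unfamiliar with the Web Speech API, the onSpeechStart handler above is a toggle over one shared recognizer: the first click starts continuous recognition with interim results and keeps writing the newest transcript into the message state via dispatch(setMessage(...)); the second click stops it. A minimal standalone sketch of that pattern, assuming the declarations from speech-recognition-types.d.ts (added below) are in scope; startOrStopDictation and onTranscript are illustrative names, not part of this commit:

// Sketch of the start/stop toggle behind the microphone button, assuming a
// browser that exposes SpeechRecognition or the prefixed webkitSpeechRecognition.
const RecognitionCtor =
    (window as any).SpeechRecognition ?? (window as any).webkitSpeechRecognition;

let recognizer: SpeechRecognition | null = null;

function startOrStopDictation(onTranscript: (text: string) => void) {
    if (recognizer) {
        // Second click: stop listening and forget the instance.
        recognizer.stop();
        recognizer = null;
        return;
    }
    const r: SpeechRecognition = new RecognitionCtor();
    r.continuous = true;      // keep listening across pauses
    r.interimResults = true;  // emit partial transcripts while speaking
    r.onresult = (event) => {
        // Forward only the newest result, mirroring dispatch(setMessage(transcript)).
        onTranscript(event.results[event.results.length - 1][0].transcript);
    };
    r.start();
    recognizer = r;
}

Because interimResults is true, each partial phrase replaces the previous one rather than appending, which matches how the handler overwrites the message field on every result event.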

View File

@@ -72,7 +72,7 @@ async function bootstrapApplication() {
root.render(
<React.StrictMode>
<IntlProvider locale={navigator.language} messages={messages}>
<IntlProvider locale={navigator.language} defaultLocale="en-GB" messages={messages}>
<MantineProvider theme={{ colorScheme: "dark" }}>
<Provider store={store}>
<PersistGate loading={null} persistor={persistor}>
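The defaultLocale added here declares the locale the app's in-code strings are written in, so ids missing from the messages catalogue for navigator.language fall back to their in-code defaultMessage. A small hedged sketch of that fallback (the id, defaultMessage, and SendLabel component are illustrative, not from this codebase):

import { FormattedMessage } from 'react-intl';

// If messages[navigator.language] has no entry for this id, react-intl falls back
// to the in-code defaultMessage, which defaultLocale declares to be en-GB English.
const SendLabel = () => (
    <FormattedMessage id="input.send" defaultMessage="Send" />
);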

View File

@@ -0,0 +1,133 @@
declare global {
interface Window {
SpeechRecognition: SpeechRecognition
}
interface SpeechGrammar {
src: string
weight: number
}
const SpeechGrammar: {
prototype: SpeechGrammar
new(): SpeechGrammar
}
interface SpeechGrammarList {
readonly length: number
addFromString(string: string, weight?: number): void
addFromURI(src: string, weight?: number): void
item(index: number): SpeechGrammar
[index: number]: SpeechGrammar
}
const SpeechGrammarList: {
prototype: SpeechGrammarList
new(): SpeechGrammarList
}
interface SpeechRecognitionEventMap {
audioend: Event
audiostart: Event
end: Event
error: SpeechRecognitionError
nomatch: SpeechRecognitionEvent
result: SpeechRecognitionEvent
soundend: Event
soundstart: Event
speechend: Event
speechstart: Event
start: Event
}
interface SpeechRecognition {
continuous: boolean
grammars: SpeechGrammarList
interimResults: boolean
lang: string
maxAlternatives: number
onaudioend: ((this: SpeechRecognition, ev: Event) => any) | null
onaudiostart: ((this: SpeechRecognition, ev: Event) => any) | null
onend: ((this: SpeechRecognition, ev: Event) => any) | null
onerror:
| ((this: SpeechRecognition, ev: SpeechRecognitionError) => any)
| null
onnomatch:
| ((this: SpeechRecognition, ev: SpeechRecognitionEvent) => any)
| null
onresult:
| ((this: SpeechRecognition, ev: SpeechRecognitionEvent) => any)
| null
onsoundend: ((this: SpeechRecognition, ev: Event) => any) | null
onsoundstart: ((this: SpeechRecognition, ev: Event) => any) | null
onspeechend: ((this: SpeechRecognition, ev: Event) => any) | null
onspeechstart: ((this: SpeechRecognition, ev: Event) => any) | null
onstart: ((this: SpeechRecognition, ev: Event) => any) | null
serviceURI: string
abort(): void
start(): void
stop(): void
addEventListener<K extends keyof SpeechRecognitionEventMap>(
type: K,
listener: (
this: SpeechRecognition,
ev: SpeechRecognitionEventMap[K]
) => any,
options?: boolean | AddEventListenerOptions
): void
addEventListener(
type: string,
listener: EventListenerOrEventListenerObject,
options?: boolean | AddEventListenerOptions
): void
removeEventListener<K extends keyof SpeechRecognitionEventMap>(
type: K,
listener: (
this: SpeechRecognition,
ev: SpeechRecognitionEventMap[K]
) => any,
options?: boolean | EventListenerOptions
): void
removeEventListener(
type: string,
listener: EventListenerOrEventListenerObject,
options?: boolean | EventListenerOptions
): void
}
const SpeechRecognition: {
prototype: SpeechRecognition
new(): SpeechRecognition
}
interface SpeechRecognitionError extends Event {
// readonly error: SpeechRecognitionErrorCode;
readonly message: string
}
const SpeechRecognitionError: {
prototype: SpeechRecognitionError
new(): SpeechRecognitionError
}
interface SpeechRecognitionEvent extends Event {
readonly emma: Document | null
readonly interpretation: any
readonly resultIndex: number
readonly results: SpeechRecognitionResultList
}
const SpeechRecognitionEvent: {
prototype: SpeechRecognitionEvent
new(): SpeechRecognitionEvent
}
}
let speechRecognition: SpeechRecognition
if (window.SpeechRecognition) {
speechRecognition = new SpeechRecognition()
} else {
speechRecognition = new webkitSpeechRecognition()
}
export { speechRecognition }
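A note on the instantiation at the bottom of this file: the recognizer is created eagerly at import time, and when neither window.SpeechRecognition nor the prefixed webkitSpeechRecognition global exists, the else branch would throw a ReferenceError at runtime. A hedged sketch of a feature-detected alternative (getSpeechRecognition is an illustrative name, not part of this commit):

// Lazily create a recognizer only when the browser actually supports the API;
// return null otherwise so callers can hide or disable the microphone button.
export function getSpeechRecognition(): SpeechRecognition | null {
    const Ctor: any =
        (window as any).SpeechRecognition ?? (window as any).webkitSpeechRecognition;
    return Ctor ? new Ctor() : null;
}

The component's onSpeechStart could then bail out, or render the microphone button disabled, whenever this returns null.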

View File

@@ -25,6 +25,8 @@ const persistMessageConfig = {
storage,
}
const store = configureStore({
reducer: {
// auth: authReducer,