diff --git a/app/src/components/input.tsx b/app/src/components/input.tsx
index 82bedbe..0a7352c 100644
--- a/app/src/components/input.tsx
+++ b/app/src/components/input.tsx
@@ -11,8 +11,8 @@ import { selectTemperature } from '../store/parameters';
 import { openSystemPromptPanel, openTemperaturePanel } from '../store/settings-ui';
 import { speechRecognition } from '../speech-recognition-types.d'
 import MicRecorder from 'mic-recorder-to-mp3';
-import { selectUseOpenAIWhisper } from '../store/api-keys';
-
+import { selectUseOpenAIWhisper, selectOpenAIApiKey } from '../store/api-keys';
+import { Mp3Encoder } from 'lamejs';
 
 const Container = styled.div`
     background: #292933;
@@ -38,13 +38,53 @@ export interface MessageInputProps {
     disabled?: boolean;
 }
 
+
+// Split a recording into MP3 chunks that stay under Whisper's 25 MB upload limit.
+async function chunkAndEncodeMP3File(file: Blob): Promise<Array<File>> {
+    const MAX_CHUNK_SIZE = 25 * 1024 * 1024; // 25 MB
+    const audioContext = new AudioContext();
+    const audioBuffer = await audioContext.decodeAudioData(await file.arrayBuffer());
+    const duration = audioBuffer.duration;
+    const sampleRate = audioBuffer.sampleRate;
+    const numChannels = audioBuffer.numberOfChannels;
+    const bytesPerSample = 2; // 16-bit audio
+    const samplesPerChunk = Math.floor((MAX_CHUNK_SIZE / bytesPerSample) / numChannels);
+    const totalSamples = Math.floor(duration * sampleRate);
+    const numChunks = Math.ceil(totalSamples / samplesPerChunk);
+
+    const chunks: Array<File> = [];
+    for (let i = 0; i < numChunks; i++) {
+        const startSample = i * samplesPerChunk;
+        const endSample = Math.min(startSample + samplesPerChunk, totalSamples);
+        const chunkBuffer = audioContext.createBuffer(numChannels, endSample - startSample, sampleRate);
+        for (let c = 0; c < numChannels; c++) {
+            const channelData = audioBuffer.getChannelData(c).subarray(startSample, endSample);
+            chunkBuffer.copyToChannel(channelData, c);
+        }
+        const chunkBlob = await new Promise<Blob>((resolve) => {
+            const encoder = new Mp3Encoder(numChannels, sampleRate, 128);
+            const leftData = chunkBuffer.getChannelData(0);
+            const rightData = numChannels === 1 ? leftData : chunkBuffer.getChannelData(1);
+            const mp3Data = encoder.encodeBuffer(leftData, rightData);
+            const mp3End = encoder.flush(); // flush the encoder so the final MP3 frames are not lost
+            const blob = new Blob([mp3Data, mp3End], { type: 'audio/mp3' });
+            resolve(blob);
+        });
+        chunks.push(new File([chunkBlob], `text-${i}.mp3`, { type: 'audio/mp3' }));
+    }
+
+    return chunks;
+}
+
+
 export default function MessageInput(props: MessageInputProps) {
     const temperature = useAppSelector(selectTemperature);
     const message = useAppSelector(selectMessage);
     const [recording, setRecording] = useState(false);
     const hasVerticalSpace = useMediaQuery('(min-height: 1000px)');
-    const recorder = new MicRecorder({ bitRate: 128 })
+    const recorder = useMemo(() => new MicRecorder({ bitRate: 128 }), []);
     const useOpenAIWhisper = useAppSelector(selectUseOpenAIWhisper);
+    const openAIApiKey = useAppSelector(selectOpenAIApiKey);
 
     const context = useAppContext();
     const dispatch = useAppDispatch();
@@ -65,14 +105,14 @@ export default function MessageInput(props: MessageInputProps) {
     }, [context, message, dispatch]);
 
     const onSpeechStart = () => {
+        if (!recording) {
             setRecording(true);
 
         // if we are using whisper, then we will just record with the browser and send it to the API when done
         if (useOpenAIWhisper) {
-
+            recorder.start().catch((e: any) => console.error(e));
         } else {
-
             speechRecognition.continuous = true;
             speechRecognition.interimResults = true;
 
@@ -86,7 +126,36 @@ export default function MessageInput(props: MessageInputProps) {
         } else {
             setRecording(false);
             if (useOpenAIWhisper) {
+                const mp3 = recorder.stop().getMp3();
+                mp3.then(async ([buffer, blob]) => {
+
+                    const file = new File(buffer, 'chat.mp3', {
+                        type: blob.type,
+                        lastModified: Date.now()
+                    });
+
+                    // TODO: cut in chunks
+
+                    const data = new FormData();
+                    data.append('file', file);
+                    data.append('model', 'whisper-1');
+
+                    const response = await fetch("https://api.openai.com/v1/audio/transcriptions", {
+                        method: "POST",
+                        headers: {
+                            'Authorization': `Bearer ${openAIApiKey}`,
+                        },
+                        body: data,
+                    });
+
+                    const json = await response.json();
+
+                    if (json.text) {
+                        dispatch(setMessage(json.text));
+                    }
+
+                }).catch((e: any) => console.error(e));
             } else {
                 speechRecognition.stop();
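Note on the hunk above: `chunkAndEncodeMP3File` is defined but never called yet, and the upload path still carries a `// TODO: cut in chunks`. A minimal sketch of how the two could be wired together (`transcribeChunks` and `floatTo16BitPCM` are hypothetical helpers, not part of this diff; the PCM conversion matters because lamejs's `encodeBuffer` consumes 16-bit integer samples, while `getChannelData` returns a `Float32Array`):

```ts
// Hypothetical glue code, not part of the diff: split a long recording into
// sub-25 MB chunks and transcribe each one, concatenating the partial texts.
async function transcribeChunks(recording: File, apiKey: string): Promise<string> {
    const chunks = await chunkAndEncodeMP3File(recording);
    const parts: string[] = [];
    for (const chunk of chunks) {
        const data = new FormData();
        data.append('file', chunk);
        data.append('model', 'whisper-1');
        const response = await fetch('https://api.openai.com/v1/audio/transcriptions', {
            method: 'POST',
            headers: { 'Authorization': `Bearer ${apiKey}` },
            body: data,
        });
        const json = await response.json();
        if (json.text) {
            parts.push(json.text);
        }
    }
    return parts.join(' ');
}

// lamejs expects Int16Array samples, so each Float32Array channel would be
// converted like this before being handed to encodeBuffer:
function floatTo16BitPCM(input: Float32Array): Int16Array {
    const output = new Int16Array(input.length);
    for (let i = 0; i < input.length; i++) {
        const s = Math.max(-1, Math.min(1, input[i]));
        output[i] = s < 0 ? s * 0x8000 : s * 0x7fff;
    }
    return output;
}
```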
diff --git a/app/src/components/message.tsx b/app/src/components/message.tsx
index de77ae1..de14c6f 100644
--- a/app/src/components/message.tsx
+++ b/app/src/components/message.tsx
@@ -244,7 +244,7 @@ export default function MessageComponent(props: { message: Message, last: boolean
                                 )}
@@ -263,7 +263,7 @@ export default function MessageComponent(props: { message: Message, last: boolean
                 }}>
-
                     {editing ?  : }
diff --git a/app/src/openai.ts b/app/src/openai.ts
index 0993f4d..43291ef 100644
--- a/app/src/openai.ts
+++ b/app/src/openai.ts
@@ -26,7 +26,7 @@ export interface OpenAIResponseChunk {
 function parseResponseChunk(buffer: any): OpenAIResponseChunk {
     const chunk = buffer.toString().replace('data: ', '').trim();
-    
+
     if (chunk === '[DONE]') {
         return {
             done: true,
@@ -51,7 +51,7 @@ export async function createChatCompletion(messages: OpenAIMessage[], parameters
     const configuration = new Configuration({
         apiKey: parameters.apiKey,
     });
-    
+
     const openai = new OpenAIApi(configuration);
 
     const response = await openai.createChatCompletion({
@@ -129,6 +129,7 @@ export async function createStreamingChatCompletion(messages: OpenAIMessage[], p
     });
 
     eventSource.addEventListener('message', async (event: any) => {
+
         if (event.data === '[DONE]') {
             emitter.emit('done');
             return;
@@ -147,7 +148,7 @@ export async function createStreamingChatCompletion(messages: OpenAIMessage[], p
 
     eventSource.stream();
 
-    return { 
+    return {
         emitter,
         cancel: () => eventSource.close(),
     };
diff --git a/server/src/endpoints/whisper.ts b/server/src/endpoints/whisper.ts
new file mode 100644
index 0000000..94a8167
--- /dev/null
+++ b/server/src/endpoints/whisper.ts
@@ -0,0 +1,8 @@
+import express from 'express';
+import RequestHandler from "./base";
+
+export default class WhisperRequestHandler extends RequestHandler {
+    handler(req: express.Request, res: express.Response): any {
+        res.json({ status: 'ok' });
+    }
+}
\ No newline at end of file
diff --git a/server/src/index.ts b/server/src/index.ts
index 7c69623..23ef6dd 100644
--- a/server/src/index.ts
+++ b/server/src/index.ts
@@ -18,6 +18,7 @@ import BasicCompletionRequestHandler from './endpoints/completion/basic';
 import StreamingCompletionRequestHandler from './endpoints/completion/streaming';
 import SessionRequestHandler from './endpoints/session';
 import GetShareRequestHandler from './endpoints/get-share';
+import WhisperRequestHandler from './endpoints/whisper';
 import { configurePassport } from './passport';
 import { configureAuth0 } from './auth0';
 import DeleteChatRequestHandler from './endpoints/delete-chat';
@@ -82,6 +83,7 @@ export default class ChatServer {
         this.app.post('/chatapi/sync', (req, res) => new SyncRequestHandler(this, req, res));
         this.app.get('/chatapi/share/:id', (req, res) => new GetShareRequestHandler(this, req, res));
         this.app.post('/chatapi/share', (req, res) => new ShareRequestHandler(this, req, res));
+        this.app.post('/chatapi/whisper', (req, res) => new WhisperRequestHandler(this, req, res));
 
         if (process.env.ENABLE_SERVER_COMPLETION) {
             this.app.post('/chatapi/completion', (req, res) => new BasicCompletionRequestHandler(this, req, res));
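The new `/chatapi/whisper` route is wired up, but `WhisperRequestHandler` is still a stub that answers `{ status: 'ok' }`. A minimal sketch of what a proxying implementation might look like, assuming Node 18+ (global `fetch`, `FormData`, `Blob`), an `OPENAI_API_KEY` environment variable, and `multer` for parsing the multipart upload; all of these are assumptions, none of them are in the diff:

```ts
import express from 'express';
import multer from 'multer';
import RequestHandler from './base';

// Sketch only: proxy an uploaded recording to OpenAI's transcription API so
// the browser never needs to hold the API key. Assumes OPENAI_API_KEY is set
// and the client posts multipart form data with the audio in a "file" field.
export const audioUpload = multer({ storage: multer.memoryStorage() });

export default class WhisperRequestHandler extends RequestHandler {
    async handler(req: express.Request, res: express.Response) {
        const file = (req as any).file; // populated by audioUpload.single('file')
        if (!file) {
            res.status(400).json({ error: 'missing audio file' });
            return;
        }

        const data = new FormData();
        data.append('file', new Blob([file.buffer], { type: file.mimetype }), 'audio.mp3');
        data.append('model', 'whisper-1');

        const response = await fetch('https://api.openai.com/v1/audio/transcriptions', {
            method: 'POST',
            headers: { 'Authorization': `Bearer ${process.env.OPENAI_API_KEY}` },
            body: data,
        });

        res.status(response.status).json(await response.json());
    }
}
```

For `req.file` to be populated, the route registration in `server/src/index.ts` would also need to mount the middleware, e.g. `this.app.post('/chatapi/whisper', audioUpload.single('file'), (req, res) => new WhisperRequestHandler(this, req, res));`.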