import { Button } from "@mantine/core";
import EventEmitter from "events";
import { useCallback, useEffect, useRef, useState } from "react";
import { split } from 'sentence-splitter';
import { cloneArrayBuffer, md5, sleep } from "../utils";
import * as idb from '../idb';
import { useAppDispatch, useAppSelector } from "../store";
import { selectElevenLabsApiKey } from "../store/api-keys";
import { selectVoice } from "../store/voices";
import { openElevenLabsApiKeyPanel } from "../store/settings-ui";
import { defaultElevenLabsVoiceID } from "./defaults";
import { FormattedMessage, useIntl } from "react-intl";

const endpoint = 'https://api.elevenlabs.io';

// The reader that most recently started playback; a new play() stops it first.
let currentReader: ElevenLabsReader | null = null;

// In-memory cache of encoded audio, keyed by `audio:<voiceID>:<md5 of text>`.
const cache = new Map();

/**
 * Builds the request headers for the ElevenLabs API.
 *
 * @param apiKey API key to send; defaults to the value stored in localStorage
 *               under 'elevenlabs-api-key', or an empty string if none is stored.
 * @returns Headers carrying the API key and a JSON content type.
 */
export function createHeaders(apiKey = localStorage.getItem('elevenlabs-api-key') || '') {
    return {
        'xi-api-key': apiKey,
        'content-type': 'application/json',
    };
}

/**
 * Fetches the voice list from the `/v1/voices` endpoint.
 *
 * @returns The parsed JSON response body, returned as-is (the HTTP status is not checked).
 */
export async function getVoices() {
    const response = await fetch(`${endpoint}/v1/voices`, {
        headers: createHeaders(),
    });
    const json = await response.json();
    return json;
}

const audioContext = new AudioContext();

/**
 * Reads the text content of a DOM element aloud, one text segment at a time,
 * using audio generated by the ElevenLabs text-to-speech endpoint.
 *
 * Emits: 'init', 'buffering', 'playing', 'done' and 'error' (with an Error argument).
 */
export default class ElevenLabsReader extends EventEmitter {
    private apiKey: string;
    private initialized = false;
    private cancelled = false;
    private textSegments: string[] = [];
    private currentTrack: number = -1;
    private nextTrack: number = 0;
    private audios: (AudioBuffer | null)[] = [];
    private element: HTMLElement | undefined | null;
    private voiceID = defaultElevenLabsVoiceID;
    currentSource: AudioBufferSourceNode | undefined;

    constructor() {
        super();
        this.apiKey = localStorage.getItem('elevenlabs-api-key') || '';
    }

    /**
     * Generates audio for every text segment, three segments at a time.
     * Runs at most once until stop() resets the reader; stops early when cancelled.
     */
    private async createAudio() {
        if (this.initialized) {
            return;
        }
        this.initialized = true;

        const chunkSize = 3;
        for (let i = 0; i < this.textSegments.length && !this.cancelled; i += chunkSize) {
            const chunk = this.textSegments.slice(i, i + chunkSize);
            await Promise.all(chunk.map((_, index) => this.createAudioForTextSegment(i + index)));
        }
    }

    /**
     * Loads (memory cache, then IndexedDB, then network) and decodes the audio
     * for a single text segment, storing the result in `this.audios[index]`.
     *
     * @param index Position of the segment in `this.textSegments`.
     */
    private async createAudioForTextSegment(index: number) {
        if (this.audios[index] || this.cancelled) {
            return;
        }

        const hash = await md5(this.textSegments[index]);
        const cacheKey = `audio:${this.voiceID}:${hash}`;

        let buffer = cache.get(cacheKey);

        if (!buffer) {
            buffer = await idb.get(cacheKey);
            if (buffer) {
                // Promote persistent hits so later lookups skip IndexedDB.
                cache.set(cacheKey, buffer);
            }
        }

        if (!buffer) {
            const url = endpoint + '/v1/text-to-speech/' + this.voiceID;
            const maxAttempts = 3;

            for (let i = 0; i < maxAttempts && !this.cancelled; i++) {
                try {
                    const response = await fetch(url, {
                        headers: createHeaders(this.apiKey),
                        method: 'POST',
                        body: JSON.stringify({
                            text: this.textSegments[index],
                        }),
                    });

                    if (response.ok) {
                        buffer = await response.arrayBuffer();
                        cache.set(cacheKey, cloneArrayBuffer(buffer));
                        idb.set(cacheKey, cloneArrayBuffer(buffer));
                        break;
                    }
                } catch (e) {
                    console.error(e);
                }

                // Back off before retrying, but do not wait after the final attempt.
                if (i < maxAttempts - 1) {
                    await sleep(2000 + i * 5000); // increasing backoff time
                }
            }
        }

        if (buffer) {
            try {
                // decodeAudioData detaches the ArrayBuffer it receives. Decode a copy so
                // the buffer held by the in-memory cache stays usable for later playback.
                const data = await audioContext.decodeAudioData(cloneArrayBuffer(buffer));
                this.audios[index] = data;
            } catch (e) {
                // Leave this segment empty; waitForAudio() reports the failure by timing out.
                console.error(e);
            }
        }
    }

    /**
     * Waits until the audio for `index` has been decoded, starting audio
     * generation if it has not begun yet. Emits 'buffering' while waiting.
     * On timeout the reader is cancelled and 'error' is emitted.
     *
     * @param index Segment to wait for.
     * @param timeoutSeconds Maximum time to wait before giving up.
     */
    private async waitForAudio(index: number, timeoutSeconds = 30) {
        if (!this.initialized) {
            // Fire-and-forget; log failures instead of leaving an unhandled rejection.
            this.createAudio().catch(e => console.error(e));
        }

        const timeoutAt = Date.now() + timeoutSeconds * 1000;
        while (Date.now() < timeoutAt && !this.cancelled) {
            if (this.audios[index]) {
                return;
            }
            this.emit('buffering');
            await sleep(100);
        }

        if (this.cancelled) {
            // The reader was stopped while waiting; that is not a timeout. Emitting
            // 'error' here would also throw if the listeners were already removed.
            return;
        }

        this.cancelled = true;
        this.emit('error', new Error('Timed out waiting for audio'));
    }

    /**
     * Starts reading the given element aloud, stopping any reader that is
     * currently active first.
     *
     * @param element Element whose text content should be read.
     * @param voiceID ElevenLabs voice to use.
     * @param apiKey API key to use for text-to-speech requests.
     */
    public async play(element: HTMLElement, voiceID: string = defaultElevenLabsVoiceID, apiKey = this.apiKey) {
        this.element = element;
        this.voiceID = voiceID;
        this.apiKey = apiKey;

        if (!this.element || !this.voiceID) {
            return;
        }

        this.emit('init');

        if (currentReader != null) {
            await currentReader.stop();
        }
        currentReader = this;

        this.cancelled = false;

        if (!this.textSegments?.length) {
            this.textSegments = this.extractTextSegments();
        }

        if (!this.textSegments.length) {
            // Nothing to read: finish immediately instead of buffering until the timeout
            // (and avoid computing `% 0` for the next track index).
            this.emit('done');
            return;
        }

        await this.next(true);
    }

    /**
     * Plays the next segment and schedules the following one when it ends.
     *
     * @param play True for the first segment of a playback; otherwise reaching
     *             track 0 again means every segment was played and 'done' is emitted.
     */
    private async next(play = false) {
        if (this.cancelled) {
            return;
        }

        if (!play && this.nextTrack === 0) {
            this.emit('done');
            return;
        }

        const currentTrack = this.nextTrack;
        this.currentTrack = currentTrack;

        const nextTrack = (this.nextTrack + 1) % this.textSegments.length;
        this.nextTrack = nextTrack;

        await this.waitForAudio(currentTrack);

        if (this.cancelled) {
            return;
        }

        this.emit('playing');

        try {
            this.currentSource = audioContext.createBufferSource();
            this.currentSource.buffer = this.audios[currentTrack];
            this.currentSource.connect(audioContext.destination);
            this.currentSource.onended = () => {
                this.next();
            };
            this.currentSource.start();
        } catch (e) {
            console.error('failed to play', e);
            this.emit('done');
        }
    }

    /**
     * Stops playback, discards generated audio and extracted text, marks the
     * reader as cancelled and emits 'done'.
     */
    public stop() {
        if (this.currentSource) {
            this.currentSource.stop();
        }
        this.audios = [];
        this.textSegments = [];
        this.nextTrack = 0;
        this.cancelled = true;
        this.initialized = false;
        this.emit('done');
    }

    /**
     * Collects the text to read from the current element: outermost block-level
     * nodes matching the selector, with paragraphs, list items and blockquotes
     * further split into sentences.
     *
     * @returns Non-empty, trimmed text segments in document order.
     */
    private extractTextSegments() {
        const selector = 'p, li, th, td, blockquote, pre code, h1, h2, h3, h4, h5, h6';
        const nodes = Array.from(this.element?.querySelectorAll(selector) || []);
        const lines: string[] = [];
        // Keep only outermost matches so nested blocks are not read twice.
        const blocks = nodes.filter(node => !node.parentElement?.closest(selector) && node.textContent);
        for (const block of blocks) {
            const tagName = block.tagName.toLowerCase();
            if (tagName === 'p' || tagName === 'li' || tagName === 'blockquote') {
                const sentences = split(block.textContent!);
                for (const sentence of sentences) {
                    lines.push(sentence.raw.trim());
                }
            } else {
                lines.push(block.textContent!.trim());
            }
        }
        return lines.filter(line => line.length);
    }
}

/**
 * Button that toggles text-to-speech playback of the element matched by
 * `props.selector`. Opens the API key panel instead when no key is configured.
 */
export function ElevenLabsReaderButton(props: { selector: string }) {
    const elevenLabsApiKey = useAppSelector(selectElevenLabsApiKey);
    const dispatch = useAppDispatch();
    const intl = useIntl();

    const voice = useAppSelector(selectVoice);

    const [status, setStatus] = useState<'idle' | 'init' | 'playing' | 'buffering'>('idle');
    // const [error, setError] = useState(false);

    const reader = useRef(new ElevenLabsReader());

    useEffect(() => {
        const currentReader = reader.current;

        currentReader.on('init', () => setStatus('init'));
        currentReader.on('playing', () => setStatus('playing'));
        currentReader.on('buffering', () => setStatus('buffering'));
        currentReader.on('error', () => {
            setStatus('idle');
            // setError(true);
        });
        currentReader.on('done', () => setStatus('idle'));

        return () => {
            currentReader.removeAllListeners();
            currentReader.stop();
        };
    }, [props.selector]);

    const onClick = useCallback(() => {
        if (status === 'idle') {
            if (!elevenLabsApiKey?.length) {
                dispatch(openElevenLabsApiKeyPanel());
                return;
            }

            // Resume the shared AudioContext from within the click handler.
            audioContext.resume();
            reader.current.play(document.querySelector(props.selector)!, voice, elevenLabsApiKey);
        } else {
            reader.current.stop();
        }
    }, [dispatch, status, props.selector, elevenLabsApiKey, voice]);

    // NOTE(review): the JSX markup for this return is not present in the reviewed
    // source; the empty expression is reproduced unchanged and must be restored
    // from the original component before this file will compile.
    return ( );
}