Replace Node.js's Buffer with native Typed Arrays on the client-side

Vite does not ship a Buffer polyfill, and most of the functionality
required here can be implemented natively (except for the byte-wise
compare, for which I had to write my own function).
main
ZauberNerd 2023-07-07 15:21:20 +02:00
parent 4b91a1972b
commit ff9f3517ec
No known key found for this signature in database
GPG Key ID: 9B617FBFF79E4F60
3 changed files with 37 additions and 10 deletions

View File

@ -13,7 +13,6 @@
"@msgpack/msgpack": "^3.0.0-beta2", "@msgpack/msgpack": "^3.0.0-beta2",
"@reduxjs/toolkit": "^1.9.3", "@reduxjs/toolkit": "^1.9.3",
"broadcast-channel": "^4.20.2", "broadcast-channel": "^4.20.2",
"buffer": "^6.0.3",
"comlink": "^4.4.1", "comlink": "^4.4.1",
"events": "^3.3.0", "events": "^3.3.0",
"idb-keyval": "^6.2.0", "idb-keyval": "^6.2.0",

View File

@ -1,7 +1,12 @@
import { compareUint8Array } from "../utils";
const MAX_NUM_THREADS = 128; const MAX_NUM_THREADS = 128;
type MergeRange = { start: number, end: number }; type MergeRange = { start: number, end: number };
const textDecoder = new TextDecoder();
const textEncoder = new TextEncoder();
export class RankMap { export class RankMap {
private values = new Map<string, number>(); private values = new Map<string, number>();
@ -14,23 +19,23 @@ export class RankMap {
} }
public set(bytes: Uint8Array, rank: number) { public set(bytes: Uint8Array, rank: number) {
const key = Buffer.from(bytes).toString(); const key = textDecoder.decode(bytes);
this.values.set(key, rank); this.values.set(key, rank);
} }
public get(bytes: Uint8Array) { public get(bytes: Uint8Array) {
const key = Buffer.from(bytes).toString(); const key = textDecoder.decode(bytes);
return this.values.get(key); return this.values.get(key);
} }
public keys() { public keys() {
return Array.from(this.values.keys()).map(k => Buffer.from(k)); return Array.from(this.values.keys()).map(k => textEncoder.encode(k));
} }
public inverted() { public inverted() {
const inverted = new Map<number, Uint8Array>(); const inverted = new Map<number, Uint8Array>();
for (const [key, value] of Array.from(this.values.entries())) { for (const [key, value] of Array.from(this.values.entries())) {
inverted.set(value, new Uint8Array(Buffer.from(key))); inverted.set(value, textEncoder.encode(key));
} }
return inverted; return inverted;
} }
@ -100,10 +105,10 @@ export class CoreBPE {
const decoder: Map<number, Uint8Array> = encoder.inverted(); const decoder: Map<number, Uint8Array> = encoder.inverted();
const specialTokensDecoder: Map<number, Uint8Array> = new Map( const specialTokensDecoder: Map<number, Uint8Array> = new Map(
Array.from(specialTokensEncoder.entries()).map(([k, v]) => [v, new Uint8Array(Buffer.from(k))]) Array.from(specialTokensEncoder.entries()).map(([k, v]) => [v, textEncoder.encode(k)])
); );
const sortedTokenBytes: Uint8Array[] = Array.from(encoder.keys()); const sortedTokenBytes: Uint8Array[] = Array.from(encoder.keys());
sortedTokenBytes.sort((a, b) => Buffer.compare(a, b)); sortedTokenBytes.sort((a, b) => compareUint8Array(a, b));
this.encoder = encoder; this.encoder = encoder;
this.specialTokensEncoder = specialTokensEncoder; this.specialTokensEncoder = specialTokensEncoder;
@ -136,7 +141,7 @@ export class CoreBPE {
const ret: number[] = []; const ret: number[] = [];
let match: RegExpExecArray | null; let match: RegExpExecArray | null;
while ((match = regex.exec(text)) !== null) { while ((match = regex.exec(text)) !== null) {
const piece = new Uint8Array(Buffer.from(match[0])); const piece = textEncoder.encode(match[0]);
const token = this.encoder.get(piece); const token = this.encoder.get(piece);
if (token !== undefined) { if (token !== undefined) {
ret.push(token); ret.push(token);
@ -167,7 +172,7 @@ export class CoreBPE {
const end = nextSpecial === null ? text.length : nextSpecial.index; const end = nextSpecial === null ? text.length : nextSpecial.index;
let match: RegExpExecArray | null; let match: RegExpExecArray | null;
while ((match = regex.exec(text.slice(start, end))) !== null) { while ((match = regex.exec(text.slice(start, end))) !== null) {
const piece = new Uint8Array(Buffer.from(match[0])); const piece = textEncoder.encode(match[0]);
const token = this.encoder.get(piece); const token = this.encoder.get(piece);
if (token !== undefined) { if (token !== undefined) {
lastPieceTokenLen = 1; lastPieceTokenLen = 1;
@ -208,7 +213,7 @@ export class CoreBPE {
if (token !== undefined) { if (token !== undefined) {
return token; return token;
} }
const pieceStr = Buffer.from(piece).toString("utf-8"); const pieceStr = textDecoder.decode(piece);
if (this.specialTokensEncoder.has(pieceStr)) { if (this.specialTokensEncoder.has(pieceStr)) {
return this.specialTokensEncoder.get(pieceStr)!; return this.specialTokensEncoder.get(pieceStr)!;
} }

View File

@ -39,6 +39,29 @@ export function cloneArrayBuffer(buffer: ArrayBuffer): ArrayBuffer {
return newBuffer; return newBuffer;
} }
/**
 * Orders two byte sequences lexicographically, one byte at a time.
 *
 * The first index at which the arrays hold different bytes decides the
 * outcome. When one array is a prefix of the other, the shorter array
 * sorts first. The result is suitable as an `Array.prototype.sort` comparator.
 *
 * @param a - Left-hand byte sequence.
 * @param b - Right-hand byte sequence.
 * @returns `-1` if `a` sorts before `b`, `1` if `a` sorts after `b`, and `0`
 *   if both hold the same bytes (or are the very same instance).
 */
export function compareUint8Array(a: Uint8Array, b: Uint8Array): number {
  // Identical references need no scan; fall straight through to "equal".
  if (a !== b) {
    const sharedLength = Math.min(a.byteLength, b.byteLength);
    let index = 0;
    while (index < sharedLength) {
      const delta = a[index] - b[index];
      if (delta !== 0) {
        return delta < 0 ? -1 : 1;
      }
      index += 1;
    }
    // Common prefix is identical, so the length alone breaks the tie.
    const lengthDelta = a.byteLength - b.byteLength;
    if (lengthDelta !== 0) {
      return lengthDelta < 0 ? -1 : 1;
    }
  }
  return 0;
}
/** /**
* Shares the specified text using the Web Share API if available in the user's browser. * Shares the specified text using the Web Share API if available in the user's browser.
* *