Replace Node.js's Buffer with native Typed Arrays on the client-side
Vite does not natively ship a Buffer polyfill, and most of the functionality required here can be implemented natively (except for the byte-wise compare, for which I had to write my own function).
main
parent
4b91a1972b
commit
ff9f3517ec
|
@ -13,7 +13,6 @@
|
||||||
"@msgpack/msgpack": "^3.0.0-beta2",
|
"@msgpack/msgpack": "^3.0.0-beta2",
|
||||||
"@reduxjs/toolkit": "^1.9.3",
|
"@reduxjs/toolkit": "^1.9.3",
|
||||||
"broadcast-channel": "^4.20.2",
|
"broadcast-channel": "^4.20.2",
|
||||||
"buffer": "^6.0.3",
|
|
||||||
"comlink": "^4.4.1",
|
"comlink": "^4.4.1",
|
||||||
"events": "^3.3.0",
|
"events": "^3.3.0",
|
||||||
"idb-keyval": "^6.2.0",
|
"idb-keyval": "^6.2.0",
|
||||||
|
|
|
@ -1,7 +1,12 @@
|
||||||
|
import { compareUint8Array } from "../utils";
|
||||||
|
|
||||||
const MAX_NUM_THREADS = 128;
|
const MAX_NUM_THREADS = 128;
|
||||||
|
|
||||||
type MergeRange = { start: number, end: number };
|
type MergeRange = { start: number, end: number };
|
||||||
|
|
||||||
|
const textDecoder = new TextDecoder();
|
||||||
|
const textEncoder = new TextEncoder();
|
||||||
|
|
||||||
export class RankMap {
|
export class RankMap {
|
||||||
private values = new Map<string, number>();
|
private values = new Map<string, number>();
|
||||||
|
|
||||||
|
@ -14,23 +19,23 @@ export class RankMap {
|
||||||
}
|
}
|
||||||
|
|
||||||
public set(bytes: Uint8Array, rank: number) {
|
public set(bytes: Uint8Array, rank: number) {
|
||||||
const key = Buffer.from(bytes).toString();
|
const key = textDecoder.decode(bytes);
|
||||||
this.values.set(key, rank);
|
this.values.set(key, rank);
|
||||||
}
|
}
|
||||||
|
|
||||||
public get(bytes: Uint8Array) {
|
public get(bytes: Uint8Array) {
|
||||||
const key = Buffer.from(bytes).toString();
|
const key = textDecoder.decode(bytes);
|
||||||
return this.values.get(key);
|
return this.values.get(key);
|
||||||
}
|
}
|
||||||
|
|
||||||
public keys() {
|
public keys() {
|
||||||
return Array.from(this.values.keys()).map(k => Buffer.from(k));
|
return Array.from(this.values.keys()).map(k => textEncoder.encode(k));
|
||||||
}
|
}
|
||||||
|
|
||||||
public inverted() {
|
public inverted() {
|
||||||
const inverted = new Map<number, Uint8Array>();
|
const inverted = new Map<number, Uint8Array>();
|
||||||
for (const [key, value] of Array.from(this.values.entries())) {
|
for (const [key, value] of Array.from(this.values.entries())) {
|
||||||
inverted.set(value, new Uint8Array(Buffer.from(key)));
|
inverted.set(value, textEncoder.encode(key));
|
||||||
}
|
}
|
||||||
return inverted;
|
return inverted;
|
||||||
}
|
}
|
||||||
|
@ -100,10 +105,10 @@ export class CoreBPE {
|
||||||
const decoder: Map<number, Uint8Array> = encoder.inverted();
|
const decoder: Map<number, Uint8Array> = encoder.inverted();
|
||||||
|
|
||||||
const specialTokensDecoder: Map<number, Uint8Array> = new Map(
|
const specialTokensDecoder: Map<number, Uint8Array> = new Map(
|
||||||
Array.from(specialTokensEncoder.entries()).map(([k, v]) => [v, new Uint8Array(Buffer.from(k))])
|
Array.from(specialTokensEncoder.entries()).map(([k, v]) => [v, textEncoder.encode(k)])
|
||||||
);
|
);
|
||||||
const sortedTokenBytes: Uint8Array[] = Array.from(encoder.keys());
|
const sortedTokenBytes: Uint8Array[] = Array.from(encoder.keys());
|
||||||
sortedTokenBytes.sort((a, b) => Buffer.compare(a, b));
|
sortedTokenBytes.sort((a, b) => compareUint8Array(a, b));
|
||||||
|
|
||||||
this.encoder = encoder;
|
this.encoder = encoder;
|
||||||
this.specialTokensEncoder = specialTokensEncoder;
|
this.specialTokensEncoder = specialTokensEncoder;
|
||||||
|
@ -136,7 +141,7 @@ export class CoreBPE {
|
||||||
const ret: number[] = [];
|
const ret: number[] = [];
|
||||||
let match: RegExpExecArray | null;
|
let match: RegExpExecArray | null;
|
||||||
while ((match = regex.exec(text)) !== null) {
|
while ((match = regex.exec(text)) !== null) {
|
||||||
const piece = new Uint8Array(Buffer.from(match[0]));
|
const piece = textEncoder.encode(match[0]);
|
||||||
const token = this.encoder.get(piece);
|
const token = this.encoder.get(piece);
|
||||||
if (token !== undefined) {
|
if (token !== undefined) {
|
||||||
ret.push(token);
|
ret.push(token);
|
||||||
|
@ -167,7 +172,7 @@ export class CoreBPE {
|
||||||
const end = nextSpecial === null ? text.length : nextSpecial.index;
|
const end = nextSpecial === null ? text.length : nextSpecial.index;
|
||||||
let match: RegExpExecArray | null;
|
let match: RegExpExecArray | null;
|
||||||
while ((match = regex.exec(text.slice(start, end))) !== null) {
|
while ((match = regex.exec(text.slice(start, end))) !== null) {
|
||||||
const piece = new Uint8Array(Buffer.from(match[0]));
|
const piece = textEncoder.encode(match[0]);
|
||||||
const token = this.encoder.get(piece);
|
const token = this.encoder.get(piece);
|
||||||
if (token !== undefined) {
|
if (token !== undefined) {
|
||||||
lastPieceTokenLen = 1;
|
lastPieceTokenLen = 1;
|
||||||
|
@ -208,7 +213,7 @@ export class CoreBPE {
|
||||||
if (token !== undefined) {
|
if (token !== undefined) {
|
||||||
return token;
|
return token;
|
||||||
}
|
}
|
||||||
const pieceStr = Buffer.from(piece).toString("utf-8");
|
const pieceStr = textDecoder.decode(piece);
|
||||||
if (this.specialTokensEncoder.has(pieceStr)) {
|
if (this.specialTokensEncoder.has(pieceStr)) {
|
||||||
return this.specialTokensEncoder.get(pieceStr)!;
|
return this.specialTokensEncoder.get(pieceStr)!;
|
||||||
}
|
}
|
||||||
|
|
|
@ -39,6 +39,29 @@ export function cloneArrayBuffer(buffer: ArrayBuffer): ArrayBuffer {
|
||||||
return newBuffer;
|
return newBuffer;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Lexicographically compares two `Uint8Array` instances byte by byte.
 *
 * Bytes are compared as unsigned values starting at index 0; the first
 * position where the arrays differ decides the result. When one array is a
 * prefix of the other, the shorter array sorts first.
 *
 * The return value is always exactly -1, 0 or 1, so the function can be
 * passed directly to `Array.prototype.sort`.
 *
 * @param a - The first array to compare.
 * @param b - The second array to compare.
 * @returns -1 if `a` sorts before `b`, 1 if `a` sorts after `b`, or 0 if both
 * hold the same bytes.
 */
export function compareUint8Array(a: Uint8Array, b: Uint8Array): number {
  // Same reference: nothing to scan.
  if (a === b) return 0;

  // Only the overlapping prefix can be compared element-wise.
  const len = Math.min(a.byteLength, b.byteLength);

  for (let i = 0; i < len; ++i) {
    if (a[i] < b[i]) return -1;
    if (a[i] > b[i]) return 1;
  }

  // The common prefix is identical, so the length breaks the tie.
  if (a.byteLength < b.byteLength) return -1;
  if (a.byteLength > b.byteLength) return 1;

  return 0;
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Shares the specified text using the Web Share API if available in the user's browser.
|
* Shares the specified text using the Web Share API if available in the user's browser.
|
||||||
*
|
*
|
||||||
|
|
Loading…
Reference in New Issue