From 1796c307d24502453d65a4da1c07c2444b3bdcb2 Mon Sep 17 00:00:00 2001 From: Cogent Apps <127109874+cogentapps@users.noreply.github.com> Date: Wed, 8 Mar 2023 13:17:34 -0800 Subject: [PATCH] add tokenizer --- src/tiktoken/dist/README.md | 134 ++++++++ src/tiktoken/dist/bundler.d.ts | 1 + src/tiktoken/dist/bundler.js | 1 + src/tiktoken/dist/init.d.ts | 8 + src/tiktoken/dist/init.js | 20 ++ src/tiktoken/dist/package.json | 37 +++ src/tiktoken/dist/tiktoken.d.ts | 108 ++++++ src/tiktoken/dist/tiktoken.js | 4 + src/tiktoken/dist/tiktoken.node.js | 425 ++++++++++++++++++++++++ src/tiktoken/dist/tiktoken_bg.js | 421 +++++++++++++++++++++++ src/tiktoken/dist/tiktoken_bg.wasm | Bin 0 -> 274 bytes src/tiktoken/dist/tiktoken_bg.wasm.d.ts | 20 ++ src/tiktoken/package.json | 31 ++ src/tiktoken/tsconfig.json | 14 + src/tokenizer.ts | 100 ++++++ 15 files changed, 1324 insertions(+) create mode 100644 src/tiktoken/dist/README.md create mode 100644 src/tiktoken/dist/bundler.d.ts create mode 100644 src/tiktoken/dist/bundler.js create mode 100644 src/tiktoken/dist/init.d.ts create mode 100644 src/tiktoken/dist/init.js create mode 100644 src/tiktoken/dist/package.json create mode 100644 src/tiktoken/dist/tiktoken.d.ts create mode 100644 src/tiktoken/dist/tiktoken.js create mode 100644 src/tiktoken/dist/tiktoken.node.js create mode 100644 src/tiktoken/dist/tiktoken_bg.js create mode 100644 src/tiktoken/dist/tiktoken_bg.wasm create mode 100644 src/tiktoken/dist/tiktoken_bg.wasm.d.ts create mode 100644 src/tiktoken/package.json create mode 100644 src/tiktoken/tsconfig.json create mode 100644 src/tokenizer.ts diff --git a/src/tiktoken/dist/README.md b/src/tiktoken/dist/README.md new file mode 100644 index 0000000..c757851 --- /dev/null +++ b/src/tiktoken/dist/README.md @@ -0,0 +1,134 @@ +# ⏳ tiktoken + +tiktoken is a [BPE](https://en.wikipedia.org/wiki/Byte_pair_encoding) tokeniser for use with +OpenAI's models, forked from the original tiktoken library to provide NPM bindings for Node 
and other JS runtimes. + +The open source version of `tiktoken` can be installed from NPM: + +``` +npm install @dqbd/tiktoken +``` + +## Usage + +Basic usage follows: + +```typescript +import assert from "node:assert"; +import { get_encoding, encoding_for_model } from "@dqbd/tiktoken"; + +const enc = get_encoding("gpt2"); +assert( + new TextDecoder().decode(enc.decode(enc.encode("hello world"))) === + "hello world" +); + +// To get the tokeniser corresponding to a specific model in the OpenAI API: +const enc = encoding_for_model("text-davinci-003"); + +// Extend existing encoding with custom special tokens +const enc = encoding_for_model("gpt2", { + "<|im_start|>": 100264, + "<|im_end|>": 100265, +}); + +// don't forget to free the encoder after it is not used +enc.free(); +``` + +If desired, you can create a Tiktoken instance directly with custom ranks, special tokens and regex pattern: + +```typescript +import { Tiktoken } from "../pkg"; +import { readFileSync } from "fs"; + +const encoder = new Tiktoken( + readFileSync("./ranks/gpt2.tiktoken").toString("utf-8"), + { "<|endoftext|>": 50256, "<|im_start|>": 100264, "<|im_end|>": 100265 }, + "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)|\\s+" +); +``` + +## Compatibility + +As this is a WASM library, there might be some issues with specific runtimes. If you encounter any issues, please open an issue. + +| Runtime | Status | Notes | +| ------------------- | ------ | ------------------------------------------ | +| Node.js | ✅ | | +| Bun | ✅ | | +| Vite | ✅ | See [here](#vite) for notes | +| Next.js | ✅ | See [here](#nextjs) for notes | +| Vercel Edge Runtime | ✅ | See [here](#vercel-edge-runtime) for notes | +| Cloudflare Workers | 🚧 | Untested | +| Deno | ❌ | Currently unsupported | + +### [Vite](#vite) + +If you are using Vite, you will need to add both the `vite-plugin-wasm` and `vite-plugin-top-level-await`. 
Add the following to your `vite.config.js`: + +```js +import wasm from "vite-plugin-wasm"; +import topLevelAwait from "vite-plugin-top-level-await"; +import { defineConfig } from "vite"; + +export default defineConfig({ + plugins: [wasm(), topLevelAwait()], +}); +``` + +### [Next.js](#nextjs) + +Both API routes and `/pages` are supported with the following configuration. To overcome issues with importing the Node.js version, you can import the package from `@dqbd/tiktoken/bundler` instead. + +```typescript +import { get_encoding } from "@dqbd/tiktoken/bundler"; +import { NextApiRequest, NextApiResponse } from "next"; + +export default function handler(req: NextApiRequest, res: NextApiResponse) { + const encoder = get_encoding("gpt2"); + const message = encoder.encode(`Hello World ${Math.random()}`); + encoder.free(); + return res.status(200).json({ message }); +} +``` + +Additional Webpack configuration is required. + +```typescript +const config = { + webpack(config, { isServer, dev }) { + config.experiments = { + asyncWebAssembly: true, + layers: true, + }; + + return config; + }, +}; +``` + +### [Vercel Edge Runtime](#vercel-edge-runtime) + +Vercel Edge Runtime supports WASM modules when the import path has a `?module` suffix. 
Initialize the encoder with the following snippet: + +```typescript +import wasm from "@dqbd/tiktoken/tiktoken_bg.wasm?module"; +import { init, get_encoding } from "@dqbd/tiktoken/init"; + +export const config = { runtime: "edge" }; + +export default async function (req: Request) { + await init((imports) => WebAssembly.instantiate(wasm, imports)); + + const encoder = get_encoding("cl100k_base"); + const tokens = encoder.encode("hello world"); + encoder.free(); + + return new Response(`${tokens}`); +} +``` + +## Acknowledgements + +- https://github.com/zurawiki/tiktoken-rs diff --git a/src/tiktoken/dist/bundler.d.ts b/src/tiktoken/dist/bundler.d.ts new file mode 100644 index 0000000..b80ee30 --- /dev/null +++ b/src/tiktoken/dist/bundler.d.ts @@ -0,0 +1 @@ +export * from "./tiktoken"; \ No newline at end of file diff --git a/src/tiktoken/dist/bundler.js b/src/tiktoken/dist/bundler.js new file mode 100644 index 0000000..b80ee30 --- /dev/null +++ b/src/tiktoken/dist/bundler.js @@ -0,0 +1 @@ +export * from "./tiktoken"; \ No newline at end of file diff --git a/src/tiktoken/dist/init.d.ts b/src/tiktoken/dist/init.d.ts new file mode 100644 index 0000000..6936c12 --- /dev/null +++ b/src/tiktoken/dist/init.d.ts @@ -0,0 +1,8 @@ +/* tslint:disable */ +/* eslint-disable */ +export * from "./tiktoken"; +export function init( + callback: ( + imports: WebAssembly.Imports + ) => Promise +): Promise; \ No newline at end of file diff --git a/src/tiktoken/dist/init.js b/src/tiktoken/dist/init.js new file mode 100644 index 0000000..eb68346 --- /dev/null +++ b/src/tiktoken/dist/init.js @@ -0,0 +1,20 @@ +import * as imports from "./tiktoken_bg.js"; + +export async function init(cb) { + const res = await cb({ + "./tiktoken_bg.js": imports, + }); + + const instance = + "instance" in res && res.instance instanceof WebAssembly.Instance + ? res.instance + : res instanceof WebAssembly.Instance + ? 
res + : null; + + if (instance == null) throw new Error("Missing instance"); + imports.__wbg_set_wasm(instance.exports); + return imports; +} + +export * from "./tiktoken_bg.js"; \ No newline at end of file diff --git a/src/tiktoken/dist/package.json b/src/tiktoken/dist/package.json new file mode 100644 index 0000000..ea87219 --- /dev/null +++ b/src/tiktoken/dist/package.json @@ -0,0 +1,37 @@ +{ + "name": "@dqbd/tiktoken", + "version": "1.0.0-alpha.1", + "description": "Javascript bindings for tiktoken", + "license": "MIT", + "repository": { + "type": "git", + "url": "https://github.com/dqbd/tiktoken" + }, + "dependencies": { + "node-fetch": "^3.3.0" + }, + "files": [ + "**/*" + ], + "main": "tiktoken.node.js", + "types": "tiktoken.d.ts", + "exports": { + ".": { + "types": "./tiktoken.d.ts", + "node": "./tiktoken.node.js", + "default": "./tiktoken.js" + }, + "./bundler": { + "types": "./bundler.d.ts", + "default": "./bundler.js" + }, + "./init": { + "types": "./init.d.ts", + "default": "./init.js" + }, + "./tiktoken_bg.wasm": { + "types": "./tiktoken_bg.wasm.d.ts", + "default": "./tiktoken_bg.wasm" + } + } +} \ No newline at end of file diff --git a/src/tiktoken/dist/tiktoken.d.ts b/src/tiktoken/dist/tiktoken.d.ts new file mode 100644 index 0000000..0f3d4d5 --- /dev/null +++ b/src/tiktoken/dist/tiktoken.d.ts @@ -0,0 +1,108 @@ +/* tslint:disable */ +/* eslint-disable */ + +export type TiktokenEncoding = "gpt2" | "r50k_base" | "p50k_base" | "p50k_edit" | "cl100k_base"; + +/** + * @param {TiktokenEncoding} encoding + * @param {Record} [extend_special_tokens] + * @returns {Tiktoken} + */ +export function get_encoding(encoding: TiktokenEncoding, extend_special_tokens?: Record): Tiktoken; + + + +export type TiktokenModel = + | "text-davinci-003" + | "text-davinci-002" + | "text-davinci-001" + | "text-curie-001" + | "text-babbage-001" + | "text-ada-001" + | "davinci" + | "curie" + | "babbage" + | "ada" + | "code-davinci-002" + | "code-davinci-001" + | "code-cushman-002" + 
| "code-cushman-001" + | "davinci-codex" + | "cushman-codex" + | "text-davinci-edit-001" + | "code-davinci-edit-001" + | "text-embedding-ada-002" + | "text-similarity-davinci-001" + | "text-similarity-curie-001" + | "text-similarity-babbage-001" + | "text-similarity-ada-001" + | "text-search-davinci-doc-001" + | "text-search-curie-doc-001" + | "text-search-babbage-doc-001" + | "text-search-ada-doc-001" + | "code-search-babbage-code-001" + | "code-search-ada-code-001" + | "gpt2" + | "gpt-3.5-turbo" + | "gpt-3.5-turbo-0301"; + +/** + * @param {TiktokenModel} encoding + * @param {Record} [extend_special_tokens] + * @returns {Tiktoken} + */ +export function encoding_for_model(model: TiktokenModel, extend_special_tokens?: Record): Tiktoken; + + +/** +*/ +export class Tiktoken { + free(): void; +/** +* @param {string} tiktoken_bfe +* @param {any} special_tokens +* @param {string} pat_str +*/ + constructor(tiktoken_bfe: string, special_tokens: Record, pat_str: string); +/** +* @param {string} text +* @param {any} allowed_special +* @param {any} disallowed_special +* @returns {Uint32Array} +*/ + encode(text: string, allowed_special?: "all" | string[], disallowed_special?: "all" | string[]): Uint32Array; +/** +* @param {string} text +* @returns {Uint32Array} +*/ + encode_ordinary(text: string): Uint32Array; +/** +* @param {string} text +* @param {any} allowed_special +* @param {any} disallowed_special +* @returns {any} +*/ + encode_with_unstable(text: string, allowed_special?: "all" | string[], disallowed_special?: "all" | string[]): any; +/** +* @param {Uint8Array} bytes +* @returns {number} +*/ + encode_single_token(bytes: Uint8Array): number; +/** +* @param {Uint32Array} tokens +* @returns {Uint8Array} +*/ + decode(tokens: Uint32Array): Uint8Array; +/** +* @param {number} token +* @returns {Uint8Array} +*/ + decode_single_token_bytes(token: number): Uint8Array; +/** +* @returns {any} +*/ + token_byte_values(): Array>; +/** +*/ + readonly name: string | undefined; +} diff 
--git a/src/tiktoken/dist/tiktoken.js b/src/tiktoken/dist/tiktoken.js new file mode 100644 index 0000000..5843675 --- /dev/null +++ b/src/tiktoken/dist/tiktoken.js @@ -0,0 +1,4 @@ +import * as wasm from "./tiktoken_bg.wasm"; +import { __wbg_set_wasm } from "./tiktoken_bg.js"; +__wbg_set_wasm(wasm); +export * from "./tiktoken_bg.js"; diff --git a/src/tiktoken/dist/tiktoken.node.js b/src/tiktoken/dist/tiktoken.node.js new file mode 100644 index 0000000..b933f64 --- /dev/null +++ b/src/tiktoken/dist/tiktoken.node.js @@ -0,0 +1,425 @@ +let imports = {}; +imports['./tiktoken_bg.js'] = module.exports; +let wasm; +const { TextEncoder, TextDecoder } = require(`util`); + +const heap = new Array(128).fill(undefined); + +heap.push(undefined, null, true, false); + +function getObject(idx) { return heap[idx]; } + +let heap_next = heap.length; + +function dropObject(idx) { + if (idx < 132) return; + heap[idx] = heap_next; + heap_next = idx; +} + +function takeObject(idx) { + const ret = getObject(idx); + dropObject(idx); + return ret; +} + +let WASM_VECTOR_LEN = 0; + +let cachedUint8Memory0 = null; + +function getUint8Memory0() { + if (cachedUint8Memory0 === null || cachedUint8Memory0.byteLength === 0) { + cachedUint8Memory0 = new Uint8Array(wasm.memory.buffer); + } + return cachedUint8Memory0; +} + +let cachedTextEncoder = new TextEncoder('utf-8'); + +const encodeString = (typeof cachedTextEncoder.encodeInto === 'function' + ? 
function (arg, view) { + return cachedTextEncoder.encodeInto(arg, view); +} + : function (arg, view) { + const buf = cachedTextEncoder.encode(arg); + view.set(buf); + return { + read: arg.length, + written: buf.length + }; +}); + +function passStringToWasm0(arg, malloc, realloc) { + + if (realloc === undefined) { + const buf = cachedTextEncoder.encode(arg); + const ptr = malloc(buf.length); + getUint8Memory0().subarray(ptr, ptr + buf.length).set(buf); + WASM_VECTOR_LEN = buf.length; + return ptr; + } + + let len = arg.length; + let ptr = malloc(len); + + const mem = getUint8Memory0(); + + let offset = 0; + + for (; offset < len; offset++) { + const code = arg.charCodeAt(offset); + if (code > 0x7F) break; + mem[ptr + offset] = code; + } + + if (offset !== len) { + if (offset !== 0) { + arg = arg.slice(offset); + } + ptr = realloc(ptr, len, len = offset + arg.length * 3); + const view = getUint8Memory0().subarray(ptr + offset, ptr + len); + const ret = encodeString(arg, view); + + offset += ret.written; + } + + WASM_VECTOR_LEN = offset; + return ptr; +} + +function isLikeNone(x) { + return x === undefined || x === null; +} + +let cachedInt32Memory0 = null; + +function getInt32Memory0() { + if (cachedInt32Memory0 === null || cachedInt32Memory0.byteLength === 0) { + cachedInt32Memory0 = new Int32Array(wasm.memory.buffer); + } + return cachedInt32Memory0; +} + +let cachedTextDecoder = new TextDecoder('utf-8', { ignoreBOM: true, fatal: true }); + +cachedTextDecoder.decode(); + +function getStringFromWasm0(ptr, len) { + return cachedTextDecoder.decode(getUint8Memory0().subarray(ptr, ptr + len)); +} + +function addHeapObject(obj) { + if (heap_next === heap.length) heap.push(heap.length + 1); + const idx = heap_next; + heap_next = heap[idx]; + + heap[idx] = obj; + return idx; +} + +let cachedUint32Memory0 = null; + +function getUint32Memory0() { + if (cachedUint32Memory0 === null || cachedUint32Memory0.byteLength === 0) { + cachedUint32Memory0 = new 
Uint32Array(wasm.memory.buffer); + } + return cachedUint32Memory0; +} + +function getArrayU32FromWasm0(ptr, len) { + return getUint32Memory0().subarray(ptr / 4, ptr / 4 + len); +} + +function passArray8ToWasm0(arg, malloc) { + const ptr = malloc(arg.length * 1); + getUint8Memory0().set(arg, ptr / 1); + WASM_VECTOR_LEN = arg.length; + return ptr; +} + +function passArray32ToWasm0(arg, malloc) { + const ptr = malloc(arg.length * 4); + getUint32Memory0().set(arg, ptr / 4); + WASM_VECTOR_LEN = arg.length; + return ptr; +} + +function getArrayU8FromWasm0(ptr, len) { + return getUint8Memory0().subarray(ptr / 1, ptr / 1 + len); +} +/** +* @param {string} encoding +* @param {any} extend_special_tokens +* @returns {Tiktoken} +*/ +module.exports.get_encoding = function(encoding, extend_special_tokens) { + try { + const retptr = wasm.__wbindgen_add_to_stack_pointer(-16); + const ptr0 = passStringToWasm0(encoding, wasm.__wbindgen_export_0, wasm.__wbindgen_export_1); + const len0 = WASM_VECTOR_LEN; + wasm.get_encoding(retptr, ptr0, len0, addHeapObject(extend_special_tokens)); + var r0 = getInt32Memory0()[retptr / 4 + 0]; + var r1 = getInt32Memory0()[retptr / 4 + 1]; + var r2 = getInt32Memory0()[retptr / 4 + 2]; + if (r2) { + throw takeObject(r1); + } + return Tiktoken.__wrap(r0); + } finally { + wasm.__wbindgen_add_to_stack_pointer(16); + } +}; + +/** +* @param {string} model +* @param {any} extend_special_tokens +* @returns {Tiktoken} +*/ +module.exports.encoding_for_model = function(model, extend_special_tokens) { + try { + const retptr = wasm.__wbindgen_add_to_stack_pointer(-16); + const ptr0 = passStringToWasm0(model, wasm.__wbindgen_export_0, wasm.__wbindgen_export_1); + const len0 = WASM_VECTOR_LEN; + wasm.encoding_for_model(retptr, ptr0, len0, addHeapObject(extend_special_tokens)); + var r0 = getInt32Memory0()[retptr / 4 + 0]; + var r1 = getInt32Memory0()[retptr / 4 + 1]; + var r2 = getInt32Memory0()[retptr / 4 + 2]; + if (r2) { + throw takeObject(r1); + } + return 
Tiktoken.__wrap(r0); + } finally { + wasm.__wbindgen_add_to_stack_pointer(16); + } +}; + +function handleError(f, args) { + try { + return f.apply(this, args); + } catch (e) { + wasm.__wbindgen_export_3(addHeapObject(e)); + } +} +/** +*/ +class Tiktoken { + + static __wrap(ptr) { + const obj = Object.create(Tiktoken.prototype); + obj.ptr = ptr; + + return obj; + } + + __destroy_into_raw() { + const ptr = this.ptr; + this.ptr = 0; + + return ptr; + } + + free() { + const ptr = this.__destroy_into_raw(); + wasm.__wbg_tiktoken_free(ptr); + } + /** + * @param {string} tiktoken_bfe + * @param {any} special_tokens + * @param {string} pat_str + */ + constructor(tiktoken_bfe, special_tokens, pat_str) { + const ptr0 = passStringToWasm0(tiktoken_bfe, wasm.__wbindgen_export_0, wasm.__wbindgen_export_1); + const len0 = WASM_VECTOR_LEN; + const ptr1 = passStringToWasm0(pat_str, wasm.__wbindgen_export_0, wasm.__wbindgen_export_1); + const len1 = WASM_VECTOR_LEN; + const ret = wasm.tiktoken_new(ptr0, len0, addHeapObject(special_tokens), ptr1, len1); + return Tiktoken.__wrap(ret); + } + /** + * @returns {string | undefined} + */ + get name() { + try { + const retptr = wasm.__wbindgen_add_to_stack_pointer(-16); + wasm.tiktoken_name(retptr, this.ptr); + var r0 = getInt32Memory0()[retptr / 4 + 0]; + var r1 = getInt32Memory0()[retptr / 4 + 1]; + let v0; + if (r0 !== 0) { + v0 = getStringFromWasm0(r0, r1).slice(); + wasm.__wbindgen_export_2(r0, r1 * 1); + } + return v0; + } finally { + wasm.__wbindgen_add_to_stack_pointer(16); + } + } + /** + * @param {string} text + * @param {any} allowed_special + * @param {any} disallowed_special + * @returns {Uint32Array} + */ + encode(text, allowed_special, disallowed_special) { + try { + const retptr = wasm.__wbindgen_add_to_stack_pointer(-16); + const ptr0 = passStringToWasm0(text, wasm.__wbindgen_export_0, wasm.__wbindgen_export_1); + const len0 = WASM_VECTOR_LEN; + wasm.tiktoken_encode(retptr, this.ptr, ptr0, len0, 
addHeapObject(allowed_special), addHeapObject(disallowed_special)); + var r0 = getInt32Memory0()[retptr / 4 + 0]; + var r1 = getInt32Memory0()[retptr / 4 + 1]; + var r2 = getInt32Memory0()[retptr / 4 + 2]; + var r3 = getInt32Memory0()[retptr / 4 + 3]; + if (r3) { + throw takeObject(r2); + } + var v1 = getArrayU32FromWasm0(r0, r1).slice(); + wasm.__wbindgen_export_2(r0, r1 * 4); + return v1; + } finally { + wasm.__wbindgen_add_to_stack_pointer(16); + } + } + /** + * @param {string} text + * @returns {Uint32Array} + */ + encode_ordinary(text) { + try { + const retptr = wasm.__wbindgen_add_to_stack_pointer(-16); + const ptr0 = passStringToWasm0(text, wasm.__wbindgen_export_0, wasm.__wbindgen_export_1); + const len0 = WASM_VECTOR_LEN; + wasm.tiktoken_encode_ordinary(retptr, this.ptr, ptr0, len0); + var r0 = getInt32Memory0()[retptr / 4 + 0]; + var r1 = getInt32Memory0()[retptr / 4 + 1]; + var v1 = getArrayU32FromWasm0(r0, r1).slice(); + wasm.__wbindgen_export_2(r0, r1 * 4); + return v1; + } finally { + wasm.__wbindgen_add_to_stack_pointer(16); + } + } + /** + * @param {string} text + * @param {any} allowed_special + * @param {any} disallowed_special + * @returns {any} + */ + encode_with_unstable(text, allowed_special, disallowed_special) { + try { + const retptr = wasm.__wbindgen_add_to_stack_pointer(-16); + const ptr0 = passStringToWasm0(text, wasm.__wbindgen_export_0, wasm.__wbindgen_export_1); + const len0 = WASM_VECTOR_LEN; + wasm.tiktoken_encode_with_unstable(retptr, this.ptr, ptr0, len0, addHeapObject(allowed_special), addHeapObject(disallowed_special)); + var r0 = getInt32Memory0()[retptr / 4 + 0]; + var r1 = getInt32Memory0()[retptr / 4 + 1]; + var r2 = getInt32Memory0()[retptr / 4 + 2]; + if (r2) { + throw takeObject(r1); + } + return takeObject(r0); + } finally { + wasm.__wbindgen_add_to_stack_pointer(16); + } + } + /** + * @param {Uint8Array} bytes + * @returns {number} + */ + encode_single_token(bytes) { + const ptr0 = passArray8ToWasm0(bytes, 
wasm.__wbindgen_export_0); + const len0 = WASM_VECTOR_LEN; + const ret = wasm.tiktoken_encode_single_token(this.ptr, ptr0, len0); + return ret >>> 0; + } + /** + * @param {Uint32Array} tokens + * @returns {Uint8Array} + */ + decode(tokens) { + try { + const retptr = wasm.__wbindgen_add_to_stack_pointer(-16); + const ptr0 = passArray32ToWasm0(tokens, wasm.__wbindgen_export_0); + const len0 = WASM_VECTOR_LEN; + wasm.tiktoken_decode(retptr, this.ptr, ptr0, len0); + var r0 = getInt32Memory0()[retptr / 4 + 0]; + var r1 = getInt32Memory0()[retptr / 4 + 1]; + var v1 = getArrayU8FromWasm0(r0, r1).slice(); + wasm.__wbindgen_export_2(r0, r1 * 1); + return v1; + } finally { + wasm.__wbindgen_add_to_stack_pointer(16); + } + } + /** + * @param {number} token + * @returns {Uint8Array} + */ + decode_single_token_bytes(token) { + try { + const retptr = wasm.__wbindgen_add_to_stack_pointer(-16); + wasm.tiktoken_decode_single_token_bytes(retptr, this.ptr, token); + var r0 = getInt32Memory0()[retptr / 4 + 0]; + var r1 = getInt32Memory0()[retptr / 4 + 1]; + var v0 = getArrayU8FromWasm0(r0, r1).slice(); + wasm.__wbindgen_export_2(r0, r1 * 1); + return v0; + } finally { + wasm.__wbindgen_add_to_stack_pointer(16); + } + } + /** + * @returns {any} + */ + token_byte_values() { + const ret = wasm.tiktoken_token_byte_values(this.ptr); + return takeObject(ret); + } +} +module.exports.Tiktoken = Tiktoken; + +module.exports.__wbindgen_object_drop_ref = function(arg0) { + takeObject(arg0); +}; + +module.exports.__wbindgen_is_undefined = function(arg0) { + const ret = getObject(arg0) === undefined; + return ret; +}; + +module.exports.__wbg_stringify_029a979dfb73aa17 = function() { return handleError(function (arg0) { + const ret = JSON.stringify(getObject(arg0)); + return addHeapObject(ret); +}, arguments) }; + +module.exports.__wbindgen_string_get = function(arg0, arg1) { + const obj = getObject(arg1); + const ret = typeof(obj) === 'string' ? obj : undefined; + var ptr0 = isLikeNone(ret) ? 
0 : passStringToWasm0(ret, wasm.__wbindgen_export_0, wasm.__wbindgen_export_1); + var len0 = WASM_VECTOR_LEN; + getInt32Memory0()[arg0 / 4 + 1] = len0; + getInt32Memory0()[arg0 / 4 + 0] = ptr0; +}; + +module.exports.__wbindgen_error_new = function(arg0, arg1) { + const ret = new Error(getStringFromWasm0(arg0, arg1)); + return addHeapObject(ret); +}; + +module.exports.__wbg_parse_3ac95b51fc312db8 = function() { return handleError(function (arg0, arg1) { + const ret = JSON.parse(getStringFromWasm0(arg0, arg1)); + return addHeapObject(ret); +}, arguments) }; + +module.exports.__wbindgen_throw = function(arg0, arg1) { + throw new Error(getStringFromWasm0(arg0, arg1)); +}; + +const path = require('path').join(__dirname, 'tiktoken_bg.wasm'); +const bytes = require('fs').readFileSync(path); + +const wasmModule = new WebAssembly.Module(bytes); +const wasmInstance = new WebAssembly.Instance(wasmModule, imports); +wasm = wasmInstance.exports; +module.exports.__wasm = wasm; + diff --git a/src/tiktoken/dist/tiktoken_bg.js b/src/tiktoken/dist/tiktoken_bg.js new file mode 100644 index 0000000..9c01874 --- /dev/null +++ b/src/tiktoken/dist/tiktoken_bg.js @@ -0,0 +1,421 @@ +let wasm; +export function __wbg_set_wasm(val) { + wasm = val; +} + + +const heap = new Array(128).fill(undefined); + +heap.push(undefined, null, true, false); + +function getObject(idx) { return heap[idx]; } + +let heap_next = heap.length; + +function dropObject(idx) { + if (idx < 132) return; + heap[idx] = heap_next; + heap_next = idx; +} + +function takeObject(idx) { + const ret = getObject(idx); + dropObject(idx); + return ret; +} + +let WASM_VECTOR_LEN = 0; + +let cachedUint8Memory0 = null; + +function getUint8Memory0() { + if (cachedUint8Memory0 === null || cachedUint8Memory0.byteLength === 0) { + cachedUint8Memory0 = new Uint8Array(wasm.memory.buffer); + } + return cachedUint8Memory0; +} + +const lTextEncoder = typeof TextEncoder === 'undefined' ? 
(0, module.require)('util').TextEncoder : TextEncoder; + +let cachedTextEncoder = new lTextEncoder('utf-8'); + +const encodeString = (typeof cachedTextEncoder.encodeInto === 'function' + ? function (arg, view) { + return cachedTextEncoder.encodeInto(arg, view); +} + : function (arg, view) { + const buf = cachedTextEncoder.encode(arg); + view.set(buf); + return { + read: arg.length, + written: buf.length + }; +}); + +function passStringToWasm0(arg, malloc, realloc) { + + if (realloc === undefined) { + const buf = cachedTextEncoder.encode(arg); + const ptr = malloc(buf.length); + getUint8Memory0().subarray(ptr, ptr + buf.length).set(buf); + WASM_VECTOR_LEN = buf.length; + return ptr; + } + + let len = arg.length; + let ptr = malloc(len); + + const mem = getUint8Memory0(); + + let offset = 0; + + for (; offset < len; offset++) { + const code = arg.charCodeAt(offset); + if (code > 0x7F) break; + mem[ptr + offset] = code; + } + + if (offset !== len) { + if (offset !== 0) { + arg = arg.slice(offset); + } + ptr = realloc(ptr, len, len = offset + arg.length * 3); + const view = getUint8Memory0().subarray(ptr + offset, ptr + len); + const ret = encodeString(arg, view); + + offset += ret.written; + } + + WASM_VECTOR_LEN = offset; + return ptr; +} + +function isLikeNone(x) { + return x === undefined || x === null; +} + +let cachedInt32Memory0 = null; + +function getInt32Memory0() { + if (cachedInt32Memory0 === null || cachedInt32Memory0.byteLength === 0) { + cachedInt32Memory0 = new Int32Array(wasm.memory.buffer); + } + return cachedInt32Memory0; +} + +const lTextDecoder = typeof TextDecoder === 'undefined' ? 
(0, module.require)('util').TextDecoder : TextDecoder; + +let cachedTextDecoder = new lTextDecoder('utf-8', { ignoreBOM: true, fatal: true }); + +cachedTextDecoder.decode(); + +function getStringFromWasm0(ptr, len) { + return cachedTextDecoder.decode(getUint8Memory0().subarray(ptr, ptr + len)); +} + +function addHeapObject(obj) { + if (heap_next === heap.length) heap.push(heap.length + 1); + const idx = heap_next; + heap_next = heap[idx]; + + heap[idx] = obj; + return idx; +} + +let cachedUint32Memory0 = null; + +function getUint32Memory0() { + if (cachedUint32Memory0 === null || cachedUint32Memory0.byteLength === 0) { + cachedUint32Memory0 = new Uint32Array(wasm.memory.buffer); + } + return cachedUint32Memory0; +} + +function getArrayU32FromWasm0(ptr, len) { + return getUint32Memory0().subarray(ptr / 4, ptr / 4 + len); +} + +function passArray8ToWasm0(arg, malloc) { + const ptr = malloc(arg.length * 1); + getUint8Memory0().set(arg, ptr / 1); + WASM_VECTOR_LEN = arg.length; + return ptr; +} + +function passArray32ToWasm0(arg, malloc) { + const ptr = malloc(arg.length * 4); + getUint32Memory0().set(arg, ptr / 4); + WASM_VECTOR_LEN = arg.length; + return ptr; +} + +function getArrayU8FromWasm0(ptr, len) { + return getUint8Memory0().subarray(ptr / 1, ptr / 1 + len); +} +/** +* @param {string} encoding +* @param {any} extend_special_tokens +* @returns {Tiktoken} +*/ +export function get_encoding(encoding, extend_special_tokens) { + try { + const retptr = wasm.__wbindgen_add_to_stack_pointer(-16); + const ptr0 = passStringToWasm0(encoding, wasm.__wbindgen_export_0, wasm.__wbindgen_export_1); + const len0 = WASM_VECTOR_LEN; + wasm.get_encoding(retptr, ptr0, len0, addHeapObject(extend_special_tokens)); + var r0 = getInt32Memory0()[retptr / 4 + 0]; + var r1 = getInt32Memory0()[retptr / 4 + 1]; + var r2 = getInt32Memory0()[retptr / 4 + 2]; + if (r2) { + throw takeObject(r1); + } + return Tiktoken.__wrap(r0); + } finally { + wasm.__wbindgen_add_to_stack_pointer(16); + } +} + 
+/** +* @param {string} model +* @param {any} extend_special_tokens +* @returns {Tiktoken} +*/ +export function encoding_for_model(model, extend_special_tokens) { + try { + const retptr = wasm.__wbindgen_add_to_stack_pointer(-16); + const ptr0 = passStringToWasm0(model, wasm.__wbindgen_export_0, wasm.__wbindgen_export_1); + const len0 = WASM_VECTOR_LEN; + wasm.encoding_for_model(retptr, ptr0, len0, addHeapObject(extend_special_tokens)); + var r0 = getInt32Memory0()[retptr / 4 + 0]; + var r1 = getInt32Memory0()[retptr / 4 + 1]; + var r2 = getInt32Memory0()[retptr / 4 + 2]; + if (r2) { + throw takeObject(r1); + } + return Tiktoken.__wrap(r0); + } finally { + wasm.__wbindgen_add_to_stack_pointer(16); + } +} + +function handleError(f, args) { + try { + return f.apply(this, args); + } catch (e) { + wasm.__wbindgen_export_3(addHeapObject(e)); + } +} +/** +*/ +export class Tiktoken { + + static __wrap(ptr) { + const obj = Object.create(Tiktoken.prototype); + obj.ptr = ptr; + + return obj; + } + + __destroy_into_raw() { + const ptr = this.ptr; + this.ptr = 0; + + return ptr; + } + + free() { + const ptr = this.__destroy_into_raw(); + wasm.__wbg_tiktoken_free(ptr); + } + /** + * @param {string} tiktoken_bfe + * @param {any} special_tokens + * @param {string} pat_str + */ + constructor(tiktoken_bfe, special_tokens, pat_str) { + const ptr0 = passStringToWasm0(tiktoken_bfe, wasm.__wbindgen_export_0, wasm.__wbindgen_export_1); + const len0 = WASM_VECTOR_LEN; + const ptr1 = passStringToWasm0(pat_str, wasm.__wbindgen_export_0, wasm.__wbindgen_export_1); + const len1 = WASM_VECTOR_LEN; + const ret = wasm.tiktoken_new(ptr0, len0, addHeapObject(special_tokens), ptr1, len1); + return Tiktoken.__wrap(ret); + } + /** + * @returns {string | undefined} + */ + get name() { + try { + const retptr = wasm.__wbindgen_add_to_stack_pointer(-16); + wasm.tiktoken_name(retptr, this.ptr); + var r0 = getInt32Memory0()[retptr / 4 + 0]; + var r1 = getInt32Memory0()[retptr / 4 + 1]; + let v0; + if (r0
!== 0) { + v0 = getStringFromWasm0(r0, r1).slice(); + wasm.__wbindgen_export_2(r0, r1 * 1); + } + return v0; + } finally { + wasm.__wbindgen_add_to_stack_pointer(16); + } + } + /** + * @param {string} text + * @param {any} allowed_special + * @param {any} disallowed_special + * @returns {Uint32Array} + */ + encode(text, allowed_special, disallowed_special) { + try { + const retptr = wasm.__wbindgen_add_to_stack_pointer(-16); + const ptr0 = passStringToWasm0(text, wasm.__wbindgen_export_0, wasm.__wbindgen_export_1); + const len0 = WASM_VECTOR_LEN; + wasm.tiktoken_encode(retptr, this.ptr, ptr0, len0, addHeapObject(allowed_special), addHeapObject(disallowed_special)); + var r0 = getInt32Memory0()[retptr / 4 + 0]; + var r1 = getInt32Memory0()[retptr / 4 + 1]; + var r2 = getInt32Memory0()[retptr / 4 + 2]; + var r3 = getInt32Memory0()[retptr / 4 + 3]; + if (r3) { + throw takeObject(r2); + } + var v1 = getArrayU32FromWasm0(r0, r1).slice(); + wasm.__wbindgen_export_2(r0, r1 * 4); + return v1; + } finally { + wasm.__wbindgen_add_to_stack_pointer(16); + } + } + /** + * @param {string} text + * @returns {Uint32Array} + */ + encode_ordinary(text) { + try { + const retptr = wasm.__wbindgen_add_to_stack_pointer(-16); + const ptr0 = passStringToWasm0(text, wasm.__wbindgen_export_0, wasm.__wbindgen_export_1); + const len0 = WASM_VECTOR_LEN; + wasm.tiktoken_encode_ordinary(retptr, this.ptr, ptr0, len0); + var r0 = getInt32Memory0()[retptr / 4 + 0]; + var r1 = getInt32Memory0()[retptr / 4 + 1]; + var v1 = getArrayU32FromWasm0(r0, r1).slice(); + wasm.__wbindgen_export_2(r0, r1 * 4); + return v1; + } finally { + wasm.__wbindgen_add_to_stack_pointer(16); + } + } + /** + * @param {string} text + * @param {any} allowed_special + * @param {any} disallowed_special + * @returns {any} + */ + encode_with_unstable(text, allowed_special, disallowed_special) { + try { + const retptr = wasm.__wbindgen_add_to_stack_pointer(-16); + const ptr0 = passStringToWasm0(text, wasm.__wbindgen_export_0,
wasm.__wbindgen_export_1); + const len0 = WASM_VECTOR_LEN; + wasm.tiktoken_encode_with_unstable(retptr, this.ptr, ptr0, len0, addHeapObject(allowed_special), addHeapObject(disallowed_special)); + var r0 = getInt32Memory0()[retptr / 4 + 0]; + var r1 = getInt32Memory0()[retptr / 4 + 1]; + var r2 = getInt32Memory0()[retptr / 4 + 2]; + if (r2) { + throw takeObject(r1); + } + return takeObject(r0); + } finally { + wasm.__wbindgen_add_to_stack_pointer(16); + } + } + /** + * @param {Uint8Array} bytes + * @returns {number} + */ + encode_single_token(bytes) { + const ptr0 = passArray8ToWasm0(bytes, wasm.__wbindgen_export_0); + const len0 = WASM_VECTOR_LEN; + const ret = wasm.tiktoken_encode_single_token(this.ptr, ptr0, len0); + return ret >>> 0; + } + /** + * @param {Uint32Array} tokens + * @returns {Uint8Array} + */ + decode(tokens) { + try { + const retptr = wasm.__wbindgen_add_to_stack_pointer(-16); + const ptr0 = passArray32ToWasm0(tokens, wasm.__wbindgen_export_0); + const len0 = WASM_VECTOR_LEN; + wasm.tiktoken_decode(retptr, this.ptr, ptr0, len0); + var r0 = getInt32Memory0()[retptr / 4 + 0]; + var r1 = getInt32Memory0()[retptr / 4 + 1]; + var v1 = getArrayU8FromWasm0(r0, r1).slice(); + wasm.__wbindgen_export_2(r0, r1 * 1); + return v1; + } finally { + wasm.__wbindgen_add_to_stack_pointer(16); + } + } + /** + * @param {number} token + * @returns {Uint8Array} + */ + decode_single_token_bytes(token) { + try { + const retptr = wasm.__wbindgen_add_to_stack_pointer(-16); + wasm.tiktoken_decode_single_token_bytes(retptr, this.ptr, token); + var r0 = getInt32Memory0()[retptr / 4 + 0]; + var r1 = getInt32Memory0()[retptr / 4 + 1]; + var v0 = getArrayU8FromWasm0(r0, r1).slice(); + wasm.__wbindgen_export_2(r0, r1 * 1); + return v0; + } finally { + wasm.__wbindgen_add_to_stack_pointer(16); + } + } + /** + * @returns {any} + */ + token_byte_values() { + const ret = wasm.tiktoken_token_byte_values(this.ptr); + return takeObject(ret); + } +} + +export function
__wbindgen_object_drop_ref(arg0) { + takeObject(arg0); +}; + +export function __wbindgen_is_undefined(arg0) { + const ret = getObject(arg0) === undefined; + return ret; +}; + +export function __wbg_stringify_029a979dfb73aa17() { return handleError(function (arg0) { + const ret = JSON.stringify(getObject(arg0)); + return addHeapObject(ret); +}, arguments) }; + +export function __wbindgen_string_get(arg0, arg1) { + const obj = getObject(arg1); + const ret = typeof(obj) === 'string' ? obj : undefined; + var ptr0 = isLikeNone(ret) ? 0 : passStringToWasm0(ret, wasm.__wbindgen_export_0, wasm.__wbindgen_export_1); + var len0 = WASM_VECTOR_LEN; + getInt32Memory0()[arg0 / 4 + 1] = len0; + getInt32Memory0()[arg0 / 4 + 0] = ptr0; +}; + +export function __wbindgen_error_new(arg0, arg1) { + const ret = new Error(getStringFromWasm0(arg0, arg1)); + return addHeapObject(ret); +}; + +export function __wbg_parse_3ac95b51fc312db8() { return handleError(function (arg0, arg1) { + const ret = JSON.parse(getStringFromWasm0(arg0, arg1)); + return addHeapObject(ret); +}, arguments) }; + +export function __wbindgen_throw(arg0, arg1) { + throw new Error(getStringFromWasm0(arg0, arg1)); +}; + diff --git a/src/tiktoken/dist/tiktoken_bg.wasm b/src/tiktoken/dist/tiktoken_bg.wasm new file mode 100644 index 0000000000000000000000000000000000000000..985d6baf3b0e59bf3d38c3b9fbea9e70fdccd303 GIT binary patch literal 274 zcmYLE(G>zQ2n#*bQzvmN9N@nERhZ1@NUmU@t@lGx5(35ju?YbAe%|e7OGHRmn-$f8 zNx+mELDFzcp0C3|bf>tTs*_p`(A+(PI0ftpWOIO&*>392^+X0a>K3CWA0 d literal 0 HcmV?d00001 diff --git a/src/tiktoken/dist/tiktoken_bg.wasm.d.ts b/src/tiktoken/dist/tiktoken_bg.wasm.d.ts new file mode 100644 index 0000000..bbdd3f2 --- /dev/null +++ b/src/tiktoken/dist/tiktoken_bg.wasm.d.ts @@ -0,0 +1,20 @@ +/* tslint:disable */ +/* eslint-disable */ +export const memory: WebAssembly.Memory; +export function __wbg_tiktoken_free(a: number): void; +export function tiktoken_new(a: number, b: number, c: number, d: number, e:
number): number; +export function tiktoken_name(a: number, b: number): void; +export function tiktoken_encode(a: number, b: number, c: number, d: number, e: number, f: number): void; +export function tiktoken_encode_ordinary(a: number, b: number, c: number, d: number): void; +export function tiktoken_encode_with_unstable(a: number, b: number, c: number, d: number, e: number, f: number): void; +export function tiktoken_encode_single_token(a: number, b: number, c: number): number; +export function tiktoken_decode(a: number, b: number, c: number, d: number): void; +export function tiktoken_decode_single_token_bytes(a: number, b: number, c: number): void; +export function tiktoken_token_byte_values(a: number): number; +export function get_encoding(a: number, b: number, c: number, d: number): void; +export function encoding_for_model(a: number, b: number, c: number, d: number): void; +export function __wbindgen_export_0(a: number): number; +export function __wbindgen_export_1(a: number, b: number, c: number): number; +export function __wbindgen_add_to_stack_pointer(a: number): number; +export function __wbindgen_export_2(a: number, b: number): void; +export function __wbindgen_export_3(a: number): void; diff --git a/src/tiktoken/package.json b/src/tiktoken/package.json new file mode 100644 index 0000000..50a63a1 --- /dev/null +++ b/src/tiktoken/package.json @@ -0,0 +1,31 @@ +{ + "name": "@dqbd/tiktoken", + "version": "1.0.0-alpha.1", + "description": "Javascript bindings for tiktoken", + "license": "MIT", + "scripts": { + "build": "run-s build:*", + "build:cleanup": "rm -rf dist/", + "build:rank": "tsx scripts/inline_ranks.ts", + "build:wasm": "run-s wasm:*", + "build:postprocess": "tsx scripts/post_process.ts", + "wasm:bundler": "wasm-pack build --target bundler --release --out-dir dist && rm -rf dist/.gitignore dist/README.md dist/package.json", + "wasm:node": "wasm-pack build --target nodejs --release --out-dir dist/node && rm -rf dist/node/.gitignore
dist/node/README.md dist/node/package.json", + "test": "yarn vitest" + }, + "repository": { + "type": "git", + "url": "https://github.com/dqbd/tiktoken" + }, + "dependencies": { + "node-fetch": "^3.3.0" + }, + "devDependencies": { + "@types/node": "^18.14.4", + "npm-run-all": "^4.1.5", + "ts-morph": "^17.0.1", + "tsx": "^3.12.3", + "typescript": "^4.9.5", + "vitest": "^0.28.5" + } +} diff --git a/src/tiktoken/tsconfig.json b/src/tiktoken/tsconfig.json new file mode 100644 index 0000000..3c5ff0a --- /dev/null +++ b/src/tiktoken/tsconfig.json @@ -0,0 +1,14 @@ +{ + "compilerOptions": { + "target": "ES2022", + "lib": ["ESNext", "DOM"], + "module": "ES2020", + "moduleResolution": "node", + "strict": true, + "declaration": true, + "outDir": "./dist", + "allowSyntheticDefaultImports": true, + }, + "include": ["./**/*.ts", "./**/*.js"], + "exclude": ["node_modules", "dist"] +} diff --git a/src/tokenizer.ts b/src/tokenizer.ts new file mode 100644 index 0000000..0f9129f --- /dev/null +++ b/src/tokenizer.ts @@ -0,0 +1,100 @@ +import { encoding_for_model } from "./tiktoken/dist/tiktoken"; +import { OpenAIMessage } from "./types"; + +const enc = encoding_for_model("gpt-3.5-turbo"); + +export function getTokenCount(input: string): number { + return enc.encode(input).length; +} + +export function shortenStringToTokenCount(input: string, targetTokenCount: number) { + const tokens = enc.encode(input); + const buffer = enc.decode(tokens.slice(0, targetTokenCount)); + return new TextDecoder().decode(buffer) + "(...)"; +} + +function serializeChatMLMessage(role: string, content: string) { + const encodedContent = JSON.stringify(content) + .replace(/^"/g, '').replace(/"$/g, ''); + + let chatml = ''; + chatml += `{"token": "<|im_start|>"},\n `; + chatml += `"${role.toLocaleLowerCase()}\\n${encodedContent}",\n `; + chatml += `{"token": "<|im_end|>"}, "\\n"`; + + return chatml; +} + +export function getTokenCountForMessages(messages: OpenAIMessage[]): number { + let chatml = '[\n'; + for
(let i = 0; i < messages.length; i++) { + const m = messages[i]; + const serializeMessage = serializeChatMLMessage(m.role, m.content); + + chatml += ' ' + serializeMessage; + + if (i < messages.length - 1) { + chatml += ','; + } + chatml += '\n'; + } + chatml += ']'; + return getTokenCount(chatml); +} + +export function selectMessagesToSendSafely(messages: OpenAIMessage[]) { + const maxTokens = 2048; + + if (getTokenCountForMessages(messages) <= maxTokens) { + return messages; + } + + const insertedSystemMessage = serializeChatMLMessage('system', 'Several messages not included due to space constraints'); + const insertedSystemMessageTokenCount = getTokenCount(insertedSystemMessage); + const targetTokens = maxTokens - insertedSystemMessageTokenCount; + const firstUserMessageIndex = messages.findIndex(m => m.role === 'user'); + let output = messages.map(m => ({ ...m })); // copy each message so the trimming below never edits the caller's objects + + let removed = false; + + // first, remove items in the 'middle' of the conversation until we're under the limit; the bound is output.length (which shrinks on every splice) so the latest message is never removed + for (let i = firstUserMessageIndex + 1; i < output.length - 1; i++) { + if (getTokenCountForMessages(output) > targetTokens) { + output.splice(i, 1); + removed = true; + } + } + + // if we're still over the limit, trim message contents from oldest to newest (excluding the latest); output holds copies, so the original messages keep their full text + if (getTokenCountForMessages(output) > targetTokens) { + for (let i = 0; i < output.length - 1 && getTokenCountForMessages(output) > targetTokens; i++) { + output[i].content = shortenStringToTokenCount(output[i].content, 20); + removed = true; + } + } + + // if that still didn't work, just keep the system prompt and the latest message (truncated as needed) + if (getTokenCountForMessages(output) > targetTokens) { + const systemMessage = output.find(m => m.role === 'system')!; + const latestMessage = { ...messages[messages.length - 1] }; + output = [systemMessage, latestMessage]; + removed = true; + + const excessTokens = Math.max(0, getTokenCountForMessages(output) - targetTokens); + + if (excessTokens) { + const tokens =
enc.encode(latestMessage.content); + const buffer = enc.decode(tokens.slice(0, Math.max(0, tokens.length - excessTokens))); + latestMessage.content = new TextDecoder().decode(buffer); + } + } + + if (removed) { + output.splice(1, 0, { + role: 'system', + content: 'Several messages not included due to space constraints', + }); + } + + return output; +} \ No newline at end of file