chat-with-gpt/app/src/tokenizer.ts

import { OpenAIMessage } from "./types";
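
// Load the tiktoken encoder lazily, a couple of seconds after startup, so the
// (fairly large) tokenizer bundle doesn't delay the initial page load.
// `enc` stays undefined until the dynamic import resolves.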
let enc: any;
setTimeout(async () => {
    const { encoding_for_model } = await import("./tiktoken/dist/tiktoken");
    enc = encoding_for_model("gpt-3.5-turbo");
}, 2000);
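
// Counts the tokens in a raw string. Note that this throws if it is called
// before the encoder above has finished loading.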
export function getTokenCount(input: string): number {
    return enc.encode(input).length;
}
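
// Truncates a string to roughly `targetTokenCount` tokens and appends "(...)"
// to mark the cut.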
export function shortenStringToTokenCount(input: string, targetTokenCount: number) {
    const tokens = enc.encode(input);
    const buffer = enc.decode(tokens.slice(0, targetTokenCount));
    return new TextDecoder().decode(buffer) + "(...)";
}
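
// Serializes one message into a ChatML-like string used purely for token counting.
// It approximates the <|im_start|>/<|im_end|> framing the API applies to each
// message; it is not an exact reproduction of the wire format.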
function serializeChatMLMessage(role: string, content: string) {
    const encodedContent = JSON.stringify(content)
        .replace(/^"/g, '').replace(/"$/g, '');
    let chatml = '';
    chatml += `{"token": "<|im_start|>"},\n `;
    chatml += `"${role.toLocaleLowerCase()}\\n${encodedContent}",\n `;
    chatml += `{"token": "<|im_end|>"}, "\\n"`;
    return chatml;
}
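
// Estimates the token count of a whole conversation by serializing each message
// into the ChatML-like form above and tokenizing the result.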
export function getTokenCountForMessages(messages: OpenAIMessage[]): number {
    let chatml = '[\n';
    for (let i = 0; i < messages.length; i++) {
        const m = messages[i];
        const serializedMessage = serializeChatMLMessage(m.role, m.content);
        chatml += ' ' + serializedMessage;
        if (i < messages.length - 1) {
            chatml += ',';
        }
        chatml += '\n';
    }
    chatml += ']';
    return getTokenCount(chatml);
}
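
// Trims a conversation so its estimated token count fits within the 2,048-token
// budget (`maxTokens`). Strategy, in order: drop messages from the middle of the
// conversation, then truncate older message contents, and as a last resort keep
// only the system prompt plus a (possibly truncated) latest message. A placeholder
// system message is inserted whenever anything was removed.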
export function selectMessagesToSendSafely(messages: OpenAIMessage[]) {
    const maxTokens = 2048;

    if (getTokenCountForMessages(messages) <= maxTokens) {
        return messages;
    }

    const insertedSystemMessage = serializeChatMLMessage('system', 'Several messages not included due to space constraints');
    const insertedSystemMessageTokenCount = getTokenCount(insertedSystemMessage);
    const targetTokens = maxTokens - insertedSystemMessageTokenCount;
    const firstUserMessageIndex = messages.findIndex(m => m.role === 'user');

    let output = [...messages];
    let removed = false;
    // first, remove items in the 'middle' of the conversation until we're under the limit
    for (let i = firstUserMessageIndex + 1; i < output.length - 1; i++) {
        if (getTokenCountForMessages(output) > targetTokens) {
            output.splice(i, 1);
            i--; // stay on the same index, since the next message shifted into this slot
            removed = true;
        }
    }
    // if we're still over the limit, trim message contents from oldest to newest (excluding the latest)
    if (getTokenCountForMessages(output) > targetTokens) {
        for (let i = 0; i < output.length - 1 && getTokenCountForMessages(output) > targetTokens; i++) {
            // copy the message before shortening it so the caller's array isn't mutated
            output[i] = {
                ...output[i],
                content: shortenStringToTokenCount(output[i].content, 20),
            };
            removed = true;
        }
    }
    // if that still didn't work, just keep the system prompt and the latest message (truncated as needed)
    if (getTokenCountForMessages(output) > targetTokens) {
        const systemMessage = output.find(m => m.role === 'system')!;
        const latestMessage = { ...messages[messages.length - 1] };
        output = [systemMessage, latestMessage];
        removed = true;

        const excessTokens = Math.max(0, getTokenCountForMessages(output) - targetTokens);
        if (excessTokens) {
            const tokens = enc.encode(latestMessage.content);
            const buffer = enc.decode(tokens.slice(0, Math.max(0, tokens.length - excessTokens)));
            latestMessage.content = new TextDecoder().decode(buffer);
        }
    }
    if (removed) {
        output.splice(1, 0, {
            role: 'system',
            content: 'Several messages not included due to space constraints',
        });
    }

    return output;
}
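
// Example usage (a sketch; the message objects here are illustrative):
//
//   const safe = selectMessagesToSendSafely([
//       { role: 'system', content: 'You are a helpful assistant.' },
//       { role: 'user', content: 'Hello!' },
//   ]);
//   // `safe` fits within the 2,048-token budget and can be sent to the API.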