refactor: refactor token counting logic in countTokens function

This commit is contained in:
dakai 2024-10-06 10:29:01 +08:00
parent 4acc742456
commit 586c4a493d
4 changed files with 69 additions and 33 deletions

View File

@ -43,6 +43,7 @@ export interface MultimodalContent {
/**
 * Metadata for a file that has been uploaded and attached in the chat UI.
 */
export interface UploadFile {
/** File name; shown next to the attachment and prepended to its fetched content. */
name: string;
/** Address the file's content is fetched from (the value returned by the upload step). */
url: string;
/**
 * Optional, already-formatted token estimate for the file's content
 * (a display string in thousands such as "1.23K", not a number).
 */
tokenCount?: string;
}
export interface RequestMessage {

View File

@ -74,6 +74,7 @@ import {
isDalle3,
showPlugins,
safeLocalStorage,
countTokens,
} from "../utils";
import type { UploadFile } from "../client/api";
@ -1489,11 +1490,15 @@ function _Chat() {
setUploading(true);
const files = event.target.files;
const imagesData: UploadFile[] = [];
(async () => {
for (let i = 0; i < files.length; i++) {
const file = event.target.files[i];
uploadImageRemote(file)
.then((dataUrl) => {
imagesData.push({ name: file.name, url: dataUrl });
const file = files[i];
try {
const dataUrl = await uploadImageRemote(file);
const fileData: UploadFile = { name: file.name, url: dataUrl };
const tokenCount = await countTokens(fileData);
fileData.tokenCount = tokenCount;
imagesData.push(fileData);
if (
imagesData.length === 3 ||
imagesData.length === files.length
@ -1501,12 +1506,12 @@ function _Chat() {
setUploading(false);
res(imagesData);
}
})
.catch((e) => {
} catch (e) {
setUploading(false);
rej(e);
});
}
}
})();
};
fileInput.click();
})),
@ -1945,7 +1950,6 @@ function _Chat() {
.pop()
?.toLowerCase() as DefaultExtensionType;
const style = defaultStyles[extension];
return (
<a
href={file.url}
@ -1965,7 +1969,7 @@ function _Chat() {
styles["chat-message-item-file-name"]
}
>
{file.name}
{file.name} {file.tokenCount}
</div>
</a>
);
@ -2082,22 +2086,22 @@ function _Chat() {
</div>
{attachImages.length == 0 && (
<div className={styles["attach-file-name-full"]}>
{file.name}
{file.name} {file.tokenCount}
</div>
)}
{attachImages.length == 1 && (
<div className={styles["attach-file-name-half"]}>
{file.name}
{file.name} {file.tokenCount}
</div>
)}
{attachImages.length == 2 && (
<div className={styles["attach-file-name-less"]}>
{file.name}
{file.name} {file.tokenCount}
</div>
)}
{attachImages.length == 3 && (
<div className={styles["attach-file-name-min"]}>
{file.name}
{file.name} {file.tokenCount}
</div>
)}

View File

@ -19,7 +19,7 @@ import {
StoreKey,
} from "../constant";
import Locale, { getLang } from "../locales";
import { isDalle3, safeLocalStorage } from "../utils";
import { isDalle3, safeLocalStorage, readFileContent } from "../utils";
import { prettyObject } from "../utils/format";
import { createPersistStore } from "../utils/store";
import { estimateTokenLength } from "../utils/token";
@ -154,23 +154,6 @@ function fillTemplateWith(input: string, modelConfig: ModelConfig) {
return output;
}
/**
 * Downloads the text behind `file.url` and returns it prefixed with the
 * file name on its own line.
 *
 * This is a best-effort helper: any failure (network error, non-OK HTTP
 * status, body read error) is logged via `console.error` and reported to the
 * caller as an empty string instead of a rejection.
 *
 * @param file - Upload descriptor; only `name` and `url` are read.
 * @returns `"<name>\n<content>"`, or `""` when the content could not be read.
 */
const readFileContent = async (file: UploadFile): Promise<string> => {
  try {
    const reply = await fetch(file.url);
    if (reply.ok) {
      const body = await reply.text();
      return `${file.name}\n${body}`;
    }
    // Non-2xx responses are funneled into the same catch as network errors.
    throw new Error(
      `Failed to fetch content from ${file.url}: ${reply.statusText}`,
    );
  } catch (failure) {
    console.error("Error reading file content:", failure);
    return "";
  }
};
const DEFAULT_CHAT_STATE = {
sessions: [createEmptySession()],
currentSessionIndex: 0,

View File

@ -17,6 +17,54 @@ export function trimTopic(topic: string) {
);
}
/**
 * Fetches an uploaded file's content as text and labels it with the file name.
 *
 * Failures never propagate: a network error, a non-OK response, or a failure
 * while reading the body is logged with `console.error` and the function
 * resolves to an empty string.
 *
 * @param file - The uploaded file; `url` is fetched and `name` is used as the
 *   first line of the result.
 * @returns The file name, a newline, then the fetched text — or `""` on any
 *   failure.
 */
export const readFileContent = async (file: UploadFile): Promise<string> => {
  try {
    const res = await fetch(file.url);
    if (!res.ok) {
      const reason = `Failed to fetch content from ${file.url}: ${res.statusText}`;
      throw new Error(reason);
    }
    const fetchedText = await res.text();
    return `${file.name}\n${fetchedText}`;
  } catch (err) {
    console.error("Error reading file content:", err);
    return "";
  }
};
/**
 * Per-character weights for the token estimate, checked in order (first match
 * wins). Each weight is the estimated fraction of a token one such character
 * contributes.
 *
 * NOTE(review): the 0.416 class previously also contained the literal text
 * `{{input}}`, whose characters are all claimed by earlier classes (or repeated
 * later in the same class), so it had no effect. It looks like a mangled
 * template placeholder — confirm which characters were originally intended
 * there.
 */
const TOKEN_WEIGHT_CLASSES: ReadonlyArray<{ chars: string; weight: number }> = [
  { chars: "NORabcdefghilnopqrstuvy ", weight: 0.202 },
  { chars: "CHLMPQSTUVfkmspwx", weight: 0.237 },
  // Real CR and TAB characters (previously written as the two-character
  // sequences backslash+r / backslash+t, which never matched them).
  { chars: "-.ABDEFGIKWY_\r\tz{ü", weight: 0.304 },
  // Real LF character (previously backslash+n, so newlines were never matched).
  { chars: "!(/;=JX`j\n}ö", weight: 0.416 },
  // The `\\` here is a single literal backslash.
  { chars: '"#%)*+56789<>?@Z[\\]^|§«äç’', weight: 0.479 },
  { chars: ",01234:~Üß", weight: 0.658 },
];

/** Weight of a space that is immediately followed by another space. */
const REPEATED_SPACE_WEIGHT = 0.081;
/** Weight of any UTF-16 code unit above 255 that no class lists explicitly. */
const NON_LATIN1_WEIGHT = 0.658;
/** Weight of every remaining (unlisted, code unit <= 255) character. */
const FALLBACK_WEIGHT = 0.98;

/** Sums the heuristic per-character token weights over `text`, one UTF-16 code unit at a time. */
const sumTokenWeights = (text: string): number => {
  let total = 0;
  for (let i = 0; i < text.length; i++) {
    const char = text.charAt(i);
    if (char === " " && text.charAt(i + 1) === " ") {
      total += REPEATED_SPACE_WEIGHT;
      continue;
    }
    const matched = TOKEN_WEIGHT_CLASSES.find((cls) => cls.chars.includes(char));
    if (matched) {
      total += matched.weight;
    } else if (char.charCodeAt(0) > 255) {
      total += NON_LATIN1_WEIGHT;
    } else {
      total += FALLBACK_WEIGHT;
    }
  }
  return total;
};

/**
 * Estimates how many tokens an uploaded file's content represents and formats
 * the result for display.
 *
 * The text is obtained through `readFileContent(file)`; the estimate is a
 * character-class heuristic, not a real tokenizer run. If `readFileContent`
 * yields an empty string, the result is `"0.00K"`.
 *
 * @param file - The uploaded file whose content should be measured.
 * @returns The estimate in thousands of tokens with two decimals and a `K`
 *   suffix, e.g. `"1.23K"`.
 */
export const countTokens = async (file: UploadFile): Promise<string> => {
  const text = await readFileContent(file);
  return (sumTokenWeights(text) / 1000).toFixed(2) + "K";
};
export async function copyToClipboard(text: string) {
try {
if (window.__TAURI__) {