diff --git a/README.md b/README.md
index 182d3bc02..e01cfa51d 100644
--- a/README.md
+++ b/README.md
@@ -1,16 +1,17 @@
-
-
+
+
+
NextChat (ChatGPT Next Web)
English / [简体中文](./README_CN.md)
-One-Click to get a well-designed cross-platform ChatGPT web UI, with GPT3, GPT4 & Gemini Pro support.
+One-Click to get a well-designed cross-platform ChatGPT web UI, with Claude, GPT4 & Gemini Pro support.
-一键免费部署你的跨平台私人 ChatGPT 应用, 支持 GPT3, GPT4 & Gemini Pro 模型。
+一键免费部署你的跨平台私人 ChatGPT 应用, 支持 Claude, GPT4 & Gemini Pro 模型。
[![Saas][Saas-image]][saas-url]
[![Web][Web-image]][web-url]
@@ -31,7 +32,7 @@ One-Click to get a well-designed cross-platform ChatGPT web UI, with GPT3, GPT4
[MacOS-image]: https://img.shields.io/badge/-MacOS-black?logo=apple
[Linux-image]: https://img.shields.io/badge/-Linux-333?logo=ubuntu
-[

](https://vercel.com/new/clone?repository-url=https%3A%2F%2Fgithub.com%2FChatGPTNextWeb%2FChatGPT-Next-Web&env=OPENAI_API_KEY&env=CODE&project-name=nextchat&repository-name=NextChat) [

](https://zeabur.com/templates/ZBUEFA) [

](https://gitpod.io/#https://github.com/Yidadaa/ChatGPT-Next-Web) [

](https://www.bt.cn/new/download.html) [

](https://computenest.aliyun.com/market/service-f1c9b75e59814dc49d52)
+[

](https://vercel.com/new/clone?repository-url=https%3A%2F%2Fgithub.com%2FChatGPTNextWeb%2FChatGPT-Next-Web&env=OPENAI_API_KEY&env=CODE&project-name=nextchat&repository-name=NextChat) [

](https://zeabur.com/templates/ZBUEFA) [

](https://gitpod.io/#https://github.com/Yidadaa/ChatGPT-Next-Web) [

](https://www.bt.cn/new/download.html)
[

](https://monica.im/?utm=nxcrp)
@@ -355,6 +356,13 @@ For ByteDance: use `modelName@bytedance=deploymentName` to customize model name
Change default model
+### `VISION_MODELS` (optional)
+
+> Default: Empty
+> Example: `gpt-4-vision,claude-3-opus,my-custom-model` adds vision capabilities to these models, in addition to the models matched by the default patterns (which detect model names containing keywords such as "vision", "claude-3", "gemini-1.5", etc.).
+
+Specifies additional models to treat as vision-capable, beyond those detected by the default pattern matching. Separate multiple models with commas.
+
### `WHITE_WEBDAV_ENDPOINTS` (optional)
You can use this option if you want to increase the number of webdav service addresses you are allowed to access, as required by the format:
diff --git a/README_CN.md b/README_CN.md
index d4da8b9da..8173b9c4d 100644
--- a/README_CN.md
+++ b/README_CN.md
@@ -235,6 +235,13 @@ ChatGLM Api Url.
更改默认模型
+### `VISION_MODELS` (可选)
+
+> 默认值:空
+> 示例:`gpt-4-vision,claude-3-opus,my-custom-model` 表示为这些模型添加视觉能力,作为对默认模式匹配的补充(默认会检测包含"vision"、"claude-3"、"gemini-1.5"等关键词的模型)。
+
+在默认模式匹配之外,添加更多具有视觉能力的模型。多个模型用逗号分隔。
+
### `DEFAULT_INPUT_TEMPLATE` (可选)
自定义默认的 template,用于初始化『设置』中的『用户输入预处理』配置项
diff --git a/README_JA.md b/README_JA.md
index 062c11262..29eb0d275 100644
--- a/README_JA.md
+++ b/README_JA.md
@@ -217,6 +217,13 @@ ByteDance モードでは、`modelName@bytedance=deploymentName` 形式でモデ
デフォルトのモデルを変更します。
+### `VISION_MODELS` (オプション)
+
+> デフォルト:空
+> 例:`gpt-4-vision,claude-3-opus,my-custom-model` は、これらのモデルにビジョン機能を追加します。これはデフォルトのパターンマッチング("vision"、"claude-3"、"gemini-1.5"などのキーワードを含むモデルを検出)に加えて適用されます。
+
+デフォルトのパターンマッチングに加えて、追加のモデルにビジョン機能を付与します。複数のモデルはカンマで区切ります。
+
### `DEFAULT_INPUT_TEMPLATE` (オプション)
『設定』の『ユーザー入力前処理』の初期設定に使用するテンプレートをカスタマイズします。
diff --git a/app/config/build.ts b/app/config/build.ts
index b2b1ad49d..aa7c10729 100644
--- a/app/config/build.ts
+++ b/app/config/build.ts
@@ -40,6 +40,7 @@ export const getBuildConfig = () => {
buildMode,
isApp,
template: process.env.DEFAULT_INPUT_TEMPLATE ?? DEFAULT_INPUT_TEMPLATE,
+ visionModels: process.env.VISION_MODELS || "",
};
};
diff --git a/app/constant.ts b/app/constant.ts
index 25c8d98ea..5759411af 100644
--- a/app/constant.ts
+++ b/app/constant.ts
@@ -291,6 +291,22 @@ export const DEFAULT_TTS_VOICES = [
"shimmer",
];
+export const VISION_MODEL_REGEXES = [
+ /vision/,
+ /gpt-4o/,
+ /claude-3/,
+ /gemini-1\.5/,
+ /gemini-exp/,
+ /gemini-2\.0/,
+ /learnlm/,
+ /qwen-vl/,
+ /qwen2-vl/,
+ /gpt-4-turbo(?!.*preview)/, // Matches "gpt-4-turbo" but not "gpt-4-turbo-preview"
+ /^dall-e-3$/, // Matches exactly "dall-e-3"
+];
+
+export const EXCLUDE_VISION_MODEL_REGEXES = [/claude-3-5-haiku-20241022/];
+
const openaiModels = [
"gpt-3.5-turbo",
"gpt-3.5-turbo-1106",
@@ -317,13 +333,23 @@ const openaiModels = [
];
const googleModels = [
- "gemini-1.0-pro",
+ "gemini-1.0-pro", // Deprecated on 2/15/2025
"gemini-1.5-pro-latest",
+ "gemini-1.5-pro",
+ "gemini-1.5-pro-002",
+ "gemini-1.5-pro-exp-0827",
"gemini-1.5-flash-latest",
+ "gemini-1.5-flash-8b-latest",
+ "gemini-1.5-flash",
+ "gemini-1.5-flash-8b",
+ "gemini-1.5-flash-002",
+ "gemini-1.5-flash-exp-0827",
+ "learnlm-1.5-pro-experimental",
"gemini-exp-1114",
"gemini-exp-1121",
- "learnlm-1.5-pro-experimental",
- "gemini-pro-vision",
+ "gemini-exp-1206",
+ "gemini-2.0-flash-exp",
+ "gemini-2.0-flash-thinking-exp-1219",
];
const anthropicModels = [
diff --git a/app/masks/cn.ts b/app/masks/cn.ts
index ed507d734..64842f6e8 100644
--- a/app/masks/cn.ts
+++ b/app/masks/cn.ts
@@ -3,7 +3,7 @@ import { BuiltinMask } from "./typing";
export const CN_MASKS: BuiltinMask[] = [
{
avatar: "1f5bc-fe0f",
- name: "以文搜图",
+ name: "AI文生图",
context: [
{
id: "text-to-pic-0",
@@ -28,7 +28,7 @@ export const CN_MASKS: BuiltinMask[] = [
id: "text-to-pic-3",
role: "system",
content:
- "助手善于判断用户意图,当确定需要提供图片时,助手会变得沉默寡言,只使用以下格式输出markdown图片:,因为这个语法可以自动按照提示生成并渲染图片。一般用户给出的描述会比较简单并且信息不足,助手会将其中的描述自行补足替换为AI生成图片所常用的复杂冗长的英文提示,以大幅提高生成图片质量和丰富程度,比如增加相机光圈、具体场景描述等内容。助手会避免用代码块或原始块包围markdown标记,因为那样只会渲染出代码块或原始块而不是图片。",
+ "助手善于判断用户意图,当确定需要提供图片时,助手会变得沉默寡言,只使用以下格式输出markdown图片:,因为这个语法可以自动按照提示生成并渲染图片。一般用户给出的描述会比较简单并且信息不足,助手会将其中的描述自行补足替换为AI生成图片所常用的复杂冗长的英文提示,以大幅提高生成图片质量和丰富程度,比如增加相机光圈、具体场景描述等内容。助手会避免用代码块或原始块包围markdown标记,因为那样只会渲染出代码块或原始块而不是图片。url中的空格等符号需要转义。",
date: "",
},
],
diff --git a/app/utils.ts b/app/utils.ts
index b62bc126d..962e68a10 100644
--- a/app/utils.ts
+++ b/app/utils.ts
@@ -5,6 +5,8 @@ import { RequestMessage } from "./client/api";
import { ServiceProvider } from "./constant";
// import { fetch as tauriFetch, ResponseType } from "@tauri-apps/api/http";
import { fetch as tauriStreamFetch } from "./utils/stream";
+import { VISION_MODEL_REGEXES, EXCLUDE_VISION_MODEL_REGEXES } from "./constant";
+import { getClientConfig } from "./config/client";
export function trimTopic(topic: string) {
// Fix an issue where double quotes still show in the Indonesian language
@@ -252,27 +254,16 @@ export function getMessageImages(message: RequestMessage): string[] {
}
export function isVisionModel(model: string) {
- // Note: This is a better way using the TypeScript feature instead of `&&` or `||` (ts v5.5.0-dev.20240314 I've been using)
-
- const excludeKeywords = ["claude-3-5-haiku-20241022"];
- const visionKeywords = [
- "vision",
- "gpt-4o",
- "claude-3",
- "gemini-1.5",
- "gemini-exp",
- "learnlm",
- "qwen-vl",
- "qwen2-vl",
- ];
- const isGpt4Turbo =
- model.includes("gpt-4-turbo") && !model.includes("preview");
-
+ const clientConfig = getClientConfig();
+ const envVisionModels = clientConfig?.visionModels
+ ?.split(",")
+ .map((m) => m.trim());
+ if (envVisionModels?.includes(model)) {
+ return true;
+ }
return (
- !excludeKeywords.some((keyword) => model.includes(keyword)) &&
- (visionKeywords.some((keyword) => model.includes(keyword)) ||
- isGpt4Turbo ||
- isDalle3(model))
+ !EXCLUDE_VISION_MODEL_REGEXES.some((regex) => regex.test(model)) &&
+ VISION_MODEL_REGEXES.some((regex) => regex.test(model))
);
}
diff --git a/package.json b/package.json
index f7b544bb2..e081567a4 100644
--- a/package.json
+++ b/package.json
@@ -60,7 +60,7 @@
"@tauri-apps/cli": "1.5.11",
"@testing-library/dom": "^10.4.0",
"@testing-library/jest-dom": "^6.6.3",
- "@testing-library/react": "^16.0.1",
+ "@testing-library/react": "^16.1.0",
"@types/jest": "^29.5.14",
"@types/js-yaml": "4.0.9",
"@types/lodash-es": "^4.17.12",
diff --git a/test/vision-model-checker.test.ts b/test/vision-model-checker.test.ts
new file mode 100644
index 000000000..734e992d8
--- /dev/null
+++ b/test/vision-model-checker.test.ts
@@ -0,0 +1,67 @@
+import { isVisionModel } from "../app/utils";
+
+describe("isVisionModel", () => {
+ const originalEnv = process.env;
+
+ beforeEach(() => {
+ jest.resetModules();
+ process.env = { ...originalEnv };
+ });
+
+ afterEach(() => {
+ process.env = originalEnv;
+ });
+
+ test("should identify vision models using regex patterns", () => {
+ const visionModels = [
+ "gpt-4-vision",
+ "claude-3-opus",
+ "gemini-1.5-pro",
+ "gemini-2.0",
+ "gemini-exp-vision",
+ "learnlm-vision",
+ "qwen-vl-max",
+ "qwen2-vl-max",
+ "gpt-4-turbo",
+ "dall-e-3",
+ ];
+
+ visionModels.forEach((model) => {
+ expect(isVisionModel(model)).toBe(true);
+ });
+ });
+
+ test("should exclude specific models", () => {
+ expect(isVisionModel("claude-3-5-haiku-20241022")).toBe(false);
+ });
+
+ test("should not identify non-vision models", () => {
+ const nonVisionModels = [
+ "gpt-3.5-turbo",
+ "gpt-4-turbo-preview",
+ "claude-2",
+ "regular-model",
+ ];
+
+ nonVisionModels.forEach((model) => {
+ expect(isVisionModel(model)).toBe(false);
+ });
+ });
+
+ test("should identify models from VISION_MODELS env var", () => {
+ process.env.VISION_MODELS = "custom-vision-model,another-vision-model";
+
+ expect(isVisionModel("custom-vision-model")).toBe(true);
+ expect(isVisionModel("another-vision-model")).toBe(true);
+ expect(isVisionModel("unrelated-model")).toBe(false);
+ });
+
+ test("should handle empty or missing VISION_MODELS", () => {
+ process.env.VISION_MODELS = "";
+ expect(isVisionModel("unrelated-model")).toBe(false);
+
+ delete process.env.VISION_MODELS;
+ expect(isVisionModel("unrelated-model")).toBe(false);
+ expect(isVisionModel("gpt-4-vision")).toBe(true);
+ });
+});
\ No newline at end of file
diff --git a/yarn.lock b/yarn.lock
index ff257a3ef..dffc35e9c 100644
--- a/yarn.lock
+++ b/yarn.lock
@@ -2127,10 +2127,10 @@
lodash "^4.17.21"
redent "^3.0.0"
-"@testing-library/react@^16.0.1":
- version "16.0.1"
- resolved "https://registry.yarnpkg.com/@testing-library/react/-/react-16.0.1.tgz#29c0ee878d672703f5e7579f239005e4e0faa875"
- integrity sha512-dSmwJVtJXmku+iocRhWOUFbrERC76TX2Mnf0ATODz8brzAZrMBbzLwQixlBSanZxR6LddK3eiwpSFZgDET1URg==
+"@testing-library/react@^16.1.0":
+ version "16.1.0"
+ resolved "https://registry.yarnpkg.com/@testing-library/react/-/react-16.1.0.tgz#aa0c61398bac82eaf89776967e97de41ac742d71"
+ integrity sha512-Q2ToPvg0KsVL0ohND9A3zLJWcOXXcO8IDu3fj11KhNt0UlCWyFyvnCIBkd12tidB2lkiVRG8VFqdhcqhqnAQtg==
dependencies:
"@babel/runtime" "^7.12.5"