diff --git a/app/client/platforms/openai.ts b/app/client/platforms/openai.ts
index 9b94d23b6..5f348f079 100644
--- a/app/client/platforms/openai.ts
+++ b/app/client/platforms/openai.ts
@@ -74,10 +74,28 @@ export class ChatGPTApi implements LLMApi {
   }
 
   async chat(options: ChatOptions) {
-    const messages = options.messages.map((v) => ({
-      role: v.role,
-      content: v.content,
-    }));
+    const messages = options.messages.map((v) => {
+      let message: {
+        role: string;
+        content: { type: string; text?: string; image_url?: { url: string } }[];
+      } = {
+        role: v.role,
+        content: [],
+      };
+      message.content.push({
+        type: "text",
+        text: v.content,
+      });
+      if (v.image_url) {
+        message.content.push({
+          type: "image_url",
+          image_url: {
+            url: v.image_url,
+          },
+        });
+      }
+      return message;
+    });
 
     const modelConfig = {
       ...useAppConfig.getState().modelConfig,
@@ -95,6 +113,10 @@
       presence_penalty: modelConfig.presence_penalty,
       frequency_penalty: modelConfig.frequency_penalty,
       top_p: modelConfig.top_p,
+      max_tokens:
+        modelConfig.model == "gpt-4-vision-preview"
+          ? modelConfig.max_tokens
+          : null,
       // max_tokens: Math.max(modelConfig.max_tokens, 1024),
       // Please do not ask me why not send max_tokens, no reason, this param is just shit, I dont want to explain anymore.
     };
diff --git a/app/components/chat.tsx b/app/components/chat.tsx
index 7e19fb028..fbcabefba 100644
--- a/app/components/chat.tsx
+++ b/app/components/chat.tsx
@@ -80,6 +80,7 @@ import {
 import { useNavigate } from "react-router-dom";
 import {
   CHAT_PAGE_SIZE,
+  LAST_INPUT_IMAGE_KEY,
   LAST_INPUT_KEY,
   Path,
   REQUEST_TIMEOUT_MS,
@@ -554,7 +555,8 @@ export function ChatActions(props: {
       />
       {config.pluginConfig.enable &&
-        /^gpt(?!.*03\d{2}$).*$/.test(currentModel) && (
+        /^gpt(?!.*03\d{2}$).*$/.test(currentModel) &&
+        currentModel != "gpt-4-vision-preview" && (
@@ ... @@
-    chatStore.onUserInput(userInput).then(() => setIsLoading(false));
+    chatStore
+      .onUserInput(userInput, userImage.base64)
+      .then(() => setIsLoading(false));
     localStorage.setItem(LAST_INPUT_KEY, userInput);
+    localStorage.setItem(LAST_INPUT_IMAGE_KEY, userImage);
     setUserInput("");
     setPromptHints([]);
+    setUserImage(null);
@@ -847,6 +852,7 @@ function _Chat() {
       !(e.metaKey || e.altKey || e.ctrlKey)
     ) {
       setUserInput(localStorage.getItem(LAST_INPUT_KEY) ?? "");
+      setUserImage(localStorage.getItem(LAST_INPUT_IMAGE_KEY));
       e.preventDefault();
       return;
     }
@@ -1331,7 +1337,7 @@ function _Chat() {
         )}
"unfinished-input-" + id; export const STORAGE_KEY = "chatgpt-next-web"; diff --git a/app/store/chat.ts b/app/store/chat.ts index 2961c2a68..85582d765 100644 --- a/app/store/chat.ts +++ b/app/store/chat.ts @@ -274,7 +274,7 @@ export const useChatStore = createPersistStore( get().summarizeSession(); }, - async onUserInput(content: string) { + async onUserInput(content: string, image_url?: string) { const session = get().currentSession(); const modelConfig = session.mask.modelConfig; @@ -284,8 +284,8 @@ export const useChatStore = createPersistStore( const userMessage: ChatMessage = createMessage({ role: "user", content: userContent, + image_url: image_url, }); - const botMessage: ChatMessage = createMessage({ role: "assistant", streaming: true, @@ -319,11 +319,11 @@ export const useChatStore = createPersistStore( session.messages.push(savedUserMessage); session.messages.push(botMessage); }); - if ( config.pluginConfig.enable && session.mask.usePlugins && - allPlugins.length > 0 + allPlugins.length > 0 && + modelConfig.model != "gpt-4-vision-preview" ) { console.log("[ToolAgent] start"); const pluginToolNames = allPlugins.map((m) => m.toolName);