diff --git a/app/client/platforms/openai.ts b/app/client/platforms/openai.ts
index 9b94d23b6..5f348f079 100644
--- a/app/client/platforms/openai.ts
+++ b/app/client/platforms/openai.ts
@@ -74,10 +74,28 @@ export class ChatGPTApi implements LLMApi {
   }
 
   async chat(options: ChatOptions) {
-    const messages = options.messages.map((v) => ({
-      role: v.role,
-      content: v.content,
-    }));
+    const messages = options.messages.map((v) => {
+      let message: {
+        role: string;
+        content: { type: string; text?: string; image_url?: { url: string } }[];
+      } = {
+        role: v.role,
+        content: [],
+      };
+      message.content.push({
+        type: "text",
+        text: v.content,
+      });
+      if (v.image_url) {
+        message.content.push({
+          type: "image_url",
+          image_url: {
+            url: v.image_url,
+          },
+        });
+      }
+      return message;
+    });
 
     const modelConfig = {
       ...useAppConfig.getState().modelConfig,
@@ -95,6 +113,10 @@
       presence_penalty: modelConfig.presence_penalty,
       frequency_penalty: modelConfig.frequency_penalty,
       top_p: modelConfig.top_p,
+      max_tokens:
+        modelConfig.model == "gpt-4-vision-preview"
+          ? modelConfig.max_tokens
+          : null,
       // max_tokens: Math.max(modelConfig.max_tokens, 1024),
       // Please do not ask me why not send max_tokens, no reason, this param is just shit, I dont want to explain anymore.
     };
diff --git a/app/components/chat.tsx b/app/components/chat.tsx
index 7e19fb028..fbcabefba 100644
--- a/app/components/chat.tsx
+++ b/app/components/chat.tsx
@@ -80,6 +80,7 @@ import {
 import { useNavigate } from "react-router-dom";
 import {
   CHAT_PAGE_SIZE,
+  LAST_INPUT_IMAGE_KEY,
   LAST_INPUT_KEY,
   Path,
   REQUEST_TIMEOUT_MS,
@@ -554,7 +555,8 @@ export function ChatActions(props: {
       />
       {config.pluginConfig.enable &&
-        /^gpt(?!.*03\d{2}$).*$/.test(currentModel) && (
+        /^gpt(?!.*03\d{2}$).*$/.test(currentModel) &&
+        currentModel != "gpt-4-vision-preview" && (
@@ ... @@
-    chatStore.onUserInput(userInput).then(() => setIsLoading(false));
+    chatStore
+      .onUserInput(userInput, userImage.base64)
+      .then(() => setIsLoading(false));
     localStorage.setItem(LAST_INPUT_KEY, userInput);
+    localStorage.setItem(LAST_INPUT_IMAGE_KEY, userImage);
     setUserInput("");
     setPromptHints([]);
+    setUserImage(null);
@@ -847,6 +852,7 @@ function _Chat() {
       !(e.metaKey || e.altKey || e.ctrlKey)
     ) {
       setUserInput(localStorage.getItem(LAST_INPUT_KEY) ?? "");
+      setUserImage(localStorage.getItem(LAST_INPUT_IMAGE_KEY));
       e.preventDefault();
       return;
     }
@@ -1331,7 +1337,7 @@ function _Chat() {
         )}
"unfinished-input-" + id; export const STORAGE_KEY = "chatgpt-next-web"; diff --git a/app/store/chat.ts b/app/store/chat.ts index 2961c2a68..85582d765 100644 --- a/app/store/chat.ts +++ b/app/store/chat.ts @@ -274,7 +274,7 @@ export const useChatStore = createPersistStore( get().summarizeSession(); }, - async onUserInput(content: string) { + async onUserInput(content: string, image_url?: string) { const session = get().currentSession(); const modelConfig = session.mask.modelConfig; @@ -284,8 +284,8 @@ export const useChatStore = createPersistStore( const userMessage: ChatMessage = createMessage({ role: "user", content: userContent, + image_url: image_url, }); - const botMessage: ChatMessage = createMessage({ role: "assistant", streaming: true, @@ -319,11 +319,11 @@ export const useChatStore = createPersistStore( session.messages.push(savedUserMessage); session.messages.push(botMessage); }); - if ( config.pluginConfig.enable && session.mask.usePlugins && - allPlugins.length > 0 + allPlugins.length > 0 && + modelConfig.model != "gpt-4-vision-preview" ) { console.log("[ToolAgent] start"); const pluginToolNames = allPlugins.map((m) => m.toolName);