feat: 1. use cache storage to store image data; 2. get base64 image before sending chat to api #5013

lloydzhou
2024-07-19 13:50:10 +08:00
parent afa1a4303b
commit 287fa0a39c
9 changed files with 164 additions and 14 deletions
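
Taken together, the two parts of this change work like this: uploaded images are kept in the browser's Cache Storage under a local URL, and right before a chat request goes out, each image_url that points at that local cache is swapped for a base64 data URL by the new preProcessImageContent helper. Below is a minimal sketch of that flow; the cache name and the cacheImage / cacheImageToBase64 helper names are illustrative assumptions, not necessarily the identifiers this commit adds.

```ts
// Sketch only: illustrates the cache-then-base64 flow this commit introduces.
// The cache name and helper names are assumptions, not the commit's exact API.
const IMAGE_CACHE = "nextchat-image-cache"; // hypothetical cache name

// Store an uploaded image blob in Cache Storage and return its local URL.
async function cacheImage(url: string, blob: Blob): Promise<string> {
  const cache = await caches.open(IMAGE_CACHE);
  await cache.put(
    url,
    new Response(blob, { headers: { "Content-Type": blob.type } }),
  );
  return url;
}

// Before sending to the API, read the cached image back and encode it as a
// base64 data URL that remote providers can accept.
async function cacheImageToBase64(url: string): Promise<string> {
  const cache = await caches.open(IMAGE_CACHE);
  const res = await cache.match(url);
  if (!res) return url; // not cached locally; pass the URL through unchanged
  const blob = await res.blob();
  return await new Promise<string>((resolve, reject) => {
    const reader = new FileReader();
    reader.onload = () => resolve(reader.result as string);
    reader.onerror = reject;
    reader.readAsDataURL(blob);
  });
}
```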

View File

@@ -21,7 +21,7 @@ import {
 } from "@fortaine/fetch-event-source";
 import { prettyObject } from "@/app/utils/format";
 import { getClientConfig } from "@/app/config/client";
-import { getMessageTextContent, isVisionModel } from "@/app/utils";
+import { getMessageTextContent } from "@/app/utils";
 
 export interface OpenAIListModelResponse {
   object: string;

View File

@@ -12,6 +12,7 @@ import {
 import Locale from "../../locales";
 import { prettyObject } from "@/app/utils/format";
 import { getMessageTextContent, isVisionModel } from "@/app/utils";
+import { preProcessImageContent } from "@/app/utils/chat";
 import { cloudflareAIGatewayUrl } from "@/app/utils/cloudflare";
 
 export type MultiBlockContent = {
@@ -93,7 +94,12 @@ export class ClaudeApi implements LLMApi {
       },
     };
 
-    const messages = [...options.messages];
+    // try get base64image from local cache image_url
+    const messages = [];
+    for (const v of options.messages) {
+      const content = await preProcessImageContent(v.content);
+      messages.push({ role: v.role, content });
+    }
 
     const keys = ["system", "user"];
@@ -135,6 +141,7 @@
           content: content
             .filter((v) => v.image_url || v.text)
             .map(({ type, text, image_url }) => {
+              console.log("process message", type, text, image_url);
               if (type === "text") {
                 return {
                   type,
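
All three providers route their messages through the same preProcessImageContent helper from app/utils/chat. That file is among the nine changed files but is not shown in this excerpt, so the following is only a plausible sketch of what it does, assuming the cacheImageToBase64 helper sketched earlier and the { type, text, image_url } content shape visible elsewhere in this diff.

```ts
// Sketch, not the commit's actual implementation: walk a message's content and
// replace locally cached image URLs with base64 data URLs before the API call.
type MessagePart = {
  type: "text" | "image_url";
  text?: string;
  image_url?: { url: string };
};

async function preProcessImageContent(
  content: string | MessagePart[],
): Promise<string | MessagePart[]> {
  if (typeof content === "string") return content; // plain-text messages pass through
  const result: MessagePart[] = [];
  for (const part of content) {
    if (part.type === "image_url" && part.image_url?.url) {
      // swap the local cache URL for a base64 data URL (helper assumed above)
      const url = await cacheImageToBase64(part.image_url.url);
      result.push({ type: "image_url", image_url: { url } });
    } else {
      result.push(part);
    }
  }
  return result;
}
```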

View File

@@ -14,6 +14,7 @@ import {
   getMessageImages,
   isVisionModel,
 } from "@/app/utils";
+import { preProcessImageContent } from "@/app/utils/chat";
 
 export class GeminiProApi implements LLMApi {
   path(path: string): string {
@@ -56,7 +57,14 @@ export class GeminiProApi implements LLMApi {
   async chat(options: ChatOptions): Promise<void> {
     const apiClient = this;
     let multimodal = false;
-    const messages = options.messages.map((v) => {
+
+    // try get base64image from local cache image_url
+    const _messages = [];
+    for (const v of options.messages) {
+      const content = await preProcessImageContent(v.content);
+      _messages.push({ role: v.role, content });
+    }
+    const messages = _messages.map((v) => {
       let parts: any[] = [{ text: getMessageTextContent(v) }];
       if (isVisionModel(options.config.model)) {
         const images = getMessageImages(v);

View File

@@ -11,6 +11,7 @@ import {
 } from "@/app/constant";
 import { useAccessStore, useAppConfig, useChatStore } from "@/app/store";
 import { collectModelsWithDefaultModel } from "@/app/utils/model";
+import { preProcessImageContent } from "@/app/utils/chat";
 import { cloudflareAIGatewayUrl } from "@/app/utils/cloudflare";
 import {
@@ -105,10 +106,13 @@ export class ChatGPTApi implements LLMApi {
   async chat(options: ChatOptions) {
     const visionModel = isVisionModel(options.config.model);
-    const messages = options.messages.map((v) => ({
-      role: v.role,
-      content: visionModel ? v.content : getMessageTextContent(v),
-    }));
+    const messages = [];
+    for (const v of options.messages) {
+      const content = visionModel
+        ? await preProcessImageContent(v.content)
+        : getMessageTextContent(v);
+      messages.push({ role: v.role, content });
+    }
 
     const modelConfig = {
       ...useAppConfig.getState().modelConfig,
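
One small pattern worth noting in the Gemini and OpenAI hunks: the old synchronous .map call cannot simply be given an async callback, because it would return an array of pending Promises instead of messages, so the commit switches to a sequential for...of loop with await. A hypothetical concurrent alternative (not what the commit does), using the same variables as the hunk above, would wrap the mapped promises in Promise.all:

```ts
// Alternative sketch: preprocess all messages concurrently instead of sequentially.
// Promise.all preserves the original message order.
const messages = await Promise.all(
  options.messages.map(async (v) => ({
    role: v.role,
    content: visionModel
      ? await preProcessImageContent(v.content)
      : getMessageTextContent(v),
  })),
);
```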