Add vision support (#4076)

TheRam_
2024-02-20 18:04:32 +08:00
committed by GitHub
parent 05b6d989b6
commit e2da3406d2
16 changed files with 650 additions and 73 deletions

app/client/platforms/google.ts

@@ -3,6 +3,12 @@ import { ChatOptions, getHeaders, LLMApi, LLMModel, LLMUsage } from "../api";
 import { useAccessStore, useAppConfig, useChatStore } from "@/app/store";
 import { getClientConfig } from "@/app/config/client";
 import { DEFAULT_API_HOST } from "@/app/constant";
+import {
+  getMessageTextContent,
+  getMessageImages,
+  isVisionModel,
+} from "@/app/utils";
+
 export class GeminiProApi implements LLMApi {
   extractMessage(res: any) {
     console.log("[Response] gemini-pro response: ", res);
@@ -15,10 +21,33 @@ export class GeminiProApi implements LLMApi {
   }
   async chat(options: ChatOptions): Promise<void> {
     // const apiClient = this;
-    const messages = options.messages.map((v) => ({
-      role: v.role.replace("assistant", "model").replace("system", "user"),
-      parts: [{ text: v.content }],
-    }));
+    const visionModel = isVisionModel(options.config.model);
+    let multimodal = false;
+    const messages = options.messages.map((v) => {
+      let parts: any[] = [{ text: getMessageTextContent(v) }];
+      if (visionModel) {
+        const images = getMessageImages(v);
+        if (images.length > 0) {
+          multimodal = true;
+          parts = parts.concat(
+            images.map((image) => {
+              const imageType = image.split(";")[0].split(":")[1];
+              const imageData = image.split(",")[1];
+              return {
+                inline_data: {
+                  mime_type: imageType,
+                  data: imageData,
+                },
+              };
+            }),
+          );
+        }
+      }
+      return {
+        role: v.role.replace("assistant", "model").replace("system", "user"),
+        parts: parts,
+      };
+    });

     // google requires that role in neighboring messages must not be the same
     for (let i = 0; i < messages.length - 1; ) {
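
The one-liner split inside the map above is dense: it unpacks a browser-style data URL into the mime type and base64 payload that Gemini's inline_data part expects. A minimal standalone sketch of the same split, assuming images arrive as "data:<mime>;base64,<payload>" strings (toInlineDataPart is a hypothetical name, not in the diff):

    // Hypothetical helper restating the split performed inline above.
    function toInlineDataPart(image: string) {
      const imageType = image.split(";")[0].split(":")[1]; // "data:image/png;..." -> "image/png"
      const imageData = image.split(",")[1]; // everything after the first comma is the base64 payload
      return { inline_data: { mime_type: imageType, data: imageData } };
    }

    // toInlineDataPart("data:image/png;base64,iVBORw0KGgo")
    // => { inline_data: { mime_type: "image/png", data: "iVBORw0KGgo" } }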
@@ -33,7 +62,9 @@ export class GeminiProApi implements LLMApi {
         i++;
       }
     }
-
+    // if (visionModel && messages.length > 1) {
+    //   options.onError?.(new Error("Multiturn chat is not enabled for models/gemini-pro-vision"));
+    // }
     const modelConfig = {
       ...useAppConfig.getState().modelConfig,
       ...useChatStore.getState().currentSession().mask.modelConfig,
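
Only the tail of the role-deduplication loop (the i++ and its closing braces) falls inside this hunk; the body sits outside the visible context. A plausible reconstruction of that pass, hedged since the diff does not show it: Gemini rejects consecutive turns from the same role, so adjacent same-role messages are collapsed into one.

    // Assumed body of the loop whose tail appears above (not shown in this diff).
    for (let i = 0; i < messages.length - 1; ) {
      if (messages[i].role === messages[i + 1].role) {
        messages[i].parts = messages[i].parts.concat(messages[i + 1].parts); // merge neighbor's parts
        messages.splice(i + 1, 1); // drop the absorbed message; stay at the same index
      } else {
        i++; // roles differ, advance
      }
    }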
@@ -80,13 +111,16 @@ export class GeminiProApi implements LLMApi {
     const controller = new AbortController();
     options.onController?.(controller);

     try {
-      let chatPath = this.path(Google.ChatPath);
+      let googleChatPath = visionModel
+        ? Google.VisionChatPath
+        : Google.ChatPath;
+      let chatPath = this.path(googleChatPath);

       // let baseUrl = accessStore.googleUrl;

       if (!baseUrl) {
         baseUrl = isApp
-          ? DEFAULT_API_HOST + "/api/proxy/google/" + Google.ChatPath
+          ? DEFAULT_API_HOST + "/api/proxy/google/" + googleChatPath
           : chatPath;
       }
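
The endpoint switch assumes a VisionChatPath constant alongside the existing ChatPath on the Google namespace in @/app/constant. A sketch of the likely values, assuming they follow the public Gemini generateContent routes (the real constants are not shown in this diff):

    // Assumed values; the real constants live in @/app/constant.
    const Google = {
      ChatPath: "v1beta/models/gemini-pro:generateContent",
      VisionChatPath: "v1beta/models/gemini-pro-vision:generateContent",
    };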
@@ -152,6 +186,19 @@ export class GeminiProApi implements LLMApi {
               value,
             }): Promise<any> {
               if (done) {
+                if (response.status !== 200) {
+                  try {
+                    let data = JSON.parse(ensureProperEnding(partialData));
+                    if (data && data[0].error) {
+                      options.onError?.(new Error(data[0].error.message));
+                    } else {
+                      options.onError?.(new Error("Request failed"));
+                    }
+                  } catch (_) {
+                    options.onError?.(new Error("Request failed"));
+                  }
+                }
+
                 console.log("Stream complete");
                 // options.onFinish(responseText + remainText);
                 finished = true;
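
The new error branch runs partialData through ensureProperEnding before JSON.parse. Gemini streams its reply as a single JSON array, so a stream that stops early is typically missing only the closing bracket. A sketch of a helper consistent with that call site (assumed implementation, defined elsewhere in this file):

    // Assumed helper: patch a truncated JSON-array stream so it parses.
    function ensureProperEnding(str: string) {
      if (str.startsWith("[") && !str.endsWith("]")) {
        return str + "]";
      }
      return str;
    }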

app/client/platforms/openai.ts

@@ -9,7 +9,14 @@ import {
 } from "@/app/constant";
 import { useAccessStore, useAppConfig, useChatStore } from "@/app/store";
-import { ChatOptions, getHeaders, LLMApi, LLMModel, LLMUsage } from "../api";
+import {
+  ChatOptions,
+  getHeaders,
+  LLMApi,
+  LLMModel,
+  LLMUsage,
+  MultimodalContent,
+} from "../api";
 import Locale from "../../locales";
 import {
   EventStreamContentType,
@@ -18,6 +25,11 @@ import {
 import { prettyObject } from "@/app/utils/format";
 import { getClientConfig } from "@/app/config/client";
 import { makeAzurePath } from "@/app/azure";
+import {
+  getMessageTextContent,
+  getMessageImages,
+  isVisionModel,
+} from "@/app/utils";

 export interface OpenAIListModelResponse {
   object: string;
@@ -72,9 +84,10 @@ export class ChatGPTApi implements LLMApi {
   }

   async chat(options: ChatOptions) {
+    const visionModel = isVisionModel(options.config.model);
     const messages = options.messages.map((v) => ({
       role: v.role,
-      content: visionModel ? v.content : getMessageTextContent(v),
     }));

     const modelConfig = {
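
Both platform clients lean on the three helpers imported from @/app/utils: getMessageTextContent flattens a message to plain text for non-vision models, getMessageImages pulls the image data URLs back out, and isVisionModel gates the whole feature. Plausible sketches inferred from the call sites above (assumed shapes, not the repository's exact code):

    // Assumed multimodal message shape, mirroring OpenAI's content parts.
    type MultimodalPart =
      | { type: "text"; text: string }
      | { type: "image_url"; image_url: { url: string } };

    type MessageContent = string | MultimodalPart[];

    // Return the first text part, or the string content as-is.
    function getMessageTextContent(message: { content: MessageContent }): string {
      if (typeof message.content === "string") return message.content;
      for (const part of message.content) {
        if (part.type === "text") return part.text;
      }
      return "";
    }

    // Collect every image data URL from the multimodal parts.
    function getMessageImages(message: { content: MessageContent }): string[] {
      if (typeof message.content === "string") return [];
      return message.content
        .filter((p): p is Extract<MultimodalPart, { type: "image_url" }> => p.type === "image_url")
        .map((p) => p.image_url.url);
    }

    // Assumed heuristic: vision-capable models carry "vision" in their name.
    function isVisionModel(model: string): boolean {
      return model.includes("vision");
    }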