feat(alibaba): Added alibaba vision model and omni model support

This commit is contained in:
EvanWu
2025-02-24 20:18:07 +08:00
parent f5f3ce94f6
commit b709ee3983
4 changed files with 62 additions and 13 deletions

View File

@@ -40,6 +40,11 @@ export interface MultimodalContent {
};
}
export interface MultimodalContentForAlibaba {
text?: string;
image?: string;
}
export interface RequestMessage {
role: MessageRole;
content: string | MultimodalContent[];

View File

@@ -7,7 +7,10 @@ import {
ChatMessageTool,
usePluginStore,
} from "@/app/store";
import { streamWithThink } from "@/app/utils/chat";
import {
preProcessImageContentForAlibabaDashScope,
streamWithThink,
} from "@/app/utils/chat";
import {
ChatOptions,
getHeaders,
@@ -15,12 +18,14 @@ import {
LLMModel,
SpeechOptions,
MultimodalContent,
MultimodalContentForAlibaba,
} from "../api";
import { getClientConfig } from "@/app/config/client";
import {
getMessageTextContent,
getMessageTextContentWithoutThinking,
getTimeoutMSByModel,
isVisionModel,
} from "@/app/utils";
import { fetch } from "@/app/utils/stream";
@@ -89,14 +94,6 @@ export class QwenApi implements LLMApi {
}
async chat(options: ChatOptions) {
const messages = options.messages.map((v) => ({
role: v.role,
content:
v.role === "assistant"
? getMessageTextContentWithoutThinking(v)
: getMessageTextContent(v),
}));
const modelConfig = {
...useAppConfig.getState().modelConfig,
...useChatStore.getState().currentSession().mask.modelConfig,
@@ -105,6 +102,21 @@ export class QwenApi implements LLMApi {
},
};
const visionModel = isVisionModel(options.config.model);
const messages: ChatOptions["messages"] = [];
for (const v of options.messages) {
const content = (
visionModel
? await preProcessImageContentForAlibabaDashScope(v.content)
: v.role === "assistant"
? getMessageTextContentWithoutThinking(v)
: getMessageTextContent(v)
) as any;
messages.push({ role: v.role, content });
}
const shouldStream = !!options.config.stream;
const requestPayload: RequestPayload = {
model: modelConfig.model,
@@ -129,7 +141,7 @@ export class QwenApi implements LLMApi {
"X-DashScope-SSE": shouldStream ? "enable" : "disable",
};
const chatPath = this.path(Alibaba.ChatPath);
const chatPath = this.path(Alibaba.ChatPath(modelConfig.model));
const chatPayload = {
method: "POST",
body: JSON.stringify(requestPayload),
@@ -162,7 +174,7 @@ export class QwenApi implements LLMApi {
const json = JSON.parse(text);
const choices = json.output.choices as Array<{
message: {
content: string | null;
content: string | null | MultimodalContentForAlibaba[];
tool_calls: ChatMessageTool[];
reasoning_content: string | null;
};
@@ -212,7 +224,9 @@ export class QwenApi implements LLMApi {
} else if (content && content.length > 0) {
return {
isThinking: false,
content: content,
content: Array.isArray(content)
? content.map((item) => item.text).join(",")
: content,
};
}