Merge pull request #4971 from ConnectAI-E/hotfix/alibaba

change build messages for qwen in client
This commit is contained in:
Dogtiti 2024-07-11 10:25:39 +08:00 committed by GitHub
commit f8c2732fdc
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 35 additions and 37 deletions

View File

@ -91,34 +91,14 @@ async function request(req: NextRequest) {
); );
const fetchUrl = `${baseUrl}${path}`; const fetchUrl = `${baseUrl}${path}`;
const clonedBody = await req.text();
const { messages, model, stream, top_p, ...rest } = JSON.parse(
clonedBody,
) as RequestPayload;
const requestBody = {
model,
input: {
messages,
},
parameters: {
...rest,
top_p: top_p === 1 ? 0.99 : top_p, // qwen top_p should be < 1
result_format: "message",
incremental_output: true,
},
};
const fetchOptions: RequestInit = { const fetchOptions: RequestInit = {
headers: { headers: {
"Content-Type": "application/json", "Content-Type": "application/json",
Authorization: req.headers.get("Authorization") ?? "", Authorization: req.headers.get("Authorization") ?? "",
"X-DashScope-SSE": stream ? "enable" : "disable", "X-DashScope-SSE": req.headers.get("X-DashScope-SSE") ?? "disable",
}, },
method: req.method, method: req.method,
body: JSON.stringify(requestBody), body: req.body,
redirect: "manual", redirect: "manual",
// @ts-ignore // @ts-ignore
duplex: "half", duplex: "half",
@ -128,18 +108,23 @@ async function request(req: NextRequest) {
// #1815 try to refuse some request to some models // #1815 try to refuse some request to some models
if (serverConfig.customModels && req.body) { if (serverConfig.customModels && req.body) {
try { try {
const clonedBody = await req.text();
fetchOptions.body = clonedBody;
const jsonBody = JSON.parse(clonedBody) as { model?: string };
// not undefined and is false // not undefined and is false
if ( if (
isModelAvailableInServer( isModelAvailableInServer(
serverConfig.customModels, serverConfig.customModels,
model as string, jsonBody?.model as string,
ServiceProvider.Alibaba as string, ServiceProvider.Alibaba as string,
) )
) { ) {
return NextResponse.json( return NextResponse.json(
{ {
error: true, error: true,
message: `you are not allowed to use ${model} model`, message: `you are not allowed to use ${jsonBody?.model} model`,
}, },
{ {
status: 403, status: 403,

View File

@ -32,19 +32,25 @@ export interface OpenAIListModelResponse {
}>; }>;
} }
interface RequestPayload { interface RequestInput {
messages: { messages: {
role: "system" | "user" | "assistant"; role: "system" | "user" | "assistant";
content: string | MultimodalContent[]; content: string | MultimodalContent[];
}[]; }[];
stream?: boolean; }
model: string; interface RequestParam {
result_format: string;
incremental_output?: boolean;
temperature: number; temperature: number;
presence_penalty: number; repetition_penalty?: number;
frequency_penalty: number;
top_p: number; top_p: number;
max_tokens?: number; max_tokens?: number;
} }
interface RequestPayload {
model: string;
input: RequestInput;
parameters: RequestParam;
}
export class QwenApi implements LLMApi { export class QwenApi implements LLMApi {
path(path: string): string { path(path: string): string {
@ -91,17 +97,21 @@ export class QwenApi implements LLMApi {
}, },
}; };
const shouldStream = !!options.config.stream;
const requestPayload: RequestPayload = { const requestPayload: RequestPayload = {
messages,
stream: options.config.stream,
model: modelConfig.model, model: modelConfig.model,
temperature: modelConfig.temperature, input: {
presence_penalty: modelConfig.presence_penalty, messages,
frequency_penalty: modelConfig.frequency_penalty, },
top_p: modelConfig.top_p, parameters: {
result_format: "message",
incremental_output: shouldStream,
temperature: modelConfig.temperature,
// max_tokens: modelConfig.max_tokens,
top_p: modelConfig.top_p === 1 ? 0.99 : modelConfig.top_p, // qwen top_p should be < 1
},
}; };
const shouldStream = !!options.config.stream;
const controller = new AbortController(); const controller = new AbortController();
options.onController?.(controller); options.onController?.(controller);
@ -111,7 +121,10 @@ export class QwenApi implements LLMApi {
method: "POST", method: "POST",
body: JSON.stringify(requestPayload), body: JSON.stringify(requestPayload),
signal: controller.signal, signal: controller.signal,
headers: getHeaders(), headers: {
...getHeaders(),
"X-DashScope-SSE": shouldStream ? "enable" : "disable",
},
}; };
// make a fetch request // make a fetch request