Merge pull request #4971 from ConnectAI-E/hotfix/alibaba

change build messages for qwen in client
This commit is contained in:
Dogtiti 2024-07-11 10:25:39 +08:00 committed by GitHub
commit f8c2732fdc
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 35 additions and 37 deletions

View File

@ -91,34 +91,14 @@ async function request(req: NextRequest) {
);
const fetchUrl = `${baseUrl}${path}`;
const clonedBody = await req.text();
const { messages, model, stream, top_p, ...rest } = JSON.parse(
clonedBody,
) as RequestPayload;
const requestBody = {
model,
input: {
messages,
},
parameters: {
...rest,
top_p: top_p === 1 ? 0.99 : top_p, // qwen requires top_p to be < 1
result_format: "message",
incremental_output: true,
},
};
const fetchOptions: RequestInit = {
headers: {
"Content-Type": "application/json",
Authorization: req.headers.get("Authorization") ?? "",
"X-DashScope-SSE": stream ? "enable" : "disable",
"X-DashScope-SSE": req.headers.get("X-DashScope-SSE") ?? "disable",
},
method: req.method,
body: JSON.stringify(requestBody),
body: req.body,
redirect: "manual",
// @ts-ignore
duplex: "half",
@ -128,18 +108,23 @@ async function request(req: NextRequest) {
// #1815 try to refuse requests for models not allowed on this server
if (serverConfig.customModels && req.body) {
try {
const clonedBody = await req.text();
fetchOptions.body = clonedBody;
const jsonBody = JSON.parse(clonedBody) as { model?: string };
// not undefined and is false
if (
isModelAvailableInServer(
serverConfig.customModels,
model as string,
jsonBody?.model as string,
ServiceProvider.Alibaba as string,
)
) {
return NextResponse.json(
{
error: true,
message: `you are not allowed to use ${model} model`,
message: `you are not allowed to use ${jsonBody?.model} model`,
},
{
status: 403,

View File

@ -32,19 +32,25 @@ export interface OpenAIListModelResponse {
}>;
}
interface RequestPayload {
interface RequestInput {
messages: {
role: "system" | "user" | "assistant";
content: string | MultimodalContent[];
}[];
stream?: boolean;
model: string;
}
interface RequestParam {
result_format: string;
incremental_output?: boolean;
temperature: number;
presence_penalty: number;
frequency_penalty: number;
repetition_penalty?: number;
top_p: number;
max_tokens?: number;
}
interface RequestPayload {
model: string;
input: RequestInput;
parameters: RequestParam;
}
export class QwenApi implements LLMApi {
path(path: string): string {
@ -91,17 +97,21 @@ export class QwenApi implements LLMApi {
},
};
const shouldStream = !!options.config.stream;
const requestPayload: RequestPayload = {
messages,
stream: options.config.stream,
model: modelConfig.model,
temperature: modelConfig.temperature,
presence_penalty: modelConfig.presence_penalty,
frequency_penalty: modelConfig.frequency_penalty,
top_p: modelConfig.top_p,
input: {
messages,
},
parameters: {
result_format: "message",
incremental_output: shouldStream,
temperature: modelConfig.temperature,
// max_tokens: modelConfig.max_tokens,
top_p: modelConfig.top_p === 1 ? 0.99 : modelConfig.top_p, // qwen requires top_p to be < 1
},
};
const shouldStream = !!options.config.stream;
const controller = new AbortController();
options.onController?.(controller);
@ -111,7 +121,10 @@ export class QwenApi implements LLMApi {
method: "POST",
body: JSON.stringify(requestPayload),
signal: controller.signal,
headers: getHeaders(),
headers: {
...getHeaders(),
"X-DashScope-SSE": shouldStream ? "enable" : "disable",
},
};
// make a fetch request