Merge pull request #4971 from ConnectAI-E/hotfix/alibaba

change build messages for qwen in client

commit f8c2732fdc
@@ -91,34 +91,14 @@ async function request(req: NextRequest) {
   );
 
   const fetchUrl = `${baseUrl}${path}`;
-
-  const clonedBody = await req.text();
-
-  const { messages, model, stream, top_p, ...rest } = JSON.parse(
-    clonedBody,
-  ) as RequestPayload;
-
-  const requestBody = {
-    model,
-    input: {
-      messages,
-    },
-    parameters: {
-      ...rest,
-      top_p: top_p === 1 ? 0.99 : top_p, // qwen top_p is should be < 1
-      result_format: "message",
-      incremental_output: true,
-    },
-  };
-
   const fetchOptions: RequestInit = {
     headers: {
       "Content-Type": "application/json",
       Authorization: req.headers.get("Authorization") ?? "",
-      "X-DashScope-SSE": stream ? "enable" : "disable",
+      "X-DashScope-SSE": req.headers.get("X-DashScope-SSE") ?? "disable",
     },
     method: req.method,
-    body: JSON.stringify(requestBody),
+    body: req.body,
     redirect: "manual",
     // @ts-ignore
     duplex: "half",
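Taken together, this hunk turns the proxy into a plain pass-through: the body is no longer parsed and rebuilt on the server, and the streaming switch now travels as a request header. A minimal sketch of the resulting pattern, assuming `req`, `baseUrl`, and `path` are in scope as in the hunk context:

    // The DashScope-shaped payload is now assembled by the client, so the
    // proxy forwards the raw body without a JSON decode/encode round-trip.
    const fetchOptions: RequestInit = {
      headers: {
        "Content-Type": "application/json",
        Authorization: req.headers.get("Authorization") ?? "",
        // The streaming preference arrives as a header, not a body field.
        "X-DashScope-SSE": req.headers.get("X-DashScope-SSE") ?? "disable",
      },
      method: req.method,
      body: req.body, // the unconsumed ReadableStream
      redirect: "manual",
      // @ts-ignore -- fetch requires `duplex` when the body is a stream
      duplex: "half",
    };
    const res = await fetch(`${baseUrl}${path}`, fetchOptions);

Keeping the payload format out of the proxy means a future DashScope API change only touches the client.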
@@ -128,18 +108,23 @@ async function request(req: NextRequest) {
   // #1815 try to refuse some request to some models
   if (serverConfig.customModels && req.body) {
     try {
+      const clonedBody = await req.text();
+      fetchOptions.body = clonedBody;
+
+      const jsonBody = JSON.parse(clonedBody) as { model?: string };
+
       // not undefined and is false
       if (
         isModelAvailableInServer(
           serverConfig.customModels,
-          model as string,
+          jsonBody?.model as string,
           ServiceProvider.Alibaba as string,
         )
       ) {
         return NextResponse.json(
           {
             error: true,
-            message: `you are not allowed to use ${model} model`,
+            message: `you are not allowed to use ${jsonBody?.model} model`,
           },
           {
             status: 403,
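The re-added `req.text()` here explains the `body: req.body` change above: a request stream can only be read once, so when the model allow-list check needs to inspect the body, the drained text is written back to `fetchOptions.body`. A sketch of that consume-and-restore step, assuming the `fetchOptions` object from the previous hunk:

    // req.text() drains the one-shot request stream; after this call the
    // stream assigned to fetchOptions.body would be unreadable, so the
    // decoded text replaces it as the outgoing body.
    const clonedBody = await req.text();
    fetchOptions.body = clonedBody;

    // Parse only the field the allow-list check needs.
    const jsonBody = JSON.parse(clonedBody) as { model?: string };

The remaining hunks are on the client side, in the `QwenApi` class.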
@@ -32,19 +32,25 @@ export interface OpenAIListModelResponse {
   }>;
 }
 
-interface RequestPayload {
+interface RequestInput {
   messages: {
     role: "system" | "user" | "assistant";
     content: string | MultimodalContent[];
   }[];
-  stream?: boolean;
-  model: string;
+}
+interface RequestParam {
+  result_format: string;
+  incremental_output?: boolean;
   temperature: number;
-  presence_penalty: number;
-  frequency_penalty: number;
+  repetition_penalty?: number;
   top_p: number;
   max_tokens?: number;
 }
+interface RequestPayload {
+  model: string;
+  input: RequestInput;
+  parameters: RequestParam;
+}
 
 export class QwenApi implements LLMApi {
   path(path: string): string {
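The split mirrors DashScope's request envelope, where messages live under `input` and generation options under `parameters`. An illustrative payload that type-checks against the new interfaces (the model id and message text are placeholders):

    const payload: RequestPayload = {
      model: "qwen-turbo", // placeholder DashScope model id
      input: {
        messages: [{ role: "user", content: "Hello" }],
      },
      parameters: {
        result_format: "message", // return chat messages, not raw text
        incremental_output: true, // emit deltas when streaming
        temperature: 1.0,
        top_p: 0.99, // must stay strictly below 1
      },
    };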
@@ -91,17 +97,21 @@ export class QwenApi implements LLMApi {
       },
     };
 
+    const shouldStream = !!options.config.stream;
     const requestPayload: RequestPayload = {
-      messages,
-      stream: options.config.stream,
       model: modelConfig.model,
-      temperature: modelConfig.temperature,
-      presence_penalty: modelConfig.presence_penalty,
-      frequency_penalty: modelConfig.frequency_penalty,
-      top_p: modelConfig.top_p,
+      input: {
+        messages,
+      },
+      parameters: {
+        result_format: "message",
+        incremental_output: shouldStream,
+        temperature: modelConfig.temperature,
+        // max_tokens: modelConfig.max_tokens,
+        top_p: modelConfig.top_p === 1 ? 0.99 : modelConfig.top_p, // qwen top_p is should be < 1
+      },
     };
 
-    const shouldStream = !!options.config.stream;
     const controller = new AbortController();
     options.onController?.(controller);
 
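Two details in the new `parameters` block are worth noting: `incremental_output` is tied to the stream flag, so non-streaming calls return one complete message, and a `top_p` of exactly 1 is clamped to 0.99 because DashScope requires `top_p` strictly below 1. A hypothetical helper expressing that clamp rule:

    // DashScope rejects top_p >= 1; a UI slider at exactly 1 is nudged
    // just under the bound. (clampTopP is illustrative, not in the diff.)
    const clampTopP = (p: number): number => (p === 1 ? 0.99 : p);

    clampTopP(1);    // 0.99 -- at the bound, clamped
    clampTopP(0.8);  // 0.8  -- already valid, unchanged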
@@ -111,7 +121,10 @@ export class QwenApi implements LLMApi {
       method: "POST",
       body: JSON.stringify(requestPayload),
       signal: controller.signal,
-      headers: getHeaders(),
+      headers: {
+        ...getHeaders(),
+        "X-DashScope-SSE": shouldStream ? "enable" : "disable",
+      },
     };
 
     // make a fetch request
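Because the spread of `getHeaders()` comes first, the `X-DashScope-SSE` entry added after it wins over any same-named key in the merged object, and the proxy above forwards it verbatim. A sketch of the completed request, assuming `requestPayload`, `controller`, and `shouldStream` from the hunks above and a `chatPath` URL resolved elsewhere in the class (`chatPath` is an assumption, not shown in this diff):

    // getHeaders() supplies auth and content-type; the SSE switch is
    // layered on afterwards so it takes precedence in the merged headers.
    const chatPayload: RequestInit = {
      method: "POST",
      body: JSON.stringify(requestPayload),
      signal: controller.signal,
      headers: {
        ...getHeaders(),
        "X-DashScope-SSE": shouldStream ? "enable" : "disable",
      },
    };
    const res = await fetch(chatPath, chatPayload);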