// ChatGPT-Next-Web/app/components/realtime-chat/realtime-chat.tsx

import VoiceIcon from "@/app/icons/voice.svg";
import VoiceOffIcon from "@/app/icons/voice-off.svg";
import Close24Icon from "@/app/icons/close-24.svg";
import PowerIcon from "@/app/icons/power.svg";
import styles from "./realtime-chat.module.scss";
import clsx from "clsx";
import { useState, useRef, useEffect } from "react";
import {
  useAccessStore,
  useChatStore,
  ChatMessage,
  createMessage,
} from "@/app/store";
import { IconButton } from "@/app/components/button";
import {
  Modality,
  RTClient,
  RTInputAudioItem,
  RTResponse,
  TurnDetection,
} from "rt-client";
import { AudioHandler } from "@/app/lib/audio";
import { uploadImage } from "@/app/utils/chat";

interface RealtimeChatProps {
  onClose?: () => void;
  onStartVoice?: () => void;
  onPausedVoice?: () => void;
}
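
/**
 * Full-screen realtime voice-chat panel. Connects to the OpenAI or Azure
 * OpenAI realtime API through rt-client, streams microphone audio up, plays
 * the model's audio reply back, and mirrors both transcripts into the
 * current chat session.
 *
 * Usage sketch (assuming a parent component with a `showRealtime` flag):
 *   {showRealtime && <RealtimeChat onClose={() => setShowRealtime(false)} />}
 */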
export function RealtimeChat({
  onClose,
  onStartVoice,
  onPausedVoice,
}: RealtimeChatProps) {
  const currentItemId = useRef<string>("");
  const currentBotMessage = useRef<ChatMessage | null>();
  const currentUserMessage = useRef<ChatMessage | null>();
  const accessStore = useAccessStore.getState();
  const chatStore = useChatStore();
  const session = chatStore.currentSession();
  const [isRecording, setIsRecording] = useState(false);
  const [isConnected, setIsConnected] = useState(false);
  const [isConnecting, setIsConnecting] = useState(false);
  const [modality, setModality] = useState("audio");
  const [isAzure, setIsAzure] = useState(false);
  const [endpoint, setEndpoint] = useState("");
  const [deployment, setDeployment] = useState("");
  const [useVAD, setUseVAD] = useState(true);
  const clientRef = useRef<RTClient | null>(null);
  const audioHandlerRef = useRef<AudioHandler | null>(null);
  const apiKey = accessStore.openaiApiKey;
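
  // Toggle the connection: on connect, build an RTClient for either Azure
  // (endpoint + deployment) or OpenAI, configure the session, and start
  // listening for server events; on a second press, disconnect.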
  const handleConnect = async () => {
    if (!isConnected) {
      try {
        setIsConnecting(true);
        clientRef.current = isAzure
          ? new RTClient(new URL(endpoint), { key: apiKey }, { deployment })
          : new RTClient(
              { key: apiKey },
              { model: "gpt-4o-realtime-preview-2024-10-01" },
            );
        const modalities: Modality[] =
          modality === "audio" ? ["text", "audio"] : ["text"];
        const turnDetection: TurnDetection = useVAD
          ? { type: "server_vad" }
          : null;
        clientRef.current.configure({
          instructions: "Hi",
          input_audio_transcription: { model: "whisper-1" },
          turn_detection: turnDetection,
          tools: [],
          temperature: 0.9,
          modalities,
        });
        startResponseListener();
        setIsConnected(true);
      } catch (error) {
        console.error("Connection failed:", error);
      } finally {
        setIsConnecting(false);
      }
    } else {
      await disconnect();
    }
  };

  const disconnect = async () => {
    if (clientRef.current) {
      try {
        await clientRef.current.close();
        clientRef.current = null;
        setIsConnected(false);
      } catch (error) {
        console.error("Disconnect failed:", error);
      }
    }
  };
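
  // Drain the server event stream for the lifetime of the connection,
  // routing model responses and transcribed user audio to their handlers.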
  const startResponseListener = async () => {
    if (!clientRef.current) return;
    try {
      for await (const serverEvent of clientRef.current.events()) {
        if (serverEvent.type === "response") {
          await handleResponse(serverEvent);
        } else if (serverEvent.type === "input_audio") {
          await handleInputAudio(serverEvent);
        }
      }
    } catch (error) {
      // only log if we didn't close the client ourselves
      if (clientRef.current) {
        console.error("Response iteration error:", error);
      }
    }
  };
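
  // Stream one model response into a fresh assistant message: text chunks
  // append directly; audio content is transcribed and played in parallel,
  // then the played audio is uploaded so the message keeps a playable URL.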
  const handleResponse = async (response: RTResponse) => {
    for await (const item of response) {
      if (item.type === "message" && item.role === "assistant") {
        const botMessage = createMessage({
          role: item.role,
          content: "",
        });
        // add the bot message to the session first, then stream into it
        chatStore.updateTargetSession(session, (session) => {
          session.messages = session.messages.concat([botMessage]);
        });
        for await (const content of item) {
          if (content.type === "text") {
            for await (const text of content.textChunks()) {
              botMessage.content += text;
            }
          } else if (content.type === "audio") {
            // consume the transcript and audio streams concurrently
            const textTask = async () => {
              for await (const text of content.transcriptChunks()) {
                botMessage.content += text;
              }
            };
            const audioTask = async () => {
              audioHandlerRef.current?.startStreamingPlayback();
              for await (const audio of content.audioChunks()) {
                audioHandlerRef.current?.playChunk(audio);
              }
            };
            await Promise.all([textTask(), audioTask()]);
          }
          // update message.content
          chatStore.updateTargetSession(session, (session) => {
            session.messages = session.messages.concat();
          });
        }
        // upload the played audio to get its audio_url
        const blob = audioHandlerRef.current?.savePlayFile();
        if (blob) {
          uploadImage(blob).then((audio_url) => {
            botMessage.audio_url = audio_url;
            // botMessage.date = new Date().toLocaleString();
            // update text and audio_url
            chatStore.updateTargetSession(session, (session) => {
              session.messages = session.messages.concat();
            });
          });
        }
      }
    }
  };
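
  // One completed user utterance: stop any bot playback, wait for the
  // transcription, then store the transcript plus an uploaded audio clip.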
  const handleInputAudio = async (item: RTInputAudioItem) => {
    audioHandlerRef.current?.stopStreamingPlayback();
    await item.waitForCompletion();
    if (item.transcription) {
      const userMessage = createMessage({
        role: "user",
        content: item.transcription,
      });
      chatStore.updateTargetSession(session, (session) => {
        session.messages = session.messages.concat([userMessage]);
      });
      // save the input audio_url, and update the session
      const { audioStartMillis, audioEndMillis } = item;
      // upload the recorded clip to get its audio_url
      const blob = audioHandlerRef.current?.saveRecordFile(
        audioStartMillis,
        audioEndMillis,
      );
      if (blob) {
        uploadImage(blob).then((audio_url) => {
          userMessage.audio_url = audio_url;
          chatStore.updateTargetSession(session, (session) => {
            session.messages = session.messages.concat();
          });
        });
      }
    }
  };
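
  // Start or stop the microphone. Without server VAD the buffered audio has
  // to be committed manually and a response requested explicitly.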
  const toggleRecording = async () => {
    if (!isRecording && clientRef.current) {
      try {
        if (!audioHandlerRef.current) {
          audioHandlerRef.current = new AudioHandler();
          await audioHandlerRef.current.initialize();
        }
        await audioHandlerRef.current.startRecording(async (chunk) => {
          await clientRef.current?.sendAudio(chunk);
        });
        setIsRecording(true);
      } catch (error) {
        console.error("Failed to start recording:", error);
      }
    } else if (audioHandlerRef.current) {
      try {
        audioHandlerRef.current.stopRecording();
        if (!useVAD) {
          const inputAudio = await clientRef.current?.commitAudio();
          await handleInputAudio(inputAudio!);
          await clientRef.current?.generateResponse();
        }
        setIsRecording(false);
      } catch (error) {
        console.error("Failed to stop recording:", error);
      }
    }
  };
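
  // Create the audio pipeline once on mount; tear the connection and the
  // audio handler down on unmount.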
  useEffect(() => {
    const initAudioHandler = async () => {
      const handler = new AudioHandler();
      await handler.initialize();
      audioHandlerRef.current = handler;
    };
    initAudioHandler().catch(console.error);
    return () => {
      disconnect();
      audioHandlerRef.current?.close().catch(console.error);
    };
  }, []);
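
  // Legacy prototype based on a RealtimeClient WebSocket wrapper, kept
  // commented out for reference.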
  // useEffect(() => {
  //   if (
  //     clientRef.current?.getTurnDetectionType() === "server_vad" &&
  //     audioData
  //   ) {
  //     // console.log("appendInputAudio", audioData);
  //     // send the recorded 16-bit PCM audio to OpenAI
  //     clientRef.current?.appendInputAudio(audioData);
  //   }
  // }, [audioData]);
  // useEffect(() => {
  //   console.log("isRecording", isRecording);
  //   if (!isRecording.current) return;
  //   if (!clientRef.current) {
  //     const apiKey = accessStore.openaiApiKey;
  //     const client = (clientRef.current = new RealtimeClient({
  //       url: "wss://api.openai.com/v1/realtime",
  //       apiKey,
  //       dangerouslyAllowAPIKeyInBrowser: true,
  //       debug: true,
  //     }));
  //     client
  //       .connect()
  //       .then(() => {
  //         // TODO: set the real conversation context
  //         client.sendUserMessageContent([
  //           {
  //             type: `input_text`,
  //             text: `Hi`,
  //             // text: `For testing purposes, I want you to list ten car brands. Number each item, e.g. "one (or whatever number you are one): the item name".`
  //           },
  //         ]);
  //         // let the server decide when the speaker starts and stops
  //         client.updateSession({
  //           turn_detection: { type: "server_vad" },
  //         });
  //         client.on("realtime.event", (realtimeEvent) => {
  //           // debug logging
  //           console.log("realtime.event", realtimeEvent);
  //         });
  //         client.on("conversation.interrupted", async () => {
  //           if (currentBotMessage.current) {
  //             stopPlaying();
  //             try {
  //               client.cancelResponse(
  //                 currentBotMessage.current?.id,
  //                 currentTime(),
  //               );
  //             } catch (e) {
  //               console.error(e);
  //             }
  //           }
  //         });
  //         client.on("conversation.updated", async (event: any) => {
  //           // console.log("currentSession", chatStore.currentSession());
  //           // const items = client.conversation.getItems();
  //           const content = event?.item?.content?.[0]?.transcript || "";
  //           const text = event?.item?.content?.[0]?.text || "";
  //           // console.log(
  //           //   "conversation.updated",
  //           //   event,
  //           //   "content[0]",
  //           //   event?.item?.content?.[0]?.transcript,
  //           //   "formatted",
  //           //   event?.item?.formatted?.transcript,
  //           //   "content",
  //           //   content,
  //           //   "text",
  //           //   text,
  //           //   event?.item?.status,
  //           //   event?.item?.role,
  //           //   items.length,
  //           //   items,
  //           // );
  //           const { item, delta } = event;
  //           const { role, id, status, formatted } = item || {};
  //           if (id && role == "assistant") {
  //             if (
  //               !currentBotMessage.current ||
  //               currentBotMessage.current?.id != id
  //             ) {
  //               // create the assistant message and save it to the session
  //               currentBotMessage.current = createMessage({ id, role });
  //               chatStore.updateCurrentSession((session) => {
  //                 session.messages = session.messages.concat([
  //                   currentBotMessage.current!,
  //                 ]);
  //               });
  //             }
  //             if (currentBotMessage.current?.id != id) {
  //               stopPlaying();
  //             }
  //             if (content) {
  //               currentBotMessage.current.content = content;
  //               chatStore.updateCurrentSession((session) => {
  //                 session.messages = session.messages.concat();
  //               });
  //             }
  //             if (delta?.audio) {
  //               // delta.audio is an Int16Array; play it immediately
  //               addInt16PCM(delta.audio);
  //             }
  //             // console.log(
  //             //   "updated try save wavFile",
  //             //   status,
  //             //   currentBotMessage.current?.audio_url,
  //             //   formatted?.audio,
  //             // );
  //             if (
  //               status == "completed" &&
  //               !currentBotMessage.current?.audio_url &&
  //               formatted?.audio?.length
  //             ) {
  //               // pack as a wav file and save it; TODO: mp3 would use less space
  //               const botMessage = currentBotMessage.current;
  //               const wavFile = new WavPacker().pack(sampleRate, {
  //                 bitsPerSample: 16,
  //                 channelCount: 1,
  //                 data: formatted?.audio,
  //               });
  //               // attach the file to the item; wavFile.url is playable via an <audio> tag
  //               item.formatted.file = wavFile;
  //               uploadImageRemote(wavFile.blob).then((audio_url) => {
  //                 botMessage.audio_url = audio_url;
  //                 chatStore.updateCurrentSession((session) => {
  //                   session.messages = session.messages.concat();
  //                 });
  //               });
  //             }
  //             if (
  //               status == "completed" &&
  //               !currentBotMessage.current?.content
  //             ) {
  //               chatStore.updateCurrentSession((session) => {
  //                 session.messages = session.messages.filter(
  //                   (m) => m.id !== currentBotMessage.current?.id,
  //                 );
  //               });
  //             }
  //           }
  //           if (id && role == "user" && !text) {
  //             if (
  //               !currentUserMessage.current ||
  //               currentUserMessage.current?.id != id
  //             ) {
  //               // create the user message and save it to the session
  //               currentUserMessage.current = createMessage({ id, role });
  //               chatStore.updateCurrentSession((session) => {
  //                 session.messages = session.messages.concat([
  //                   currentUserMessage.current!,
  //                 ]);
  //               });
  //             }
  //             if (content) {
  //               // pack as a wav file and save it; TODO: mp3 would use less space
  //               const userMessage = currentUserMessage.current;
  //               const wavFile = new WavPacker().pack(sampleRate, {
  //                 bitsPerSample: 16,
  //                 channelCount: 1,
  //                 data: formatted?.audio,
  //               });
  //               // attach the file to the item; wavFile.url is playable via an <audio> tag
  //               item.formatted.file = wavFile;
  //               uploadImageRemote(wavFile.blob).then((audio_url) => {
  //                 // update message content
  //                 userMessage.content = content;
  //                 // update message audio_url
  //                 userMessage.audio_url = audio_url;
  //                 chatStore.updateCurrentSession((session) => {
  //                   session.messages = session.messages.concat();
  //                 });
  //               });
  //             }
  //           }
  //         });
  //       })
  //       .catch((e) => {
  //         console.error("Error", e);
  //       });
  //   }
  //   return () => {
  //     stop();
  //     // TODO: close the client
  //     clientRef.current?.disconnect();
  //   };
  // }, [isRecording.current]);

  const handleClose = () => {
    onClose?.();
    disconnect();
  };
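
  // UI: pulsing mic indicator plus record, connect/disconnect, and close
  // buttons.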
  return (
    <div className={styles["realtime-chat"]}>
      <div
        className={clsx(styles["circle-mic"], {
          [styles["pulse"]]: true,
        })}
      >
        <div className={styles["icon-center"]}></div>
      </div>
      <div className={styles["bottom-icons"]}>
        <div>
          <IconButton
            icon={isRecording ? <VoiceOffIcon /> : <VoiceIcon />}
            onClick={toggleRecording}
            disabled={!isConnected}
            bordered
            shadow
          />
        </div>
        <div className={styles["icon-center"]}>
          <IconButton
            icon={<PowerIcon />}
            text={
              isConnecting
                ? "Connecting..."
                : isConnected
                  ? "Disconnect"
                  : "Connect"
            }
            onClick={handleConnect}
            disabled={isConnecting}
            bordered
            shadow
          />
        </div>
        <div>
          <IconButton
            icon={<Close24Icon />}
            onClick={handleClose}
            bordered
            shadow
          />
        </div>
      </div>
    </div>
  );
}