import VoiceIcon from "@/app/icons/voice.svg";
import VoiceOffIcon from "@/app/icons/voice-off.svg";
import Close24Icon from "@/app/icons/close-24.svg";
import PowerIcon from "@/app/icons/power.svg";
import styles from "./realtime-chat.module.scss";
import clsx from "clsx";

import { useState, useRef, useEffect } from "react";

import {
  useAccessStore,
  useChatStore,
  ChatMessage,
  createMessage,
} from "@/app/store";

import { IconButton } from "@/app/components/button";

import {
  Modality,
  RTClient,
  RTInputAudioItem,
  RTResponse,
  TurnDetection,
} from "rt-client";
import { AudioHandler } from "@/app/lib/audio";
import { uploadImage } from "@/app/utils/chat";

interface RealtimeChatProps {
  onClose?: () => void;
  onStartVoice?: () => void;
  onPausedVoice?: () => void;
}

export function RealtimeChat({
  onClose,
  onStartVoice,
  onPausedVoice,
}: RealtimeChatProps) {
  const currentItemId = useRef("");
  const currentBotMessage = useRef<ChatMessage>();
  const currentUserMessage = useRef<ChatMessage>();
  const accessStore = useAccessStore.getState();
  const chatStore = useChatStore();
  const session = chatStore.currentSession();

  const [isRecording, setIsRecording] = useState(false);
  const [isConnected, setIsConnected] = useState(false);
  const [isConnecting, setIsConnecting] = useState(false);
  const [modality, setModality] = useState("audio");
  const [isAzure, setIsAzure] = useState(false);
  const [endpoint, setEndpoint] = useState("");
  const [deployment, setDeployment] = useState("");
  const [useVAD, setUseVAD] = useState(true);

  const clientRef = useRef<RTClient | null>(null);
  const audioHandlerRef = useRef<AudioHandler | null>(null);

  const apiKey = accessStore.openaiApiKey;

  // Open a realtime connection, either to Azure OpenAI (endpoint +
  // deployment) or to the OpenAI realtime API; toggles to disconnect when
  // already connected.
  const handleConnect = async () => {
    if (!isConnected) {
      try {
        setIsConnecting(true);
        clientRef.current = isAzure
          ? new RTClient(new URL(endpoint), { key: apiKey }, { deployment })
          : new RTClient(
              { key: apiKey },
              { model: "gpt-4o-realtime-preview-2024-10-01" },
            );
        const modalities: Modality[] =
          modality === "audio" ? ["text", "audio"] : ["text"];
        const turnDetection: TurnDetection = useVAD
          ? { type: "server_vad" }
          : null;
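        // Configure the session before listening for events: whisper-1
        // transcribes incoming user audio, turn_detection (server_vad when
        // VAD is enabled, otherwise null for push-to-talk) decides when a
        // user turn ends, and modalities selects text-only or
        // text-plus-audio replies.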
{ type: "server_vad" } : null; clientRef.current.configure({ instructions: "Hi", input_audio_transcription: { model: "whisper-1" }, turn_detection: turnDetection, tools: [], temperature: 0.9, modalities, }); startResponseListener(); setIsConnected(true); } catch (error) { console.error("Connection failed:", error); } finally { setIsConnecting(false); } } else { await disconnect(); } }; const disconnect = async () => { if (clientRef.current) { try { await clientRef.current.close(); clientRef.current = null; setIsConnected(false); } catch (error) { console.error("Disconnect failed:", error); } } }; const startResponseListener = async () => { if (!clientRef.current) return; try { for await (const serverEvent of clientRef.current.events()) { if (serverEvent.type === "response") { await handleResponse(serverEvent); } else if (serverEvent.type === "input_audio") { await handleInputAudio(serverEvent); } } } catch (error) { if (clientRef.current) { console.error("Response iteration error:", error); } } }; const handleResponse = async (response: RTResponse) => { for await (const item of response) { if (item.type === "message" && item.role === "assistant") { const botMessage = createMessage({ role: item.role, content: "", }); // add bot message first chatStore.updateTargetSession(session, (session) => { session.messages = session.messages.concat([botMessage]); }); for await (const content of item) { if (content.type === "text") { for await (const text of content.textChunks()) { botMessage.content += text; } } else if (content.type === "audio") { const textTask = async () => { for await (const text of content.transcriptChunks()) { botMessage.content += text; } }; const audioTask = async () => { audioHandlerRef.current?.startStreamingPlayback(); for await (const audio of content.audioChunks()) { audioHandlerRef.current?.playChunk(audio); } }; await Promise.all([textTask(), audioTask()]); } // update message.content chatStore.updateTargetSession(session, (session) => { session.messages = session.messages.concat(); }); } // upload audio get audio_url const blob = audioHandlerRef.current?.savePlayFile(); uploadImage(blob).then((audio_url) => { botMessage.audio_url = audio_url; // botMessage.date = new Date().toLocaleString(); // update text and audio_url chatStore.updateTargetSession(session, (session) => { session.messages = session.messages.concat(); }); }); } } }; const handleInputAudio = async (item: RTInputAudioItem) => { audioHandlerRef.current?.stopStreamingPlayback(); await item.waitForCompletion(); if (item.transcription) { const userMessage = createMessage({ role: "user", content: item.transcription, }); chatStore.updateTargetSession(session, (session) => { session.messages = session.messages.concat([userMessage]); }); // save input audio_url, and update session const { audioStartMillis, audioEndMillis } = item; // upload audio get audio_url const blob = audioHandlerRef.current?.saveRecordFile( audioStartMillis, audioEndMillis, ); uploadImage(blob).then((audio_url) => { userMessage.audio_url = audio_url; chatStore.updateTargetSession(session, (session) => { session.messages = session.messages.concat(); }); }); } }; const toggleRecording = async () => { if (!isRecording && clientRef.current) { try { if (!audioHandlerRef.current) { audioHandlerRef.current = new AudioHandler(); await audioHandlerRef.current.initialize(); } await audioHandlerRef.current.startRecording(async (chunk) => { await clientRef.current?.sendAudio(chunk); }); setIsRecording(true); } catch (error) { console.error("Failed to 
start recording:", error); } } else if (audioHandlerRef.current) { try { audioHandlerRef.current.stopRecording(); if (!useVAD) { const inputAudio = await clientRef.current?.commitAudio(); await handleInputAudio(inputAudio!); await clientRef.current?.generateResponse(); } setIsRecording(false); } catch (error) { console.error("Failed to stop recording:", error); } } }; useEffect(() => { const initAudioHandler = async () => { const handler = new AudioHandler(); await handler.initialize(); audioHandlerRef.current = handler; }; initAudioHandler().catch(console.error); return () => { disconnect(); audioHandlerRef.current?.close().catch(console.error); }; }, []); // useEffect(() => { // if ( // clientRef.current?.getTurnDetectionType() === "server_vad" && // audioData // ) { // // console.log("appendInputAudio", audioData); // // 将录制的16PCM音频发送给openai // clientRef.current?.appendInputAudio(audioData); // } // }, [audioData]); // useEffect(() => { // console.log("isRecording", isRecording); // if (!isRecording.current) return; // if (!clientRef.current) { // const apiKey = accessStore.openaiApiKey; // const client = (clientRef.current = new RealtimeClient({ // url: "wss://api.openai.com/v1/realtime", // apiKey, // dangerouslyAllowAPIKeyInBrowser: true, // debug: true, // })); // client // .connect() // .then(() => { // // TODO 设置真实的上下文 // client.sendUserMessageContent([ // { // type: `input_text`, // text: `Hi`, // // text: `For testing purposes, I want you to list ten car brands. Number each item, e.g. "one (or whatever number you are one): the item name".` // }, // ]); // // 配置服务端判断说话人开启还是结束 // client.updateSession({ // turn_detection: { type: "server_vad" }, // }); // client.on("realtime.event", (realtimeEvent) => { // // 调试 // console.log("realtime.event", realtimeEvent); // }); // client.on("conversation.interrupted", async () => { // if (currentBotMessage.current) { // stopPlaying(); // try { // client.cancelResponse( // currentBotMessage.current?.id, // currentTime(), // ); // } catch (e) { // console.error(e); // } // } // }); // client.on("conversation.updated", async (event: any) => { // // console.log("currentSession", chatStore.currentSession()); // // const items = client.conversation.getItems(); // const content = event?.item?.content?.[0]?.transcript || ""; // const text = event?.item?.content?.[0]?.text || ""; // // console.log( // // "conversation.updated", // // event, // // "content[0]", // // event?.item?.content?.[0]?.transcript, // // "formatted", // // event?.item?.formatted?.transcript, // // "content", // // content, // // "text", // // text, // // event?.item?.status, // // event?.item?.role, // // items.length, // // items, // // ); // const { item, delta } = event; // const { role, id, status, formatted } = item || {}; // if (id && role == "assistant") { // if ( // !currentBotMessage.current || // currentBotMessage.current?.id != id // ) { // // create assistant message and save to session // currentBotMessage.current = createMessage({ id, role }); // chatStore.updateCurrentSession((session) => { // session.messages = session.messages.concat([ // currentBotMessage.current!, // ]); // }); // } // if (currentBotMessage.current?.id != id) { // stopPlaying(); // } // if (content) { // currentBotMessage.current.content = content; // chatStore.updateCurrentSession((session) => { // session.messages = session.messages.concat(); // }); // } // if (delta?.audio) { // // typeof delta.audio is Int16Array // // 直接播放 // addInt16PCM(delta.audio); // } // // console.log( // // 
"updated try save wavFile", // // status, // // currentBotMessage.current?.audio_url, // // formatted?.audio, // // ); // if ( // status == "completed" && // !currentBotMessage.current?.audio_url && // formatted?.audio?.length // ) { // // 转换为wav文件保存 TODO 使用mp3格式会更节省空间 // const botMessage = currentBotMessage.current; // const wavFile = new WavPacker().pack(sampleRate, { // bitsPerSample: 16, // channelCount: 1, // data: formatted?.audio, // }); // // 这里将音频文件放到对象里面wavFile.url可以使用