import { useDebouncedCallback } from "use-debounce";
import VoiceIcon from "@/app/icons/voice.svg";
import VoiceOffIcon from "@/app/icons/voice-off.svg";
import PowerIcon from "@/app/icons/power.svg";
import styles from "./realtime-chat.module.scss";
import clsx from "clsx";

import { useState, useRef, useEffect } from "react";

import {
  useAccessStore,
  useChatStore,
  ChatMessage,
  createMessage,
} from "@/app/store";

import { IconButton } from "@/app/components/button";

import {
  Modality,
  RTClient,
  RTInputAudioItem,
  RTResponse,
  TurnDetection,
} from "rt-client";
import { AudioHandler } from "@/app/lib/audio";
import { uploadImage } from "@/app/utils/chat";

interface RealtimeChatProps {
  onClose?: () => void;
  onStartVoice?: () => void;
  onPausedVoice?: () => void;
}

export function RealtimeChat({
  onClose,
  onStartVoice,
  onPausedVoice,
}: RealtimeChatProps) {
  const currentItemId = useRef("");
  const currentBotMessage = useRef<ChatMessage>();
  const currentUserMessage = useRef<ChatMessage>();
  const accessStore = useAccessStore.getState();
  const chatStore = useChatStore();
  const session = chatStore.currentSession();

  const [isRecording, setIsRecording] = useState(false);
  const [isConnected, setIsConnected] = useState(false);
  const [isConnecting, setIsConnecting] = useState(false);
  const [modality, setModality] = useState("audio");
  const [isAzure, setIsAzure] = useState(false);
  const [endpoint, setEndpoint] = useState("");
  const [deployment, setDeployment] = useState("");
  const [useVAD, setUseVAD] = useState(true);

  const clientRef = useRef<RTClient | null>(null);
  const audioHandlerRef = useRef<AudioHandler | null>(null);

  const apiKey = accessStore.openaiApiKey;

  const handleConnect = async () => {
    if (isConnecting) return;
    if (!isConnected) {
      try {
        setIsConnecting(true);
        clientRef.current = isAzure
          ? new RTClient(new URL(endpoint), { key: apiKey }, { deployment })
          : new RTClient(
              { key: apiKey },
              { model: "gpt-4o-realtime-preview-2024-10-01" },
            );
        const modalities: Modality[] =
          modality === "audio" ? ["text", "audio"] : ["text"];
        const turnDetection: TurnDetection = useVAD
          ? { type: "server_vad" }
          : null;
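        // Configure the realtime session: Whisper transcription for mic
        // input, optional server-side voice activity detection, and the
        // negotiated text/audio modalities.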
        clientRef.current.configure({
          instructions: "",
          input_audio_transcription: { model: "whisper-1" },
          turn_detection: turnDetection,
          tools: [],
          temperature: 0.9,
          modalities,
        });
        // fire-and-forget: run the server event loop in the background
        startResponseListener();

        setIsConnected(true);
        // replay recent chat history into the session so the model has context
        try {
          const recentMessages = chatStore.getMessagesWithMemory();
          for (const message of recentMessages) {
            const { role, content } = message;
            await clientRef.current.sendItem({
              type: "message",
              role,
              content: [{ type: "input_text", text: content }],
            });
          }
        } catch (error) {
          console.error("Set message failed:", error);
        }
      } catch (error) {
        console.error("Connection failed:", error);
      } finally {
        setIsConnecting(false);
      }
    } else {
      await disconnect();
    }
  };

  const disconnect = async () => {
    if (clientRef.current) {
      try {
        await clientRef.current.close();
        clientRef.current = null;
        setIsConnected(false);
      } catch (error) {
        console.error("Disconnect failed:", error);
      }
    }
  };

  const startResponseListener = async () => {
    if (!clientRef.current) return;

    try {
      for await (const serverEvent of clientRef.current.events()) {
        if (serverEvent.type === "response") {
          await handleResponse(serverEvent);
        } else if (serverEvent.type === "input_audio") {
          await handleInputAudio(serverEvent);
        }
      }
    } catch (error) {
      if (clientRef.current) {
        console.error("Response iteration error:", error);
      }
    }
  };

  const handleResponse = async (response: RTResponse) => {
    for await (const item of response) {
      if (item.type === "message" && item.role === "assistant") {
        const botMessage = createMessage({
          role: item.role,
          content: "",
        });
        // add the bot message first
        chatStore.updateTargetSession(session, (session) => {
          session.messages = session.messages.concat([botMessage]);
        });
        for await (const content of item) {
          if (content.type === "text") {
            for await (const text of content.textChunks()) {
              botMessage.content += text;
            }
          } else if (content.type === "audio") {
            // stream the transcript text and the audio playback concurrently
            const textTask = async () => {
              for await (const text of content.transcriptChunks()) {
                botMessage.content += text;
              }
            };
            const audioTask = async () => {
              audioHandlerRef.current?.startStreamingPlayback();
              for await (const audio of content.audioChunks()) {
                audioHandlerRef.current?.playChunk(audio);
              }
            };
            await Promise.all([textTask(), audioTask()]);
          }
          // update message.content; concat() with no arguments clones the
          // array so the store notices the mutated message
          chatStore.updateTargetSession(session, (session) => {
            session.messages = session.messages.concat();
          });
        }
        // upload the played audio to get an audio_url
        const blob = audioHandlerRef.current?.savePlayFile();
        uploadImage(blob!).then((audio_url) => {
          botMessage.audio_url = audio_url;
          // botMessage.date = new Date().toLocaleString();
          // update text and audio_url
          chatStore.updateTargetSession(session, (session) => {
            session.messages = session.messages.concat();
          });
        });
      }
    }
  };

  const handleInputAudio = async (item: RTInputAudioItem) => {
    audioHandlerRef.current?.stopStreamingPlayback();
    await item.waitForCompletion();
    if (item.transcription) {
      const userMessage = createMessage({
        role: "user",
        content: item.transcription,
      });
      chatStore.updateTargetSession(session, (session) => {
        session.messages = session.messages.concat([userMessage]);
      });
      // save the input audio_url and update the session
      const { audioStartMillis, audioEndMillis } = item;
      // upload the recorded audio to get an audio_url
      const blob = audioHandlerRef.current?.saveRecordFile(
        audioStartMillis,
        audioEndMillis,
      );
      uploadImage(blob!).then((audio_url) => {
        userMessage.audio_url = audio_url;
        chatStore.updateTargetSession(session, (session) => {
          session.messages = session.messages.concat();
        });
      });
    }
  };

  const toggleRecording = async () => {
    if (!isRecording && clientRef.current) {
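      // lazily create the audio handler, then stream mic chunks to the client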
      try {
        if (!audioHandlerRef.current) {
          audioHandlerRef.current = new AudioHandler();
          await audioHandlerRef.current.initialize();
        }
        await audioHandlerRef.current.startRecording(async (chunk) => {
          await clientRef.current?.sendAudio(chunk);
        });
        setIsRecording(true);
      } catch (error) {
        console.error("Failed to start recording:", error);
      }
    } else if (audioHandlerRef.current) {
      try {
        audioHandlerRef.current.stopRecording();
        // without server VAD, commit the buffered audio and request a reply
        if (!useVAD) {
          const inputAudio = await clientRef.current?.commitAudio();
          await handleInputAudio(inputAudio!);
          await clientRef.current?.generateResponse();
        }
        setIsRecording(false);
      } catch (error) {
        console.error("Failed to stop recording:", error);
      }
    }
  };

  useEffect(
    // debounced so a double mount (e.g. React 18 StrictMode in development)
    // does not open two realtime connections
    useDebouncedCallback(() => {
      const initAudioHandler = async () => {
        const handler = new AudioHandler();
        await handler.initialize();
        audioHandlerRef.current = handler;
        await handleConnect();
        await toggleRecording();
      };
      initAudioHandler().catch(console.error);

      return () => {
        if (isRecording) {
          toggleRecording();
        }
        audioHandlerRef.current?.close().catch(console.error);
        disconnect();
      };
    }),
    [],
  );

  const handleClose = async () => {
    onClose?.();
    if (isRecording) {
      await toggleRecording();
    }
    await disconnect();
  };
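  // UI: a mic toggle (disabled until connected) and a power button that
  // tears the session down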
  return (
    <div
      className={clsx(styles["realtime-chat"], {
        [styles["recording"]]: isRecording,
      })}
    >
      {/* the surrounding layout markup was lost in extraction; these class
          names are assumed from realtime-chat.module.scss */}
      <IconButton
        icon={isRecording ? <VoiceIcon /> : <VoiceOffIcon />}
        onClick={toggleRecording}
        disabled={!isConnected}
        type={isConnecting || isConnected ? "danger" : "primary"}
      />
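      {/* power button: stops recording and closes the realtime session */}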
      <IconButton
        icon={<PowerIcon />}
        onClick={handleClose}
        type={isConnecting || isConnected ? "danger" : "primary"}
      />
    </div>
  );
}