mirror of
https://github.com/Yidadaa/ChatGPT-Next-Web.git
synced 2025-08-08 19:26:54 +08:00
wip
This commit is contained in:
@@ -24,12 +24,19 @@
|
||||
.bottom-icons {
|
||||
display: flex;
|
||||
justify-content: space-between;
|
||||
align-items: center;
|
||||
width: 100%;
|
||||
position: absolute;
|
||||
bottom: 20px;
|
||||
box-sizing: border-box;
|
||||
padding: 0 20px;
|
||||
}
|
||||
.icon-center {
|
||||
display: flex;
|
||||
justify-content: center;
|
||||
align-items: center;
|
||||
gap: 4px;
|
||||
}
|
||||
|
||||
.icon-left,
|
||||
.icon-right {
|
||||
|
@@ -1,34 +1,220 @@
|
||||
import VoiceIcon from "@/app/icons/voice.svg";
|
||||
import VoiceOffIcon from "@/app/icons/voice-off.svg";
|
||||
import Close24Icon from "@/app/icons/close-24.svg";
|
||||
import PowerIcon from "@/app/icons/power.svg";
|
||||
|
||||
import styles from "./realtime-chat.module.scss";
|
||||
import clsx from "clsx";
|
||||
|
||||
import { useState, useRef, useCallback } from "react";
|
||||
import { useState, useRef, useCallback, useEffect } from "react";
|
||||
|
||||
import { useAccessStore, useChatStore, ChatMessage } from "@/app/store";
|
||||
|
||||
import { IconButton } from "@/app/components/button";
|
||||
|
||||
import {
|
||||
Modality,
|
||||
RTClient,
|
||||
RTInputAudioItem,
|
||||
RTResponse,
|
||||
TurnDetection,
|
||||
} from "rt-client";
|
||||
import { AudioHandler } from "@/app/lib/audio";
|
||||
|
||||
interface RealtimeChatProps {
|
||||
onClose?: () => void;
|
||||
onStartVoice?: () => void;
|
||||
onPausedVoice?: () => void;
|
||||
sampleRate?: number;
|
||||
}
|
||||
|
||||
export function RealtimeChat({
|
||||
onClose,
|
||||
onStartVoice,
|
||||
onPausedVoice,
|
||||
sampleRate = 24000,
|
||||
}: RealtimeChatProps) {
|
||||
const [isVoicePaused, setIsVoicePaused] = useState(true);
|
||||
const clientRef = useRef<null>(null);
|
||||
const currentItemId = useRef<string>("");
|
||||
const currentBotMessage = useRef<ChatMessage | null>();
|
||||
const currentUserMessage = useRef<ChatMessage | null>();
|
||||
const accessStore = useAccessStore.getState();
|
||||
const chatStore = useChatStore();
|
||||
|
||||
const [isRecording, setIsRecording] = useState(false);
|
||||
const [isConnected, setIsConnected] = useState(false);
|
||||
const [isConnecting, setIsConnecting] = useState(false);
|
||||
const [modality, setModality] = useState("audio");
|
||||
const [isAzure, setIsAzure] = useState(false);
|
||||
const [endpoint, setEndpoint] = useState("");
|
||||
const [deployment, setDeployment] = useState("");
|
||||
const [useVAD, setUseVAD] = useState(true);
|
||||
|
||||
const clientRef = useRef<RTClient | null>(null);
|
||||
const audioHandlerRef = useRef<AudioHandler | null>(null);
|
||||
|
||||
const apiKey = accessStore.openaiApiKey;
|
||||
|
||||
const handleConnect = async () => {
|
||||
if (!isConnected) {
|
||||
try {
|
||||
setIsConnecting(true);
|
||||
clientRef.current = isAzure
|
||||
? new RTClient(new URL(endpoint), { key: apiKey }, { deployment })
|
||||
: new RTClient(
|
||||
{ key: apiKey },
|
||||
{ model: "gpt-4o-realtime-preview-2024-10-01" },
|
||||
);
|
||||
const modalities: Modality[] =
|
||||
modality === "audio" ? ["text", "audio"] : ["text"];
|
||||
const turnDetection: TurnDetection = useVAD
|
||||
? { type: "server_vad" }
|
||||
: null;
|
||||
clientRef.current.configure({
|
||||
instructions: "Hi",
|
||||
input_audio_transcription: { model: "whisper-1" },
|
||||
turn_detection: turnDetection,
|
||||
tools: [],
|
||||
temperature: 0.9,
|
||||
modalities,
|
||||
});
|
||||
startResponseListener();
|
||||
|
||||
setIsConnected(true);
|
||||
} catch (error) {
|
||||
console.error("Connection failed:", error);
|
||||
} finally {
|
||||
setIsConnecting(false);
|
||||
}
|
||||
} else {
|
||||
await disconnect();
|
||||
}
|
||||
};
|
||||
|
||||
const disconnect = async () => {
|
||||
if (clientRef.current) {
|
||||
try {
|
||||
await clientRef.current.close();
|
||||
clientRef.current = null;
|
||||
setIsConnected(false);
|
||||
} catch (error) {
|
||||
console.error("Disconnect failed:", error);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
const startResponseListener = async () => {
|
||||
if (!clientRef.current) return;
|
||||
|
||||
try {
|
||||
for await (const serverEvent of clientRef.current.events()) {
|
||||
if (serverEvent.type === "response") {
|
||||
await handleResponse(serverEvent);
|
||||
} else if (serverEvent.type === "input_audio") {
|
||||
await handleInputAudio(serverEvent);
|
||||
}
|
||||
}
|
||||
} catch (error) {
|
||||
if (clientRef.current) {
|
||||
console.error("Response iteration error:", error);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
const handleResponse = async (response: RTResponse) => {
|
||||
for await (const item of response) {
|
||||
if (item.type === "message" && item.role === "assistant") {
|
||||
const message = {
|
||||
type: item.role,
|
||||
content: "",
|
||||
};
|
||||
// setMessages((prevMessages) => [...prevMessages, message]);
|
||||
for await (const content of item) {
|
||||
if (content.type === "text") {
|
||||
for await (const text of content.textChunks()) {
|
||||
message.content += text;
|
||||
// setMessages((prevMessages) => {
|
||||
// prevMessages[prevMessages.length - 1].content = message.content;
|
||||
// return [...prevMessages];
|
||||
// });
|
||||
}
|
||||
} else if (content.type === "audio") {
|
||||
const textTask = async () => {
|
||||
for await (const text of content.transcriptChunks()) {
|
||||
message.content += text;
|
||||
// setMessages((prevMessages) => {
|
||||
// prevMessages[prevMessages.length - 1].content =
|
||||
// message.content;
|
||||
// return [...prevMessages];
|
||||
// });
|
||||
}
|
||||
};
|
||||
const audioTask = async () => {
|
||||
audioHandlerRef.current?.startStreamingPlayback();
|
||||
for await (const audio of content.audioChunks()) {
|
||||
audioHandlerRef.current?.playChunk(audio);
|
||||
}
|
||||
};
|
||||
await Promise.all([textTask(), audioTask()]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
const handleInputAudio = async (item: RTInputAudioItem) => {
|
||||
audioHandlerRef.current?.stopStreamingPlayback();
|
||||
await item.waitForCompletion();
|
||||
// setMessages((prevMessages) => [
|
||||
// ...prevMessages,
|
||||
// {
|
||||
// type: "user",
|
||||
// content: item.transcription || "",
|
||||
// },
|
||||
// ]);
|
||||
};
|
||||
|
||||
const toggleRecording = async () => {
|
||||
if (!isRecording && clientRef.current) {
|
||||
try {
|
||||
if (!audioHandlerRef.current) {
|
||||
audioHandlerRef.current = new AudioHandler();
|
||||
await audioHandlerRef.current.initialize();
|
||||
}
|
||||
await audioHandlerRef.current.startRecording(async (chunk) => {
|
||||
await clientRef.current?.sendAudio(chunk);
|
||||
});
|
||||
setIsRecording(true);
|
||||
} catch (error) {
|
||||
console.error("Failed to start recording:", error);
|
||||
}
|
||||
} else if (audioHandlerRef.current) {
|
||||
try {
|
||||
audioHandlerRef.current.stopRecording();
|
||||
if (!useVAD) {
|
||||
const inputAudio = await clientRef.current?.commitAudio();
|
||||
await handleInputAudio(inputAudio!);
|
||||
await clientRef.current?.generateResponse();
|
||||
}
|
||||
setIsRecording(false);
|
||||
} catch (error) {
|
||||
console.error("Failed to stop recording:", error);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
useEffect(() => {
|
||||
const initAudioHandler = async () => {
|
||||
const handler = new AudioHandler();
|
||||
await handler.initialize();
|
||||
audioHandlerRef.current = handler;
|
||||
};
|
||||
|
||||
initAudioHandler().catch(console.error);
|
||||
|
||||
return () => {
|
||||
disconnect();
|
||||
audioHandlerRef.current?.close().catch(console.error);
|
||||
};
|
||||
}, []);
|
||||
|
||||
// useEffect(() => {
|
||||
// if (
|
||||
// clientRef.current?.getTurnDetectionType() === "server_vad" &&
|
||||
@@ -223,12 +409,16 @@ export function RealtimeChat({
|
||||
|
||||
const handleStartVoice = useCallback(() => {
|
||||
onStartVoice?.();
|
||||
setIsVoicePaused(false);
|
||||
handleConnect();
|
||||
}, []);
|
||||
|
||||
const handlePausedVoice = () => {
|
||||
onPausedVoice?.();
|
||||
setIsVoicePaused(true);
|
||||
};
|
||||
|
||||
const handleClose = () => {
|
||||
onClose?.();
|
||||
disconnect();
|
||||
};
|
||||
|
||||
return (
|
||||
@@ -241,15 +431,39 @@ export function RealtimeChat({
|
||||
<div className={styles["icon-center"]}></div>
|
||||
</div>
|
||||
<div className={styles["bottom-icons"]}>
|
||||
<div className={styles["icon-left"]}>
|
||||
{isVoicePaused ? (
|
||||
<VoiceOffIcon onClick={handleStartVoice} />
|
||||
) : (
|
||||
<VoiceIcon onClick={handlePausedVoice} />
|
||||
)}
|
||||
<div>
|
||||
<IconButton
|
||||
icon={isRecording ? <VoiceOffIcon /> : <VoiceIcon />}
|
||||
onClick={toggleRecording}
|
||||
disabled={!isConnected}
|
||||
bordered
|
||||
shadow
|
||||
/>
|
||||
</div>
|
||||
<div className={styles["icon-right"]} onClick={onClose}>
|
||||
<Close24Icon />
|
||||
<div className={styles["icon-center"]}>
|
||||
<IconButton
|
||||
icon={<PowerIcon />}
|
||||
text={
|
||||
isConnecting
|
||||
? "Connecting..."
|
||||
: isConnected
|
||||
? "Disconnect"
|
||||
: "Connect"
|
||||
}
|
||||
onClick={handleConnect}
|
||||
disabled={isConnecting}
|
||||
bordered
|
||||
shadow
|
||||
/>
|
||||
</div>
|
||||
<div onClick={handleClose}>
|
||||
<IconButton
|
||||
icon={<Close24Icon />}
|
||||
onClick={handleClose}
|
||||
disabled={!isConnected}
|
||||
bordered
|
||||
shadow
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
Reference in New Issue
Block a user