feat: #3110 add voice control

Yidadaa
2023-11-14 03:42:23 +08:00
parent 9da455a7ea
commit d1d8b1f393
13 changed files with 273 additions and 9 deletions

View File

@@ -89,6 +89,7 @@ import { prettyObject } from "../utils/format";
import { ExportMessageModal } from "./exporter";
import { getClientConfig } from "../config/client";
import { useAllModels } from "../utils/hooks";
import { VoicePage } from "./voice/voice";
const Markdown = dynamic(async () => (await import("./markdown")).Markdown, {
  loading: () => <LoadingIcon />,
@@ -1049,6 +1050,8 @@ function _Chat() {
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, []);
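  // NOTE: this unconditional early return short-circuits the entire chat UI below;
  // every session renders the voice page in this revision, so it reads as a development toggle.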
  return <VoicePage />;

  return (
    <div className={styles.chat} key={session.id}>
      <div className="window-header" data-tauri-drag-region>

View File

@@ -2,6 +2,8 @@
require("../polyfill");
import "regenerator-runtime/runtime";
import { useState, useEffect } from "react";
import styles from "./home.module.scss";
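The regenerator-runtime/runtime import is the functional addition in this hunk: react-speech-recognition ships regenerator-transpiled async code and is known to require this polyfill in some build setups, so loading it in the entry module ensures the runtime is defined before the library runs.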
@@ -128,7 +130,8 @@ function Screen() {
  const isHome = location.pathname === Path.Home;
  const isAuth = location.pathname === Path.Auth;
  const isMobileScreen = useMobileScreen();
  const shouldTightBorder = getClientConfig()?.isApp || (config.tightBorder && !isMobileScreen);
  const shouldTightBorder =
    getClientConfig()?.isApp || (config.tightBorder && !isMobileScreen);

  useEffect(() => {
    loadAsyncGoogleFont();
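The two shouldTightBorder assignments above are the before and after of the same line: the single-line version is removed and replaced by the wrapped two-line form, a formatting-only change with no effect on behavior.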

View File

@@ -0,0 +1,55 @@
.voice-page {
  position: fixed;
  top: 0;
  left: 0;
  width: 100vw;
  height: 100vh;
  background-color: rgba($color: #000000, $alpha: 0.9);
  color: white;
  backdrop-filter: blur(10px);
  display: flex;
  flex-direction: column;
  align-items: center;

  .top,
  .bottom {
    flex: 1;
    padding: 20px;
    font-size: 1.5em;
    color: rgba($color: #fff, $alpha: 0.6);
    overflow: auto;
    width: 100%;
    box-sizing: border-box;
  }

  .active {
    background-color: rgba($color: #00ff00, $alpha: 0.2);
  }

  .top.active {
    background-color: white;

    &::after {
      content: "☁️";
      color: black;
    }
  }

  .top:hover {
    background-color: black;
  }

  .center {
    height: 2px;
    background-color: white;
    opacity: 0.2;
    width: 100%;
  }
}
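Note: voice.tsx below adds the active class to .top unconditionally, so the white .top.active background and the ☁️ indicator apply at all times in this revision.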

View File

@@ -0,0 +1,117 @@
import { useChatStore } from "@/app/store";
import style from "./voice.module.scss";
import { useEffect, useMemo, useRef, useState } from "react";
import SpeechRecognition, {
  useSpeechRecognition,
} from "react-speech-recognition";
// IconButton is imported but not used yet in this revision.
import { IconButton } from "../button";
import { api } from "@/app/client/api";

// Scan from the end of the array; a local helper, since
// Array.prototype.findLast may not exist in the build target.
function findLast<T>(array: T[], predicate: (_: T) => boolean) {
  for (let i = array.length - 1; i >= 0; i -= 1) {
    if (predicate(array[i])) {
      return array[i];
    }
  }
  return null;
}

export function VoicePage() {
  const chatStore = useChatStore();
  const session = chatStore.currentSession();

  const lastAssistantMessage = useMemo(
    () => findLast(session.messages, (m) => m.role === "assistant"),
    [session.messages],
  );
  const lastUserMessage = useMemo(
    () => findLast(session.messages, (m) => m.role === "user"),
    [session.messages],
  );

  const speech = useSpeechRecognition({
    clearTranscriptOnListen: true,
  });

  if (!speech.browserSupportsSpeechRecognition) {
    // NOTE: throwing during render is abrupt; a fallback UI would be friendlier here.
    throw Error("your browser does not support the Speech Recognition API");
  }

  function startVoice() {
    SpeechRecognition.startListening({
      language: "zh-CN",
    });
    // Stop any TTS playback that is still running so the microphone
    // does not pick up the assistant's own voice; stop() throws if the
    // node was never started, hence the empty catch.
    try {
      sourceNodeRef.current?.stop();
    } catch {}
  }

  function stopVoice() {
    SpeechRecognition.stopListening();
  }

  // Once listening stops, send the final transcript as a user message,
  // skipping empty results and exact duplicates of the previous input.
  useEffect(() => {
    if (!speech.listening) {
      if (
        speech.finalTranscript.length > 0 &&
        speech.finalTranscript !== lastUserMessage?.content
      ) {
        chatStore.onUserInput(speech.finalTranscript);
      }
    }
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [speech.listening]);

  // Tracks the TTS fetch state; not surfaced in the UI yet.
  const [loadingTTS, setLoadingTTS] = useState(false);
  const sourceNodeRef = useRef<AudioBufferSourceNode>();

  function speak() {
    const content = lastAssistantMessage?.content;
    if (!content) return;
    setLoadingTTS(true);
    api.llm.speech(content).then(async (arrayBuffer) => {
      const audioContext = new (window.AudioContext ||
        (window as any).webkitAudioContext)();
      const source = audioContext.createBufferSource();

      // Stop the previous source if one is still playing.
      try {
        sourceNodeRef.current?.stop();
      } catch {}
      sourceNodeRef.current = source;

      // Decode the response into an AudioBuffer and assign it to the source.
      source.buffer = await audioContext.decodeAudioData(arrayBuffer);
      // Connect to the default output device (usually the speakers).
      source.connect(audioContext.destination);
      setLoadingTTS(false);
      // Start playback.
      source.start(0);
    });
  }

  // Speak the reply exactly once, on the streaming true -> false edge.
  const lastStream = useRef(false);
  useEffect(() => {
    if (
      lastAssistantMessage?.streaming !== lastStream.current &&
      lastStream.current
    ) {
      speak();
    }
    lastStream.current = !!lastAssistantMessage?.streaming;
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [lastAssistantMessage?.streaming]);

  return (
    <div className={style["voice-page"]}>
      <div className={style["top"] + ` ${style["active"]}`} onClick={speak}>
        {lastAssistantMessage?.content}
      </div>
      <div className={style["center"]}></div>
      <div
        className={
          style["bottom"] + (speech.listening ? ` ${style["active"]}` : "")
        }
        onClick={() => {
          if (speech.listening) {
            stopVoice();
          } else {
            startVoice();
          }
        }}
      >
        {speech.transcript || lastUserMessage?.content}
      </div>
    </div>
  );
}
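The playback path above assumes that api.llm.speech(content) resolves to an ArrayBuffer of encoded audio, but the method itself is not shown in this diff. Below is a minimal hypothetical sketch of such a client call, assuming an OpenAI-style /v1/audio/speech endpoint; the endpoint path, model, and voice are assumptions, not part of this commit:

// Hypothetical sketch, not from this commit: fetch synthesized speech as an ArrayBuffer.
async function speech(text: string): Promise<ArrayBuffer> {
  const res = await fetch("/api/openai/v1/audio/speech", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({
      model: "tts-1", // assumed model name
      voice: "alloy", // assumed voice id
      input: text,
    }),
  });
  if (!res.ok) {
    throw new Error(`TTS request failed: ${res.status}`);
  }
  return res.arrayBuffer();
}

Whatever the real implementation looks like, it must return bytes in a container that decodeAudioData can parse (for example MP3 or WAV), since speak() hands the buffer to the Web Audio API unmodified.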