feat: add tts stt

This commit is contained in:
DDMeaqua
2024-08-27 16:21:02 +08:00
parent 718782f5b1
commit 2f410fc09f
22 changed files with 1446 additions and 6 deletions

View File

@@ -10,11 +10,14 @@ import React, {
} from "react";
import SendWhiteIcon from "../icons/send-white.svg";
import VoiceWhiteIcon from "../icons/voice-white.svg";
import BrainIcon from "../icons/brain.svg";
import RenameIcon from "../icons/rename.svg";
import ExportIcon from "../icons/share.svg";
import ReturnIcon from "../icons/return.svg";
import CopyIcon from "../icons/copy.svg";
import SpeakIcon from "../icons/speak.svg";
import SpeakStopIcon from "../icons/speak-stop.svg";
import LoadingIcon from "../icons/three-dots.svg";
import LoadingButtonIcon from "../icons/loading.svg";
import PromptIcon from "../icons/prompt.svg";
@@ -64,6 +67,7 @@ import {
getMessageImages,
isVisionModel,
isDalle3,
isFirefox,
} from "../utils";
import { uploadImage as uploadImageRemote } from "@/app/utils/chat";
@@ -73,7 +77,7 @@ import dynamic from "next/dynamic";
import { ChatControllerPool } from "../client/controller";
import { DalleSize, DalleQuality, DalleStyle } from "../typing";
import { Prompt, usePromptStore } from "../store/prompt";
import Locale from "../locales";
import Locale, { getLang, getSTTLang } from "../locales";
import { IconButton } from "./button";
import styles from "./chat.module.scss";
@@ -90,6 +94,10 @@ import {
import { useNavigate } from "react-router-dom";
import {
CHAT_PAGE_SIZE,
DEFAULT_STT_ENGINE,
DEFAULT_TTS_ENGINE,
FIREFOX_DEFAULT_STT_ENGINE,
ModelProvider,
LAST_INPUT_KEY,
Path,
REQUEST_TIMEOUT_MS,
@@ -106,6 +114,16 @@ import { ExportMessageModal } from "./exporter";
import { getClientConfig } from "../config/client";
import { useAllModels } from "../utils/hooks";
import { MultimodalContent } from "../client/api";
import { ClientApi } from "../client/api";
import { createTTSPlayer } from "../utils/audio";
import {
OpenAITranscriptionApi,
SpeechApi,
WebTranscriptionApi,
} from "../utils/speech";
import { MsEdgeTTS, OUTPUT_FORMAT } from "../utils/ms_edge_tts";
const ttsPlayer = createTTSPlayer();
const Markdown = dynamic(async () => (await import("./markdown")).Markdown, {
loading: () => <LoadingIcon />,
@@ -922,6 +940,33 @@ function _Chat() {
}
};
const [isListening, setIsListening] = useState(false);
const [isTranscription, setIsTranscription] = useState(false);
const [speechApi, setSpeechApi] = useState<any>(null);
const startListening = async () => {
if (speechApi) {
await speechApi.start();
setIsListening(true);
}
};
const stopListening = async () => {
if (speechApi) {
if (config.sttConfig.engine !== DEFAULT_STT_ENGINE)
setIsTranscription(true);
await speechApi.stop();
setIsListening(false);
}
};
const onRecognitionEnd = (finalTranscript: string) => {
console.log(finalTranscript);
if (finalTranscript) setUserInput(finalTranscript);
if (config.sttConfig.engine !== DEFAULT_STT_ENGINE)
setIsTranscription(false);
};
const doSubmit = (userInput: string) => {
if (userInput.trim() === "") return;
const matchCommand = chatCommands.match(userInput);
@@ -992,6 +1037,16 @@ function _Chat() {
}
});
// eslint-disable-next-line react-hooks/exhaustive-deps
if (isFirefox()) config.sttConfig.engine = FIREFOX_DEFAULT_STT_ENGINE;
setSpeechApi(
config.sttConfig.engine === DEFAULT_STT_ENGINE
? new WebTranscriptionApi((transcription) =>
onRecognitionEnd(transcription),
)
: new OpenAITranscriptionApi((transcription) =>
onRecognitionEnd(transcription),
),
);
}, []);
// check if should send message
@@ -1102,10 +1157,55 @@ function _Chat() {
});
};
const accessStore = useAccessStore();
const [speechStatus, setSpeechStatus] = useState(false);
const [speechLoading, setSpeechLoading] = useState(false);
async function openaiSpeech(text: string) {
if (speechStatus) {
ttsPlayer.stop();
setSpeechStatus(false);
} else {
var api: ClientApi;
api = new ClientApi(ModelProvider.GPT);
const config = useAppConfig.getState();
setSpeechLoading(true);
ttsPlayer.init();
let audioBuffer: ArrayBuffer;
const { markdownToTxt } = require("markdown-to-txt");
const textContent = markdownToTxt(text);
if (config.ttsConfig.engine !== DEFAULT_TTS_ENGINE) {
const edgeVoiceName = accessStore.edgeVoiceName();
const tts = new MsEdgeTTS();
await tts.setMetadata(
edgeVoiceName,
OUTPUT_FORMAT.AUDIO_24KHZ_96KBITRATE_MONO_MP3,
);
audioBuffer = await tts.toArrayBuffer(textContent);
} else {
audioBuffer = await api.llm.speech({
model: config.ttsConfig.model,
input: textContent,
voice: config.ttsConfig.voice,
speed: config.ttsConfig.speed,
});
}
setSpeechStatus(true);
ttsPlayer
.play(audioBuffer, () => {
setSpeechStatus(false);
})
.catch((e) => {
console.error("[OpenAI Speech]", e);
showToast(prettyObject(e));
setSpeechStatus(false);
})
.finally(() => setSpeechLoading(false));
}
}
const context: RenderMessage[] = useMemo(() => {
return session.mask.hideContext ? [] : session.mask.context.slice();
}, [session.mask.context, session.mask.hideContext]);
const accessStore = useAccessStore();
if (
context.length === 0 &&
@@ -1567,6 +1667,26 @@ function _Chat() {
)
}
/>
{config.ttsConfig.enable && (
<ChatAction
text={
speechStatus
? Locale.Chat.Actions.StopSpeech
: Locale.Chat.Actions.Speech
}
loding={speechLoading}
icon={
speechStatus ? (
<SpeakStopIcon />
) : (
<SpeakIcon />
)
}
onClick={() =>
openaiSpeech(getMessageTextContent(message))
}
/>
)}
</>
)}
</div>
@@ -1714,13 +1834,35 @@ function _Chat() {
})}
</div>
)}
<IconButton
{config.sttConfig.enable ? (
<IconButton
icon={<VoiceWhiteIcon />}
text={
isListening ? Locale.Chat.StopSpeak : Locale.Chat.StartSpeak
}
className={styles["chat-input-send"]}
type="primary"
onClick={async () =>
isListening ? await stopListening() : await startListening()
}
loding={isTranscription}
/>
) : (
<IconButton
icon={<SendWhiteIcon />}
text={Locale.Chat.Send}
className={styles["chat-input-send"]}
type="primary"
onClick={() => doSubmit(userInput)}
/>
)}
{/* <IconButton
icon={<SendWhiteIcon />}
text={Locale.Chat.Send}
className={styles["chat-input-send"]}
type="primary"
onClick={() => doSubmit(userInput)}
/>
/> */}
</label>
</div>

View File

@@ -80,6 +80,8 @@ import { useSyncStore } from "../store/sync";
import { nanoid } from "nanoid";
import { useMaskStore } from "../store/mask";
import { ProviderType } from "../utils/cloud";
import { TTSConfigList } from "./tts-config";
import { STTConfigList } from "./stt-config";
function EditPromptModal(props: { id: string; onClose: () => void }) {
const promptStore = usePromptStore();
@@ -1646,6 +1648,28 @@ export function Settings() {
<UserPromptModal onClose={() => setShowPromptModal(false)} />
)}
<List>
<TTSConfigList
ttsConfig={config.ttsConfig}
updateConfig={(updater) => {
const ttsConfig = { ...config.ttsConfig };
updater(ttsConfig);
config.update((config) => (config.ttsConfig = ttsConfig));
}}
/>
</List>
<List>
<STTConfigList
sttConfig={config.sttConfig}
updateConfig={(updater) => {
const sttConfig = { ...config.sttConfig };
updater(sttConfig);
config.update((config) => (config.sttConfig = sttConfig));
}}
/>
</List>
<DangerItems />
</div>
</ErrorBoundary>

View File

@@ -0,0 +1,51 @@
import { STTConfig, STTConfigValidator } from "../store";
import Locale from "../locales";
import { ListItem, Select } from "./ui-lib";
import { DEFAULT_STT_ENGINES } from "../constant";
import { isFirefox } from "../utils";
export function STTConfigList(props: {
sttConfig: STTConfig;
updateConfig: (updater: (config: STTConfig) => void) => void;
}) {
return (
<>
<ListItem
title={Locale.Settings.STT.Enable.Title}
subTitle={Locale.Settings.STT.Enable.SubTitle}
>
<input
type="checkbox"
checked={props.sttConfig.enable}
onChange={(e) =>
props.updateConfig(
(config) => (config.enable = e.currentTarget.checked),
)
}
></input>
</ListItem>
{!isFirefox() && (
<ListItem title={Locale.Settings.STT.Engine.Title}>
<Select
value={props.sttConfig.engine}
onChange={(e) => {
props.updateConfig(
(config) =>
(config.engine = STTConfigValidator.engine(
e.currentTarget.value,
)),
);
}}
>
{DEFAULT_STT_ENGINES.map((v, i) => (
<option value={v} key={i}>
{v}
</option>
))}
</Select>
</ListItem>
)}
</>
);
}

View File

@@ -0,0 +1,119 @@
@import "../styles/animation.scss";
.plugin-page {
height: 100%;
display: flex;
flex-direction: column;
.plugin-page-body {
padding: 20px;
overflow-y: auto;
.plugin-filter {
width: 100%;
max-width: 100%;
margin-bottom: 20px;
animation: slide-in ease 0.3s;
height: 40px;
display: flex;
.search-bar {
flex-grow: 1;
max-width: 100%;
min-width: 0;
outline: none;
}
.search-bar:focus {
border: 1px solid var(--primary);
}
.plugin-filter-lang {
height: 100%;
margin-left: 10px;
}
.plugin-create {
height: 100%;
margin-left: 10px;
box-sizing: border-box;
min-width: 80px;
}
}
.plugin-item {
display: flex;
justify-content: space-between;
padding: 20px;
border: var(--border-in-light);
animation: slide-in ease 0.3s;
&:not(:last-child) {
border-bottom: 0;
}
&:first-child {
border-top-left-radius: 10px;
border-top-right-radius: 10px;
}
&:last-child {
border-bottom-left-radius: 10px;
border-bottom-right-radius: 10px;
}
.plugin-header {
display: flex;
align-items: center;
.plugin-icon {
display: flex;
align-items: center;
justify-content: center;
margin-right: 10px;
}
.plugin-title {
.plugin-name {
font-size: 14px;
font-weight: bold;
}
.plugin-info {
font-size: 12px;
}
.plugin-runtime-warning {
font-size: 12px;
color: #f86c6c;
}
}
}
.plugin-actions {
display: flex;
flex-wrap: nowrap;
transition: all ease 0.3s;
justify-content: center;
align-items: center;
}
@media screen and (max-width: 600px) {
display: flex;
flex-direction: column;
padding-bottom: 10px;
border-radius: 10px;
margin-bottom: 20px;
box-shadow: var(--card-shadow);
&:not(:last-child) {
border-bottom: var(--border-in-light);
}
.plugin-actions {
width: 100%;
justify-content: space-between;
padding-top: 10px;
}
}
}
}
}

View File

@@ -0,0 +1,132 @@
import { PluginConfig, TTSConfig, TTSConfigValidator } from "../store";
import Locale from "../locales";
import { ListItem, Select } from "./ui-lib";
import {
DEFAULT_TTS_ENGINE,
DEFAULT_TTS_ENGINES,
DEFAULT_TTS_MODELS,
DEFAULT_TTS_VOICES,
} from "../constant";
import { InputRange } from "./input-range";
export function TTSConfigList(props: {
ttsConfig: TTSConfig;
updateConfig: (updater: (config: TTSConfig) => void) => void;
}) {
return (
<>
<ListItem
title={Locale.Settings.TTS.Enable.Title}
subTitle={Locale.Settings.TTS.Enable.SubTitle}
>
<input
type="checkbox"
checked={props.ttsConfig.enable}
onChange={(e) =>
props.updateConfig(
(config) => (config.enable = e.currentTarget.checked),
)
}
></input>
</ListItem>
{/* <ListItem
title={Locale.Settings.TTS.Autoplay.Title}
subTitle={Locale.Settings.TTS.Autoplay.SubTitle}
>
<input
type="checkbox"
checked={props.ttsConfig.autoplay}
onChange={(e) =>
props.updateConfig(
(config) => (config.autoplay = e.currentTarget.checked),
)
}
></input>
</ListItem> */}
<ListItem title={Locale.Settings.TTS.Engine}>
<Select
value={props.ttsConfig.engine}
onChange={(e) => {
props.updateConfig(
(config) =>
(config.engine = TTSConfigValidator.engine(
e.currentTarget.value,
)),
);
}}
>
{DEFAULT_TTS_ENGINES.map((v, i) => (
<option value={v} key={i}>
{v}
</option>
))}
</Select>
</ListItem>
{props.ttsConfig.engine === DEFAULT_TTS_ENGINE && (
<>
<ListItem title={Locale.Settings.TTS.Model}>
<Select
value={props.ttsConfig.model}
onChange={(e) => {
props.updateConfig(
(config) =>
(config.model = TTSConfigValidator.model(
e.currentTarget.value,
)),
);
}}
>
{DEFAULT_TTS_MODELS.map((v, i) => (
<option value={v} key={i}>
{v}
</option>
))}
</Select>
</ListItem>
<ListItem
title={Locale.Settings.TTS.Voice.Title}
subTitle={Locale.Settings.TTS.Voice.SubTitle}
>
<Select
value={props.ttsConfig.voice}
onChange={(e) => {
props.updateConfig(
(config) =>
(config.voice = TTSConfigValidator.voice(
e.currentTarget.value,
)),
);
}}
>
{DEFAULT_TTS_VOICES.map((v, i) => (
<option value={v} key={i}>
{v}
</option>
))}
</Select>
</ListItem>
<ListItem
title={Locale.Settings.TTS.Speed.Title}
subTitle={Locale.Settings.TTS.Speed.SubTitle}
>
<InputRange
value={props.ttsConfig.speed?.toFixed(1)}
min="0.3"
max="4.0"
step="0.1"
onChange={(e) => {
props.updateConfig(
(config) =>
(config.speed = TTSConfigValidator.speed(
e.currentTarget.valueAsNumber,
)),
);
}}
></InputRange>
</ListItem>
</>
)}
</>
);
}

View File

@@ -0,0 +1,119 @@
@import "../styles/animation.scss";
.plugin-page {
height: 100%;
display: flex;
flex-direction: column;
.plugin-page-body {
padding: 20px;
overflow-y: auto;
.plugin-filter {
width: 100%;
max-width: 100%;
margin-bottom: 20px;
animation: slide-in ease 0.3s;
height: 40px;
display: flex;
.search-bar {
flex-grow: 1;
max-width: 100%;
min-width: 0;
outline: none;
}
.search-bar:focus {
border: 1px solid var(--primary);
}
.plugin-filter-lang {
height: 100%;
margin-left: 10px;
}
.plugin-create {
height: 100%;
margin-left: 10px;
box-sizing: border-box;
min-width: 80px;
}
}
.plugin-item {
display: flex;
justify-content: space-between;
padding: 20px;
border: var(--border-in-light);
animation: slide-in ease 0.3s;
&:not(:last-child) {
border-bottom: 0;
}
&:first-child {
border-top-left-radius: 10px;
border-top-right-radius: 10px;
}
&:last-child {
border-bottom-left-radius: 10px;
border-bottom-right-radius: 10px;
}
.plugin-header {
display: flex;
align-items: center;
.plugin-icon {
display: flex;
align-items: center;
justify-content: center;
margin-right: 10px;
}
.plugin-title {
.plugin-name {
font-size: 14px;
font-weight: bold;
}
.plugin-info {
font-size: 12px;
}
.plugin-runtime-warning {
font-size: 12px;
color: #f86c6c;
}
}
}
.plugin-actions {
display: flex;
flex-wrap: nowrap;
transition: all ease 0.3s;
justify-content: center;
align-items: center;
}
@media screen and (max-width: 600px) {
display: flex;
flex-direction: column;
padding-bottom: 10px;
border-radius: 10px;
margin-bottom: 20px;
box-shadow: var(--card-shadow);
&:not(:last-child) {
border-bottom: var(--border-in-light);
}
.plugin-actions {
width: 100%;
justify-content: space-between;
padding-top: 10px;
}
}
}
}
}