Merge pull request #5459 from DDMeaqua/tts

add tts
This commit is contained in:
Dogtiti
2024-09-18 15:42:16 +08:00
committed by GitHub
28 changed files with 1080 additions and 12 deletions

View File

@@ -15,6 +15,8 @@ import RenameIcon from "../icons/rename.svg";
import ExportIcon from "../icons/share.svg";
import ReturnIcon from "../icons/return.svg";
import CopyIcon from "../icons/copy.svg";
import SpeakIcon from "../icons/speak.svg";
import SpeakStopIcon from "../icons/speak-stop.svg";
import LoadingIcon from "../icons/three-dots.svg";
import LoadingButtonIcon from "../icons/loading.svg";
import PromptIcon from "../icons/prompt.svg";
@@ -96,6 +98,8 @@ import {
import { useNavigate } from "react-router-dom";
import {
CHAT_PAGE_SIZE,
DEFAULT_TTS_ENGINE,
ModelProvider,
Path,
REQUEST_TIMEOUT_MS,
UNFINISHED_INPUT,
@@ -112,6 +116,11 @@ import { useAllModels } from "../utils/hooks";
import { MultimodalContent } from "../client/api";
const localStorage = safeLocalStorage();
import { ClientApi } from "../client/api";
import { createTTSPlayer } from "../utils/audio";
import { MsEdgeTTS, OUTPUT_FORMAT } from "../utils/ms_edge_tts";
const ttsPlayer = createTTSPlayer();
const Markdown = dynamic(async () => (await import("./markdown")).Markdown, {
loading: () => <LoadingIcon />,
@@ -442,6 +451,7 @@ export function ChatActions(props: {
hitBottom: boolean;
uploading: boolean;
setShowShortcutKeyModal: React.Dispatch<React.SetStateAction<boolean>>;
setUserInput: (input: string) => void;
}) {
const config = useAppConfig();
const navigate = useNavigate();
@@ -1184,10 +1194,55 @@ function _Chat() {
});
};
const accessStore = useAccessStore();
const [speechStatus, setSpeechStatus] = useState(false);
const [speechLoading, setSpeechLoading] = useState(false);
async function openaiSpeech(text: string) {
if (speechStatus) {
ttsPlayer.stop();
setSpeechStatus(false);
} else {
var api: ClientApi;
api = new ClientApi(ModelProvider.GPT);
const config = useAppConfig.getState();
setSpeechLoading(true);
ttsPlayer.init();
let audioBuffer: ArrayBuffer;
const { markdownToTxt } = require("markdown-to-txt");
const textContent = markdownToTxt(text);
if (config.ttsConfig.engine !== DEFAULT_TTS_ENGINE) {
const edgeVoiceName = accessStore.edgeVoiceName();
const tts = new MsEdgeTTS();
await tts.setMetadata(
edgeVoiceName,
OUTPUT_FORMAT.AUDIO_24KHZ_96KBITRATE_MONO_MP3,
);
audioBuffer = await tts.toArrayBuffer(textContent);
} else {
audioBuffer = await api.llm.speech({
model: config.ttsConfig.model,
input: textContent,
voice: config.ttsConfig.voice,
speed: config.ttsConfig.speed,
});
}
setSpeechStatus(true);
ttsPlayer
.play(audioBuffer, () => {
setSpeechStatus(false);
})
.catch((e) => {
console.error("[OpenAI Speech]", e);
showToast(prettyObject(e));
setSpeechStatus(false);
})
.finally(() => setSpeechLoading(false));
}
}
const context: RenderMessage[] = useMemo(() => {
return session.mask.hideContext ? [] : session.mask.context.slice();
}, [session.mask.context, session.mask.hideContext]);
const accessStore = useAccessStore();
if (
context.length === 0 &&
@@ -1724,6 +1779,25 @@ function _Chat() {
)
}
/>
{config.ttsConfig.enable && (
<ChatAction
text={
speechStatus
? Locale.Chat.Actions.StopSpeech
: Locale.Chat.Actions.Speech
}
icon={
speechStatus ? (
<SpeakStopIcon />
) : (
<SpeakIcon />
)
}
onClick={() =>
openaiSpeech(getMessageTextContent(message))
}
/>
)}
</>
)}
</div>
@@ -1842,6 +1916,7 @@ function _Chat() {
onSearch("");
}}
setShowShortcutKeyModal={setShowShortcutKeyModal}
setUserInput={setUserInput}
/>
<label
className={`${styles["chat-input-panel-inner"]} ${