feat: tts

Author: DDMeaqua
Date: 2024-09-18 11:24:25 +08:00
Parent: 212605a7e3
Commit: 3ae8ec1af6
19 changed files with 2 additions and 490 deletions

View File

@@ -13,7 +13,6 @@ import {
   LLMApi,
   LLMModel,
   SpeechOptions,
-  TranscriptionOptions,
   MultimodalContent,
 } from "../api";
 import Locale from "../../locales";
@@ -88,9 +87,6 @@ export class QwenApi implements LLMApi {
   speech(options: SpeechOptions): Promise<ArrayBuffer> {
     throw new Error("Method not implemented.");
   }
-  transcription(options: TranscriptionOptions): Promise<string> {
-    throw new Error("Method not implemented.");
-  }
 
   async chat(options: ChatOptions) {
     const messages = options.messages.map((v) => ({
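
The same two deletions repeat for every platform client below: the TranscriptionOptions import goes away, and the transcription stub is deleted from the class. Pieced together from these hunks alone, the shared client shape looks roughly like this (a sketch; the placeholder option types and any member not visible in this diff are assumptions, not the real "../api" definitions):

// Sketch reconstructed from the hunks; the real "../api" module defines more
// members and richer option types than shown here.
type SpeechOptions = { input: string }; // placeholder; real fields not in this diff
type ChatOptions = { messages: unknown[] }; // placeholder; real fields not in this diff

interface TranscriptionOptions {
  file: Blob; // audio payload (see the openai.ts hunk below)
  model?: string; // e.g. "whisper-1"
  language?: string;
  prompt?: string;
  response_format?: string;
  temperature?: number;
  onController?: (controller: AbortController) => void;
}

// Every platform class in this diff implements this shape; the commit deletes
// the transcription member from each of them while keeping speech (TTS).
interface LLMApiShape {
  speech(options: SpeechOptions): Promise<ArrayBuffer>;
  transcription(options: TranscriptionOptions): Promise<string>;
  chat(options: ChatOptions): Promise<void>;
}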

View File

@@ -5,7 +5,6 @@ import {
   LLMApi,
   MultimodalContent,
   SpeechOptions,
-  TranscriptionOptions,
 } from "../api";
 import {
   useAccessStore,
@@ -90,9 +89,6 @@ export class ClaudeApi implements LLMApi {
   speech(options: SpeechOptions): Promise<ArrayBuffer> {
     throw new Error("Method not implemented.");
   }
-  transcription(options: TranscriptionOptions): Promise<string> {
-    throw new Error("Method not implemented.");
-  }
 
   extractMessage(res: any) {
     console.log("[Response] claude response: ", res);

View File

@@ -15,7 +15,6 @@ import {
   LLMModel,
   MultimodalContent,
   SpeechOptions,
-  TranscriptionOptions,
 } from "../api";
 import Locale from "../../locales";
 import {
@@ -80,9 +79,6 @@ export class ErnieApi implements LLMApi {
   speech(options: SpeechOptions): Promise<ArrayBuffer> {
     throw new Error("Method not implemented.");
   }
-  transcription(options: TranscriptionOptions): Promise<string> {
-    throw new Error("Method not implemented.");
-  }
 
   async chat(options: ChatOptions) {
     const messages = options.messages.map((v) => ({

View File

@@ -14,7 +14,6 @@ import {
   LLMModel,
   MultimodalContent,
   SpeechOptions,
-  TranscriptionOptions,
 } from "../api";
 import Locale from "../../locales";
 import {
@@ -82,9 +81,6 @@ export class DoubaoApi implements LLMApi {
   speech(options: SpeechOptions): Promise<ArrayBuffer> {
     throw new Error("Method not implemented.");
   }
-  transcription(options: TranscriptionOptions): Promise<string> {
-    throw new Error("Method not implemented.");
-  }
 
   async chat(options: ChatOptions) {
     const messages = options.messages.map((v) => ({

View File

@@ -6,7 +6,6 @@ import {
   LLMModel,
   LLMUsage,
   SpeechOptions,
-  TranscriptionOptions,
 } from "../api";
 import { useAccessStore, useAppConfig, useChatStore } from "@/app/store";
 import { getClientConfig } from "@/app/config/client";
@@ -67,9 +66,7 @@ export class GeminiProApi implements LLMApi {
   speech(options: SpeechOptions): Promise<ArrayBuffer> {
     throw new Error("Method not implemented.");
   }
-  transcription(options: TranscriptionOptions): Promise<string> {
-    throw new Error("Method not implemented.");
-  }
   async chat(options: ChatOptions): Promise<void> {
     const apiClient = this;
     let multimodal = false;

View File

@@ -13,7 +13,6 @@ import {
   LLMApi,
   LLMModel,
   SpeechOptions,
-  TranscriptionOptions,
 } from "../api";
 import Locale from "../../locales";
 import {
@@ -63,9 +62,6 @@ export class SparkApi implements LLMApi {
   speech(options: SpeechOptions): Promise<ArrayBuffer> {
     throw new Error("Method not implemented.");
   }
-  transcription(options: TranscriptionOptions): Promise<string> {
-    throw new Error("Method not implemented.");
-  }
 
   async chat(options: ChatOptions) {
     const messages: ChatOptions["messages"] = [];

View File

@@ -27,7 +27,6 @@ import {
   LLMUsage,
   MultimodalContent,
   SpeechOptions,
-  TranscriptionOptions,
 } from "../api";
 import Locale from "../../locales";
 import {
@@ -77,9 +76,6 @@ export class MoonshotApi implements LLMApi {
   speech(options: SpeechOptions): Promise<ArrayBuffer> {
     throw new Error("Method not implemented.");
   }
-  transcription(options: TranscriptionOptions): Promise<string> {
-    throw new Error("Method not implemented.");
-  }
 
   async chat(options: ChatOptions) {
     const messages: ChatOptions["messages"] = [];

View File

@@ -34,7 +34,6 @@ import {
   LLMUsage,
   MultimodalContent,
   SpeechOptions,
-  TranscriptionOptions,
 } from "../api";
 import Locale from "../../locales";
 import {
@@ -187,47 +186,6 @@ export class ChatGPTApi implements LLMApi {
     }
   }
-
-  async transcription(options: TranscriptionOptions): Promise<string> {
-    const formData = new FormData();
-    formData.append("file", options.file, "audio.wav");
-    formData.append("model", options.model ?? "whisper-1");
-    if (options.language) formData.append("language", options.language);
-    if (options.prompt) formData.append("prompt", options.prompt);
-    if (options.response_format)
-      formData.append("response_format", options.response_format);
-    if (options.temperature)
-      formData.append("temperature", options.temperature.toString());
-
-    console.log("[Request] openai audio transcriptions payload: ", options);
-
-    const controller = new AbortController();
-    options.onController?.(controller);
-
-    try {
-      const path = this.path(OpenaiPath.TranscriptionPath, options.model);
-      const headers = getHeaders(true);
-      const payload = {
-        method: "POST",
-        body: formData,
-        signal: controller.signal,
-        headers: headers,
-      };
-
-      // make a fetch request
-      const requestTimeoutId = setTimeout(
-        () => controller.abort(),
-        REQUEST_TIMEOUT_MS,
-      );
-      const res = await fetch(path, payload);
-      clearTimeout(requestTimeoutId);
-      const json = await res.json();
-      return json.text;
-    } catch (e) {
-      console.log("[Request] failed to make a audio transcriptions request", e);
-      throw e;
-    }
-  }
 
   async chat(options: ChatOptions) {
     const modelConfig = {
       ...useAppConfig.getState().modelConfig,
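
The method removed above is, at bottom, a multipart POST to OpenAI's audio transcription endpoint, whose default JSON response carries the transcript in a text field. A minimal standalone sketch of the same call (the hard-coded URL is the public OpenAI endpoint; the transcribe name and the audio/apiKey parameters are illustrative, not part of this codebase):

// Transcribe a WAV Blob via OpenAI's /v1/audio/transcriptions endpoint.
async function transcribe(audio: Blob, apiKey: string): Promise<string> {
  const formData = new FormData();
  formData.append("file", audio, "audio.wav"); // field name and filename mirror the removed code
  formData.append("model", "whisper-1"); // same default as the removed method

  const res = await fetch("https://api.openai.com/v1/audio/transcriptions", {
    method: "POST",
    headers: { Authorization: `Bearer ${apiKey}` },
    body: formData,
  });
  if (!res.ok) throw new Error(`transcription failed: HTTP ${res.status}`);
  const json = await res.json();
  return json.text; // default JSON response format carries the transcript here
}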

View File

@@ -9,7 +9,6 @@ import {
   LLMModel,
   MultimodalContent,
   SpeechOptions,
-  TranscriptionOptions,
 } from "../api";
 import Locale from "../../locales";
 import {
@@ -94,9 +93,6 @@ export class HunyuanApi implements LLMApi {
   speech(options: SpeechOptions): Promise<ArrayBuffer> {
     throw new Error("Method not implemented.");
   }
-  transcription(options: TranscriptionOptions): Promise<string> {
-    throw new Error("Method not implemented.");
-  }
 
   async chat(options: ChatOptions) {
     const visionModel = isVisionModel(options.config.model);