fix: chrome speech

Hk-Gosuto 2024-08-31 16:59:43 +08:00
parent 0e84b31f2d
commit 9227cb82d5
1 changed file with 119 additions and 25 deletions


@@ -32,6 +32,11 @@ export class OpenAITranscriptionApi extends SpeechApi {
   }
 
   async start(): Promise<void> {
+    // If we are already listening, stop the current session first
+    if (this.listeningStatus) {
+      await this.stop();
+    }
+
     // @ts-ignore
     navigator.getUserMedia =
       // @ts-ignore
@@ -42,28 +47,30 @@ export class OpenAITranscriptionApi extends SpeechApi {
       navigator.mozGetUserMedia ||
       // @ts-ignore
       navigator.msGetUserMedia;
-    if (navigator.mediaDevices) {
-      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
-      this.mediaRecorder = new MediaRecorder(stream);
-      this.mediaRecorder.ondataavailable = (e) => {
-        if (e.data && e.data.size > 0) {
-          this.audioChunks.push(e.data);
-        }
-      };
-      this.stream = stream;
+    if (navigator.mediaDevices) {
+      try {
+        const stream = await navigator.mediaDevices.getUserMedia({
+          audio: true,
+        });
+        this.stream = stream;
+        this.mediaRecorder = new MediaRecorder(stream);
+        this.mediaRecorder.ondataavailable = (e) => {
+          if (e.data && e.data.size > 0) {
+            this.audioChunks.push(e.data);
+          }
+        };
+      } catch (error) {
+        console.error("Error accessing media devices:", error);
+        return;
+      }
     } else {
-      console.warn("Media Decives will work only with SSL");
+      console.warn("Media Devices will work only with SSL");
       return;
     }
     this.audioChunks = [];
-    this.mediaRecorder!.start(1000);
-    // this.recorder.addEventListener("dataavailable", (event) => {
-    //   this.audioChunks.push(event.data);
-    // });
+    this.mediaRecorder.start(1000);
     this.listeningStatus = true;
   }
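For reference, a minimal standalone sketch (not part of the patch) of the timeslice behaviour this hunk relies on: MediaRecorder.start(1000) fires "dataavailable" roughly once per second, so audio accumulates in small chunks instead of arriving as one large blob at stop() time. The recordChunks helper and its duration parameter are illustrative assumptions.

// Hypothetical sketch; browser-only, assumes microphone permission is granted.
async function recordChunks(durationMs: number): Promise<Blob[]> {
  const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
  const recorder = new MediaRecorder(stream);
  const chunks: Blob[] = [];
  recorder.ondataavailable = (e) => {
    if (e.data && e.data.size > 0) chunks.push(e.data);
  };
  recorder.start(1000); // emit a chunk every ~1000 ms
  await new Promise((resolve) => setTimeout(resolve, durationMs));
  recorder.stop();
  stream.getTracks().forEach((track) => track.stop()); // release the mic
  return chunks;
}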
@@ -79,6 +86,13 @@ export class OpenAITranscriptionApi extends SpeechApi {
       const transcription = await llm.transcription({ file: audioBlob });
       this.onTranscription(transcription);
       this.listeningStatus = false;
+
+      // Stop all audio tracks
+      if (this.stream) {
+        this.stream.getTracks().forEach((track) => track.stop());
+        this.stream = null;
+      }
       resolve();
     });
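A hypothetical helper illustrating the cleanup this hunk adds: merge the recorded chunks into one Blob for transcription, then stop every track so the browser releases the microphone and hides its recording indicator. The "audio/webm" mime type and the helper name are assumptions; the diff does not show how audioBlob is constructed.

// Sketch only; the mime type is assumed, not taken from the patch.
function finalizeRecording(chunks: Blob[], stream: MediaStream | null): Blob {
  const audioBlob = new Blob(chunks, { type: "audio/webm" });
  stream?.getTracks().forEach((track) => track.stop());
  return audioBlob;
}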
@@ -90,37 +104,117 @@ export class OpenAITranscriptionApi extends SpeechApi {
 export class WebTranscriptionApi extends SpeechApi {
   private listeningStatus = false;
   private recognitionInstance: any | null = null;
+  private shouldContinueListening = false;
 
   isListening = () => this.listeningStatus;
 
   constructor(transcriptionCallback?: TranscriptionCallback) {
     super();
-    if (isFirefox()) return;
+    this.initRecognition();
+    if (transcriptionCallback) {
+      this.onTranscriptionReceived(transcriptionCallback);
+    }
+  }
+
+  private initRecognition(): void {
     const SpeechRecognition =
       (window as any).SpeechRecognition ||
-      (window as any).webkitSpeechRecognition;
+      (window as any).webkitSpeechRecognition ||
+      (window as any).msSpeechRecognition;
+
+    if (!SpeechRecognition) {
+      console.error("SpeechRecognition is not supported in this browser");
+      return;
+    }
+
     this.recognitionInstance = new SpeechRecognition();
     this.recognitionInstance.continuous = true;
     this.recognitionInstance.interimResults = true;
     this.recognitionInstance.lang = getSTTLang();
-    if (transcriptionCallback) {
-      this.onTranscriptionReceived(transcriptionCallback);
-    }
 
     this.recognitionInstance.onresult = (event: any) => {
       const result = event.results[event.results.length - 1];
       if (result.isFinal) {
         this.onTranscription(result[0].transcript);
       }
     };
+
+    this.recognitionInstance.onerror = (event: any) => {
+      console.error("Speech recognition error:", event.error);
+      if (event.error !== "no-speech") {
+        this.listeningStatus = false;
+        this.shouldContinueListening = false;
+      }
+    };
+
+    this.recognitionInstance.onend = () => {
+      console.log("Speech recognition ended");
+      this.listeningStatus = false;
+      if (this.shouldContinueListening) {
+        console.log("Restarting speech recognition");
+        this.start();
+      }
+    };
   }
 
   async start(): Promise<void> {
-    this.listeningStatus = true;
-    await this.recognitionInstance.start();
+    if (this.listeningStatus) {
+      console.warn("Speech recognition is already active.");
+      return;
+    }
+
+    if (!this.recognitionInstance) {
+      this.initRecognition();
+    }
+
+    if (!this.recognitionInstance) {
+      throw new Error("Failed to initialize speech recognition");
+    }
+
+    this.shouldContinueListening = true;
+
+    return new Promise((resolve, reject) => {
+      const startRecognition = () => {
+        try {
+          this.recognitionInstance.start();
+          this.listeningStatus = true;
+          console.log("Speech recognition started");
+          resolve();
+        } catch (error) {
+          console.error("Error starting speech recognition:", error);
+          this.listeningStatus = false;
+          this.shouldContinueListening = false;
+          reject(error);
+        }
+      };
+
+      startRecognition();
+    });
   }
 
   async stop(): Promise<void> {
-    this.listeningStatus = false;
-    await this.recognitionInstance.stop();
+    this.shouldContinueListening = false;
+
+    if (!this.listeningStatus || !this.recognitionInstance) {
+      return;
+    }
+
+    return new Promise<void>((resolve) => {
+      const onStop = () => {
+        this.listeningStatus = false;
+        this.recognitionInstance.removeEventListener("end", onStop);
+        console.log("Speech recognition stopped");
+        resolve();
+      };
+
+      this.recognitionInstance.addEventListener("end", onStop);
+
+      try {
+        this.recognitionInstance.stop();
+      } catch (error) {
+        console.error("Error stopping speech recognition:", error);
+        onStop();
+      }
+    });
   }
 }
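A hypothetical usage sketch of the patched WebTranscriptionApi: because onend now restarts recognition while shouldContinueListening is true, a single start() call keeps dictation alive across Chrome's automatic silence timeouts, which is the behaviour this commit fixes.

// Assumes a module context where top-level await is available.
const stt = new WebTranscriptionApi((text) => {
  console.log("final transcript:", text);
});

await stt.start(); // begins listening; onend auto-restarts while active
// ... later, when the user finishes speaking:
await stt.stop(); // clears shouldContinueListening, then stops cleanly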