From 8a342fa85bbab3ccd0d4aafa9762d448105ee4d8 Mon Sep 17 00:00:00 2001 From: Andrei Date: Thu, 2 Oct 2025 07:25:16 +0000 Subject: [PATCH] Fix Web Speech API desktop voice recognition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Set continuous=true to keep listening through pauses - Only process final results, ignore interim transcripts - Add usesFallback check to route Web Speech API transcripts through classification - Desktop now captures complete phrases before classification - Add detailed logging for debugging recognition flow 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- .../components/voice/VoiceFloatingButton.tsx | 11 +++++++-- maternal-web/hooks/useVoiceInput.ts | 23 ++++++++++++++----- 2 files changed, 26 insertions(+), 8 deletions(-) diff --git a/maternal-web/components/voice/VoiceFloatingButton.tsx b/maternal-web/components/voice/VoiceFloatingButton.tsx index 41a1535..4a61169 100644 --- a/maternal-web/components/voice/VoiceFloatingButton.tsx +++ b/maternal-web/components/voice/VoiceFloatingButton.tsx @@ -44,10 +44,10 @@ export function VoiceFloatingButton() { severity: 'info', }); - const { isListening, isSupported, transcript, classification, error, startListening, stopListening, reset } = + const { isListening, isSupported, transcript, classification, error, usesFallback, startListening, stopListening, reset } = useVoiceInput(); - // Auto-use classification from backend when transcription completes + // Auto-use classification from backend when transcription completes (MediaRecorder fallback) React.useEffect(() => { if (classification && !isListening && !isProcessing && open) { setClassificationResult(classification); @@ -55,6 +55,13 @@ export function VoiceFloatingButton() { } }, [classification, isListening, isProcessing, open]); + // For Web Speech API (desktop), classify the transcript client-side + React.useEffect(() => { + if (!usesFallback && transcript && !isListening && !isProcessing && open && transcript !== lastClassifiedTranscript) { + classifyTranscript(transcript); + } + }, [usesFallback, transcript, isListening, isProcessing, open, lastClassifiedTranscript]); + const handleOpen = () => { if (!isSupported) { setSnackbar({ diff --git a/maternal-web/hooks/useVoiceInput.ts b/maternal-web/hooks/useVoiceInput.ts index 26294b5..f64312e 100644 --- a/maternal-web/hooks/useVoiceInput.ts +++ b/maternal-web/hooks/useVoiceInput.ts @@ -56,13 +56,15 @@ export function useVoiceInput() { } else if (SpeechRecognition) { try { // Initialize recognition for non-iOS browsers + console.log('[Voice] Initializing Web Speech API'); const recognition = new SpeechRecognition(); - recognition.continuous = false; // Single recognition + recognition.continuous = true; // Keep listening until manually stopped recognition.interimResults = true; // Get interim results recognition.maxAlternatives = 1; recognition.lang = 'en-US'; // Default language recognitionRef.current = recognition; + console.log('[Voice] Web Speech API initialized successfully'); setState(prev => ({ ...prev, isSupported: true, usesFallback: false })); } catch (error) { console.warn('[Voice] Speech Recognition initialization failed, trying fallback'); @@ -281,17 +283,24 @@ export function useVoiceInput() { } } - setState(prev => ({ - ...prev, - transcript: finalTranscript || interimTranscript, - })); + // Only update state with final results, show interim in console for debugging + if (finalTranscript) { + console.log('[Voice] Final result:', finalTranscript); + setState(prev => ({ + ...prev, + transcript: finalTranscript, + })); + } else { + console.log('[Voice] Interim result:', interimTranscript); + } }; recognition.onerror = (event: any) => { - console.error('[Voice] Error:', event.error); + console.error('[Voice] Error:', event.error, event); let errorMessage = 'Failed to recognize speech'; if (event.error === 'no-speech') { + console.warn('[Voice] No speech detected - this is often normal if user stops speaking'); errorMessage = 'No speech detected. Please try again.'; } else if (event.error === 'audio-capture') { errorMessage = 'No microphone found. Please check your settings.'; @@ -325,7 +334,9 @@ export function useVoiceInput() { // Start recognition try { + console.log('[Voice] Calling recognition.start()'); recognition.start(); + console.log('[Voice] recognition.start() called successfully'); } catch (error) { console.error('[Voice] Failed to start:', error); setState(prev => ({