From 8a342fa85bbab3ccd0d4aafa9762d448105ee4d8 Mon Sep 17 00:00:00 2001
From: Andrei <andrei@cloudz.ro>
Date: Thu, 2 Oct 2025 07:25:16 +0000
Subject: [PATCH] Fix Web Speech API desktop voice recognition
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Set continuous=true to keep listening through pauses
- Only store final results in state; interim transcripts are logged but no longer surfaced
- Add usesFallback check to route Web Speech API transcripts through classification
- Desktop now captures complete phrases before classification
- Add detailed logging for debugging recognition flow

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
---
 .../components/voice/VoiceFloatingButton.tsx  | 11 +++++++--
 maternal-web/hooks/useVoiceInput.ts           | 23 ++++++++++++++-----
 2 files changed, 26 insertions(+), 8 deletions(-)

diff --git a/maternal-web/components/voice/VoiceFloatingButton.tsx b/maternal-web/components/voice/VoiceFloatingButton.tsx
index 41a1535..4a61169 100644
--- a/maternal-web/components/voice/VoiceFloatingButton.tsx
+++ b/maternal-web/components/voice/VoiceFloatingButton.tsx
@@ -44,10 +44,10 @@ export function VoiceFloatingButton() {
     severity: 'info',
   });
 
-  const { isListening, isSupported, transcript, classification, error, startListening, stopListening, reset } =
+  const { isListening, isSupported, transcript, classification, error, usesFallback, startListening, stopListening, reset } =
     useVoiceInput();
 
-  // Auto-use classification from backend when transcription completes
+  // Auto-use classification from backend when transcription completes (MediaRecorder fallback)
   React.useEffect(() => {
     if (classification && !isListening && !isProcessing && open) {
       setClassificationResult(classification);
@@ -55,6 +55,13 @@ export function VoiceFloatingButton() {
     }
   }, [classification, isListening, isProcessing, open]);
 
+  // For Web Speech API (desktop): once listening stops, pass each new transcript to classifyTranscript
+  React.useEffect(() => {
+    if (!usesFallback && transcript && !isListening && !isProcessing && open && transcript !== lastClassifiedTranscript) {
+      classifyTranscript(transcript);
+    }
+  }, [usesFallback, transcript, isListening, isProcessing, open, lastClassifiedTranscript]);
+
   const handleOpen = () => {
     if (!isSupported) {
       setSnackbar({
diff --git a/maternal-web/hooks/useVoiceInput.ts b/maternal-web/hooks/useVoiceInput.ts
index 26294b5..f64312e 100644
--- a/maternal-web/hooks/useVoiceInput.ts
+++ b/maternal-web/hooks/useVoiceInput.ts
@@ -56,13 +56,15 @@ export function useVoiceInput() {
     } else if (SpeechRecognition) {
       try {
         // Initialize recognition for non-iOS browsers
+        console.log('[Voice] Initializing Web Speech API');
         const recognition = new SpeechRecognition();
-        recognition.continuous = false; // Single recognition
+        recognition.continuous = true; // Keep listening until manually stopped
         recognition.interimResults = true; // Get interim results
         recognition.maxAlternatives = 1;
         recognition.lang = 'en-US'; // Default language
 
         recognitionRef.current = recognition;
+        console.log('[Voice] Web Speech API initialized successfully');
         setState(prev => ({ ...prev, isSupported: true, usesFallback: false }));
       } catch (error) {
         console.warn('[Voice] Speech Recognition initialization failed, trying fallback');
@@ -281,17 +283,24 @@ export function useVoiceInput() {
         }
       }
 
-      setState(prev => ({
-        ...prev,
-        transcript: finalTranscript || interimTranscript,
-      }));
+      // Only final results update state; interim results are just logged to the console for debugging
+      if (finalTranscript) {
+        console.log('[Voice] Final result:', finalTranscript);
+        setState(prev => ({
+          ...prev,
+          transcript: finalTranscript,
+        }));
+      } else {
+        console.log('[Voice] Interim result:', interimTranscript);
+      }
     };
 
     recognition.onerror = (event: any) => {
-      console.error('[Voice] Error:', event.error);
+      console.error('[Voice] Error:', event.error, event);
 
       let errorMessage = 'Failed to recognize speech';
       if (event.error === 'no-speech') {
+        console.warn('[Voice] No speech detected - this is often normal if user stops speaking');
         errorMessage = 'No speech detected. Please try again.';
       } else if (event.error === 'audio-capture') {
         errorMessage = 'No microphone found. Please check your settings.';
@@ -325,7 +334,9 @@ export function useVoiceInput() {
 
     // Start recognition
     try {
+      console.log('[Voice] Calling recognition.start()');
       recognition.start();
+      console.log('[Voice] recognition.start() called successfully');
     } catch (error) {
       console.error('[Voice] Failed to start:', error);
       setState(prev => ({