Fix login data structure and improve voice input UX

- Fix login endpoint to return families as array of objects instead of strings (see the sketch after this list)
- Update auth interface to match /auth/me endpoint structure
- Add silence detection to voice input (auto-stop after 1.5s)
- Add comprehensive status messages to voice modal (Listening, Understanding, Saving)
- Unify voice input flow to use MediaRecorder + backend for all platforms
- Add null checks to prevent tracking page crashes from invalid data
- Wait for auth completion before loading family data in HomePage
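
A minimal sketch of the shape the first two bullets describe. None of the hunks below touch the auth code, so every field name here is an assumption rather than something taken from the repository:

// Hypothetical shapes only: field names are assumptions.
interface FamilySummary {
  id: string;
  name: string;
}

interface AuthUser {
  id: string;
  email: string;
  // Previously the login endpoint returned families as string[]; it now returns
  // objects, matching the structure that /auth/me already uses.
  families: FamilySummary[];
}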

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-02 10:25:13 +00:00
parent 4b8828fdad
commit c60467b6f9
9 changed files with 231 additions and 120 deletions


@@ -36,49 +36,21 @@ export function useVoiceInput() {
  const mediaRecorderRef = useRef<MediaRecorder | null>(null);
  const audioChunksRef = useRef<Blob[]>([]);
  const timeoutRef = useRef<NodeJS.Timeout | null>(null);
  const silenceTimeoutRef = useRef<NodeJS.Timeout | null>(null);
  const audioContextRef = useRef<AudioContext | null>(null);
  const analyserRef = useRef<AnalyserNode | null>(null);

  // Check if browser supports Speech Recognition or MediaRecorder
  // Check if browser supports MediaRecorder (unified approach for all platforms)
  useEffect(() => {
    // Detect iOS Safari specifically
    const isIOSSafari = /iPad|iPhone|iPod/.test(navigator.userAgent) && !(window as any).MSStream;

    // Always use MediaRecorder + backend transcription for consistency
    // This gives us one flow to debug and maintain, works on all platforms
    console.log('[Voice] Checking MediaRecorder support...');

    const SpeechRecognition =
      (window as any).SpeechRecognition || (window as any).webkitSpeechRecognition;

    // Force fallback for iOS Safari regardless of Speech Recognition availability
    if (isIOSSafari) {
      console.log('[Voice] iOS Safari detected, using MediaRecorder fallback');
      if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
        setState(prev => ({ ...prev, isSupported: true, usesFallback: true }));
      } else {
        setState(prev => ({ ...prev, isSupported: false }));
      }
    } else if (SpeechRecognition) {
      try {
        // Initialize recognition for non-iOS browsers
        console.log('[Voice] Initializing Web Speech API');
        const recognition = new SpeechRecognition();
        recognition.continuous = true; // Keep listening until manually stopped
        recognition.interimResults = true; // Get interim results
        recognition.maxAlternatives = 1;
        recognition.lang = 'en-US'; // Default language
        recognitionRef.current = recognition;
        console.log('[Voice] Web Speech API initialized successfully');
        setState(prev => ({ ...prev, isSupported: true, usesFallback: false }));
      } catch (error) {
        console.warn('[Voice] Speech Recognition initialization failed, trying fallback');
        if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
          setState(prev => ({ ...prev, isSupported: true, usesFallback: true }));
        } else {
          setState(prev => ({ ...prev, isSupported: false }));
        }
      }
    } else if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
      // Use MediaRecorder fallback for other browsers without Speech Recognition
      console.log('[Voice] No Speech Recognition, using MediaRecorder fallback');
    if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
      console.log('[Voice] MediaRecorder supported, will use backend transcription for all platforms');
      setState(prev => ({ ...prev, isSupported: true, usesFallback: true }));
    } else {
      console.log('[Voice] MediaRecorder not supported');
      setState(prev => ({ ...prev, isSupported: false }));
    }
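
For readability, here is a sketch of what the support check presumably reduces to after this hunk, assembled only from the new-style lines above. The hook name and the reduced state shape are illustrative, not taken from the file:

import { useEffect, useState } from 'react';

// Illustrative hook name and reduced state shape; the console messages and flags
// mirror the new-style lines in the hunk above.
export function useMediaRecorderSupport() {
  const [state, setState] = useState({ isSupported: false, usesFallback: false });

  useEffect(() => {
    console.log('[Voice] Checking MediaRecorder support...');
    if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
      console.log('[Voice] MediaRecorder supported, will use backend transcription for all platforms');
      setState(prev => ({ ...prev, isSupported: true, usesFallback: true }));
    } else {
      console.log('[Voice] MediaRecorder not supported');
      setState(prev => ({ ...prev, isSupported: false }));
    }
  }, []);

  return state;
}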
@@ -115,6 +87,59 @@ export function useVoiceInput() {
      console.log('[Voice] Microphone access granted, creating MediaRecorder...');

      // Set up silence detection using Web Audio API
      const audioContext = new (window.AudioContext || (window as any).webkitAudioContext)();
      const analyser = audioContext.createAnalyser();
      const microphone = audioContext.createMediaStreamSource(stream);
      analyser.fftSize = 512;
      microphone.connect(analyser);
      audioContextRef.current = audioContext;
      analyserRef.current = analyser;

      // Monitor audio levels for silence detection
      const bufferLength = analyser.frequencyBinCount;
      const dataArray = new Uint8Array(bufferLength);
      let lastSoundTime = Date.now();
      const SILENCE_THRESHOLD = 10; // Adjust based on testing
      const SILENCE_DURATION = 1500; // 1.5 seconds of silence

      const checkSilence = () => {
        analyser.getByteFrequencyData(dataArray);
        const average = dataArray.reduce((a, b) => a + b, 0) / bufferLength;

        if (average > SILENCE_THRESHOLD) {
          lastSoundTime = Date.now();
          // Clear silence timeout if sound detected
          if (silenceTimeoutRef.current) {
            clearTimeout(silenceTimeoutRef.current);
            silenceTimeoutRef.current = null;
          }
        } else {
          // Check if silence has lasted long enough
          const silenceDuration = Date.now() - lastSoundTime;
          if (silenceDuration > SILENCE_DURATION && !silenceTimeoutRef.current) {
            console.log('[Voice] Silence detected, auto-stopping...');
            if (mediaRecorderRef.current && mediaRecorderRef.current.state === 'recording') {
              mediaRecorderRef.current.stop();
            }
            return;
          }
        }

        // Continue checking if still recording
        if (mediaRecorderRef.current && mediaRecorderRef.current.state === 'recording') {
          requestAnimationFrame(checkSilence);
        }
      };

      // Start monitoring after a brief delay to avoid immediate stop
      setTimeout(() => {
        if (mediaRecorderRef.current && mediaRecorderRef.current.state === 'recording') {
          requestAnimationFrame(checkSilence);
        }
      }, 500);

      // Try different mime types for iOS Safari compatibility
      let mimeType = 'audio/webm;codecs=opus';
      if (!MediaRecorder.isTypeSupported(mimeType)) {
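
The hunk cuts off inside the isTypeSupported check, so the actual fallback list is not visible here. A sketch of a typical chain for Safari/iOS, with the specific mime types assumed:

// Hypothetical continuation: the specific fallback types are assumptions.
function pickRecorder(stream: MediaStream): MediaRecorder {
  let mimeType = 'audio/webm;codecs=opus';
  if (!MediaRecorder.isTypeSupported(mimeType)) {
    mimeType = 'audio/mp4'; // Safari/iOS generally records MP4/AAC
  }
  if (!MediaRecorder.isTypeSupported(mimeType)) {
    mimeType = ''; // fall back to the browser default container
  }
  return new MediaRecorder(stream, mimeType ? { mimeType } : undefined);
}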
@@ -200,6 +225,12 @@ export function useVoiceInput() {
        // Stop all tracks
        stream.getTracks().forEach(track => track.stop());
        console.log('[Voice] Stream tracks stopped');

        // Clean up audio context
        if (audioContextRef.current) {
          audioContextRef.current.close();
          audioContextRef.current = null;
        }
      };

      mediaRecorder.onerror = (event) => {
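
None of the captured hunks show how the recorded audio reaches the backend transcription mentioned in the commit message. A hedged sketch of what that upload step could look like; the endpoint path, form field, and response field are all assumptions:

// Hypothetical upload step: the endpoint path, form field name and response field
// are assumptions, not taken from the repository.
async function transcribeRecording(chunks: Blob[], mimeType: string): Promise<string> {
  const audioBlob = new Blob(chunks, { type: mimeType });
  const form = new FormData();
  form.append('audio', audioBlob, 'voice-input.webm');

  const response = await fetch('/api/voice/transcribe', { method: 'POST', body: form });
  if (!response.ok) {
    throw new Error(`Transcription failed with status ${response.status}`);
  }
  const data = await response.json();
  return data.transcript; // assumed response field
}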
@@ -265,6 +296,8 @@ export function useVoiceInput() {
      error: null,
    }));

    let lastSpeechTime = Date.now();

    // Set up event handlers
    recognition.onstart = () => {
      console.log('[Voice] Started listening');
@@ -283,6 +316,26 @@ export function useVoiceInput() {
        }
      }

      // Update last speech time
      lastSpeechTime = Date.now();

      // Reset silence timeout
      if (silenceTimeoutRef.current) {
        clearTimeout(silenceTimeoutRef.current);
      }

      // Set new silence timeout (1.5 seconds after last speech)
      silenceTimeoutRef.current = setTimeout(() => {
        console.log('[Voice] Silence detected, auto-stopping...');
        if (recognitionRef.current) {
          try {
            recognitionRef.current.stop();
          } catch (e) {
            // Ignore errors
          }
        }
      }, 1500);

      // Only update state with final results, show interim in console for debugging
      if (finalTranscript) {
        console.log('[Voice] Final result:', finalTranscript);
@@ -347,19 +400,10 @@ export function useVoiceInput() {
    }
  }, []);

  // Start listening (chooses appropriate method)
  // Start listening (always uses MediaRecorder + backend transcription)
  const startListening = useCallback(() => {
    if (state.usesFallback) {
      startListeningWithFallback();
    } else if (recognitionRef.current) {
      startListeningWithSpeechAPI();
    } else {
      setState(prev => ({
        ...prev,
        error: 'Voice input not supported in this browser',
      }));
    }
  }, [state.usesFallback, startListeningWithFallback, startListeningWithSpeechAPI]);
    startListeningWithFallback();
  }, [startListeningWithFallback]);

  // Stop listening
  const stopListening = useCallback(() => {
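
The capture ends mid-hook. For completeness, a hypothetical usage sketch of the hook from a component; the import path and the transcript/isListening fields are assumptions, while isSupported, error, startListening and stopListening appear in the hunks above:

// Hypothetical usage: import path, transcript and isListening are assumptions.
import React from 'react';
import { useVoiceInput } from './useVoiceInput';

export function VoiceButton() {
  const { isSupported, isListening, transcript, error, startListening, stopListening } =
    useVoiceInput();

  if (!isSupported) {
    return <p>Voice input is not supported in this browser.</p>;
  }

  return (
    <div>
      <button onClick={isListening ? stopListening : startListening}>
        {isListening ? 'Stop' : 'Speak'}
      </button>
      {transcript && <p>{transcript}</p>}
      {error && <p role="alert">{error}</p>}
    </div>
  );
}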