Fix login data structure and improve voice input UX

- Fix login endpoint to return families as an array of objects instead of strings
- Update auth interface to match the /auth/me endpoint structure (shape sketched below this list)
- Add silence detection to voice input (auto-stop after 1.5s)
- Add comprehensive status messages to voice modal (Listening, Understanding, Saving)
- Unify voice input flow to use MediaRecorder + backend for all platforms
- Add null checks to prevent tracking page crashes from invalid data
- Wait for auth completion before loading family data in HomePage
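
For reference, a minimal sketch of the user shape the frontend now assumes after this change. Only the array-of-objects form of families and the familyId field are confirmed by the diffs below (user?.families?.[0]?.familyId); the other field names are assumptions:

// Hedged sketch, not part of the commit: only families-as-objects and familyId
// are confirmed by usage elsewhere in this diff.
interface FamilyMembership {
  familyId: string;      // confirmed by HomePage usage below
  familyName?: string;   // assumed
  role?: string;         // assumed
}

interface AuthUser {
  id: string;            // assumed
  email: string;         // assumed
  families: FamilyMembership[]; // previously string[], now objects matching /auth/me
}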

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-02 10:25:13 +00:00
parent 4b8828fdad
commit c60467b6f9
9 changed files with 231 additions and 120 deletions

View File

@@ -16,7 +16,7 @@ export async function POST(request: NextRequest) {
let transcribedText: string;
if (contentType.includes('application/json')) {
-// Text input (already transcribed)
+// Text input (already transcribed) - forward to backend for LLM classification
const body = await request.json();
transcribedText = body.text;
@@ -29,6 +29,41 @@ export async function POST(request: NextRequest) {
{ status: 400 }
);
}
// Forward text to backend for LLM-based classification
const backendUrl = process.env.NEXT_PUBLIC_API_URL || 'http://localhost:3020';
const backendResponse = await fetch(`${backendUrl}/api/v1/voice/transcribe`, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
// Forward auth token if present
...(request.headers.get('authorization') && {
authorization: request.headers.get('authorization')!,
}),
},
body: JSON.stringify({
text: transcribedText,
language: body.language || 'en',
childName: body.childName,
}),
});
if (!backendResponse.ok) {
const errorData = await backendResponse.json();
return NextResponse.json(errorData, { status: backendResponse.status });
}
const result = await backendResponse.json();
// Backend returns { success, transcript, classification }
return NextResponse.json(
{
success: true,
transcript: result.transcript,
classification: result.classification,
},
{ status: 200 }
);
} else if (contentType.includes('multipart/form-data')) {
// Audio file upload - forward to backend for Whisper transcription
const formData = await request.formData();

View File

@@ -25,7 +25,7 @@ import { childrenApi, Child } from '@/lib/api/children';
import { format } from 'date-fns';
export default function HomePage() {
-const { user } = useAuth();
+const { user, isLoading: authLoading } = useAuth();
const router = useRouter();
const [children, setChildren] = useState<Child[]>([]);
const [selectedChild, setSelectedChild] = useState<Child | null>(null);
@@ -33,17 +33,29 @@ export default function HomePage() {
const [loading, setLoading] = useState(true);
const familyId = user?.families?.[0]?.familyId;
// Load children and daily summary
useEffect(() => {
const loadData = async () => {
// Wait for auth to complete before trying to load data
if (authLoading) {
return;
}
if (!familyId) {
console.log('[HomePage] No familyId found');
console.log('[HomePage] User object:', JSON.stringify(user, null, 2));
console.log('[HomePage] User.families:', user?.families);
setLoading(false);
return;
}
console.log('[HomePage] Loading data for familyId:', familyId);
try {
// Load children
const childrenData = await childrenApi.getChildren(familyId);
console.log('[HomePage] Children loaded:', childrenData.length);
setChildren(childrenData);
if (childrenData.length > 0) {
@@ -56,14 +68,14 @@ export default function HomePage() {
setDailySummary(summary);
}
} catch (error) {
-console.error('Failed to load data:', error);
+console.error('[HomePage] Failed to load data:', error);
} finally {
setLoading(false);
}
};
loadData();
-}, [familyId]);
+}, [familyId, authLoading, user]);
const quickActions = [
{ icon: <Restaurant />, label: 'Feeding', color: '#FFB6C1', path: '/track/feeding' },

View File

@@ -619,6 +619,11 @@ export default function DiaperTrackPage() {
<Box sx={{ display: 'flex', flexDirection: 'column', gap: 2 }}>
{recentDiapers.map((activity, index) => {
const data = activity.data as DiaperData;
// Skip activities with invalid data structure
if (!data || !data.diaperType) {
console.warn('[Diaper] Activity missing diaperType:', activity);
return null;
}
return (
<motion.div
key={activity.id}

View File

@@ -601,6 +601,11 @@ function FeedingTrackPage() {
<Box sx={{ display: 'flex', flexDirection: 'column', gap: 2 }}>
{recentFeedings.map((activity, index) => {
const data = activity.data as FeedingData;
// Skip activities with invalid data structure
if (!data || !data.feedingType) {
console.warn('[Feeding] Activity missing feedingType:', activity);
return null;
}
return (
<motion.div
key={activity.id}

View File

@@ -557,6 +557,11 @@ export default function SleepTrackPage() {
<Box sx={{ display: 'flex', flexDirection: 'column', gap: 2 }}>
{recentSleeps.map((activity, index) => {
const data = activity.data as SleepData;
// Skip activities with invalid data structure
if (!data || !data.quality || !data.location) {
console.warn('[Sleep] Activity missing required fields:', activity);
return null;
}
return (
<motion.div
key={activity.id}
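
The same defensive check now appears in the diaper, feeding, and sleep pages above. If more trackers need it, it could be folded into one guard; a sketch under the assumption that each page keeps its own Data interface:

// Hypothetical helper, not part of this commit: filters activity payloads that are
// missing required fields before rendering, mirroring the inline checks above.
function hasRequiredFields<T extends object>(
  data: unknown,
  fields: (keyof T & string)[],
): data is T {
  return (
    typeof data === 'object' &&
    data !== null &&
    fields.every(f => (data as Record<string, unknown>)[f] != null)
  );
}

// Example use in the sleep tracker:
// if (!hasRequiredFields<SleepData>(activity.data, ['quality', 'location'])) return null;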

View File

@@ -36,8 +36,9 @@ export function VoiceFloatingButton() {
const { user } = useAuth();
const [open, setOpen] = useState(false);
const [isProcessing, setIsProcessing] = useState(false);
+const [processingStatus, setProcessingStatus] = useState<'listening' | 'understanding' | 'saving' | null>(null);
+const [identifiedActivity, setIdentifiedActivity] = useState<string>('');
const [classificationResult, setClassificationResult] = useState<any>(null);
-const [lastClassifiedTranscript, setLastClassifiedTranscript] = useState<string>('');
const [snackbar, setSnackbar] = useState<{
open: boolean;
message: string;
@@ -53,7 +54,18 @@ export function VoiceFloatingButton() {
const { isListening, isSupported, transcript, classification, error, usesFallback, startListening, stopListening, reset } =
useVoiceInput();
-// Auto-use classification from backend when transcription completes (MediaRecorder fallback)
+// Set status when listening starts/stops
+React.useEffect(() => {
+if (isListening) {
+setProcessingStatus('listening');
+} else if (processingStatus === 'listening' && transcript) {
+// Transition from listening to understanding when we have a transcript
+setProcessingStatus('understanding');
+}
+}, [isListening, transcript]);
+// Auto-use classification from backend when transcription completes
+// MediaRecorder sends audio to backend, which transcribes + classifies in one call
React.useEffect(() => {
if (classification && !isListening && !isProcessing && open) {
setClassificationResult(classification);
@@ -61,13 +73,6 @@ export function VoiceFloatingButton() {
}
}, [classification, isListening, isProcessing, open]);
-// For Web Speech API (desktop), classify the transcript client-side
-React.useEffect(() => {
-if (!usesFallback && transcript && !isListening && !isProcessing && open && transcript !== lastClassifiedTranscript) {
-classifyTranscript(transcript);
-}
-}, [usesFallback, transcript, isListening, isProcessing, open, lastClassifiedTranscript]);
const handleOpen = () => {
if (!isSupported) {
setSnackbar({
@@ -80,7 +85,8 @@ export function VoiceFloatingButton() {
setOpen(true);
reset();
setClassificationResult(null);
-setLastClassifiedTranscript('');
+setProcessingStatus(null);
+setIdentifiedActivity('');
};
const handleClose = () => {
@@ -90,13 +96,13 @@ export function VoiceFloatingButton() {
setOpen(false);
reset();
setClassificationResult(null);
-setLastClassifiedTranscript('');
+setProcessingStatus(null);
+setIdentifiedActivity('');
};
const handleStartListening = () => {
reset();
setClassificationResult(null);
-setLastClassifiedTranscript('');
startListening();
};
@@ -104,43 +110,12 @@ export function VoiceFloatingButton() {
stopListening();
};
-const classifyTranscript = async (text: string) => {
-// Mark this transcript as being classified to prevent duplicate calls
-setLastClassifiedTranscript(text);
-setIsProcessing(true);
-try {
-const response = await fetch('/api/voice/transcribe', {
-method: 'POST',
-headers: {
-'Content-Type': 'application/json',
-},
-body: JSON.stringify({ text }),
-});
-const data = await response.json();
-if (response.ok && data.success) {
-setClassificationResult(data.classification);
-handleClassifiedIntent(data.classification);
-} else {
-setClassificationResult({
-error: true,
-message: data.message || 'Could not understand command',
-});
-}
-} catch (error) {
-console.error('[Voice] Classification error:', error);
-setClassificationResult({
-error: true,
-message: 'Failed to process command',
-});
-} finally {
-setIsProcessing(false);
-}
-};
const handleClassifiedIntent = async (result: any) => {
console.log('[Voice] handleClassifiedIntent called with result:', result);
if (result.error) {
console.log('[Voice] Result has error:', result.message);
setProcessingStatus(null);
setSnackbar({
open: true,
message: result.message,
@@ -149,8 +124,17 @@ export function VoiceFloatingButton() {
return;
}
// Support both formats: backend returns 'type', frontend local classifier returns 'intent'
const activityType = result.type || result.intent;
console.log('[Voice] Activity type:', activityType);
// Set identified activity for status display
setIdentifiedActivity(activityType);
// Handle unknown or low confidence
-if (result.type === 'unknown' || (result.confidence && result.confidence < 0.3)) {
+if (activityType === 'unknown' || (result.confidence && result.confidence < 0.3)) {
console.log('[Voice] Unknown or low confidence:', activityType, result.confidence);
setProcessingStatus(null);
setSnackbar({
open: true,
message: 'Could not understand the command. Please try again or use manual entry.',
@@ -161,6 +145,8 @@ export function VoiceFloatingButton() {
// Get the first child from the family
if (!familyId) {
console.log('[Voice] No familyId found');
setProcessingStatus(null);
setSnackbar({
open: true,
message: 'No family found. Please set up your profile first.',
@@ -169,11 +155,17 @@ export function VoiceFloatingButton() {
return;
}
console.log('[Voice] Family ID:', familyId);
try {
setIsProcessing(true);
setProcessingStatus('saving');
// Fetch children
console.log('[Voice] Fetching children for family:', familyId);
const children = await childrenApi.getChildren(familyId);
console.log('[Voice] Children found:', children.length, children);
if (children.length === 0) {
setSnackbar({
open: true,
@@ -186,21 +178,23 @@ export function VoiceFloatingButton() {
// Use the first child (or you could enhance this to support child name matching)
const childId = children[0].id;
console.log('[Voice] Using child ID:', childId);
// Create the activity
const activityData = {
-type: result.type,
+type: activityType,
timestamp: result.timestamp || new Date().toISOString(),
-data: result.details || {},
-notes: result.details?.notes || undefined,
+data: result.details || result.structuredData || {},
+notes: result.details?.notes || result.structuredData?.notes || undefined,
};
-console.log('[Voice] Creating activity:', activityData);
+console.log('[Voice] Creating activity with data:', JSON.stringify(activityData, null, 2));
-await trackingApi.createActivity(childId, activityData);
+const createdActivity = await trackingApi.createActivity(childId, activityData);
+console.log('[Voice] Activity created successfully:', createdActivity);
// Show success message
-const activityLabel = result.type.charAt(0).toUpperCase() + result.type.slice(1);
+const activityLabel = activityType.charAt(0).toUpperCase() + activityType.slice(1);
setSnackbar({
open: true,
message: `${activityLabel} activity saved successfully!`,
@@ -212,7 +206,9 @@ export function VoiceFloatingButton() {
handleClose();
}, 1500);
} catch (error: any) {
-console.error('[Voice] Failed to create activity:', error);
+console.error('[Voice] Failed to create activity - Full error:', error);
+console.error('[Voice] Error response:', error.response);
+console.error('[Voice] Error data:', error.response?.data);
setSnackbar({
open: true,
message: error.response?.data?.message || 'Failed to save activity. Please try again.',
@@ -253,7 +249,7 @@ export function VoiceFloatingButton() {
Voice Command
{classificationResult && !classificationResult.error && (
<Chip
-label={`${classificationResult.intent} (${classificationResult.confidenceLevel})`}
+label={`${classificationResult.type || classificationResult.intent} (${classificationResult.confidenceLevel || Math.round((classificationResult.confidence || 0) * 100) + '%'})`}
color="success"
size="small"
sx={{ ml: 2 }}
@@ -287,9 +283,12 @@ export function VoiceFloatingButton() {
</IconButton>
</Box>
-{/* Status text */}
+{/* Status text with detailed processing stages */}
<Typography variant="body1" color="text.secondary" gutterBottom>
-{isListening ? 'Listening... Speak now' : 'Click the microphone to start'}
+{processingStatus === 'listening' && 'Listening... Speak now'}
+{processingStatus === 'understanding' && 'Understanding your request...'}
+{processingStatus === 'saving' && identifiedActivity && `Adding to ${identifiedActivity.charAt(0).toUpperCase() + identifiedActivity.slice(1)} tracker...`}
+{!processingStatus && !isListening && 'Click the microphone to start'}
</Typography>
{/* Transcript */}
@@ -302,12 +301,14 @@ export function VoiceFloatingButton() {
</Box>
)}
-{/* Processing indicator */}
-{isProcessing && (
+{/* Processing indicator with status */}
+{processingStatus && (
<Box sx={{ mt: 2, display: 'flex', alignItems: 'center', justifyContent: 'center' }}>
<CircularProgress size={20} sx={{ mr: 1 }} />
<Typography variant="body2" color="text.secondary">
-Processing command...
+{processingStatus === 'listening' && 'Listening...'}
+{processingStatus === 'understanding' && 'Understanding...'}
+{processingStatus === 'saving' && 'Saving...'}
</Typography>
</Box>
)}
@@ -316,7 +317,7 @@ export function VoiceFloatingButton() {
{classificationResult && !classificationResult.error && (
<Alert severity="success" sx={{ mt: 2 }}>
<Typography variant="body2" gutterBottom>
-<strong>Understood:</strong> {classificationResult.intent}
+<strong>Understood:</strong> {classificationResult.type || classificationResult.intent}
</Typography>
</Alert>
)}
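
The component above reads the classification result in two formats (backend: type/confidence, legacy client-side: intent/confidenceLevel). A hedged sketch of a shape the any-typed state could eventually be narrowed to; every field is inferred from how classificationResult is used above, not from a published contract:

// Sketch only: optional fields reflect the two formats read by the Chip label,
// the Alert, and handleClassifiedIntent above.
interface ClassificationResult {
  type?: string;             // backend format
  intent?: string;           // legacy client-side format
  confidence?: number;       // 0..1, backend
  confidenceLevel?: string;  // legacy label
  timestamp?: string;
  details?: Record<string, unknown>;
  structuredData?: Record<string, unknown>;
  error?: boolean;
  message?: string;
}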

View File

@@ -36,49 +36,21 @@ export function useVoiceInput() {
const mediaRecorderRef = useRef<MediaRecorder | null>(null);
const audioChunksRef = useRef<Blob[]>([]);
const timeoutRef = useRef<NodeJS.Timeout | null>(null);
const silenceTimeoutRef = useRef<NodeJS.Timeout | null>(null);
const audioContextRef = useRef<AudioContext | null>(null);
const analyserRef = useRef<AnalyserNode | null>(null);
-// Check if browser supports Speech Recognition or MediaRecorder
+// Check if browser supports MediaRecorder (unified approach for all platforms)
useEffect(() => {
// Detect iOS Safari specifically
const isIOSSafari = /iPad|iPhone|iPod/.test(navigator.userAgent) && !(window as any).MSStream;
// Always use MediaRecorder + backend transcription for consistency
// This gives us one flow to debug and maintain, works on all platforms
console.log('[Voice] Checking MediaRecorder support...');
const SpeechRecognition =
(window as any).SpeechRecognition || (window as any).webkitSpeechRecognition;
// Force fallback for iOS Safari regardless of Speech Recognition availability
if (isIOSSafari) {
console.log('[Voice] iOS Safari detected, using MediaRecorder fallback');
if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
setState(prev => ({ ...prev, isSupported: true, usesFallback: true }));
} else {
setState(prev => ({ ...prev, isSupported: false }));
}
} else if (SpeechRecognition) {
try {
// Initialize recognition for non-iOS browsers
console.log('[Voice] Initializing Web Speech API');
const recognition = new SpeechRecognition();
recognition.continuous = true; // Keep listening until manually stopped
recognition.interimResults = true; // Get interim results
recognition.maxAlternatives = 1;
recognition.lang = 'en-US'; // Default language
recognitionRef.current = recognition;
console.log('[Voice] Web Speech API initialized successfully');
setState(prev => ({ ...prev, isSupported: true, usesFallback: false }));
} catch (error) {
console.warn('[Voice] Speech Recognition initialization failed, trying fallback');
if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
setState(prev => ({ ...prev, isSupported: true, usesFallback: true }));
} else {
setState(prev => ({ ...prev, isSupported: false }));
}
}
} else if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
// Use MediaRecorder fallback for other browsers without Speech Recognition
console.log('[Voice] No Speech Recognition, using MediaRecorder fallback');
if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
console.log('[Voice] MediaRecorder supported, will use backend transcription for all platforms');
setState(prev => ({ ...prev, isSupported: true, usesFallback: true }));
} else {
console.log('[Voice] MediaRecorder not supported');
setState(prev => ({ ...prev, isSupported: false }));
}
@@ -115,6 +87,59 @@ export function useVoiceInput() {
console.log('[Voice] Microphone access granted, creating MediaRecorder...');
// Set up silence detection using Web Audio API
const audioContext = new (window.AudioContext || (window as any).webkitAudioContext)();
const analyser = audioContext.createAnalyser();
const microphone = audioContext.createMediaStreamSource(stream);
analyser.fftSize = 512;
microphone.connect(analyser);
audioContextRef.current = audioContext;
analyserRef.current = analyser;
// Monitor audio levels for silence detection
const bufferLength = analyser.frequencyBinCount;
const dataArray = new Uint8Array(bufferLength);
let lastSoundTime = Date.now();
const SILENCE_THRESHOLD = 10; // Adjust based on testing
const SILENCE_DURATION = 1500; // 1.5 seconds of silence
const checkSilence = () => {
analyser.getByteFrequencyData(dataArray);
const average = dataArray.reduce((a, b) => a + b, 0) / bufferLength;
if (average > SILENCE_THRESHOLD) {
lastSoundTime = Date.now();
// Clear silence timeout if sound detected
if (silenceTimeoutRef.current) {
clearTimeout(silenceTimeoutRef.current);
silenceTimeoutRef.current = null;
}
} else {
// Check if silence has lasted long enough
const silenceDuration = Date.now() - lastSoundTime;
if (silenceDuration > SILENCE_DURATION && !silenceTimeoutRef.current) {
console.log('[Voice] Silence detected, auto-stopping...');
if (mediaRecorderRef.current && mediaRecorderRef.current.state === 'recording') {
mediaRecorderRef.current.stop();
}
return;
}
}
// Continue checking if still recording
if (mediaRecorderRef.current && mediaRecorderRef.current.state === 'recording') {
requestAnimationFrame(checkSilence);
}
};
// Start monitoring after a brief delay to avoid immediate stop
setTimeout(() => {
if (mediaRecorderRef.current && mediaRecorderRef.current.state === 'recording') {
requestAnimationFrame(checkSilence);
}
}, 500);
// Try different mime types for iOS Safari compatibility
let mimeType = 'audio/webm;codecs=opus';
if (!MediaRecorder.isTypeSupported(mimeType)) {
@@ -200,6 +225,12 @@ export function useVoiceInput() {
// Stop all tracks
stream.getTracks().forEach(track => track.stop());
console.log('[Voice] Stream tracks stopped');
// Clean up audio context
if (audioContextRef.current) {
audioContextRef.current.close();
audioContextRef.current = null;
}
};
mediaRecorder.onerror = (event) => {
@@ -265,6 +296,8 @@ export function useVoiceInput() {
error: null,
}));
let lastSpeechTime = Date.now();
// Set up event handlers
recognition.onstart = () => {
console.log('[Voice] Started listening');
@@ -283,6 +316,26 @@ export function useVoiceInput() {
}
}
// Update last speech time
lastSpeechTime = Date.now();
// Reset silence timeout
if (silenceTimeoutRef.current) {
clearTimeout(silenceTimeoutRef.current);
}
// Set new silence timeout (1.5 seconds after last speech)
silenceTimeoutRef.current = setTimeout(() => {
console.log('[Voice] Silence detected, auto-stopping...');
if (recognitionRef.current) {
try {
recognitionRef.current.stop();
} catch (e) {
// Ignore errors
}
}
}, 1500);
// Only update state with final results, show interim in console for debugging
if (finalTranscript) {
console.log('[Voice] Final result:', finalTranscript);
@@ -347,19 +400,10 @@ export function useVoiceInput() {
}
}, []);
-// Start listening (chooses appropriate method)
+// Start listening (always uses MediaRecorder + backend transcription)
const startListening = useCallback(() => {
-if (state.usesFallback) {
-startListeningWithFallback();
-} else if (recognitionRef.current) {
-startListeningWithSpeechAPI();
-} else {
-setState(prev => ({
-...prev,
-error: 'Voice input not supported in this browser',
-}));
-}
-}, [state.usesFallback, startListeningWithFallback, startListeningWithSpeechAPI]);
+startListeningWithFallback();
+}, [startListeningWithFallback]);
// Stop listening
const stopListening = useCallback(() => {
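
The silence detection above lives inline in the MediaRecorder path. If the threshold or window needs tuning later, the loop could be pulled into a small helper; a sketch that keeps the same byte-frequency average, threshold of 10, and 1.5 s window used in the hook:

// Hypothetical extraction of the silence loop, not part of this commit.
// Constants mirror the values above but remain assumptions to tune per device.
function watchForSilence(
  analyser: AnalyserNode,
  isRecording: () => boolean,
  onSilence: () => void,
  threshold = 10,
  durationMs = 1500,
) {
  const data = new Uint8Array(analyser.frequencyBinCount);
  let lastSound = Date.now();
  const tick = () => {
    if (!isRecording()) return;                 // stop polling once recording ends
    analyser.getByteFrequencyData(data);
    const avg = data.reduce((a, b) => a + b, 0) / data.length;
    if (avg > threshold) {
      lastSound = Date.now();                   // sound detected, reset the window
    } else if (Date.now() - lastSound > durationMs) {
      onSilence();                              // sustained silence, trigger auto-stop
      return;
    }
    requestAnimationFrame(tick);
  };
  requestAnimationFrame(tick);
}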