Fix login data structure and improve voice input UX
Some checks failed
CI/CD Pipeline / Lint and Test (push) Has been cancelled
CI/CD Pipeline / E2E Tests (push) Has been cancelled
CI/CD Pipeline / Build Application (push) Has been cancelled

- Fix login endpoint to return families as an array of objects instead of ID strings (shape sketched below, just before the diffs)
- Update auth interface to match the /auth/me endpoint structure
- Add silence detection to voice input (auto-stop after 1.5s of silence)
- Add staged status messages to the voice modal (Listening, Understanding, Saving)
- Unify voice input flow to use MediaRecorder + backend transcription on all platforms
- Add null checks to prevent tracking-page crashes on invalid activity data
- Wait for auth completion before loading family data in HomePage

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-02 10:25:13 +00:00
parent 4b8828fdad
commit c60467b6f9
9 changed files with 231 additions and 120 deletions
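
For reference, a minimal sketch of the family membership shape that login now shares with /auth/me, and a hypothetical helper mirroring how the frontend picks the active family. The interface name, helper name, and role value are illustrative; only the field names come from the diffs below.

// Shape of each entry in AuthResponse.user.families (mirrors /auth/me)
interface FamilyMembershipSummary {
  id: string;       // currently populated with the membership's familyId (see the auth service diff)
  familyId: string;
  role: string;     // e.g. 'parent' - example value only, not taken from the diff
}

// Hypothetical helper mirroring how HomePage and VoiceFloatingButton resolve the active family
function getActiveFamilyId(user?: { families?: FamilyMembershipSummary[] }): string | undefined {
  return user?.families?.[0]?.familyId;
}

Note that id and familyId carry the same value in the current mapping; the pages below only read familyId.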

View File

@@ -164,8 +164,12 @@ export class AuthService {
     // Generate tokens
     const tokens = await this.generateTokens(user, device.id);

-    // Get family IDs
-    const familyIds = user.familyMemberships?.map((fm) => fm.familyId) || [];
+    // Get families with proper structure (matching /auth/me endpoint)
+    const families = user.familyMemberships?.map((fm) => ({
+      id: fm.familyId,
+      familyId: fm.familyId,
+      role: fm.role,
+    })) || [];

     // Audit log: successful login
     await this.auditService.logLogin(user.id);
@@ -180,7 +184,7 @@ export class AuthService {
         locale: user.locale,
         emailVerified: user.emailVerified,
         preferences: user.preferences,
-        families: familyIds,
+        families: families,
       },
       tokens,
       requiresMFA: false,

View File

@@ -13,7 +13,7 @@ export interface AuthResponse {
     name: string;
     locale: string;
     emailVerified: boolean;
-    families?: string[];
+    families?: Array<{ id: string; familyId: string; role: string }>;
     preferences?: any;
   };
   tokens: AuthTokens;

View File

@@ -16,7 +16,7 @@ export async function POST(request: NextRequest) {
     let transcribedText: string;

     if (contentType.includes('application/json')) {
-      // Text input (already transcribed)
+      // Text input (already transcribed) - forward to backend for LLM classification
       const body = await request.json();
       transcribedText = body.text;
@@ -29,6 +29,41 @@ export async function POST(request: NextRequest) {
           { status: 400 }
         );
       }

+      // Forward text to backend for LLM-based classification
+      const backendUrl = process.env.NEXT_PUBLIC_API_URL || 'http://localhost:3020';
+      const backendResponse = await fetch(`${backendUrl}/api/v1/voice/transcribe`, {
+        method: 'POST',
+        headers: {
+          'Content-Type': 'application/json',
+          // Forward auth token if present
+          ...(request.headers.get('authorization') && {
+            authorization: request.headers.get('authorization')!,
+          }),
+        },
+        body: JSON.stringify({
+          text: transcribedText,
+          language: body.language || 'en',
+          childName: body.childName,
+        }),
+      });
+
+      if (!backendResponse.ok) {
+        const errorData = await backendResponse.json();
+        return NextResponse.json(errorData, { status: backendResponse.status });
+      }
+
+      const result = await backendResponse.json();
+
+      // Backend returns { success, transcript, classification }
+      return NextResponse.json(
+        {
+          success: true,
+          transcript: result.transcript,
+          classification: result.classification,
+        },
+        { status: 200 }
+      );
     } else if (contentType.includes('multipart/form-data')) {
       // Audio file upload - forward to backend for Whisper transcription
       const formData = await request.formData();
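
For context, a minimal sketch of how a client could call the text path of this route. The URL and the { text, language, childName } body match the handler above; the helper name and the classification fields mentioned in the comments are assumptions based on how VoiceFloatingButton consumes the result.

// Hypothetical client-side helper for the JSON (already-transcribed text) path
async function classifyVoiceText(text: string, childName?: string) {
  const response = await fetch('/api/voice/transcribe', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ text, language: 'en', childName }),
  });
  const data = await response.json();
  if (!response.ok || !data.success) {
    // Error payloads are forwarded from the backend unchanged
    throw new Error(data.message || 'Voice classification failed');
  }
  // data = { success, transcript, classification }; classification is what
  // handleClassifiedIntent expects (e.g. type/intent, confidence, details)
  return data;
}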

View File

@@ -25,7 +25,7 @@ import { childrenApi, Child } from '@/lib/api/children';
 import { format } from 'date-fns';

 export default function HomePage() {
-  const { user } = useAuth();
+  const { user, isLoading: authLoading } = useAuth();
   const router = useRouter();
   const [children, setChildren] = useState<Child[]>([]);
   const [selectedChild, setSelectedChild] = useState<Child | null>(null);
@@ -33,17 +33,29 @@ export default function HomePage() {
   const [loading, setLoading] = useState(true);

   const familyId = user?.families?.[0]?.familyId;

   // Load children and daily summary
   useEffect(() => {
     const loadData = async () => {
+      // Wait for auth to complete before trying to load data
+      if (authLoading) {
+        return;
+      }
+
       if (!familyId) {
+        console.log('[HomePage] No familyId found');
+        console.log('[HomePage] User object:', JSON.stringify(user, null, 2));
+        console.log('[HomePage] User.families:', user?.families);
         setLoading(false);
         return;
       }

+      console.log('[HomePage] Loading data for familyId:', familyId);
+
       try {
         // Load children
         const childrenData = await childrenApi.getChildren(familyId);
+        console.log('[HomePage] Children loaded:', childrenData.length);
         setChildren(childrenData);

         if (childrenData.length > 0) {
@@ -56,14 +68,14 @@ export default function HomePage() {
           setDailySummary(summary);
         }
       } catch (error) {
-        console.error('Failed to load data:', error);
+        console.error('[HomePage] Failed to load data:', error);
       } finally {
         setLoading(false);
       }
     };

     loadData();
-  }, [familyId]);
+  }, [familyId, authLoading, user]);

   const quickActions = [
     { icon: <Restaurant />, label: 'Feeding', color: '#FFB6C1', path: '/track/feeding' },

View File

@@ -619,6 +619,11 @@ export default function DiaperTrackPage() {
             <Box sx={{ display: 'flex', flexDirection: 'column', gap: 2 }}>
               {recentDiapers.map((activity, index) => {
                 const data = activity.data as DiaperData;
+                // Skip activities with invalid data structure
+                if (!data || !data.diaperType) {
+                  console.warn('[Diaper] Activity missing diaperType:', activity);
+                  return null;
+                }
                 return (
                   <motion.div
                     key={activity.id}

View File

@@ -601,6 +601,11 @@ function FeedingTrackPage() {
             <Box sx={{ display: 'flex', flexDirection: 'column', gap: 2 }}>
               {recentFeedings.map((activity, index) => {
                 const data = activity.data as FeedingData;
+                // Skip activities with invalid data structure
+                if (!data || !data.feedingType) {
+                  console.warn('[Feeding] Activity missing feedingType:', activity);
+                  return null;
+                }
                 return (
                   <motion.div
                     key={activity.id}

View File

@@ -557,6 +557,11 @@ export default function SleepTrackPage() {
             <Box sx={{ display: 'flex', flexDirection: 'column', gap: 2 }}>
               {recentSleeps.map((activity, index) => {
                 const data = activity.data as SleepData;
+                // Skip activities with invalid data structure
+                if (!data || !data.quality || !data.location) {
+                  console.warn('[Sleep] Activity missing required fields:', activity);
+                  return null;
+                }
                 return (
                   <motion.div
                     key={activity.id}

View File

@@ -36,8 +36,9 @@ export function VoiceFloatingButton() {
   const { user } = useAuth();
   const [open, setOpen] = useState(false);
   const [isProcessing, setIsProcessing] = useState(false);
+  const [processingStatus, setProcessingStatus] = useState<'listening' | 'understanding' | 'saving' | null>(null);
+  const [identifiedActivity, setIdentifiedActivity] = useState<string>('');
   const [classificationResult, setClassificationResult] = useState<any>(null);
-  const [lastClassifiedTranscript, setLastClassifiedTranscript] = useState<string>('');
   const [snackbar, setSnackbar] = useState<{
     open: boolean;
     message: string;
@@ -53,7 +54,18 @@ export function VoiceFloatingButton() {
   const { isListening, isSupported, transcript, classification, error, usesFallback, startListening, stopListening, reset } =
     useVoiceInput();

-  // Auto-use classification from backend when transcription completes (MediaRecorder fallback)
+  // Set status when listening starts/stops
+  React.useEffect(() => {
+    if (isListening) {
+      setProcessingStatus('listening');
+    } else if (processingStatus === 'listening' && transcript) {
+      // Transition from listening to understanding when we have a transcript
+      setProcessingStatus('understanding');
+    }
+  }, [isListening, transcript]);
+
+  // Auto-use classification from backend when transcription completes
+  // MediaRecorder sends audio to backend, which transcribes + classifies in one call
   React.useEffect(() => {
     if (classification && !isListening && !isProcessing && open) {
       setClassificationResult(classification);
@@ -61,13 +73,6 @@ export function VoiceFloatingButton() {
     }
   }, [classification, isListening, isProcessing, open]);

-  // For Web Speech API (desktop), classify the transcript client-side
-  React.useEffect(() => {
-    if (!usesFallback && transcript && !isListening && !isProcessing && open && transcript !== lastClassifiedTranscript) {
-      classifyTranscript(transcript);
-    }
-  }, [usesFallback, transcript, isListening, isProcessing, open, lastClassifiedTranscript]);
-
   const handleOpen = () => {
     if (!isSupported) {
       setSnackbar({
@@ -80,7 +85,8 @@ export function VoiceFloatingButton() {
     setOpen(true);
     reset();
     setClassificationResult(null);
-    setLastClassifiedTranscript('');
+    setProcessingStatus(null);
+    setIdentifiedActivity('');
   };

   const handleClose = () => {
@@ -90,13 +96,13 @@ export function VoiceFloatingButton() {
     setOpen(false);
     reset();
     setClassificationResult(null);
-    setLastClassifiedTranscript('');
+    setProcessingStatus(null);
+    setIdentifiedActivity('');
   };

   const handleStartListening = () => {
     reset();
     setClassificationResult(null);
-    setLastClassifiedTranscript('');
     startListening();
   };
@@ -104,43 +110,12 @@ export function VoiceFloatingButton() {
     stopListening();
   };

-  const classifyTranscript = async (text: string) => {
-    // Mark this transcript as being classified to prevent duplicate calls
-    setLastClassifiedTranscript(text);
-    setIsProcessing(true);
-
-    try {
-      const response = await fetch('/api/voice/transcribe', {
-        method: 'POST',
-        headers: {
-          'Content-Type': 'application/json',
-        },
-        body: JSON.stringify({ text }),
-      });
-
-      const data = await response.json();
-
-      if (response.ok && data.success) {
-        setClassificationResult(data.classification);
-        handleClassifiedIntent(data.classification);
-      } else {
-        setClassificationResult({
-          error: true,
-          message: data.message || 'Could not understand command',
-        });
-      }
-    } catch (error) {
-      console.error('[Voice] Classification error:', error);
-      setClassificationResult({
-        error: true,
-        message: 'Failed to process command',
-      });
-    } finally {
-      setIsProcessing(false);
-    }
-  };
-
   const handleClassifiedIntent = async (result: any) => {
+    console.log('[Voice] handleClassifiedIntent called with result:', result);
+
     if (result.error) {
+      console.log('[Voice] Result has error:', result.message);
+      setProcessingStatus(null);
       setSnackbar({
         open: true,
         message: result.message,
@@ -149,8 +124,17 @@ export function VoiceFloatingButton() {
       return;
     }

+    // Support both formats: backend returns 'type', frontend local classifier returns 'intent'
+    const activityType = result.type || result.intent;
+    console.log('[Voice] Activity type:', activityType);
+
+    // Set identified activity for status display
+    setIdentifiedActivity(activityType);
+
     // Handle unknown or low confidence
-    if (result.type === 'unknown' || (result.confidence && result.confidence < 0.3)) {
+    if (activityType === 'unknown' || (result.confidence && result.confidence < 0.3)) {
+      console.log('[Voice] Unknown or low confidence:', activityType, result.confidence);
+      setProcessingStatus(null);
       setSnackbar({
         open: true,
         message: 'Could not understand the command. Please try again or use manual entry.',
@@ -161,6 +145,8 @@ export function VoiceFloatingButton() {
     // Get the first child from the family
     if (!familyId) {
+      console.log('[Voice] No familyId found');
+      setProcessingStatus(null);
       setSnackbar({
         open: true,
         message: 'No family found. Please set up your profile first.',
@@ -169,11 +155,17 @@ export function VoiceFloatingButton() {
       return;
     }

+    console.log('[Voice] Family ID:', familyId);
+
     try {
       setIsProcessing(true);
+      setProcessingStatus('saving');

       // Fetch children
+      console.log('[Voice] Fetching children for family:', familyId);
       const children = await childrenApi.getChildren(familyId);
+      console.log('[Voice] Children found:', children.length, children);

       if (children.length === 0) {
         setSnackbar({
           open: true,
@@ -186,21 +178,23 @@ export function VoiceFloatingButton() {
       // Use the first child (or you could enhance this to support child name matching)
       const childId = children[0].id;
+      console.log('[Voice] Using child ID:', childId);

       // Create the activity
       const activityData = {
-        type: result.type,
+        type: activityType,
         timestamp: result.timestamp || new Date().toISOString(),
-        data: result.details || {},
-        notes: result.details?.notes || undefined,
+        data: result.details || result.structuredData || {},
+        notes: result.details?.notes || result.structuredData?.notes || undefined,
       };

-      console.log('[Voice] Creating activity:', activityData);
-      await trackingApi.createActivity(childId, activityData);
+      console.log('[Voice] Creating activity with data:', JSON.stringify(activityData, null, 2));
+      const createdActivity = await trackingApi.createActivity(childId, activityData);
+      console.log('[Voice] Activity created successfully:', createdActivity);

       // Show success message
-      const activityLabel = result.type.charAt(0).toUpperCase() + result.type.slice(1);
+      const activityLabel = activityType.charAt(0).toUpperCase() + activityType.slice(1);
       setSnackbar({
         open: true,
         message: `${activityLabel} activity saved successfully!`,
@@ -212,7 +206,9 @@ export function VoiceFloatingButton() {
         handleClose();
       }, 1500);
     } catch (error: any) {
-      console.error('[Voice] Failed to create activity:', error);
+      console.error('[Voice] Failed to create activity - Full error:', error);
+      console.error('[Voice] Error response:', error.response);
+      console.error('[Voice] Error data:', error.response?.data);
       setSnackbar({
         open: true,
         message: error.response?.data?.message || 'Failed to save activity. Please try again.',
@@ -253,7 +249,7 @@ export function VoiceFloatingButton() {
           Voice Command
           {classificationResult && !classificationResult.error && (
             <Chip
-              label={`${classificationResult.intent} (${classificationResult.confidenceLevel})`}
+              label={`${classificationResult.type || classificationResult.intent} (${classificationResult.confidenceLevel || Math.round((classificationResult.confidence || 0) * 100) + '%'})`}
               color="success"
               size="small"
               sx={{ ml: 2 }}
@@ -287,9 +283,12 @@ export function VoiceFloatingButton() {
             </IconButton>
           </Box>

-          {/* Status text */}
+          {/* Status text with detailed processing stages */}
           <Typography variant="body1" color="text.secondary" gutterBottom>
-            {isListening ? 'Listening... Speak now' : 'Click the microphone to start'}
+            {processingStatus === 'listening' && 'Listening... Speak now'}
+            {processingStatus === 'understanding' && 'Understanding your request...'}
+            {processingStatus === 'saving' && identifiedActivity && `Adding to ${identifiedActivity.charAt(0).toUpperCase() + identifiedActivity.slice(1)} tracker...`}
+            {!processingStatus && !isListening && 'Click the microphone to start'}
           </Typography>

           {/* Transcript */}
@@ -302,12 +301,14 @@ export function VoiceFloatingButton() {
             </Box>
           )}

-          {/* Processing indicator */}
-          {isProcessing && (
+          {/* Processing indicator with status */}
+          {processingStatus && (
             <Box sx={{ mt: 2, display: 'flex', alignItems: 'center', justifyContent: 'center' }}>
               <CircularProgress size={20} sx={{ mr: 1 }} />
               <Typography variant="body2" color="text.secondary">
-                Processing command...
+                {processingStatus === 'listening' && 'Listening...'}
+                {processingStatus === 'understanding' && 'Understanding...'}
+                {processingStatus === 'saving' && 'Saving...'}
               </Typography>
             </Box>
           )}
@@ -316,7 +317,7 @@ export function VoiceFloatingButton() {
           {classificationResult && !classificationResult.error && (
             <Alert severity="success" sx={{ mt: 2 }}>
               <Typography variant="body2" gutterBottom>
-                <strong>Understood:</strong> {classificationResult.intent}
+                <strong>Understood:</strong> {classificationResult.type || classificationResult.intent}
               </Typography>
             </Alert>
           )}

View File

@@ -36,49 +36,21 @@ export function useVoiceInput() {
   const mediaRecorderRef = useRef<MediaRecorder | null>(null);
   const audioChunksRef = useRef<Blob[]>([]);
   const timeoutRef = useRef<NodeJS.Timeout | null>(null);
+  const silenceTimeoutRef = useRef<NodeJS.Timeout | null>(null);
+  const audioContextRef = useRef<AudioContext | null>(null);
+  const analyserRef = useRef<AnalyserNode | null>(null);

-  // Check if browser supports Speech Recognition or MediaRecorder
+  // Check if browser supports MediaRecorder (unified approach for all platforms)
   useEffect(() => {
-    // Detect iOS Safari specifically
-    const isIOSSafari = /iPad|iPhone|iPod/.test(navigator.userAgent) && !(window as any).MSStream;
-
-    const SpeechRecognition =
-      (window as any).SpeechRecognition || (window as any).webkitSpeechRecognition;
-
-    // Force fallback for iOS Safari regardless of Speech Recognition availability
-    if (isIOSSafari) {
-      console.log('[Voice] iOS Safari detected, using MediaRecorder fallback');
-      if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
-        setState(prev => ({ ...prev, isSupported: true, usesFallback: true }));
-      } else {
-        setState(prev => ({ ...prev, isSupported: false }));
-      }
-    } else if (SpeechRecognition) {
-      try {
-        // Initialize recognition for non-iOS browsers
-        console.log('[Voice] Initializing Web Speech API');
-        const recognition = new SpeechRecognition();
-        recognition.continuous = true; // Keep listening until manually stopped
-        recognition.interimResults = true; // Get interim results
-        recognition.maxAlternatives = 1;
-        recognition.lang = 'en-US'; // Default language
-        recognitionRef.current = recognition;
-        console.log('[Voice] Web Speech API initialized successfully');
-        setState(prev => ({ ...prev, isSupported: true, usesFallback: false }));
-      } catch (error) {
-        console.warn('[Voice] Speech Recognition initialization failed, trying fallback');
-        if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
-          setState(prev => ({ ...prev, isSupported: true, usesFallback: true }));
-        } else {
-          setState(prev => ({ ...prev, isSupported: false }));
-        }
-      }
-    } else if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
-      // Use MediaRecorder fallback for other browsers without Speech Recognition
-      console.log('[Voice] No Speech Recognition, using MediaRecorder fallback');
+    // Always use MediaRecorder + backend transcription for consistency
+    // This gives us one flow to debug and maintain, works on all platforms
+    console.log('[Voice] Checking MediaRecorder support...');
+    if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
+      console.log('[Voice] MediaRecorder supported, will use backend transcription for all platforms');
       setState(prev => ({ ...prev, isSupported: true, usesFallback: true }));
     } else {
+      console.log('[Voice] MediaRecorder not supported');
       setState(prev => ({ ...prev, isSupported: false }));
     }
@@ -115,6 +87,59 @@ export function useVoiceInput() {
       console.log('[Voice] Microphone access granted, creating MediaRecorder...');

+      // Set up silence detection using Web Audio API
+      const audioContext = new (window.AudioContext || (window as any).webkitAudioContext)();
+      const analyser = audioContext.createAnalyser();
+      const microphone = audioContext.createMediaStreamSource(stream);
+      analyser.fftSize = 512;
+      microphone.connect(analyser);
+      audioContextRef.current = audioContext;
+      analyserRef.current = analyser;
+
+      // Monitor audio levels for silence detection
+      const bufferLength = analyser.frequencyBinCount;
+      const dataArray = new Uint8Array(bufferLength);
+      let lastSoundTime = Date.now();
+      const SILENCE_THRESHOLD = 10; // Adjust based on testing
+      const SILENCE_DURATION = 1500; // 1.5 seconds of silence
+
+      const checkSilence = () => {
+        analyser.getByteFrequencyData(dataArray);
+        const average = dataArray.reduce((a, b) => a + b, 0) / bufferLength;
+
+        if (average > SILENCE_THRESHOLD) {
+          lastSoundTime = Date.now();
+          // Clear silence timeout if sound detected
+          if (silenceTimeoutRef.current) {
+            clearTimeout(silenceTimeoutRef.current);
+            silenceTimeoutRef.current = null;
+          }
+        } else {
+          // Check if silence has lasted long enough
+          const silenceDuration = Date.now() - lastSoundTime;
+          if (silenceDuration > SILENCE_DURATION && !silenceTimeoutRef.current) {
+            console.log('[Voice] Silence detected, auto-stopping...');
+            if (mediaRecorderRef.current && mediaRecorderRef.current.state === 'recording') {
+              mediaRecorderRef.current.stop();
+            }
+            return;
+          }
+        }
+
+        // Continue checking if still recording
+        if (mediaRecorderRef.current && mediaRecorderRef.current.state === 'recording') {
+          requestAnimationFrame(checkSilence);
+        }
+      };
+
+      // Start monitoring after a brief delay to avoid immediate stop
+      setTimeout(() => {
+        if (mediaRecorderRef.current && mediaRecorderRef.current.state === 'recording') {
+          requestAnimationFrame(checkSilence);
+        }
+      }, 500);
+
       // Try different mime types for iOS Safari compatibility
       let mimeType = 'audio/webm;codecs=opus';
       if (!MediaRecorder.isTypeSupported(mimeType)) {
@@ -200,6 +225,12 @@ export function useVoiceInput() {
         // Stop all tracks
         stream.getTracks().forEach(track => track.stop());
         console.log('[Voice] Stream tracks stopped');
+
+        // Clean up audio context
+        if (audioContextRef.current) {
+          audioContextRef.current.close();
+          audioContextRef.current = null;
+        }
       };

       mediaRecorder.onerror = (event) => {
@@ -265,6 +296,8 @@ export function useVoiceInput() {
         error: null,
       }));

+      let lastSpeechTime = Date.now();
+
       // Set up event handlers
       recognition.onstart = () => {
         console.log('[Voice] Started listening');
@@ -283,6 +316,26 @@ export function useVoiceInput() {
           }
         }

+        // Update last speech time
+        lastSpeechTime = Date.now();
+
+        // Reset silence timeout
+        if (silenceTimeoutRef.current) {
+          clearTimeout(silenceTimeoutRef.current);
+        }
+
+        // Set new silence timeout (1.5 seconds after last speech)
+        silenceTimeoutRef.current = setTimeout(() => {
+          console.log('[Voice] Silence detected, auto-stopping...');
+          if (recognitionRef.current) {
+            try {
+              recognitionRef.current.stop();
+            } catch (e) {
+              // Ignore errors
+            }
+          }
+        }, 1500);
+
        // Only update state with final results, show interim in console for debugging
        if (finalTranscript) {
          console.log('[Voice] Final result:', finalTranscript);
@@ -347,19 +400,10 @@ export function useVoiceInput() {
     }
   }, []);

-  // Start listening (chooses appropriate method)
+  // Start listening (always uses MediaRecorder + backend transcription)
   const startListening = useCallback(() => {
-    if (state.usesFallback) {
-      startListeningWithFallback();
-    } else if (recognitionRef.current) {
-      startListeningWithSpeechAPI();
-    } else {
-      setState(prev => ({
-        ...prev,
-        error: 'Voice input not supported in this browser',
-      }));
-    }
-  }, [state.usesFallback, startListeningWithFallback, startListeningWithSpeechAPI]);
+    startListeningWithFallback();
+  }, [startListeningWithFallback]);

   // Stop listening
   const stopListening = useCallback(() => {