Fix login data structure and improve voice input UX

- Fix login endpoint to return families as an array of objects instead of strings
- Update auth interface to match the /auth/me endpoint structure (shape sketched below this list)
- Add silence detection to voice input (auto-stop after 1.5s)
- Add comprehensive status messages to voice modal (Listening, Understanding, Saving)
- Unify voice input flow to use MediaRecorder + backend for all platforms
- Add null checks to prevent tracking page crashes from invalid data
- Wait for auth completion before loading family data in HomePage
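
For reference, a minimal sketch of the user shape the frontend now assumes after this change. Only the array-of-objects form of families and the familyId field are confirmed by the diffs below (user?.families?.[0]?.familyId); the other field names are assumptions:

// Hedged sketch, not part of the commit: only families-as-objects and familyId
// are confirmed by usage elsewhere in this diff.
interface FamilyMembership {
  familyId: string;      // confirmed by HomePage usage below
  familyName?: string;   // assumed
  role?: string;         // assumed
}

interface AuthUser {
  id: string;            // assumed
  email: string;         // assumed
  families: FamilyMembership[]; // previously string[], now objects matching /auth/me
}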

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-02 10:25:13 +00:00
parent 4b8828fdad
commit c60467b6f9
9 changed files with 231 additions and 120 deletions

View File

@@ -16,7 +16,7 @@ export async function POST(request: NextRequest) {
let transcribedText: string;
if (contentType.includes('application/json')) {
-// Text input (already transcribed)
+// Text input (already transcribed) - forward to backend for LLM classification
const body = await request.json();
transcribedText = body.text;
@@ -29,6 +29,41 @@ export async function POST(request: NextRequest) {
{ status: 400 }
);
}
// Forward text to backend for LLM-based classification
const backendUrl = process.env.NEXT_PUBLIC_API_URL || 'http://localhost:3020';
const backendResponse = await fetch(`${backendUrl}/api/v1/voice/transcribe`, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
// Forward auth token if present
...(request.headers.get('authorization') && {
authorization: request.headers.get('authorization')!,
}),
},
body: JSON.stringify({
text: transcribedText,
language: body.language || 'en',
childName: body.childName,
}),
});
if (!backendResponse.ok) {
const errorData = await backendResponse.json();
return NextResponse.json(errorData, { status: backendResponse.status });
}
const result = await backendResponse.json();
// Backend returns { success, transcript, classification }
return NextResponse.json(
{
success: true,
transcript: result.transcript,
classification: result.classification,
},
{ status: 200 }
);
} else if (contentType.includes('multipart/form-data')) {
// Audio file upload - forward to backend for Whisper transcription
const formData = await request.formData();

View File

@@ -25,7 +25,7 @@ import { childrenApi, Child } from '@/lib/api/children';
import { format } from 'date-fns';
export default function HomePage() {
-const { user } = useAuth();
+const { user, isLoading: authLoading } = useAuth();
const router = useRouter();
const [children, setChildren] = useState<Child[]>([]);
const [selectedChild, setSelectedChild] = useState<Child | null>(null);
@@ -33,17 +33,29 @@ export default function HomePage() {
const [loading, setLoading] = useState(true);
const familyId = user?.families?.[0]?.familyId;
// Load children and daily summary
useEffect(() => {
const loadData = async () => {
// Wait for auth to complete before trying to load data
if (authLoading) {
return;
}
if (!familyId) {
console.log('[HomePage] No familyId found');
console.log('[HomePage] User object:', JSON.stringify(user, null, 2));
console.log('[HomePage] User.families:', user?.families);
setLoading(false);
return;
}
console.log('[HomePage] Loading data for familyId:', familyId);
try {
// Load children
const childrenData = await childrenApi.getChildren(familyId);
console.log('[HomePage] Children loaded:', childrenData.length);
setChildren(childrenData);
if (childrenData.length > 0) {
@@ -56,14 +68,14 @@ export default function HomePage() {
setDailySummary(summary);
}
} catch (error) {
-console.error('Failed to load data:', error);
+console.error('[HomePage] Failed to load data:', error);
} finally {
setLoading(false);
}
};
loadData();
-}, [familyId]);
+}, [familyId, authLoading, user]);
const quickActions = [
{ icon: <Restaurant />, label: 'Feeding', color: '#FFB6C1', path: '/track/feeding' },

View File

@@ -619,6 +619,11 @@ export default function DiaperTrackPage() {
<Box sx={{ display: 'flex', flexDirection: 'column', gap: 2 }}>
{recentDiapers.map((activity, index) => {
const data = activity.data as DiaperData;
// Skip activities with invalid data structure
if (!data || !data.diaperType) {
console.warn('[Diaper] Activity missing diaperType:', activity);
return null;
}
return (
<motion.div
key={activity.id}

View File

@@ -601,6 +601,11 @@ function FeedingTrackPage() {
<Box sx={{ display: 'flex', flexDirection: 'column', gap: 2 }}>
{recentFeedings.map((activity, index) => {
const data = activity.data as FeedingData;
// Skip activities with invalid data structure
if (!data || !data.feedingType) {
console.warn('[Feeding] Activity missing feedingType:', activity);
return null;
}
return (
<motion.div
key={activity.id}

View File

@@ -557,6 +557,11 @@ export default function SleepTrackPage() {
<Box sx={{ display: 'flex', flexDirection: 'column', gap: 2 }}>
{recentSleeps.map((activity, index) => {
const data = activity.data as SleepData;
// Skip activities with invalid data structure
if (!data || !data.quality || !data.location) {
console.warn('[Sleep] Activity missing required fields:', activity);
return null;
}
return (
<motion.div
key={activity.id}
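
The same defensive check now appears in the diaper, feeding, and sleep pages above. If more trackers need it, it could be folded into one guard; a sketch under the assumption that each page keeps its own Data interface:

// Hypothetical helper, not part of this commit: filters activity payloads that are
// missing required fields before rendering, mirroring the inline checks above.
function hasRequiredFields<T extends object>(
  data: unknown,
  fields: (keyof T & string)[],
): data is T {
  return (
    typeof data === 'object' &&
    data !== null &&
    fields.every(f => (data as Record<string, unknown>)[f] != null)
  );
}

// Example use in the sleep tracker:
// if (!hasRequiredFields<SleepData>(activity.data, ['quality', 'location'])) return null;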

View File

@@ -36,8 +36,9 @@ export function VoiceFloatingButton() {
const { user } = useAuth();
const [open, setOpen] = useState(false);
const [isProcessing, setIsProcessing] = useState(false);
+const [processingStatus, setProcessingStatus] = useState<'listening' | 'understanding' | 'saving' | null>(null);
+const [identifiedActivity, setIdentifiedActivity] = useState<string>('');
const [classificationResult, setClassificationResult] = useState<any>(null);
-const [lastClassifiedTranscript, setLastClassifiedTranscript] = useState<string>('');
const [snackbar, setSnackbar] = useState<{
open: boolean;
message: string;
@@ -53,7 +54,18 @@ export function VoiceFloatingButton() {
const { isListening, isSupported, transcript, classification, error, usesFallback, startListening, stopListening, reset } =
useVoiceInput();
-// Auto-use classification from backend when transcription completes (MediaRecorder fallback)
+// Set status when listening starts/stops
+React.useEffect(() => {
+if (isListening) {
+setProcessingStatus('listening');
+} else if (processingStatus === 'listening' && transcript) {
+// Transition from listening to understanding when we have a transcript
+setProcessingStatus('understanding');
+}
+}, [isListening, transcript]);
+// Auto-use classification from backend when transcription completes
+// MediaRecorder sends audio to backend, which transcribes + classifies in one call
React.useEffect(() => {
if (classification && !isListening && !isProcessing && open) {
setClassificationResult(classification);
@@ -61,13 +73,6 @@ export function VoiceFloatingButton() {
}
}, [classification, isListening, isProcessing, open]);
-// For Web Speech API (desktop), classify the transcript client-side
-React.useEffect(() => {
-if (!usesFallback && transcript && !isListening && !isProcessing && open && transcript !== lastClassifiedTranscript) {
-classifyTranscript(transcript);
-}
-}, [usesFallback, transcript, isListening, isProcessing, open, lastClassifiedTranscript]);
const handleOpen = () => {
if (!isSupported) {
setSnackbar({
@@ -80,7 +85,8 @@ export function VoiceFloatingButton() {
setOpen(true);
reset();
setClassificationResult(null);
-setLastClassifiedTranscript('');
+setProcessingStatus(null);
+setIdentifiedActivity('');
};
const handleClose = () => {
@@ -90,13 +96,13 @@ export function VoiceFloatingButton() {
setOpen(false);
reset();
setClassificationResult(null);
-setLastClassifiedTranscript('');
+setProcessingStatus(null);
+setIdentifiedActivity('');
};
const handleStartListening = () => {
reset();
setClassificationResult(null);
-setLastClassifiedTranscript('');
startListening();
};
@@ -104,43 +110,12 @@ export function VoiceFloatingButton() {
stopListening();
};
-const classifyTranscript = async (text: string) => {
-// Mark this transcript as being classified to prevent duplicate calls
-setLastClassifiedTranscript(text);
-setIsProcessing(true);
-try {
-const response = await fetch('/api/voice/transcribe', {
-method: 'POST',
-headers: {
-'Content-Type': 'application/json',
-},
-body: JSON.stringify({ text }),
-});
-const data = await response.json();
-if (response.ok && data.success) {
-setClassificationResult(data.classification);
-handleClassifiedIntent(data.classification);
-} else {
-setClassificationResult({
-error: true,
-message: data.message || 'Could not understand command',
-});
-}
-} catch (error) {
-console.error('[Voice] Classification error:', error);
-setClassificationResult({
-error: true,
-message: 'Failed to process command',
-});
-} finally {
-setIsProcessing(false);
-}
-};
const handleClassifiedIntent = async (result: any) => {
console.log('[Voice] handleClassifiedIntent called with result:', result);
if (result.error) {
console.log('[Voice] Result has error:', result.message);
setProcessingStatus(null);
setSnackbar({
open: true,
message: result.message,
@@ -149,8 +124,17 @@ export function VoiceFloatingButton() {
return;
}
// Support both formats: backend returns 'type', frontend local classifier returns 'intent'
const activityType = result.type || result.intent;
console.log('[Voice] Activity type:', activityType);
// Set identified activity for status display
setIdentifiedActivity(activityType);
// Handle unknown or low confidence
-if (result.type === 'unknown' || (result.confidence && result.confidence < 0.3)) {
+if (activityType === 'unknown' || (result.confidence && result.confidence < 0.3)) {
console.log('[Voice] Unknown or low confidence:', activityType, result.confidence);
setProcessingStatus(null);
setSnackbar({
open: true,
message: 'Could not understand the command. Please try again or use manual entry.',
@@ -161,6 +145,8 @@ export function VoiceFloatingButton() {
// Get the first child from the family
if (!familyId) {
console.log('[Voice] No familyId found');
setProcessingStatus(null);
setSnackbar({
open: true,
message: 'No family found. Please set up your profile first.',
@@ -169,11 +155,17 @@ export function VoiceFloatingButton() {
return;
}
console.log('[Voice] Family ID:', familyId);
try {
setIsProcessing(true);
setProcessingStatus('saving');
// Fetch children
console.log('[Voice] Fetching children for family:', familyId);
const children = await childrenApi.getChildren(familyId);
console.log('[Voice] Children found:', children.length, children);
if (children.length === 0) {
setSnackbar({
open: true,
@@ -186,21 +178,23 @@ export function VoiceFloatingButton() {
// Use the first child (or you could enhance this to support child name matching)
const childId = children[0].id;
console.log('[Voice] Using child ID:', childId);
// Create the activity
const activityData = {
-type: result.type,
+type: activityType,
timestamp: result.timestamp || new Date().toISOString(),
-data: result.details || {},
-notes: result.details?.notes || undefined,
+data: result.details || result.structuredData || {},
+notes: result.details?.notes || result.structuredData?.notes || undefined,
};
-console.log('[Voice] Creating activity:', activityData);
+console.log('[Voice] Creating activity with data:', JSON.stringify(activityData, null, 2));
-await trackingApi.createActivity(childId, activityData);
+const createdActivity = await trackingApi.createActivity(childId, activityData);
+console.log('[Voice] Activity created successfully:', createdActivity);
// Show success message
-const activityLabel = result.type.charAt(0).toUpperCase() + result.type.slice(1);
+const activityLabel = activityType.charAt(0).toUpperCase() + activityType.slice(1);
setSnackbar({
open: true,
message: `${activityLabel} activity saved successfully!`,
@@ -212,7 +206,9 @@ export function VoiceFloatingButton() {
handleClose();
}, 1500);
} catch (error: any) {
-console.error('[Voice] Failed to create activity:', error);
+console.error('[Voice] Failed to create activity - Full error:', error);
+console.error('[Voice] Error response:', error.response);
+console.error('[Voice] Error data:', error.response?.data);
setSnackbar({
open: true,
message: error.response?.data?.message || 'Failed to save activity. Please try again.',
@@ -253,7 +249,7 @@ export function VoiceFloatingButton() {
Voice Command
{classificationResult && !classificationResult.error && (
<Chip
-label={`${classificationResult.intent} (${classificationResult.confidenceLevel})`}
+label={`${classificationResult.type || classificationResult.intent} (${classificationResult.confidenceLevel || Math.round((classificationResult.confidence || 0) * 100) + '%'})`}
color="success"
size="small"
sx={{ ml: 2 }}
@@ -287,9 +283,12 @@ export function VoiceFloatingButton() {
</IconButton>
</Box>
-{/* Status text */}
+{/* Status text with detailed processing stages */}
<Typography variant="body1" color="text.secondary" gutterBottom>
-{isListening ? 'Listening... Speak now' : 'Click the microphone to start'}
+{processingStatus === 'listening' && 'Listening... Speak now'}
+{processingStatus === 'understanding' && 'Understanding your request...'}
+{processingStatus === 'saving' && identifiedActivity && `Adding to ${identifiedActivity.charAt(0).toUpperCase() + identifiedActivity.slice(1)} tracker...`}
+{!processingStatus && !isListening && 'Click the microphone to start'}
</Typography>
{/* Transcript */}
@@ -302,12 +301,14 @@ export function VoiceFloatingButton() {
</Box>
)}
-{/* Processing indicator */}
-{isProcessing && (
+{/* Processing indicator with status */}
+{processingStatus && (
<Box sx={{ mt: 2, display: 'flex', alignItems: 'center', justifyContent: 'center' }}>
<CircularProgress size={20} sx={{ mr: 1 }} />
<Typography variant="body2" color="text.secondary">
-Processing command...
+{processingStatus === 'listening' && 'Listening...'}
+{processingStatus === 'understanding' && 'Understanding...'}
+{processingStatus === 'saving' && 'Saving...'}
</Typography>
</Box>
)}
@@ -316,7 +317,7 @@ export function VoiceFloatingButton() {
{classificationResult && !classificationResult.error && (
<Alert severity="success" sx={{ mt: 2 }}>
<Typography variant="body2" gutterBottom>
-<strong>Understood:</strong> {classificationResult.intent}
+<strong>Understood:</strong> {classificationResult.type || classificationResult.intent}
</Typography>
</Alert>
)}
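
The component above reads the classification result in two formats (backend: type/confidence, legacy client-side: intent/confidenceLevel). A hedged sketch of a shape the any-typed state could eventually be narrowed to; every field is inferred from how classificationResult is used above, not from a published contract:

// Sketch only: optional fields reflect the two formats read by the Chip label,
// the Alert, and handleClassifiedIntent above.
interface ClassificationResult {
  type?: string;             // backend format
  intent?: string;           // legacy client-side format
  confidence?: number;       // 0..1, backend
  confidenceLevel?: string;  // legacy label
  timestamp?: string;
  details?: Record<string, unknown>;
  structuredData?: Record<string, unknown>;
  error?: boolean;
  message?: string;
}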

View File

@@ -36,49 +36,21 @@ export function useVoiceInput() {
const mediaRecorderRef = useRef<MediaRecorder | null>(null);
const audioChunksRef = useRef<Blob[]>([]);
const timeoutRef = useRef<NodeJS.Timeout | null>(null);
const silenceTimeoutRef = useRef<NodeJS.Timeout | null>(null);
const audioContextRef = useRef<AudioContext | null>(null);
const analyserRef = useRef<AnalyserNode | null>(null);
-// Check if browser supports Speech Recognition or MediaRecorder
+// Check if browser supports MediaRecorder (unified approach for all platforms)
useEffect(() => {
// Detect iOS Safari specifically
const isIOSSafari = /iPad|iPhone|iPod/.test(navigator.userAgent) && !(window as any).MSStream;
// Always use MediaRecorder + backend transcription for consistency
// This gives us one flow to debug and maintain, works on all platforms
console.log('[Voice] Checking MediaRecorder support...');
const SpeechRecognition =
(window as any).SpeechRecognition || (window as any).webkitSpeechRecognition;
// Force fallback for iOS Safari regardless of Speech Recognition availability
if (isIOSSafari) {
console.log('[Voice] iOS Safari detected, using MediaRecorder fallback');
if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
setState(prev => ({ ...prev, isSupported: true, usesFallback: true }));
} else {
setState(prev => ({ ...prev, isSupported: false }));
}
} else if (SpeechRecognition) {
try {
// Initialize recognition for non-iOS browsers
console.log('[Voice] Initializing Web Speech API');
const recognition = new SpeechRecognition();
recognition.continuous = true; // Keep listening until manually stopped
recognition.interimResults = true; // Get interim results
recognition.maxAlternatives = 1;
recognition.lang = 'en-US'; // Default language
recognitionRef.current = recognition;
console.log('[Voice] Web Speech API initialized successfully');
setState(prev => ({ ...prev, isSupported: true, usesFallback: false }));
} catch (error) {
console.warn('[Voice] Speech Recognition initialization failed, trying fallback');
if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
setState(prev => ({ ...prev, isSupported: true, usesFallback: true }));
} else {
setState(prev => ({ ...prev, isSupported: false }));
}
}
} else if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
// Use MediaRecorder fallback for other browsers without Speech Recognition
console.log('[Voice] No Speech Recognition, using MediaRecorder fallback');
if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
console.log('[Voice] MediaRecorder supported, will use backend transcription for all platforms');
setState(prev => ({ ...prev, isSupported: true, usesFallback: true }));
} else {
console.log('[Voice] MediaRecorder not supported');
setState(prev => ({ ...prev, isSupported: false }));
}
@@ -115,6 +87,59 @@ export function useVoiceInput() {
console.log('[Voice] Microphone access granted, creating MediaRecorder...');
// Set up silence detection using Web Audio API
const audioContext = new (window.AudioContext || (window as any).webkitAudioContext)();
const analyser = audioContext.createAnalyser();
const microphone = audioContext.createMediaStreamSource(stream);
analyser.fftSize = 512;
microphone.connect(analyser);
audioContextRef.current = audioContext;
analyserRef.current = analyser;
// Monitor audio levels for silence detection
const bufferLength = analyser.frequencyBinCount;
const dataArray = new Uint8Array(bufferLength);
let lastSoundTime = Date.now();
const SILENCE_THRESHOLD = 10; // Adjust based on testing
const SILENCE_DURATION = 1500; // 1.5 seconds of silence
const checkSilence = () => {
analyser.getByteFrequencyData(dataArray);
const average = dataArray.reduce((a, b) => a + b, 0) / bufferLength;
if (average > SILENCE_THRESHOLD) {
lastSoundTime = Date.now();
// Clear silence timeout if sound detected
if (silenceTimeoutRef.current) {
clearTimeout(silenceTimeoutRef.current);
silenceTimeoutRef.current = null;
}
} else {
// Check if silence has lasted long enough
const silenceDuration = Date.now() - lastSoundTime;
if (silenceDuration > SILENCE_DURATION && !silenceTimeoutRef.current) {
console.log('[Voice] Silence detected, auto-stopping...');
if (mediaRecorderRef.current && mediaRecorderRef.current.state === 'recording') {
mediaRecorderRef.current.stop();
}
return;
}
}
// Continue checking if still recording
if (mediaRecorderRef.current && mediaRecorderRef.current.state === 'recording') {
requestAnimationFrame(checkSilence);
}
};
// Start monitoring after a brief delay to avoid immediate stop
setTimeout(() => {
if (mediaRecorderRef.current && mediaRecorderRef.current.state === 'recording') {
requestAnimationFrame(checkSilence);
}
}, 500);
// Try different mime types for iOS Safari compatibility
let mimeType = 'audio/webm;codecs=opus';
if (!MediaRecorder.isTypeSupported(mimeType)) {
@@ -200,6 +225,12 @@ export function useVoiceInput() {
// Stop all tracks
stream.getTracks().forEach(track => track.stop());
console.log('[Voice] Stream tracks stopped');
// Clean up audio context
if (audioContextRef.current) {
audioContextRef.current.close();
audioContextRef.current = null;
}
};
mediaRecorder.onerror = (event) => {
@@ -265,6 +296,8 @@ export function useVoiceInput() {
error: null,
}));
let lastSpeechTime = Date.now();
// Set up event handlers
recognition.onstart = () => {
console.log('[Voice] Started listening');
@@ -283,6 +316,26 @@ export function useVoiceInput() {
}
}
// Update last speech time
lastSpeechTime = Date.now();
// Reset silence timeout
if (silenceTimeoutRef.current) {
clearTimeout(silenceTimeoutRef.current);
}
// Set new silence timeout (1.5 seconds after last speech)
silenceTimeoutRef.current = setTimeout(() => {
console.log('[Voice] Silence detected, auto-stopping...');
if (recognitionRef.current) {
try {
recognitionRef.current.stop();
} catch (e) {
// Ignore errors
}
}
}, 1500);
// Only update state with final results, show interim in console for debugging
if (finalTranscript) {
console.log('[Voice] Final result:', finalTranscript);
@@ -347,19 +400,10 @@ export function useVoiceInput() {
}
}, []);
-// Start listening (chooses appropriate method)
+// Start listening (always uses MediaRecorder + backend transcription)
const startListening = useCallback(() => {
-if (state.usesFallback) {
-startListeningWithFallback();
-} else if (recognitionRef.current) {
-startListeningWithSpeechAPI();
-} else {
-setState(prev => ({
-...prev,
-error: 'Voice input not supported in this browser',
-}));
-}
-}, [state.usesFallback, startListeningWithFallback, startListeningWithSpeechAPI]);
+startListeningWithFallback();
+}, [startListeningWithFallback]);
// Stop listening
const stopListening = useCallback(() => {
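
The silence detection above lives inline in the MediaRecorder path. If the threshold or window needs tuning later, the loop could be pulled into a small helper; a sketch that keeps the same byte-frequency average, threshold of 10, and 1.5 s window used in the hook:

// Hypothetical extraction of the silence loop, not part of this commit.
// Constants mirror the values above but remain assumptions to tune per device.
function watchForSilence(
  analyser: AnalyserNode,
  isRecording: () => boolean,
  onSilence: () => void,
  threshold = 10,
  durationMs = 1500,
) {
  const data = new Uint8Array(analyser.frequencyBinCount);
  let lastSound = Date.now();
  const tick = () => {
    if (!isRecording()) return;                 // stop polling once recording ends
    analyser.getByteFrequencyData(data);
    const avg = data.reduce((a, b) => a + b, 0) / data.length;
    if (avg > threshold) {
      lastSound = Date.now();                   // sound detected, reset the window
    } else if (Date.now() - lastSound > durationMs) {
      onSilence();                              // sustained silence, trigger auto-stop
      return;
    }
    requestAnimationFrame(tick);
  };
  requestAnimationFrame(tick);
}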