Fix login data structure and improve voice input UX

- Fix login endpoint to return families as array of objects instead of strings (see the sketch after this list)
- Update auth interface to match /auth/me endpoint structure
- Add silence detection to voice input (auto-stop after 1.5s)
- Add comprehensive status messages to voice modal (Listening, Understanding, Saving)
- Unify voice input flow to use MediaRecorder + backend for all platforms
- Add null checks to prevent tracking page crashes from invalid data
- Wait for auth completion before loading family data in HomePage
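
A minimal sketch of the shape the first two bullets describe. None of the hunks below touch the auth code, so every field name here is an assumption rather than something taken from the repository:

// Hypothetical shapes only: field names are assumptions.
interface FamilySummary {
  id: string;
  name: string;
}

interface AuthUser {
  id: string;
  email: string;
  // Previously the login endpoint returned families as string[]; it now returns
  // objects, matching the structure that /auth/me already uses.
  families: FamilySummary[];
}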

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-02 10:25:13 +00:00
parent 4b8828fdad
commit c60467b6f9
9 changed files with 231 additions and 120 deletions


@@ -36,49 +36,21 @@ export function useVoiceInput() {
  const mediaRecorderRef = useRef<MediaRecorder | null>(null);
  const audioChunksRef = useRef<Blob[]>([]);
  const timeoutRef = useRef<NodeJS.Timeout | null>(null);
  const silenceTimeoutRef = useRef<NodeJS.Timeout | null>(null);
  const audioContextRef = useRef<AudioContext | null>(null);
  const analyserRef = useRef<AnalyserNode | null>(null);

  // Check if browser supports Speech Recognition or MediaRecorder
  // Check if browser supports MediaRecorder (unified approach for all platforms)
  useEffect(() => {
    // Detect iOS Safari specifically
    const isIOSSafari = /iPad|iPhone|iPod/.test(navigator.userAgent) && !(window as any).MSStream;

    // Always use MediaRecorder + backend transcription for consistency
    // This gives us one flow to debug and maintain, works on all platforms
    console.log('[Voice] Checking MediaRecorder support...');

    const SpeechRecognition =
      (window as any).SpeechRecognition || (window as any).webkitSpeechRecognition;

    // Force fallback for iOS Safari regardless of Speech Recognition availability
    if (isIOSSafari) {
      console.log('[Voice] iOS Safari detected, using MediaRecorder fallback');
      if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
        setState(prev => ({ ...prev, isSupported: true, usesFallback: true }));
      } else {
        setState(prev => ({ ...prev, isSupported: false }));
      }
    } else if (SpeechRecognition) {
      try {
        // Initialize recognition for non-iOS browsers
        console.log('[Voice] Initializing Web Speech API');
        const recognition = new SpeechRecognition();
        recognition.continuous = true; // Keep listening until manually stopped
        recognition.interimResults = true; // Get interim results
        recognition.maxAlternatives = 1;
        recognition.lang = 'en-US'; // Default language
        recognitionRef.current = recognition;
        console.log('[Voice] Web Speech API initialized successfully');
        setState(prev => ({ ...prev, isSupported: true, usesFallback: false }));
      } catch (error) {
        console.warn('[Voice] Speech Recognition initialization failed, trying fallback');
        if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
          setState(prev => ({ ...prev, isSupported: true, usesFallback: true }));
        } else {
          setState(prev => ({ ...prev, isSupported: false }));
        }
      }
    } else if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
      // Use MediaRecorder fallback for other browsers without Speech Recognition
      console.log('[Voice] No Speech Recognition, using MediaRecorder fallback');
    if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
      console.log('[Voice] MediaRecorder supported, will use backend transcription for all platforms');
      setState(prev => ({ ...prev, isSupported: true, usesFallback: true }));
    } else {
      console.log('[Voice] MediaRecorder not supported');
      setState(prev => ({ ...prev, isSupported: false }));
    }
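
For readability, here is a sketch of what the support check presumably reduces to after this hunk, assembled only from the new-style lines above. The hook name and the reduced state shape are illustrative, not taken from the file:

import { useEffect, useState } from 'react';

// Illustrative hook name and reduced state shape; the console messages and flags
// mirror the new-style lines in the hunk above.
export function useMediaRecorderSupport() {
  const [state, setState] = useState({ isSupported: false, usesFallback: false });

  useEffect(() => {
    console.log('[Voice] Checking MediaRecorder support...');
    if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
      console.log('[Voice] MediaRecorder supported, will use backend transcription for all platforms');
      setState(prev => ({ ...prev, isSupported: true, usesFallback: true }));
    } else {
      console.log('[Voice] MediaRecorder not supported');
      setState(prev => ({ ...prev, isSupported: false }));
    }
  }, []);

  return state;
}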
@@ -115,6 +87,59 @@ export function useVoiceInput() {
      console.log('[Voice] Microphone access granted, creating MediaRecorder...');

      // Set up silence detection using Web Audio API
      const audioContext = new (window.AudioContext || (window as any).webkitAudioContext)();
      const analyser = audioContext.createAnalyser();
      const microphone = audioContext.createMediaStreamSource(stream);
      analyser.fftSize = 512;
      microphone.connect(analyser);
      audioContextRef.current = audioContext;
      analyserRef.current = analyser;

      // Monitor audio levels for silence detection
      const bufferLength = analyser.frequencyBinCount;
      const dataArray = new Uint8Array(bufferLength);
      let lastSoundTime = Date.now();
      const SILENCE_THRESHOLD = 10; // Adjust based on testing
      const SILENCE_DURATION = 1500; // 1.5 seconds of silence

      const checkSilence = () => {
        analyser.getByteFrequencyData(dataArray);
        const average = dataArray.reduce((a, b) => a + b, 0) / bufferLength;

        if (average > SILENCE_THRESHOLD) {
          lastSoundTime = Date.now();
          // Clear silence timeout if sound detected
          if (silenceTimeoutRef.current) {
            clearTimeout(silenceTimeoutRef.current);
            silenceTimeoutRef.current = null;
          }
        } else {
          // Check if silence has lasted long enough
          const silenceDuration = Date.now() - lastSoundTime;
          if (silenceDuration > SILENCE_DURATION && !silenceTimeoutRef.current) {
            console.log('[Voice] Silence detected, auto-stopping...');
            if (mediaRecorderRef.current && mediaRecorderRef.current.state === 'recording') {
              mediaRecorderRef.current.stop();
            }
            return;
          }
        }

        // Continue checking if still recording
        if (mediaRecorderRef.current && mediaRecorderRef.current.state === 'recording') {
          requestAnimationFrame(checkSilence);
        }
      };

      // Start monitoring after a brief delay to avoid immediate stop
      setTimeout(() => {
        if (mediaRecorderRef.current && mediaRecorderRef.current.state === 'recording') {
          requestAnimationFrame(checkSilence);
        }
      }, 500);

      // Try different mime types for iOS Safari compatibility
      let mimeType = 'audio/webm;codecs=opus';
      if (!MediaRecorder.isTypeSupported(mimeType)) {
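
The hunk cuts off inside the isTypeSupported check, so the actual fallback list is not visible here. A sketch of a typical chain for Safari/iOS, with the specific mime types assumed:

// Hypothetical continuation: the specific fallback types are assumptions.
function pickRecorder(stream: MediaStream): MediaRecorder {
  let mimeType = 'audio/webm;codecs=opus';
  if (!MediaRecorder.isTypeSupported(mimeType)) {
    mimeType = 'audio/mp4'; // Safari/iOS generally records MP4/AAC
  }
  if (!MediaRecorder.isTypeSupported(mimeType)) {
    mimeType = ''; // fall back to the browser default container
  }
  return new MediaRecorder(stream, mimeType ? { mimeType } : undefined);
}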
@@ -200,6 +225,12 @@ export function useVoiceInput() {
        // Stop all tracks
        stream.getTracks().forEach(track => track.stop());
        console.log('[Voice] Stream tracks stopped');

        // Clean up audio context
        if (audioContextRef.current) {
          audioContextRef.current.close();
          audioContextRef.current = null;
        }
      };

      mediaRecorder.onerror = (event) => {
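
None of the captured hunks show how the recorded audio reaches the backend transcription mentioned in the commit message. A hedged sketch of what that upload step could look like; the endpoint path, form field, and response field are all assumptions:

// Hypothetical upload step: the endpoint path, form field name and response field
// are assumptions, not taken from the repository.
async function transcribeRecording(chunks: Blob[], mimeType: string): Promise<string> {
  const audioBlob = new Blob(chunks, { type: mimeType });
  const form = new FormData();
  form.append('audio', audioBlob, 'voice-input.webm');

  const response = await fetch('/api/voice/transcribe', { method: 'POST', body: form });
  if (!response.ok) {
    throw new Error(`Transcription failed with status ${response.status}`);
  }
  const data = await response.json();
  return data.transcript; // assumed response field
}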
@@ -265,6 +296,8 @@ export function useVoiceInput() {
      error: null,
    }));

    let lastSpeechTime = Date.now();

    // Set up event handlers
    recognition.onstart = () => {
      console.log('[Voice] Started listening');
@@ -283,6 +316,26 @@ export function useVoiceInput() {
        }
      }

      // Update last speech time
      lastSpeechTime = Date.now();

      // Reset silence timeout
      if (silenceTimeoutRef.current) {
        clearTimeout(silenceTimeoutRef.current);
      }

      // Set new silence timeout (1.5 seconds after last speech)
      silenceTimeoutRef.current = setTimeout(() => {
        console.log('[Voice] Silence detected, auto-stopping...');
        if (recognitionRef.current) {
          try {
            recognitionRef.current.stop();
          } catch (e) {
            // Ignore errors
          }
        }
      }, 1500);

      // Only update state with final results, show interim in console for debugging
      if (finalTranscript) {
        console.log('[Voice] Final result:', finalTranscript);
@@ -347,19 +400,10 @@ export function useVoiceInput() {
    }
  }, []);

  // Start listening (chooses appropriate method)
  // Start listening (always uses MediaRecorder + backend transcription)
  const startListening = useCallback(() => {
    if (state.usesFallback) {
      startListeningWithFallback();
    } else if (recognitionRef.current) {
      startListeningWithSpeechAPI();
    } else {
      setState(prev => ({
        ...prev,
        error: 'Voice input not supported in this browser',
      }));
    }
  }, [state.usesFallback, startListeningWithFallback, startListeningWithSpeechAPI]);
    startListeningWithFallback();
  }, [startListeningWithFallback]);

  // Stop listening
  const stopListening = useCallback(() => {
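
The capture ends mid-hook. For completeness, a hypothetical usage sketch of the hook from a component; the import path and the transcript/isListening fields are assumptions, while isSupported, error, startListening and stopListening appear in the hunks above:

// Hypothetical usage: import path, transcript and isListening are assumptions.
import React from 'react';
import { useVoiceInput } from './useVoiceInput';

export function VoiceButton() {
  const { isSupported, isListening, transcript, error, startListening, stopListening } =
    useVoiceInput();

  if (!isSupported) {
    return <p>Voice input is not supported in this browser.</p>;
  }

  return (
    <div>
      <button onClick={isListening ? stopListening : startListening}>
        {isListening ? 'Stop' : 'Speak'}
      </button>
      {transcript && <p>{transcript}</p>}
      {error && <p role="alert">{error}</p>}
    </div>
  );
}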