- Fix login endpoint to return families as array of objects instead of strings
- Update auth interface to match /auth/me endpoint structure
- Add silence detection to voice input (auto-stop after 1.5s)
- Add comprehensive status messages to voice modal (Listening, Understanding, Saving)
- Unify voice input flow to use MediaRecorder + backend for all platforms
- Add null checks to prevent tracking page crashes from invalid data
- Wait for auth completion before loading family data in HomePage

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
import { useState, useEffect, useCallback, useRef } from 'react';

import { tokenStorage } from '@/lib/utils/tokenStorage';

export interface VoiceInputResult {
  transcript: string;
  confidence: number;
  isFinal: boolean;
}

export interface VoiceInputState {
  isListening: boolean;
  isSupported: boolean;
  transcript: string;
  classification: any | null;
  error: string | null;
  usesFallback: boolean;
}
/**
 * Hook for voice input.
 *
 * Records audio with MediaRecorder and sends it to the backend for
 * transcription, so the same flow runs on every platform (including iOS
 * Safari). A Web Speech API path (startListeningWithSpeechAPI) is retained
 * below but is not currently used by startListening.
 *
 * A commented usage sketch appears at the bottom of this file.
 */
export function useVoiceInput() {
  const [state, setState] = useState<VoiceInputState>({
    isListening: false,
    isSupported: false,
    transcript: '',
    classification: null,
    error: null,
    usesFallback: false,
  });

  const recognitionRef = useRef<any>(null);
  const mediaRecorderRef = useRef<MediaRecorder | null>(null);
  const audioChunksRef = useRef<Blob[]>([]);
  const timeoutRef = useRef<NodeJS.Timeout | null>(null);
  const silenceTimeoutRef = useRef<NodeJS.Timeout | null>(null);
  const audioContextRef = useRef<AudioContext | null>(null);
  const analyserRef = useRef<AnalyserNode | null>(null);
  // Check whether the browser supports MediaRecorder (unified approach for all platforms)
  useEffect(() => {
    // Always use MediaRecorder + backend transcription for consistency.
    // This gives us one flow to debug and maintain, and it works on all platforms.
    console.log('[Voice] Checking MediaRecorder support...');

    if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
      console.log('[Voice] MediaRecorder supported, will use backend transcription for all platforms');
      setState(prev => ({ ...prev, isSupported: true, usesFallback: true }));
    } else {
      console.log('[Voice] MediaRecorder not supported');
      setState(prev => ({ ...prev, isSupported: false }));
    }

    return () => {
      if (recognitionRef.current) {
        try {
          recognitionRef.current.stop();
        } catch (e) {
          // Ignore errors on cleanup
        }
      }
      if (mediaRecorderRef.current && mediaRecorderRef.current.state !== 'inactive') {
        mediaRecorderRef.current.stop();
      }
      if (timeoutRef.current) {
        clearTimeout(timeoutRef.current);
      }
      if (silenceTimeoutRef.current) {
        clearTimeout(silenceTimeoutRef.current);
      }
    };
  }, []);
  // Start listening with MediaRecorder fallback
  const startListeningWithFallback = useCallback(async () => {
    audioChunksRef.current = [];

    try {
      console.log('[Voice] Requesting microphone access...');
      const stream = await navigator.mediaDevices.getUserMedia({
        audio: {
          echoCancellation: true,
          noiseSuppression: true,
          sampleRate: 44100,
        }
      });

      console.log('[Voice] Microphone access granted, creating MediaRecorder...');

      // Set up silence detection using Web Audio API
      const audioContext = new (window.AudioContext || (window as any).webkitAudioContext)();
      const analyser = audioContext.createAnalyser();
      const microphone = audioContext.createMediaStreamSource(stream);
      analyser.fftSize = 512;
      microphone.connect(analyser);

      audioContextRef.current = audioContext;
      analyserRef.current = analyser;

      // Monitor audio levels for silence detection
      const bufferLength = analyser.frequencyBinCount;
      const dataArray = new Uint8Array(bufferLength);
      let lastSoundTime = Date.now();
      const SILENCE_THRESHOLD = 10; // Adjust based on testing
      const SILENCE_DURATION = 1500; // 1.5 seconds of silence

      const checkSilence = () => {
        analyser.getByteFrequencyData(dataArray);
        const average = dataArray.reduce((a, b) => a + b, 0) / bufferLength;

        if (average > SILENCE_THRESHOLD) {
          lastSoundTime = Date.now();
          // Clear silence timeout if sound detected
          if (silenceTimeoutRef.current) {
            clearTimeout(silenceTimeoutRef.current);
            silenceTimeoutRef.current = null;
          }
        } else {
          // Check if silence has lasted long enough
          const silenceDuration = Date.now() - lastSoundTime;
          if (silenceDuration > SILENCE_DURATION && !silenceTimeoutRef.current) {
            console.log('[Voice] Silence detected, auto-stopping...');
            if (mediaRecorderRef.current && mediaRecorderRef.current.state === 'recording') {
              mediaRecorderRef.current.stop();
            }
            return;
          }
        }

        // Continue checking if still recording
        if (mediaRecorderRef.current && mediaRecorderRef.current.state === 'recording') {
          requestAnimationFrame(checkSilence);
        }
      };

      // Start monitoring after a brief delay to avoid an immediate stop
      setTimeout(() => {
        if (mediaRecorderRef.current && mediaRecorderRef.current.state === 'recording') {
          requestAnimationFrame(checkSilence);
        }
      }, 500);
      // Try different mime types for iOS Safari compatibility
      let mimeType = 'audio/webm;codecs=opus';
      if (!MediaRecorder.isTypeSupported(mimeType)) {
        console.warn('[Voice] webm not supported, trying mp4...');
        mimeType = 'audio/mp4';
        if (!MediaRecorder.isTypeSupported(mimeType)) {
          console.warn('[Voice] mp4 not supported, trying default...');
          mimeType = '';
        }
      }

      const options = mimeType ? { mimeType } : {};
      console.log('[Voice] Using MediaRecorder with options:', options);
      const mediaRecorder = new MediaRecorder(stream, options);

      mediaRecorderRef.current = mediaRecorder;

      mediaRecorder.ondataavailable = (event) => {
        if (event.data.size > 0) {
          audioChunksRef.current.push(event.data);
        }
      };
      mediaRecorder.onstop = async () => {
        console.log('[Voice] Recording stopped, processing audio...');
        const audioBlob = new Blob(audioChunksRef.current, {
          type: mimeType || 'audio/webm'
        });

        console.log('[Voice] Audio blob created, size:', audioBlob.size, 'bytes');

        // Send to backend for transcription
        try {
          const formData = new FormData();
          const extension = mimeType.includes('mp4') ? 'mp4' : 'webm';
          formData.append('audio', audioBlob, `recording.${extension}`);

          console.log('[Voice] Sending to backend for transcription...');

          // Get auth token and API base URL
          const token = tokenStorage.getAccessToken();
          const API_BASE_URL = process.env.NEXT_PUBLIC_API_URL || 'http://localhost:3020';
          const headers: HeadersInit = {};
          if (token) {
            headers['Authorization'] = `Bearer ${token}`;
          }

          const response = await fetch(`${API_BASE_URL}/api/v1/voice/transcribe`, {
            method: 'POST',
            body: formData,
            headers,
          });

          console.log('[Voice] Transcription response status:', response.status);
          const data = await response.json();
          console.log('[Voice] Transcription response data:', data);

          if (response.ok && data.success) {
            setState(prev => ({
              ...prev,
              isListening: false,
              transcript: data.transcript,
              classification: data.classification || null,
            }));
          } else {
            console.error('[Voice] Transcription failed:', data);
            setState(prev => ({
              ...prev,
              isListening: false,
              error: data.message || 'Failed to transcribe audio',
              classification: null,
            }));
          }
        } catch (error) {
          console.error('[Voice] Transcription error:', error);
          setState(prev => ({
            ...prev,
            isListening: false,
            error: 'Failed to process audio. Please try again.',
          }));
        }

        // Stop all tracks
        stream.getTracks().forEach(track => track.stop());
        console.log('[Voice] Stream tracks stopped');

        // Clean up audio context
        if (audioContextRef.current) {
          audioContextRef.current.close();
          audioContextRef.current = null;
        }
      };
      mediaRecorder.onerror = (event) => {
        console.error('[Voice] MediaRecorder error:', event);
        setState(prev => ({
          ...prev,
          isListening: false,
          error: 'Recording failed',
        }));
      };

      setState(prev => ({
        ...prev,
        isListening: true,
        transcript: '',
        error: null,
      }));

      console.log('[Voice] Starting MediaRecorder...');
      mediaRecorder.start();
      console.log('[Voice] MediaRecorder started successfully');

      // Auto-stop after 10 seconds
      timeoutRef.current = setTimeout(() => {
        if (mediaRecorderRef.current && mediaRecorderRef.current.state === 'recording') {
          console.log('[Voice] Auto-stopping after 10 seconds');
          mediaRecorderRef.current.stop();
        }
      }, 10000);
    } catch (error: any) {
      console.error('[Voice] Failed to access microphone:', error);
      console.error('[Voice] Error name:', error.name);
      console.error('[Voice] Error message:', error.message);

      let errorMessage = 'Failed to access microphone';
      if (error.name === 'NotAllowedError' || error.name === 'PermissionDeniedError') {
        errorMessage = 'Microphone permission denied. Please allow microphone access in your browser settings and try again.';
      } else if (error.name === 'NotFoundError' || error.name === 'DevicesNotFoundError') {
        errorMessage = 'No microphone found. Please check your device settings.';
      } else if (error.name === 'NotSupportedError') {
        errorMessage = 'Your browser does not support audio recording.';
      } else if (error.name === 'NotReadableError' || error.name === 'TrackStartError') {
        errorMessage = 'Microphone is already in use by another application.';
      }

      setState(prev => ({
        ...prev,
        isListening: false,
        error: errorMessage,
      }));
    }
  }, []);
  // Start listening with the Web Speech API.
  // Retained for reference: startListening does not call this, and recognitionRef is
  // never initialized in this hook, so a caller would need to set it up first.
  const startListeningWithSpeechAPI = useCallback(() => {
    const recognition = recognitionRef.current;

    // Clear previous state
    setState(prev => ({
      ...prev,
      isListening: true,
      transcript: '',
      error: null,
    }));

    let lastSpeechTime = Date.now();

    // Set up event handlers
    recognition.onstart = () => {
      console.log('[Voice] Started listening');
    };

    recognition.onresult = (event: any) => {
      let interimTranscript = '';
      let finalTranscript = '';

      for (let i = event.resultIndex; i < event.results.length; i++) {
        const transcript = event.results[i][0].transcript;
        if (event.results[i].isFinal) {
          finalTranscript += transcript;
        } else {
          interimTranscript += transcript;
        }
      }

      // Update last speech time
      lastSpeechTime = Date.now();

      // Reset silence timeout
      if (silenceTimeoutRef.current) {
        clearTimeout(silenceTimeoutRef.current);
      }

      // Set a new silence timeout (1.5 seconds after last speech)
      silenceTimeoutRef.current = setTimeout(() => {
        console.log('[Voice] Silence detected, auto-stopping...');
        if (recognitionRef.current) {
          try {
            recognitionRef.current.stop();
          } catch (e) {
            // Ignore errors
          }
        }
      }, 1500);

      // Only update state with final results; log interim results for debugging
      if (finalTranscript) {
        console.log('[Voice] Final result:', finalTranscript);
        setState(prev => ({
          ...prev,
          transcript: finalTranscript,
        }));
      } else {
        console.log('[Voice] Interim result:', interimTranscript);
      }
    };

    recognition.onerror = (event: any) => {
      console.error('[Voice] Error:', event.error, event);

      let errorMessage = 'Failed to recognize speech';
      if (event.error === 'no-speech') {
        console.warn('[Voice] No speech detected - this is often normal if the user stops speaking');
        errorMessage = 'No speech detected. Please try again.';
      } else if (event.error === 'audio-capture') {
        errorMessage = 'No microphone found. Please check your settings.';
      } else if (event.error === 'not-allowed') {
        errorMessage = 'Microphone access denied. Please grant permission.';
      } else if (event.error === 'network') {
        errorMessage = 'Network error. Please check your connection.';
      }

      setState(prev => ({
        ...prev,
        isListening: false,
        error: errorMessage,
      }));
    };

    recognition.onend = () => {
      console.log('[Voice] Stopped listening');
      setState(prev => ({
        ...prev,
        isListening: false,
      }));
    };

    // Auto-stop after 10 seconds
    timeoutRef.current = setTimeout(() => {
      if (recognitionRef.current) {
        recognitionRef.current.stop();
      }
    }, 10000);

    // Start recognition
    try {
      console.log('[Voice] Calling recognition.start()');
      recognition.start();
      console.log('[Voice] recognition.start() called successfully');
    } catch (error) {
      console.error('[Voice] Failed to start:', error);
      setState(prev => ({
        ...prev,
        isListening: false,
        error: 'Failed to start voice recognition',
      }));
    }
  }, []);
  // Start listening (always uses MediaRecorder + backend transcription)
  const startListening = useCallback(() => {
    startListeningWithFallback();
  }, [startListeningWithFallback]);

  // Stop listening
  const stopListening = useCallback(() => {
    if (recognitionRef.current) {
      try {
        recognitionRef.current.stop();
      } catch (e) {
        // Ignore errors
      }
    }
    if (mediaRecorderRef.current && mediaRecorderRef.current.state === 'recording') {
      mediaRecorderRef.current.stop();
    }
    if (timeoutRef.current) {
      clearTimeout(timeoutRef.current);
      timeoutRef.current = null;
    }
    if (silenceTimeoutRef.current) {
      clearTimeout(silenceTimeoutRef.current);
      silenceTimeoutRef.current = null;
    }
  }, []);

  // Reset state
  const reset = useCallback(() => {
    setState(prev => ({
      ...prev,
      transcript: '',
      classification: null,
      error: null,
    }));
  }, []);

  return {
    ...state,
    startListening,
    stopListening,
    reset,
  };
}
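
// A minimal usage sketch for this hook, left as a comment because JSX belongs in a
// separate .tsx component file. The VoiceButton component, its onTranscript prop, and
// the '@/lib/hooks/useVoiceInput' import path are hypothetical; only the hook's return
// shape defined above is assumed.
//
// import { useEffect } from 'react';
// import { useVoiceInput } from '@/lib/hooks/useVoiceInput';
//
// function VoiceButton({ onTranscript }: { onTranscript: (text: string) => void }) {
//   const { isListening, isSupported, transcript, error, startListening, stopListening, reset } =
//     useVoiceInput();
//
//   // Hand the finished transcript to the parent, then clear it for the next recording.
//   useEffect(() => {
//     if (transcript) {
//       onTranscript(transcript);
//       reset();
//     }
//   }, [transcript, onTranscript, reset]);
//
//   if (!isSupported) return <p>Voice input is not supported in this browser.</p>;
//
//   return (
//     <div>
//       <button onClick={isListening ? stopListening : startListening}>
//         {isListening ? 'Stop recording' : 'Start recording'}
//       </button>
//       {error && <p role="alert">{error}</p>}
//     </div>
//   );
// }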