import { useState, useEffect, useCallback, useRef } from 'react';
import { tokenStorage } from '@/lib/utils/tokenStorage';

export interface VoiceInputResult {
  transcript: string;
  confidence: number;
  isFinal: boolean;
}

export interface VoiceInputState {
  isListening: boolean;
  isSupported: boolean;
  transcript: string;
  classification: any | null;
  error: string | null;
  usesFallback: boolean;
}

/**
 * Hook for voice input.
 *
 * Records audio with MediaRecorder and sends it to the backend for transcription.
 * The same flow is used on all platforms (including iOS Safari) so there is only
 * one path to debug and maintain. A Web Speech API implementation
 * (startListeningWithSpeechAPI) is kept below but is not currently wired up to
 * startListening.
 */
export function useVoiceInput() {
  const [state, setState] = useState<VoiceInputState>({
    isListening: false,
    isSupported: false,
    transcript: '',
    classification: null,
    error: null,
    usesFallback: false,
  });

  const recognitionRef = useRef<any>(null);
  const mediaRecorderRef = useRef<MediaRecorder | null>(null);
  const audioChunksRef = useRef<Blob[]>([]);
  const timeoutRef = useRef<ReturnType<typeof setTimeout> | null>(null);
  const silenceTimeoutRef = useRef<ReturnType<typeof setTimeout> | null>(null);
  const audioContextRef = useRef<AudioContext | null>(null);
  const analyserRef = useRef<AnalyserNode | null>(null);

  // Check if browser supports MediaRecorder (unified approach for all platforms)
  useEffect(() => {
    // Always use MediaRecorder + backend transcription for consistency.
    // This gives us one flow to debug and maintain, and it works on all platforms.
    console.log('[Voice] Checking MediaRecorder support...');
    if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
      console.log('[Voice] MediaRecorder supported, will use backend transcription for all platforms');
      setState(prev => ({ ...prev, isSupported: true, usesFallback: true }));
    } else {
      console.log('[Voice] MediaRecorder not supported');
      setState(prev => ({ ...prev, isSupported: false }));
    }

    return () => {
      if (recognitionRef.current) {
        try {
          recognitionRef.current.stop();
        } catch (e) {
          // Ignore errors on cleanup
        }
      }
      if (mediaRecorderRef.current && mediaRecorderRef.current.state !== 'inactive') {
        mediaRecorderRef.current.stop();
      }
      if (timeoutRef.current) {
        clearTimeout(timeoutRef.current);
      }
    };
  }, []);
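  // The onstop handler below assumes the transcription endpoint responds with a
  // JSON body roughly shaped like the sketch here. This is inferred from the
  // fields read in this file and is an assumption, not a documented contract:
  //
  //   {
  //     success: boolean;          // false when transcription failed
  //     transcript?: string;       // recognized text when success is true
  //     classification?: unknown;  // optional server-side classification
  //     message?: string;          // human-readable error when success is false
  //   }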
  // Start listening with MediaRecorder fallback
  const startListeningWithFallback = useCallback(async () => {
    audioChunksRef.current = [];

    try {
      console.log('[Voice] Requesting microphone access...');
      const stream = await navigator.mediaDevices.getUserMedia({
        audio: {
          echoCancellation: true,
          noiseSuppression: true,
          sampleRate: 44100,
        },
      });
      console.log('[Voice] Microphone access granted, creating MediaRecorder...');

      // Set up silence detection using Web Audio API
      const audioContext = new (window.AudioContext || (window as any).webkitAudioContext)();
      const analyser = audioContext.createAnalyser();
      const microphone = audioContext.createMediaStreamSource(stream);
      analyser.fftSize = 512;
      microphone.connect(analyser);
      audioContextRef.current = audioContext;
      analyserRef.current = analyser;

      // Monitor audio levels for silence detection
      const bufferLength = analyser.frequencyBinCount;
      const dataArray = new Uint8Array(bufferLength);
      let lastSoundTime = Date.now();
      const SILENCE_THRESHOLD = 10; // Adjust based on testing
      const SILENCE_DURATION = 1500; // 1.5 seconds of silence

      const checkSilence = () => {
        analyser.getByteFrequencyData(dataArray);
        const average = dataArray.reduce((a, b) => a + b, 0) / bufferLength;

        if (average > SILENCE_THRESHOLD) {
          lastSoundTime = Date.now();
          // Clear silence timeout if sound detected
          if (silenceTimeoutRef.current) {
            clearTimeout(silenceTimeoutRef.current);
            silenceTimeoutRef.current = null;
          }
        } else {
          // Check if silence has lasted long enough
          const silenceDuration = Date.now() - lastSoundTime;
          if (silenceDuration > SILENCE_DURATION && !silenceTimeoutRef.current) {
            console.log('[Voice] Silence detected, auto-stopping...');
            if (mediaRecorderRef.current && mediaRecorderRef.current.state === 'recording') {
              mediaRecorderRef.current.stop();
            }
            return;
          }
        }

        // Continue checking if still recording
        if (mediaRecorderRef.current && mediaRecorderRef.current.state === 'recording') {
          requestAnimationFrame(checkSilence);
        }
      };

      // Start monitoring after a brief delay to avoid an immediate stop
      setTimeout(() => {
        if (mediaRecorderRef.current && mediaRecorderRef.current.state === 'recording') {
          requestAnimationFrame(checkSilence);
        }
      }, 500);

      // Try different mime types for iOS Safari compatibility
      let mimeType = 'audio/webm;codecs=opus';
      if (!MediaRecorder.isTypeSupported(mimeType)) {
        console.warn('[Voice] webm not supported, trying mp4...');
        mimeType = 'audio/mp4';
        if (!MediaRecorder.isTypeSupported(mimeType)) {
          console.warn('[Voice] mp4 not supported, trying default...');
          mimeType = '';
        }
      }

      const options = mimeType ? { mimeType } : {};
      console.log('[Voice] Using MediaRecorder with options:', options);
      const mediaRecorder = new MediaRecorder(stream, options);
      mediaRecorderRef.current = mediaRecorder;

      mediaRecorder.ondataavailable = (event) => {
        if (event.data.size > 0) {
          audioChunksRef.current.push(event.data);
        }
      };

      mediaRecorder.onstop = async () => {
        console.log('[Voice] Recording stopped, processing audio...');
        const audioBlob = new Blob(audioChunksRef.current, { type: mimeType || 'audio/webm' });
        console.log('[Voice] Audio blob created, size:', audioBlob.size, 'bytes');

        // Send to backend for transcription
        try {
          const formData = new FormData();
          const extension = mimeType.includes('mp4') ? 'mp4' : 'webm';
          formData.append('audio', audioBlob, `recording.${extension}`);

          console.log('[Voice] Sending to backend for transcription...');

          // Get auth token and API base URL
          const token = tokenStorage.getAccessToken();
          const API_BASE_URL = process.env.NEXT_PUBLIC_API_URL || 'http://localhost:3020';

          const headers: Record<string, string> = {};
          if (token) {
            headers['Authorization'] = `Bearer ${token}`;
          }

          const response = await fetch(`${API_BASE_URL}/api/v1/voice/transcribe`, {
            method: 'POST',
            body: formData,
            headers,
          });

          console.log('[Voice] Transcription response status:', response.status);
          const data = await response.json();
          console.log('[Voice] Transcription response data:', data);

          if (response.ok && data.success) {
            setState(prev => ({
              ...prev,
              isListening: false,
              transcript: data.transcript,
              classification: data.classification || null,
            }));
          } else {
            console.error('[Voice] Transcription failed:', data);
            setState(prev => ({
              ...prev,
              isListening: false,
              error: data.message || 'Failed to transcribe audio',
              classification: null,
            }));
          }
        } catch (error) {
          console.error('[Voice] Transcription error:', error);
          setState(prev => ({
            ...prev,
            isListening: false,
            error: 'Failed to process audio. Please try again.',
          }));
        }
        // Stop all tracks
        stream.getTracks().forEach(track => track.stop());
        console.log('[Voice] Stream tracks stopped');

        // Clean up audio context
        if (audioContextRef.current) {
          audioContextRef.current.close();
          audioContextRef.current = null;
        }
      };

      mediaRecorder.onerror = (event) => {
        console.error('[Voice] MediaRecorder error:', event);
        setState(prev => ({
          ...prev,
          isListening: false,
          error: 'Recording failed',
        }));
      };

      setState(prev => ({
        ...prev,
        isListening: true,
        transcript: '',
        error: null,
      }));

      console.log('[Voice] Starting MediaRecorder...');
      mediaRecorder.start();
      console.log('[Voice] MediaRecorder started successfully');

      // Auto-stop after 10 seconds
      timeoutRef.current = setTimeout(() => {
        if (mediaRecorderRef.current && mediaRecorderRef.current.state === 'recording') {
          console.log('[Voice] Auto-stopping after 10 seconds');
          mediaRecorderRef.current.stop();
        }
      }, 10000);
    } catch (error: any) {
      console.error('[Voice] Failed to access microphone:', error);
      console.error('[Voice] Error name:', error.name);
      console.error('[Voice] Error message:', error.message);

      let errorMessage = 'Failed to access microphone';
      if (error.name === 'NotAllowedError' || error.name === 'PermissionDeniedError') {
        errorMessage = 'Microphone permission denied. Please allow microphone access in your browser settings and try again.';
      } else if (error.name === 'NotFoundError' || error.name === 'DevicesNotFoundError') {
        errorMessage = 'No microphone found. Please check your device settings.';
      } else if (error.name === 'NotSupportedError') {
        errorMessage = 'Your browser does not support audio recording.';
      } else if (error.name === 'NotReadableError' || error.name === 'TrackStartError') {
        errorMessage = 'Microphone is already in use by another application.';
      }

      setState(prev => ({
        ...prev,
        isListening: false,
        error: errorMessage,
      }));
    }
  }, []);

  // Start listening with Web Speech API (not currently wired up to startListening)
  const startListeningWithSpeechAPI = useCallback(() => {
    const recognition = recognitionRef.current;

    // recognitionRef is never populated in the current flow, so guard against a
    // missing SpeechRecognition instance before attaching handlers.
    if (!recognition) {
      console.warn('[Voice] SpeechRecognition instance not initialized');
      return;
    }

    // Clear previous state
    setState(prev => ({
      ...prev,
      isListening: true,
      transcript: '',
      error: null,
    }));

    let lastSpeechTime = Date.now();

    // Set up event handlers
    recognition.onstart = () => {
      console.log('[Voice] Started listening');
    };

    recognition.onresult = (event: any) => {
      let interimTranscript = '';
      let finalTranscript = '';

      for (let i = event.resultIndex; i < event.results.length; i++) {
        const transcript = event.results[i][0].transcript;
        if (event.results[i].isFinal) {
          finalTranscript += transcript;
        } else {
          interimTranscript += transcript;
        }
      }

      // Update last speech time
      lastSpeechTime = Date.now();

      // Reset silence timeout
      if (silenceTimeoutRef.current) {
        clearTimeout(silenceTimeoutRef.current);
      }

      // Set new silence timeout (1.5 seconds after last speech)
      silenceTimeoutRef.current = setTimeout(() => {
        console.log('[Voice] Silence detected, auto-stopping...');
        if (recognitionRef.current) {
          try {
            recognitionRef.current.stop();
          } catch (e) {
            // Ignore errors
          }
        }
      }, 1500);

      // Only update state with final results, show interim in console for debugging
      if (finalTranscript) {
        console.log('[Voice] Final result:', finalTranscript);
        setState(prev => ({
          ...prev,
          transcript: finalTranscript,
        }));
      } else {
        console.log('[Voice] Interim result:', interimTranscript);
      }
    };

    recognition.onerror = (event: any) => {
      console.error('[Voice] Error:', event.error, event);
      let errorMessage = 'Failed to recognize speech';

      if (event.error === 'no-speech') {
        console.warn('[Voice] No speech detected - this is often normal if the user stops speaking');
        errorMessage = 'No speech detected. Please try again.';
      } else if (event.error === 'audio-capture') {
        errorMessage = 'No microphone found. Please check your settings.';
      } else if (event.error === 'not-allowed') {
        errorMessage = 'Microphone access denied. Please grant permission.';
      } else if (event.error === 'network') {
        errorMessage = 'Network error. Please check your connection.';
      }

      setState(prev => ({
        ...prev,
        isListening: false,
        error: errorMessage,
      }));
    };

    recognition.onend = () => {
      console.log('[Voice] Stopped listening');
      setState(prev => ({
        ...prev,
        isListening: false,
      }));
    };

    // Auto-stop after 10 seconds
    timeoutRef.current = setTimeout(() => {
      if (recognitionRef.current) {
        recognitionRef.current.stop();
      }
    }, 10000);

    // Start recognition
    try {
      console.log('[Voice] Calling recognition.start()');
      recognition.start();
      console.log('[Voice] recognition.start() called successfully');
    } catch (error) {
      console.error('[Voice] Failed to start:', error);
      setState(prev => ({
        ...prev,
        isListening: false,
        error: 'Failed to start voice recognition',
      }));
    }
  }, []);

  // Start listening (always uses MediaRecorder + backend transcription)
  const startListening = useCallback(() => {
    startListeningWithFallback();
  }, [startListeningWithFallback]);

  // Stop listening
  const stopListening = useCallback(() => {
    if (recognitionRef.current) {
      try {
        recognitionRef.current.stop();
      } catch (e) {
        // Ignore errors
      }
    }
    if (mediaRecorderRef.current && mediaRecorderRef.current.state === 'recording') {
      mediaRecorderRef.current.stop();
    }
    if (timeoutRef.current) {
      clearTimeout(timeoutRef.current);
      timeoutRef.current = null;
    }
  }, []);

  // Reset state
  const reset = useCallback(() => {
    setState(prev => ({
      ...prev,
      transcript: '',
      classification: null,
      error: null,
    }));
  }, []);

  return {
    ...state,
    startListening,
    stopListening,
    reset,
  };
}
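/*
 * Usage sketch (hypothetical consumer; the component snippet below is
 * illustrative only and not part of this module):
 *
 *   const {
 *     isSupported,
 *     isListening,
 *     transcript,
 *     classification,
 *     error,
 *     startListening,
 *     stopListening,
 *     reset,
 *   } = useVoiceInput();
 *
 *   // Start recording on a button press; recording stops automatically after
 *   // ~1.5 s of silence or the 10 s cap, and `transcript` / `classification`
 *   // populate once the backend transcription response arrives.
 *   <button disabled={!isSupported} onClick={isListening ? stopListening : startListening}>
 *     {isListening ? 'Stop' : 'Speak'}
 *   </button>
 *
 *   // Call reset() before the next recording to clear the previous result.
 */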