import { useState, useEffect, useCallback, useRef } from 'react';

export interface VoiceInputResult {
  transcript: string;
  confidence: number;
  isFinal: boolean;
}

export interface VoiceInputState {
  isListening: boolean;
  isSupported: boolean;
  transcript: string;
  error: string | null;
  usesFallback: boolean;
}

/**
 * Hook for voice input using the browser Web Speech API, with a
 * MediaRecorder fallback.
 *
 * Provides voice recording with real-time transcription where the Web
 * Speech API is available, and falls back to MediaRecorder plus
 * server-side transcription on iOS Safari and other browsers without
 * Speech Recognition. See the usage sketch at the end of this file.
 */
export function useVoiceInput() {
  const [state, setState] = useState<VoiceInputState>({
    isListening: false,
    isSupported: false,
    transcript: '',
    error: null,
    usesFallback: false,
  });

  // The Web Speech API has no standard lib.dom typings, so the ref stays `any`.
  const recognitionRef = useRef<any>(null);
  const mediaRecorderRef = useRef<MediaRecorder | null>(null);
  const audioChunksRef = useRef<Blob[]>([]);
  const timeoutRef = useRef<ReturnType<typeof setTimeout> | null>(null);

  // Check whether the browser supports Speech Recognition or MediaRecorder.
  useEffect(() => {
    // Detect iOS Safari specifically. Note: iPadOS 13+ reports a Macintosh
    // user agent, so this test only catches iPhones and older iPads.
    const isIOSSafari =
      /iPad|iPhone|iPod/.test(navigator.userAgent) && !(window as any).MSStream;

    const SpeechRecognition =
      (window as any).SpeechRecognition || (window as any).webkitSpeechRecognition;

    // Force the fallback for iOS Safari regardless of Speech Recognition availability.
    if (isIOSSafari) {
      console.log('[Voice] iOS Safari detected, using MediaRecorder fallback');
      if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
        setState(prev => ({ ...prev, isSupported: true, usesFallback: true }));
      } else {
        setState(prev => ({ ...prev, isSupported: false }));
      }
    } else if (SpeechRecognition) {
      try {
        // Initialize recognition for non-iOS browsers.
        const recognition = new SpeechRecognition();
        recognition.continuous = false;    // Single recognition per session
        recognition.interimResults = true; // Emit interim results
        recognition.maxAlternatives = 1;
        recognition.lang = 'en-US';        // Default language

        recognitionRef.current = recognition;
        setState(prev => ({ ...prev, isSupported: true, usesFallback: false }));
      } catch (error) {
        console.warn('[Voice] Speech Recognition initialization failed, trying fallback');
        if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
          setState(prev => ({ ...prev, isSupported: true, usesFallback: true }));
        } else {
          setState(prev => ({ ...prev, isSupported: false }));
        }
      }
    } else if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
      // Use the MediaRecorder fallback for other browsers without Speech Recognition.
      console.log('[Voice] No Speech Recognition, using MediaRecorder fallback');
      setState(prev => ({ ...prev, isSupported: true, usesFallback: true }));
    } else {
      setState(prev => ({ ...prev, isSupported: false }));
    }

    return () => {
      if (recognitionRef.current) {
        try {
          recognitionRef.current.stop();
        } catch (e) {
          // Ignore errors on cleanup
        }
      }
      if (mediaRecorderRef.current && mediaRecorderRef.current.state !== 'inactive') {
        mediaRecorderRef.current.stop();
      }
      if (timeoutRef.current) {
        clearTimeout(timeoutRef.current);
      }
    };
  }, []);
  // Start listening with the MediaRecorder fallback.
  const startListeningWithFallback = useCallback(async () => {
    audioChunksRef.current = [];

    try {
      console.log('[Voice] Requesting microphone access...');
      const stream = await navigator.mediaDevices.getUserMedia({
        audio: {
          echoCancellation: true,
          noiseSuppression: true,
          sampleRate: 44100,
        },
      });
      console.log('[Voice] Microphone access granted, creating MediaRecorder...');

      // Try different MIME types for iOS Safari compatibility.
      let mimeType = 'audio/webm;codecs=opus';
      if (!MediaRecorder.isTypeSupported(mimeType)) {
        console.warn('[Voice] webm not supported, trying mp4...');
        mimeType = 'audio/mp4';
        if (!MediaRecorder.isTypeSupported(mimeType)) {
          console.warn('[Voice] mp4 not supported, trying default...');
          mimeType = '';
        }
      }

      const options = mimeType ? { mimeType } : {};
      console.log('[Voice] Using MediaRecorder with options:', options);
      const mediaRecorder = new MediaRecorder(stream, options);
      mediaRecorderRef.current = mediaRecorder;

      mediaRecorder.ondataavailable = (event) => {
        if (event.data.size > 0) {
          audioChunksRef.current.push(event.data);
        }
      };

      mediaRecorder.onstop = async () => {
        console.log('[Voice] Recording stopped, processing audio...');
        const audioBlob = new Blob(audioChunksRef.current, {
          type: mimeType || 'audio/webm',
        });
        console.log('[Voice] Audio blob created, size:', audioBlob.size, 'bytes');

        // Send to the backend for transcription.
        try {
          const formData = new FormData();
          const extension = mimeType.includes('mp4') ? 'mp4' : 'webm';
          formData.append('audio', audioBlob, `recording.${extension}`);

          console.log('[Voice] Sending to backend for transcription...');
          const response = await fetch('/api/voice/transcribe', {
            method: 'POST',
            body: formData,
          });

          console.log('[Voice] Transcription response status:', response.status);
          const data = await response.json();
          console.log('[Voice] Transcription response data:', data);

          if (response.ok && data.success) {
            setState(prev => ({
              ...prev,
              isListening: false,
              transcript: data.transcript,
            }));
          } else {
            console.error('[Voice] Transcription failed:', data);
            setState(prev => ({
              ...prev,
              isListening: false,
              error: data.message || 'Failed to transcribe audio',
            }));
          }
        } catch (error) {
          console.error('[Voice] Transcription error:', error);
          setState(prev => ({
            ...prev,
            isListening: false,
            error: 'Failed to process audio. Please try again.',
          }));
        }

        // Stop all tracks to release the microphone.
        stream.getTracks().forEach(track => track.stop());
        console.log('[Voice] Stream tracks stopped');
      };

      mediaRecorder.onerror = (event) => {
        console.error('[Voice] MediaRecorder error:', event);
        setState(prev => ({
          ...prev,
          isListening: false,
          error: 'Recording failed',
        }));
      };

      setState(prev => ({
        ...prev,
        isListening: true,
        transcript: '',
        error: null,
      }));

      console.log('[Voice] Starting MediaRecorder...');
      mediaRecorder.start();
      console.log('[Voice] MediaRecorder started successfully');

      // Auto-stop after 10 seconds.
      timeoutRef.current = setTimeout(() => {
        if (mediaRecorderRef.current && mediaRecorderRef.current.state === 'recording') {
          console.log('[Voice] Auto-stopping after 10 seconds');
          mediaRecorderRef.current.stop();
        }
      }, 10000);
    } catch (error: any) {
      console.error('[Voice] Failed to access microphone:', error);
      console.error('[Voice] Error name:', error.name);
      console.error('[Voice] Error message:', error.message);

      let errorMessage = 'Failed to access microphone';
      if (error.name === 'NotAllowedError' || error.name === 'PermissionDeniedError') {
        errorMessage =
          'Microphone permission denied. Please allow microphone access in your browser settings and try again.';
      } else if (error.name === 'NotFoundError' || error.name === 'DevicesNotFoundError') {
        errorMessage = 'No microphone found. Please check your device settings.';
      } else if (error.name === 'NotSupportedError') {
        errorMessage = 'Your browser does not support audio recording.';
      } else if (error.name === 'NotReadableError' || error.name === 'TrackStartError') {
        errorMessage = 'Microphone is already in use by another application.';
      }

      setState(prev => ({
        ...prev,
        isListening: false,
        error: errorMessage,
      }));
    }
  }, []);
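  // The fallback above posts the recorded audio to POST /api/voice/transcribe
  // as multipart form data with an `audio` field, and expects a JSON body of
  // the shape { success: boolean, transcript?: string, message?: string }.
  // A minimal Express sketch of that contract (illustrative only: the use of
  // `multer` in-memory storage and the `transcribeAudio` helper are
  // assumptions, not part of this module):
  //
  //   import express from 'express';
  //   import multer from 'multer';
  //
  //   const upload = multer({ storage: multer.memoryStorage() });
  //   const app = express();
  //
  //   app.post('/api/voice/transcribe', upload.single('audio'), async (req, res) => {
  //     try {
  //       // Hand the uploaded buffer to whatever transcription service is in use.
  //       const transcript = await transcribeAudio(req.file!.buffer, req.file!.mimetype);
  //       res.json({ success: true, transcript });
  //     } catch (err) {
  //       res.status(500).json({ success: false, message: 'Transcription failed' });
  //     }
  //   });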
  // Start listening with the Web Speech API.
  const startListeningWithSpeechAPI = useCallback(() => {
    const recognition = recognitionRef.current;

    // Clear previous state.
    setState(prev => ({
      ...prev,
      isListening: true,
      transcript: '',
      error: null,
    }));

    // Set up event handlers.
    recognition.onstart = () => {
      console.log('[Voice] Started listening');
    };

    recognition.onresult = (event: any) => {
      let interimTranscript = '';
      let finalTranscript = '';

      for (let i = event.resultIndex; i < event.results.length; i++) {
        const transcript = event.results[i][0].transcript;
        if (event.results[i].isFinal) {
          finalTranscript += transcript;
        } else {
          interimTranscript += transcript;
        }
      }

      setState(prev => ({
        ...prev,
        transcript: finalTranscript || interimTranscript,
      }));
    };

    recognition.onerror = (event: any) => {
      console.error('[Voice] Error:', event.error);

      let errorMessage = 'Failed to recognize speech';
      if (event.error === 'no-speech') {
        errorMessage = 'No speech detected. Please try again.';
      } else if (event.error === 'audio-capture') {
        errorMessage = 'No microphone found. Please check your settings.';
      } else if (event.error === 'not-allowed') {
        errorMessage = 'Microphone access denied. Please grant permission.';
      } else if (event.error === 'network') {
        errorMessage = 'Network error. Please check your connection.';
      }

      setState(prev => ({
        ...prev,
        isListening: false,
        error: errorMessage,
      }));
    };

    recognition.onend = () => {
      console.log('[Voice] Stopped listening');
      setState(prev => ({
        ...prev,
        isListening: false,
      }));
    };

    // Auto-stop after 10 seconds.
    timeoutRef.current = setTimeout(() => {
      if (recognitionRef.current) {
        recognitionRef.current.stop();
      }
    }, 10000);

    // Start recognition.
    try {
      recognition.start();
    } catch (error) {
      console.error('[Voice] Failed to start:', error);
      setState(prev => ({
        ...prev,
        isListening: false,
        error: 'Failed to start voice recognition',
      }));
    }
  }, []);

  // Start listening, choosing the appropriate method.
  const startListening = useCallback(() => {
    if (state.usesFallback) {
      startListeningWithFallback();
    } else if (recognitionRef.current) {
      startListeningWithSpeechAPI();
    } else {
      setState(prev => ({
        ...prev,
        error: 'Voice input not supported in this browser',
      }));
    }
  }, [state.usesFallback, startListeningWithFallback, startListeningWithSpeechAPI]);

  // Stop listening.
  const stopListening = useCallback(() => {
    if (recognitionRef.current) {
      try {
        recognitionRef.current.stop();
      } catch (e) {
        // Ignore errors
      }
    }
    if (mediaRecorderRef.current && mediaRecorderRef.current.state === 'recording') {
      mediaRecorderRef.current.stop();
    }
    if (timeoutRef.current) {
      clearTimeout(timeoutRef.current);
      timeoutRef.current = null;
    }
  }, []);

  // Reset transcript and error state.
  const reset = useCallback(() => {
    setState(prev => ({
      ...prev,
      transcript: '',
      error: null,
    }));
  }, []);

  return {
    ...state,
    startListening,
    stopListening,
    reset,
  };
}
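
// Example usage (illustrative sketch; `VoiceNoteButton`, `onTranscript`, and
// the './useVoiceInput' import path are hypothetical, not part of this module):
//
//   import { useEffect } from 'react';
//   import { useVoiceInput } from './useVoiceInput';
//
//   function VoiceNoteButton({ onTranscript }: { onTranscript: (text: string) => void }) {
//     const { isListening, isSupported, transcript, error, startListening, stopListening } =
//       useVoiceInput();
//
//     // Forward each completed transcript to the parent.
//     useEffect(() => {
//       if (transcript) onTranscript(transcript);
//     }, [transcript, onTranscript]);
//
//     if (!isSupported) return null;
//
//     return (
//       <>
//         <button onClick={isListening ? stopListening : startListening}>
//           {isListening ? 'Stop' : 'Speak'}
//         </button>
//         {error && <span role="alert">{error}</span>}
//       </>
//     );
//   }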