import { useState, useEffect, useCallback, useRef } from 'react';

export interface VoiceInputResult {
  transcript: string;
  confidence: number;
  isFinal: boolean;
}

export interface VoiceInputState {
  isListening: boolean;
  isSupported: boolean;
  transcript: string;
  error: string | null;
  usesFallback: boolean;
}

/**
 * Hook for voice input using the browser Web Speech API, with a MediaRecorder fallback.
 *
 * Provides voice recording functionality with real-time transcription.
 * Falls back to MediaRecorder + server-side transcription for browsers
 * without the Web Speech API (e.g. iOS Safari).
 */
export function useVoiceInput() {
  // Type the state explicitly so `error` is `string | null` rather than the
  // inferred `null`, which would reject later error-message assignments.
  const [state, setState] = useState<VoiceInputState>({
    isListening: false,
    isSupported: false,
    transcript: '',
    error: null,
    usesFallback: false,
  });

  // SpeechRecognition has no standard lib.dom typing, so keep the ref as `any`.
  const recognitionRef = useRef<any>(null);
  const mediaRecorderRef = useRef<MediaRecorder | null>(null);
  const audioChunksRef = useRef<Blob[]>([]);
  const timeoutRef = useRef<ReturnType<typeof setTimeout> | null>(null);

  // Check if browser supports Speech Recognition or MediaRecorder
  useEffect(() => {
    const SpeechRecognition =
      (window as any).SpeechRecognition || (window as any).webkitSpeechRecognition;

    if (SpeechRecognition) {
      try {
        // Initialize recognition
        const recognition = new SpeechRecognition();
        recognition.continuous = false; // Single recognition
        recognition.interimResults = true; // Get interim results
        recognition.maxAlternatives = 1;
        recognition.lang = 'en-US'; // Default language
        recognitionRef.current = recognition;
        setState(prev => ({ ...prev, isSupported: true, usesFallback: false }));
      } catch (error) {
        console.warn('[Voice] Speech Recognition initialization failed, using fallback');
        setState(prev => ({ ...prev, isSupported: true, usesFallback: true }));
      }
    } else if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
      // Use MediaRecorder fallback for iOS Safari
      console.log('[Voice] Using MediaRecorder fallback for iOS Safari');
      setState(prev => ({ ...prev, isSupported: true, usesFallback: true }));
    } else {
      setState(prev => ({ ...prev, isSupported: false }));
    }

    return () => {
      if (recognitionRef.current) {
        try {
          recognitionRef.current.stop();
        } catch (e) {
          // Ignore errors on cleanup
        }
      }
      if (mediaRecorderRef.current && mediaRecorderRef.current.state !== 'inactive') {
        mediaRecorderRef.current.stop();
      }
      if (timeoutRef.current) {
        clearTimeout(timeoutRef.current);
      }
    };
  }, []);

  // Start listening with MediaRecorder fallback
  const startListeningWithFallback = useCallback(async () => {
    audioChunksRef.current = [];

    // Clear any auto-stop timer left over from a previous session so it
    // cannot cut the new recording short.
    if (timeoutRef.current) {
      clearTimeout(timeoutRef.current);
      timeoutRef.current = null;
    }

    try {
      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });

      // iOS Safari cannot record audio/webm; prefer Opus-in-WebM where
      // supported and fall back to audio/mp4 otherwise. A hardcoded
      // 'audio/webm;codecs=opus' would make the constructor throw on Safari.
      const mimeType = MediaRecorder.isTypeSupported('audio/webm;codecs=opus')
        ? 'audio/webm;codecs=opus'
        : 'audio/mp4';
      const extension = mimeType.startsWith('audio/mp4') ? 'mp4' : 'webm';

      const mediaRecorder = new MediaRecorder(stream, { mimeType });
      mediaRecorderRef.current = mediaRecorder;

      mediaRecorder.ondataavailable = (event) => {
        if (event.data.size > 0) {
          audioChunksRef.current.push(event.data);
        }
      };

      mediaRecorder.onstop = async () => {
        const audioBlob = new Blob(audioChunksRef.current, { type: mimeType });

        // Send to backend for transcription
        try {
          const formData = new FormData();
          formData.append('audio', audioBlob, `recording.${extension}`);

          const response = await fetch('/api/voice/transcribe', {
            method: 'POST',
            body: formData,
          });

          const data = await response.json();

          if (response.ok && data.success) {
            setState(prev => ({
              ...prev,
              isListening: false,
              transcript: data.transcript,
            }));
          } else {
            setState(prev => ({
              ...prev,
              isListening: false,
              error: data.message || 'Failed to transcribe audio',
            }));
          }
        } catch (error) {
          console.error('[Voice] Transcription error:', error);
          setState(prev => ({
            ...prev,
            isListening: false,
            error: 'Failed to process audio',
          }));
        }

        // Stop all tracks to release the microphone
        stream.getTracks().forEach(track => track.stop());
      };
      mediaRecorder.onerror = (event) => {
        console.error('[Voice] MediaRecorder error:', event);
        setState(prev => ({
          ...prev,
          isListening: false,
          error: 'Recording failed',
        }));
      };

      setState(prev => ({
        ...prev,
        isListening: true,
        transcript: '',
        error: null,
      }));

      mediaRecorder.start();

      // Auto-stop after 10 seconds
      timeoutRef.current = setTimeout(() => {
        if (mediaRecorderRef.current && mediaRecorderRef.current.state === 'recording') {
          mediaRecorderRef.current.stop();
        }
      }, 10000);
    } catch (error: any) {
      console.error('[Voice] Failed to access microphone:', error);
      let errorMessage = 'Failed to access microphone';
      if (error.name === 'NotAllowedError' || error.name === 'PermissionDeniedError') {
        errorMessage = 'Microphone access denied. Please grant permission.';
      } else if (error.name === 'NotFoundError') {
        errorMessage = 'No microphone found. Please check your settings.';
      }
      setState(prev => ({
        ...prev,
        error: errorMessage,
      }));
    }
  }, []);

  // Start listening with Web Speech API
  const startListeningWithSpeechAPI = useCallback(() => {
    const recognition = recognitionRef.current;
    if (!recognition) return; // Guard: only reachable when recognition exists

    // Clear any auto-stop timer left over from a previous session.
    if (timeoutRef.current) {
      clearTimeout(timeoutRef.current);
      timeoutRef.current = null;
    }

    // Clear previous state
    setState(prev => ({
      ...prev,
      isListening: true,
      transcript: '',
      error: null,
    }));

    // Set up event handlers
    recognition.onstart = () => {
      console.log('[Voice] Started listening');
    };

    recognition.onresult = (event: any) => {
      let interimTranscript = '';
      let finalTranscript = '';

      for (let i = event.resultIndex; i < event.results.length; i++) {
        const transcript = event.results[i][0].transcript;
        if (event.results[i].isFinal) {
          finalTranscript += transcript;
        } else {
          interimTranscript += transcript;
        }
      }

      // Prefer the final transcript; show interim results while speaking
      setState(prev => ({
        ...prev,
        transcript: finalTranscript || interimTranscript,
      }));
    };

    recognition.onerror = (event: any) => {
      console.error('[Voice] Error:', event.error);
      let errorMessage = 'Failed to recognize speech';
      if (event.error === 'no-speech') {
        errorMessage = 'No speech detected. Please try again.';
      } else if (event.error === 'audio-capture') {
        errorMessage = 'No microphone found. Please check your settings.';
      } else if (event.error === 'not-allowed') {
        errorMessage = 'Microphone access denied. Please grant permission.';
      } else if (event.error === 'network') {
        errorMessage = 'Network error. Please check your connection.';
      }
      setState(prev => ({
        ...prev,
        isListening: false,
        error: errorMessage,
      }));
    };

    recognition.onend = () => {
      console.log('[Voice] Stopped listening');
      setState(prev => ({
        ...prev,
        isListening: false,
      }));
    };

    // Auto-stop after 10 seconds
    timeoutRef.current = setTimeout(() => {
      if (recognitionRef.current) {
        recognitionRef.current.stop();
      }
    }, 10000);

    // Start recognition
    try {
      recognition.start();
    } catch (error) {
      console.error('[Voice] Failed to start:', error);
      setState(prev => ({
        ...prev,
        isListening: false,
        error: 'Failed to start voice recognition',
      }));
    }
  }, []);

  // Start listening (chooses the appropriate method)
  const startListening = useCallback(() => {
    if (state.usesFallback) {
      startListeningWithFallback();
    } else if (recognitionRef.current) {
      startListeningWithSpeechAPI();
    } else {
      setState(prev => ({
        ...prev,
        error: 'Voice input not supported in this browser',
      }));
    }
  }, [state.usesFallback, startListeningWithFallback, startListeningWithSpeechAPI]);

  // Stop listening
  const stopListening = useCallback(() => {
    if (recognitionRef.current) {
      try {
        recognitionRef.current.stop();
      } catch (e) {
        // Ignore errors
      }
    }
    if (mediaRecorderRef.current && mediaRecorderRef.current.state === 'recording') {
      mediaRecorderRef.current.stop();
    }
    if (timeoutRef.current) {
      clearTimeout(timeoutRef.current);
      timeoutRef.current = null;
    }
  }, []);

  // Reset transcript and error state
  const reset = useCallback(() => {
    setState(prev => ({
      ...prev,
      transcript: '',
      error: null,
    }));
  }, []);

  return {
    ...state,
    startListening,
    stopListening,
    reset,
  };
}
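/*
 * Usage sketch (illustrative only; `VoiceInputButton` is a hypothetical
 * example component, not part of this module): wire the hook to a toggle
 * button and render the live transcript and any error message.
 *
 *   function VoiceInputButton() {
 *     const {
 *       isListening, isSupported, transcript, error,
 *       startListening, stopListening, reset,
 *     } = useVoiceInput();
 *
 *     if (!isSupported) return <p>Voice input is not supported in this browser.</p>;
 *
 *     return (
 *       <div>
 *         <button onClick={isListening ? stopListening : startListening}>
 *           {isListening ? 'Stop' : 'Speak'}
 *         </button>
 *         <button onClick={reset}>Clear</button>
 *         {transcript && <p>{transcript}</p>}
 *         {error && <p role="alert">{error}</p>}
 *       </div>
 *     );
 *   }
 */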