From 26d3f8962f4b0711f7f9800cfdbf7ce32c52e908 Mon Sep 17 00:00:00 2001
From: Andrei
Date: Thu, 2 Oct 2025 06:03:24 +0000
Subject: [PATCH] Improve iOS Safari voice input with better error handling and debugging
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Force MediaRecorder fallback for all iOS Safari devices
- Add iOS device detection to avoid Web Speech API on iOS
- Support multiple audio formats (webm, mp4, default) for compatibility
- Add comprehensive error logging throughout the flow
- Improve error messages with specific guidance for each error type
- Add console logging to track microphone permissions and recording state
- Better handling of getUserMedia permissions

This should help diagnose and fix the "Failed to recognize speech" error
by ensuring iOS Safari uses the MediaRecorder path with proper permissions.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude
---
 .../components/voice/VoiceInputButton.tsx |  2 +
 maternal-web/hooks/useVoiceInput.ts       | 89 +++++++++++++++----
 2 files changed, 76 insertions(+), 15 deletions(-)

diff --git a/maternal-web/components/voice/VoiceInputButton.tsx b/maternal-web/components/voice/VoiceInputButton.tsx
index 15a5a0d..4a0e98a 100644
--- a/maternal-web/components/voice/VoiceInputButton.tsx
+++ b/maternal-web/components/voice/VoiceInputButton.tsx
@@ -53,6 +53,7 @@ export function VoiceInputButton({
   }, [transcript, isListening, isProcessing]);
 
   const handleOpen = () => {
+    console.log('[VoiceButton] Opening dialog, isSupported:', isSupported, 'usesFallback:', usesFallback);
     if (!isSupported) {
       alert('Voice input is not supported in your browser. Please use Chrome, Edge, or Safari.');
       return;
     }
@@ -72,6 +73,7 @@
   };
 
   const handleStartListening = () => {
+    console.log('[VoiceButton] Starting listening, usesFallback:', usesFallback);
     reset();
     setClassificationResult(null);
     startListening();
diff --git a/maternal-web/hooks/useVoiceInput.ts b/maternal-web/hooks/useVoiceInput.ts
index abbe509..494b06e 100644
--- a/maternal-web/hooks/useVoiceInput.ts
+++ b/maternal-web/hooks/useVoiceInput.ts
@@ -36,12 +36,23 @@ export function useVoiceInput() {
   // Check if browser supports Speech Recognition or MediaRecorder
   useEffect(() => {
+    // Detect iOS Safari specifically
+    const isIOSSafari = /iPad|iPhone|iPod/.test(navigator.userAgent) && !(window as any).MSStream;
+
     const SpeechRecognition = (window as any).SpeechRecognition || (window as any).webkitSpeechRecognition;
 
-    if (SpeechRecognition) {
+    // Force fallback for iOS Safari regardless of Speech Recognition availability
+    if (isIOSSafari) {
+      console.log('[Voice] iOS Safari detected, using MediaRecorder fallback');
+      if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
+        setState(prev => ({ ...prev, isSupported: true, usesFallback: true }));
+      } else {
+        setState(prev => ({ ...prev, isSupported: false }));
+      }
+    } else if (SpeechRecognition) {
       try {
-        // Initialize recognition
+        // Initialize recognition for non-iOS browsers
         const recognition = new SpeechRecognition();
         recognition.continuous = false; // Single recognition
         recognition.interimResults = true; // Get interim results
@@ -51,12 +62,16 @@
        recognitionRef.current = recognition;
        setState(prev => ({ ...prev, isSupported: true, usesFallback: false }));
      } catch (error) {
-       console.warn('[Voice] Speech Recognition initialization failed, using fallback');
-       setState(prev => ({ ...prev, isSupported: true, usesFallback: true }));
+       console.warn('[Voice] Speech Recognition initialization failed, trying fallback');
+       if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
+         setState(prev => ({ ...prev, isSupported: true, usesFallback: true }));
+       } else {
+         setState(prev => ({ ...prev, isSupported: false }));
+       }
      }
    } else if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
-     // Use MediaRecorder fallback for iOS Safari
-     console.log('[Voice] Using MediaRecorder fallback for iOS Safari');
+     // Use MediaRecorder fallback for other browsers without Speech Recognition
+     console.log('[Voice] No Speech Recognition, using MediaRecorder fallback');
      setState(prev => ({ ...prev, isSupported: true, usesFallback: true }));
    } else {
      setState(prev => ({ ...prev, isSupported: false }));
    }
@@ -84,11 +99,32 @@
     audioChunksRef.current = [];
 
     try {
-      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
-      const mediaRecorder = new MediaRecorder(stream, {
-        mimeType: 'audio/webm;codecs=opus',
+      console.log('[Voice] Requesting microphone access...');
+      const stream = await navigator.mediaDevices.getUserMedia({
+        audio: {
+          echoCancellation: true,
+          noiseSuppression: true,
+          sampleRate: 44100,
+        }
       });
+      console.log('[Voice] Microphone access granted, creating MediaRecorder...');
+
+      // Try different mime types for iOS Safari compatibility
+      let mimeType = 'audio/webm;codecs=opus';
+      if (!MediaRecorder.isTypeSupported(mimeType)) {
+        console.warn('[Voice] webm not supported, trying mp4...');
+        mimeType = 'audio/mp4';
+        if (!MediaRecorder.isTypeSupported(mimeType)) {
+          console.warn('[Voice] mp4 not supported, trying default...');
+          mimeType = '';
+        }
+      }
+
+      const options = mimeType ? { mimeType } : {};
+      console.log('[Voice] Using MediaRecorder with options:', options);
+      const mediaRecorder = new MediaRecorder(stream, options);
+
       mediaRecorderRef.current = mediaRecorder;
 
       mediaRecorder.ondataavailable = (event) => {
@@ -98,19 +134,28 @@
       };
 
       mediaRecorder.onstop = async () => {
-        const audioBlob = new Blob(audioChunksRef.current, { type: 'audio/webm' });
+        console.log('[Voice] Recording stopped, processing audio...');
+        const audioBlob = new Blob(audioChunksRef.current, {
+          type: mimeType || 'audio/webm'
+        });
+
+        console.log('[Voice] Audio blob created, size:', audioBlob.size, 'bytes');
 
         // Send to backend for transcription
         try {
           const formData = new FormData();
-          formData.append('audio', audioBlob, 'recording.webm');
+          const extension = mimeType.includes('mp4') ? 'mp4' : 'webm';
+          formData.append('audio', audioBlob, `recording.${extension}`);
 
+          console.log('[Voice] Sending to backend for transcription...');
           const response = await fetch('/api/voice/transcribe', {
             method: 'POST',
             body: formData,
           });
 
+          console.log('[Voice] Transcription response status:', response.status);
           const data = await response.json();
+          console.log('[Voice] Transcription response data:', data);
 
           if (response.ok && data.success) {
             setState(prev => ({
@@ -119,6 +164,7 @@
               transcript: data.transcript,
             }));
           } else {
+            console.error('[Voice] Transcription failed:', data);
             setState(prev => ({
               ...prev,
               isListening: false,
@@ -130,12 +176,13 @@
           setState(prev => ({
             ...prev,
             isListening: false,
-            error: 'Failed to process audio',
+            error: 'Failed to process audio. Please try again.',
           }));
         }
 
         // Stop all tracks
         stream.getTracks().forEach(track => track.stop());
+        console.log('[Voice] Stream tracks stopped');
       };
 
       mediaRecorder.onerror = (event) => {
@@ -154,24 +201,36 @@
        error: null,
      }));
 
+     console.log('[Voice] Starting MediaRecorder...');
      mediaRecorder.start();
+     console.log('[Voice] MediaRecorder started successfully');
 
      // Auto-stop after 10 seconds
      timeoutRef.current = setTimeout(() => {
        if (mediaRecorderRef.current && mediaRecorderRef.current.state === 'recording') {
+         console.log('[Voice] Auto-stopping after 10 seconds');
          mediaRecorderRef.current.stop();
        }
      }, 10000);
    } catch (error: any) {
      console.error('[Voice] Failed to access microphone:', error);
+     console.error('[Voice] Error name:', error.name);
+     console.error('[Voice] Error message:', error.message);
+
      let errorMessage = 'Failed to access microphone';
      if (error.name === 'NotAllowedError' || error.name === 'PermissionDeniedError') {
-       errorMessage = 'Microphone access denied. Please grant permission.';
-     } else if (error.name === 'NotFoundError') {
-       errorMessage = 'No microphone found. Please check your settings.';
+       errorMessage = 'Microphone permission denied. Please allow microphone access in your browser settings and try again.';
+     } else if (error.name === 'NotFoundError' || error.name === 'DevicesNotFoundError') {
+       errorMessage = 'No microphone found. Please check your device settings.';
+     } else if (error.name === 'NotSupportedError') {
+       errorMessage = 'Your browser does not support audio recording.';
+     } else if (error.name === 'NotReadableError' || error.name === 'TrackStartError') {
+       errorMessage = 'Microphone is already in use by another application.';
      }
+
      setState(prev => ({
        ...prev,
+       isListening: false,
        error: errorMessage,
      }));
    }