Improve iOS Safari voice input with better error handling and debugging
- Force MediaRecorder fallback for all iOS Safari devices
- Add iOS device detection to avoid the Web Speech API on iOS
- Support multiple audio formats (webm, mp4, default) for compatibility (see the sketch after this message)
- Add comprehensive error logging throughout the flow
- Improve error messages with specific guidance for each error type
- Add console logging to track microphone permissions and recording state
- Better handling of getUserMedia permissions

This should help diagnose and fix the "Failed to recognize speech" error by ensuring iOS Safari uses the MediaRecorder path with proper permissions.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
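The format-compatibility bullet above amounts to probing `MediaRecorder.isTypeSupported` in preference order before constructing the recorder. A minimal standalone sketch of that probing, assuming a hypothetical helper name `pickRecorderOptions` (illustrative only, not an identifier from this codebase):

```typescript
// Sketch only: pickRecorderOptions is a hypothetical helper, not part of
// the diff below. It mirrors the probing order the commit uses.
function pickRecorderOptions(): MediaRecorderOptions {
  // Prefer Opus-in-WebM (Chrome, Edge, Firefox), then MP4 (iOS Safari),
  // and finally fall back to the browser's default container.
  for (const mimeType of ['audio/webm;codecs=opus', 'audio/mp4']) {
    if (MediaRecorder.isTypeSupported(mimeType)) {
      return { mimeType };
    }
  }
  return {}; // empty options: let the browser choose
}

// Usage sketch:
//   const recorder = new MediaRecorder(stream, pickRecorderOptions());
```

iOS Safari typically rejects `audio/webm;codecs=opus` and records `audio/mp4`, which is why the diff also derives the upload filename extension from the negotiated mime type.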
```diff
@@ -53,6 +53,7 @@ export function VoiceInputButton({
   }, [transcript, isListening, isProcessing]);
 
   const handleOpen = () => {
+    console.log('[VoiceButton] Opening dialog, isSupported:', isSupported, 'usesFallback:', usesFallback);
     if (!isSupported) {
       alert('Voice input is not supported in your browser. Please use Chrome, Edge, or Safari.');
       return;
@@ -72,6 +73,7 @@ export function VoiceInputButton({
   };
 
   const handleStartListening = () => {
+    console.log('[VoiceButton] Starting listening, usesFallback:', usesFallback);
     reset();
     setClassificationResult(null);
     startListening();
@@ -36,12 +36,23 @@ export function useVoiceInput() {
 
   // Check if browser supports Speech Recognition or MediaRecorder
   useEffect(() => {
+    // Detect iOS Safari specifically
+    const isIOSSafari = /iPad|iPhone|iPod/.test(navigator.userAgent) && !(window as any).MSStream;
+
     const SpeechRecognition =
       (window as any).SpeechRecognition || (window as any).webkitSpeechRecognition;
 
-    if (SpeechRecognition) {
+    // Force fallback for iOS Safari regardless of Speech Recognition availability
+    if (isIOSSafari) {
+      console.log('[Voice] iOS Safari detected, using MediaRecorder fallback');
+      if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
+        setState(prev => ({ ...prev, isSupported: true, usesFallback: true }));
+      } else {
+        setState(prev => ({ ...prev, isSupported: false }));
+      }
+    } else if (SpeechRecognition) {
       try {
-        // Initialize recognition
+        // Initialize recognition for non-iOS browsers
         const recognition = new SpeechRecognition();
         recognition.continuous = false; // Single recognition
         recognition.interimResults = true; // Get interim results
@@ -51,12 +62,16 @@ export function useVoiceInput() {
         recognitionRef.current = recognition;
         setState(prev => ({ ...prev, isSupported: true, usesFallback: false }));
       } catch (error) {
-        console.warn('[Voice] Speech Recognition initialization failed, using fallback');
-        setState(prev => ({ ...prev, isSupported: true, usesFallback: true }));
+        console.warn('[Voice] Speech Recognition initialization failed, trying fallback');
+        if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
+          setState(prev => ({ ...prev, isSupported: true, usesFallback: true }));
+        } else {
+          setState(prev => ({ ...prev, isSupported: false }));
+        }
       }
     } else if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
-      // Use MediaRecorder fallback for iOS Safari
-      console.log('[Voice] Using MediaRecorder fallback for iOS Safari');
+      // Use MediaRecorder fallback for other browsers without Speech Recognition
+      console.log('[Voice] No Speech Recognition, using MediaRecorder fallback');
       setState(prev => ({ ...prev, isSupported: true, usesFallback: true }));
     } else {
       setState(prev => ({ ...prev, isSupported: false }));
@@ -84,11 +99,32 @@ export function useVoiceInput() {
     audioChunksRef.current = [];
 
     try {
-      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
-      const mediaRecorder = new MediaRecorder(stream, {
-        mimeType: 'audio/webm;codecs=opus',
+      console.log('[Voice] Requesting microphone access...');
+      const stream = await navigator.mediaDevices.getUserMedia({
+        audio: {
+          echoCancellation: true,
+          noiseSuppression: true,
+          sampleRate: 44100,
+        }
       });
 
+      console.log('[Voice] Microphone access granted, creating MediaRecorder...');
+
+      // Try different mime types for iOS Safari compatibility
+      let mimeType = 'audio/webm;codecs=opus';
+      if (!MediaRecorder.isTypeSupported(mimeType)) {
+        console.warn('[Voice] webm not supported, trying mp4...');
+        mimeType = 'audio/mp4';
+        if (!MediaRecorder.isTypeSupported(mimeType)) {
+          console.warn('[Voice] mp4 not supported, trying default...');
+          mimeType = '';
+        }
+      }
+
+      const options = mimeType ? { mimeType } : {};
+      console.log('[Voice] Using MediaRecorder with options:', options);
+      const mediaRecorder = new MediaRecorder(stream, options);
+
       mediaRecorderRef.current = mediaRecorder;
 
       mediaRecorder.ondataavailable = (event) => {
@@ -98,19 +134,28 @@ export function useVoiceInput() {
       };
 
       mediaRecorder.onstop = async () => {
-        const audioBlob = new Blob(audioChunksRef.current, { type: 'audio/webm' });
+        console.log('[Voice] Recording stopped, processing audio...');
+        const audioBlob = new Blob(audioChunksRef.current, {
+          type: mimeType || 'audio/webm'
+        });
+
+        console.log('[Voice] Audio blob created, size:', audioBlob.size, 'bytes');
 
         // Send to backend for transcription
         try {
           const formData = new FormData();
-          formData.append('audio', audioBlob, 'recording.webm');
+          const extension = mimeType.includes('mp4') ? 'mp4' : 'webm';
+          formData.append('audio', audioBlob, `recording.${extension}`);
+
+          console.log('[Voice] Sending to backend for transcription...');
           const response = await fetch('/api/voice/transcribe', {
             method: 'POST',
             body: formData,
           });
 
+          console.log('[Voice] Transcription response status:', response.status);
           const data = await response.json();
+          console.log('[Voice] Transcription response data:', data);
+
           if (response.ok && data.success) {
             setState(prev => ({
@@ -119,6 +164,7 @@ export function useVoiceInput() {
               transcript: data.transcript,
             }));
           } else {
+            console.error('[Voice] Transcription failed:', data);
             setState(prev => ({
               ...prev,
               isListening: false,
@@ -130,12 +176,13 @@ export function useVoiceInput() {
           setState(prev => ({
             ...prev,
             isListening: false,
-            error: 'Failed to process audio',
+            error: 'Failed to process audio. Please try again.',
           }));
         }
 
         // Stop all tracks
         stream.getTracks().forEach(track => track.stop());
+        console.log('[Voice] Stream tracks stopped');
       };
 
       mediaRecorder.onerror = (event) => {
@@ -154,24 +201,36 @@ export function useVoiceInput() {
         error: null,
       }));
 
+      console.log('[Voice] Starting MediaRecorder...');
       mediaRecorder.start();
+      console.log('[Voice] MediaRecorder started successfully');
 
      // Auto-stop after 10 seconds
      timeoutRef.current = setTimeout(() => {
        if (mediaRecorderRef.current && mediaRecorderRef.current.state === 'recording') {
+          console.log('[Voice] Auto-stopping after 10 seconds');
          mediaRecorderRef.current.stop();
        }
      }, 10000);
    } catch (error: any) {
      console.error('[Voice] Failed to access microphone:', error);
+      console.error('[Voice] Error name:', error.name);
+      console.error('[Voice] Error message:', error.message);
+
      let errorMessage = 'Failed to access microphone';
      if (error.name === 'NotAllowedError' || error.name === 'PermissionDeniedError') {
-        errorMessage = 'Microphone access denied. Please grant permission.';
-      } else if (error.name === 'NotFoundError') {
-        errorMessage = 'No microphone found. Please check your settings.';
+        errorMessage = 'Microphone permission denied. Please allow microphone access in your browser settings and try again.';
+      } else if (error.name === 'NotFoundError' || error.name === 'DevicesNotFoundError') {
+        errorMessage = 'No microphone found. Please check your device settings.';
+      } else if (error.name === 'NotSupportedError') {
+        errorMessage = 'Your browser does not support audio recording.';
+      } else if (error.name === 'NotReadableError' || error.name === 'TrackStartError') {
+        errorMessage = 'Microphone is already in use by another application.';
      }
 
      setState(prev => ({
        ...prev,
+        isListening: false,
        error: errorMessage,
      }));
    }
```
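For reference, the `fetch` call in the hook posts a multipart form field named `audio` and expects a JSON body shaped like `{ success, transcript }`. A hypothetical Express/multer handler matching that contract is sketched below; the handler, the `multer` wiring, and `transcribeAudio` are assumptions inferred from the client code, not this project's actual backend:

```typescript
// Hypothetical sketch of the /api/voice/transcribe contract the client
// assumes; not the project's real server code.
import express from 'express';
import multer from 'multer';

// Placeholder for whatever speech-to-text service the backend calls.
declare function transcribeAudio(audio: Buffer, mimeType: string): Promise<string>;

const upload = multer({ storage: multer.memoryStorage() });
const app = express();

app.post('/api/voice/transcribe', upload.single('audio'), async (req, res) => {
  if (!req.file) {
    // The hook treats `!data.success` as a failure even when response.ok is true.
    res.status(400).json({ success: false, error: 'No audio uploaded' });
    return;
  }
  try {
    // Must accept both webm and mp4 containers, since iOS Safari records mp4
    // (see the mime-type probing in the diff above).
    const transcript = await transcribeAudio(req.file.buffer, req.file.mimetype);
    res.json({ success: true, transcript });
  } catch {
    res.status(500).json({ success: false, error: 'Failed to recognize speech' });
  }
});
```

Whatever the real endpoint does, it now has to accept both `.webm` and `.mp4` uploads, since the client derives the filename extension from the negotiated recorder mime type.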