maternal-app/maternal-web/hooks/useVoiceInput.ts
Andrei 26d3f8962f
Improve iOS Safari voice input with better error handling and debugging
- Force MediaRecorder fallback for all iOS Safari devices
- Add iOS device detection to avoid Web Speech API on iOS
- Support multiple audio formats (webm, mp4, default) for compatibility
- Add comprehensive error logging throughout the flow
- Improve error messages with specific guidance for each error type
- Add console logging to track microphone permissions and recording state
- Better handling of getUserMedia permissions

This should help diagnose and fix the "Failed to recognize speech" error
by ensuring iOS Safari uses the MediaRecorder path with proper permissions.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-02 06:03:24 +00:00
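
For context, the MediaRecorder path in this hook posts the recording to /api/voice/transcribe as multipart form data and expects JSON with success, transcript, and message fields. Below is a minimal sketch of that endpoint contract, assuming a fetch-style route handler; the handler shape and the transcribeAudio helper are illustrative assumptions, not part of this commit.

// Response shape the hook expects from POST /api/voice/transcribe.
interface TranscribeResponse {
  success: boolean;
  transcript?: string;
  message?: string; // surfaced to the user when success is false
}

// Hypothetical speech-to-text helper standing in for whatever service the backend uses.
declare function transcribeAudio(audio: Blob): Promise<string>;

// Minimal route handler sketch using the standard fetch Request/Response types
// (for example, a Next.js App Router route handler).
export async function POST(request: Request): Promise<Response> {
  const formData = await request.formData();
  // The hook appends the blob as 'audio', named recording.webm or recording.mp4.
  const audio = formData.get('audio');
  if (!(audio instanceof Blob) || audio.size === 0) {
    return Response.json({ success: false, message: 'No audio received' }, { status: 400 });
  }
  const transcript = await transcribeAudio(audio);
  return Response.json({ success: true, transcript });
}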

372 lines
12 KiB
TypeScript

import { useState, useEffect, useCallback, useRef } from 'react';

export interface VoiceInputResult {
  transcript: string;
  confidence: number;
  isFinal: boolean;
}

export interface VoiceInputState {
  isListening: boolean;
  isSupported: boolean;
  transcript: string;
  error: string | null;
  usesFallback: boolean;
}

/**
 * Hook for voice input using the browser Web Speech API or a MediaRecorder fallback.
 *
 * Provides voice recording functionality with real-time transcription.
 * Falls back to MediaRecorder + server-side transcription on iOS Safari.
 */
export function useVoiceInput() {
  const [state, setState] = useState<VoiceInputState>({
    isListening: false,
    isSupported: false,
    transcript: '',
    error: null,
    usesFallback: false,
  });

  const recognitionRef = useRef<any>(null);
  const mediaRecorderRef = useRef<MediaRecorder | null>(null);
  const audioChunksRef = useRef<Blob[]>([]);
  const timeoutRef = useRef<NodeJS.Timeout | null>(null);

  // Check whether the browser supports Speech Recognition or MediaRecorder
  useEffect(() => {
    // Detect iOS devices (every iOS browser, including Safari, uses WebKit).
    // Note: iPadOS 13+ Safari defaults to a desktop user agent, so iPads may
    // not match this check.
    const isIOSSafari = /iPad|iPhone|iPod/.test(navigator.userAgent) && !(window as any).MSStream;

    const SpeechRecognition =
      (window as any).SpeechRecognition || (window as any).webkitSpeechRecognition;

    // Force the fallback on iOS Safari regardless of Speech Recognition availability
    if (isIOSSafari) {
      console.log('[Voice] iOS Safari detected, using MediaRecorder fallback');
      if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
        setState(prev => ({ ...prev, isSupported: true, usesFallback: true }));
      } else {
        setState(prev => ({ ...prev, isSupported: false }));
      }
    } else if (SpeechRecognition) {
      try {
        // Initialize recognition for non-iOS browsers
        const recognition = new SpeechRecognition();
        recognition.continuous = false; // Single recognition
        recognition.interimResults = true; // Get interim results
        recognition.maxAlternatives = 1;
        recognition.lang = 'en-US'; // Default language
        recognitionRef.current = recognition;
        setState(prev => ({ ...prev, isSupported: true, usesFallback: false }));
      } catch (error) {
        console.warn('[Voice] Speech Recognition initialization failed, trying fallback');
        if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
          setState(prev => ({ ...prev, isSupported: true, usesFallback: true }));
        } else {
          setState(prev => ({ ...prev, isSupported: false }));
        }
      }
    } else if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
      // Use the MediaRecorder fallback for other browsers without Speech Recognition
      console.log('[Voice] No Speech Recognition, using MediaRecorder fallback');
      setState(prev => ({ ...prev, isSupported: true, usesFallback: true }));
    } else {
      setState(prev => ({ ...prev, isSupported: false }));
    }

    return () => {
      if (recognitionRef.current) {
        try {
          recognitionRef.current.stop();
        } catch (e) {
          // Ignore errors on cleanup
        }
      }
      if (mediaRecorderRef.current && mediaRecorderRef.current.state !== 'inactive') {
        mediaRecorderRef.current.stop();
      }
      if (timeoutRef.current) {
        clearTimeout(timeoutRef.current);
      }
    };
  }, []);

  // Start listening with the MediaRecorder fallback
  const startListeningWithFallback = useCallback(async () => {
    audioChunksRef.current = [];

    try {
      console.log('[Voice] Requesting microphone access...');
      const stream = await navigator.mediaDevices.getUserMedia({
        audio: {
          echoCancellation: true,
          noiseSuppression: true,
          sampleRate: 44100,
        },
      });
      console.log('[Voice] Microphone access granted, creating MediaRecorder...');

      // Try different mime types for iOS Safari compatibility
      let mimeType = 'audio/webm;codecs=opus';
      if (!MediaRecorder.isTypeSupported(mimeType)) {
        console.warn('[Voice] webm not supported, trying mp4...');
        mimeType = 'audio/mp4';
        if (!MediaRecorder.isTypeSupported(mimeType)) {
          console.warn('[Voice] mp4 not supported, trying default...');
          mimeType = '';
        }
      }

      const options = mimeType ? { mimeType } : {};
      console.log('[Voice] Using MediaRecorder with options:', options);
      const mediaRecorder = new MediaRecorder(stream, options);
      mediaRecorderRef.current = mediaRecorder;

      mediaRecorder.ondataavailable = (event) => {
        if (event.data.size > 0) {
          audioChunksRef.current.push(event.data);
        }
      };

      mediaRecorder.onstop = async () => {
        console.log('[Voice] Recording stopped, processing audio...');
        const audioBlob = new Blob(audioChunksRef.current, {
          type: mimeType || 'audio/webm',
        });
        console.log('[Voice] Audio blob created, size:', audioBlob.size, 'bytes');

        // Send to backend for transcription
        try {
          const formData = new FormData();
          const extension = mimeType.includes('mp4') ? 'mp4' : 'webm';
          formData.append('audio', audioBlob, `recording.${extension}`);

          console.log('[Voice] Sending to backend for transcription...');
          const response = await fetch('/api/voice/transcribe', {
            method: 'POST',
            body: formData,
          });
          console.log('[Voice] Transcription response status:', response.status);

          const data = await response.json();
          console.log('[Voice] Transcription response data:', data);

          if (response.ok && data.success) {
            setState(prev => ({
              ...prev,
              isListening: false,
              transcript: data.transcript,
            }));
          } else {
            console.error('[Voice] Transcription failed:', data);
            setState(prev => ({
              ...prev,
              isListening: false,
              error: data.message || 'Failed to transcribe audio',
            }));
          }
        } catch (error) {
          console.error('[Voice] Transcription error:', error);
          setState(prev => ({
            ...prev,
            isListening: false,
            error: 'Failed to process audio. Please try again.',
          }));
        }

        // Stop all tracks
        stream.getTracks().forEach(track => track.stop());
        console.log('[Voice] Stream tracks stopped');
      };

      mediaRecorder.onerror = (event) => {
        console.error('[Voice] MediaRecorder error:', event);
        setState(prev => ({
          ...prev,
          isListening: false,
          error: 'Recording failed',
        }));
      };

      setState(prev => ({
        ...prev,
        isListening: true,
        transcript: '',
        error: null,
      }));

      console.log('[Voice] Starting MediaRecorder...');
      mediaRecorder.start();
      console.log('[Voice] MediaRecorder started successfully');

      // Auto-stop after 10 seconds
      timeoutRef.current = setTimeout(() => {
        if (mediaRecorderRef.current && mediaRecorderRef.current.state === 'recording') {
          console.log('[Voice] Auto-stopping after 10 seconds');
          mediaRecorderRef.current.stop();
        }
      }, 10000);
    } catch (error: any) {
      console.error('[Voice] Failed to access microphone:', error);
      console.error('[Voice] Error name:', error.name);
      console.error('[Voice] Error message:', error.message);

      let errorMessage = 'Failed to access microphone';
      if (error.name === 'NotAllowedError' || error.name === 'PermissionDeniedError') {
        errorMessage =
          'Microphone permission denied. Please allow microphone access in your browser settings and try again.';
      } else if (error.name === 'NotFoundError' || error.name === 'DevicesNotFoundError') {
        errorMessage = 'No microphone found. Please check your device settings.';
      } else if (error.name === 'NotSupportedError') {
        errorMessage = 'Your browser does not support audio recording.';
      } else if (error.name === 'NotReadableError' || error.name === 'TrackStartError') {
        errorMessage = 'Microphone is already in use by another application.';
      }

      setState(prev => ({
        ...prev,
        isListening: false,
        error: errorMessage,
      }));
    }
  }, []);

  // Start listening with the Web Speech API
  const startListeningWithSpeechAPI = useCallback(() => {
    const recognition = recognitionRef.current;

    // Clear previous state
    setState(prev => ({
      ...prev,
      isListening: true,
      transcript: '',
      error: null,
    }));

    // Set up event handlers
    recognition.onstart = () => {
      console.log('[Voice] Started listening');
    };

    recognition.onresult = (event: any) => {
      let interimTranscript = '';
      let finalTranscript = '';
      for (let i = event.resultIndex; i < event.results.length; i++) {
        const transcript = event.results[i][0].transcript;
        if (event.results[i].isFinal) {
          finalTranscript += transcript;
        } else {
          interimTranscript += transcript;
        }
      }
      setState(prev => ({
        ...prev,
        transcript: finalTranscript || interimTranscript,
      }));
    };

    recognition.onerror = (event: any) => {
      console.error('[Voice] Error:', event.error);
      let errorMessage = 'Failed to recognize speech';
      if (event.error === 'no-speech') {
        errorMessage = 'No speech detected. Please try again.';
      } else if (event.error === 'audio-capture') {
        errorMessage = 'No microphone found. Please check your settings.';
      } else if (event.error === 'not-allowed') {
        errorMessage = 'Microphone access denied. Please grant permission.';
      } else if (event.error === 'network') {
        errorMessage = 'Network error. Please check your connection.';
      }
      setState(prev => ({
        ...prev,
        isListening: false,
        error: errorMessage,
      }));
    };

    recognition.onend = () => {
      console.log('[Voice] Stopped listening');
      setState(prev => ({
        ...prev,
        isListening: false,
      }));
    };

    // Auto-stop after 10 seconds
    timeoutRef.current = setTimeout(() => {
      if (recognitionRef.current) {
        recognitionRef.current.stop();
      }
    }, 10000);

    // Start recognition
    try {
      recognition.start();
    } catch (error) {
      console.error('[Voice] Failed to start:', error);
      setState(prev => ({
        ...prev,
        isListening: false,
        error: 'Failed to start voice recognition',
      }));
    }
  }, []);

  // Start listening (chooses the appropriate method)
  const startListening = useCallback(() => {
    if (state.usesFallback) {
      startListeningWithFallback();
    } else if (recognitionRef.current) {
      startListeningWithSpeechAPI();
    } else {
      setState(prev => ({
        ...prev,
        error: 'Voice input not supported in this browser',
      }));
    }
  }, [state.usesFallback, startListeningWithFallback, startListeningWithSpeechAPI]);

  // Stop listening
  const stopListening = useCallback(() => {
    if (recognitionRef.current) {
      try {
        recognitionRef.current.stop();
      } catch (e) {
        // Ignore errors
      }
    }
    if (mediaRecorderRef.current && mediaRecorderRef.current.state === 'recording') {
      mediaRecorderRef.current.stop();
    }
    if (timeoutRef.current) {
      clearTimeout(timeoutRef.current);
      timeoutRef.current = null;
    }
  }, []);

  // Reset state
  const reset = useCallback(() => {
    setState(prev => ({
      ...prev,
      transcript: '',
      error: null,
    }));
  }, []);

  return {
    ...state,
    startListening,
    stopListening,
    reset,
  };
}
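
A minimal usage sketch of the hook in a .tsx component (the component and import path are illustrative, not from this repo):

import { useVoiceInput } from '@/hooks/useVoiceInput';

export function VoiceNoteButton() {
  const { isSupported, isListening, transcript, error, startListening, stopListening, reset } =
    useVoiceInput();

  if (!isSupported) {
    return <p>Voice input is not supported in this browser.</p>;
  }

  return (
    <div>
      <button onClick={isListening ? stopListening : startListening}>
        {isListening ? 'Stop recording' : 'Start recording'}
      </button>
      {transcript && <p>Heard: {transcript}</p>}
      {error && <p role="alert">{error}</p>}
      {(transcript || error) && <button onClick={reset}>Clear</button>}
    </div>
  );
}

Both recognition paths auto-stop after 10 seconds, so the stop button is a convenience for ending short recordings early.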