maternal-app/maternal-web/components/voice/VoiceInputButton.tsx
Andrei 26d3f8962f
Improve iOS Safari voice input with better error handling and debugging
- Force MediaRecorder fallback for all iOS Safari devices
- Add iOS device detection to avoid Web Speech API on iOS
- Support multiple audio formats (webm, mp4, default) for compatibility
- Add comprehensive error logging throughout the flow
- Improve error messages with specific guidance for each error type
- Add console logging to track microphone permissions and recording state
- Better handling of getUserMedia permissions

This should help diagnose and fix the "Failed to recognize speech" error
by ensuring iOS Safari uses the MediaRecorder path with proper permissions.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-02 06:03:24 +00:00
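The detection and fallback logic itself lives in the useVoiceInput hook, which is not part of this file. As a rough sketch of what the commit describes (iOS device detection plus probing for a supported audio format), something like the following could be used; the function names and candidate list here are illustrative assumptions, not the hook's actual code:

// Illustrative sketch only; the real implementation is in useVoiceInput.
function isIOS(): boolean {
  const ua = navigator.userAgent;
  // iPadOS 13+ reports itself as "Macintosh", so also check touch support.
  return /iPad|iPhone|iPod/.test(ua) || (ua.includes('Macintosh') && navigator.maxTouchPoints > 1);
}

function pickRecorderMimeType(): string | undefined {
  // Probe formats in order of preference (webm, mp4), per the commit notes;
  // returning undefined lets MediaRecorder fall back to its default format.
  for (const type of ['audio/webm', 'audio/mp4']) {
    if (MediaRecorder.isTypeSupported(type)) return type;
  }
  return undefined;
}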


'use client';

import React, { useState, useEffect } from 'react';
import {
  IconButton,
  Tooltip,
  Dialog,
  DialogTitle,
  DialogContent,
  DialogActions,
  Button,
  Box,
  Typography,
  CircularProgress,
  Alert,
  Chip,
} from '@mui/material';
import MicIcon from '@mui/icons-material/Mic';
import MicOffIcon from '@mui/icons-material/MicOff';
import { useVoiceInput } from '@/hooks/useVoiceInput';

export interface VoiceInputButtonProps {
  onTranscript: (transcript: string) => void;
  onClassifiedIntent?: (result: any) => void;
  size?: 'small' | 'medium' | 'large';
  variant?: 'icon' | 'fab';
}

/**
 * Voice input button component.
 *
 * Displays a microphone button that opens a dialog for voice recording.
 * Uses the Web Speech API for real-time transcription where available,
 * falling back to MediaRecorder-based audio capture on iOS Safari.
 */
export function VoiceInputButton({
  onTranscript,
  onClassifiedIntent,
  size = 'medium',
  variant = 'icon',
}: VoiceInputButtonProps) {
  const [open, setOpen] = useState(false);
  const [isProcessing, setIsProcessing] = useState(false);
  const [classificationResult, setClassificationResult] = useState<any>(null);
  const { isListening, isSupported, transcript, error, usesFallback, startListening, stopListening, reset } =
    useVoiceInput();

  // Auto-classify once we have a final transcript. Guard on
  // classificationResult so a completed classification (which flips
  // isProcessing back to false) does not re-trigger this effect in a loop.
  useEffect(() => {
    if (transcript && !isListening && !isProcessing && !classificationResult) {
      classifyTranscript(transcript);
    }
  }, [transcript, isListening, isProcessing, classificationResult]);

  const handleOpen = () => {
    console.log('[VoiceButton] Opening dialog, isSupported:', isSupported, 'usesFallback:', usesFallback);
    if (!isSupported) {
      alert('Voice input is not supported in your browser. Please use Chrome, Edge, or Safari.');
      return;
    }
    setOpen(true);
    reset();
    setClassificationResult(null);
  };

  const handleClose = () => {
    if (isListening) {
      stopListening();
    }
    setOpen(false);
    reset();
    setClassificationResult(null);
  };

  const handleStartListening = () => {
    console.log('[VoiceButton] Starting listening, usesFallback:', usesFallback);
    reset();
    setClassificationResult(null);
    startListening();
  };

  const handleStopListening = () => {
    stopListening();
  };
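
  // The endpoint's response shape below is inferred from how it is consumed
  // in this component (not confirmed against the API route source), roughly:
  //   { success: boolean;
  //     classification?: { intent: string; confidenceLevel: string;
  //                        structuredData?: Record<string, unknown> };
  //     message?: string }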
  const classifyTranscript = async (text: string) => {
    setIsProcessing(true);
    try {
      const response = await fetch('/api/voice/transcribe', {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
        },
        body: JSON.stringify({ text }),
      });
      const data = await response.json();
      if (response.ok && data.success) {
        setClassificationResult(data.classification);
        if (onClassifiedIntent) {
          onClassifiedIntent(data.classification);
        }
      } else {
        setClassificationResult({
          error: true,
          message: data.message || 'Could not understand command',
        });
      }
    } catch (error) {
      console.error('[Voice] Classification error:', error);
      setClassificationResult({
        error: true,
        message: 'Failed to process command',
      });
    } finally {
      setIsProcessing(false);
    }
  };

  const handleUseTranscript = () => {
    if (transcript) {
      onTranscript(transcript);
      handleClose();
    }
  };

  const renderButton = () => {
    const icon = isListening ? <MicOffIcon /> : <MicIcon />;
    const title = isSupported
      ? 'Voice input'
      : 'Voice input not supported in this browser';

    if (variant === 'fab') {
      return (
        <Tooltip title={title}>
          <IconButton
            color="primary"
            onClick={handleOpen}
            disabled={!isSupported}
            size={size}
            sx={{
              width: size === 'large' ? 64 : size === 'medium' ? 56 : 48,
              height: size === 'large' ? 64 : size === 'medium' ? 56 : 48,
              borderRadius: '50%',
              bgcolor: 'primary.main',
              color: 'white',
              '&:hover': {
                bgcolor: 'primary.dark',
              },
              boxShadow: 3,
            }}
          >
            {icon}
          </IconButton>
        </Tooltip>
      );
    }

    return (
      <Tooltip title={title}>
        <IconButton
          color="primary"
          onClick={handleOpen}
          disabled={!isSupported}
          size={size}
        >
          {icon}
        </IconButton>
      </Tooltip>
    );
  };

  return (
    <>
      {renderButton()}

      <Dialog open={open} onClose={handleClose} maxWidth="sm" fullWidth>
        <DialogTitle>
          Voice Command
          {classificationResult && !classificationResult.error && (
            <Chip
              label={`${classificationResult.intent} (${classificationResult.confidenceLevel})`}
              color="success"
              size="small"
              sx={{ ml: 2 }}
            />
          )}
        </DialogTitle>

        <DialogContent>
          <Box sx={{ textAlign: 'center', py: 3 }}>
            {/* Microphone animation */}
            <Box sx={{ position: 'relative', display: 'inline-block', mb: 3 }}>
              <IconButton
                color={isListening ? 'error' : 'primary'}
                onClick={isListening ? handleStopListening : handleStartListening}
                sx={{
                  width: 80,
                  height: 80,
                  bgcolor: isListening ? 'error.light' : 'primary.light',
                  '&:hover': {
                    bgcolor: isListening ? 'error.main' : 'primary.main',
                  },
                  animation: isListening ? 'pulse 1.5s infinite' : 'none',
                  '@keyframes pulse': {
                    '0%': { transform: 'scale(1)', opacity: 1 },
                    '50%': { transform: 'scale(1.1)', opacity: 0.8 },
                    '100%': { transform: 'scale(1)', opacity: 1 },
                  },
                }}
              >
                {isListening ? <MicIcon sx={{ fontSize: 48 }} /> : <MicOffIcon sx={{ fontSize: 48 }} />}
              </IconButton>
            </Box>

            {/* Status text */}
            <Typography variant="body1" color="text.secondary" gutterBottom>
              {isListening
                ? usesFallback
                  ? 'Recording... Speak now'
                  : 'Listening... Speak now'
                : 'Click the microphone to start'}
            </Typography>

            {usesFallback && !isListening && !transcript && (
              <Typography variant="caption" color="text.secondary" sx={{ mt: 1, display: 'block' }}>
                Using audio recording mode (iOS Safari)
              </Typography>
            )}

            {/* Transcript */}
            {transcript && (
              <Box sx={{ mt: 3, p: 2, bgcolor: 'grey.100', borderRadius: 1 }}>
                <Typography variant="body2" color="text.secondary" gutterBottom>
                  Transcript:
                </Typography>
                <Typography variant="body1">{transcript}</Typography>
              </Box>
            )}

            {/* Processing indicator */}
            {isProcessing && (
              <Box sx={{ mt: 2, display: 'flex', alignItems: 'center', justifyContent: 'center' }}>
                <CircularProgress size={20} sx={{ mr: 1 }} />
                <Typography variant="body2" color="text.secondary">
                  Processing command...
                </Typography>
              </Box>
            )}

            {/* Classification result */}
            {classificationResult && !classificationResult.error && (
              <Alert severity="success" sx={{ mt: 2 }}>
                <Typography variant="body2" gutterBottom>
                  <strong>Understood:</strong> {classificationResult.intent}
                </Typography>
                {classificationResult.structuredData && (
                  <Typography variant="caption" component="pre" sx={{ mt: 1, fontSize: '0.75rem' }}>
                    {JSON.stringify(classificationResult.structuredData, null, 2)}
                  </Typography>
                )}
              </Alert>
            )}

            {/* Error messages */}
            {(error || (classificationResult && classificationResult.error)) && (
              <Alert severity="error" sx={{ mt: 2 }}>
                {error || classificationResult.message}
              </Alert>
            )}

            {/* Examples */}
            {!transcript && !isListening && (
              <Box sx={{ mt: 3, textAlign: 'left' }}>
                <Typography variant="caption" color="text.secondary" gutterBottom display="block">
                  Example commands:
                </Typography>
                <Typography variant="caption" color="text.secondary" component="div">
                  "Fed baby 120 ml"
                  <br />
                  "Nursed on left breast for 15 minutes"
                  <br />
                  "Changed wet diaper"
                  <br />
                  "Baby napped for 45 minutes"
                </Typography>
              </Box>
            )}
          </Box>
        </DialogContent>

        <DialogActions>
          <Button onClick={handleClose}>Cancel</Button>
          {transcript && (
            <Button
              onClick={handleUseTranscript}
              variant="contained"
              color="primary"
            >
              Use Command
            </Button>
          )}
        </DialogActions>
      </Dialog>
    </>
  );
}
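
A hypothetical usage example (the surrounding component and handlers below are illustrative assumptions, not code from this repository):

import { VoiceInputButton } from '@/components/voice/VoiceInputButton';

// Render as a floating-style button; log the transcript and classified intent.
export function TrackerToolbar() {
  return (
    <VoiceInputButton
      variant="fab"
      size="large"
      onTranscript={(text) => console.log('Transcript:', text)}
      onClassifiedIntent={(result) => console.log('Intent:', result)}
    />
  );
}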