- Force MediaRecorder fallback for all iOS Safari devices
- Add iOS device detection to avoid the Web Speech API on iOS
- Support multiple audio formats (webm, mp4, browser default) for compatibility
- Add comprehensive error logging throughout the flow
- Improve error messages with specific guidance for each error type
- Add console logging to track microphone permissions and recording state
- Handle getUserMedia permissions more robustly

This should help diagnose and fix the "Failed to recognize speech" error by ensuring iOS Safari uses the MediaRecorder path with proper permissions.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
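
The fix lives mostly in the `useVoiceInput` hook, whose internals are not shown in this file view. Below is a minimal sketch of the kind of detection and format fallback the bullets above describe, using only standard browser APIs; the function names are hypothetical, not the hook's actual implementation:

```typescript
// Detect iOS devices, including iPadOS 13+, which reports itself as a Mac
// with touch support. Hypothetical helper; actual detection may differ.
function isIOS(): boolean {
  const ua = navigator.userAgent;
  return (
    /iPad|iPhone|iPod/.test(ua) ||
    (ua.includes('Mac') && navigator.maxTouchPoints > 1)
  );
}

// Pick the first audio container MediaRecorder supports: webm, then mp4,
// then fall back to the browser default (no mimeType option at all).
function pickAudioMimeType(): string | undefined {
  for (const type of ['audio/webm', 'audio/mp4']) {
    if (MediaRecorder.isTypeSupported(type)) return type;
  }
  return undefined;
}

// Request the microphone and start a MediaRecorder, logging each step so
// permission failures are visible in the console.
async function startRecording(): Promise<MediaRecorder> {
  console.log('[Voice] Requesting microphone permission...');
  const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
  const mimeType = pickAudioMimeType();
  console.log('[Voice] Recording with mimeType:', mimeType ?? 'browser default');
  const recorder = new MediaRecorder(stream, mimeType ? { mimeType } : undefined);
  recorder.start();
  return recorder;
}
```

On iOS, `isIOS()` would route `startListening()` down this MediaRecorder path instead of the Web Speech API, which is the behavior the first two bullets enforce.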
'use client';

import React, { useState, useEffect } from 'react';
import {
  IconButton,
  Tooltip,
  Dialog,
  DialogTitle,
  DialogContent,
  DialogActions,
  Button,
  Box,
  Typography,
  CircularProgress,
  Alert,
  Chip,
} from '@mui/material';
import MicIcon from '@mui/icons-material/Mic';
import MicOffIcon from '@mui/icons-material/MicOff';
import { useVoiceInput } from '@/hooks/useVoiceInput';

export interface VoiceInputButtonProps {
  onTranscript: (transcript: string) => void;
  onClassifiedIntent?: (result: any) => void;
  size?: 'small' | 'medium' | 'large';
  variant?: 'icon' | 'fab';
}
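
/**
 * Shape of the /api/voice/transcribe response as this component consumes it.
 * Inferred from how classifyTranscript reads the payload below; the server
 * may return additional fields (illustrative, not an authoritative contract).
 */
export interface TranscribeResponse {
  success: boolean;
  message?: string;
  classification?: {
    intent: string;
    confidenceLevel: string;
    structuredData?: Record<string, unknown>;
  };
}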

/**
 * Voice input button component.
 *
 * Displays a microphone button that opens a dialog for voice recording.
 * Uses the Web Speech API for real-time transcription where available,
 * falling back to MediaRecorder-based audio capture on iOS Safari.
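 *
 * @example
 * // Hypothetical usage in a parent form; the handler is illustrative.
 * <VoiceInputButton
 *   onTranscript={(text) => console.log('User said:', text)}
 *   variant="fab"
 *   size="large"
 * />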
 */
export function VoiceInputButton({
  onTranscript,
  onClassifiedIntent,
  size = 'medium',
  variant = 'icon',
}: VoiceInputButtonProps) {
  const [open, setOpen] = useState(false);
  const [isProcessing, setIsProcessing] = useState(false);
  const [classificationResult, setClassificationResult] = useState<any>(null);

  const { isListening, isSupported, transcript, error, usesFallback, startListening, stopListening, reset } =
    useVoiceInput();

  // Auto-classify when we get a final transcript. Guarding on
  // classificationResult prevents the effect from re-firing (and looping)
  // when isProcessing flips back to false after classification completes.
  useEffect(() => {
    if (transcript && !isListening && !isProcessing && !classificationResult) {
      classifyTranscript(transcript);
    }
  }, [transcript, isListening, isProcessing, classificationResult]);

  const handleOpen = () => {
    console.log('[VoiceButton] Opening dialog, isSupported:', isSupported, 'usesFallback:', usesFallback);
    if (!isSupported) {
      alert('Voice input is not supported in your browser. Please use Chrome, Edge, or Safari.');
      return;
    }
    setOpen(true);
    reset();
    setClassificationResult(null);
  };

  const handleClose = () => {
    if (isListening) {
      stopListening();
    }
    setOpen(false);
    reset();
    setClassificationResult(null);
  };

  const handleStartListening = () => {
    console.log('[VoiceButton] Starting listening, usesFallback:', usesFallback);
    reset();
    setClassificationResult(null);
    startListening();
  };

  const handleStopListening = () => {
    stopListening();
  };

  // Send the final transcript to the intent-classification endpoint.
  const classifyTranscript = async (text: string) => {
    setIsProcessing(true);
    try {
      const response = await fetch('/api/voice/transcribe', {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
        },
        body: JSON.stringify({ text }),
      });

      const data = await response.json();

      if (response.ok && data.success) {
        setClassificationResult(data.classification);
        if (onClassifiedIntent) {
          onClassifiedIntent(data.classification);
        }
      } else {
        setClassificationResult({
          error: true,
          message: data.message || 'Could not understand command',
        });
      }
    } catch (error) {
      console.error('[Voice] Classification error:', error);
      setClassificationResult({
        error: true,
        message: 'Failed to process command',
      });
    } finally {
      setIsProcessing(false);
    }
  };

  const handleUseTranscript = () => {
    if (transcript) {
      onTranscript(transcript);
      handleClose();
    }
  };

  const renderButton = () => {
    const icon = isListening ? <MicOffIcon /> : <MicIcon />;
    const title = isSupported
      ? 'Voice input'
      : 'Voice input not supported in this browser';

    if (variant === 'fab') {
      return (
        <Tooltip title={title}>
          {/* span wrapper lets the tooltip show even when the button is disabled */}
          <span>
            <IconButton
              color="primary"
              onClick={handleOpen}
              disabled={!isSupported}
              size={size}
              sx={{
                width: size === 'large' ? 64 : size === 'medium' ? 56 : 48,
                height: size === 'large' ? 64 : size === 'medium' ? 56 : 48,
                borderRadius: '50%',
                bgcolor: 'primary.main',
                color: 'white',
                '&:hover': {
                  bgcolor: 'primary.dark',
                },
                boxShadow: 3,
              }}
            >
              {icon}
            </IconButton>
          </span>
        </Tooltip>
      );
    }

    return (
      <Tooltip title={title}>
        <span>
          <IconButton
            color="primary"
            onClick={handleOpen}
            disabled={!isSupported}
            size={size}
          >
            {icon}
          </IconButton>
        </span>
      </Tooltip>
    );
  };

  return (
    <>
      {renderButton()}

      <Dialog open={open} onClose={handleClose} maxWidth="sm" fullWidth>
        <DialogTitle>
          Voice Command
          {classificationResult && !classificationResult.error && (
            <Chip
              label={`${classificationResult.intent} (${classificationResult.confidenceLevel})`}
              color="success"
              size="small"
              sx={{ ml: 2 }}
            />
          )}
        </DialogTitle>

        <DialogContent>
          <Box sx={{ textAlign: 'center', py: 3 }}>
            {/* Microphone animation */}
            <Box sx={{ position: 'relative', display: 'inline-block', mb: 3 }}>
              <IconButton
                color={isListening ? 'error' : 'primary'}
                onClick={isListening ? handleStopListening : handleStartListening}
                sx={{
                  width: 80,
                  height: 80,
                  bgcolor: isListening ? 'error.light' : 'primary.light',
                  '&:hover': {
                    bgcolor: isListening ? 'error.main' : 'primary.main',
                  },
                  animation: isListening ? 'pulse 1.5s infinite' : 'none',
                  '@keyframes pulse': {
                    '0%': { transform: 'scale(1)', opacity: 1 },
                    '50%': { transform: 'scale(1.1)', opacity: 0.8 },
                    '100%': { transform: 'scale(1)', opacity: 1 },
                  },
                }}
              >
                {isListening ? <MicIcon sx={{ fontSize: 48 }} /> : <MicOffIcon sx={{ fontSize: 48 }} />}
              </IconButton>
            </Box>

            {/* Status text */}
            <Typography variant="body1" color="text.secondary" gutterBottom>
              {isListening
                ? usesFallback
                  ? 'Recording... Speak now'
                  : 'Listening... Speak now'
                : 'Click the microphone to start'}
            </Typography>

            {usesFallback && !isListening && !transcript && (
              <Typography variant="caption" color="text.secondary" sx={{ mt: 1, display: 'block' }}>
                Using audio recording mode (iOS Safari)
              </Typography>
            )}

            {/* Transcript */}
            {transcript && (
              <Box sx={{ mt: 3, p: 2, bgcolor: 'grey.100', borderRadius: 1 }}>
                <Typography variant="body2" color="text.secondary" gutterBottom>
                  Transcript:
                </Typography>
                <Typography variant="body1">{transcript}</Typography>
              </Box>
            )}

            {/* Processing indicator */}
            {isProcessing && (
              <Box sx={{ mt: 2, display: 'flex', alignItems: 'center', justifyContent: 'center' }}>
                <CircularProgress size={20} sx={{ mr: 1 }} />
                <Typography variant="body2" color="text.secondary">
                  Processing command...
                </Typography>
              </Box>
            )}

            {/* Classification result */}
            {classificationResult && !classificationResult.error && (
              <Alert severity="success" sx={{ mt: 2 }}>
                <Typography variant="body2" gutterBottom>
                  <strong>Understood:</strong> {classificationResult.intent}
                </Typography>
                {classificationResult.structuredData && (
                  <Typography variant="caption" component="pre" sx={{ mt: 1, fontSize: '0.75rem' }}>
                    {JSON.stringify(classificationResult.structuredData, null, 2)}
                  </Typography>
                )}
              </Alert>
            )}

            {/* Error messages */}
            {(error || (classificationResult && classificationResult.error)) && (
              <Alert severity="error" sx={{ mt: 2 }}>
                {error || classificationResult?.message}
              </Alert>
            )}

            {/* Examples */}
            {!transcript && !isListening && (
              <Box sx={{ mt: 3, textAlign: 'left' }}>
                <Typography variant="caption" color="text.secondary" gutterBottom display="block">
                  Example commands:
                </Typography>
                <Typography variant="caption" color="text.secondary" component="div">
                  • "Fed baby 120 ml"
                  <br />
                  • "Nursed on left breast for 15 minutes"
                  <br />
                  • "Changed wet diaper"
                  <br />
                  • "Baby napped for 45 minutes"
                </Typography>
              </Box>
            )}
          </Box>
        </DialogContent>

        <DialogActions>
          <Button onClick={handleClose}>Cancel</Button>
          {transcript && (
            <Button
              onClick={handleUseTranscript}
              variant="contained"
              color="primary"
            >
              Use Command
            </Button>
          )}
        </DialogActions>
      </Dialog>
    </>
  );
}
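
The component assumes a `/api/voice/transcribe` endpoint that accepts `{ text }` and returns `{ success, classification }` on success or `{ success: false, message }` on failure. That route is not part of this file; here is a minimal Next.js App Router sketch that satisfies the contract, with `classifyIntent` as a hypothetical stand-in for the project's real classifier:

```typescript
// app/api/voice/transcribe/route.ts (hypothetical sketch only): mirrors the
// response shape the component reads, not the project's actual route.
import { NextResponse } from 'next/server';

// Stand-in classifier: a real implementation would call an NLU service or
// LLM to turn the transcript into a structured intent.
async function classifyIntent(text: string) {
  return { intent: 'unknown', confidenceLevel: 'low', structuredData: { text } };
}

export async function POST(request: Request) {
  try {
    const { text } = await request.json();
    if (!text || typeof text !== 'string') {
      return NextResponse.json(
        { success: false, message: 'Missing transcript text' },
        { status: 400 },
      );
    }
    const classification = await classifyIntent(text);
    return NextResponse.json({ success: true, classification });
  } catch (err) {
    console.error('[Voice] Transcribe route error:', err);
    return NextResponse.json(
      { success: false, message: 'Failed to process command' },
      { status: 500 },
    );
  }
}
```

Because the component checks `response.ok && data.success`, both the HTTP status and the `success` flag matter here; returning non-2xx statuses on failure keeps the client's error branch working.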