Fix Web Speech API desktop voice recognition

- Set continuous=true to keep listening through pauses
- Only process final results, ignore interim transcripts
- Add usesFallback check to route Web Speech API transcripts through classification
- Desktop now captures complete phrases before classification
- Add detailed logging for debugging recognition flow
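
The "only process final results" change follows the standard Web Speech API onresult pattern. A minimal sketch of that pattern (the event shape is the standard SpeechRecognition API; handleFinal is a placeholder, not code from this commit):

recognition.onresult = (event: any) => {
  let finalTranscript = '';
  let interimTranscript = '';
  // Results from resultIndex onward are either interim (still changing)
  // or final (stable); only final ones should reach application state.
  for (let i = event.resultIndex; i < event.results.length; i++) {
    const result = event.results[i];
    if (result.isFinal) {
      finalTranscript += result[0].transcript;
    } else {
      interimTranscript += result[0].transcript;
    }
  }
  if (finalTranscript) {
    handleFinal(finalTranscript); // update state only on final results
  }
};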

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-02 07:25:16 +00:00
parent a44faf6ef4
commit 8a342fa85b
2 changed files with 26 additions and 8 deletions

@@ -44,10 +44,10 @@ export function VoiceFloatingButton() {
     severity: 'info',
   });
 
-  const { isListening, isSupported, transcript, classification, error, startListening, stopListening, reset } =
+  const { isListening, isSupported, transcript, classification, error, usesFallback, startListening, stopListening, reset } =
     useVoiceInput();
 
-  // Auto-use classification from backend when transcription completes
+  // Auto-use classification from backend when transcription completes (MediaRecorder fallback)
   React.useEffect(() => {
     if (classification && !isListening && !isProcessing && open) {
       setClassificationResult(classification);
@@ -55,6 +55,13 @@ export function VoiceFloatingButton() {
     }
   }, [classification, isListening, isProcessing, open]);
 
+  // For Web Speech API (desktop), classify the transcript client-side
+  React.useEffect(() => {
+    if (!usesFallback && transcript && !isListening && !isProcessing && open && transcript !== lastClassifiedTranscript) {
+      classifyTranscript(transcript);
+    }
+  }, [usesFallback, transcript, isListening, isProcessing, open, lastClassifiedTranscript]);
+
   const handleOpen = () => {
     if (!isSupported) {
       setSnackbar({
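
The new effect above calls classifyTranscript and guards on lastClassifiedTranscript, both defined outside this hunk. A plausible shape, purely illustrative — the endpoint path and state names here are assumptions, not code from this commit:

const [lastClassifiedTranscript, setLastClassifiedTranscript] = React.useState('');

const classifyTranscript = async (text: string) => {
  setLastClassifiedTranscript(text); // remember the phrase so the effect does not re-fire for it
  setIsProcessing(true);
  try {
    // Hypothetical endpoint; the MediaRecorder fallback presumably reaches
    // the same classifier server-side after transcription.
    const res = await fetch('/api/voice/classify', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ transcript: text }),
    });
    setClassificationResult(await res.json());
  } finally {
    setIsProcessing(false);
  }
};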

@@ -56,13 +56,15 @@ export function useVoiceInput() {
     } else if (SpeechRecognition) {
       try {
         // Initialize recognition for non-iOS browsers
+        console.log('[Voice] Initializing Web Speech API');
         const recognition = new SpeechRecognition();
-        recognition.continuous = false; // Single recognition
+        recognition.continuous = true; // Keep listening until manually stopped
         recognition.interimResults = true; // Get interim results
         recognition.maxAlternatives = 1;
         recognition.lang = 'en-US'; // Default language
         recognitionRef.current = recognition;
 
+        console.log('[Voice] Web Speech API initialized successfully');
         setState(prev => ({ ...prev, isSupported: true, usesFallback: false }));
       } catch (error) {
         console.warn('[Voice] Speech Recognition initialization failed, trying fallback');
@@ -281,17 +283,24 @@ export function useVoiceInput() {
         }
       }
 
-      setState(prev => ({
-        ...prev,
-        transcript: finalTranscript || interimTranscript,
-      }));
+      // Only update state with final results, show interim in console for debugging
+      if (finalTranscript) {
+        console.log('[Voice] Final result:', finalTranscript);
+        setState(prev => ({
+          ...prev,
+          transcript: finalTranscript,
+        }));
+      } else {
+        console.log('[Voice] Interim result:', interimTranscript);
+      }
     };
 
     recognition.onerror = (event: any) => {
-      console.error('[Voice] Error:', event.error);
+      console.error('[Voice] Error:', event.error, event);
       let errorMessage = 'Failed to recognize speech';
 
       if (event.error === 'no-speech') {
+        console.warn('[Voice] No speech detected - this is often normal if user stops speaking');
         errorMessage = 'No speech detected. Please try again.';
       } else if (event.error === 'audio-capture') {
         errorMessage = 'No microphone found. Please check your settings.';
@@ -325,7 +334,9 @@ export function useVoiceInput() {
 
     // Start recognition
     try {
+      console.log('[Voice] Calling recognition.start()');
      recognition.start();
+      console.log('[Voice] recognition.start() called successfully');
     } catch (error) {
       console.error('[Voice] Failed to start:', error);
       setState(prev => ({
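
For reference, the SpeechRecognition checked at the top of this file usually comes from a vendor-prefixed lookup; a common sketch (the any casts are needed because TypeScript's DOM lib does not ship these types):

// Common feature-detection pattern for the Web Speech API; Chrome still
// exposes only the webkit-prefixed constructor.
const SpeechRecognition =
  (window as any).SpeechRecognition || (window as any).webkitSpeechRecognition;

if (!SpeechRecognition) {
  // No Web Speech API available — this is where the hook would take the
  // MediaRecorder fallback path (usesFallback = true).
}

With continuous = true, the recognizer no longer ends itself after the first pause, so onresult keeps delivering results until stopListening() calls recognition.stop() — which is how desktop now captures complete phrases before classification.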