Fix Web Speech API desktop voice recognition

- Set continuous=true to keep listening through pauses
- Only process final results, ignore interim transcripts
- Add usesFallback check to route Web Speech API transcripts through classification
- Desktop now captures complete phrases before classification
- Add detailed logging for debugging recognition flow

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
commit 8a342fa85b (parent a44faf6ef4)
2025-10-02 07:25:16 +00:00
2 changed files with 26 additions and 8 deletions
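
A minimal standalone sketch of the pattern the bullets above describe: continuous recognition that acts only on final results. This is an illustration, not the app's actual hook; onPhrase is a hypothetical callback.

const SpeechRecognitionImpl =
  (window as any).SpeechRecognition || (window as any).webkitSpeechRecognition;

function listenForPhrases(onPhrase: (text: string) => void): () => void {
  const recognition = new SpeechRecognitionImpl();
  recognition.continuous = true;     // keep listening through pauses
  recognition.interimResults = true; // interim events still fire...
  recognition.lang = 'en-US';

  recognition.onresult = (event: any) => {
    for (let i = event.resultIndex; i < event.results.length; i++) {
      // ...but only results the recognizer has marked final are acted on
      if (event.results[i].isFinal) onPhrase(event.results[i][0].transcript);
    }
  };

  recognition.start();
  return () => recognition.stop(); // caller ends the session explicitly
}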


@@ -44,10 +44,10 @@ export function VoiceFloatingButton() {
     severity: 'info',
   });
 
-  const { isListening, isSupported, transcript, classification, error, startListening, stopListening, reset } =
+  const { isListening, isSupported, transcript, classification, error, usesFallback, startListening, stopListening, reset } =
     useVoiceInput();
 
-  // Auto-use classification from backend when transcription completes
+  // Auto-use classification from backend when transcription completes (MediaRecorder fallback)
   React.useEffect(() => {
     if (classification && !isListening && !isProcessing && open) {
       setClassificationResult(classification);
@@ -55,6 +55,13 @@ export function VoiceFloatingButton() {
     }
   }, [classification, isListening, isProcessing, open]);
 
+  // For Web Speech API (desktop), classify the transcript client-side
+  React.useEffect(() => {
+    if (!usesFallback && transcript && !isListening && !isProcessing && open && transcript !== lastClassifiedTranscript) {
+      classifyTranscript(transcript);
+    }
+  }, [usesFallback, transcript, isListening, isProcessing, open, lastClassifiedTranscript]);
+
   const handleOpen = () => {
     if (!isSupported) {
       setSnackbar({

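The new effect above calls classifyTranscript and compares against lastClassifiedTranscript, neither of which appears in this diff. A hypothetical sketch of how they might be defined inside VoiceFloatingButton, assuming a /api/classify endpoint that this commit does not confirm:

// Hypothetical; neither name is defined in this diff.
const [lastClassifiedTranscript, setLastClassifiedTranscript] = React.useState('');

const classifyTranscript = React.useCallback(async (text: string) => {
  setLastClassifiedTranscript(text); // the guard the effect's condition checks
  const response = await fetch('/api/classify', { // assumed endpoint
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ transcript: text }),
  });
  setClassificationResult(await response.json()); // same setter the first effect uses
}, []);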

@@ -56,13 +56,15 @@ export function useVoiceInput() {
     } else if (SpeechRecognition) {
       try {
         // Initialize recognition for non-iOS browsers
+        console.log('[Voice] Initializing Web Speech API');
         const recognition = new SpeechRecognition();
-        recognition.continuous = false; // Single recognition
+        recognition.continuous = true; // Keep listening until manually stopped
         recognition.interimResults = true; // Get interim results
         recognition.maxAlternatives = 1;
         recognition.lang = 'en-US'; // Default language
 
         recognitionRef.current = recognition;
+        console.log('[Voice] Web Speech API initialized successfully');
         setState(prev => ({ ...prev, isSupported: true, usesFallback: false }));
       } catch (error) {
         console.warn('[Voice] Speech Recognition initialization failed, trying fallback');
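
The catch branch falls back to the MediaRecorder path (the usesFallback flag elsewhere in the hook). That path is outside this diff; a minimal sketch of what such a capture helper generally looks like, with onAudio as a hypothetical callback that would ship the blob to the backend transcriber:

async function startFallbackRecording(onAudio: (blob: Blob) => void) {
  const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
  const recorder = new MediaRecorder(stream);
  const chunks: Blob[] = [];
  recorder.ondataavailable = (e) => chunks.push(e.data);
  recorder.onstop = () => onAudio(new Blob(chunks, { type: recorder.mimeType }));
  recorder.start();
  return () => recorder.stop(); // stopping triggers onstop, which hands off the audio
}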
@@ -281,17 +283,24 @@ export function useVoiceInput() {
         }
       }
 
-      setState(prev => ({
-        ...prev,
-        transcript: finalTranscript || interimTranscript,
-      }));
+      // Only update state with final results, show interim in console for debugging
+      if (finalTranscript) {
+        console.log('[Voice] Final result:', finalTranscript);
+        setState(prev => ({
+          ...prev,
+          transcript: finalTranscript,
+        }));
+      } else {
+        console.log('[Voice] Interim result:', interimTranscript);
+      }
     };
 
     recognition.onerror = (event: any) => {
-      console.error('[Voice] Error:', event.error);
+      console.error('[Voice] Error:', event.error, event);
       let errorMessage = 'Failed to recognize speech';
 
       if (event.error === 'no-speech') {
+        console.warn('[Voice] No speech detected - this is often normal if user stops speaking');
         errorMessage = 'No speech detected. Please try again.';
       } else if (event.error === 'audio-capture') {
         errorMessage = 'No microphone found. Please check your settings.';
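
The onresult hunk above begins just after the loop that builds finalTranscript and interimTranscript, which the diff does not show. A plausible reconstruction, following the standard Web Speech API accumulation pattern:

let finalTranscript = '';
let interimTranscript = '';
for (let i = event.resultIndex; i < event.results.length; i++) {
  const piece = event.results[i][0].transcript;
  if (event.results[i].isFinal) {
    finalTranscript += piece;   // recognizer has committed to this text
  } else {
    interimTranscript += piece; // still subject to revision
  }
}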
@@ -325,7 +334,9 @@
 
     // Start recognition
     try {
+      console.log('[Voice] Calling recognition.start()');
       recognition.start();
+      console.log('[Voice] recognition.start() called successfully');
     } catch (error) {
       console.error('[Voice] Failed to start:', error);
       setState(prev => ({