Fix Web Speech API desktop voice recognition
- Set continuous=true to keep listening through pauses
- Only process final results, ignore interim transcripts
- Add usesFallback check to route Web Speech API transcripts through classification
- Desktop now captures complete phrases before classification
- Add detailed logging for debugging recognition flow

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
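A minimal sketch of the pattern these bullets describe, assuming a standard Web Speech API recognizer; `handleFinalTranscript` is a hypothetical consumer standing in for the hook's state update and classification call, not the project's actual code:

```typescript
// Sketch only: keep the recognizer running across pauses and commit only final results.
const SpeechRecognitionImpl =
  (window as any).SpeechRecognition ?? (window as any).webkitSpeechRecognition;

const recognition = new SpeechRecognitionImpl();
recognition.continuous = true;     // keep listening through pauses
recognition.interimResults = true; // interim results still arrive, but are only logged
recognition.lang = 'en-US';

recognition.onresult = (event: any) => {
  let finalTranscript = '';
  for (let i = event.resultIndex; i < event.results.length; i++) {
    const result = event.results[i];
    if (result.isFinal) {
      finalTranscript += result[0].transcript;
    }
  }
  if (finalTranscript) {
    handleFinalTranscript(finalTranscript); // hypothetical: update state / classify
  } else {
    console.log('[Voice] interim only, not committed');
  }
};

recognition.start();
```

With `continuous = true` the session stays alive until `recognition.stop()` is called, so a complete phrase reaches classification rather than the first fragment.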
@@ -44,10 +44,10 @@ export function VoiceFloatingButton() {
     severity: 'info',
   });
 
-  const { isListening, isSupported, transcript, classification, error, startListening, stopListening, reset } =
+  const { isListening, isSupported, transcript, classification, error, usesFallback, startListening, stopListening, reset } =
     useVoiceInput();
 
-  // Auto-use classification from backend when transcription completes
+  // Auto-use classification from backend when transcription completes (MediaRecorder fallback)
   React.useEffect(() => {
     if (classification && !isListening && !isProcessing && open) {
       setClassificationResult(classification);
@@ -55,6 +55,13 @@ export function VoiceFloatingButton() {
     }
   }, [classification, isListening, isProcessing, open]);
 
+  // For Web Speech API (desktop), classify the transcript client-side
+  React.useEffect(() => {
+    if (!usesFallback && transcript && !isListening && !isProcessing && open && transcript !== lastClassifiedTranscript) {
+      classifyTranscript(transcript);
+    }
+  }, [usesFallback, transcript, isListening, isProcessing, open, lastClassifiedTranscript]);
+
   const handleOpen = () => {
     if (!isSupported) {
       setSnackbar({
@@ -56,13 +56,15 @@ export function useVoiceInput() {
     } else if (SpeechRecognition) {
       try {
         // Initialize recognition for non-iOS browsers
+        console.log('[Voice] Initializing Web Speech API');
         const recognition = new SpeechRecognition();
-        recognition.continuous = false; // Single recognition
+        recognition.continuous = true; // Keep listening until manually stopped
         recognition.interimResults = true; // Get interim results
         recognition.maxAlternatives = 1;
         recognition.lang = 'en-US'; // Default language
 
         recognitionRef.current = recognition;
+        console.log('[Voice] Web Speech API initialized successfully');
         setState(prev => ({ ...prev, isSupported: true, usesFallback: false }));
       } catch (error) {
         console.warn('[Voice] Speech Recognition initialization failed, trying fallback');
@@ -281,17 +283,24 @@ export function useVoiceInput() {
        }
      }
 
+      // Only update state with final results, show interim in console for debugging
+      if (finalTranscript) {
+        console.log('[Voice] Final result:', finalTranscript);
       setState(prev => ({
         ...prev,
-        transcript: finalTranscript || interimTranscript,
+        transcript: finalTranscript,
       }));
+      } else {
+        console.log('[Voice] Interim result:', interimTranscript);
+      }
     };
 
     recognition.onerror = (event: any) => {
-      console.error('[Voice] Error:', event.error);
+      console.error('[Voice] Error:', event.error, event);
 
       let errorMessage = 'Failed to recognize speech';
       if (event.error === 'no-speech') {
+        console.warn('[Voice] No speech detected - this is often normal if user stops speaking');
        errorMessage = 'No speech detected. Please try again.';
       } else if (event.error === 'audio-capture') {
         errorMessage = 'No microphone found. Please check your settings.';
@@ -325,7 +334,9 @@ export function useVoiceInput() {
 
     // Start recognition
     try {
+      console.log('[Voice] Calling recognition.start()');
       recognition.start();
+      console.log('[Voice] recognition.start() called successfully');
     } catch (error) {
       console.error('[Voice] Failed to start:', error);
       setState(prev => ({