Fix login data structure and improve voice input UX
- Fix login endpoint to return families as array of objects instead of strings
- Update auth interface to match /auth/me endpoint structure
- Add silence detection to voice input (auto-stop after 1.5s)
- Add comprehensive status messages to voice modal (Listening, Understanding, Saving)
- Unify voice input flow to use MediaRecorder + backend for all platforms
- Add null checks to prevent tracking page crashes from invalid data
- Wait for auth completion before loading family data in HomePage

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
AuthService (login): return family memberships as objects rather than bare IDs, matching the /auth/me endpoint.

```diff
@@ -164,8 +164,12 @@ export class AuthService {
     // Generate tokens
     const tokens = await this.generateTokens(user, device.id);
 
-    // Get family IDs
-    const familyIds = user.familyMemberships?.map((fm) => fm.familyId) || [];
+    // Get families with proper structure (matching /auth/me endpoint)
+    const families = user.familyMemberships?.map((fm) => ({
+      id: fm.familyId,
+      familyId: fm.familyId,
+      role: fm.role,
+    })) || [];
 
     // Audit log: successful login
     await this.auditService.logLogin(user.id);
@@ -180,7 +184,7 @@ export class AuthService {
         locale: user.locale,
         emailVerified: user.emailVerified,
         preferences: user.preferences,
-        families: familyIds,
+        families: families,
       },
       tokens,
       requiresMFA: false,
```
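For reference, a sketch of the login response body after this change; the field names come from the hunks above, while the wrapper layout and the concrete values are assumptions:

```typescript
// Illustrative only: shape of the login payload with object-based families.
const exampleLoginResponse = {
  user: {
    locale: 'en',
    emailVerified: true,
    preferences: {},
    // previously: families: ['fam_123'] (array of strings)
    families: [
      { id: 'fam_123', familyId: 'fam_123', role: 'parent' }, // hypothetical values
    ],
  },
  tokens: { accessToken: '<jwt>', refreshToken: '<jwt>' }, // token shape assumed
  requiresMFA: false,
};
```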
AuthResponse interface: widen `families` to the object shape returned above.

```diff
@@ -13,7 +13,7 @@ export interface AuthResponse {
     name: string;
     locale: string;
     emailVerified: boolean;
-    families?: string[];
+    families?: Array<{ id: string; familyId: string; role: string }>;
     preferences?: any;
   };
   tokens: AuthTokens;
```
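Client code then reads the active family off the first membership, as the HomePage change below does with `user?.families?.[0]?.familyId`. A small sketch of that access as a helper (the `getActiveFamilyId` name is hypothetical):

```typescript
// Hypothetical helper: pick the active family id from the new families array.
function getActiveFamilyId(
  user?: { families?: Array<{ id: string; familyId: string; role: string }> }
): string | undefined {
  return user?.families?.[0]?.familyId;
}
```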
Voice transcription route (Next.js POST handler): forward already-transcribed text to the backend for LLM classification instead of handling it locally.

```diff
@@ -16,7 +16,7 @@ export async function POST(request: NextRequest) {
   let transcribedText: string;
 
   if (contentType.includes('application/json')) {
-    // Text input (already transcribed)
+    // Text input (already transcribed) - forward to backend for LLM classification
     const body = await request.json();
     transcribedText = body.text;
 
@@ -29,6 +29,41 @@ export async function POST(request: NextRequest) {
         { status: 400 }
       );
     }
 
+    // Forward text to backend for LLM-based classification
+    const backendUrl = process.env.NEXT_PUBLIC_API_URL || 'http://localhost:3020';
+    const backendResponse = await fetch(`${backendUrl}/api/v1/voice/transcribe`, {
+      method: 'POST',
+      headers: {
+        'Content-Type': 'application/json',
+        // Forward auth token if present
+        ...(request.headers.get('authorization') && {
+          authorization: request.headers.get('authorization')!,
+        }),
+      },
+      body: JSON.stringify({
+        text: transcribedText,
+        language: body.language || 'en',
+        childName: body.childName,
+      }),
+    });
+
+    if (!backendResponse.ok) {
+      const errorData = await backendResponse.json();
+      return NextResponse.json(errorData, { status: backendResponse.status });
+    }
+
+    const result = await backendResponse.json();
+
+    // Backend returns { success, transcript, classification }
+    return NextResponse.json(
+      {
+        success: true,
+        transcript: result.transcript,
+        classification: result.classification,
+      },
+      { status: 200 }
+    );
   } else if (contentType.includes('multipart/form-data')) {
     // Audio file upload - forward to backend for Whisper transcription
     const formData = await request.formData();
```
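A minimal sketch of how a client could call this route with text input; the `classifyText` name and the Bearer prefix are assumptions, while the request and response fields mirror the route above:

```typescript
// Illustrative client call into the Next.js route (text path).
async function classifyText(text: string, accessToken?: string) {
  const res = await fetch('/api/voice/transcribe', {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      ...(accessToken && { authorization: `Bearer ${accessToken}` }),
    },
    body: JSON.stringify({ text, language: 'en' }),
  });
  if (!res.ok) throw new Error(`Voice route failed: ${res.status}`);
  return res.json() as Promise<{ success: boolean; transcript: string; classification: unknown }>;
}
```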
HomePage: wait for auth to complete before loading family data, and add diagnostic logging.

```diff
@@ -25,7 +25,7 @@ import { childrenApi, Child } from '@/lib/api/children';
 import { format } from 'date-fns';
 
 export default function HomePage() {
-  const { user } = useAuth();
+  const { user, isLoading: authLoading } = useAuth();
   const router = useRouter();
   const [children, setChildren] = useState<Child[]>([]);
   const [selectedChild, setSelectedChild] = useState<Child | null>(null);
@@ -33,17 +33,29 @@ export default function HomePage() {
   const [loading, setLoading] = useState(true);
 
   const familyId = user?.families?.[0]?.familyId;
 
   // Load children and daily summary
   useEffect(() => {
     const loadData = async () => {
+      // Wait for auth to complete before trying to load data
+      if (authLoading) {
+        return;
+      }
+
       if (!familyId) {
+        console.log('[HomePage] No familyId found');
+        console.log('[HomePage] User object:', JSON.stringify(user, null, 2));
+        console.log('[HomePage] User.families:', user?.families);
         setLoading(false);
         return;
       }
 
+      console.log('[HomePage] Loading data for familyId:', familyId);
+
       try {
         // Load children
         const childrenData = await childrenApi.getChildren(familyId);
+        console.log('[HomePage] Children loaded:', childrenData.length);
        setChildren(childrenData);
 
        if (childrenData.length > 0) {
@@ -56,14 +68,14 @@ export default function HomePage() {
          setDailySummary(summary);
        }
      } catch (error) {
-        console.error('Failed to load data:', error);
+        console.error('[HomePage] Failed to load data:', error);
      } finally {
        setLoading(false);
      }
    };
 
    loadData();
-  }, [familyId]);
+  }, [familyId, authLoading, user]);
 
   const quickActions = [
     { icon: <Restaurant />, label: 'Feeding', color: '#FFB6C1', path: '/track/feeding' },
```
DiaperTrackPage: skip activities with an invalid data structure instead of crashing.

```diff
@@ -619,6 +619,11 @@ export default function DiaperTrackPage() {
             <Box sx={{ display: 'flex', flexDirection: 'column', gap: 2 }}>
               {recentDiapers.map((activity, index) => {
                 const data = activity.data as DiaperData;
+                // Skip activities with invalid data structure
+                if (!data || !data.diaperType) {
+                  console.warn('[Diaper] Activity missing diaperType:', activity);
+                  return null;
+                }
                 return (
                   <motion.div
                     key={activity.id}
```
FeedingTrackPage: same guard for feeding activities.

```diff
@@ -601,6 +601,11 @@ function FeedingTrackPage() {
             <Box sx={{ display: 'flex', flexDirection: 'column', gap: 2 }}>
               {recentFeedings.map((activity, index) => {
                 const data = activity.data as FeedingData;
+                // Skip activities with invalid data structure
+                if (!data || !data.feedingType) {
+                  console.warn('[Feeding] Activity missing feedingType:', activity);
+                  return null;
+                }
                 return (
                   <motion.div
                     key={activity.id}
```
SleepTrackPage: same guard for sleep activities.

```diff
@@ -557,6 +557,11 @@ export default function SleepTrackPage() {
             <Box sx={{ display: 'flex', flexDirection: 'column', gap: 2 }}>
               {recentSleeps.map((activity, index) => {
                 const data = activity.data as SleepData;
+                // Skip activities with invalid data structure
+                if (!data || !data.quality || !data.location) {
+                  console.warn('[Sleep] Activity missing required fields:', activity);
+                  return null;
+                }
                 return (
                   <motion.div
                     key={activity.id}
```
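The three tracking pages apply the same guard; if the pattern spreads further, a shared type-guard helper could fold the checks into one place. A sketch under that assumption (the `hasRequiredFields` helper is hypothetical, not part of this commit):

```typescript
// Hypothetical helper consolidating the per-page null checks above.
function hasRequiredFields<T extends object>(
  data: T | null | undefined,
  keys: (keyof T)[]
): data is T {
  return !!data && keys.every((key) => data[key] != null);
}

// Usage sketch, mirroring the sleep page check:
// if (!hasRequiredFields(data, ['quality', 'location'])) return null;
```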
VoiceFloatingButton: remove the client-side classification path, add staged status messages (Listening, Understanding, Saving), and accept both the backend result shape (`type`) and the legacy local-classifier shape (`intent`).

```diff
@@ -36,8 +36,9 @@ export function VoiceFloatingButton() {
   const { user } = useAuth();
   const [open, setOpen] = useState(false);
   const [isProcessing, setIsProcessing] = useState(false);
+  const [processingStatus, setProcessingStatus] = useState<'listening' | 'understanding' | 'saving' | null>(null);
+  const [identifiedActivity, setIdentifiedActivity] = useState<string>('');
   const [classificationResult, setClassificationResult] = useState<any>(null);
-  const [lastClassifiedTranscript, setLastClassifiedTranscript] = useState<string>('');
   const [snackbar, setSnackbar] = useState<{
     open: boolean;
     message: string;
@@ -53,7 +54,18 @@ export function VoiceFloatingButton() {
   const { isListening, isSupported, transcript, classification, error, usesFallback, startListening, stopListening, reset } =
     useVoiceInput();
 
-  // Auto-use classification from backend when transcription completes (MediaRecorder fallback)
+  // Set status when listening starts/stops
+  React.useEffect(() => {
+    if (isListening) {
+      setProcessingStatus('listening');
+    } else if (processingStatus === 'listening' && transcript) {
+      // Transition from listening to understanding when we have a transcript
+      setProcessingStatus('understanding');
+    }
+  }, [isListening, transcript]);
+
+  // Auto-use classification from backend when transcription completes
+  // MediaRecorder sends audio to backend, which transcribes + classifies in one call
   React.useEffect(() => {
     if (classification && !isListening && !isProcessing && open) {
       setClassificationResult(classification);
@@ -61,13 +73,6 @@ export function VoiceFloatingButton() {
     }
   }, [classification, isListening, isProcessing, open]);
 
-  // For Web Speech API (desktop), classify the transcript client-side
-  React.useEffect(() => {
-    if (!usesFallback && transcript && !isListening && !isProcessing && open && transcript !== lastClassifiedTranscript) {
-      classifyTranscript(transcript);
-    }
-  }, [usesFallback, transcript, isListening, isProcessing, open, lastClassifiedTranscript]);
-
   const handleOpen = () => {
     if (!isSupported) {
       setSnackbar({
@@ -80,7 +85,8 @@ export function VoiceFloatingButton() {
     setOpen(true);
     reset();
     setClassificationResult(null);
-    setLastClassifiedTranscript('');
+    setProcessingStatus(null);
+    setIdentifiedActivity('');
   };
 
   const handleClose = () => {
@@ -90,13 +96,13 @@ export function VoiceFloatingButton() {
     setOpen(false);
     reset();
     setClassificationResult(null);
-    setLastClassifiedTranscript('');
+    setProcessingStatus(null);
+    setIdentifiedActivity('');
   };
 
   const handleStartListening = () => {
     reset();
     setClassificationResult(null);
-    setLastClassifiedTranscript('');
     startListening();
   };
 
@@ -104,43 +110,12 @@ export function VoiceFloatingButton() {
     stopListening();
   };
 
-  const classifyTranscript = async (text: string) => {
-    // Mark this transcript as being classified to prevent duplicate calls
-    setLastClassifiedTranscript(text);
-    setIsProcessing(true);
-    try {
-      const response = await fetch('/api/voice/transcribe', {
-        method: 'POST',
-        headers: {
-          'Content-Type': 'application/json',
-        },
-        body: JSON.stringify({ text }),
-      });
-
-      const data = await response.json();
-
-      if (response.ok && data.success) {
-        setClassificationResult(data.classification);
-        handleClassifiedIntent(data.classification);
-      } else {
-        setClassificationResult({
-          error: true,
-          message: data.message || 'Could not understand command',
-        });
-      }
-    } catch (error) {
-      console.error('[Voice] Classification error:', error);
-      setClassificationResult({
-        error: true,
-        message: 'Failed to process command',
-      });
-    } finally {
-      setIsProcessing(false);
-    }
-  };
-
   const handleClassifiedIntent = async (result: any) => {
+    console.log('[Voice] handleClassifiedIntent called with result:', result);
+
     if (result.error) {
+      console.log('[Voice] Result has error:', result.message);
+      setProcessingStatus(null);
       setSnackbar({
         open: true,
         message: result.message,
@@ -149,8 +124,17 @@ export function VoiceFloatingButton() {
       return;
     }
 
+    // Support both formats: backend returns 'type', frontend local classifier returns 'intent'
+    const activityType = result.type || result.intent;
+    console.log('[Voice] Activity type:', activityType);
+
+    // Set identified activity for status display
+    setIdentifiedActivity(activityType);
+
     // Handle unknown or low confidence
-    if (result.type === 'unknown' || (result.confidence && result.confidence < 0.3)) {
+    if (activityType === 'unknown' || (result.confidence && result.confidence < 0.3)) {
+      console.log('[Voice] Unknown or low confidence:', activityType, result.confidence);
+      setProcessingStatus(null);
       setSnackbar({
         open: true,
         message: 'Could not understand the command. Please try again or use manual entry.',
@@ -161,6 +145,8 @@ export function VoiceFloatingButton() {
 
     // Get the first child from the family
     if (!familyId) {
+      console.log('[Voice] No familyId found');
+      setProcessingStatus(null);
       setSnackbar({
         open: true,
         message: 'No family found. Please set up your profile first.',
@@ -169,11 +155,17 @@ export function VoiceFloatingButton() {
       return;
     }
 
+    console.log('[Voice] Family ID:', familyId);
+
     try {
       setIsProcessing(true);
+      setProcessingStatus('saving');
+
       // Fetch children
+      console.log('[Voice] Fetching children for family:', familyId);
       const children = await childrenApi.getChildren(familyId);
+      console.log('[Voice] Children found:', children.length, children);
 
       if (children.length === 0) {
         setSnackbar({
           open: true,
@@ -186,21 +178,23 @@ export function VoiceFloatingButton() {
 
       // Use the first child (or you could enhance this to support child name matching)
       const childId = children[0].id;
+      console.log('[Voice] Using child ID:', childId);
+
       // Create the activity
       const activityData = {
-        type: result.type,
+        type: activityType,
         timestamp: result.timestamp || new Date().toISOString(),
-        data: result.details || {},
-        notes: result.details?.notes || undefined,
+        data: result.details || result.structuredData || {},
+        notes: result.details?.notes || result.structuredData?.notes || undefined,
       };
 
-      console.log('[Voice] Creating activity:', activityData);
+      console.log('[Voice] Creating activity with data:', JSON.stringify(activityData, null, 2));
 
-      await trackingApi.createActivity(childId, activityData);
+      const createdActivity = await trackingApi.createActivity(childId, activityData);
+      console.log('[Voice] Activity created successfully:', createdActivity);
 
       // Show success message
-      const activityLabel = result.type.charAt(0).toUpperCase() + result.type.slice(1);
+      const activityLabel = activityType.charAt(0).toUpperCase() + activityType.slice(1);
       setSnackbar({
         open: true,
         message: `${activityLabel} activity saved successfully!`,
@@ -212,7 +206,9 @@ export function VoiceFloatingButton() {
         handleClose();
       }, 1500);
     } catch (error: any) {
-      console.error('[Voice] Failed to create activity:', error);
+      console.error('[Voice] Failed to create activity - Full error:', error);
+      console.error('[Voice] Error response:', error.response);
+      console.error('[Voice] Error data:', error.response?.data);
       setSnackbar({
         open: true,
         message: error.response?.data?.message || 'Failed to save activity. Please try again.',
@@ -253,7 +249,7 @@ export function VoiceFloatingButton() {
           Voice Command
           {classificationResult && !classificationResult.error && (
             <Chip
-              label={`${classificationResult.intent} (${classificationResult.confidenceLevel})`}
+              label={`${classificationResult.type || classificationResult.intent} (${classificationResult.confidenceLevel || Math.round((classificationResult.confidence || 0) * 100) + '%'})`}
              color="success"
              size="small"
              sx={{ ml: 2 }}
@@ -287,9 +283,12 @@ export function VoiceFloatingButton() {
            </IconButton>
          </Box>
 
-          {/* Status text */}
+          {/* Status text with detailed processing stages */}
          <Typography variant="body1" color="text.secondary" gutterBottom>
-            {isListening ? 'Listening... Speak now' : 'Click the microphone to start'}
+            {processingStatus === 'listening' && 'Listening... Speak now'}
+            {processingStatus === 'understanding' && 'Understanding your request...'}
+            {processingStatus === 'saving' && identifiedActivity && `Adding to ${identifiedActivity.charAt(0).toUpperCase() + identifiedActivity.slice(1)} tracker...`}
+            {!processingStatus && !isListening && 'Click the microphone to start'}
          </Typography>
 
          {/* Transcript */}
@@ -302,12 +301,14 @@ export function VoiceFloatingButton() {
          </Box>
          )}
 
-          {/* Processing indicator */}
-          {isProcessing && (
+          {/* Processing indicator with status */}
+          {processingStatus && (
            <Box sx={{ mt: 2, display: 'flex', alignItems: 'center', justifyContent: 'center' }}>
              <CircularProgress size={20} sx={{ mr: 1 }} />
              <Typography variant="body2" color="text.secondary">
-                Processing command...
+                {processingStatus === 'listening' && 'Listening...'}
+                {processingStatus === 'understanding' && 'Understanding...'}
+                {processingStatus === 'saving' && 'Saving...'}
              </Typography>
            </Box>
          )}
@@ -316,7 +317,7 @@ export function VoiceFloatingButton() {
          {classificationResult && !classificationResult.error && (
            <Alert severity="success" sx={{ mt: 2 }}>
              <Typography variant="body2" gutterBottom>
-                <strong>Understood:</strong> {classificationResult.intent}
+                <strong>Understood:</strong> {classificationResult.type || classificationResult.intent}
              </Typography>
            </Alert>
          )}
```
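For clarity, a sketch of the classification result that `handleClassifiedIntent` now tolerates; the field names come from the hunks above, while the type itself and the comments are assumptions:

```typescript
// Assumed union of the two result shapes the handler supports after this change.
type ClassifiedIntent = {
  error?: boolean;
  message?: string;
  confidence?: number;                      // 0..1; below 0.3 is treated like 'unknown'
  timestamp?: string;                       // ISO timestamp; defaults to "now" when absent
  type?: string;                            // backend classifier result, e.g. 'feeding'
  structuredData?: Record<string, unknown>; // backend activity payload
  intent?: string;                          // legacy local-classifier result
  details?: Record<string, unknown>;        // legacy activity payload
};
```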
useVoiceInput hook: always record via MediaRecorder with backend transcription, and auto-stop after 1.5 s of silence using Web Audio API level monitoring.

```diff
@@ -36,49 +36,21 @@ export function useVoiceInput() {
   const mediaRecorderRef = useRef<MediaRecorder | null>(null);
   const audioChunksRef = useRef<Blob[]>([]);
   const timeoutRef = useRef<NodeJS.Timeout | null>(null);
+  const silenceTimeoutRef = useRef<NodeJS.Timeout | null>(null);
+  const audioContextRef = useRef<AudioContext | null>(null);
+  const analyserRef = useRef<AnalyserNode | null>(null);
 
-  // Check if browser supports Speech Recognition or MediaRecorder
+  // Check if browser supports MediaRecorder (unified approach for all platforms)
   useEffect(() => {
-    // Detect iOS Safari specifically
-    const isIOSSafari = /iPad|iPhone|iPod/.test(navigator.userAgent) && !(window as any).MSStream;
-
-    const SpeechRecognition =
-      (window as any).SpeechRecognition || (window as any).webkitSpeechRecognition;
-
-    // Force fallback for iOS Safari regardless of Speech Recognition availability
-    if (isIOSSafari) {
-      console.log('[Voice] iOS Safari detected, using MediaRecorder fallback');
-      if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
-        setState(prev => ({ ...prev, isSupported: true, usesFallback: true }));
-      } else {
-        setState(prev => ({ ...prev, isSupported: false }));
-      }
-    } else if (SpeechRecognition) {
-      try {
-        // Initialize recognition for non-iOS browsers
-        console.log('[Voice] Initializing Web Speech API');
-        const recognition = new SpeechRecognition();
-        recognition.continuous = true; // Keep listening until manually stopped
-        recognition.interimResults = true; // Get interim results
-        recognition.maxAlternatives = 1;
-        recognition.lang = 'en-US'; // Default language
-
-        recognitionRef.current = recognition;
-        console.log('[Voice] Web Speech API initialized successfully');
-        setState(prev => ({ ...prev, isSupported: true, usesFallback: false }));
-      } catch (error) {
-        console.warn('[Voice] Speech Recognition initialization failed, trying fallback');
-        if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
-          setState(prev => ({ ...prev, isSupported: true, usesFallback: true }));
-        } else {
-          setState(prev => ({ ...prev, isSupported: false }));
-        }
-      }
-    } else if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
-      // Use MediaRecorder fallback for other browsers without Speech Recognition
-      console.log('[Voice] No Speech Recognition, using MediaRecorder fallback');
-      setState(prev => ({ ...prev, isSupported: true, usesFallback: true }));
+    // Always use MediaRecorder + backend transcription for consistency
+    // This gives us one flow to debug and maintain, works on all platforms
+    console.log('[Voice] Checking MediaRecorder support...');
+
+    if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
+      console.log('[Voice] MediaRecorder supported, will use backend transcription for all platforms');
+      setState(prev => ({ ...prev, isSupported: true, usesFallback: true }));
     } else {
+      console.log('[Voice] MediaRecorder not supported');
       setState(prev => ({ ...prev, isSupported: false }));
     }
 
@@ -115,6 +87,59 @@ export function useVoiceInput() {
 
       console.log('[Voice] Microphone access granted, creating MediaRecorder...');
 
+      // Set up silence detection using Web Audio API
+      const audioContext = new (window.AudioContext || (window as any).webkitAudioContext)();
+      const analyser = audioContext.createAnalyser();
+      const microphone = audioContext.createMediaStreamSource(stream);
+      analyser.fftSize = 512;
+      microphone.connect(analyser);
+
+      audioContextRef.current = audioContext;
+      analyserRef.current = analyser;
+
+      // Monitor audio levels for silence detection
+      const bufferLength = analyser.frequencyBinCount;
+      const dataArray = new Uint8Array(bufferLength);
+      let lastSoundTime = Date.now();
+      const SILENCE_THRESHOLD = 10; // Adjust based on testing
+      const SILENCE_DURATION = 1500; // 1.5 seconds of silence
+
+      const checkSilence = () => {
+        analyser.getByteFrequencyData(dataArray);
+        const average = dataArray.reduce((a, b) => a + b, 0) / bufferLength;
+
+        if (average > SILENCE_THRESHOLD) {
+          lastSoundTime = Date.now();
+          // Clear silence timeout if sound detected
+          if (silenceTimeoutRef.current) {
+            clearTimeout(silenceTimeoutRef.current);
+            silenceTimeoutRef.current = null;
+          }
+        } else {
+          // Check if silence has lasted long enough
+          const silenceDuration = Date.now() - lastSoundTime;
+          if (silenceDuration > SILENCE_DURATION && !silenceTimeoutRef.current) {
+            console.log('[Voice] Silence detected, auto-stopping...');
+            if (mediaRecorderRef.current && mediaRecorderRef.current.state === 'recording') {
+              mediaRecorderRef.current.stop();
+            }
+            return;
+          }
+        }
+
+        // Continue checking if still recording
+        if (mediaRecorderRef.current && mediaRecorderRef.current.state === 'recording') {
+          requestAnimationFrame(checkSilence);
+        }
+      };
+
+      // Start monitoring after a brief delay to avoid immediate stop
+      setTimeout(() => {
+        if (mediaRecorderRef.current && mediaRecorderRef.current.state === 'recording') {
+          requestAnimationFrame(checkSilence);
+        }
+      }, 500);
+
       // Try different mime types for iOS Safari compatibility
       let mimeType = 'audio/webm;codecs=opus';
       if (!MediaRecorder.isTypeSupported(mimeType)) {
@@ -200,6 +225,12 @@ export function useVoiceInput() {
         // Stop all tracks
         stream.getTracks().forEach(track => track.stop());
         console.log('[Voice] Stream tracks stopped');
+
+        // Clean up audio context
+        if (audioContextRef.current) {
+          audioContextRef.current.close();
+          audioContextRef.current = null;
+        }
       };
 
       mediaRecorder.onerror = (event) => {
@@ -265,6 +296,8 @@ export function useVoiceInput() {
         error: null,
       }));
 
+      let lastSpeechTime = Date.now();
+
       // Set up event handlers
       recognition.onstart = () => {
         console.log('[Voice] Started listening');
@@ -283,6 +316,26 @@ export function useVoiceInput() {
           }
         }
 
+        // Update last speech time
+        lastSpeechTime = Date.now();
+
+        // Reset silence timeout
+        if (silenceTimeoutRef.current) {
+          clearTimeout(silenceTimeoutRef.current);
+        }
+
+        // Set new silence timeout (1.5 seconds after last speech)
+        silenceTimeoutRef.current = setTimeout(() => {
+          console.log('[Voice] Silence detected, auto-stopping...');
+          if (recognitionRef.current) {
+            try {
+              recognitionRef.current.stop();
+            } catch (e) {
+              // Ignore errors
+            }
+          }
+        }, 1500);
+
         // Only update state with final results, show interim in console for debugging
         if (finalTranscript) {
           console.log('[Voice] Final result:', finalTranscript);
@@ -347,19 +400,10 @@ export function useVoiceInput() {
     }
   }, []);
 
-  // Start listening (chooses appropriate method)
+  // Start listening (always uses MediaRecorder + backend transcription)
   const startListening = useCallback(() => {
-    if (state.usesFallback) {
-      startListeningWithFallback();
-    } else if (recognitionRef.current) {
-      startListeningWithSpeechAPI();
-    } else {
-      setState(prev => ({
-        ...prev,
-        error: 'Voice input not supported in this browser',
-      }));
-    }
-  }, [state.usesFallback, startListeningWithFallback, startListeningWithSpeechAPI]);
+    startListeningWithFallback();
+  }, [startListeningWithFallback]);
 
   // Stop listening
   const stopListening = useCallback(() => {
```