/*
 * Changelog (from commit message):
 * - Remove temperature parameter from GPT-5-mini activity extraction (not supported)
 * - Add classification state to useVoiceInput hook to avoid duplicate API calls
 * - Prevent infinite loop in VoiceFloatingButton by tracking lastClassifiedTranscript
 * - Use classification from backend directly instead of making second request
 * - iOS Safari now successfully transcribes with Azure Whisper and classifies with GPT-5-mini
 */
import { NextRequest, NextResponse } from 'next/server';
import { NextRequest, NextResponse } from 'next/server';
import { classifyIntent, validateClassification, getConfidenceLevel } from '@/lib/voice/intentClassifier';

/**
 * Voice transcription and intent classification endpoint.
 *
 * Accepts an audio file or already-transcribed text and returns:
 * - Intent classification (feeding/sleep/diaper)
 * - Extracted entities (amounts, times, durations)
 * - Structured data ready for activity creation
 */
export async function POST(request: NextRequest) {
|
|
try {
|
|
const contentType = request.headers.get('content-type') || '';
|
|
|
|
let transcribedText: string;
|
|
|
|
if (contentType.includes('application/json')) {
|
|
// Text input (already transcribed)
|
|
const body = await request.json();
|
|
transcribedText = body.text;
|
|
|
|
if (!transcribedText || typeof transcribedText !== 'string') {
|
|
return NextResponse.json(
|
|
{
|
|
error: 'VOICE_INVALID_INPUT',
|
|
message: 'Text must be a non-empty string',
|
|
},
|
|
{ status: 400 }
|
|
);
|
|
}
|
|
} else if (contentType.includes('multipart/form-data')) {
|
|
// Audio file upload - forward to backend for Whisper transcription
|
|
const formData = await request.formData();
|
|
const audioFile = formData.get('audio');
|
|
|
|
if (!audioFile) {
|
|
return NextResponse.json(
|
|
{
|
|
error: 'VOICE_NO_AUDIO',
|
|
message: 'No audio file provided',
|
|
},
|
|
{ status: 400 }
|
|
);
|
|
}
|
|
|
|
// Forward to backend
|
|
const backendUrl = process.env.NEXT_PUBLIC_API_URL || 'http://localhost:3020';
|
|
const backendFormData = new FormData();
|
|
backendFormData.append('audio', audioFile);
|
|
|
|
const backendResponse = await fetch(`${backendUrl}/api/v1/voice/transcribe`, {
|
|
method: 'POST',
|
|
body: backendFormData,
|
|
headers: {
|
|
// Forward auth token if present
|
|
...(request.headers.get('authorization') && {
|
|
authorization: request.headers.get('authorization')!,
|
|
}),
|
|
},
|
|
});
|
|
|
|
if (!backendResponse.ok) {
|
|
const errorData = await backendResponse.json();
|
|
return NextResponse.json(errorData, { status: backendResponse.status });
|
|
}
|
|
|
|
const result = await backendResponse.json();
|
|
|
|
// Backend returns { success, transcript, classification }
|
|
// Return in the format expected by the frontend
|
|
return NextResponse.json(
|
|
{
|
|
success: true,
|
|
transcript: result.transcript,
|
|
classification: result.classification,
|
|
},
|
|
{ status: 200 }
|
|
);
|
|
} else {
|
|
return NextResponse.json(
|
|
{
|
|
error: 'VOICE_INVALID_CONTENT_TYPE',
|
|
message: 'Content-Type must be application/json or multipart/form-data',
|
|
},
|
|
{ status: 400 }
|
|
);
|
|
}
|
|
|
|
// Classify intent
|
|
const classification = classifyIntent(transcribedText);
|
|
|
|
// Validate classification
|
|
if (!validateClassification(classification)) {
|
|
return NextResponse.json(
|
|
{
|
|
error: 'VOICE_CLASSIFICATION_FAILED',
|
|
message: 'Could not understand the command. Please try again.',
|
|
suggestion: 'Try saying something like "Fed baby 100ml" or "Changed wet diaper"',
|
|
classification: {
|
|
intent: classification.intent,
|
|
confidence: classification.confidence,
|
|
confidenceLevel: getConfidenceLevel(classification.confidence),
|
|
},
|
|
},
|
|
{ status: 400 }
|
|
);
|
|
}
|
|
|
|
// Return classification result
|
|
return NextResponse.json(
|
|
{
|
|
success: true,
|
|
transcription: transcribedText,
|
|
classification: {
|
|
intent: classification.intent,
|
|
confidence: classification.confidence,
|
|
confidenceLevel: getConfidenceLevel(classification.confidence),
|
|
entities: classification.entities,
|
|
structuredData: classification.structuredData,
|
|
},
|
|
},
|
|
{ status: 200 }
|
|
);
|
|
} catch (error) {
|
|
console.error('[Voice] Transcription error:', error);
|
|
return NextResponse.json(
|
|
{
|
|
error: 'VOICE_TRANSCRIPTION_FAILED',
|
|
message: 'Failed to process voice command. Please try again.',
|
|
},
|
|
{ status: 500 }
|
|
);
|
|
}
|
|
}

/**
 * GET handler: lists supported voice command intents, example phrases,
 * and recognized entity formats (for client-side help UI).
 */
export async function GET() {
|
|
return NextResponse.json(
|
|
{
|
|
supportedIntents: ['feeding', 'sleep', 'diaper'],
|
|
examples: {
|
|
feeding: [
|
|
'Fed baby 120 ml',
|
|
'Gave him 4 ounces',
|
|
'Nursed on left breast for 15 minutes',
|
|
'Breastfed on both sides',
|
|
'Baby ate solid food',
|
|
],
|
|
sleep: [
|
|
'Baby fell asleep',
|
|
'Napped for 45 minutes',
|
|
'Put baby down for bedtime',
|
|
'Baby woke up',
|
|
],
|
|
diaper: [
|
|
'Changed wet diaper',
|
|
'Dirty diaper change',
|
|
'Changed a wet and dirty diaper',
|
|
'Baby had a bowel movement',
|
|
],
|
|
},
|
|
entities: {
|
|
amounts: ['120 ml', '4 oz', '2 tablespoons'],
|
|
durations: ['15 minutes', '45 mins', '2 hours'],
|
|
times: ['at 3:30 pm', '30 minutes ago', 'just now'],
|
|
breastSides: ['left breast', 'right side', 'both sides'],
|
|
diaperTypes: ['wet', 'dirty', 'wet and dirty', 'bowel movement'],
|
|
},
|
|
},
|
|
{ status: 200 }
|
|
);
|
|
}
|