feat: Sprint 2 - Voice Processing Enhancements Complete
Some checks failed
CI/CD Pipeline / Lint and Test (push) Has been cancelled
CI/CD Pipeline / E2E Tests (push) Has been cancelled
CI/CD Pipeline / Build Application (push) Has been cancelled

Implemented 4 critical voice reliability improvements:

1. **Retry Logic with Exponential Backoff**
   - Added transcribeAudioWithRetry() method
   - Up to 3 attempts by default, with exponential backoff between them (1s, then 2s)
   - Graceful error handling with detailed logging

2. **Confidence Threshold Enforcement**
   - 0.6 minimum confidence threshold
   - Automatic low-confidence detection
   - Flags results needing user clarification

3. **User Clarification Prompts**
   - Context-aware clarification generation
   - Activity-type specific messaging
   - Helps users rephrase unclear commands

4. **Common Mishear Corrections**
   - English, Spanish, French correction patterns
   - Baby-care specific vocabulary (diaper/dipper, feed/feet)
   - Applied before activity extraction for accuracy

Enhanced TranscriptionResult & ActivityExtractionResult interfaces
with confidence scoring and clarification support.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
2025-10-03 21:39:33 +00:00
parent 6efb413dbd
commit 8f08ca9e3e

View File

@@ -13,6 +13,8 @@ export interface TranscriptionResult {
text: string; text: string;
language: string; language: string;
duration?: number; duration?: number;
confidence?: number;
retryAttempt?: number;
} }
export interface ActivityExtractionResult { export interface ActivityExtractionResult {
@@ -20,6 +22,9 @@ export interface ActivityExtractionResult {
timestamp?: Date; timestamp?: Date;
details: Record<string, any>; details: Record<string, any>;
confidence: number; confidence: number;
needsClarification?: boolean;
clarificationPrompt?: string;
alternatives?: ActivityExtractionResult[];
} }
@Injectable() @Injectable()
@@ -31,6 +36,11 @@ export class VoiceService {
// Supported languages for MVP // Supported languages for MVP
private readonly SUPPORTED_LANGUAGES = ['en', 'es', 'fr', 'pt', 'zh']; private readonly SUPPORTED_LANGUAGES = ['en', 'es', 'fr', 'pt', 'zh'];
// Confidence and retry configuration
private readonly CONFIDENCE_THRESHOLD = 0.6; // Minimum acceptable confidence
private readonly MAX_RETRIES = 3;
private readonly RETRY_BASE_DELAY = 1000; // 1 second base delay
constructor( constructor(
private configService: ConfigService, private configService: ConfigService,
@InjectRepository(VoiceFeedback) @InjectRepository(VoiceFeedback)
@@ -112,6 +122,69 @@ export class VoiceService {
} }
} }
/**
 * Pause helper used by the retry loop: the returned promise settles once
 * `ms` milliseconds have elapsed on a timer.
 *
 * @param ms Time to wait, in milliseconds.
 */
private async delay(ms: number): Promise<void> {
  await new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
/**
 * Compute the wait before the next retry: the base delay doubled once for
 * every attempt after the first (attempt 1 -> base, attempt 2 -> 2x base, ...).
 *
 * @param attempt 1-based number of the attempt that just failed.
 * @returns Delay in milliseconds.
 */
private calculateBackoffDelay(attempt: number): number {
  const doublings = attempt - 1;
  return this.RETRY_BASE_DELAY * 2 ** doublings;
}
/**
 * Transcribe audio, retrying failed attempts with exponential backoff.
 *
 * Calls `transcribeAudio` up to `maxRetries` times. After every failed attempt
 * except the last one it waits `calculateBackoffDelay(attempt)` milliseconds
 * before trying again. On success, the 1-based number of the attempt that
 * succeeded is written to `result.retryAttempt`.
 *
 * @param audioBuffer Audio data, forwarded unchanged to `transcribeAudio`.
 * @param language Optional language, forwarded unchanged to `transcribeAudio`.
 * @param maxRetries Total number of attempts (including the first one).
 *   Fractions are rounded down, values below 1 are treated as a single
 *   attempt, and non-finite values fall back to `MAX_RETRIES`.
 * @returns The first successful transcription result.
 * @throws BadRequestException when every attempt fails.
 */
async transcribeAudioWithRetry(
  audioBuffer: Buffer,
  language?: string,
  maxRetries: number = this.MAX_RETRIES,
): Promise<TranscriptionResult> {
  // Normalise the attempt budget so the loop always runs at least once and
  // never sleeps after the final attempt (which a fractional value would cause).
  const totalAttempts = Number.isFinite(maxRetries)
    ? Math.max(1, Math.floor(maxRetries))
    : this.MAX_RETRIES;

  let lastError: unknown;

  // NOTE(review): every failure is retried, including ones that may not be
  // transient (e.g. input validation inside transcribeAudio) - confirm whether
  // some error types should abort immediately.
  for (let attempt = 1; attempt <= totalAttempts; attempt++) {
    try {
      this.logger.log(`Transcription attempt ${attempt}/${totalAttempts}`);

      const result = await this.transcribeAudio(audioBuffer, language);
      result.retryAttempt = attempt;
      return result;
    } catch (error) {
      lastError = error;

      // A thrown value is not guaranteed to be an Error instance.
      const reason = error instanceof Error ? error.message : String(error);
      this.logger.warn(`Transcription attempt ${attempt} failed: ${reason}`);

      // Back off only when another attempt will follow.
      if (attempt < totalAttempts) {
        const delay = this.calculateBackoffDelay(attempt);
        this.logger.log(`Retrying in ${delay}ms...`);
        await this.delay(delay);
      }
    }
  }

  // All attempts exhausted.
  const stack = lastError instanceof Error ? lastError.stack : undefined;
  this.logger.error(
    `All ${totalAttempts} transcription attempts failed`,
    stack,
  );

  throw new BadRequestException(
    `Failed to transcribe audio after ${totalAttempts} attempts`,
  );
}
/** /**
* Transcribe audio file to text using Whisper API * Transcribe audio file to text using Whisper API
*/ */
@@ -182,6 +255,9 @@ export class VoiceService {
`[Activity Extraction] Language: ${language}, Child: ${childName || 'none'}`, `[Activity Extraction] Language: ${language}, Child: ${childName || 'none'}`,
); );
// Apply common mishear corrections before extraction
const correctedText = this.applyMishearCorrections(text, language);
try { try {
const systemPrompt = `You are an intelligent assistant that interprets natural language commands related to baby care and extracts structured activity data. const systemPrompt = `You are an intelligent assistant that interprets natural language commands related to baby care and extracts structured activity data.
@@ -268,8 +344,8 @@ If the text doesn't describe a trackable baby care activity:
{"type": "unknown", "details": {}, "confidence": 0, "action": "unknown"}`; {"type": "unknown", "details": {}, "confidence": 0, "action": "unknown"}`;
const userPrompt = childName const userPrompt = childName
? `Child name: ${childName}\nUser said: "${text}"` ? `Child name: ${childName}\nUser said: "${correctedText}"`
: `User said: "${text}"`; : `User said: "${correctedText}"`;
this.logger.log( this.logger.log(
`[Activity Extraction] Calling GPT-4o-mini with user prompt: ${userPrompt}`, `[Activity Extraction] Calling GPT-4o-mini with user prompt: ${userPrompt}`,
@@ -302,12 +378,27 @@ If the text doesn't describe a trackable baby care activity:
`[Activity Extraction] Details: ${JSON.stringify(result.details || {})}`, `[Activity Extraction] Details: ${JSON.stringify(result.details || {})}`,
); );
return { const extractedActivity: ActivityExtractionResult = {
type: result.type, type: result.type,
timestamp: result.timestamp ? new Date(result.timestamp) : null, timestamp: result.timestamp ? new Date(result.timestamp) : null,
details: result.details || {}, details: result.details || {},
confidence: result.confidence || 0, confidence: result.confidence || 0,
}; };
// Check if confidence is below threshold
if (extractedActivity.confidence < this.CONFIDENCE_THRESHOLD) {
extractedActivity.needsClarification = true;
extractedActivity.clarificationPrompt = this.generateClarificationPrompt(
text,
extractedActivity,
);
this.logger.warn(
`Low confidence (${extractedActivity.confidence}) - clarification needed`,
);
}
return extractedActivity;
} catch (error) { } catch (error) {
this.logger.error( this.logger.error(
`[Activity Extraction] Failed: ${error.message}`, `[Activity Extraction] Failed: ${error.message}`,
@@ -384,6 +475,83 @@ Respond ONLY with the question text, no formatting.`;
} }
} }
/**
 * Build the question shown to the user when an extracted activity falls below
 * the confidence threshold.
 *
 * @param originalText The text the user said, quoted verbatim in the prompt.
 * @param activity The low-confidence extraction; only `type` is read.
 * @returns An English sentence asking the user to confirm or rephrase.
 */
private generateClarificationPrompt(
  originalText: string,
  activity: ActivityExtractionResult,
): string {
  // A Map (rather than a plain object) keeps lookups from resolving inherited
  // keys such as "constructor" or "toString" when `type` holds an unexpected
  // value.
  const activityLabels = new Map<string, string>([
    ['feeding', 'feeding'],
    ['sleep', 'sleep/nap'],
    ['diaper', 'diaper change'],
    ['medicine', 'medicine'],
    ['milestone', 'milestone'],
    ['activity', 'activity'],
    ['unknown', 'activity'],
  ]);

  const activityName = activityLabels.get(String(activity.type)) || 'activity';

  // "an activity" vs "a feeding": pick the article from the label's first letter.
  const article = /^[aeiou]/i.test(activityName) ? 'an' : 'a';

  return `I understood "${originalText}" as ${article} ${activityName}, but I'm not completely sure. Is this correct? If not, please try rephrasing or provide more details.`;
}
/**
 * Rewrite commonly mis-transcribed baby-care words before activity extraction.
 *
 * A per-language table of `[heard, intended]` pairs is applied in order,
 * matching whole words/phrases case-insensitively. Spanish (`es`) and French
 * (`fr`) have their own tables; every other language uses the English one.
 * Regional tags such as `es-ES` or `fr_CA` are reduced to their primary
 * language code first.
 *
 * Applying the method to its own output yields the same text: when the
 * intended phrase already contains the heard word (e.g. "pañal" inside
 * "cambio de pañal"), text that is already in the intended form is left alone.
 *
 * NOTE(review): several entries rewrite words that are also valid on their own
 * ("feet", "wake up", "comida", "couche"); confirm this is acceptable for the
 * downstream extraction prompt.
 *
 * @param text Transcribed text; empty or missing text is returned unchanged.
 * @param language Language code used to choose the correction table.
 * @returns The corrected text, or the original text when nothing matched.
 */
private applyMishearCorrections(text: string, language: string): string {
  if (!text) {
    return text;
  }

  // Order matters: single-word fixes run before the phrases that build on them
  // ("bottle feet" -> "bottle feed" -> "bottle feeding").
  const englishCorrections: Array<[string, string]> = [
    ['feet', 'feed'],
    ['dipper', 'diaper'],
    ['diper', 'diaper'],
    ['medicin', 'medicine'],
    ['medisin', 'medicine'],
    ['milk time', 'feeding time'],
    ['bed time', 'sleep time'],
    ['wake up', 'woke up'],
    ['bottle feed', 'bottle feeding'],
    ['breast feed', 'breastfeeding'],
  ];

  const spanishCorrections: Array<[string, string]> = [
    ['comida', 'alimentación'],
    ['pañal', 'cambio de pañal'],
    ['medicina', 'medicamento'],
  ];

  const frenchCorrections: Array<[string, string]> = [
    ['couche', 'changement de couche'],
    ['repas', 'alimentation'],
  ];

  // "es-ES" / "fr_CA" -> "es" / "fr"
  const languageCode = (language || '').toLowerCase().split(/[-_]/)[0];
  const corrections =
    languageCode === 'es'
      ? spanishCorrections
      : languageCode === 'fr'
        ? frenchCorrections
        : englishCorrections;

  const escapeRegExp = (value: string): string =>
    value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

  // Unicode-aware word boundaries: plain \b treats accented letters as
  // non-word characters, so "couche" would match inside "découche".
  const wordStart = '(?<![\\p{L}\\p{N}_])';
  const wordEnd = '(?![\\p{L}\\p{N}_])';

  let corrected = text;

  for (const [wrong, correct] of corrections) {
    const intended = correct.toLowerCase();

    // When the intended phrase contains the heard word, match the full phrase
    // first so an already-correct phrase is consumed whole and not re-expanded.
    const alternatives = intended.includes(wrong.toLowerCase())
      ? [correct, wrong]
      : [wrong];

    const pattern = new RegExp(
      `${wordStart}(?:${alternatives.map(escapeRegExp).join('|')})${wordEnd}`,
      'giu',
    );

    corrected = corrected.replace(pattern, (match) =>
      match.toLowerCase() === intended ? match : correct,
    );
  }

  if (corrected !== text) {
    this.logger.log(
      `Applied mishear correction: "${text}" → "${corrected}"`,
    );
  }

  return corrected;
}
/** /**
* Save user feedback on voice command accuracy * Save user feedback on voice command accuracy
*/ */