diff --git a/maternal-app/maternal-app-backend/src/modules/voice/voice.service.ts b/maternal-app/maternal-app-backend/src/modules/voice/voice.service.ts
index f522371..f0e7c16 100644
--- a/maternal-app/maternal-app-backend/src/modules/voice/voice.service.ts
+++ b/maternal-app/maternal-app-backend/src/modules/voice/voice.service.ts
@@ -13,6 +13,8 @@ export interface TranscriptionResult {
   text: string;
   language: string;
   duration?: number;
+  confidence?: number;
+  retryAttempt?: number;
 }
 
 export interface ActivityExtractionResult {
@@ -20,6 +22,9 @@ export interface ActivityExtractionResult {
   timestamp?: Date;
   details: Record<string, any>;
   confidence: number;
+  needsClarification?: boolean;
+  clarificationPrompt?: string;
+  alternatives?: ActivityExtractionResult[];
 }
 
 @Injectable()
@@ -31,6 +36,11 @@ export class VoiceService {
   // Supported languages for MVP
   private readonly SUPPORTED_LANGUAGES = ['en', 'es', 'fr', 'pt', 'zh'];
 
+  // Confidence and retry configuration
+  private readonly CONFIDENCE_THRESHOLD = 0.6; // Minimum acceptable confidence
+  private readonly MAX_RETRIES = 3;
+  private readonly RETRY_BASE_DELAY = 1000; // 1 second base delay
+
   constructor(
     private configService: ConfigService,
     @InjectRepository(VoiceFeedback)
@@ -112,6 +122,79 @@ export class VoiceService {
     }
   }
 
+  /**
+   * Resolve after `ms` milliseconds; used to pause between retry attempts.
+   */
+  private async delay(ms: number): Promise<void> {
+    return new Promise((resolve) => setTimeout(resolve, ms));
+  }
+
+  /**
+   * Exponential backoff delay for a 1-based attempt number:
+   * RETRY_BASE_DELAY for attempt 1, doubling on every later attempt.
+   */
+  private calculateBackoffDelay(attempt: number): number {
+    return this.RETRY_BASE_DELAY * Math.pow(2, attempt - 1);
+  }
+
+  /**
+   * Transcribe audio, retrying failed attempts with exponential backoff.
+   *
+   * @param audioBuffer Audio payload handed unchanged to transcribeAudio
+   * @param language Optional language hint handed unchanged to transcribeAudio
+   * @param maxRetries Total number of attempts; values below 1 are treated as 1
+   * @returns The first successful transcription, with retryAttempt set to the
+   *   1-based attempt that produced it
+   * @throws BadRequestException once every attempt has failed
+   */
+  async transcribeAudioWithRetry(
+    audioBuffer: Buffer,
+    language?: string,
+    maxRetries: number = this.MAX_RETRIES,
+  ): Promise<TranscriptionResult> {
+    // Always try at least once: with zero attempts there would be no captured
+    // error to report and callers would get a TypeError instead of a 400.
+    const totalAttempts = Math.max(1, Math.floor(maxRetries) || 1);
+    let lastError: Error | undefined;
+
+    for (let attempt = 1; attempt <= totalAttempts; attempt++) {
+      try {
+        this.logger.log(
+          `Transcription attempt ${attempt}/${totalAttempts}`,
+        );
+
+        const result = await this.transcribeAudio(audioBuffer, language);
+        result.retryAttempt = attempt;
+
+        // Success - return result
+        return result;
+      } catch (error) {
+        // Anything can be thrown; normalise so message/stack are always there
+        lastError = error instanceof Error ? error : new Error(String(error));
+
+        this.logger.warn(
+          `Transcription attempt ${attempt} failed: ${lastError.message}`,
+        );
+
+        // If not the last attempt, wait before retrying
+        if (attempt < totalAttempts) {
+          const delay = this.calculateBackoffDelay(attempt);
+          this.logger.log(`Retrying in ${delay}ms...`);
+          await this.delay(delay);
+        }
+      }
+    }
+
+    // All retries exhausted
+    this.logger.error(
+      `All ${totalAttempts} transcription attempts failed`,
+      lastError?.stack,
+    );
+    throw new BadRequestException(
+      `Failed to transcribe audio after ${totalAttempts} attempts`,
+    );
+  }
+
   /**
    * Transcribe audio file to text using Whisper API
    */
@@ -182,6 +265,9 @@ export class VoiceService {
       `[Activity Extraction] Language: ${language}, Child: ${childName || 'none'}`,
     );
 
+    // Apply common mishear corrections before extraction
+    const correctedText = this.applyMishearCorrections(text, language);
+
     try {
       const systemPrompt = `You are an intelligent assistant that interprets natural language commands related to baby care and extracts structured activity data.
 
@@ -268,8 +354,8 @@ If the text doesn't describe a trackable baby care activity:
 {"type": "unknown", "details": {}, "confidence": 0, "action": "unknown"}`;
 
       const userPrompt = childName
-        ? `Child name: ${childName}\nUser said: "${text}"`
-        : `User said: "${text}"`;
+        ? `Child name: ${childName}\nUser said: "${correctedText}"`
+        : `User said: "${correctedText}"`;
 
       this.logger.log(
         `[Activity Extraction] Calling GPT-4o-mini with user prompt: ${userPrompt}`,
@@ -302,12 +388,27 @@ If the text doesn't describe a trackable baby care activity:
         `[Activity Extraction] Details: ${JSON.stringify(result.details || {})}`,
       );
 
-      return {
+      const extractedActivity: ActivityExtractionResult = {
         type: result.type,
         timestamp: result.timestamp ? new Date(result.timestamp) : null,
         details: result.details || {},
         confidence: result.confidence || 0,
       };
+
+      // Check if confidence is below threshold
+      if (extractedActivity.confidence < this.CONFIDENCE_THRESHOLD) {
+        extractedActivity.needsClarification = true;
+        extractedActivity.clarificationPrompt = this.generateClarificationPrompt(
+          text,
+          extractedActivity,
+        );
+
+        this.logger.warn(
+          `Low confidence (${extractedActivity.confidence}) - clarification needed`,
+        );
+      }
+
+      return extractedActivity;
     } catch (error) {
       this.logger.error(
         `[Activity Extraction] Failed: ${error.message}`,
@@ -384,6 +485,113 @@ Respond ONLY with the question text, no formatting.`;
     }
   }
 
+  /**
+   * Build the question shown to the user when an extraction falls below the
+   * confidence threshold.
+   *
+   * @param originalText What the user said, quoted back in the prompt
+   * @param activity The low-confidence extraction being confirmed
+   * @returns A confirmation question naming the guessed activity type
+   */
+  private generateClarificationPrompt(
+    originalText: string,
+    activity: ActivityExtractionResult,
+  ): string {
+    // A Map (rather than an object literal) so that a type such as
+    // "constructor" cannot resolve to an inherited Object member.
+    const activityNames = new Map<string, string>([
+      ['feeding', 'feeding'],
+      ['sleep', 'sleep/nap'],
+      ['diaper', 'diaper change'],
+      ['medicine', 'medicine'],
+      ['milestone', 'milestone'],
+      ['activity', 'activity'],
+      ['unknown', 'activity'],
+    ]);
+
+    const activityName = activityNames.get(activity.type) || 'activity';
+    // "an activity", not "a activity"
+    const article = /^[aeiou]/i.test(activityName) ? 'an' : 'a';
+
+    return `I understood "${originalText}" as ${article} ${activityName}, but I'm not completely sure. Is this correct? If not, please try rephrasing or provide more details.`;
+  }
+
+  /**
+   * Replace commonly misheard words in a transcript before activity
+   * extraction.
+   *
+   * @param text Transcribed text
+   * @param language Language code; only the part before any "-" or "_" is
+   *   used, and a missing code is treated as English
+   * @returns The corrected text, or `text` unchanged when there is no
+   *   correction table for the language
+   */
+  private applyMishearCorrections(text: string, language: string): string {
+    // Common baby-related mishears, keyed by language code. Every entry must
+    // change the text: a phrase mapped to itself would be a no-op.
+    const correctionsByLanguage: Record<string, Record<string, string>> = {
+      en: {
+        'feet': 'feed',
+        'dipper': 'diaper',
+        'diper': 'diaper',
+        'medicin': 'medicine',
+        'medisin': 'medicine',
+        'milk time': 'feeding time',
+        'bed time': 'sleep time',
+        'wake up': 'woke up',
+        'bottle feed': 'bottle feeding',
+        'breast feed': 'breastfeeding',
+      },
+      es: {
+        'comida': 'alimentación',
+        'pañal': 'cambio de pañal',
+        'medicina': 'medicamento',
+      },
+      fr: {
+        'couche': 'changement de couche',
+        'repas': 'alimentation',
+      },
+    };
+
+    // English rules must not be applied to other languages (pt, zh, ...).
+    const languageKey = (language || 'en').toLowerCase().split(/[-_]/)[0];
+    const corrections = correctionsByLanguage[languageKey] || {};
+
+    let corrected = text;
+
+    // Apply corrections (case-insensitive, whole words only)
+    Object.entries(corrections).forEach(([wrong, correct]) => {
+      const escaped = wrong.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
+      const regex = new RegExp(`\\b${escaped}\\b`, 'gi');
+      // Where `correct` contains `wrong`, this is the position of `wrong` in it
+      const embeddedAt = correct.toLowerCase().indexOf(wrong.toLowerCase());
+
+      corrected = corrected.replace(
+        regex,
+        (match: string, offset: number, whole: string) => {
+          if (embeddedAt >= 0) {
+            // Leave "cambio de pañal" alone instead of producing
+            // "cambio de cambio de pañal".
+            const start = offset - embeddedAt;
+            const existing = whole.slice(start, start + correct.length);
+            if (start >= 0 && existing.toLowerCase() === correct.toLowerCase()) {
+              return match;
+            }
+          }
+          return correct;
+        },
+      );
+    });
+
+    if (corrected !== text) {
+      this.logger.log(
+        `Applied mishear correction: "${text}" → "${corrected}"`,
+      );
+    }
+
+    return corrected;
+  }
+
   /**
    * Save user feedback on voice command accuracy
    */