Add comprehensive .gitignore

2025-10-01 19:01:52 +00:00
commit f3ff07c0ef
254 changed files with 88254 additions and 0 deletions
--- a/docs/maternal-app-voice-processing.md
+++ b/docs/maternal-app-voice-processing.md
@@ -0,0 +1,590 @@
+# Voice Input Processing Guide - Maternal Organization App
+
+## Voice Processing Architecture
+
+### Overview
+Voice input enables hands-free logging during childcare activities. The system processes natural-language speech in five languages (English, Spanish, French, Portuguese, and Chinese) and extracts structured data from casual speech patterns.
+
+### Processing Pipeline
+```
+Audio Input → Speech Recognition → Language Detection → 
+Intent Classification → Entity Extraction → Action Execution → 
+Confirmation Feedback
+```
+
+---
+
+## Whisper API Integration
+
+### Configuration
+```typescript
+// services/whisperService.ts
+import OpenAI, { toFile } from 'openai';
+
+/**
+ * Thin wrapper around the OpenAI audio transcription endpoint (`whisper-1`).
+ */
+class WhisperService {
+  private client: OpenAI;
+
+  constructor() {
+    this.client = new OpenAI({
+      apiKey: process.env.OPENAI_API_KEY,
+    });
+  }
+
+  /**
+   * Transcribes an in-memory audio clip.
+   *
+   * @param audioBuffer Encoded audio bytes.
+   * @param language Optional ISO-639-1 hint. When omitted, no hint is sent so
+   *   the language is detected from the audio instead of being forced to English.
+   * @param filename Name attached to the upload. Its extension should match
+   *   the clip's real encoding; pass the original file name when it is not WAV.
+   * @returns The transcript, reported language, derived confidence and
+   *   word-level timing, or whatever `handleTranscriptionError` returns when
+   *   the request throws.
+   */
+  async transcribeAudio(
+    audioBuffer: Buffer,
+    language?: string,
+    filename = 'audio.wav'
+  ): Promise<TranscriptionResult> {
+    try {
+      // The SDK expects a file-like upload, not a bare Buffer; toFile wraps
+      // the bytes and gives them the name the multipart request needs.
+      const file = await toFile(audioBuffer, filename);
+
+      const response = await this.client.audio.transcriptions.create({
+        file,
+        model: 'whisper-1',
+        // Only send a language hint when the caller actually supplied one.
+        ...(language ? { language } : {}),
+        // Word timestamps are only returned with the verbose JSON format.
+        response_format: 'verbose_json',
+        timestamp_granularities: ['word'],
+      });
+
+      return {
+        text: response.text,
+        language: response.language,
+        confidence: this.calculateConfidence(response),
+        words: response.words,
+      };
+    } catch (error) {
+      return this.handleTranscriptionError(error);
+    }
+  }
+}
+```
+
+### Audio Preprocessing
+```typescript
+// utils/audioPreprocessing.ts
+/**
+ * Validates an audio file and prepares it for transcription.
+ *
+ * Files with an unsupported extension are rejected. Files above the 25MB
+ * limit go through `compressAudio`; everything else goes through
+ * `reduceNoise`.
+ *
+ * @throws Error when the file extension is not one of the supported formats.
+ */
+export const preprocessAudio = async (audioFile: File): Promise<Buffer> => {
+  const supportedExtensions = ['wav', 'mp3', 'm4a', 'webm'];
+  const isSupported = supportedExtensions.includes(getFileExtension(audioFile));
+  if (!isSupported) {
+    throw new Error('Unsupported audio format');
+  }
+
+  // Whisper accepts at most 25MB per upload.
+  const maxUploadBytes = 25 * 1024 * 1024;
+  const exceedsLimit = audioFile.size > maxUploadBytes;
+
+  // Oversized files are compressed (or chunked); the rest are denoised.
+  return exceedsLimit
+    ? await compressAudio(audioFile)
+    : await reduceNoise(audioFile);
+};
+```
+
+---
+
+## Natural Language Command Patterns
+
+### Intent Classification
+```typescript
+/**
+ * Intents a transcribed utterance can be classified as.
+ * Each member's runtime value is its own name as a string.
+ */
+enum VoiceIntent {
+  LOG_FEEDING = 'LOG_FEEDING',
+  LOG_SLEEP = 'LOG_SLEEP',
+  LOG_DIAPER = 'LOG_DIAPER',
+  LOG_MEDICATION = 'LOG_MEDICATION',
+  START_TIMER = 'START_TIMER',
+  STOP_TIMER = 'STOP_TIMER',
+  ASK_QUESTION = 'ASK_QUESTION',
+  CHECK_STATUS = 'CHECK_STATUS',
+  CANCEL = 'CANCEL'
+}
+
+/** Associates one intent with the regular expressions that recognise it. */
+interface IntentPattern {
+  /** Intent reported when any entry in `patterns` matches. */
+  intent: VoiceIntent;
+  /** Regular expressions tested against the transcribed text. */
+  patterns: RegExp[];
+  /**
+   * Names of the entities associated with this intent.
+   * NOTE(review): the examples in this guide suffix names with `?`
+   * (e.g. 'amount?'), presumably marking them optional — confirm.
+   */
+  requiredEntities: string[];
+  /** Sample utterances illustrating the intent. */
+  examples: string[];
+}
+```
+
+### English Language Patterns
+```typescript
+/**
+ * English intent patterns.
+ *
+ * Intent detection returns on the first regex that matches while iterating
+ * these entries, so earlier entries win over later ones. Every alternation is
+ * therefore anchored on word boundaries: without them "late" matched "ate"
+ * and "Woke up late" was classified as a feeding. Every string in `examples`
+ * matches at least one pattern of its own entry.
+ */
+const englishPatterns: IntentPattern[] = [
+  {
+    intent: VoiceIntent.LOG_FEEDING,
+    patterns: [
+      /\b(?:fed|ate|drank|had|nursed)\b/i,
+      // "breastfed"/"breastfeeding" are listed explicitly because the word
+      // boundaries stop "breast" and "feeding" from matching inside them.
+      /\b(?:bottles?|breast(?:fed|feeding)?|nursing|feeding)\b/i,
+      /\b(?:finished|done) (?:eating|feeding|nursing)\b/i,
+    ],
+    requiredEntities: ['amount?', 'time?', 'type?'],
+    examples: [
+      "Baby fed 4 ounces",
+      "Just nursed for 15 minutes on the left",
+      "She had 120ml of formula at 3pm",
+      "Finished feeding, both sides, 20 minutes total"
+    ]
+  },
+  {
+    intent: VoiceIntent.LOG_SLEEP,
+    patterns: [
+      /\b(?:went|going) (?:to )?(?:sleep|bed|nap)\b/i,
+      // Covers the documented "Down for a nap" example.
+      /\bdown for (?:a )?nap\b/i,
+      /\b(?:woke|wake|waking) up\b/i,
+      // Spelled out per verb: "sleep" + "ping" never matched "sleeping".
+      /\b(?:nap(?:ping|ped)?|sleep(?:ing)?|slept) (?:for|since)\b/i,
+      /\bfell asleep\b/i,
+    ],
+    requiredEntities: ['time?', 'duration?'],
+    examples: [
+      "Down for a nap",
+      "Woke up from nap",
+      "Sleeping since 2pm",
+      "Just fell asleep in the stroller"
+    ]
+  },
+  {
+    intent: VoiceIntent.LOG_DIAPER,
+    patterns: [
+      /\b(?:chang(?:e|ed|ing)|dirty|wet|soil(?:ed)?|poop(?:y|ed|ing|s)?|pee(?:d|ing|s)?)\b/i,
+      /\bdiapers?\b/i,
+      // Accepts "number two", "# 2" and "#2".
+      /(?:\bnumber |# ?)(?:one|two|1|2)\b/i,
+    ],
+    requiredEntities: ['type?'],
+    examples: [
+      "Changed wet diaper",
+      "Dirty diaper with rash",
+      "Just changed a poopy one",
+      "Diaper change, both wet and dirty"
+    ]
+  }
+];
+```
+
+### Multi-Language Patterns
+```typescript
+// Spanish patterns
+// `requiredEntities` is a required member of IntentPattern; it mirrors the
+// English feeding entry so the literal type-checks.
+const spanishPatterns: IntentPattern[] = [
+  {
+    intent: VoiceIntent.LOG_FEEDING,
+    patterns: [
+      /(?:comió|tomó|bebió|amamanté)/i,
+      /(?:biberón|pecho|lactancia)/i,
+    ],
+    requiredEntities: ['amount?', 'time?', 'type?'],
+    examples: [
+      "Tomó 120ml de fórmula",
+      "Amamanté 15 minutos lado izquierdo",
+      "Ya comió papilla"
+    ]
+  }
+];
+
+// French patterns
+// `requiredEntities` is a required member of IntentPattern; it mirrors the
+// English feeding entry so the literal type-checks.
+// Short alternatives such as "bu" and "sein" also occur inside unrelated
+// words ("début", "dessein"), so matches must not touch another letter.
+// `\b` is ASCII-only and misfires next to accented letters, hence the
+// Unicode-aware lookarounds and the `u` flag.
+const frenchPatterns: IntentPattern[] = [
+  {
+    intent: VoiceIntent.LOG_FEEDING,
+    patterns: [
+      // Optional trailing "e" keeps agreeing participles ("nourrie") matching.
+      /(?<!\p{L})(?:mangé|bu|allaitée?|nourrie?)(?!\p{L})/iu,
+      /(?<!\p{L})(?:biberons?|sein|tétées?)(?!\p{L})/iu,
+    ],
+    requiredEntities: ['amount?', 'time?', 'type?'],
+    examples: [
+      "Biberon de 120ml",
+      "Allaité 15 minutes côté gauche",
+      "A mangé sa purée"
+    ]
+  }
+];
+
+// Portuguese patterns
+// `requiredEntities` is a required member of IntentPattern; it mirrors the
+// English feeding entry so the literal type-checks.
+const portuguesePatterns: IntentPattern[] = [
+  {
+    intent: VoiceIntent.LOG_FEEDING,
+    patterns: [
+      /(?:comeu|tomou|bebeu|amamentei)/i,
+      /(?:mamadeira|peito|amamentação)/i,
+    ],
+    requiredEntities: ['amount?', 'time?', 'type?'],
+    examples: [
+      "Tomou 120ml de fórmula",
+      "Amamentei 15 minutos lado esquerdo"
+    ]
+  }
+];
+
+// Chinese patterns
+// `requiredEntities` is a required member of IntentPattern; it mirrors the
+// English feeding entry so the literal type-checks.
+const chinesePatterns: IntentPattern[] = [
+  {
+    intent: VoiceIntent.LOG_FEEDING,
+    patterns: [
+      /(?:喂|吃|喝|哺乳)/,
+      /(?:奶瓶|母乳|配方奶)/,
+    ],
+    requiredEntities: ['amount?', 'time?', 'type?'],
+    examples: [
+      "喝了120毫升配方奶",
+      "母乳喂养15分钟",
+      "吃了辅食"
+    ]
+  }
+];
+```
+
+---
+
+## Entity Extraction
+
+### Entity Types
+```typescript
+/** Structured values pulled out of a transcribed utterance; every field is optional. */
+interface ExtractedEntities {
+  /** Quantity with its unit; note that 'minutes' is allowed here as well as in `duration`. */
+  amount?: {
+    value: number;
+    unit: 'oz' | 'ml' | 'minutes';
+  };
+  /** When the event happened, plus whether that moment is exact or approximate. */
+  time?: {
+    value: Date;
+    precision: 'exact' | 'approximate';
+  };
+  /** Length of the event in minutes or hours. */
+  duration?: {
+    value: number;
+    unit: 'minutes' | 'hours';
+  };
+  side?: 'left' | 'right' | 'both';
+  // NOTE(review): mixes feeding kinds ('breast' | 'bottle' | 'solid') with
+  // diaper kinds ('wet' | 'dirty' | 'both') — presumably disambiguated by the
+  // detected intent; confirm.
+  type?: 'breast' | 'bottle' | 'solid' | 'wet' | 'dirty' | 'both';
+  location?: string;
+  notes?: string;
+}
+```
+
+### Extraction Logic
+```typescript
+/**
+ * Pulls structured values (amount, time, side) out of transcribed speech.
+ */
+class EntityExtractor {
+  /** Spelled-out numbers recognised when no digit-based amount is present. */
+  private static readonly WORD_NUMBERS: Readonly<Record<string, number>> = {
+    one: 1, two: 2, three: 3, four: 4, five: 5,
+    six: 6, seven: 7, eight: 8, nine: 9,
+    ten: 10, fifteen: 15, twenty: 20, thirty: 30,
+  };
+
+  /**
+   * Whole-word, case-insensitive matcher built from the WORD_NUMBERS keys.
+   * Whole words only, so "done" or "often" no longer yield 1 or 10.
+   */
+  private static readonly WORD_NUMBER_PATTERN = new RegExp(
+    `\\b(${Object.keys(EntityExtractor.WORD_NUMBERS).join('|')})\\b`,
+    'i'
+  );
+
+  /**
+   * Extracts a quantity from the text.
+   *
+   * A digit-based amount followed by a unit is preferred; otherwise the first
+   * spelled-out number in the text is used and the unit comes from `inferUnit`.
+   *
+   * @returns The amount, or `undefined` when the text contains none.
+   */
+  extractAmount(text: string): ExtractedEntities['amount'] {
+    // Numeric amounts with units
+    const amountPattern = /(\d+(?:\.\d+)?)\s*(oz|ounce|ml|milliliter|minute|min)/i;
+    const numeric = text.match(amountPattern);
+
+    if (numeric) {
+      return {
+        value: parseFloat(numeric[1]),
+        unit: this.normalizeUnit(numeric[2])
+      };
+    }
+
+    // Word numbers ("Fifteen minutes", "four ounces")
+    const spelled = text.match(EntityExtractor.WORD_NUMBER_PATTERN);
+    const value = spelled
+      ? EntityExtractor.WORD_NUMBERS[spelled[1].toLowerCase()]
+      : undefined;
+
+    if (value !== undefined) {
+      return { value, unit: this.inferUnit(text) };
+    }
+
+    return undefined;
+  }
+
+  /**
+   * Extracts the moment an event happened.
+   *
+   * Precedence: an explicit clock time, then "... ago", then "just"/"now",
+   * and finally the current time marked approximate. Explicit times come
+   * first so "Just fed her at 3pm" resolves to 3pm rather than to now.
+   *
+   * @param text Transcribed utterance.
+   * @param timezone Forwarded unchanged to `parseClockTime`.
+   */
+  extractTime(text: string, timezone: string): ExtractedEntities['time'] {
+    const now = new Date();
+
+    // Clock times. The lookahead requires ":mm" or an am/pm suffix after the
+    // hour, so bare quantities ("4 ounces", "120ml") are not read as hours.
+    // Capture groups stay (hour, minutes, am/pm) for parseClockTime.
+    const timePattern = /\b(\d{1,2})(?=:\d{2}|\s*(?:am|pm)\b):?(\d{2})?\s*(am|pm)?/i;
+    const clock = text.match(timePattern);
+
+    if (clock) {
+      return {
+        value: this.parseClockTime(clock, timezone),
+        precision: 'exact'
+      };
+    }
+
+    // Relative times; whole words only so "Chicago" or "know" do not match.
+    if (/\bago\b/i.test(text)) {
+      const minutesAgo = this.extractMinutesAgo(text);
+      return {
+        value: new Date(now.getTime() - minutesAgo * 60000),
+        precision: 'approximate'
+      };
+    }
+
+    if (/\b(?:just|now)\b/i.test(text)) {
+      return { value: now, precision: 'exact' };
+    }
+
+    return { value: now, precision: 'approximate' };
+  }
+
+  /**
+   * Extracts which side was used, across the supported languages.
+   *
+   * "Both" is checked first so it is not shadowed by a left/right word in the
+   * same sentence. "right now"/"right away" are not treated as a side, and
+   * French needs "les deux"/"des deux" because bare "deux" just means "two".
+   *
+   * @returns The side, or `undefined` when none is mentioned.
+   */
+  extractSide(text: string): ExtractedEntities['side'] {
+    if (/\b(?:both|ambos|ambas|(?:les|des) deux)\b|两|両/i.test(text)) return 'both';
+    if (/\b(?:left|izquierd[oa]|gauche|esquerd[oa])\b|左/i.test(text)) return 'left';
+    if (/\b(?:right(?! (?:now|away)\b)|derech[oa]|droite?|direit[oa])\b|右/i.test(text)) return 'right';
+    return undefined;
+  }
+}
+```
+
+---
+
+## Intent Processing Engine
+
+### Main Processing Flow
+```typescript
+/**
+ * Runs a voice command end to end: transcription, intent detection,
+ * entity extraction, validation and execution.
+ */
+class VoiceCommandProcessor {
+  /**
+   * Processes one audio clip for the given user context.
+   *
+   * Returns early with the low-confidence handler's result when transcription
+   * confidence is below 0.5, and with a clarification request when the
+   * command fails validation.
+   */
+  async processVoiceInput(
+    audioBuffer: Buffer,
+    context: UserContext
+  ): Promise<ProcessedCommand> {
+    // Step 1: speech to text
+    const transcription = await this.whisperService.transcribeAudio(
+      audioBuffer,
+      context.language
+    );
+
+    const isReliable = !(transcription.confidence < 0.5);
+    if (!isReliable) {
+      return this.handleLowConfidence(transcription);
+    }
+
+    // Step 2: what does the speaker want?
+    const intent = await this.detectIntent(transcription.text, context.language);
+
+    // Step 3: pull out the details
+    const entities = await this.extractEntities(transcription.text, intent, context);
+
+    // Step 4: make sure nothing required is missing
+    const validation = this.validateCommand(intent, entities);
+
+    // Step 5: run the command, or ask for whatever is missing
+    return validation.isValid
+      ? this.executeCommand(intent, entities, context)
+      : this.requestClarification(validation.missingInfo);
+  }
+
+  /**
+   * Resolves the intent of `text`: the first language pattern with a matching
+   * regex wins; if none match, AI-based detection is used instead.
+   */
+  private async detectIntent(
+    text: string,
+    language: string
+  ): Promise<VoiceIntent> {
+    for (const candidate of this.getPatternsByLanguage(language)) {
+      const matches = candidate.patterns.some((expression) => expression.test(text));
+      if (matches) {
+        return candidate.intent;
+      }
+    }
+
+    // No regex matched
+    return this.detectIntentWithAI(text, language);
+  }
+}
+```
+
+---
+
+## Error Recovery
+
+### Common Recognition Errors
+```typescript
+/** Describes why a transcription could not be turned into a command. */
+interface RecognitionError {
+  /** Category of the failure. */
+  type: 'LOW_CONFIDENCE' | 'AMBIGUOUS' | 'MISSING_DATA' | 'INVALID_VALUE';
+  /** The transcribed text that triggered the error. */
+  originalText: string;
+  /** Optional alternative phrasings. */
+  suggestions?: string[];
+}
+
+/**
+ * Recovery helpers for transcriptions that scored too low to act on directly.
+ */
+class ErrorRecovery {
+  /**
+   * Known mis-transcriptions and their intended phrases. Matching is
+   * case-insensitive, on whole words, and replaces every occurrence.
+   * There is deliberately no entry mapping a phrase to itself: such an entry
+   * would report an unchanged text as a confident "correction".
+   */
+  private static readonly MISHEARD_PHRASES: ReadonlyArray<readonly [RegExp, string]> = [
+    [/\bfor ounces\b/gi, 'four ounces'],
+    [/\bto ounces\b/gi, 'two ounces'],
+    [/\bwrite side\b/gi, 'right side'],
+    [/\blaugh side\b/gi, 'left side'],
+    [/\bwedding dirty\b/gi, 'wet and dirty'],
+  ];
+
+  /**
+   * Handles a low-confidence transcription: retries with a corrected text
+   * when a known mishear was fixed, otherwise asks the user to confirm.
+   */
+  handleLowConfidence(transcription: TranscriptionResult): ProcessedCommand {
+    // Check for common misheard phrases
+    const corrections = this.checkCommonMishears(transcription.text);
+
+    if (corrections.confidence > 0.7) {
+      return this.retryWithCorrection(corrections.text);
+    }
+
+    return {
+      success: false,
+      action: 'CONFIRM',
+      message: `Did you say "${transcription.text}"?`,
+      alternatives: this.getSimilarPhrases(transcription.text)
+    };
+  }
+
+  /**
+   * Replaces known misheard phrases in `text`.
+   *
+   * @returns The corrected text with confidence 0.8 when at least one
+   *   replacement changed the text; otherwise the original text with
+   *   confidence 0.3.
+   */
+  checkCommonMishears(text: string): CorrectionResult {
+    let corrected = text;
+    for (const [misheard, intended] of ErrorRecovery.MISHEARD_PHRASES) {
+      corrected = corrected.replace(misheard, intended);
+    }
+
+    // Only claim a correction when the text actually changed.
+    if (corrected !== text) {
+      return { text: corrected, confidence: 0.8 };
+    }
+
+    return { text, confidence: 0.3 };
+  }
+}
+```
+
+### Clarification Prompts
+```typescript
+// Follow-up questions keyed first by what is missing or unclear, then by
+// language code (en, es, fr, pt, zh).
+const clarificationPrompts = {
+  // No amount was found in the utterance.
+  MISSING_AMOUNT: {
+    en: "How much did baby eat?",
+    es: "¿Cuánto comió el bebé?",
+    fr: "Combien a mangé bébé?",
+    pt: "Quanto o bebê comeu?",
+    zh: "宝宝吃了多少？"
+  },
+  // No time was found in the utterance.
+  MISSING_TIME: {
+    en: "When did this happen?",
+    es: "¿Cuándo ocurrió esto?",
+    fr: "Quand cela s'est-il passé?",
+    pt: "Quando isso aconteceu?",
+    zh: "这是什么时候发生的？"
+  },
+  // The intent could not be determined.
+  AMBIGUOUS_INTENT: {
+    en: "What would you like to log?",
+    es: "¿Qué te gustaría registrar?",
+    fr: "Que souhaitez-vous enregistrer?",
+    pt: "O que você gostaria de registrar?",
+    zh: "您想记录什么？"
+  }
+};
+```
+
+---
+
+## Offline Voice Processing
+
+### Fallback Strategy
+```typescript
+/**
+ * Fallback transcription path for when the online service is unavailable.
+ */
+class OfflineVoiceProcessor {
+  /**
+   * Routes the audio to the platform's native recognizer on iOS and Android;
+   * on any other platform the audio is queued for online processing.
+   */
+  async processOffline(audioBuffer: Buffer): Promise<BasicTranscription> {
+    switch (Platform.OS) {
+      case 'ios':
+        return this.useiOSSpeechRecognition(audioBuffer);
+      case 'android':
+        return this.useAndroidSpeechRecognition(audioBuffer);
+      default:
+        // No native recognizer available: defer until back online
+        return this.queueForOnlineProcessing(audioBuffer);
+    }
+  }
+
+  /** Recognises speech through an SFSpeechRecognizer instance. */
+  private async useiOSSpeechRecognition(audio: Buffer) {
+    return new SFSpeechRecognizer().recognize(audio);
+  }
+
+  /** Recognises speech through an AndroidSpeechRecognizer instance. */
+  private async useAndroidSpeechRecognition(audio: Buffer) {
+    return new AndroidSpeechRecognizer().recognize(audio);
+  }
+}
+```
+
+---
+
+## Confirmation & Feedback
+
+### Voice Feedback System
+```typescript
+/** Multi-channel confirmation: on-screen text, spoken SSML, visuals and optional haptics. */
+interface VoiceConfirmation {
+  /** Short message shown as text. */
+  text: string;
+  speech: string; // SSML for TTS
+  /** Visual treatment accompanying the confirmation. */
+  visual: {
+    icon: string;
+    color: string;
+    animation: string;
+  };
+  /** Optional haptic feedback category. */
+  haptic?: 'success' | 'warning' | 'error';
+}
+
+// Confirmation payloads keyed by event name.
+const confirmations = {
+  FEEDING_LOGGED: {
+    text: "Feeding logged",
+    // NOTE(review): the spoken amount ("4 ounces") is hard-coded in the SSML;
+    // presumably this should be filled in from the logged amount — confirm.
+    speech: "<speak>Got it! <break time='200ms'/> Logged <say-as interpret-as='cardinal'>4</say-as> ounces.</speak>",
+    visual: {
+      icon: 'check_circle',
+      color: 'success',
+      animation: 'bounce'
+    },
+    haptic: 'success'
+  }
+};
+```
+
+---
+
+## Testing Voice Commands
+
+### Test Scenarios
+```typescript
+// Fixtures pairing a spoken phrase with the intent and entity values it is
+// expected to produce.
+const voiceTestCases = [
+  // English
+  { input: "Baby ate 4 ounces", expected: { intent: 'LOG_FEEDING', amount: 4, unit: 'oz' }},
+  { input: "Nursed for fifteen minutes on the left", expected: { intent: 'LOG_FEEDING', duration: 15, side: 'left' }},
+  
+  // Spanish
+  { input: "Tomó 120 mililitros", expected: { intent: 'LOG_FEEDING', amount: 120, unit: 'ml' }},
+  
+  // Edge cases
+  // NOTE(review): in "4 or 5 ounces" only "5" is directly followed by a unit,
+  // so a number-plus-unit regex would pick 5, not the expected 4 — confirm
+  // which value is intended.
+  { input: "Fed... um... about 4 or 5 ounces", expected: { intent: 'LOG_FEEDING', amount: 4, confidence: 'low' }},
+  { input: "Changed a really dirty diaper", expected: { intent: 'LOG_DIAPER', type: 'dirty', notes: 'really dirty' }},
+];
+```
+
+---
+
+## Performance Optimization
+
+### Audio Streaming
+```typescript
+/**
+ * Buffers incoming audio chunks and starts processing once enough audio has
+ * accumulated, allowing only one processing run at a time.
+ */
+class StreamingVoiceProcessor {
+  /**
+   * Minimum number of buffered bytes before processing starts.
+   * NOTE(review): Buffer lengths are in bytes, so 8000 is ~0.5s at 16kHz only
+   * for 8-bit mono samples; 16-bit PCM would need 16000 bytes for the same
+   * duration — confirm the capture format.
+   */
+  private static readonly MIN_BUFFERED_BYTES = 8000;
+
+  private audioChunks: Buffer[] = [];
+  private isProcessing = false;
+
+  /**
+   * Appends a chunk and, when no run is active and enough audio is buffered,
+   * processes the buffered chunks.
+   *
+   * @returns The result of `processChunks`, or `undefined` when this call
+   *   only buffered the chunk.
+   */
+  async processStream(chunk: Buffer) {
+    this.audioChunks.push(chunk);
+
+    if (this.isProcessing || !this.hasEnoughAudio()) {
+      return undefined;
+    }
+
+    this.isProcessing = true;
+    try {
+      return await this.processChunks();
+    } finally {
+      // Always release the flag; otherwise one failed run would block every
+      // later chunk from ever being processed.
+      this.isProcessing = false;
+    }
+  }
+
+  /** Whether the buffered chunks together exceed the minimum byte threshold. */
+  private hasEnoughAudio(): boolean {
+    const totalSize = this.audioChunks.reduce((sum, chunk) => sum + chunk.length, 0);
+    return totalSize > StreamingVoiceProcessor.MIN_BUFFERED_BYTES;
+  }
+}
+```
+
+### Caching Common Commands
+```typescript
+// LRU cache of processed commands keyed by string: at most 100 entries,
+// each kept for one hour.
+const commandCache = new LRUCache<string, ProcessedCommand>({
+  max: 100,
+  ttl: 1000 * 60 * 60, // 1 hour
+});
+
+// Cache exact matches for common phrases
+// NOTE(review): nothing in this snippet feeds these phrases into
+// commandCache; presumably done elsewhere — confirm.
+const cachedPhrases = [
+  "wet diaper",
+  "dirty diaper", 
+  "just nursed",
+  "bottle feeding done",
+  "down for a nap",
+  "woke up"
+];
+```