Add iOS Safari support for voice commands with MediaRecorder fallback

Frontend changes:
- Add MediaRecorder fallback for iOS Safari (no Web Speech API support)
- Automatically detect browser capabilities and use appropriate method
- Add usesFallback flag to track which method is being used
- Update UI to show "Recording..." vs "Listening..." based on method
- Add iOS-specific indicator text
- Handle microphone permissions and errors properly
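The capability detection described above could be sketched as a small pure helper (an illustrative sketch, not the commit's actual code; the `Capabilities` shape and `pickMethod` name are made up here):

```typescript
// Minimal sketch of the browser-capability detection described above.
// Names (Capabilities, pickMethod) are illustrative, not from the commit.
type Capabilities = { hasWebSpeech: boolean; hasMediaRecorder: boolean };

type Method = 'web-speech' | 'media-recorder' | 'unsupported';

function pickMethod(caps: Capabilities): Method {
  // Prefer the Web Speech API (Chrome/Edge); fall back to MediaRecorder
  // (iOS Safari); report 'unsupported' when neither exists.
  if (caps.hasWebSpeech) return 'web-speech';
  if (caps.hasMediaRecorder) return 'media-recorder';
  return 'unsupported';
}

// In a browser, the capabilities would be derived roughly as:
// const caps: Capabilities = {
//   hasWebSpeech:
//     'SpeechRecognition' in window || 'webkitSpeechRecognition' in window,
//   hasMediaRecorder: typeof MediaRecorder !== 'undefined',
// };
```

Keeping the decision in a pure function like this makes the fallback choice testable without a real browser.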

Backend changes:
- Update /api/v1/voice/transcribe to accept both audio files and text
- Support text-based classification (from Web Speech API)
- Support audio file transcription + classification (from MediaRecorder)
- Return unified response format with transcript and classification
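The two request shapes the unified endpoint accepts could be built like this (a sketch; the field names `audio`, `text`, `language`, and `childName` come from the controller's decorators, while the helper names are invented for illustration):

```typescript
// Sketch of the two payloads /api/v1/voice/transcribe accepts.
// Field names match the controller decorators; helper names are illustrative.
// Requires Node 18+ (global FormData/Blob) or a browser.

function buildTextForm(transcript: string, language = 'en', childName?: string): FormData {
  const form = new FormData();
  form.append('text', transcript); // Web Speech API path: server classifies directly
  form.append('language', language);
  if (childName) form.append('childName', childName);
  return form;
}

function buildAudioForm(audio: Blob, language = 'en', childName?: string): FormData {
  const form = new FormData();
  form.append('audio', audio, 'command.webm'); // MediaRecorder path: server transcribes first
  form.append('language', language);
  if (childName) form.append('childName', childName);
  return form;
}

// Either form is POSTed to the same endpoint:
// await fetch('/api/v1/voice/transcribe', { method: 'POST', body: form });
```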

How it works:
- Chrome/Edge: Uses Web Speech API for real-time transcription
- iOS Safari: Records audio with MediaRecorder, sends to server for transcription
- Fallback is transparent to the user with appropriate UI feedback
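The UI feedback mentioned above reduces to a small state-to-label mapping driven by the `usesFallback` flag (a sketch; the function name and idle label are illustrative):

```typescript
// Sketch of the status text driven by the usesFallback flag.
// "Listening..." implies live Web Speech transcription; "Recording..."
// implies the MediaRecorder fallback (audio is sent to the server afterwards).
function statusLabel(active: boolean, usesFallback: boolean): string {
  if (!active) return 'Tap to speak'; // illustrative idle label
  return usesFallback ? 'Recording...' : 'Listening...';
}
```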

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-02 05:59:26 +00:00
parent ff69848ec5
commit 330c776124
3 changed files with 190 additions and 26 deletions


@@ -18,20 +18,46 @@ export class VoiceController {
   @UseInterceptors(FileInterceptor('audio'))
   async transcribeAudio(
     @UploadedFile() file: Express.Multer.File,
+    @Body('text') text?: string,
     @Body('language') language?: string,
     @Body('childName') childName?: string,
   ) {
-    if (!file) {
-      throw new BadRequestException('Audio file is required');
+    // If text is provided (from Web Speech API), classify it directly
+    if (text) {
+      const result = await this.voiceService.extractActivityFromText(
+        text,
+        language || 'en',
+        childName,
+      );
+      return {
+        success: true,
+        transcript: text,
+        classification: result,
+      };
     }
 
-    const result = await this.voiceService.transcribeAudio(
+    // Otherwise, transcribe the audio file
+    if (!file) {
+      throw new BadRequestException('Audio file or text is required');
+    }
+
+    const transcription = await this.voiceService.transcribeAudio(
       file.buffer,
       language,
     );
 
+    // Also classify the transcription
+    const classification = await this.voiceService.extractActivityFromText(
+      transcription.text,
+      language || 'en',
+      childName,
+    );
+
     return {
       success: true,
-      data: result,
+      transcript: transcription.text,
+      classification: classification,
     };
   }
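A client consuming this controller might unpack the unified response like this (the field names `success`, `transcript`, and `classification` are from the diff; the parser itself is a sketch with an invented name):

```typescript
// Sketch: parse the unified response returned by both branches of the
// controller (text classification, and audio transcription + classification).
interface TranscribeResponse {
  success: boolean;
  transcript: string;
  classification: unknown;
}

function parseTranscribeResponse(raw: string): TranscribeResponse {
  const res = JSON.parse(raw) as TranscribeResponse;
  if (!res.success || typeof res.transcript !== 'string') {
    throw new Error('Unexpected /api/v1/voice/transcribe response');
  }
  return res;
}
```

Because both server paths return the same shape, the frontend does not need to know which transcription method produced the result.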