From 46167a83076c8efe7e9db8946e813dd7100b1d7d Mon Sep 17 00:00:00 2001
From: Andrei
Date: Thu, 2 Oct 2025 06:12:21 +0000
Subject: [PATCH] Configure voice service to use Azure OpenAI Whisper for
 transcription
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Add support for Azure OpenAI Whisper API for audio transcription
- Separate Whisper client for transcription and Chat client for activity extraction
- Auto-detect Azure vs standard OpenAI based on AZURE_OPENAI_ENABLED flag
- Use configured Azure deployments (whisper and gpt-5-mini)
- Add proper logging for service initialization

This fixes the "Audio transcription not yet implemented" error on iOS Safari
by enabling the already-configured Azure Whisper service.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude
---
 .../src/modules/voice/voice.service.ts | 64 +++++++++++++++----
 1 file changed, 52 insertions(+), 12 deletions(-)

diff --git a/maternal-app/maternal-app-backend/src/modules/voice/voice.service.ts b/maternal-app/maternal-app-backend/src/modules/voice/voice.service.ts
index 9bee0b5..a587b8d 100644
--- a/maternal-app/maternal-app-backend/src/modules/voice/voice.service.ts
+++ b/maternal-app/maternal-app-backend/src/modules/voice/voice.service.ts
@@ -20,20 +20,60 @@ export interface ActivityExtractionResult {
 @Injectable()
 export class VoiceService {
   private openai: OpenAI;
+  private chatOpenAI: OpenAI;
   private logger = new Logger('VoiceService');
 
   // Supported languages for MVP
   private readonly SUPPORTED_LANGUAGES = ['en', 'es', 'fr', 'pt', 'zh'];
 
   constructor(private configService: ConfigService) {
-    const apiKey = this.configService.get('OPENAI_API_KEY');
+    // Check if Azure OpenAI is enabled
+    const azureEnabled = this.configService.get('AZURE_OPENAI_ENABLED');
 
-    if (!apiKey) {
-      this.logger.warn('OPENAI_API_KEY not configured. Voice features will be disabled.');
+    if (azureEnabled) {
+      // Use Azure OpenAI for both Whisper and Chat
+      const whisperEndpoint = this.configService.get('AZURE_OPENAI_WHISPER_ENDPOINT');
+      const whisperKey = this.configService.get('AZURE_OPENAI_WHISPER_API_KEY');
+      const chatEndpoint = this.configService.get('AZURE_OPENAI_CHAT_ENDPOINT');
+      const chatKey = this.configService.get('AZURE_OPENAI_CHAT_API_KEY');
+
+      if (whisperEndpoint && whisperKey) {
+        this.openai = new OpenAI({
+          apiKey: whisperKey,
+          baseURL: `${whisperEndpoint}/openai/deployments/${this.configService.get('AZURE_OPENAI_WHISPER_DEPLOYMENT')}`,
+          defaultQuery: { 'api-version': this.configService.get('AZURE_OPENAI_WHISPER_API_VERSION') },
+          defaultHeaders: { 'api-key': whisperKey },
+        });
+        this.logger.log('Azure OpenAI Whisper configured for voice transcription');
+      } else {
+        this.logger.warn('Azure OpenAI Whisper not fully configured. Voice transcription will be disabled.');
+      }
+
+      if (chatEndpoint && chatKey) {
+        this.chatOpenAI = new OpenAI({
+          apiKey: chatKey,
+          baseURL: `${chatEndpoint}/openai/deployments/${this.configService.get('AZURE_OPENAI_CHAT_DEPLOYMENT')}`,
+          defaultQuery: { 'api-version': this.configService.get('AZURE_OPENAI_CHAT_API_VERSION') },
+          defaultHeaders: { 'api-key': chatKey },
+        });
+        this.logger.log('Azure OpenAI Chat configured for activity extraction');
+      } else {
+        this.logger.warn('Azure OpenAI Chat not configured. Using Whisper client for chat.');
+        this.chatOpenAI = this.openai;
+      }
     } else {
-      this.openai = new OpenAI({
-        apiKey,
-      });
+      // Use standard OpenAI
+      const apiKey = this.configService.get('OPENAI_API_KEY');
+
+      if (!apiKey || apiKey === 'sk-your-openai-api-key-here') {
+        this.logger.warn('OPENAI_API_KEY not configured. Voice features will be disabled.');
+      } else {
+        this.openai = new OpenAI({
+          apiKey,
+        });
+        this.chatOpenAI = this.openai;
+        this.logger.log('OpenAI configured for voice transcription');
+      }
     }
   }
 
@@ -97,8 +137,8 @@ export class VoiceService {
     language: string,
     childName?: string,
   ): Promise<ActivityExtractionResult> {
-    if (!this.openai) {
-      throw new BadRequestException('Voice service not configured');
+    if (!this.chatOpenAI) {
+      throw new BadRequestException('Chat service not configured');
     }
 
     try {
@@ -128,7 +168,7 @@ If the text doesn't describe a trackable activity, respond with:
       ? `Child name: ${childName}\nUser said: "${text}"`
       : `User said: "${text}"`;
 
-      const completion = await this.openai.chat.completions.create({
+      const completion = await this.chatOpenAI.chat.completions.create({
         model: 'gpt-4o-mini',
         messages: [
           { role: 'system', content: systemPrompt },
@@ -194,8 +234,8 @@ If the text doesn't describe a trackable activity, respond with:
     activityType: string,
     language: string,
   ): Promise<string> {
-    if (!this.openai) {
-      throw new BadRequestException('Voice service not configured');
+    if (!this.chatOpenAI) {
+      throw new BadRequestException('Chat service not configured');
     }
 
     try {
@@ -205,7 +245,7 @@ The user provided input about a "${activityType}" activity, but some information
 Generate a brief, friendly clarification question in ${language} to help complete the activity log. Respond ONLY with the question text, no formatting.`;
 
-      const completion = await this.openai.chat.completions.create({
+      const completion = await this.chatOpenAI.chat.completions.create({
         model: 'gpt-4o-mini',
         messages: [
           { role: 'system', content: systemPrompt },
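
Reviewer note: below is a minimal, standalone sketch for smoke-testing the
Azure Whisper client that the constructor in this patch builds. The env var
names match the diff; the script itself, the sample.webm test clip, and the
'whisper-1' model string are assumptions for illustration (on Azure the
deployment name in the base URL selects the model, and the SDK just needs
some value for the required field).

import fs from 'fs';
import OpenAI from 'openai';

async function main() {
  const endpoint = process.env.AZURE_OPENAI_WHISPER_ENDPOINT; // e.g. https://<resource>.openai.azure.com
  const key = process.env.AZURE_OPENAI_WHISPER_API_KEY;
  const deployment = process.env.AZURE_OPENAI_WHISPER_DEPLOYMENT;
  if (!endpoint || !key || !deployment) {
    throw new Error('Azure Whisper env vars missing');
  }

  // Mirrors the client construction in the patch: the deployment is baked
  // into the base URL, and api-version/api-key ride along on every request.
  const client = new OpenAI({
    apiKey: key,
    baseURL: `${endpoint}/openai/deployments/${deployment}`,
    defaultQuery: { 'api-version': process.env.AZURE_OPENAI_WHISPER_API_VERSION },
    defaultHeaders: { 'api-key': key },
  });

  // The SDK appends /audio/transcriptions to baseURL, so this issues
  // POST {endpoint}/openai/deployments/{deployment}/audio/transcriptions?api-version=...
  const result = await client.audio.transcriptions.create({
    file: fs.createReadStream('sample.webm'), // hypothetical test clip
    model: 'whisper-1',
  });
  console.log(result.text);
}

main().catch(console.error);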