Configure voice service to use Azure OpenAI Whisper for transcription

- Add support for the Azure OpenAI Whisper API for audio transcription
- Use a separate Whisper client for transcription and a Chat client for activity extraction
- Auto-detect Azure vs. standard OpenAI based on the AZURE_OPENAI_ENABLED flag
- Use the configured Azure deployments (whisper and gpt-5-mini)
- Log service initialization status

This fixes the "Audio transcription not yet implemented" error on iOS Safari
by enabling the already-configured Azure Whisper service.
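
The constructor reads the following environment variables. The names come from the diff below; the values shown are placeholders (whisper and gpt-5-mini are the deployment names mentioned above), so treat this as a sketch rather than the project's actual settings:

```
AZURE_OPENAI_ENABLED=true
AZURE_OPENAI_WHISPER_ENDPOINT=https://<resource>.openai.azure.com
AZURE_OPENAI_WHISPER_API_KEY=<key>
AZURE_OPENAI_WHISPER_DEPLOYMENT=whisper
AZURE_OPENAI_WHISPER_API_VERSION=<api-version>
AZURE_OPENAI_CHAT_ENDPOINT=https://<resource>.openai.azure.com
AZURE_OPENAI_CHAT_API_KEY=<key>
AZURE_OPENAI_CHAT_DEPLOYMENT=gpt-5-mini
AZURE_OPENAI_CHAT_API_VERSION=<api-version>
# Fallback when AZURE_OPENAI_ENABLED is unset: standard OpenAI
OPENAI_API_KEY=sk-...
```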

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-02 06:12:21 +00:00
parent 26d3f8962f
commit 46167a8307

@@ -20,20 +20,60 @@ export interface ActivityExtractionResult {
 @Injectable()
 export class VoiceService {
   private openai: OpenAI;
+  private chatOpenAI: OpenAI;
   private logger = new Logger('VoiceService');

   // Supported languages for MVP
   private readonly SUPPORTED_LANGUAGES = ['en', 'es', 'fr', 'pt', 'zh'];

   constructor(private configService: ConfigService) {
-    const apiKey = this.configService.get<string>('OPENAI_API_KEY');
+    // Check if Azure OpenAI is enabled
+    const azureEnabled = this.configService.get<boolean>('AZURE_OPENAI_ENABLED');

-    if (!apiKey) {
-      this.logger.warn('OPENAI_API_KEY not configured. Voice features will be disabled.');
+    if (azureEnabled) {
+      // Use Azure OpenAI for both Whisper and Chat
+      const whisperEndpoint = this.configService.get<string>('AZURE_OPENAI_WHISPER_ENDPOINT');
+      const whisperKey = this.configService.get<string>('AZURE_OPENAI_WHISPER_API_KEY');
+      const chatEndpoint = this.configService.get<string>('AZURE_OPENAI_CHAT_ENDPOINT');
+      const chatKey = this.configService.get<string>('AZURE_OPENAI_CHAT_API_KEY');
+
+      if (whisperEndpoint && whisperKey) {
+        this.openai = new OpenAI({
+          apiKey: whisperKey,
+          baseURL: `${whisperEndpoint}/openai/deployments/${this.configService.get<string>('AZURE_OPENAI_WHISPER_DEPLOYMENT')}`,
+          defaultQuery: { 'api-version': this.configService.get<string>('AZURE_OPENAI_WHISPER_API_VERSION') },
+          defaultHeaders: { 'api-key': whisperKey },
+        });
+        this.logger.log('Azure OpenAI Whisper configured for voice transcription');
+      } else {
+        this.logger.warn('Azure OpenAI Whisper not fully configured. Voice transcription will be disabled.');
+      }
+
+      if (chatEndpoint && chatKey) {
+        this.chatOpenAI = new OpenAI({
+          apiKey: chatKey,
+          baseURL: `${chatEndpoint}/openai/deployments/${this.configService.get<string>('AZURE_OPENAI_CHAT_DEPLOYMENT')}`,
+          defaultQuery: { 'api-version': this.configService.get<string>('AZURE_OPENAI_CHAT_API_VERSION') },
+          defaultHeaders: { 'api-key': chatKey },
+        });
+        this.logger.log('Azure OpenAI Chat configured for activity extraction');
+      } else {
+        this.logger.warn('Azure OpenAI Chat not configured. Using Whisper client for chat.');
+        this.chatOpenAI = this.openai;
+      }
     } else {
-      this.openai = new OpenAI({
-        apiKey,
-      });
+      // Use standard OpenAI
+      const apiKey = this.configService.get<string>('OPENAI_API_KEY');
+      if (!apiKey || apiKey === 'sk-your-openai-api-key-here') {
+        this.logger.warn('OPENAI_API_KEY not configured. Voice features will be disabled.');
+      } else {
+        this.openai = new OpenAI({
+          apiKey,
+        });
+        this.chatOpenAI = this.openai;
+        this.logger.log('OpenAI configured for voice transcription');
+      }
     }
   }
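
A note on the baseURL pattern above: the OpenAI SDK appends its own route (for example /audio/transcriptions) to baseURL and merges in defaultQuery, which produces Azure's deployment-scoped URL. A minimal sketch, assuming placeholder endpoint and api-version values (neither concrete value appears in this diff):

```typescript
// How the SDK composes the final Azure request URL for a transcription call.
// All concrete values below are placeholders, not the project's settings.
const endpoint = 'https://my-resource.openai.azure.com'; // AZURE_OPENAI_WHISPER_ENDPOINT
const deployment = 'whisper';                            // AZURE_OPENAI_WHISPER_DEPLOYMENT
const apiVersion = '2024-06-01';                         // AZURE_OPENAI_WHISPER_API_VERSION (assumed)

const requestUrl =
  `${endpoint}/openai/deployments/${deployment}` + // baseURL set in the constructor
  '/audio/transcriptions' +                        // route appended by the SDK
  `?api-version=${apiVersion}`;                    // merged in from defaultQuery
// => https://my-resource.openai.azure.com/openai/deployments/whisper/audio/transcriptions?api-version=2024-06-01
```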
@@ -97,8 +137,8 @@ export class VoiceService {
     language: string,
     childName?: string,
   ): Promise<ActivityExtractionResult> {
-    if (!this.openai) {
-      throw new BadRequestException('Voice service not configured');
+    if (!this.chatOpenAI) {
+      throw new BadRequestException('Chat service not configured');
     }

     try {
@@ -128,7 +168,7 @@ If the text doesn't describe a trackable activity, respond with:
       ? `Child name: ${childName}\nUser said: "${text}"`
       : `User said: "${text}"`;

-    const completion = await this.openai.chat.completions.create({
+    const completion = await this.chatOpenAI.chat.completions.create({
       model: 'gpt-4o-mini',
       messages: [
         { role: 'system', content: systemPrompt },
@@ -194,8 +234,8 @@ If the text doesn't describe a trackable activity, respond with:
     activityType: string,
     language: string,
   ): Promise<string> {
-    if (!this.openai) {
-      throw new BadRequestException('Voice service not configured');
+    if (!this.chatOpenAI) {
+      throw new BadRequestException('Chat service not configured');
     }

     try {
@@ -205,7 +245,7 @@ The user provided input about a "${activityType}" activity, but some information
 Generate a brief, friendly clarification question in ${language} to help complete the activity log.
 Respond ONLY with the question text, no formatting.`;

-    const completion = await this.openai.chat.completions.create({
+    const completion = await this.chatOpenAI.chat.completions.create({
       model: 'gpt-4o-mini',
       messages: [
         { role: 'system', content: systemPrompt },
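
For context, the transcription path itself is not part of this diff. Below is a minimal sketch of how the Whisper client configured above could be called; transcribeAudio, the Buffer handling, and the 'whisper-1' model value are assumptions, while audio.transcriptions.create is the standard OpenAI Node SDK call:

```typescript
import OpenAI, { toFile } from 'openai';

// Hypothetical helper; the real transcription method lives elsewhere in VoiceService.
async function transcribeAudio(
  openai: OpenAI,   // the Whisper-configured client built in the constructor
  audio: Buffer,    // raw audio from the client (e.g. an iOS Safari recording)
  language: string, // one of SUPPORTED_LANGUAGES: 'en' | 'es' | 'fr' | 'pt' | 'zh'
): Promise<string> {
  const transcription = await openai.audio.transcriptions.create({
    // The SDK requires `model`; with the Azure baseURL, the deployment segment
    // of the URL is what actually selects Whisper (assumed Azure routing).
    model: 'whisper-1',
    file: await toFile(audio, 'recording.webm'),
    language,
  });
  return transcription.text;
}
```

The same routing applies to the chat calls: with the Azure chat baseURL in place, the hard-coded 'gpt-4o-mini' value is still sent, but the AZURE_OPENAI_CHAT_DEPLOYMENT (gpt-5-mini per the commit message) serves the request, assuming standard Azure OpenAI deployment routing.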