From 46167a83076c8efe7e9db8946e813dd7100b1d7d Mon Sep 17 00:00:00 2001
From: Andrei
Date: Thu, 2 Oct 2025 06:12:21 +0000
Subject: [PATCH] Configure voice service to use Azure OpenAI Whisper for
 transcription
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Add support for Azure OpenAI Whisper API for audio transcription
- Separate Whisper client for transcription and Chat client for activity extraction
- Auto-detect Azure vs standard OpenAI based on AZURE_OPENAI_ENABLED flag
- Use configured Azure deployments (whisper and gpt-5-mini)
- Add proper logging for service initialization

This fixes the "Audio transcription not yet implemented" error on iOS Safari
by enabling the already-configured Azure Whisper service.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude
---
 .../src/modules/voice/voice.service.ts | 64 +++++++++++++++----
 1 file changed, 52 insertions(+), 12 deletions(-)

diff --git a/maternal-app/maternal-app-backend/src/modules/voice/voice.service.ts b/maternal-app/maternal-app-backend/src/modules/voice/voice.service.ts
index 9bee0b5..a587b8d 100644
--- a/maternal-app/maternal-app-backend/src/modules/voice/voice.service.ts
+++ b/maternal-app/maternal-app-backend/src/modules/voice/voice.service.ts
@@ -20,20 +20,60 @@ export interface ActivityExtractionResult {
 @Injectable()
 export class VoiceService {
   private openai: OpenAI;
+  private chatOpenAI: OpenAI;
   private logger = new Logger('VoiceService');
 
   // Supported languages for MVP
   private readonly SUPPORTED_LANGUAGES = ['en', 'es', 'fr', 'pt', 'zh'];
 
   constructor(private configService: ConfigService) {
-    const apiKey = this.configService.get('OPENAI_API_KEY');
+    // Check if Azure OpenAI is enabled
+    const azureEnabled = this.configService.get('AZURE_OPENAI_ENABLED');
 
-    if (!apiKey) {
-      this.logger.warn('OPENAI_API_KEY not configured. Voice features will be disabled.');
+    if (azureEnabled) {
+      // Use Azure OpenAI for both Whisper and Chat
+      const whisperEndpoint = this.configService.get('AZURE_OPENAI_WHISPER_ENDPOINT');
+      const whisperKey = this.configService.get('AZURE_OPENAI_WHISPER_API_KEY');
+      const chatEndpoint = this.configService.get('AZURE_OPENAI_CHAT_ENDPOINT');
+      const chatKey = this.configService.get('AZURE_OPENAI_CHAT_API_KEY');
+
+      if (whisperEndpoint && whisperKey) {
+        this.openai = new OpenAI({
+          apiKey: whisperKey,
+          baseURL: `${whisperEndpoint}/openai/deployments/${this.configService.get('AZURE_OPENAI_WHISPER_DEPLOYMENT')}`,
+          defaultQuery: { 'api-version': this.configService.get('AZURE_OPENAI_WHISPER_API_VERSION') },
+          defaultHeaders: { 'api-key': whisperKey },
+        });
+        this.logger.log('Azure OpenAI Whisper configured for voice transcription');
+      } else {
+        this.logger.warn('Azure OpenAI Whisper not fully configured. Voice transcription will be disabled.');
+      }
+
+      if (chatEndpoint && chatKey) {
+        this.chatOpenAI = new OpenAI({
+          apiKey: chatKey,
+          baseURL: `${chatEndpoint}/openai/deployments/${this.configService.get('AZURE_OPENAI_CHAT_DEPLOYMENT')}`,
+          defaultQuery: { 'api-version': this.configService.get('AZURE_OPENAI_CHAT_API_VERSION') },
+          defaultHeaders: { 'api-key': chatKey },
+        });
+        this.logger.log('Azure OpenAI Chat configured for activity extraction');
+      } else {
+        this.logger.warn('Azure OpenAI Chat not configured. Using Whisper client for chat.');
+        this.chatOpenAI = this.openai;
+      }
     } else {
-      this.openai = new OpenAI({
-        apiKey,
-      });
+      // Use standard OpenAI
+      const apiKey = this.configService.get('OPENAI_API_KEY');
+
+      if (!apiKey || apiKey === 'sk-your-openai-api-key-here') {
+        this.logger.warn('OPENAI_API_KEY not configured. Voice features will be disabled.');
+      } else {
+        this.openai = new OpenAI({
+          apiKey,
+        });
+        this.chatOpenAI = this.openai;
+        this.logger.log('OpenAI configured for voice transcription');
+      }
     }
   }
 
@@ -97,8 +137,8 @@ export class VoiceService {
     language: string,
     childName?: string,
   ): Promise<ActivityExtractionResult> {
-    if (!this.openai) {
-      throw new BadRequestException('Voice service not configured');
+    if (!this.chatOpenAI) {
+      throw new BadRequestException('Chat service not configured');
     }
 
     try {
@@ -128,7 +168,7 @@ If the text doesn't describe a trackable activity, respond with:
       ? `Child name: ${childName}\nUser said: "${text}"`
       : `User said: "${text}"`;
 
-      const completion = await this.openai.chat.completions.create({
+      const completion = await this.chatOpenAI.chat.completions.create({
         model: 'gpt-4o-mini',
         messages: [
           { role: 'system', content: systemPrompt },
@@ -194,8 +234,8 @@ If the text doesn't describe a trackable activity, respond with:
     activityType: string,
     language: string,
   ): Promise<string> {
-    if (!this.openai) {
-      throw new BadRequestException('Voice service not configured');
+    if (!this.chatOpenAI) {
+      throw new BadRequestException('Chat service not configured');
     }
 
     try {
@@ -205,7 +245,7 @@ The user provided input about a "${activityType}" activity, but some information
 Generate a brief, friendly clarification question in ${language} to help complete the activity log. Respond ONLY with the question text, no formatting.`;
 
-      const completion = await this.openai.chat.completions.create({
+      const completion = await this.chatOpenAI.chat.completions.create({
         model: 'gpt-4o-mini',
         messages: [
           { role: 'system', content: systemPrompt },
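
Reviewer note: below is a minimal, standalone sketch for smoke-testing the
Azure Whisper client that the constructor in this patch builds. The env var
names match the diff; the script itself, the sample.webm test clip, and the
'whisper-1' model string are assumptions for illustration (on Azure the
deployment name in the base URL selects the model, and the SDK just needs
some value for the required field).

import fs from 'fs';
import OpenAI from 'openai';

async function main() {
  const endpoint = process.env.AZURE_OPENAI_WHISPER_ENDPOINT; // e.g. https://<resource>.openai.azure.com
  const key = process.env.AZURE_OPENAI_WHISPER_API_KEY;
  const deployment = process.env.AZURE_OPENAI_WHISPER_DEPLOYMENT;
  if (!endpoint || !key || !deployment) {
    throw new Error('Azure Whisper env vars missing');
  }

  // Mirrors the client construction in the patch: the deployment is baked
  // into the base URL, and api-version/api-key ride along on every request.
  const client = new OpenAI({
    apiKey: key,
    baseURL: `${endpoint}/openai/deployments/${deployment}`,
    defaultQuery: { 'api-version': process.env.AZURE_OPENAI_WHISPER_API_VERSION },
    defaultHeaders: { 'api-key': key },
  });

  // The SDK appends /audio/transcriptions to baseURL, so this issues
  // POST {endpoint}/openai/deployments/{deployment}/audio/transcriptions?api-version=...
  const result = await client.audio.transcriptions.create({
    file: fs.createReadStream('sample.webm'), // hypothetical test clip
    model: 'whisper-1',
  });
  console.log(result.text);
}

main().catch(console.error);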