Configure voice service to use Azure OpenAI Whisper for transcription

- Add support for the Azure OpenAI Whisper API for audio transcription
- Use a separate Whisper client for transcription and a Chat client for activity extraction
- Auto-detect Azure vs. standard OpenAI based on the AZURE_OPENAI_ENABLED flag
- Use the configured Azure deployments (whisper and gpt-5-mini)
- Log service initialization status

This fixes the "Audio transcription not yet implemented" error on iOS Safari
by enabling the already-configured Azure Whisper service.
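
The constructor reads the following environment variables. The names come from the diff below; the values shown are placeholders (whisper and gpt-5-mini are the deployment names mentioned above), so treat this as a sketch rather than the project's actual settings:

```
AZURE_OPENAI_ENABLED=true
AZURE_OPENAI_WHISPER_ENDPOINT=https://<resource>.openai.azure.com
AZURE_OPENAI_WHISPER_API_KEY=<key>
AZURE_OPENAI_WHISPER_DEPLOYMENT=whisper
AZURE_OPENAI_WHISPER_API_VERSION=<api-version>
AZURE_OPENAI_CHAT_ENDPOINT=https://<resource>.openai.azure.com
AZURE_OPENAI_CHAT_API_KEY=<key>
AZURE_OPENAI_CHAT_DEPLOYMENT=gpt-5-mini
AZURE_OPENAI_CHAT_API_VERSION=<api-version>
# Fallback when AZURE_OPENAI_ENABLED is unset: standard OpenAI
OPENAI_API_KEY=sk-...
```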

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-02 06:12:21 +00:00
parent 26d3f8962f
commit 46167a8307

@@ -20,20 +20,60 @@ export interface ActivityExtractionResult {
 @Injectable()
 export class VoiceService {
   private openai: OpenAI;
+  private chatOpenAI: OpenAI;
   private logger = new Logger('VoiceService');

   // Supported languages for MVP
   private readonly SUPPORTED_LANGUAGES = ['en', 'es', 'fr', 'pt', 'zh'];

   constructor(private configService: ConfigService) {
-    const apiKey = this.configService.get<string>('OPENAI_API_KEY');
+    // Check if Azure OpenAI is enabled
+    const azureEnabled = this.configService.get<boolean>('AZURE_OPENAI_ENABLED');

-    if (!apiKey) {
-      this.logger.warn('OPENAI_API_KEY not configured. Voice features will be disabled.');
+    if (azureEnabled) {
+      // Use Azure OpenAI for both Whisper and Chat
+      const whisperEndpoint = this.configService.get<string>('AZURE_OPENAI_WHISPER_ENDPOINT');
+      const whisperKey = this.configService.get<string>('AZURE_OPENAI_WHISPER_API_KEY');
+      const chatEndpoint = this.configService.get<string>('AZURE_OPENAI_CHAT_ENDPOINT');
+      const chatKey = this.configService.get<string>('AZURE_OPENAI_CHAT_API_KEY');
+
+      if (whisperEndpoint && whisperKey) {
+        this.openai = new OpenAI({
+          apiKey: whisperKey,
+          baseURL: `${whisperEndpoint}/openai/deployments/${this.configService.get<string>('AZURE_OPENAI_WHISPER_DEPLOYMENT')}`,
+          defaultQuery: { 'api-version': this.configService.get<string>('AZURE_OPENAI_WHISPER_API_VERSION') },
+          defaultHeaders: { 'api-key': whisperKey },
+        });
+        this.logger.log('Azure OpenAI Whisper configured for voice transcription');
+      } else {
+        this.logger.warn('Azure OpenAI Whisper not fully configured. Voice transcription will be disabled.');
+      }
+
+      if (chatEndpoint && chatKey) {
+        this.chatOpenAI = new OpenAI({
+          apiKey: chatKey,
+          baseURL: `${chatEndpoint}/openai/deployments/${this.configService.get<string>('AZURE_OPENAI_CHAT_DEPLOYMENT')}`,
+          defaultQuery: { 'api-version': this.configService.get<string>('AZURE_OPENAI_CHAT_API_VERSION') },
+          defaultHeaders: { 'api-key': chatKey },
+        });
+        this.logger.log('Azure OpenAI Chat configured for activity extraction');
+      } else {
+        this.logger.warn('Azure OpenAI Chat not configured. Using Whisper client for chat.');
+        this.chatOpenAI = this.openai;
+      }
     } else {
-      this.openai = new OpenAI({
-        apiKey,
-      });
+      // Use standard OpenAI
+      const apiKey = this.configService.get<string>('OPENAI_API_KEY');
+      if (!apiKey || apiKey === 'sk-your-openai-api-key-here') {
+        this.logger.warn('OPENAI_API_KEY not configured. Voice features will be disabled.');
+      } else {
+        this.openai = new OpenAI({
+          apiKey,
+        });
+        this.chatOpenAI = this.openai;
+        this.logger.log('OpenAI configured for voice transcription');
+      }
     }
   }
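
A note on the baseURL pattern above: the OpenAI SDK appends its own route (for example /audio/transcriptions) to baseURL and merges in defaultQuery, which produces Azure's deployment-scoped URL. A minimal sketch, assuming placeholder endpoint and api-version values (neither concrete value appears in this diff):

```typescript
// How the SDK composes the final Azure request URL for a transcription call.
// All concrete values below are placeholders, not the project's settings.
const endpoint = 'https://my-resource.openai.azure.com'; // AZURE_OPENAI_WHISPER_ENDPOINT
const deployment = 'whisper';                            // AZURE_OPENAI_WHISPER_DEPLOYMENT
const apiVersion = '2024-06-01';                         // AZURE_OPENAI_WHISPER_API_VERSION (assumed)

const requestUrl =
  `${endpoint}/openai/deployments/${deployment}` + // baseURL set in the constructor
  '/audio/transcriptions' +                        // route appended by the SDK
  `?api-version=${apiVersion}`;                    // merged in from defaultQuery
// => https://my-resource.openai.azure.com/openai/deployments/whisper/audio/transcriptions?api-version=2024-06-01
```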
@@ -97,8 +137,8 @@ export class VoiceService {
     language: string,
     childName?: string,
   ): Promise<ActivityExtractionResult> {
-    if (!this.openai) {
-      throw new BadRequestException('Voice service not configured');
+    if (!this.chatOpenAI) {
+      throw new BadRequestException('Chat service not configured');
     }

     try {
@@ -128,7 +168,7 @@ If the text doesn't describe a trackable activity, respond with:
       ? `Child name: ${childName}\nUser said: "${text}"`
       : `User said: "${text}"`;

-    const completion = await this.openai.chat.completions.create({
+    const completion = await this.chatOpenAI.chat.completions.create({
       model: 'gpt-4o-mini',
       messages: [
         { role: 'system', content: systemPrompt },
@@ -194,8 +234,8 @@ If the text doesn't describe a trackable activity, respond with:
     activityType: string,
     language: string,
   ): Promise<string> {
-    if (!this.openai) {
-      throw new BadRequestException('Voice service not configured');
+    if (!this.chatOpenAI) {
+      throw new BadRequestException('Chat service not configured');
     }

     try {
@@ -205,7 +245,7 @@ The user provided input about a "${activityType}" activity, but some information
 Generate a brief, friendly clarification question in ${language} to help complete the activity log.
 Respond ONLY with the question text, no formatting.`;

-    const completion = await this.openai.chat.completions.create({
+    const completion = await this.chatOpenAI.chat.completions.create({
       model: 'gpt-4o-mini',
       messages: [
         { role: 'system', content: systemPrompt },
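
For context, the transcription path itself is not part of this diff. Below is a minimal sketch of how the Whisper client configured above could be called; transcribeAudio, the Buffer handling, and the 'whisper-1' model value are assumptions, while audio.transcriptions.create is the standard OpenAI Node SDK call:

```typescript
import OpenAI, { toFile } from 'openai';

// Hypothetical helper; the real transcription method lives elsewhere in VoiceService.
async function transcribeAudio(
  openai: OpenAI,   // the Whisper-configured client built in the constructor
  audio: Buffer,    // raw audio from the client (e.g. an iOS Safari recording)
  language: string, // one of SUPPORTED_LANGUAGES: 'en' | 'es' | 'fr' | 'pt' | 'zh'
): Promise<string> {
  const transcription = await openai.audio.transcriptions.create({
    // The SDK requires `model`; with the Azure baseURL, the deployment segment
    // of the URL is what actually selects Whisper (assumed Azure routing).
    model: 'whisper-1',
    file: await toFile(audio, 'recording.webm'),
    language,
  });
  return transcription.text;
}
```

The same routing applies to the chat calls: with the Azure chat baseURL in place, the hard-coded 'gpt-4o-mini' value is still sent, but the AZURE_OPENAI_CHAT_DEPLOYMENT (gpt-5-mini per the commit message) serves the request, assuming standard Azure OpenAI deployment routing.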