Configure voice service to use Azure OpenAI Whisper for transcription
- Add support for Azure OpenAI Whisper API for audio transcription
- Separate Whisper client for transcription and Chat client for activity extraction
- Auto-detect Azure vs standard OpenAI based on AZURE_OPENAI_ENABLED flag
- Use configured Azure deployments (whisper and gpt-5-mini)
- Add proper logging for service initialization

This fixes the "Audio transcription not yet implemented" error on iOS Safari by enabling the already-configured Azure Whisper service.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
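For reference, the settings the new constructor reads, sketched as a hypothetical `.env` (variable names come from the diff below; the deployment names match the commit message, but every value here is a placeholder, including the API versions):

```
# Azure mode
AZURE_OPENAI_ENABLED=true
AZURE_OPENAI_WHISPER_ENDPOINT=https://your-resource.openai.azure.com
AZURE_OPENAI_WHISPER_API_KEY=<placeholder-key>
AZURE_OPENAI_WHISPER_DEPLOYMENT=whisper
AZURE_OPENAI_WHISPER_API_VERSION=2024-06-01
AZURE_OPENAI_CHAT_ENDPOINT=https://your-resource.openai.azure.com
AZURE_OPENAI_CHAT_API_KEY=<placeholder-key>
AZURE_OPENAI_CHAT_DEPLOYMENT=gpt-5-mini
AZURE_OPENAI_CHAT_API_VERSION=2024-06-01

# Standard OpenAI mode (used when AZURE_OPENAI_ENABLED is unset)
OPENAI_API_KEY=<placeholder-key>
```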
@@ -20,20 +20,60 @@ export interface ActivityExtractionResult {
 @Injectable()
 export class VoiceService {
   private openai: OpenAI;
+  private chatOpenAI: OpenAI;
   private logger = new Logger('VoiceService');
 
   // Supported languages for MVP
   private readonly SUPPORTED_LANGUAGES = ['en', 'es', 'fr', 'pt', 'zh'];
 
   constructor(private configService: ConfigService) {
-    const apiKey = this.configService.get<string>('OPENAI_API_KEY');
+    // Check if Azure OpenAI is enabled
+    const azureEnabled = this.configService.get<boolean>('AZURE_OPENAI_ENABLED');
 
-    if (!apiKey) {
-      this.logger.warn('OPENAI_API_KEY not configured. Voice features will be disabled.');
+    if (azureEnabled) {
+      // Use Azure OpenAI for both Whisper and Chat
+      const whisperEndpoint = this.configService.get<string>('AZURE_OPENAI_WHISPER_ENDPOINT');
+      const whisperKey = this.configService.get<string>('AZURE_OPENAI_WHISPER_API_KEY');
+      const chatEndpoint = this.configService.get<string>('AZURE_OPENAI_CHAT_ENDPOINT');
+      const chatKey = this.configService.get<string>('AZURE_OPENAI_CHAT_API_KEY');
+
+      if (whisperEndpoint && whisperKey) {
+        this.openai = new OpenAI({
+          apiKey: whisperKey,
+          baseURL: `${whisperEndpoint}/openai/deployments/${this.configService.get<string>('AZURE_OPENAI_WHISPER_DEPLOYMENT')}`,
+          defaultQuery: { 'api-version': this.configService.get<string>('AZURE_OPENAI_WHISPER_API_VERSION') },
+          defaultHeaders: { 'api-key': whisperKey },
+        });
+        this.logger.log('Azure OpenAI Whisper configured for voice transcription');
+      } else {
+        this.logger.warn('Azure OpenAI Whisper not fully configured. Voice transcription will be disabled.');
+      }
+
+      if (chatEndpoint && chatKey) {
+        this.chatOpenAI = new OpenAI({
+          apiKey: chatKey,
+          baseURL: `${chatEndpoint}/openai/deployments/${this.configService.get<string>('AZURE_OPENAI_CHAT_DEPLOYMENT')}`,
+          defaultQuery: { 'api-version': this.configService.get<string>('AZURE_OPENAI_CHAT_API_VERSION') },
+          defaultHeaders: { 'api-key': chatKey },
+        });
+        this.logger.log('Azure OpenAI Chat configured for activity extraction');
+      } else {
+        this.logger.warn('Azure OpenAI Chat not configured. Using Whisper client for chat.');
+        this.chatOpenAI = this.openai;
+      }
     } else {
-      this.openai = new OpenAI({
-        apiKey,
-      });
+      // Use standard OpenAI
+      const apiKey = this.configService.get<string>('OPENAI_API_KEY');
+
+      if (!apiKey || apiKey === 'sk-your-openai-api-key-here') {
+        this.logger.warn('OPENAI_API_KEY not configured. Voice features will be disabled.');
+      } else {
+        this.openai = new OpenAI({
+          apiKey,
+        });
+        this.chatOpenAI = this.openai;
+        this.logger.log('OpenAI configured for voice transcription');
+      }
     }
   }
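To make the wiring above concrete: a minimal sketch of how the Azure-configured `this.openai` client would be used for transcription with the `openai` Node SDK. The service's actual transcribe method is outside this diff, so the helper and file name here are hypothetical:

```ts
import fs from 'fs';
import OpenAI from 'openai';

// Hypothetical helper, not the service's actual transcribe method.
// With baseURL set to .../openai/deployments/<whisper-deployment> and the
// api-version in defaultQuery, this request resolves to Azure's
// /audio/transcriptions endpoint for that deployment.
async function transcribeSample(openai: OpenAI): Promise<string> {
  const transcription = await openai.audio.transcriptions.create({
    file: fs.createReadStream('sample.m4a'), // placeholder audio file
    model: 'whisper-1', // Azure routes by deployment, so this field is effectively ignored there
  });
  return transcription.text;
}
```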
@@ -97,8 +137,8 @@ export class VoiceService {
     language: string,
     childName?: string,
   ): Promise<ActivityExtractionResult> {
-    if (!this.openai) {
-      throw new BadRequestException('Voice service not configured');
+    if (!this.chatOpenAI) {
+      throw new BadRequestException('Chat service not configured');
     }
 
     try {
@@ -128,7 +168,7 @@ If the text doesn't describe a trackable activity, respond with:
       ? `Child name: ${childName}\nUser said: "${text}"`
       : `User said: "${text}"`;
 
-    const completion = await this.openai.chat.completions.create({
+    const completion = await this.chatOpenAI.chat.completions.create({
       model: 'gpt-4o-mini',
       messages: [
         { role: 'system', content: systemPrompt },
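One subtlety, since the commit message mentions a gpt-5-mini deployment while the code passes `model: 'gpt-4o-mini'`: on Azure's deployment-based endpoints, the deployment segment of `baseURL` decides which model serves the request, not the `model` field in the body. A sketch of the effective request URL, with placeholder values:

```ts
// Illustrative only; endpoint, deployment, and api-version are placeholders.
const endpoint = 'https://your-resource.openai.azure.com';
const deployment = 'gpt-5-mini'; // AZURE_OPENAI_CHAT_DEPLOYMENT
const apiVersion = '2024-06-01'; // AZURE_OPENAI_CHAT_API_VERSION

// chat.completions.create() on the client configured above POSTs to:
const url = `${endpoint}/openai/deployments/${deployment}/chat/completions?api-version=${apiVersion}`;
// Azure selects the model from the deployment in the path, so the body's
// `model: 'gpt-4o-mini'` does not override the deployed model.
```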
@@ -194,8 +234,8 @@ If the text doesn't describe a trackable activity, respond with:
     activityType: string,
     language: string,
   ): Promise<string> {
-    if (!this.openai) {
-      throw new BadRequestException('Voice service not configured');
+    if (!this.chatOpenAI) {
+      throw new BadRequestException('Chat service not configured');
     }
 
     try {
@@ -205,7 +245,7 @@ The user provided input about a "${activityType}" activity, but some information
 Generate a brief, friendly clarification question in ${language} to help complete the activity log.
 Respond ONLY with the question text, no formatting.`;
 
-    const completion = await this.openai.chat.completions.create({
+    const completion = await this.chatOpenAI.chat.completions.create({
       model: 'gpt-4o-mini',
       messages: [
         { role: 'system', content: systemPrompt },