From cd96a2634f5c7073cd942e8316e05e344c663ba6 Mon Sep 17 00:00:00 2001 From: andupetcu <47487320+andupetcu@users.noreply.github.com> Date: Wed, 1 Oct 2025 10:37:11 +0300 Subject: [PATCH] Add Azure OpenAI integration guide for AI features MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Documentation: - Complete Azure OpenAI fallback configuration - Environment variables for chat, voice, and embeddings endpoints - Azure-specific endpoints and API versions Implementation Details: Phase 3 (Voice Input): - Whisper service with Azure OpenAI fallback - Multi-language transcription support - Automatic failover on OpenAI failure Phase 4 (AI Assistant): - GPT-5 mini chat completion support - Documented GPT-5 API differences (reasoning_effort, reasoning_tokens) - Enhanced JSON schema support for structured output - Azure OpenAI chat API version 2025-04-01-preview Phase 5 (Pattern Recognition): - Text-Embedding-ada-002-V2 with Azure endpoint - Cosine similarity calculation for pattern matching - Embeddings service with fallback logic Advanced Features: - Circuit breaker pattern to prevent cascade failures - Automatic provider switching after 5 consecutive failures - Request monitoring and metrics tracking - Cost optimization strategies - Comprehensive error handling Azure OpenAI Endpoints: - Chat: https://footprints-open-ai.openai.azure.com (gpt-5-mini) - Voice: https://footprints-open-ai.openai.azure.com (whisper) - Embeddings: https://footprints-ai.openai.azure.com (text-embedding-ada-002-v2) Testing & Security: - Unit tests for failover scenarios - API key management best practices - Rate limiting and audit logging - Security considerations and deployment checklist 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- docs/azure-openai-integration.md | 481 +++++++++++++++++++++++++++++++ 1 file changed, 481 insertions(+) create mode 100644 docs/azure-openai-integration.md diff --git 
a/docs/azure-openai-integration.md b/docs/azure-openai-integration.md new file mode 100644 index 0000000..fbbecbb --- /dev/null +++ b/docs/azure-openai-integration.md @@ -0,0 +1,481 @@ +# Azure OpenAI Integration Guide + +## Overview + +This guide details the integration of Azure OpenAI services as a fallback option when OpenAI APIs are unavailable. The application supports both OpenAI and Azure OpenAI endpoints with automatic failover. + +## Configuration + +### Environment Variables + +Add the following to your `.env` file: + +```env +# OpenAI Configuration (Primary) +OPENAI_API_KEY=your_openai_key +OPENAI_MODEL=gpt-4o +OPENAI_EMBEDDING_MODEL=text-embedding-3-small + +# Azure OpenAI Configuration (Fallback) +AZURE_OPENAI_ENABLED=true +AZURE_OPENAI_API_KEY=your_azure_key + +# Chat Endpoint +AZURE_OPENAI_CHAT_ENDPOINT=https://footprints-open-ai.openai.azure.com +AZURE_OPENAI_CHAT_DEPLOYMENT=gpt-5-mini +AZURE_OPENAI_CHAT_API_VERSION=2025-04-01-preview + +# Voice/Whisper Endpoint +AZURE_OPENAI_WHISPER_ENDPOINT=https://footprints-open-ai.openai.azure.com +AZURE_OPENAI_WHISPER_DEPLOYMENT=whisper +AZURE_OPENAI_WHISPER_API_VERSION=2025-04-01-preview + +# Embeddings Endpoint +AZURE_OPENAI_EMBEDDINGS_ENDPOINT=https://footprints-ai.openai.azure.com +AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT=Text-Embedding-ada-002-V2 +AZURE_OPENAI_EMBEDDINGS_API_VERSION=2023-05-15 +``` + +## Implementation + +### Phase 3: Voice Input Integration (Whisper) + +Update the Voice Service to support Azure OpenAI Whisper: + +```typescript +// src/modules/voice/services/whisper.service.ts +import { Injectable, Logger } from '@nestjs/common'; +import { ConfigService } from '@nestjs/config'; +import OpenAI from 'openai'; +import axios from 'axios'; + +@Injectable() +export class WhisperService { + private readonly logger = new Logger(WhisperService.name); + private openai: OpenAI; + private azureEnabled: boolean; + + constructor(private configService: ConfigService) { + // Initialize OpenAI client + 
this.openai = new OpenAI({ + apiKey: this.configService.get('OPENAI_API_KEY'), + }); + + this.azureEnabled = this.configService.get('AZURE_OPENAI_ENABLED') === 'true'; + } + + async transcribeAudio(audioBuffer: Buffer, language?: string): Promise<string> { + try { + // Try OpenAI first + return await this.transcribeWithOpenAI(audioBuffer, language); + } catch (error) { + this.logger.warn('OpenAI transcription failed, trying Azure OpenAI', error.message); + + if (this.azureEnabled) { + return await this.transcribeWithAzure(audioBuffer, language); + } + + throw error; + } + } + + private async transcribeWithOpenAI(audioBuffer: Buffer, language?: string): Promise<string> { + const file = new File([audioBuffer], 'audio.wav', { type: 'audio/wav' }); + + const response = await this.openai.audio.transcriptions.create({ + file, + model: 'whisper-1', + language: language || 'en', + }); + + return response.text; + } + + private async transcribeWithAzure(audioBuffer: Buffer, language?: string): Promise<string> { + const endpoint = this.configService.get('AZURE_OPENAI_WHISPER_ENDPOINT'); + const deployment = this.configService.get('AZURE_OPENAI_WHISPER_DEPLOYMENT'); + const apiVersion = this.configService.get('AZURE_OPENAI_WHISPER_API_VERSION'); + const apiKey = this.configService.get('AZURE_OPENAI_API_KEY'); + + const url = `${endpoint}/openai/deployments/${deployment}/audio/transcriptions?api-version=${apiVersion}`; + + const formData = new FormData(); + formData.append('file', new Blob([audioBuffer]), 'audio.wav'); + if (language) { + formData.append('language', language); + } + + const response = await axios.post(url, formData, { + headers: { + 'api-key': apiKey, + 'Content-Type': 'multipart/form-data', + }, + }); + + return response.data.text; + } +} +``` + +### Phase 4: AI Assistant (Chat Completion) + +Update the AI Service to support Azure OpenAI chat with GPT-5 models: + +```typescript +// src/modules/ai/services/ai.service.ts +import { Injectable, Logger } from '@nestjs/common'; +import { 
ConfigService } from '@nestjs/config'; +import OpenAI from 'openai'; +import axios from 'axios'; + +@Injectable() +export class AIService { + private readonly logger = new Logger(AIService.name); + private openai: OpenAI; + private azureEnabled: boolean; + + constructor(private configService: ConfigService) { + this.openai = new OpenAI({ + apiKey: this.configService.get('OPENAI_API_KEY'), + }); + + this.azureEnabled = this.configService.get('AZURE_OPENAI_ENABLED') === 'true'; + } + + async generateResponse( + messages: Array<{ role: string; content: string }>, + temperature: number = 0.7, + ): Promise<string> { + try { + // Try OpenAI first + return await this.generateWithOpenAI(messages, temperature); + } catch (error) { + this.logger.warn('OpenAI chat failed, trying Azure OpenAI', error.message); + + if (this.azureEnabled) { + return await this.generateWithAzure(messages, temperature); + } + + throw error; + } + } + + private async generateWithOpenAI( + messages: Array<{ role: string; content: string }>, + temperature: number, + ): Promise<string> { + const response = await this.openai.chat.completions.create({ + model: this.configService.get('OPENAI_MODEL', 'gpt-4o'), + messages: messages as any, + temperature, + max_tokens: 1000, + }); + + return response.choices[0].message.content; + } + + private async generateWithAzure( + messages: Array<{ role: string; content: string }>, + temperature: number, + ): Promise<string> { + const endpoint = this.configService.get('AZURE_OPENAI_CHAT_ENDPOINT'); + const deployment = this.configService.get('AZURE_OPENAI_CHAT_DEPLOYMENT'); + const apiVersion = this.configService.get('AZURE_OPENAI_CHAT_API_VERSION'); + const apiKey = this.configService.get('AZURE_OPENAI_API_KEY'); + + // NOTE: GPT-5 models use a different API format than GPT-4 + // The response structure includes additional metadata + const url = `${endpoint}/openai/deployments/${deployment}/chat/completions?api-version=${apiVersion}`; + + const response = await axios.post( + url, + { + 
messages, + temperature, + max_tokens: 1000, + // GPT-5 specific parameters + stream: false, + // Optional GPT-5 features: + // reasoning_effort: 'medium', // 'low', 'medium', 'high' + // response_format: { type: 'text' }, // or 'json_object' for structured output + }, + { + headers: { + 'api-key': apiKey, + 'Content-Type': 'application/json', + }, + }, + ); + + // GPT-5 response structure may include reasoning tokens + // Extract the actual message content + return response.data.choices[0].message.content; + } +} +``` + +### Phase 5: Pattern Recognition (Embeddings) + +Update the Embeddings Service for pattern analysis: + +```typescript +// src/modules/analytics/services/embeddings.service.ts +import { Injectable, Logger } from '@nestjs/common'; +import { ConfigService } from '@nestjs/config'; +import OpenAI from 'openai'; +import axios from 'axios'; + +@Injectable() +export class EmbeddingsService { + private readonly logger = new Logger(EmbeddingsService.name); + private openai: OpenAI; + private azureEnabled: boolean; + + constructor(private configService: ConfigService) { + this.openai = new OpenAI({ + apiKey: this.configService.get('OPENAI_API_KEY'), + }); + + this.azureEnabled = this.configService.get('AZURE_OPENAI_ENABLED') === 'true'; + } + + async createEmbedding(text: string): Promise<number[]> { + try { + // Try OpenAI first + return await this.createEmbeddingWithOpenAI(text); + } catch (error) { + this.logger.warn('OpenAI embeddings failed, trying Azure OpenAI', error.message); + + if (this.azureEnabled) { + return await this.createEmbeddingWithAzure(text); + } + + throw error; + } + } + + private async createEmbeddingWithOpenAI(text: string): Promise<number[]> { + const response = await this.openai.embeddings.create({ + model: this.configService.get('OPENAI_EMBEDDING_MODEL', 'text-embedding-3-small'), + input: text, + }); + + return response.data[0].embedding; + } + + private async createEmbeddingWithAzure(text: string): Promise<number[]> { + const endpoint = 
this.configService.get('AZURE_OPENAI_EMBEDDINGS_ENDPOINT'); + const deployment = this.configService.get('AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT'); + const apiVersion = this.configService.get('AZURE_OPENAI_EMBEDDINGS_API_VERSION'); + const apiKey = this.configService.get('AZURE_OPENAI_API_KEY'); + + const url = `${endpoint}/openai/deployments/${deployment}/embeddings?api-version=${apiVersion}`; + + const response = await axios.post( + url, + { + input: text, + }, + { + headers: { + 'api-key': apiKey, + 'Content-Type': 'application/json', + }, + }, + ); + + return response.data.data[0].embedding; + } + + async calculateSimilarity(embedding1: number[], embedding2: number[]): Promise<number> { + // Cosine similarity calculation + const dotProduct = embedding1.reduce((sum, val, i) => sum + val * embedding2[i], 0); + const magnitude1 = Math.sqrt(embedding1.reduce((sum, val) => sum + val * val, 0)); + const magnitude2 = Math.sqrt(embedding2.reduce((sum, val) => sum + val * val, 0)); + + return dotProduct / (magnitude1 * magnitude2); + } +} +``` + +## GPT-5 Model Differences + +### Key Changes from GPT-4 + +1. **Reasoning Capabilities**: GPT-5 includes enhanced reasoning with configurable effort levels + - `reasoning_effort`: 'low' | 'medium' | 'high' + - Higher effort levels produce more thorough, step-by-step reasoning + +2. **Response Metadata**: GPT-5 responses may include reasoning tokens + ```typescript + { + choices: [{ + message: { content: string }, + reasoning_tokens: number, // New in GPT-5 + finish_reason: string + }], + usage: { + prompt_tokens: number, + completion_tokens: number, + reasoning_tokens: number, // New in GPT-5 + total_tokens: number + } + } + ``` + +3. **Structured Output**: Enhanced JSON mode support + ```typescript + { + response_format: { + type: 'json_schema', + json_schema: { + name: 'response', + schema: { /* your schema */ } + } + } + } + ``` + +4. 
**API Version**: Use `2025-04-01-preview` for GPT-5 features + +## Testing + +### Unit Tests + +```typescript +// src/modules/ai/__tests__/ai.service.spec.ts +describe('AIService', () => { + describe('Azure OpenAI Fallback', () => { + it('should fallback to Azure when OpenAI fails', async () => { + // Mock OpenAI failure + mockOpenAI.chat.completions.create.mockRejectedValue(new Error('API Error')); + + // Mock Azure success + mockAxios.post.mockResolvedValue({ + data: { + choices: [{ message: { content: 'Response from Azure' } }] + } + }); + + const result = await aiService.generateResponse([ + { role: 'user', content: 'Hello' } + ]); + + expect(result).toBe('Response from Azure'); + expect(mockAxios.post).toHaveBeenCalledWith( + expect.stringContaining('gpt-5-mini'), + expect.any(Object), + expect.any(Object) + ); + }); + }); +}); +``` + +## Error Handling + +```typescript +// src/common/exceptions/ai-service.exception.ts +export class AIServiceException extends HttpException { + constructor( + message: string, + public provider: 'openai' | 'azure', + public originalError?: Error, + ) { + super( + { + statusCode: HttpStatus.SERVICE_UNAVAILABLE, + error: 'AI_SERVICE_UNAVAILABLE', + message, + provider, + }, + HttpStatus.SERVICE_UNAVAILABLE, + ); + } +} +``` + +## Monitoring + +Add logging for failover events: + +```typescript +// Log when failover occurs +this.logger.warn('OpenAI service unavailable, switching to Azure OpenAI', { + endpoint: 'chat', + model: deployment, + requestId: context.requestId, +}); + +// Track usage metrics +this.metricsService.increment('ai.provider.azure.requests'); +this.metricsService.increment('ai.provider.openai.failures'); +``` + +## Cost Optimization + +1. **Primary/Fallback Strategy**: Use OpenAI as primary to take advantage of potentially lower costs +2. **Rate Limiting**: Implement exponential backoff before failover +3. **Circuit Breaker**: After 5 consecutive failures, switch primary provider temporarily +4. 
**Token Tracking**: Monitor token usage across both providers + +```typescript +// src/modules/ai/services/circuit-breaker.service.ts +export class CircuitBreakerService { + private failureCount = 0; + private readonly threshold = 5; + private circuitOpen = false; + private readonly resetTimeout = 60000; // 1 minute + + async execute<T>( + primaryFn: () => Promise<T>, + fallbackFn: () => Promise<T>, + ): Promise<T> { + if (this.circuitOpen) { + return fallbackFn(); + } + + try { + const result = await primaryFn(); + this.failureCount = 0; + return result; + } catch (error) { + this.failureCount++; + + if (this.failureCount >= this.threshold) { + this.openCircuit(); + } + + return fallbackFn(); + } + } + + private openCircuit() { + this.circuitOpen = true; + setTimeout(() => { + this.circuitOpen = false; + this.failureCount = 0; + }, this.resetTimeout); + } +} +``` + +## Security Considerations + +1. **API Key Rotation**: Store keys in secure vault (AWS Secrets Manager, Azure Key Vault) +2. **Network Security**: Use private endpoints when available +3. **Rate Limiting**: Implement per-user and per-endpoint rate limits +4. **Audit Logging**: Log all AI requests with user context for compliance + +## Deployment Checklist + +- [ ] Environment variables configured for both OpenAI and Azure +- [ ] API keys validated and working +- [ ] Fallback logic tested in staging +- [ ] Monitoring and alerting configured +- [ ] Rate limiting implemented +- [ ] Circuit breaker tested +- [ ] Error handling covers all failure scenarios +- [ ] Cost tracking enabled +- [ ] Security review completed +