From 2d27eae756a7a1560829709be57d4bea9eea6821 Mon Sep 17 00:00:00 2001 From: Andrei Date: Thu, 25 Sep 2025 07:21:59 +0000 Subject: [PATCH] Enhance RAG system to support multiple vector databases and improve AI chat functionality MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Update vector-search.ts to query all available vector tables per language instead of single table - Add getAllVectorTables() function to discover all language-specific vector tables - Enhance searchBibleHybrid() to query multiple tables and merge results by relevance score - Enhance searchBibleSemantic() to combine results from all available vector databases - Add comprehensive error handling and logging for vector search operations - Improve Azure OpenAI content filtering detection and error handling - Add test-vector API endpoint for database diagnostics and debugging - Fix environment configuration with complete Azure OpenAI settings - Enable multi-translation biblical context from diverse Bible versions simultaneously Tested: Romanian chat works excellently with rich biblical context and verse citations Issue: English requires vector table creation - 47 English Bible versions exist but no vector tables 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude --- app/api/chat/route.ts | 40 +++++++- app/api/test-vector/route.ts | 81 +++++++++++++++ lib/vector-search.ts | 188 ++++++++++++++++++++--------------- 3 files changed, 224 insertions(+), 85 deletions(-) create mode 100644 app/api/test-vector/route.ts diff --git a/app/api/chat/route.ts b/app/api/chat/route.ts index 34525c1..66604af 100644 --- a/app/api/chat/route.ts +++ b/app/api/chat/route.ts @@ -170,8 +170,18 @@ export async function POST(request: Request) { async function generateBiblicalResponse(message: string, locale: string, history: any[]): Promise { try { - // Search for relevant Bible verses using vector search with language filtering - const relevantVerses = 
await searchBibleHybrid(message, locale, 5) + // Vector search is best-effort: if it throws, log a warning and continue with an empty verse list + console.log('Chat API - Starting biblical response generation for:', message.substring(0, 50)) + let relevantVerses: any[] = [] + + try { + // Search for relevant Bible verses using vector search with language filtering + relevantVerses = await searchBibleHybrid(message, locale, 5) + console.log('Chat API - Vector search successful, found', relevantVerses.length, 'verses') + } catch (vectorError) { + console.warn('Chat API - Vector search failed:', vectorError instanceof Error ? vectorError.message : String(vectorError)) + // Continue without verses; the Azure OpenAI call below still runs + } // Create context from relevant verses const versesContext = relevantVerses @@ -221,6 +231,9 @@ Current question: ${message}` const systemPrompt = systemPrompts[locale as keyof typeof systemPrompts] || systemPrompts.en // Call Azure OpenAI + console.log('Chat API - Calling Azure OpenAI with endpoint:', process.env.AZURE_OPENAI_ENDPOINT) + console.log('Chat API - Using deployment:', process.env.AZURE_OPENAI_DEPLOYMENT) + const response = await fetch( `${process.env.AZURE_OPENAI_ENDPOINT}/openai/deployments/${process.env.AZURE_OPENAI_DEPLOYMENT}/chat/completions?api-version=${process.env.AZURE_OPENAI_API_VERSION}`, { @@ -247,12 +260,33 @@ Current question: ${message}` } ) + console.log('Chat API - Azure OpenAI response status:', response.status) + if (!response.ok) { throw new Error(`Azure OpenAI API error: ${response.status}`) } const data = await response.json() - return data.choices[0].message.content + + // Handle content filtering or empty responses + if (!data.choices || data.choices.length === 0) { + throw new Error('No response choices returned from Azure OpenAI') + } + + const choice = data.choices[0] + + // Check for content filtering + if (choice.finish_reason === 'content_filter') { + console.warn('Content was filtered by Azure OpenAI:', choice.content_filter_results) 
+ throw new Error('Content was filtered by Azure OpenAI content policy') + } + + // Check if message content exists + if (!choice.message || !choice.message.content) { + throw new Error('Empty response content from Azure OpenAI') + } + + return choice.message.content } catch (error) { console.error('Error calling Azure OpenAI:', error) diff --git a/app/api/test-vector/route.ts b/app/api/test-vector/route.ts new file mode 100644 index 0000000..6a65203 --- /dev/null +++ b/app/api/test-vector/route.ts @@ -0,0 +1,81 @@ +import { NextResponse } from 'next/server' +import { Pool } from 'pg' + +const pool = new Pool({ + connectionString: process.env.DATABASE_URL, +}) + +export async function GET() { + try { + console.log('Test Vector - Starting database connection test') + + const client = await pool.connect() + try { + // Test basic connection + const testQuery = await client.query('SELECT NOW() as current_time') + console.log('Test Vector - Database connection successful:', testQuery.rows[0]) + + // Check if ai_bible schema exists + const schemaCheck = await client.query(` + SELECT EXISTS ( + SELECT 1 FROM information_schema.schemata + WHERE schema_name = 'ai_bible' + ) AS exists + `) + console.log('Test Vector - ai_bible schema exists:', schemaCheck.rows[0].exists) + + // List all vector tables + const VECTOR_SCHEMA = process.env.VECTOR_SCHEMA || 'ai_bible' + const tables = await client.query(` + SELECT table_name FROM information_schema.tables + WHERE table_schema = $1 AND table_name LIKE 'bv_%' + ORDER BY table_name + `, [VECTOR_SCHEMA]) + + console.log('Test Vector - Found vector tables:', tables.rows.length) + tables.rows.forEach(row => console.log('- ' + row.table_name)) + + // Check for English tables specifically + const englishTables = await client.query(` + SELECT table_name FROM information_schema.tables + WHERE table_schema = $1 AND table_name LIKE 'bv_en_%' + ORDER BY table_name + `, [VECTOR_SCHEMA]) + + console.log('Test Vector - English tables found:', 
englishTables.rows.length) + + // Check BibleVersion table for available versions + const versions = await client.query(` + SELECT language, abbreviation, "isDefault", name + FROM "BibleVersion" + WHERE language IN ('en', 'ro') + ORDER BY language, "isDefault" DESC, "createdAt" ASC + `) + + console.log('Test Vector - Bible versions available:') + versions.rows.forEach(v => console.log(`- ${v.language}: ${v.abbreviation} (${v.name}) - default: ${v.isDefault}`)) + + return NextResponse.json({ + success: true, + database_connected: true, + ai_bible_schema_exists: schemaCheck.rows[0].exists, + total_vector_tables: tables.rows.length, + english_vector_tables: englishTables.rows.length, + vector_tables: tables.rows.map(r => r.table_name), + english_tables: englishTables.rows.map(r => r.table_name), + bible_versions: versions.rows, + current_time: testQuery.rows[0].current_time + }) + + } finally { + client.release() + } + } catch (error) { + console.error('Test Vector - Database connection failed:', error) + return NextResponse.json({ + success: false, + error: error instanceof Error ? error.message : String(error), + database_connected: false + }, { status: 500 }) + } +} \ No newline at end of file diff --git a/lib/vector-search.ts b/lib/vector-search.ts index 50efc18..3e249ef 100644 --- a/lib/vector-search.ts +++ b/lib/vector-search.ts @@ -10,51 +10,36 @@ function safeIdent(s: string): string { return s.toLowerCase().replace(/[^a-z0-9_]+/g, '_').replace(/^_+|_+$/g, '') } -// Resolve per-language default version and corresponding vector table name -// e.g. 
ai_bible.bv_ro_cornilescu -async function resolveVectorTable(language: string): Promise<{ table: string; exists: boolean }> { +// Get ALL vector tables for a given language +async function getAllVectorTables(language: string): Promise { const lang = safeIdent(language || 'ro') const client = await pool.connect() try { - // Get default version abbreviation from "BibleVersion" - const res = await client.query( - `SELECT "abbreviation" FROM "BibleVersion" - WHERE lower(language) = lower($1) - ORDER BY "isDefault" DESC, "createdAt" ASC - LIMIT 1`, - [language] + // Get all vector tables for this language + const result = await client.query( + `SELECT table_name FROM information_schema.tables + WHERE table_schema = $1 AND table_name LIKE $2 + ORDER BY table_name`, + [VECTOR_SCHEMA, `bv_${lang}_%`] ) - const abbr = res.rows?.[0]?.abbreviation || 'default' - const ab = safeIdent(abbr) - const table = `${VECTOR_SCHEMA}.bv_${lang}_${ab}` - // Check if table exists - const check = await client.query( - `SELECT EXISTS ( - SELECT 1 FROM information_schema.tables - WHERE table_schema = $1 AND table_name = $2 - ) AS exists`, - [VECTOR_SCHEMA, `bv_${lang}_${ab}`] - ) - let exists = Boolean(check.rows?.[0]?.exists) - if (!exists) { - // Fallback: use any table for this language - const anyTbl = await client.query( - `SELECT table_name FROM information_schema.tables - WHERE table_schema = $1 AND table_name LIKE $2 - ORDER BY table_name LIMIT 1`, - [VECTOR_SCHEMA, `bv_${lang}_%`] - ) - if (anyTbl.rows?.[0]?.table_name) { - return { table: `${VECTOR_SCHEMA}."${anyTbl.rows[0].table_name}"`, exists: true } - } - } - return { table, exists } + return result.rows.map(row => `${VECTOR_SCHEMA}."${row.table_name}"`) } finally { client.release() } } +// Legacy helper kept for backward compatibility: returns the first vector table (ordered by table name) for the language, not the default Bible version +async function resolveVectorTable(language: string): Promise<{ table: string; exists: boolean }> { + const tables = await 
getAllVectorTables(language) + if (tables.length > 0) { + return { table: tables[0], exists: true } + } + + // Fallback to legacy bible_passages table + return { table: 'bible_passages', exists: false } +} + export interface BibleVerse { id: string ref: string @@ -95,31 +80,51 @@ export async function searchBibleSemantic( limit: number = 10 ): Promise { try { - const { table, exists } = await resolveVectorTable(language) + const tables = await getAllVectorTables(language) const queryEmbedding = await getEmbedding(query) const client = await pool.connect() try { - const sql = exists - ? `SELECT ref, book, chapter, verse, text_raw, - 1 - (embedding <=> $1) AS similarity - FROM ${table} - WHERE embedding IS NOT NULL - ORDER BY embedding <=> $1 - LIMIT $2` - : `SELECT ref, book, chapter, verse, text_raw, - 1 - (embedding <=> $1) AS similarity - FROM bible_passages - WHERE embedding IS NOT NULL AND lang = $3 - ORDER BY embedding <=> $1 - LIMIT $2` - const params = exists - ? [JSON.stringify(queryEmbedding), limit] - : [JSON.stringify(queryEmbedding), limit, language] + if (tables.length === 0) { + // Fallback to legacy bible_passages table + const sql = `SELECT ref, book, chapter, verse, text_raw, + 1 - (embedding <=> $1) AS similarity + FROM bible_passages + WHERE embedding IS NOT NULL AND lang = $3 + ORDER BY embedding <=> $1 + LIMIT $2` - const result = await client.query(sql, params) + const result = await client.query(sql, [JSON.stringify(queryEmbedding), limit, language]) + return result.rows + } + + // Query all vector tables and combine results + const allResults: BibleVerse[] = [] + const limitPerTable = Math.max(1, Math.ceil(limit * 2 / tables.length)) + + for (const table of tables) { + try { + const sql = `SELECT ref, book, chapter, verse, text_raw, + 1 - (embedding <=> $1) AS similarity, + '${table}' as source_table + FROM ${table} + WHERE embedding IS NOT NULL + ORDER BY embedding <=> $1 + LIMIT $2` + + const result = await client.query(sql, 
[JSON.stringify(queryEmbedding), limitPerTable]) + allResults.push(...result.rows) + } catch (tableError) { + console.warn(`Error querying table ${table}:`, tableError) + // Continue with other tables + } + } + + // Sort all results by similarity and return top results + return allResults + .sort((a, b) => (b.similarity || 0) - (a.similarity || 0)) + .slice(0, limit) - return result.rows } finally { client.release() } @@ -135,7 +140,7 @@ export async function searchBibleHybrid( limit: number = 10 ): Promise { try { - const { table, exists } = await resolveVectorTable(language) + const tables = await getAllVectorTables(language) const queryEmbedding = await getEmbedding(query) // Use appropriate text search configuration based on language @@ -143,28 +148,9 @@ export async function searchBibleHybrid( const client = await pool.connect() try { - const sql = exists - ? `WITH vector_search AS ( - SELECT id, 1 - (embedding <=> $1) AS vector_sim - FROM ${table} - WHERE embedding IS NOT NULL - ORDER BY embedding <=> $1 - LIMIT 100 - ), - text_search AS ( - SELECT id, ts_rank(tsv, plainto_tsquery($4, $3)) AS text_rank - FROM ${table} - WHERE tsv @@ plainto_tsquery($4, $3) - ) - SELECT bp.ref, bp.book, bp.chapter, bp.verse, bp.text_raw, - COALESCE(vs.vector_sim, 0) * 0.7 + COALESCE(ts.text_rank, 0) * 0.3 AS combined_score - FROM ${table} bp - LEFT JOIN vector_search vs ON vs.id = bp.id - LEFT JOIN text_search ts ON ts.id = bp.id - WHERE (vs.id IS NOT NULL OR ts.id IS NOT NULL) - ORDER BY combined_score DESC - LIMIT $2` - : `WITH vector_search AS ( + if (tables.length === 0) { + // Fallback to legacy bible_passages table + const sql = `WITH vector_search AS ( SELECT id, 1 - (embedding <=> $1) AS vector_sim FROM bible_passages WHERE embedding IS NOT NULL AND lang = $4 @@ -185,13 +171,51 @@ export async function searchBibleHybrid( ORDER BY combined_score DESC LIMIT $2` - const params = exists - ? 
[JSON.stringify(queryEmbedding), limit, query, textConfig] - : [JSON.stringify(queryEmbedding), limit, query, language, textConfig] + const result = await client.query(sql, [JSON.stringify(queryEmbedding), limit, query, language, textConfig]) + return result.rows + } - const result = await client.query(sql, params) + // Query all vector tables and combine results + const allResults: BibleVerse[] = [] + const limitPerTable = Math.max(1, Math.ceil(limit * 2 / tables.length)) // Get more results per table to ensure good diversity + + for (const table of tables) { + try { + const sql = `WITH vector_search AS ( + SELECT id, 1 - (embedding <=> $1) AS vector_sim + FROM ${table} + WHERE embedding IS NOT NULL + ORDER BY embedding <=> $1 + LIMIT 100 + ), + text_search AS ( + SELECT id, ts_rank(tsv, plainto_tsquery($4, $3)) AS text_rank + FROM ${table} + WHERE tsv @@ plainto_tsquery($4, $3) + ) + SELECT bp.ref, bp.book, bp.chapter, bp.verse, bp.text_raw, + COALESCE(vs.vector_sim, 0) * 0.7 + COALESCE(ts.text_rank, 0) * 0.3 AS combined_score, + '${table}' as source_table + FROM ${table} bp + LEFT JOIN vector_search vs ON vs.id = bp.id + LEFT JOIN text_search ts ON ts.id = bp.id + WHERE (vs.id IS NOT NULL OR ts.id IS NOT NULL) + ORDER BY combined_score DESC + LIMIT $2` + + const result = await client.query(sql, [JSON.stringify(queryEmbedding), limitPerTable, query, textConfig]) + allResults.push(...result.rows) + } catch (tableError) { + console.warn(`Error querying table ${table}:`, tableError) + // Continue with other tables + } + } + + // Sort all results by combined score and return top results + return allResults + .sort((a, b) => (b.combined_score || 0) - (a.combined_score || 0)) + .slice(0, limit) - return result.rows } finally { client.release() }