import { Pool } from 'pg'
import type { PoolClient } from 'pg'

// Shared connection pool for every query issued by this module.
const pool = new Pool({
  connectionString: process.env.DATABASE_URL,
})

// Schema holding the per-version vector tables (interpolated unquoted into SQL).
const VECTOR_SCHEMA = process.env.VECTOR_SCHEMA || 'ai_bible'

// Hardcoded whitelist of per-language tables, used as a fast path that avoids
// catalog queries. A Map (rather than an object literal) keeps inherited keys
// such as "constructor" from being mistaken for a language entry.
// NOTE(review): the original comment says these tables hold 1536-dimension
// embeddings; the whitelist is NOT re-checked against EMBED_DIMS.
const KNOWN_GOOD_TABLES: ReadonlyMap<string, readonly string[]> = new Map<string, readonly string[]>([
  ['en', ['bv_en_eng_asv']],
  ['es', ['bv_es_sparv1909']],
  // Add more as we create them
])

/**
 * Normalises an arbitrary string into a lowercase identifier fragment made of
 * `[a-z0-9_]` only: runs of other characters collapse to a single `_`, and
 * leading/trailing underscores are stripped. May return an empty string.
 */
function safeIdent(s: string): string {
  return s
    .toLowerCase()
    .replace(/[^a-z0-9_]+/g, '_')
    .replace(/^_+|_+$/g, '')
}

/** Builds `schema."table"`, doubling embedded double quotes so the identifier stays well-formed. */
function qualifiedTable(tableName: string): string {
  return `${VECTOR_SCHEMA}."${tableName.replace(/"/g, '""')}"`
}

/** Short label used in log lines: the part after the first `.` of a qualified table name. */
function tableLabel(qualified: string): string | undefined {
  return qualified.split('.')[1]
}

/**
 * Returns every vector table (as `schema."table"`) available for a language.
 *
 * Whitelisted languages are answered without touching the database. Otherwise
 * up to 10 tables named `bv_<lang>_*` are looked up in `information_schema`,
 * and the first one is sampled: if its embedding size is within 5 of
 * `EMBED_DIMS` (default 1536) all listed tables are returned, else none.
 *
 * @param language Language code; empty values default to `'ro'`.
 * @returns Qualified table names, or `[]` when nothing usable was found.
 */
async function getAllVectorTables(language: string): Promise<string[]> {
  const lang = safeIdent(language || 'ro')
  if (!lang) {
    // Input sanitised to nothing; without this guard the LIKE pattern below
    // would degrade into one that matches tables of every language.
    return []
  }

  const whitelisted = KNOWN_GOOD_TABLES.get(lang)
  if (whitelisted) {
    return whitelisted.map(qualifiedTable)
  }

  // Fallback: check dynamically (slower)
  const expectedDims = parseInt(process.env.EMBED_DIMS || '1536', 10)
  const client = await pool.connect()
  try {
    // "_" is a single-character wildcard in LIKE, so escape every underscore
    // (backslash is the default LIKE escape character).
    const namePattern = `bv\\_${lang.replace(/_/g, '\\_')}\\_%`
    const result = await client.query(
      `SELECT table_name
         FROM information_schema.tables
        WHERE table_schema = $1 AND table_name LIKE $2
        ORDER BY table_name
        LIMIT 10`,
      [VECTOR_SCHEMA, namePattern]
    )

    if (result.rows.length === 0) {
      return []
    }

    // Quick check: sample only the first table and assume the rest are consistent.
    const firstTable = qualifiedTable(result.rows[0].table_name)
    try {
      const dimCheck = await client.query(
        `SELECT pg_column_size(embedding) AS size FROM ${firstTable} WHERE embedding IS NOT NULL LIMIT 1`
      )
      if (dimCheck.rows.length > 0) {
        // Approximation: stored byte size / 4, compared with a tolerance of 5.
        const actualDims = Math.round(dimCheck.rows[0].size / 4)
        if (Math.abs(actualDims - expectedDims) <= 5) {
          return result.rows.map((row) => qualifiedTable(row.table_name))
        }
      }
    } catch (error) {
      console.warn(`Dimension check failed for ${lang}:`, error)
    }
    return []
  } finally {
    client.release()
  }
}

/**
 * Legacy helper kept for backward compatibility: resolves a single table for a
 * language, falling back to the legacy `bible_passages` table.
 *
 * @returns The first per-language table with `exists: true`, or
 *          `{ table: 'bible_passages', exists: false }` when none is found.
 */
async function resolveVectorTable(language: string): Promise<{ table: string; exists: boolean }> {
  const tables = await getAllVectorTables(language)
  if (tables.length > 0) {
    return { table: tables[0], exists: true }
  }
  // Fallback to legacy bible_passages table
  return { table: 'bible_passages', exists: false }
}

export interface BibleVerse {
  id: string
  ref: string
  book: string
  chapter: number
  verse: number
  text_raw: string
  similarity?: number
  combined_score?: number
}

/** Type guard: a non-empty array consisting only of numbers. */
function isEmbedding(value: unknown): value is number[] {
  return Array.isArray(value) && value.length > 0 && value.every((v) => typeof v === 'number')
}

/**
 * Computes an embedding vector for `text`.
 *
 * Tries Ollama first when `OLLAMA_API_URL` and `OLLAMA_EMBED_MODEL` are set;
 * on any failure (network error, non-2xx, unusable payload) it logs a warning
 * and falls back to Azure OpenAI.
 *
 * @throws Error when the Azure configuration is missing, the Azure call
 *         returns a non-2xx status, or the response carries no embedding.
 */
export async function getEmbedding(text: string): Promise<number[]> {
  // Try Ollama first (for local embeddings)
  if (process.env.OLLAMA_API_URL && process.env.OLLAMA_EMBED_MODEL) {
    try {
      const response = await fetch(`${process.env.OLLAMA_API_URL}/api/embeddings`, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
        },
        body: JSON.stringify({
          model: process.env.OLLAMA_EMBED_MODEL,
          prompt: text,
        }),
      })

      if (response.ok) {
        const payload = (await response.json()) as { embedding?: unknown } | null
        const embedding = payload?.embedding
        if (isEmbedding(embedding)) {
          return embedding
        }
        // A 2xx response without a vector would otherwise leak `undefined` to callers.
        console.warn('Ollama returned no usable embedding, falling back to Azure')
      } else {
        console.warn(`Ollama embedding failed: ${response.status}, falling back to Azure`)
      }
    } catch (error) {
      console.warn('Ollama embedding error, falling back to Azure:', error)
    }
  }

  // Fallback to Azure OpenAI
  const endpoint = process.env.AZURE_OPENAI_ENDPOINT
  const deployment = process.env.AZURE_OPENAI_EMBED_DEPLOYMENT
  const apiKey = process.env.AZURE_OPENAI_KEY
  if (!endpoint || !deployment || !apiKey) {
    throw new Error(
      'Azure OpenAI embedding is not configured (AZURE_OPENAI_ENDPOINT, AZURE_OPENAI_EMBED_DEPLOYMENT, AZURE_OPENAI_KEY)'
    )
  }
  const embedApiVersion =
    process.env.AZURE_OPENAI_EMBED_API_VERSION || process.env.AZURE_OPENAI_API_VERSION

  const response = await fetch(
    `${endpoint}/openai/deployments/${deployment}/embeddings?api-version=${embedApiVersion}`,
    {
      method: 'POST',
      headers: {
        'api-key': apiKey,
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({
        input: [text],
      }),
    }
  )

  if (!response.ok) {
    throw new Error(`Embedding API error: ${response.status}`)
  }

  const payload = (await response.json()) as { data?: Array<{ embedding?: unknown }> } | null
  const embedding = payload?.data?.[0]?.embedding
  if (!isEmbedding(embedding)) {
    throw new Error('Embedding API error: response did not contain an embedding')
  }
  return embedding
}

/** Describes what differs between the semantic and the "hybrid" search variants. */
interface VerseSearchPlan {
  /** SELECT column list; may reference `$1` (the query embedding). */
  selectList: string
  /** Row field used to rank the merged results. */
  scoreField: 'similarity' | 'combined_score'
}

const SEMANTIC_PLAN: VerseSearchPlan = {
  selectList: 'ref, book, chapter, verse, text_raw, 1 - (embedding <=> $1) AS similarity',
  scoreField: 'similarity',
}

// Use simple semantic search (no text search - TSV column doesn't exist);
// combined_score therefore equals similarity.
const HYBRID_PLAN: VerseSearchPlan = {
  selectList:
    "book || ' ' || chapter || ':' || verse AS ref, book, chapter, verse, text_raw, " +
    '1 - (embedding <=> $1) AS similarity, 1 - (embedding <=> $1) AS combined_score',
  scoreField: 'combined_score',
}

/**
 * Runs the nearest-neighbour query against each table in turn and concatenates
 * the rows. A failure on one table is logged and skipped so the remaining
 * tables are still searched.
 */
async function queryVectorTables(
  client: PoolClient,
  tables: readonly string[],
  selectList: string,
  embeddingLiteral: string,
  rowLimit: number,
  logSuffix = ''
): Promise<BibleVerse[]> {
  const rows: BibleVerse[] = []
  for (const table of tables) {
    try {
      // The table label travels as a bound parameter instead of being spliced
      // into a SQL string literal.
      const sql = `SELECT ${selectList}, $3::text AS source_table
                     FROM ${table}
                    WHERE embedding IS NOT NULL
                    ORDER BY embedding <=> $1
                    LIMIT $2`
      const result = await client.query(sql, [embeddingLiteral, rowLimit, table])
      console.log(` ✓ ${tableLabel(table)}${logSuffix}: found ${result.rows.length} verses`)
      rows.push(...result.rows)
    } catch (tableError) {
      console.warn(` ✗ Error querying ${table}:`, tableError)
    }
  }
  return rows
}

/**
 * Searches the primary-language tables, optionally falls back to the English
 * tables when nothing was found, then ranks and truncates the merged rows.
 */
async function collectRankedVerses(
  tables: readonly string[],
  queryEmbedding: number[],
  language: string,
  limit: number,
  fallbackToEnglish: boolean,
  plan: VerseSearchPlan
): Promise<BibleVerse[]> {
  const embeddingLiteral = JSON.stringify(queryEmbedding)
  const client = await pool.connect()
  try {
    let allResults: BibleVerse[] = []

    // Search in primary language tables
    if (tables.length > 0) {
      // Over-fetch (1.5x, at least 5 per table) so the merged ranking has slack.
      const limitPerTable = Math.max(5, Math.ceil((limit * 1.5) / tables.length))
      allResults = await queryVectorTables(
        client,
        tables,
        plan.selectList,
        embeddingLiteral,
        limitPerTable
      )
    }

    // Fallback to English if no results and fallback enabled. The language is
    // normalised so that e.g. "EN" does not trigger a redundant second pass.
    if (allResults.length === 0 && fallbackToEnglish && safeIdent(language) !== 'en') {
      console.log(` ⚠️ No results in "${language}", falling back to English...`)
      const englishTables = await getAllVectorTables('en')
      console.log(` Found ${englishTables.length} English table(s)`)
      allResults = await queryVectorTables(
        client,
        englishTables,
        plan.selectList,
        embeddingLiteral,
        limit,
        ' (EN fallback)'
      )
    }

    // `|| 0` (not `??`) is deliberate: it also neutralises NaN scores.
    const topResults = [...allResults]
      .sort((a, b) => (b[plan.scoreField] || 0) - (a[plan.scoreField] || 0))
      .slice(0, limit)

    console.log(` ✅ Returning ${topResults.length} total verses`)
    return topResults
  } finally {
    client.release()
  }
}

/**
 * Semantic (vector similarity) verse search.
 *
 * @param query Free-text query to embed.
 * @param language Language code used to pick the vector tables (default `'ro'`).
 * @param limit Maximum number of verses returned (default 10).
 * @param fallbackToEnglish Search English tables when the language yields nothing.
 * @returns Verses ordered by descending `similarity`.
 * @throws Re-throws any error from table discovery, embedding or connection setup after logging it.
 */
export async function searchBibleSemantic(
  query: string,
  language: string = 'ro',
  limit: number = 10,
  fallbackToEnglish: boolean = true
): Promise<BibleVerse[]> {
  try {
    console.log(`🔍 Searching Bible: language="${language}", query="${query.substring(0, 50)}..."`)

    const tables = await getAllVectorTables(language)
    console.log(
      ` Found ${tables.length} table(s) for language "${language}":`,
      tables.map((t) => tableLabel(t))
    )

    const queryEmbedding = await getEmbedding(query)
    return await collectRankedVerses(
      tables,
      queryEmbedding,
      language,
      limit,
      fallbackToEnglish,
      SEMANTIC_PLAN
    )
  } catch (error) {
    console.error('Error in semantic search:', error)
    throw error
  }
}

/**
 * "Hybrid" verse search. Currently purely semantic: `combined_score` is the
 * same cosine-based value as `similarity`, and `ref` is synthesised as
 * `book chapter:verse`.
 *
 * @param query Free-text query to embed.
 * @param language Language code used to pick the vector tables (default `'ro'`).
 * @param limit Maximum number of verses returned (default 10).
 * @param fallbackToEnglish Search English tables when the language yields nothing.
 * @returns Verses ordered by descending `combined_score`.
 * @throws Re-throws any error from table discovery, embedding or connection setup after logging it.
 */
export async function searchBibleHybrid(
  query: string,
  language: string = 'ro',
  limit: number = 10,
  fallbackToEnglish: boolean = true
): Promise<BibleVerse[]> {
  try {
    console.log(`🔍 Hybrid Search: language="${language}", query="${query.substring(0, 50)}..."`)

    const tables = await getAllVectorTables(language)
    console.log(` Found ${tables.length} table(s) for language "${language}"`)

    const queryEmbedding = await getEmbedding(query)
    return await collectRankedVerses(
      tables,
      queryEmbedding,
      language,
      limit,
      fallbackToEnglish,
      HYBRID_PLAN
    )
  } catch (error) {
    console.error('Error in hybrid search:', error)
    throw error
  }
}

/**
 * Fetches the verses surrounding a given verse from the legacy
 * `bible_passages` table (language cannot be inferred here; callers should use
 * the main hybrid result to decide).
 *
 * @param contextSize Number of verses to include on each side (default 2).
 * @returns Rows ordered by verse number. Only `ref`, `book`, `chapter`,
 *          `verse` and `text_raw` are selected, so `id` is not populated.
 */
export async function getContextVerses(
  book: string,
  chapter: number,
  verse: number,
  contextSize: number = 2
): Promise<BibleVerse[]> {
  const client = await pool.connect()
  try {
    const result = await client.query(
      `
      SELECT ref, book, chapter, verse, text_raw
      FROM bible_passages
      WHERE book = $1 AND chapter = $2
        AND verse BETWEEN $3 AND $4
      ORDER BY verse
      `,
      [book, chapter, verse - contextSize, verse + contextSize]
    )
    return result.rows
  } finally {
    client.release()
  }
}