import { Pool } from 'pg' const pool = new Pool({ connectionString: process.env.DATABASE_URL, }) const VECTOR_SCHEMA = process.env.VECTOR_SCHEMA || 'ai_bible' function safeIdent(s: string): string { return s.toLowerCase().replace(/[^a-z0-9_]+/g, '_').replace(/^_+|_+$/g, '') } // Resolve per-language default version and corresponding vector table name // e.g. ai_bible.bv_ro_cornilescu async function resolveVectorTable(language: string): Promise<{ table: string; exists: boolean }> { const lang = safeIdent(language || 'ro') const client = await pool.connect() try { // Get default version abbreviation from "BibleVersion" const res = await client.query( `SELECT "abbreviation" FROM "BibleVersion" WHERE lower(language) = lower($1) ORDER BY "isDefault" DESC, "createdAt" ASC LIMIT 1`, [language] ) const abbr = res.rows?.[0]?.abbreviation || 'default' const ab = safeIdent(abbr) const table = `${VECTOR_SCHEMA}.bv_${lang}_${ab}` // Check if table exists const check = await client.query( `SELECT EXISTS ( SELECT 1 FROM information_schema.tables WHERE table_schema = $1 AND table_name = $2 ) AS exists`, [VECTOR_SCHEMA, `bv_${lang}_${ab}`] ) let exists = Boolean(check.rows?.[0]?.exists) if (!exists) { // Fallback: use any table for this language const anyTbl = await client.query( `SELECT table_name FROM information_schema.tables WHERE table_schema = $1 AND table_name LIKE $2 ORDER BY table_name LIMIT 1`, [VECTOR_SCHEMA, `bv_${lang}_%`] ) if (anyTbl.rows?.[0]?.table_name) { return { table: `${VECTOR_SCHEMA}."${anyTbl.rows[0].table_name}"`, exists: true } } } return { table, exists } } finally { client.release() } } export interface BibleVerse { id: string ref: string book: string chapter: number verse: number text_raw: string similarity?: number combined_score?: number } export async function getEmbedding(text: string): Promise { const response = await fetch( `${process.env.AZURE_OPENAI_ENDPOINT}/openai/deployments/${process.env.AZURE_OPENAI_EMBED_DEPLOYMENT}/embeddings?api-version=${process.env.AZURE_OPENAI_API_VERSION}`, { method: 'POST', headers: { 'api-key': process.env.AZURE_OPENAI_KEY!, 'Content-Type': 'application/json', }, body: JSON.stringify({ input: [text], }), } ) if (!response.ok) { throw new Error(`Embedding API error: ${response.status}`) } const data = await response.json() return data.data[0].embedding } export async function searchBibleSemantic( query: string, language: string = 'ro', limit: number = 10 ): Promise { try { const { table, exists } = await resolveVectorTable(language) const queryEmbedding = await getEmbedding(query) const client = await pool.connect() try { const sql = exists ? `SELECT ref, book, chapter, verse, text_raw, 1 - (embedding <=> $1) AS similarity FROM ${table} WHERE embedding IS NOT NULL ORDER BY embedding <=> $1 LIMIT $2` : `SELECT ref, book, chapter, verse, text_raw, 1 - (embedding <=> $1) AS similarity FROM bible_passages WHERE embedding IS NOT NULL AND lang = $3 ORDER BY embedding <=> $1 LIMIT $2` const params = exists ? [JSON.stringify(queryEmbedding), limit] : [JSON.stringify(queryEmbedding), limit, language] const result = await client.query(sql, params) return result.rows } finally { client.release() } } catch (error) { console.error('Error in semantic search:', error) throw error } } export async function searchBibleHybrid( query: string, language: string = 'ro', limit: number = 10 ): Promise { try { const { table, exists } = await resolveVectorTable(language) const queryEmbedding = await getEmbedding(query) // Use appropriate text search configuration based on language const textConfig = language === 'ro' ? 'romanian' : 'english' const client = await pool.connect() try { const sql = exists ? `WITH vector_search AS ( SELECT id, 1 - (embedding <=> $1) AS vector_sim FROM ${table} WHERE embedding IS NOT NULL ORDER BY embedding <=> $1 LIMIT 100 ), text_search AS ( SELECT id, ts_rank(tsv, plainto_tsquery($4, $3)) AS text_rank FROM ${table} WHERE tsv @@ plainto_tsquery($4, $3) ) SELECT bp.ref, bp.book, bp.chapter, bp.verse, bp.text_raw, COALESCE(vs.vector_sim, 0) * 0.7 + COALESCE(ts.text_rank, 0) * 0.3 AS combined_score FROM ${table} bp LEFT JOIN vector_search vs ON vs.id = bp.id LEFT JOIN text_search ts ON ts.id = bp.id WHERE (vs.id IS NOT NULL OR ts.id IS NOT NULL) ORDER BY combined_score DESC LIMIT $2` : `WITH vector_search AS ( SELECT id, 1 - (embedding <=> $1) AS vector_sim FROM bible_passages WHERE embedding IS NOT NULL AND lang = $4 ORDER BY embedding <=> $1 LIMIT 100 ), text_search AS ( SELECT id, ts_rank(tsv, plainto_tsquery($5, $3)) AS text_rank FROM bible_passages WHERE tsv @@ plainto_tsquery($5, $3) AND lang = $4 ) SELECT bp.ref, bp.book, bp.chapter, bp.verse, bp.text_raw, COALESCE(vs.vector_sim, 0) * 0.7 + COALESCE(ts.text_rank, 0) * 0.3 AS combined_score FROM bible_passages bp LEFT JOIN vector_search vs ON vs.id = bp.id LEFT JOIN text_search ts ON ts.id = bp.id WHERE (vs.id IS NOT NULL OR ts.id IS NOT NULL) AND bp.lang = $4 ORDER BY combined_score DESC LIMIT $2` const params = exists ? [JSON.stringify(queryEmbedding), limit, query, textConfig] : [JSON.stringify(queryEmbedding), limit, query, language, textConfig] const result = await client.query(sql, params) return result.rows } finally { client.release() } } catch (error) { console.error('Error in hybrid search:', error) throw error } } export async function getContextVerses( book: string, chapter: number, verse: number, contextSize: number = 2 ): Promise { // For context, we can't infer language here; callers should use the main hybrid result to decide. // For now, fallback to legacy table for context retrieval; can be extended to use per-language table. const client = await pool.connect() try { const result = await client.query( ` SELECT ref, book, chapter, verse, text_raw FROM bible_passages WHERE book = $1 AND chapter = $2 AND verse BETWEEN $3 AND $4 ORDER BY verse `, [book, chapter, verse - contextSize, verse + contextSize] ) return result.rows } finally { client.release() } }