- Update vector-search.ts to query all available vector tables per language instead of single table - Add getAllVectorTables() function to discover all language-specific vector tables - Enhance searchBibleHybrid() to query multiple tables and merge results by relevance score - Enhance searchBibleSemantic() to combine results from all available vector databases - Add comprehensive error handling and logging for vector search operations - Improve Azure OpenAI content filtering detection and error handling - Add test-vector API endpoint for database diagnostics and debugging - Fix environment configuration with complete Azure OpenAI settings - Enable multi-translation biblical context from diverse Bible versions simultaneously Tested: Romanian chat works excellently with rich biblical context and verse citations Issue: English requires vector table creation - 47 English Bible versions exist but no vector tables 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
254 lines
8.1 KiB
TypeScript
254 lines
8.1 KiB
TypeScript
import { Pool } from 'pg'
|
|
|
|
const pool = new Pool({
|
|
connectionString: process.env.DATABASE_URL,
|
|
})
|
|
|
|
const VECTOR_SCHEMA = process.env.VECTOR_SCHEMA || 'ai_bible'
|
|
|
|
function safeIdent(s: string): string {
|
|
return s.toLowerCase().replace(/[^a-z0-9_]+/g, '_').replace(/^_+|_+$/g, '')
|
|
}
|
|
|
|
// Get ALL vector tables for a given language
|
|
async function getAllVectorTables(language: string): Promise<string[]> {
|
|
const lang = safeIdent(language || 'ro')
|
|
const client = await pool.connect()
|
|
try {
|
|
// Get all vector tables for this language
|
|
const result = await client.query(
|
|
`SELECT table_name FROM information_schema.tables
|
|
WHERE table_schema = $1 AND table_name LIKE $2
|
|
ORDER BY table_name`,
|
|
[VECTOR_SCHEMA, `bv_${lang}_%`]
|
|
)
|
|
|
|
return result.rows.map(row => `${VECTOR_SCHEMA}."${row.table_name}"`)
|
|
} finally {
|
|
client.release()
|
|
}
|
|
}
|
|
|
|
// Fallback: Resolve per-language default version (legacy function for backward compatibility)
|
|
async function resolveVectorTable(language: string): Promise<{ table: string; exists: boolean }> {
|
|
const tables = await getAllVectorTables(language)
|
|
if (tables.length > 0) {
|
|
return { table: tables[0], exists: true }
|
|
}
|
|
|
|
// Fallback to legacy bible_passages table
|
|
return { table: 'bible_passages', exists: false }
|
|
}
|
|
|
|
export interface BibleVerse {
|
|
id: string
|
|
ref: string
|
|
book: string
|
|
chapter: number
|
|
verse: number
|
|
text_raw: string
|
|
similarity?: number
|
|
combined_score?: number
|
|
}
|
|
|
|
export async function getEmbedding(text: string): Promise<number[]> {
|
|
const response = await fetch(
|
|
`${process.env.AZURE_OPENAI_ENDPOINT}/openai/deployments/${process.env.AZURE_OPENAI_EMBED_DEPLOYMENT}/embeddings?api-version=${process.env.AZURE_OPENAI_API_VERSION}`,
|
|
{
|
|
method: 'POST',
|
|
headers: {
|
|
'api-key': process.env.AZURE_OPENAI_KEY!,
|
|
'Content-Type': 'application/json',
|
|
},
|
|
body: JSON.stringify({
|
|
input: [text],
|
|
}),
|
|
}
|
|
)
|
|
|
|
if (!response.ok) {
|
|
throw new Error(`Embedding API error: ${response.status}`)
|
|
}
|
|
|
|
const data = await response.json()
|
|
return data.data[0].embedding
|
|
}
|
|
|
|
export async function searchBibleSemantic(
|
|
query: string,
|
|
language: string = 'ro',
|
|
limit: number = 10
|
|
): Promise<BibleVerse[]> {
|
|
try {
|
|
const tables = await getAllVectorTables(language)
|
|
const queryEmbedding = await getEmbedding(query)
|
|
|
|
const client = await pool.connect()
|
|
try {
|
|
if (tables.length === 0) {
|
|
// Fallback to legacy bible_passages table
|
|
const sql = `SELECT ref, book, chapter, verse, text_raw,
|
|
1 - (embedding <=> $1) AS similarity
|
|
FROM bible_passages
|
|
WHERE embedding IS NOT NULL AND lang = $3
|
|
ORDER BY embedding <=> $1
|
|
LIMIT $2`
|
|
|
|
const result = await client.query(sql, [JSON.stringify(queryEmbedding), limit, language])
|
|
return result.rows
|
|
}
|
|
|
|
// Query all vector tables and combine results
|
|
const allResults: BibleVerse[] = []
|
|
const limitPerTable = Math.max(1, Math.ceil(limit * 2 / tables.length))
|
|
|
|
for (const table of tables) {
|
|
try {
|
|
const sql = `SELECT ref, book, chapter, verse, text_raw,
|
|
1 - (embedding <=> $1) AS similarity,
|
|
'${table}' as source_table
|
|
FROM ${table}
|
|
WHERE embedding IS NOT NULL
|
|
ORDER BY embedding <=> $1
|
|
LIMIT $2`
|
|
|
|
const result = await client.query(sql, [JSON.stringify(queryEmbedding), limitPerTable])
|
|
allResults.push(...result.rows)
|
|
} catch (tableError) {
|
|
console.warn(`Error querying table ${table}:`, tableError)
|
|
// Continue with other tables
|
|
}
|
|
}
|
|
|
|
// Sort all results by similarity and return top results
|
|
return allResults
|
|
.sort((a, b) => (b.similarity || 0) - (a.similarity || 0))
|
|
.slice(0, limit)
|
|
|
|
} finally {
|
|
client.release()
|
|
}
|
|
} catch (error) {
|
|
console.error('Error in semantic search:', error)
|
|
throw error
|
|
}
|
|
}
|
|
|
|
export async function searchBibleHybrid(
|
|
query: string,
|
|
language: string = 'ro',
|
|
limit: number = 10
|
|
): Promise<BibleVerse[]> {
|
|
try {
|
|
const tables = await getAllVectorTables(language)
|
|
const queryEmbedding = await getEmbedding(query)
|
|
|
|
// Use appropriate text search configuration based on language
|
|
const textConfig = language === 'ro' ? 'romanian' : 'english'
|
|
|
|
const client = await pool.connect()
|
|
try {
|
|
if (tables.length === 0) {
|
|
// Fallback to legacy bible_passages table
|
|
const sql = `WITH vector_search AS (
|
|
SELECT id, 1 - (embedding <=> $1) AS vector_sim
|
|
FROM bible_passages
|
|
WHERE embedding IS NOT NULL AND lang = $4
|
|
ORDER BY embedding <=> $1
|
|
LIMIT 100
|
|
),
|
|
text_search AS (
|
|
SELECT id, ts_rank(tsv, plainto_tsquery($5, $3)) AS text_rank
|
|
FROM bible_passages
|
|
WHERE tsv @@ plainto_tsquery($5, $3) AND lang = $4
|
|
)
|
|
SELECT bp.ref, bp.book, bp.chapter, bp.verse, bp.text_raw,
|
|
COALESCE(vs.vector_sim, 0) * 0.7 + COALESCE(ts.text_rank, 0) * 0.3 AS combined_score
|
|
FROM bible_passages bp
|
|
LEFT JOIN vector_search vs ON vs.id = bp.id
|
|
LEFT JOIN text_search ts ON ts.id = bp.id
|
|
WHERE (vs.id IS NOT NULL OR ts.id IS NOT NULL) AND bp.lang = $4
|
|
ORDER BY combined_score DESC
|
|
LIMIT $2`
|
|
|
|
const result = await client.query(sql, [JSON.stringify(queryEmbedding), limit, query, language, textConfig])
|
|
return result.rows
|
|
}
|
|
|
|
// Query all vector tables and combine results
|
|
const allResults: BibleVerse[] = []
|
|
const limitPerTable = Math.max(1, Math.ceil(limit * 2 / tables.length)) // Get more results per table to ensure good diversity
|
|
|
|
for (const table of tables) {
|
|
try {
|
|
const sql = `WITH vector_search AS (
|
|
SELECT id, 1 - (embedding <=> $1) AS vector_sim
|
|
FROM ${table}
|
|
WHERE embedding IS NOT NULL
|
|
ORDER BY embedding <=> $1
|
|
LIMIT 100
|
|
),
|
|
text_search AS (
|
|
SELECT id, ts_rank(tsv, plainto_tsquery($4, $3)) AS text_rank
|
|
FROM ${table}
|
|
WHERE tsv @@ plainto_tsquery($4, $3)
|
|
)
|
|
SELECT bp.ref, bp.book, bp.chapter, bp.verse, bp.text_raw,
|
|
COALESCE(vs.vector_sim, 0) * 0.7 + COALESCE(ts.text_rank, 0) * 0.3 AS combined_score,
|
|
'${table}' as source_table
|
|
FROM ${table} bp
|
|
LEFT JOIN vector_search vs ON vs.id = bp.id
|
|
LEFT JOIN text_search ts ON ts.id = bp.id
|
|
WHERE (vs.id IS NOT NULL OR ts.id IS NOT NULL)
|
|
ORDER BY combined_score DESC
|
|
LIMIT $2`
|
|
|
|
const result = await client.query(sql, [JSON.stringify(queryEmbedding), limitPerTable, query, textConfig])
|
|
allResults.push(...result.rows)
|
|
} catch (tableError) {
|
|
console.warn(`Error querying table ${table}:`, tableError)
|
|
// Continue with other tables
|
|
}
|
|
}
|
|
|
|
// Sort all results by combined score and return top results
|
|
return allResults
|
|
.sort((a, b) => (b.combined_score || 0) - (a.combined_score || 0))
|
|
.slice(0, limit)
|
|
|
|
} finally {
|
|
client.release()
|
|
}
|
|
} catch (error) {
|
|
console.error('Error in hybrid search:', error)
|
|
throw error
|
|
}
|
|
}
|
|
|
|
export async function getContextVerses(
|
|
book: string,
|
|
chapter: number,
|
|
verse: number,
|
|
contextSize: number = 2
|
|
): Promise<BibleVerse[]> {
|
|
// For context, we can't infer language here; callers should use the main hybrid result to decide.
|
|
// For now, fallback to legacy table for context retrieval; can be extended to use per-language table.
|
|
const client = await pool.connect()
|
|
try {
|
|
const result = await client.query(
|
|
`
|
|
SELECT ref, book, chapter, verse, text_raw
|
|
FROM bible_passages
|
|
WHERE book = $1 AND chapter = $2
|
|
AND verse BETWEEN $3 AND $4
|
|
ORDER BY verse
|
|
`,
|
|
[book, chapter, verse - contextSize, verse + contextSize]
|
|
)
|
|
|
|
return result.rows
|
|
} finally {
|
|
client.release()
|
|
}
|
|
}
|