Files
biblical-guide.com/lib/vector-search.ts
Andrei a01377b21a feat: implement AI chat with vector search and random loading messages
Major Features:
-  AI chat with Azure OpenAI GPT-4o integration
-  Vector search across Bible versions (ASV English, RVA 1909 Spanish)
-  Multi-language support with automatic English fallback
-  Bible version citations in responses [ASV] [RVA 1909]
-  Random Bible-themed loading messages (5 variants)
-  Safe build script with memory guardrails
-  8GB swap memory for build safety
-  Stripe donation integration (multiple payment methods)

AI Chat Improvements:
- Implement vector search with 1536-dim embeddings (Azure text-embedding-ada-002)
- Search all Bible versions in user's language, fallback to English
- Cite Bible versions properly in AI responses
- Add 5 random loading messages: "Searching the Scriptures...", etc.
- Fix Ollama conflict (disabled to use Azure OpenAI exclusively)
- Optimize hybrid search queries for actual table schema

Build & Infrastructure:
- Create safe-build.sh script with memory monitoring (prevents server crashes)
- Add 8GB swap memory for emergency relief
- Document build process in BUILD_GUIDE.md
- Set Node.js memory limits (4GB max during builds)

Database:
- Clean up 115 old vector tables with wrong dimensions
- Keep only 2 tables with correct 1536-dim embeddings
- Add Stripe schema for donations and subscriptions

Documentation:
- AI_CHAT_FINAL_STATUS.md - Complete implementation status
- AI_CHAT_IMPLEMENTATION_COMPLETE.md - Technical details
- BUILD_GUIDE.md - Safe building guide with guardrails
- CHAT_LOADING_MESSAGES.md - Loading messages implementation
- STRIPE_IMPLEMENTATION_COMPLETE.md - Stripe integration docs
- STRIPE_SETUP_GUIDE.md - Stripe configuration guide

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-12 19:37:24 +00:00

337 lines
11 KiB
TypeScript

import { Pool } from 'pg'
// Shared PostgreSQL connection pool used by the query helpers in this file.
// The connection string is read from the DATABASE_URL environment variable.
const pool = new Pool({
connectionString: process.env.DATABASE_URL,
})
// Database schema that holds the per-version vector tables.
// Overridable through the VECTOR_SCHEMA environment variable; defaults to 'ai_bible'.
// NOTE(review): this value is interpolated unquoted into SQL below, so it must be a trusted identifier.
const VECTOR_SCHEMA = process.env.VECTOR_SCHEMA || 'ai_bible'
/**
 * Turns arbitrary text into a lowercase identifier fragment.
 *
 * Every run of characters outside `a-z`, `0-9` and `_` collapses into a single
 * underscore, after which leading and trailing underscores are stripped.
 *
 * @param s Raw text, e.g. a language code.
 * @returns The sanitized fragment; may be empty when nothing usable remains.
 */
function safeIdent(s: string): string {
  const lowered = s.toLowerCase()
  const collapsed = lowered.replace(/[^a-z0-9_]+/g, '_')
  const trimmed = collapsed.replace(/^_+|_+$/g, '')
  return trimmed
}
/**
 * Lists every vector table for a language whose embeddings match the expected dimensionality.
 *
 * Languages present in the hardcoded whitelist are answered without touching the database.
 * For any other language the schema is scanned for tables named `bv_<lang>_*` (at most 10);
 * the first match is sampled and, when its embedding size is within tolerance of EMBED_DIMS
 * (default 1536), all matches are returned.
 *
 * @param language Language code; falsy values default to 'ro'. Sanitized with `safeIdent`.
 * @returns Schema-qualified, double-quoted table names ready to be interpolated into SQL,
 *          or an empty array when no suitable table is found.
 */
async function getAllVectorTables(language: string): Promise<string[]> {
  const lang = safeIdent(language || 'ro')
  // A code that sanitizes to nothing would degrade the LIKE pattern below into a prefix
  // that matches the tables of every language, so treat it as "no tables".
  if (lang === '') {
    return []
  }
  const expectedDims = parseInt(process.env.EMBED_DIMS || '1536', 10)

  // Quote a table name as an identifier, doubling any embedded double quote.
  const qualify = (tableName: string): string =>
    `${VECTOR_SCHEMA}."${tableName.replace(/"/g, '""')}"`

  // For now, use a hardcoded whitelist of tables we know have 1536 dimensions.
  // This is much faster than querying each table.
  const knownGoodTables: Record<string, string[]> = {
    'en': ['bv_en_eng_asv'],
    'es': ['bv_es_sparv1909'],
    // Add more as we create them
  }
  // Own-property check: a plain index lookup would also hit inherited members such as
  // `constructor`, which is truthy but has no usable `.map`.
  if (Object.prototype.hasOwnProperty.call(knownGoodTables, lang)) {
    return knownGoodTables[lang].map(qualify)
  }

  // Fallback: check dynamically (slower)
  const client = await pool.connect()
  try {
    // `_` is a single-character wildcard in LIKE; escape it (backslash is the default
    // escape character) so only the literal `bv_<lang>_` prefix matches.
    const likePattern = `bv\\_${lang.replace(/_/g, '\\_')}\\_%`
    const result = await client.query(
      `SELECT table_name FROM information_schema.tables
       WHERE table_schema = $1 AND table_name LIKE $2
       ORDER BY table_name
       LIMIT 10`,
      [VECTOR_SCHEMA, likePattern]
    )
    // Quick check: just try the first table and see if it works
    if (result.rows.length > 0) {
      const firstTable = qualify(result.rows[0].table_name)
      try {
        const dimCheck = await client.query(
          `SELECT pg_column_size(embedding) as size FROM ${firstTable} WHERE embedding IS NOT NULL LIMIT 1`
        )
        if (dimCheck.rows.length > 0) {
          // Stored byte size / 4 approximates the number of dimensions; the tolerance of 5
          // presumably absorbs the value's storage header — TODO confirm, or switch to an
          // exact dimension query.
          const actualDims = Math.round(dimCheck.rows[0].size / 4)
          if (Math.abs(actualDims - expectedDims) <= 5) {
            // If first table matches, assume all do (they should be consistent)
            return result.rows.map(row => qualify(row.table_name))
          }
        }
      } catch (error) {
        console.warn(`Dimension check failed for ${lang}:`, error)
      }
    }
    return []
  } finally {
    client.release()
  }
}
/**
 * Legacy single-table resolver, kept for backward compatibility.
 *
 * @param language Language code forwarded to `getAllVectorTables`.
 * @returns The first vector table found for the language with `exists: true`, or the
 *          legacy `bible_passages` table name with `exists: false` when there is none.
 */
async function resolveVectorTable(language: string): Promise<{ table: string; exists: boolean }> {
  const candidates = await getAllVectorTables(language)
  if (candidates.length === 0) {
    // No per-language table is available: point callers at the legacy bible_passages table
    return { table: 'bible_passages', exists: false }
  }
  return { table: candidates[0], exists: true }
}
/**
 * A single Bible verse row as returned by the search and context helpers in this file.
 *
 * NOTE(review): the search queries in this file also select a `source_table` column that is
 * not declared here, and none of them select `id` — confirm how callers rely on either.
 */
export interface BibleVerse {
/** Row identifier. NOTE(review): not selected by the queries visible in this file. */
id: string
/** Human-readable reference; the hybrid search builds it as "<book> <chapter>:<verse>". */
ref: string
/** Book name as stored in the table. */
book: string
/** Chapter number within the book. */
chapter: number
/** Verse number within the chapter. */
verse: number
/** Verse text as stored in the `text_raw` column. */
text_raw: string
/** Search score computed in SQL as `1 - (embedding <=> query)`; set by the search functions only. */
similarity?: number
/** Ranking score set by `searchBibleHybrid`; currently computed with the same expression as `similarity`. */
combined_score?: number
}
/** True when `value` is a non-empty array made only of finite numbers. */
function isEmbeddingVector(value: unknown): value is number[] {
  return (
    Array.isArray(value) &&
    value.length > 0 &&
    value.every(item => typeof item === 'number' && Number.isFinite(item))
  )
}

/**
 * Computes the embedding vector for a piece of text.
 *
 * When both OLLAMA_API_URL and OLLAMA_EMBED_MODEL are set, Ollama is tried first; any
 * failure there (network error, non-2xx status, or a response without a usable vector)
 * is logged and the request falls through to Azure OpenAI.
 *
 * @param text Text to embed.
 * @returns The embedding as an array of numbers.
 * @throws Error when the Azure configuration is incomplete, when Azure answers with a
 *         non-2xx status, or when its response does not contain an embedding vector.
 */
export async function getEmbedding(text: string): Promise<number[]> {
  // Try Ollama first (for local embeddings)
  const ollamaUrl = process.env.OLLAMA_API_URL
  const ollamaModel = process.env.OLLAMA_EMBED_MODEL
  if (ollamaUrl && ollamaModel) {
    try {
      // Strip trailing slashes so a configured "http://host/" does not yield "//api/...".
      const response = await fetch(`${ollamaUrl.replace(/\/+$/, '')}/api/embeddings`, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
        },
        body: JSON.stringify({
          model: ollamaModel,
          prompt: text,
        }),
      })
      if (response.ok) {
        const payload: unknown = await response.json()
        const vector = (payload as { embedding?: unknown } | null)?.embedding
        if (isEmbeddingVector(vector)) {
          return vector
        }
        // A 2xx without a vector must not leak `undefined` to callers.
        console.warn('Ollama embedding response had no usable vector, falling back to Azure')
      } else {
        console.warn(`Ollama embedding failed: ${response.status}, falling back to Azure`)
      }
    } catch (error) {
      console.warn('Ollama embedding error, falling back to Azure:', error)
    }
  }

  // Fallback to Azure OpenAI
  const endpoint = process.env.AZURE_OPENAI_ENDPOINT
  const deployment = process.env.AZURE_OPENAI_EMBED_DEPLOYMENT
  const apiKey = process.env.AZURE_OPENAI_KEY
  const embedApiVersion = process.env.AZURE_OPENAI_EMBED_API_VERSION || process.env.AZURE_OPENAI_API_VERSION
  if (!endpoint || !deployment || !apiKey || !embedApiVersion) {
    // Fail with a clear message instead of requesting an "undefined/openai/..." URL.
    throw new Error(
      'Azure OpenAI embedding configuration is incomplete: AZURE_OPENAI_ENDPOINT, ' +
        'AZURE_OPENAI_EMBED_DEPLOYMENT, AZURE_OPENAI_KEY and an API version are required'
    )
  }

  const response = await fetch(
    `${endpoint.replace(/\/+$/, '')}/openai/deployments/${deployment}/embeddings?api-version=${embedApiVersion}`,
    {
      method: 'POST',
      headers: {
        'api-key': apiKey,
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({
        input: [text],
      }),
    }
  )
  if (!response.ok) {
    throw new Error(`Embedding API error: ${response.status}`)
  }

  const payload: unknown = await response.json()
  const vector = (payload as { data?: Array<{ embedding?: unknown }> } | null)?.data?.[0]?.embedding
  if (!isEmbeddingVector(vector)) {
    throw new Error('Embedding API returned a response without an embedding vector')
  }
  return vector
}
/**
 * Embedding-based verse search across every vector table available for a language.
 *
 * Each table is queried for its nearest verses by the `<=>` distance to the query embedding;
 * the rows of all tables are then merged, ordered by descending `similarity`, and cut to `limit`.
 * A table whose query fails is logged and skipped rather than aborting the whole search.
 *
 * @param query Free-text search query; it is embedded with `getEmbedding`.
 * @param language Language code used to pick the vector tables (default 'ro').
 * @param limit Maximum number of verses to return (default 10). Fractions are truncated;
 *        a non-positive or non-finite value yields an empty result.
 * @param fallbackToEnglish When true and the language produced no rows, the English tables
 *        are searched instead (default true).
 * @returns Up to `limit` rows, each also carrying a `source_table` column.
 * @throws Re-throws any error from table discovery, embedding, or acquiring a connection.
 */
export async function searchBibleSemantic(
  query: string,
  language: string = 'ro',
  limit: number = 10,
  fallbackToEnglish: boolean = true
): Promise<BibleVerse[]> {
  try {
    console.log(`🔍 Searching Bible: language="${language}", query="${query.substring(0, 50)}..."`)
    // A negative limit would make slice() drop rows from the tail, and NaN/fractions produce
    // an invalid SQL LIMIT, so normalize once and bail out early when nothing can be returned.
    const maxResults = Number.isFinite(limit) ? Math.floor(limit) : 0
    if (maxResults <= 0) {
      return []
    }

    const tables = await getAllVectorTables(language)
    console.log(` Found ${tables.length} table(s) for language "${language}":`, tables.map(t => t.split('.')[1]))
    const queryEmbedding = await getEmbedding(query)
    const embeddingParam = JSON.stringify(queryEmbedding)
    const client = await pool.connect()
    try {
      const allResults: BibleVerse[] = []

      // Runs the nearest-neighbour query on each table in turn and collects the rows.
      const searchTables = async (
        targets: string[],
        perTableLimit: number,
        logSuffix: string
      ): Promise<void> => {
        for (const table of targets) {
          try {
            // The table name is bound as a parameter for the source_table column instead of
            // being spliced into a SQL string literal; only the FROM identifier is interpolated.
            const sql = `SELECT ref, book, chapter, verse, text_raw,
              1 - (embedding <=> $1) AS similarity,
              $3::text as source_table
              FROM ${table}
              WHERE embedding IS NOT NULL
              ORDER BY embedding <=> $1
              LIMIT $2`
            const result = await client.query(sql, [embeddingParam, perTableLimit, table])
            console.log(`${table.split('.')[1]}${logSuffix}: found ${result.rows.length} verses`)
            allResults.push(...result.rows)
          } catch (tableError) {
            console.warn(` ✗ Error querying ${table}:`, tableError)
          }
        }
      }

      // Search in primary language tables
      if (tables.length > 0) {
        // Over-fetch (1.5x, at least 5 per table) so the merged ranking has enough candidates.
        const limitPerTable = Math.max(5, Math.ceil(maxResults * 1.5 / tables.length))
        await searchTables(tables, limitPerTable, '')
      }

      // Fallback to English if no results and fallback enabled
      if (allResults.length === 0 && fallbackToEnglish && language !== 'en') {
        console.log(` ⚠️ No results in "${language}", falling back to English...`)
        const englishTables = await getAllVectorTables('en')
        console.log(` Found ${englishTables.length} English table(s)`)
        await searchTables(englishTables, maxResults, ' (EN fallback)')
      }

      // Sort all results by similarity and return top results
      const topResults = allResults
        .sort((a, b) => (b.similarity || 0) - (a.similarity || 0))
        .slice(0, maxResults)
      console.log(` ✅ Returning ${topResults.length} total verses`)
      return topResults
    } finally {
      client.release()
    }
  } catch (error) {
    console.error('Error in semantic search:', error)
    throw error
  }
}
/**
 * "Hybrid" verse search across every vector table available for a language.
 *
 * The full-text half is currently disabled (the tables have no TSV column), so this runs
 * a vector-only ranking: `combined_score` is computed with the same expression as
 * `similarity`, and `ref` is built in SQL as "<book> <chapter>:<verse>".
 * Rows from all tables are merged, ordered by descending `combined_score`, and cut to `limit`.
 * A table whose query fails is logged and skipped rather than aborting the whole search.
 *
 * @param query Free-text search query; it is embedded with `getEmbedding`.
 * @param language Language code used to pick the vector tables (default 'ro').
 * @param limit Maximum number of verses to return (default 10). Fractions are truncated;
 *        a non-positive or non-finite value yields an empty result.
 * @param fallbackToEnglish When true and the language produced no rows, the English tables
 *        are searched instead (default true).
 * @returns Up to `limit` rows, each also carrying a `source_table` column.
 * @throws Re-throws any error from table discovery, embedding, or acquiring a connection.
 */
export async function searchBibleHybrid(
  query: string,
  language: string = 'ro',
  limit: number = 10,
  fallbackToEnglish: boolean = true
): Promise<BibleVerse[]> {
  try {
    console.log(`🔍 Hybrid Search: language="${language}", query="${query.substring(0, 50)}..."`)
    // A negative limit would make slice() drop rows from the tail, and NaN/fractions produce
    // an invalid SQL LIMIT, so normalize once and bail out early when nothing can be returned.
    const maxResults = Number.isFinite(limit) ? Math.floor(limit) : 0
    if (maxResults <= 0) {
      return []
    }

    const tables = await getAllVectorTables(language)
    console.log(` Found ${tables.length} table(s) for language "${language}"`)
    const queryEmbedding = await getEmbedding(query)
    const embeddingParam = JSON.stringify(queryEmbedding)
    const client = await pool.connect()
    try {
      const allResults: BibleVerse[] = []

      // Runs the nearest-neighbour query on each table in turn and collects the rows.
      const searchTables = async (
        targets: string[],
        perTableLimit: number,
        logSuffix: string
      ): Promise<void> => {
        for (const table of targets) {
          try {
            // Use simple semantic search (no text search - TSV column doesn't exist).
            // The table name is bound as a parameter for the source_table column instead of
            // being spliced into a SQL string literal; only the FROM identifier is interpolated.
            const sql = `SELECT book || ' ' || chapter || ':' || verse as ref,
              book, chapter, verse, text_raw,
              1 - (embedding <=> $1) AS similarity,
              1 - (embedding <=> $1) AS combined_score,
              $3::text as source_table
              FROM ${table}
              WHERE embedding IS NOT NULL
              ORDER BY embedding <=> $1
              LIMIT $2`
            const result = await client.query(sql, [embeddingParam, perTableLimit, table])
            console.log(`${table.split('.')[1]}${logSuffix}: found ${result.rows.length} verses`)
            allResults.push(...result.rows)
          } catch (tableError) {
            console.warn(` ✗ Error querying ${table}:`, tableError)
          }
        }
      }

      // Search in primary language tables
      if (tables.length > 0) {
        // Over-fetch (1.5x, at least 5 per table) so the merged ranking has enough candidates.
        const limitPerTable = Math.max(5, Math.ceil(maxResults * 1.5 / tables.length))
        await searchTables(tables, limitPerTable, '')
      }

      // Fallback to English if no results and fallback enabled
      if (allResults.length === 0 && fallbackToEnglish && language !== 'en') {
        console.log(` ⚠️ No results in "${language}", falling back to English...`)
        const englishTables = await getAllVectorTables('en')
        console.log(` Found ${englishTables.length} English table(s)`)
        await searchTables(englishTables, maxResults, ' (EN fallback)')
      }

      // Sort all results by combined score and return top results
      const topResults = allResults
        .sort((a, b) => (b.combined_score || 0) - (a.combined_score || 0))
        .slice(0, maxResults)
      console.log(` ✅ Returning ${topResults.length} total verses`)
      return topResults
    } finally {
      client.release()
    }
  } catch (error) {
    console.error('Error in hybrid search:', error)
    throw error
  }
}
/**
 * Fetches the verses surrounding a given verse within the same chapter.
 *
 * The language cannot be inferred here, so the lookup goes to the legacy
 * `bible_passages` table; callers should use the main hybrid result to decide on
 * language. This can later be extended to use the per-language tables.
 *
 * @param book Book name to match exactly.
 * @param chapter Chapter number to match exactly.
 * @param verse Verse number at the centre of the window.
 * @param contextSize Number of verses to include on each side (default 2).
 * @returns Rows with verse numbers in [verse - contextSize, verse + contextSize], ordered by verse.
 */
export async function getContextVerses(
  book: string,
  chapter: number,
  verse: number,
  contextSize: number = 2
): Promise<BibleVerse[]> {
  const firstVerse = verse - contextSize
  const lastVerse = verse + contextSize
  const contextQuery = `
SELECT ref, book, chapter, verse, text_raw
FROM bible_passages
WHERE book = $1 AND chapter = $2
AND verse BETWEEN $3 AND $4
ORDER BY verse
`
  const connection = await pool.connect()
  try {
    const { rows } = await connection.query(contextQuery, [book, chapter, firstVerse, lastVerse])
    return rows
  } finally {
    connection.release()
  }
}