/*
 * Commit summary:
 * - Add pgvector support with bible_passages table for vector search
 * - Create Python ingestion script for Azure OpenAI embed-3 embeddings
 * - Implement hybrid search combining vector similarity and full-text search
 * - Update AI chat to use vector search with Azure OpenAI gpt-4o
 * - Add floating chat component with Material UI design
 * - Import complete Romanian Bible (FIDELA) with 30K+ verses
 * - Add vector search library for semantic Bible search
 * - Create multi-language implementation plan for future expansion
 *
 * 🤖 Generated with [Claude Code](https://claude.ai/code)
 *
 * Co-Authored-By: Claude <noreply@anthropic.com>
 */
140 lines
3.5 KiB
TypeScript
140 lines
3.5 KiB
TypeScript
import { Pool } from 'pg'
|
|
|
|
// Module-wide PostgreSQL connection pool; the connection string is read from DATABASE_URL.
const pool = new Pool({ connectionString: process.env.DATABASE_URL })
|
|
|
|
/**
 * One row of the `bible_passages` table as returned by the search helpers
 * in this module.
 */
export interface BibleVerse {
  /** Row identifier (`bible_passages.id`). */
  id: string

  /** Reference string stored with the verse (presumably human-readable, e.g. book + chapter:verse — confirm against the ingestion script). */
  ref: string

  /** Book the verse belongs to. */
  book: string

  /** Chapter number within `book`. */
  chapter: number

  /** Verse number within `chapter`. */
  verse: number

  /** Verse text as stored in the `text_raw` column. */
  text_raw: string

  /**
   * Present on semantic-search results only: `1 - (embedding <=> queryEmbedding)`,
   * where `<=>` is pgvector's cosine-distance operator.
   */
  similarity?: number

  /**
   * Present on hybrid-search results only: weighted blend of vector similarity
   * (0.7) and full-text `ts_rank` (0.3).
   */
  combined_score?: number
}
|
|
|
|
/**
 * Requests an embedding vector for `text` from the Azure OpenAI embeddings
 * REST endpoint.
 *
 * Configuration is read from the `AZURE_OPENAI_ENDPOINT`,
 * `AZURE_OPENAI_EMBED_DEPLOYMENT`, `AZURE_OPENAI_API_VERSION` and
 * `AZURE_OPENAI_KEY` environment variables.
 *
 * @param text - Text to embed; sent as a single-element `input` array.
 * @returns The embedding returned for that single input.
 * @throws Error when configuration is missing, when the API answers with a
 *   non-OK status (`Embedding API error: <status>`), or when the response
 *   body does not contain a numeric embedding.
 */
export async function getEmbedding(text: string): Promise<number[]> {
  const endpoint = process.env.AZURE_OPENAI_ENDPOINT
  const deployment = process.env.AZURE_OPENAI_EMBED_DEPLOYMENT
  const apiVersion = process.env.AZURE_OPENAI_API_VERSION
  const apiKey = process.env.AZURE_OPENAI_KEY

  // Fail fast with a readable message instead of issuing a request to
  // "undefined/openai/..." or sending an "undefined" api-key header.
  if (!endpoint || !deployment || !apiVersion || !apiKey) {
    throw new Error(
      'Embedding API error: AZURE_OPENAI_ENDPOINT, AZURE_OPENAI_EMBED_DEPLOYMENT, ' +
        'AZURE_OPENAI_API_VERSION and AZURE_OPENAI_KEY must all be set'
    )
  }

  // Tolerate an endpoint configured with a trailing slash so the path does
  // not start with "//openai".
  const baseUrl = endpoint.replace(/\/+$/, '')
  const url = `${baseUrl}/openai/deployments/${deployment}/embeddings?api-version=${apiVersion}`

  const response = await fetch(url, {
    method: 'POST',
    headers: {
      'api-key': apiKey,
      'Content-Type': 'application/json',
    },
    body: JSON.stringify({
      input: [text],
    }),
  })

  if (!response.ok) {
    throw new Error(`Embedding API error: ${response.status}`)
  }

  // The payload comes from the network, so check its shape before trusting it.
  const payload = (await response.json()) as
    | { data?: Array<{ embedding?: unknown }> }
    | null
  const embedding = payload?.data?.[0]?.embedding

  if (
    !Array.isArray(embedding) ||
    embedding.length === 0 ||
    !embedding.every((value): value is number => typeof value === 'number')
  ) {
    throw new Error('Embedding API error: response did not contain an embedding vector')
  }

  return embedding
}
|
|
|
|
/**
 * Finds the verses whose stored embedding is closest to the embedding of
 * `query`.
 *
 * Rows are ordered by `embedding <=> queryEmbedding` (pgvector cosine
 * distance) and carry `similarity = 1 - distance`.
 *
 * @param query - Free-text search phrase. A blank query returns `[]` without
 *   calling the embedding API or the database.
 * @param limit - Maximum number of rows to return (default 10).
 * @returns Matching verses, most similar first, each with `similarity` set.
 * @throws Rethrows any error from `getEmbedding` or the database after
 *   logging it.
 */
export async function searchBibleSemantic(
  query: string,
  limit: number = 10
): Promise<BibleVerse[]> {
  // Nothing meaningful to embed or rank for an empty / whitespace-only query.
  if (query.trim() === '') {
    return []
  }

  try {
    const queryEmbedding = await getEmbedding(query)

    // pool.query checks a client out, runs the single statement and releases
    // the client again, so no manual connect()/release() is needed.
    const result = await pool.query(
      `
      SELECT id, ref, book, chapter, verse, text_raw,
             1 - (embedding <=> $1) AS similarity
      FROM bible_passages
      WHERE embedding IS NOT NULL
      ORDER BY embedding <=> $1
      LIMIT $2
      `,
      // JSON.stringify of a number[] yields "[x,y,...]", which is the text
      // form pgvector accepts for a vector value.
      [JSON.stringify(queryEmbedding), limit]
    )

    return result.rows
  } catch (error) {
    console.error('Error in semantic search:', error)
    throw error
  }
}
|
|
|
|
/**
 * Hybrid search: blends pgvector similarity with PostgreSQL full-text
 * ranking (Romanian text-search configuration).
 *
 * A verse qualifies if it is among the nearest vector candidates or matches
 * the full-text query. Its score is
 * `0.7 * vector_sim + 0.3 * ts_rank`, with a missing component counted as 0.
 *
 * @param query - Free-text search phrase. A blank query returns `[]` without
 *   calling the embedding API or the database.
 * @param limit - Maximum number of rows to return (default 10).
 * @returns Matching verses ordered by `combined_score`, highest first.
 * @throws Rethrows any error from `getEmbedding` or the database after
 *   logging it.
 */
export async function searchBibleHybrid(
  query: string,
  limit: number = 10
): Promise<BibleVerse[]> {
  // Nothing meaningful to embed or rank for an empty / whitespace-only query.
  if (query.trim() === '') {
    return []
  }

  // Keep the historical pool of 100 nearest vectors, but never let it be
  // smaller than the number of rows the caller asked for.
  const vectorCandidates = Math.max(100, limit)

  try {
    const queryEmbedding = await getEmbedding(query)

    // pool.query checks a client out, runs the single statement and releases
    // the client again, so no manual connect()/release() is needed.
    const result = await pool.query(
      `
      WITH vector_search AS (
        SELECT id, 1 - (embedding <=> $1) AS vector_sim
        FROM bible_passages
        WHERE embedding IS NOT NULL
        ORDER BY embedding <=> $1
        LIMIT $4
      ),
      text_search AS (
        SELECT id, ts_rank(tsv, plainto_tsquery('romanian', $3)) AS text_rank
        FROM bible_passages
        WHERE tsv @@ plainto_tsquery('romanian', $3)
      )
      SELECT bp.id, bp.ref, bp.book, bp.chapter, bp.verse, bp.text_raw,
             COALESCE(vs.vector_sim, 0) * 0.7 + COALESCE(ts.text_rank, 0) * 0.3 AS combined_score
      FROM bible_passages bp
      LEFT JOIN vector_search vs ON vs.id = bp.id
      LEFT JOIN text_search ts ON ts.id = bp.id
      WHERE vs.id IS NOT NULL OR ts.id IS NOT NULL
      ORDER BY combined_score DESC
      LIMIT $2
      `,
      // $1: embedding in pgvector text form "[x,y,...]", $2: row limit,
      // $3: raw query for full-text search, $4: vector candidate pool size.
      [JSON.stringify(queryEmbedding), limit, query, vectorCandidates]
    )

    return result.rows
  } catch (error) {
    console.error('Error in hybrid search:', error)
    throw error
  }
}
|
|
|
|
/**
 * Fetches a verse together with its neighbours in the same chapter.
 *
 * @param book - Book to look in (matched exactly against the `book` column).
 * @param chapter - Chapter number.
 * @param verse - Verse number at the centre of the window.
 * @param contextSize - How many verses before and after `verse` to include
 *   (default 2). The window is `verse - contextSize` to `verse + contextSize`,
 *   inclusive; bounds outside the chapter simply match no rows.
 * @returns The verses in the window, ordered by verse number.
 * @throws Propagates any database error.
 */
export async function getContextVerses(
  book: string,
  chapter: number,
  verse: number,
  contextSize: number = 2
): Promise<BibleVerse[]> {
  const firstVerse = verse - contextSize
  const lastVerse = verse + contextSize

  // pool.query checks a client out, runs the single statement and releases
  // the client again, so no manual connect()/release() is needed.
  const result = await pool.query(
    `
    SELECT id, ref, book, chapter, verse, text_raw
    FROM bible_passages
    WHERE book = $1 AND chapter = $2
      AND verse BETWEEN $3 AND $4
    ORDER BY verse
    `,
    [book, chapter, firstVerse, lastVerse]
  )

  return result.rows
}