Implement Azure OpenAI vector embeddings for Romanian Bible

- Add pgvector support with bible_passages table for vector search
- Create Python ingestion script for Azure OpenAI embed-3 embeddings
- Implement hybrid search combining vector similarity and full-text search
- Update AI chat to use vector search with Azure OpenAI gpt-4o
- Add floating chat component with Material UI design
- Import complete Romanian Bible (FIDELA) with 30K+ verses
- Add vector search library for semantic Bible search
- Create multi-language implementation plan for future expansion

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
andupetcu
2025-09-20 15:18:00 +03:00
parent 3b375c869b
commit dd5e1102eb
14 changed files with 2082 additions and 68 deletions

140
lib/vector-search.ts Normal file
View File

@@ -0,0 +1,140 @@
import { Pool } from 'pg'
// Module-level PostgreSQL connection pool, configured from the DATABASE_URL
// environment variable and used by the query helpers in this file.
const pool = new Pool({ connectionString: process.env.DATABASE_URL })
/**
 * Shape of a `bible_passages` row as returned by the search helpers in this
 * module.
 */
export interface BibleVerse {
  /**
   * Value of the `id` column.
   * NOTE(review): typed as `string`; confirm this matches the column type
   * (e.g. uuid/bigint arrive as strings from `pg`, integer as number).
   */
  id: string
  /** Value of the `ref` column — presumably a display reference; confirm against the ingestion script. */
  ref: string
  /** Book name; matched by equality in `getContextVerses`. */
  book: string
  /** Chapter number within `book`. */
  chapter: number
  /** Verse number within `chapter`. */
  verse: number
  /** Value of the `text_raw` column (the passage text). */
  text_raw: string
  /** Set by `searchBibleSemantic`: `1 - (embedding <=> queryEmbedding)`. */
  similarity?: number
  /** Set by `searchBibleHybrid`: `0.7 * vector similarity + 0.3 * text rank`. */
  combined_score?: number
}
/**
 * Requests an embedding vector for `text` from the Azure OpenAI embeddings
 * deployment configured through environment variables.
 *
 * Reads `AZURE_OPENAI_ENDPOINT`, `AZURE_OPENAI_EMBED_DEPLOYMENT`,
 * `AZURE_OPENAI_API_VERSION` and `AZURE_OPENAI_KEY` on every call.
 *
 * @param text - Text to embed; sent as a single-element `input` array.
 * @returns The `embedding` of the first item in the response's `data` array.
 * @throws Error when a required environment variable is unset, when the
 *   service replies with a non-OK status (`Embedding API error: <status>`),
 *   or when the response body does not contain a numeric embedding array.
 */
export async function getEmbedding(text: string): Promise<number[]> {
  const endpoint = process.env.AZURE_OPENAI_ENDPOINT
  const deployment = process.env.AZURE_OPENAI_EMBED_DEPLOYMENT
  const apiVersion = process.env.AZURE_OPENAI_API_VERSION
  const apiKey = process.env.AZURE_OPENAI_KEY

  // Fail fast with a readable message instead of silently requesting a URL
  // that contains the literal text "undefined".
  if (!endpoint || !deployment || !apiVersion || !apiKey) {
    const missing = [
      !endpoint && 'AZURE_OPENAI_ENDPOINT',
      !deployment && 'AZURE_OPENAI_EMBED_DEPLOYMENT',
      !apiVersion && 'AZURE_OPENAI_API_VERSION',
      !apiKey && 'AZURE_OPENAI_KEY',
    ].filter(Boolean)
    throw new Error(`Missing Azure OpenAI configuration: ${missing.join(', ')}`)
  }

  // Tolerate an endpoint configured with a trailing slash so the path does
  // not end up as "//openai/...".
  const baseUrl = endpoint.replace(/\/+$/, '')
  const url =
    `${baseUrl}/openai/deployments/${encodeURIComponent(deployment)}` +
    `/embeddings?api-version=${encodeURIComponent(apiVersion)}`

  const response = await fetch(url, {
    method: 'POST',
    headers: {
      'api-key': apiKey,
      'Content-Type': 'application/json',
    },
    body: JSON.stringify({
      input: [text],
    }),
  })

  if (!response.ok) {
    throw new Error(`Embedding API error: ${response.status}`)
  }

  // The JSON body is untyped; verify the expected shape before trusting it.
  const body = (await response.json()) as
    | { data?: Array<{ embedding?: unknown } | null> }
    | null
  const embedding = body?.data?.[0]?.embedding
  if (
    !Array.isArray(embedding) ||
    !embedding.every((value) => typeof value === 'number')
  ) {
    throw new Error(
      'Embedding API error: response did not contain an embedding vector'
    )
  }
  return embedding
}
/**
 * Returns the passages whose stored embeddings are nearest to the embedding
 * of `query`.
 *
 * The query text is embedded with `getEmbedding`, then `bible_passages` rows
 * that have a non-null `embedding` are ordered by the pgvector `<=>` distance
 * to that vector. Each row carries `similarity = 1 - distance`.
 *
 * @param query - Free-text search query.
 * @param limit - Maximum number of rows to return (default 10).
 * @returns Matching rows, nearest first, including `id` and `similarity`.
 * @throws Re-throws any embedding or database error after logging it.
 */
export async function searchBibleSemantic(
  query: string,
  limit: number = 10
): Promise<BibleVerse[]> {
  try {
    const queryEmbedding = await getEmbedding(query)
    // `id` is selected so the rows satisfy the required `BibleVerse.id` field.
    // pool.query checks a client out and releases it for this one statement.
    const result = await pool.query(
      `
      SELECT id, ref, book, chapter, verse, text_raw,
             1 - (embedding <=> $1) AS similarity
      FROM bible_passages
      WHERE embedding IS NOT NULL
      ORDER BY embedding <=> $1
      LIMIT $2
      `,
      // The vector is passed in its text form, e.g. "[0.1,0.2,...]".
      [JSON.stringify(queryEmbedding), limit]
    )
    return result.rows
  } catch (error) {
    console.error('Error in semantic search:', error)
    throw error
  }
}
/**
 * Searches `bible_passages` by combining vector similarity with Romanian
 * full-text ranking.
 *
 * Two candidate sets are built in SQL:
 * - `vector_search`: the 100 rows nearest to the query embedding by the
 *   pgvector `<=>` distance, scored as `1 - distance`;
 * - `text_search`: rows whose `tsv` matches
 *   `plainto_tsquery('romanian', query)`, scored with `ts_rank`.
 *
 * A row present in either set is returned with
 * `combined_score = 0.7 * vector_sim + 0.3 * text_rank`, where a side the row
 * is missing from counts as 0. Rows do not carry `similarity`.
 *
 * NOTE(review): `ts_rank` is not bounded to [0, 1], so the 0.7/0.3 weights are
 * not a strict percentage split — confirm this weighting is intended.
 *
 * @param query - Free-text search query (used for both the embedding and the
 *   text search).
 * @param limit - Maximum number of rows to return (default 10).
 * @returns Matching rows ordered by `combined_score` descending, including
 *   `id`.
 * @throws Re-throws any embedding or database error after logging it.
 */
export async function searchBibleHybrid(
  query: string,
  limit: number = 10
): Promise<BibleVerse[]> {
  try {
    const queryEmbedding = await getEmbedding(query)
    // `bp.id` is selected so the rows satisfy the required `BibleVerse.id`
    // field. pool.query checks a client out and releases it for this statement.
    const result = await pool.query(
      `
      WITH vector_search AS (
        SELECT id, 1 - (embedding <=> $1) AS vector_sim
        FROM bible_passages
        WHERE embedding IS NOT NULL
        ORDER BY embedding <=> $1
        LIMIT 100
      ),
      text_search AS (
        SELECT id, ts_rank(tsv, plainto_tsquery('romanian', $3)) AS text_rank
        FROM bible_passages
        WHERE tsv @@ plainto_tsquery('romanian', $3)
      )
      SELECT bp.id, bp.ref, bp.book, bp.chapter, bp.verse, bp.text_raw,
             COALESCE(vs.vector_sim, 0) * 0.7 + COALESCE(ts.text_rank, 0) * 0.3 AS combined_score
      FROM bible_passages bp
      LEFT JOIN vector_search vs ON vs.id = bp.id
      LEFT JOIN text_search ts ON ts.id = bp.id
      WHERE vs.id IS NOT NULL OR ts.id IS NOT NULL
      ORDER BY combined_score DESC
      LIMIT $2
      `,
      // $1 = query vector in text form, $2 = row limit, $3 = raw query text.
      [JSON.stringify(queryEmbedding), limit, query]
    )
    return result.rows
  } catch (error) {
    console.error('Error in hybrid search:', error)
    throw error
  }
}
/**
 * Fetches a verse together with its neighbouring verses from the same
 * chapter.
 *
 * Selects `bible_passages` rows for `book`/`chapter` whose verse number lies
 * in `[verse - contextSize, verse + contextSize]`, ordered by verse. A lower
 * bound below the first verse simply matches nothing extra.
 *
 * @param book - Book name, matched by equality against the `book` column.
 * @param chapter - Chapter number.
 * @param verse - Verse number at the centre of the window.
 * @param contextSize - Number of verses to include on each side (default 2).
 * @returns The rows in the window, in verse order, including `id`.
 * @throws Propagates any database error.
 */
export async function getContextVerses(
  book: string,
  chapter: number,
  verse: number,
  contextSize: number = 2
): Promise<BibleVerse[]> {
  // `id` is selected so the rows satisfy the required `BibleVerse.id` field.
  // pool.query checks a client out and releases it for this one statement.
  const result = await pool.query(
    `
    SELECT id, ref, book, chapter, verse, text_raw
    FROM bible_passages
    WHERE book = $1 AND chapter = $2
      AND verse BETWEEN $3 AND $4
    ORDER BY verse
    `,
    [book, chapter, verse - contextSize, verse + contextSize]
  )
  return result.rows
}