Implement Azure OpenAI vector embeddings for Romanian Bible
- Add pgvector support with bible_passages table for vector search
- Create Python ingestion script for Azure OpenAI embed-3 embeddings
- Implement hybrid search combining vector similarity and full-text search
- Update AI chat to use vector search with Azure OpenAI gpt-4o
- Add floating chat component with Material UI design
- Import complete Romanian Bible (FIDELA) with 30K+ verses
- Add vector search library for semantic Bible search
- Create multi-language implementation plan for future expansion

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
140
lib/vector-search.ts
Normal file
140
lib/vector-search.ts
Normal file
@@ -0,0 +1,140 @@
|
||||
import { Pool } from 'pg'
|
||||
|
||||
// Shared PostgreSQL connection pool used by every query in this module.
// The connection string is read from the DATABASE_URL environment variable.
const pool = new Pool({
  connectionString: process.env.DATABASE_URL,
})
|
||||
|
||||
/**
 * One row of the `bible_passages` table as returned by the search helpers
 * in this module.
 */
export interface BibleVerse {
  /** Row identifier (`bible_passages.id`). */
  id: string
  /** Value of the `ref` column (the passage reference). */
  ref: string
  /** Value of the `book` column. */
  book: string
  /** Chapter number within `book`. */
  chapter: number
  /** Verse number within `chapter`. */
  verse: number
  /** Verse text as stored in the `text_raw` column. */
  text_raw: string
  /**
   * Present on rows from `searchBibleSemantic`: `1 - (embedding <=> query)`,
   * i.e. one minus the pgvector distance to the query embedding.
   */
  similarity?: number
  /**
   * Present on rows from `searchBibleHybrid`: vector similarity weighted 0.7
   * plus full-text `ts_rank` weighted 0.3.
   */
  combined_score?: number
}
|
||||
|
||||
/**
 * Requests an embedding vector for `text` from an Azure OpenAI embeddings
 * deployment.
 *
 * Configuration is read from the environment: `AZURE_OPENAI_ENDPOINT`,
 * `AZURE_OPENAI_EMBED_DEPLOYMENT`, `AZURE_OPENAI_API_VERSION` and
 * `AZURE_OPENAI_KEY`.
 *
 * @param text - The text to embed; sent as the single element of `input`.
 * @returns The embedding of the first (only) input item.
 * @throws Error if any configuration variable is missing, if the service
 *   answers with a non-2xx status, or if the response body does not contain
 *   a numeric embedding array.
 */
export async function getEmbedding(text: string): Promise<number[]> {
  const endpoint = process.env.AZURE_OPENAI_ENDPOINT
  const deployment = process.env.AZURE_OPENAI_EMBED_DEPLOYMENT
  const apiVersion = process.env.AZURE_OPENAI_API_VERSION
  const apiKey = process.env.AZURE_OPENAI_KEY

  // Fail fast instead of calling a URL that contains the text "undefined".
  if (!endpoint || !deployment || !apiVersion || !apiKey) {
    throw new Error(
      'Embedding API error: missing Azure OpenAI configuration ' +
        '(AZURE_OPENAI_ENDPOINT, AZURE_OPENAI_EMBED_DEPLOYMENT, ' +
        'AZURE_OPENAI_API_VERSION, AZURE_OPENAI_KEY)'
    )
  }

  // Drop trailing slashes so the path never starts with "//openai".
  const baseUrl = endpoint.replace(/\/+$/, '')
  const url = `${baseUrl}/openai/deployments/${deployment}/embeddings?api-version=${apiVersion}`

  const response = await fetch(url, {
    method: 'POST',
    headers: {
      'api-key': apiKey,
      'Content-Type': 'application/json',
    },
    body: JSON.stringify({
      input: [text],
    }),
  })

  if (!response.ok) {
    throw new Error(`Embedding API error: ${response.status}`)
  }

  // The body is external input: narrow it before trusting its shape.
  const payload: unknown = await response.json()
  const embedding = (
    payload as { data?: Array<{ embedding?: unknown }> } | null
  )?.data?.[0]?.embedding

  if (
    !Array.isArray(embedding) ||
    !embedding.every((value) => typeof value === 'number')
  ) {
    throw new Error(
      'Embedding API error: response did not contain an embedding vector'
    )
  }

  return embedding
}
|
||||
|
||||
/**
 * Finds the passages whose stored embedding is closest to the embedding of
 * `query`.
 *
 * Rows are ordered by ascending pgvector `<=>` distance; each row carries
 * `similarity = 1 - distance`.
 *
 * @param query - Free-text search query. A blank query returns `[]` without
 *   calling the embeddings service.
 * @param limit - Maximum number of rows. Fractions are floored, values below
 *   zero are treated as zero, and non-finite values fall back to 10.
 * @returns Matching verses, best match first.
 * @throws Rethrows any error from `getEmbedding` or from the database after
 *   logging it.
 */
export async function searchBibleSemantic(
  query: string,
  limit: number = 10
): Promise<BibleVerse[]> {
  // LIMIT needs a non-negative integer; anything else is a Postgres error.
  const rowLimit = Number.isFinite(limit) ? Math.max(0, Math.floor(limit)) : 10

  // Nothing to search for / nothing to return: skip the API and the DB.
  if (query.trim() === '' || rowLimit === 0) {
    return []
  }

  try {
    const queryEmbedding = await getEmbedding(query)

    const client = await pool.connect()
    try {
      const result = await client.query(
        `
        SELECT id::text AS id, ref, book, chapter, verse, text_raw,
          1 - (embedding <=> $1) AS similarity
        FROM bible_passages
        WHERE embedding IS NOT NULL
        ORDER BY embedding <=> $1
        LIMIT $2
        `,
        // The embedding is sent in its JSON array text form, e.g. "[0.1,0.2]".
        [JSON.stringify(queryEmbedding), rowLimit]
      )

      return result.rows
    } finally {
      // Always hand the client back to the pool, even when the query fails.
      client.release()
    }
  } catch (error) {
    console.error('Error in semantic search:', error)
    throw error
  }
}
|
||||
|
||||
/**
 * Ranks passages by a blend of vector similarity and Romanian full-text rank.
 *
 * Two candidate sets are built: the nearest passages by pgvector `<=>`
 * distance, and every passage whose `tsv` matches
 * `plainto_tsquery('romanian', query)`. A passage in either set is scored as
 * `0.7 * vector_sim + 0.3 * text_rank`, with a missing component counted as 0.
 *
 * @param query - Free-text search query. A blank query returns `[]` without
 *   calling the embeddings service.
 * @param limit - Maximum number of rows. Fractions are floored, values below
 *   zero are treated as zero, and non-finite values fall back to 10.
 * @returns Matching verses ordered by descending `combined_score`.
 * @throws Rethrows any error from `getEmbedding` or from the database after
 *   logging it.
 */
export async function searchBibleHybrid(
  query: string,
  limit: number = 10
): Promise<BibleVerse[]> {
  // LIMIT needs a non-negative integer; anything else is a Postgres error.
  const rowLimit = Number.isFinite(limit) ? Math.max(0, Math.floor(limit)) : 10

  // Nothing to search for / nothing to return: skip the API and the DB.
  if (query.trim() === '' || rowLimit === 0) {
    return []
  }

  // Keep at least 100 vector candidates, but never fewer than the caller
  // asked for, so limits above 100 can still be satisfied.
  const vectorCandidates = Math.max(100, rowLimit)

  try {
    const queryEmbedding = await getEmbedding(query)

    const client = await pool.connect()
    try {
      const result = await client.query(
        `
        WITH vector_search AS (
          SELECT id, 1 - (embedding <=> $1) AS vector_sim
          FROM bible_passages
          WHERE embedding IS NOT NULL
          ORDER BY embedding <=> $1
          LIMIT $4
        ),
        text_search AS (
          SELECT id, ts_rank(tsv, plainto_tsquery('romanian', $3)) AS text_rank
          FROM bible_passages
          WHERE tsv @@ plainto_tsquery('romanian', $3)
        )
        SELECT bp.id::text AS id, bp.ref, bp.book, bp.chapter, bp.verse, bp.text_raw,
          COALESCE(vs.vector_sim, 0) * 0.7 + COALESCE(ts.text_rank, 0) * 0.3 AS combined_score
        FROM bible_passages bp
        LEFT JOIN vector_search vs ON vs.id = bp.id
        LEFT JOIN text_search ts ON ts.id = bp.id
        WHERE vs.id IS NOT NULL OR ts.id IS NOT NULL
        ORDER BY combined_score DESC
        LIMIT $2
        `,
        // $1 embedding as JSON array text, $2 row limit, $3 raw query text,
        // $4 size of the vector candidate pool.
        [JSON.stringify(queryEmbedding), rowLimit, query, vectorCandidates]
      )

      return result.rows
    } finally {
      // Always hand the client back to the pool, even when the query fails.
      client.release()
    }
  } catch (error) {
    console.error('Error in hybrid search:', error)
    throw error
  }
}
|
||||
|
||||
/**
 * Loads a verse together with its neighbours from the same chapter.
 *
 * Selects rows of `bible_passages` for `book` and `chapter` whose verse
 * number lies between `verse - contextSize` and `verse + contextSize`
 * (inclusive), ordered by verse number.
 *
 * @param book - Value matched against the `book` column.
 * @param chapter - Chapter number.
 * @param verse - Verse number at the centre of the window.
 * @param contextSize - Number of verses to include on each side. Fractions
 *   are floored, values below zero are treated as zero, and non-finite
 *   values fall back to 2.
 * @returns The verses in the window, in ascending verse order.
 */
export async function getContextVerses(
  book: string,
  chapter: number,
  verse: number,
  contextSize: number = 2
): Promise<BibleVerse[]> {
  // Keep the window bounds integral so they compare cleanly with `verse`.
  const span = Number.isFinite(contextSize)
    ? Math.max(0, Math.floor(contextSize))
    : 2

  const client = await pool.connect()
  try {
    const result = await client.query(
      `
      SELECT id::text AS id, ref, book, chapter, verse, text_raw
      FROM bible_passages
      WHERE book = $1 AND chapter = $2
        AND verse BETWEEN $3 AND $4
      ORDER BY verse
      `,
      [book, chapter, verse - span, verse + span]
    )

    return result.rows
  } finally {
    // Always hand the client back to the pool, even when the query fails.
    client.release()
  }
}
|
||||
Reference in New Issue
Block a user