Vector schema per version: add ai_bible schema with per-version tables (bv_<lang>_<abbr>) in Python ingest; dynamic table resolution in vector-search with fallback to legacy table; sample pgAdmin queries printed.
This commit is contained in:
@@ -4,6 +4,44 @@ const pool = new Pool({
|
||||
connectionString: process.env.DATABASE_URL,
|
||||
})
|
||||
|
||||
const VECTOR_SCHEMA = process.env.VECTOR_SCHEMA || 'ai_bible'
|
||||
|
||||
function safeIdent(s: string): string {
|
||||
return s.toLowerCase().replace(/[^a-z0-9_]+/g, '_').replace(/^_+|_+$/g, '')
|
||||
}
|
||||
|
||||
// Resolve per-language default version and corresponding vector table name
|
||||
// e.g. ai_bible.bv_ro_cornilescu
|
||||
async function resolveVectorTable(language: string): Promise<{ table: string; exists: boolean }> {
|
||||
const lang = safeIdent(language || 'ro')
|
||||
const client = await pool.connect()
|
||||
try {
|
||||
// Get default version abbreviation from "BibleVersion"
|
||||
const res = await client.query(
|
||||
`SELECT "abbreviation" FROM "BibleVersion"
|
||||
WHERE lower(language) = lower($1)
|
||||
ORDER BY "isDefault" DESC, "createdAt" ASC
|
||||
LIMIT 1`,
|
||||
[language]
|
||||
)
|
||||
const abbr = res.rows?.[0]?.abbreviation || 'default'
|
||||
const ab = safeIdent(abbr)
|
||||
const table = `${VECTOR_SCHEMA}.bv_${lang}_${ab}`
|
||||
|
||||
// Check if table exists
|
||||
const check = await client.query(
|
||||
`SELECT EXISTS (
|
||||
SELECT 1 FROM information_schema.tables
|
||||
WHERE table_schema = $1 AND table_name = $2
|
||||
) AS exists`,
|
||||
[VECTOR_SCHEMA, `bv_${lang}_${ab}`]
|
||||
)
|
||||
return { table, exists: Boolean(check.rows?.[0]?.exists) }
|
||||
} finally {
|
||||
client.release()
|
||||
}
|
||||
}
|
||||
|
||||
export interface BibleVerse {
|
||||
id: string
|
||||
ref: string
|
||||
@@ -44,21 +82,29 @@ export async function searchBibleSemantic(
|
||||
limit: number = 10
|
||||
): Promise<BibleVerse[]> {
|
||||
try {
|
||||
const { table, exists } = await resolveVectorTable(language)
|
||||
const queryEmbedding = await getEmbedding(query)
|
||||
|
||||
const client = await pool.connect()
|
||||
try {
|
||||
const result = await client.query(
|
||||
`
|
||||
SELECT ref, book, chapter, verse, text_raw,
|
||||
1 - (embedding <=> $1) AS similarity
|
||||
FROM bible_passages
|
||||
WHERE embedding IS NOT NULL AND lang = $3
|
||||
ORDER BY embedding <=> $1
|
||||
LIMIT $2
|
||||
`,
|
||||
[JSON.stringify(queryEmbedding), limit, language]
|
||||
)
|
||||
const sql = exists
|
||||
? `SELECT ref, book, chapter, verse, text_raw,
|
||||
1 - (embedding <=> $1) AS similarity
|
||||
FROM ${table}
|
||||
WHERE embedding IS NOT NULL
|
||||
ORDER BY embedding <=> $1
|
||||
LIMIT $2`
|
||||
: `SELECT ref, book, chapter, verse, text_raw,
|
||||
1 - (embedding <=> $1) AS similarity
|
||||
FROM bible_passages
|
||||
WHERE embedding IS NOT NULL AND lang = $3
|
||||
ORDER BY embedding <=> $1
|
||||
LIMIT $2`
|
||||
const params = exists
|
||||
? [JSON.stringify(queryEmbedding), limit]
|
||||
: [JSON.stringify(queryEmbedding), limit, language]
|
||||
|
||||
const result = await client.query(sql, params)
|
||||
|
||||
return result.rows
|
||||
} finally {
|
||||
@@ -76,6 +122,7 @@ export async function searchBibleHybrid(
|
||||
limit: number = 10
|
||||
): Promise<BibleVerse[]> {
|
||||
try {
|
||||
const { table, exists } = await resolveVectorTable(language)
|
||||
const queryEmbedding = await getEmbedding(query)
|
||||
|
||||
// Use appropriate text search configuration based on language
|
||||
@@ -83,31 +130,53 @@ export async function searchBibleHybrid(
|
||||
|
||||
const client = await pool.connect()
|
||||
try {
|
||||
const result = await client.query(
|
||||
`
|
||||
WITH vector_search AS (
|
||||
SELECT id, 1 - (embedding <=> $1) AS vector_sim
|
||||
FROM bible_passages
|
||||
WHERE embedding IS NOT NULL AND lang = $4
|
||||
ORDER BY embedding <=> $1
|
||||
LIMIT 100
|
||||
),
|
||||
text_search AS (
|
||||
SELECT id, ts_rank(tsv, plainto_tsquery($5, $3)) AS text_rank
|
||||
FROM bible_passages
|
||||
WHERE tsv @@ plainto_tsquery($5, $3) AND lang = $4
|
||||
)
|
||||
SELECT bp.ref, bp.book, bp.chapter, bp.verse, bp.text_raw,
|
||||
COALESCE(vs.vector_sim, 0) * 0.7 + COALESCE(ts.text_rank, 0) * 0.3 AS combined_score
|
||||
FROM bible_passages bp
|
||||
LEFT JOIN vector_search vs ON vs.id = bp.id
|
||||
LEFT JOIN text_search ts ON ts.id = bp.id
|
||||
WHERE (vs.id IS NOT NULL OR ts.id IS NOT NULL) AND bp.lang = $4
|
||||
ORDER BY combined_score DESC
|
||||
LIMIT $2
|
||||
`,
|
||||
[JSON.stringify(queryEmbedding), limit, query, language, textConfig]
|
||||
)
|
||||
const sql = exists
|
||||
? `WITH vector_search AS (
|
||||
SELECT id, 1 - (embedding <=> $1) AS vector_sim
|
||||
FROM ${table}
|
||||
WHERE embedding IS NOT NULL
|
||||
ORDER BY embedding <=> $1
|
||||
LIMIT 100
|
||||
),
|
||||
text_search AS (
|
||||
SELECT id, ts_rank(tsv, plainto_tsquery($4, $3)) AS text_rank
|
||||
FROM ${table}
|
||||
WHERE tsv @@ plainto_tsquery($4, $3)
|
||||
)
|
||||
SELECT bp.ref, bp.book, bp.chapter, bp.verse, bp.text_raw,
|
||||
COALESCE(vs.vector_sim, 0) * 0.7 + COALESCE(ts.text_rank, 0) * 0.3 AS combined_score
|
||||
FROM ${table} bp
|
||||
LEFT JOIN vector_search vs ON vs.id = bp.id
|
||||
LEFT JOIN text_search ts ON ts.id = bp.id
|
||||
WHERE (vs.id IS NOT NULL OR ts.id IS NOT NULL)
|
||||
ORDER BY combined_score DESC
|
||||
LIMIT $2`
|
||||
: `WITH vector_search AS (
|
||||
SELECT id, 1 - (embedding <=> $1) AS vector_sim
|
||||
FROM bible_passages
|
||||
WHERE embedding IS NOT NULL AND lang = $4
|
||||
ORDER BY embedding <=> $1
|
||||
LIMIT 100
|
||||
),
|
||||
text_search AS (
|
||||
SELECT id, ts_rank(tsv, plainto_tsquery($5, $3)) AS text_rank
|
||||
FROM bible_passages
|
||||
WHERE tsv @@ plainto_tsquery($5, $3) AND lang = $4
|
||||
)
|
||||
SELECT bp.ref, bp.book, bp.chapter, bp.verse, bp.text_raw,
|
||||
COALESCE(vs.vector_sim, 0) * 0.7 + COALESCE(ts.text_rank, 0) * 0.3 AS combined_score
|
||||
FROM bible_passages bp
|
||||
LEFT JOIN vector_search vs ON vs.id = bp.id
|
||||
LEFT JOIN text_search ts ON ts.id = bp.id
|
||||
WHERE (vs.id IS NOT NULL OR ts.id IS NOT NULL) AND bp.lang = $4
|
||||
ORDER BY combined_score DESC
|
||||
LIMIT $2`
|
||||
|
||||
const params = exists
|
||||
? [JSON.stringify(queryEmbedding), limit, query, textConfig]
|
||||
: [JSON.stringify(queryEmbedding), limit, query, language, textConfig]
|
||||
|
||||
const result = await client.query(sql, params)
|
||||
|
||||
return result.rows
|
||||
} finally {
|
||||
@@ -125,6 +194,8 @@ export async function getContextVerses(
|
||||
verse: number,
|
||||
contextSize: number = 2
|
||||
): Promise<BibleVerse[]> {
|
||||
// For context, we can't infer language here; callers should use the main hybrid result to decide.
|
||||
// For now, fallback to legacy table for context retrieval; can be extended to use per-language table.
|
||||
const client = await pool.connect()
|
||||
try {
|
||||
const result = await client.query(
|
||||
@@ -142,4 +213,4 @@ export async function getContextVerses(
|
||||
} finally {
|
||||
client.release()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user