Add Ollama embedding support and improve prayer system with public/private visibility

- Add Ollama embedding support in vector search: Ollama is tried first when configured, with Azure OpenAI as the fallback
- Enhance prayer system with public/private visibility options and language filtering
- Update OG image to use new biblical-guide-og-image.png
- Improve prayer request management with better categorization
- Remove deprecated ingest_json_pgvector.py script

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
2025-09-28 19:25:49 +00:00
parent 2d27eae756
commit e4b815cb40
8 changed files with 457 additions and 320 deletions

View File

@@ -16,6 +16,7 @@ export async function generateMetadata({ params }: { params: Promise<{ locale: s
const currentUrl = locale === 'ro' ? 'https://biblical-guide.com/ro/' : 'https://biblical-guide.com/en/' const currentUrl = locale === 'ro' ? 'https://biblical-guide.com/ro/' : 'https://biblical-guide.com/en/'
const alternateUrl = locale === 'ro' ? 'https://biblical-guide.com/en/' : 'https://biblical-guide.com/ro/' const alternateUrl = locale === 'ro' ? 'https://biblical-guide.com/en/' : 'https://biblical-guide.com/ro/'
const ogImageUrl = 'https://biblical-guide.com/biblical-guide-og-image.png'
return { return {
title: t('title'), title: t('title'),
@@ -38,7 +39,7 @@ export async function generateMetadata({ params }: { params: Promise<{ locale: s
type: 'website', type: 'website',
images: [ images: [
{ {
url: `https://ghidulbiblic.ro/og-image-${locale}.jpg`, url: ogImageUrl,
width: 1200, width: 1200,
height: 630, height: 630,
alt: t('ogTitle'), alt: t('ogTitle'),
@@ -50,7 +51,7 @@ export async function generateMetadata({ params }: { params: Promise<{ locale: s
site: '@ghidbiblic', site: '@ghidbiblic',
title: t('twitterTitle'), title: t('twitterTitle'),
description: t('twitterDescription'), description: t('twitterDescription'),
images: [`https://ghidulbiblic.ro/og-image-${locale}.jpg`], images: [ogImageUrl],
}, },
other: { other: {
'application/ld+json': JSON.stringify({ 'application/ld+json': JSON.stringify({

View File

@@ -15,9 +15,6 @@ import {
DialogTitle, DialogTitle,
DialogContent, DialogContent,
DialogActions, DialogActions,
List,
ListItem,
ListItemAvatar,
ListItemText, ListItemText,
MenuItem, MenuItem,
useTheme, useTheme,
@@ -27,6 +24,10 @@ import {
Tabs, Tabs,
Tab, Tab,
FormControlLabel, FormControlLabel,
FormControl,
Select,
Checkbox,
SelectChangeEvent,
Switch, Switch,
} from '@mui/material' } from '@mui/material'
import { import {
@@ -42,7 +43,7 @@ import {
Edit, Edit,
Login, Login,
} from '@mui/icons-material' } from '@mui/icons-material'
import { useState, useEffect } from 'react' import { useState, useEffect, useMemo } from 'react'
import { useTranslations, useLocale, useFormatter } from 'next-intl' import { useTranslations, useLocale, useFormatter } from 'next-intl'
import { useAuth } from '@/hooks/use-auth' import { useAuth } from '@/hooks/use-auth'
@@ -55,6 +56,9 @@ interface PrayerRequest {
timestamp: Date timestamp: Date
prayerCount: number prayerCount: number
isPrayedFor: boolean isPrayedFor: boolean
isPublic: boolean
language: string
isOwner: boolean
} }
export default function PrayersPage() { export default function PrayersPage() {
@@ -72,10 +76,50 @@ export default function PrayersPage() {
title: '', title: '',
description: '', description: '',
category: 'personal', category: 'personal',
isPublic: false,
}) })
const [aiPrompt, setAiPrompt] = useState('') const [aiPrompt, setAiPrompt] = useState('')
const [isGenerating, setIsGenerating] = useState(false) const [isGenerating, setIsGenerating] = useState(false)
const [loading, setLoading] = useState(true) const [loading, setLoading] = useState(true)
const [viewMode, setViewMode] = useState<'private' | 'public'>(user ? 'private' : 'public')
const [selectedLanguages, setSelectedLanguages] = useState<string[]>([locale])
const languagesKey = useMemo(() => selectedLanguages.slice().sort().join(','), [selectedLanguages])
const languageOptions = useMemo(() => ([
{ value: 'en', label: t('languageFilter.options.en') },
{ value: 'ro', label: t('languageFilter.options.ro') }
]), [t])
const languageLabelMap = useMemo(() => (
languageOptions.reduce((acc, option) => {
acc[option.value] = option.label
return acc
}, {} as Record<string, string>)
), [languageOptions])
useEffect(() => {
if (user) {
setViewMode(prev => (prev === 'private' ? prev : 'private'))
} else {
setViewMode('public')
}
}, [user])
useEffect(() => {
if (viewMode === 'public') {
setSelectedLanguages(prev => {
if (prev.includes(locale)) {
return prev
}
return [...prev, locale]
})
}
}, [locale, viewMode])
useEffect(() => {
if (viewMode === 'public' && selectedLanguages.length === 0) {
setSelectedLanguages([locale])
}
}, [viewMode, selectedLanguages, locale])
const categories = [ const categories = [
{ value: 'personal', label: t('categories.personal'), color: 'primary' }, { value: 'personal', label: t('categories.personal'), color: 'primary' },
@@ -88,6 +132,12 @@ export default function PrayersPage() {
// Fetch prayers from API // Fetch prayers from API
const fetchPrayers = async () => { const fetchPrayers = async () => {
if (viewMode === 'private' && !user) {
setPrayers([])
setLoading(false)
return
}
setLoading(true) setLoading(true)
try { try {
const params = new URLSearchParams() const params = new URLSearchParams()
@@ -95,11 +145,25 @@ export default function PrayersPage() {
params.append('category', selectedCategory) params.append('category', selectedCategory)
} }
params.append('limit', '50') params.append('limit', '50')
if (user?.id) { params.append('visibility', viewMode)
params.append('userId', user.id)
if (viewMode === 'public') {
const languagesToQuery = selectedLanguages.length > 0 ? selectedLanguages : [locale]
languagesToQuery.forEach(lang => params.append('languages', lang))
} }
const response = await fetch(`/api/prayers?${params.toString()}`) const headers: Record<string, string> = {}
if (typeof window !== 'undefined') {
const token = localStorage.getItem('authToken')
if (token) {
headers['Authorization'] = `Bearer ${token}`
}
}
const response = await fetch(`/api/prayers?${params.toString()}`, {
headers
})
if (response.ok) { if (response.ok) {
const data = await response.json() const data = await response.json()
setPrayers(data.prayers.map((prayer: any) => ({ setPrayers(data.prayers.map((prayer: any) => ({
@@ -107,6 +171,9 @@ export default function PrayersPage() {
timestamp: new Date(prayer.timestamp) timestamp: new Date(prayer.timestamp)
}))) })))
} else { } else {
if (response.status === 401) {
setPrayers([])
}
console.error('Failed to fetch prayers') console.error('Failed to fetch prayers')
} }
} catch (error) { } catch (error) {
@@ -118,7 +185,7 @@ export default function PrayersPage() {
useEffect(() => { useEffect(() => {
fetchPrayers() fetchPrayers()
}, [selectedCategory, user]) }, [selectedCategory, user, viewMode, languagesKey])
const handleGenerateAIPrayer = async () => { const handleGenerateAIPrayer = async () => {
if (!aiPrompt.trim()) return if (!aiPrompt.trim()) return
@@ -144,7 +211,8 @@ export default function PrayersPage() {
setNewPrayer({ setNewPrayer({
title: data.title || '', title: data.title || '',
description: data.prayer || '', description: data.prayer || '',
category: newPrayer.category category: newPrayer.category,
isPublic: newPrayer.isPublic
}) })
setTabValue(0) // Switch to write tab to review generated prayer setTabValue(0) // Switch to write tab to review generated prayer
} else { } else {
@@ -157,43 +225,41 @@ export default function PrayersPage() {
} }
} }
/**
 * Change handler for the multi-select language filter.
 *
 * The select may hand back its value either as a comma-separated string or as
 * an array of codes; both shapes are normalised to a list of unique, non-empty
 * language codes, which is then stored via `setSelectedLanguages`.
 */
const handleLanguageChange = (event: SelectChangeEvent<string[]>) => {
  const { value: rawSelection } = event.target

  let codes: string[]
  if (typeof rawSelection === 'string') {
    codes = rawSelection.split(',')
  } else {
    codes = rawSelection as string[]
  }

  // Drop empty entries first, then collapse duplicates while keeping order.
  const nonEmptyCodes = codes.filter(Boolean)
  setSelectedLanguages(Array.from(new Set(nonEmptyCodes)))
}
const handleSubmitPrayer = async () => { const handleSubmitPrayer = async () => {
if (!newPrayer.title.trim() || !newPrayer.description.trim()) return if (!newPrayer.title.trim() || !newPrayer.description.trim()) return
if (!user) return if (!user) return
const prayer: PrayerRequest = {
id: Date.now().toString(),
title: newPrayer.title,
description: newPrayer.description,
category: newPrayer.category,
author: user.name || (locale === 'en' ? 'You' : 'Tu'),
timestamp: new Date(),
prayerCount: 0,
isPrayedFor: false,
}
try { try {
const token = localStorage.getItem('authToken')
const response = await fetch('/api/prayers', { const response = await fetch('/api/prayers', {
method: 'POST', method: 'POST',
headers: { headers: {
'Content-Type': 'application/json', 'Content-Type': 'application/json',
'Authorization': `Bearer ${localStorage.getItem('authToken')}` ...(token ? { 'Authorization': `Bearer ${token}` } : {})
}, },
body: JSON.stringify({ body: JSON.stringify({
title: newPrayer.title, title: newPrayer.title,
description: newPrayer.description, description: newPrayer.description,
category: newPrayer.category, category: newPrayer.category,
isAnonymous: false isAnonymous: false,
isPublic: newPrayer.isPublic,
language: locale
}), }),
}) })
if (response.ok) { if (response.ok) {
const data = await response.json() await fetchPrayers()
setPrayers([{ setNewPrayer({ title: '', description: '', category: 'personal', isPublic: false })
...data.prayer,
timestamp: new Date(data.prayer.timestamp)
}, ...prayers])
setNewPrayer({ title: '', description: '', category: 'personal' })
setAiPrompt('') setAiPrompt('')
setTabValue(0) setTabValue(0)
setOpenDialog(false) setOpenDialog(false)
@@ -341,6 +407,36 @@ export default function PrayersPage() {
))} ))}
</Box> </Box>
{viewMode === 'public' && (
<Box sx={{ mt: 3 }}>
<Typography variant="h6" sx={{ mb: 1 }}>
{t('languageFilter.title')}
</Typography>
<FormControl fullWidth size="small">
<Select
multiple
value={selectedLanguages}
onChange={handleLanguageChange}
renderValue={(selected) =>
(selected as string[])
.map(code => languageLabelMap[code] || code.toUpperCase())
.join(', ')
}
>
{languageOptions.map(option => (
<MenuItem key={option.value} value={option.value}>
<Checkbox checked={selectedLanguages.includes(option.value)} />
<ListItemText primary={option.label} />
</MenuItem>
))}
</Select>
</FormControl>
<Typography variant="caption" color="text.secondary" sx={{ mt: 1 }}>
{t('languageFilter.helper')}
</Typography>
</Box>
)}
<Typography variant="h6" sx={{ mt: 3, mb: 1 }}> <Typography variant="h6" sx={{ mt: 3, mb: 1 }}>
{t('stats.title')} {t('stats.title')}
</Typography> </Typography>
@@ -355,6 +451,30 @@ export default function PrayersPage() {
{/* Prayer Requests */} {/* Prayer Requests */}
<Box sx={{ flex: 1, width: { xs: '100%', md: '75%' } }}> <Box sx={{ flex: 1, width: { xs: '100%', md: '75%' } }}>
{user && (
<Tabs
value={viewMode}
onChange={(_, newValue) => setViewMode(newValue as 'private' | 'public')}
sx={{ mb: 3 }}
variant="fullWidth"
>
<Tab value="private" label={t('viewModes.private')} />
<Tab value="public" label={t('viewModes.public')} />
</Tabs>
)}
{viewMode === 'private' && (
<Alert severity="info" sx={{ mb: 3 }}>
{t('alerts.privateInfo')}
</Alert>
)}
{viewMode === 'public' && !user && (
<Alert severity="info" sx={{ mb: 3 }}>
{t('alerts.publicInfo')}
</Alert>
)}
{loading ? ( {loading ? (
<Box> <Box>
{Array.from({ length: 3 }).map((_, index) => ( {Array.from({ length: 3 }).map((_, index) => (
@@ -388,23 +508,43 @@ export default function PrayersPage() {
</Box> </Box>
) : ( ) : (
<Box> <Box>
{prayers.map((prayer) => { {prayers.length === 0 ? (
<Paper sx={{ p: 3, textAlign: 'center' }}>
<Typography variant="body1" color="text.secondary">
{viewMode === 'private' ? t('empty.private') : t('empty.public')}
</Typography>
</Paper>
) : prayers.map((prayer) => {
const categoryInfo = getCategoryInfo(prayer.category) const categoryInfo = getCategoryInfo(prayer.category)
const authorName = prayer.isOwner ? (locale === 'en' ? 'You' : 'Tu') : prayer.author
const languageLabel = languageLabelMap[prayer.language] || prayer.language.toUpperCase()
return ( return (
<Card key={prayer.id} sx={{ mb: 3 }}> <Card key={prayer.id} sx={{ mb: 3 }}>
<CardContent> <CardContent>
<Box sx={{ display: 'flex', justifyContent: 'space-between', alignItems: 'flex-start', mb: 2 }}> <Box sx={{ display: 'flex', justifyContent: 'space-between', alignItems: 'flex-start', mb: 2 }}>
<Box sx={{ flexGrow: 1 }}> <Box sx={{ flexGrow: 1 }}>
<Box sx={{ display: 'flex', alignItems: 'center', gap: 1, mb: 1 }}>
<Typography variant="h6" component="h3"> <Typography variant="h6" component="h3">
{prayer.title} {prayer.title}
</Typography> </Typography>
<Box sx={{ display: 'flex', flexWrap: 'wrap', gap: 1, mb: 1, mt: 1 }}>
<Chip <Chip
label={categoryInfo.label} label={categoryInfo.label}
color={categoryInfo.color as any} color={categoryInfo.color as any}
size="small" size="small"
variant="outlined" variant="outlined"
/> />
<Chip
label={prayer.isPublic ? t('chips.public') : t('chips.private')}
size="small"
color={prayer.isPublic ? 'success' : 'default'}
variant={prayer.isPublic ? 'filled' : 'outlined'}
/>
<Chip
label={languageLabel}
size="small"
variant="outlined"
/>
</Box> </Box>
<Box sx={{ display: 'flex', alignItems: 'center', gap: 2, mb: 2 }}> <Box sx={{ display: 'flex', alignItems: 'center', gap: 2, mb: 2 }}>
@@ -413,7 +553,7 @@ export default function PrayersPage() {
<Person sx={{ fontSize: 16 }} /> <Person sx={{ fontSize: 16 }} />
</Avatar> </Avatar>
<Typography variant="body2" color="text.secondary"> <Typography variant="body2" color="text.secondary">
{prayer.author} {authorName}
</Typography> </Typography>
</Box> </Box>
<Box sx={{ display: 'flex', alignItems: 'center', gap: 0.5 }}> <Box sx={{ display: 'flex', alignItems: 'center', gap: 0.5 }}>
@@ -450,6 +590,7 @@ export default function PrayersPage() {
variant="outlined" variant="outlined"
size="small" size="small"
startIcon={<Share />} startIcon={<Share />}
disabled={!prayer.isPublic}
> >
{t('buttons.share')} {t('buttons.share')}
</Button> </Button>
@@ -602,6 +743,21 @@ export default function PrayersPage() {
)} )}
</Box> </Box>
)} )}
<Box sx={{ mt: 3 }}>
<FormControlLabel
control={
<Switch
checked={newPrayer.isPublic}
onChange={(event) => setNewPrayer({ ...newPrayer, isPublic: event.target.checked })}
/>
}
label={t('dialog.makePublic')}
/>
<Typography variant="caption" color="text.secondary" display="block">
{newPrayer.isPublic ? t('dialog.visibilityPublic') : t('dialog.visibilityPrivate')}
</Typography>
</Box>
</DialogContent> </DialogContent>
<DialogActions> <DialogActions>

View File

@@ -50,6 +50,8 @@ export async function GET(request: Request) {
category: true, category: true,
author: true, author: true,
isAnonymous: true, isAnonymous: true,
isPublic: true,
language: true,
prayerCount: true, prayerCount: true,
isActive: true, isActive: true,
createdAt: true, createdAt: true,

View File

@@ -52,6 +52,32 @@ export interface BibleVerse {
} }
export async function getEmbedding(text: string): Promise<number[]> { export async function getEmbedding(text: string): Promise<number[]> {
// Try Ollama first (for local embeddings)
if (process.env.OLLAMA_API_URL && process.env.OLLAMA_EMBED_MODEL) {
try {
const response = await fetch(`${process.env.OLLAMA_API_URL}/api/embeddings`, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: process.env.OLLAMA_EMBED_MODEL,
prompt: text,
}),
})
if (response.ok) {
const data = await response.json()
return data.embedding
} else {
console.warn(`Ollama embedding failed: ${response.status}, falling back to Azure`)
}
} catch (error) {
console.warn('Ollama embedding error, falling back to Azure:', error)
}
}
// Fallback to Azure OpenAI
const response = await fetch( const response = await fetch(
`${process.env.AZURE_OPENAI_ENDPOINT}/openai/deployments/${process.env.AZURE_OPENAI_EMBED_DEPLOYMENT}/embeddings?api-version=${process.env.AZURE_OPENAI_API_VERSION}`, `${process.env.AZURE_OPENAI_ENDPOINT}/openai/deployments/${process.env.AZURE_OPENAI_EMBED_DEPLOYMENT}/embeddings?api-version=${process.env.AZURE_OPENAI_API_VERSION}`,
{ {

Binary file not shown.

After

Width:  |  Height:  |  Size: 995 KiB

View File

@@ -1,4 +1,4 @@
import os, re, json, math, time, asyncio import os, re, json, math, time, asyncio, glob
from typing import List, Dict, Tuple, Iterable from typing import List, Dict, Tuple, Iterable
from dataclasses import dataclass from dataclasses import dataclass
from pathlib import Path from pathlib import Path
@@ -13,30 +13,28 @@ AZ_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT", "").rstrip("/")
AZ_API_KEY = os.getenv("AZURE_OPENAI_KEY") AZ_API_KEY = os.getenv("AZURE_OPENAI_KEY")
AZ_API_VER = os.getenv("AZURE_OPENAI_API_VERSION", "2024-05-01-preview") AZ_API_VER = os.getenv("AZURE_OPENAI_API_VERSION", "2024-05-01-preview")
AZ_DEPLOYMENT = os.getenv("AZURE_OPENAI_EMBED_DEPLOYMENT", "embed-3") AZ_DEPLOYMENT = os.getenv("AZURE_OPENAI_EMBED_DEPLOYMENT", "embed-3")
EMBED_DIMS = int(os.getenv("EMBED_DIMS", "3072")) EMBED_DIMS = int(os.getenv("EMBED_DIMS", "1536"))
DB_URL = os.getenv("DATABASE_URL") DB_URL = os.getenv("DATABASE_URL")
BIBLE_MD_PATH = os.getenv("BIBLE_MD_PATH") BIBLE_JSON_DIR = os.getenv("BIBLE_JSON_DIR", "/root/biblical-guide/bibles/json")
LANG_CODE = os.getenv("LANG_CODE", "ro")
TRANSLATION = os.getenv("TRANSLATION_CODE", "FIDELA")
VECTOR_SCHEMA = os.getenv("VECTOR_SCHEMA", "ai_bible") VECTOR_SCHEMA = os.getenv("VECTOR_SCHEMA", "ai_bible")
MIN_FILE_SIZE = int(os.getenv("MIN_FILE_SIZE", "512000")) # 500KB in bytes
assert AZ_ENDPOINT and AZ_API_KEY and DB_URL and BIBLE_MD_PATH, "Missing required env vars" assert AZ_ENDPOINT and AZ_API_KEY and DB_URL and BIBLE_JSON_DIR, "Missing required env vars"
EMBED_URL = f"{AZ_ENDPOINT}/openai/deployments/{AZ_DEPLOYMENT}/embeddings?api-version={AZ_API_VER}" EMBED_URL = f"{AZ_ENDPOINT}/openai/deployments/{AZ_DEPLOYMENT}/embeddings?api-version={AZ_API_VER}"
BOOKS_OT = [ def get_large_bible_files():
"Geneza","Exodul","Leviticul","Numeri","Deuteronom","Iosua","Judecători","Rut", """Get all bible JSON files larger than MIN_FILE_SIZE"""
"1 Samuel","2 Samuel","1 Imparati","2 Imparati","1 Cronici","2 Cronici","Ezra","Neemia","Estera", bible_files = []
"Iov","Psalmii","Proverbe","Eclesiastul","Cântarea Cântărilor","Isaia","Ieremia","Plângerile", pattern = os.path.join(BIBLE_JSON_DIR, "*_bible.json")
"Ezechiel","Daniel","Osea","Ioel","Amos","Obadia","Iona","Mica","Naum","Habacuc","Țefania","Hagai","Zaharia","Maleahi"
]
BOOKS_NT = [
"Matei","Marcu","Luca","Ioan","Faptele Apostolilor","Romani","1 Corinteni","2 Corinteni",
"Galateni","Efeseni","Filipeni","Coloseni","1 Tesaloniceni","2 Tesaloniceni","1 Timotei","2 Timotei",
"Titus","Filimon","Evrei","Iacov","1 Petru","2 Petru","1 Ioan","2 Ioan","3 Ioan","Iuda","Revelaţia"
]
BOOK_CANON = {b:("OT" if b in BOOKS_OT else "NT") for b in BOOKS_OT + BOOKS_NT} for filepath in glob.glob(pattern):
file_size = os.path.getsize(filepath)
if file_size >= MIN_FILE_SIZE:
bible_files.append(filepath)
bible_files.sort()
return bible_files
@dataclass @dataclass
class Verse: class Verse:
@@ -52,59 +50,52 @@ def normalize_text(s: str) -> str:
s = s.replace(" ", " ") s = s.replace(" ", " ")
return s return s
BOOK_RE = re.compile(r"^(?P<book>[A-ZĂÂÎȘȚ][^\n]+?)\s*$") def parse_bible_json(json_file_path: str):
CH_RE = re.compile(r"^(?i:Capitolul|CApitoLuL)\s+(?P<ch>\d+)\b") """Parse a Bible JSON file and yield verse data"""
VERSE_RE = re.compile(r"^(?P<v>\d+)\s+(?P<body>.+)$") try:
with open(json_file_path, 'r', encoding='utf-8') as f:
bible_data = json.load(f)
def parse_bible_md(md_text: str): bible_name = bible_data.get('name', 'Unknown Bible')
cur_book, cur_ch = None, None abbreviation = bible_data.get('abbreviation', 'UNKNOWN')
testament = None language = bible_data.get('language', 'unknown')
is_in_bible_content = False
for line in md_text.splitlines(): print(f"Processing: {bible_name} ({abbreviation}, {language})")
line = line.rstrip()
# Start processing after "VECHIUL TESTAMENT" or when we find book markers for book in bible_data.get('books', []):
if line == 'VECHIUL TESTAMENT' or line == 'TESTAMENT' or '' in line: book_name = book.get('name', 'Unknown Book')
is_in_bible_content = True testament = book.get('testament', 'Unknown')
if not is_in_bible_content: # Convert testament to short form for consistency
continue if 'Old' in testament:
testament = 'OT'
elif 'New' in testament:
testament = 'NT'
# Book detection: … BookName … for chapter in book.get('chapters', []):
book_match = re.match(r'^…\s*(.+?)\s*…$', line) chapter_num = chapter.get('chapterNum', 1)
if book_match:
bname = book_match.group(1).strip()
if bname in BOOK_CANON:
cur_book = bname
testament = BOOK_CANON[bname]
cur_ch = None
print(f"Found book: {bname}")
continue
# Chapter detection: Capitolul X or CApitoLuL X for verse in chapter.get('verses', []):
m_ch = CH_RE.match(line) verse_num = verse.get('verseNum', 1)
if m_ch and cur_book: text_raw = verse.get('text', '')
cur_ch = int(m_ch.group("ch"))
print(f" Chapter {cur_ch}")
continue
# Verse detection: starts with number if text_raw: # Only process non-empty verses
m_v = VERSE_RE.match(line) text_norm = normalize_text(text_raw)
if m_v and cur_book and cur_ch:
vnum = int(m_v.group("v"))
body = m_v.group("body").strip()
# Remove paragraph markers
body = re.sub(r'\s*', '', body)
raw = body
norm = normalize_text(body)
yield { yield {
"testament": testament, "book": cur_book, "chapter": cur_ch, "verse": vnum, "testament": testament,
"text_raw": raw, "text_norm": norm "book": book_name,
"chapter": chapter_num,
"verse": verse_num,
"text_raw": text_raw,
"text_norm": text_norm,
"language": language,
"translation": abbreviation
} }
except Exception as e:
print(f"Error processing {json_file_path}: {e}")
return
async def embed_batch(client, inputs): async def embed_batch(client, inputs):
payload = {"input": inputs} payload = {"input": inputs}
headers = {"api-key": AZ_API_KEY, "Content-Type": "application/json"} headers = {"api-key": AZ_API_KEY, "Content-Type": "application/json"}
@@ -130,18 +121,23 @@ async def embed_batch(client, inputs):
def safe_ident(s: str) -> str: def safe_ident(s: str) -> str:
return re.sub(r"[^a-z0-9_]+", "_", s.lower()).strip("_") return re.sub(r"[^a-z0-9_]+", "_", s.lower()).strip("_")
TABLE_BASENAME = f"bv_{safe_ident(LANG_CODE)}_{safe_ident(TRANSLATION)}" def get_table_info(language: str, translation: str):
TABLE_FQN = f'"{VECTOR_SCHEMA}"."{TABLE_BASENAME}"' """Get table name and fully qualified name for a specific bible version"""
table_basename = f"bv_{safe_ident(language)}_{safe_ident(translation)}"
table_fqn = f'"{VECTOR_SCHEMA}"."{table_basename}"'
return table_basename, table_fqn
def create_table_sql() -> str: def create_table_sql(table_fqn: str) -> str:
return f""" return f"""
CREATE SCHEMA IF NOT EXISTS "{VECTOR_SCHEMA}"; CREATE SCHEMA IF NOT EXISTS "{VECTOR_SCHEMA}";
CREATE TABLE IF NOT EXISTS {TABLE_FQN} ( CREATE TABLE IF NOT EXISTS {table_fqn} (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(), id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
testament TEXT NOT NULL, testament TEXT NOT NULL,
book TEXT NOT NULL, book TEXT NOT NULL,
chapter INT NOT NULL, chapter INT NOT NULL,
verse INT NOT NULL, verse INT NOT NULL,
language TEXT NOT NULL,
translation TEXT NOT NULL,
ref TEXT GENERATED ALWAYS AS (book || ' ' || chapter || ':' || verse) STORED, ref TEXT GENERATED ALWAYS AS (book || ' ' || chapter || ':' || verse) STORED,
text_raw TEXT NOT NULL, text_raw TEXT NOT NULL,
text_norm TEXT NOT NULL, text_norm TEXT NOT NULL,
@@ -152,20 +148,21 @@ def create_table_sql() -> str:
); );
""" """
def create_indexes_sql() -> str: def create_indexes_sql(table_fqn: str, table_basename: str) -> str:
return f""" return f"""
CREATE UNIQUE INDEX IF NOT EXISTS ux_ref_{TABLE_BASENAME} ON {TABLE_FQN} (book, chapter, verse); CREATE UNIQUE INDEX IF NOT EXISTS ux_ref_{table_basename} ON {table_fqn} (translation, language, book, chapter, verse);
CREATE INDEX IF NOT EXISTS idx_tsv_{TABLE_BASENAME} ON {TABLE_FQN} USING GIN (tsv); CREATE INDEX IF NOT EXISTS idx_tsv_{table_basename} ON {table_fqn} USING GIN (tsv);
CREATE INDEX IF NOT EXISTS idx_book_ch_{TABLE_BASENAME} ON {TABLE_FQN} (book, chapter); CREATE INDEX IF NOT EXISTS idx_book_ch_{table_basename} ON {table_fqn} (book, chapter);
CREATE INDEX IF NOT EXISTS idx_testament_{TABLE_BASENAME} ON {TABLE_FQN} (testament); CREATE INDEX IF NOT EXISTS idx_testament_{table_basename} ON {table_fqn} (testament);
CREATE INDEX IF NOT EXISTS idx_lang_trans_{table_basename} ON {table_fqn} (language, translation);
""" """
def upsert_sql() -> str: def upsert_sql(table_fqn: str) -> str:
return f""" return f"""
INSERT INTO {TABLE_FQN} (testament, book, chapter, verse, text_raw, text_norm, tsv, embedding) INSERT INTO {table_fqn} (testament, book, chapter, verse, language, translation, text_raw, text_norm, tsv, embedding)
VALUES (%(testament)s, %(book)s, %(chapter)s, %(verse)s, %(text_raw)s, %(text_norm)s, VALUES (%(testament)s, %(book)s, %(chapter)s, %(verse)s, %(language)s, %(translation)s, %(text_raw)s, %(text_norm)s,
to_tsvector(COALESCE(%(ts_lang)s,'simple')::regconfig, %(text_norm)s), %(embedding)s) to_tsvector(COALESCE(%(ts_lang)s,'simple')::regconfig, %(text_norm)s), %(embedding)s)
ON CONFLICT (book, chapter, verse) DO UPDATE ON CONFLICT (translation, language, book, chapter, verse) DO UPDATE
SET text_raw=EXCLUDED.text_raw, SET text_raw=EXCLUDED.text_raw,
text_norm=EXCLUDED.text_norm, text_norm=EXCLUDED.text_norm,
tsv=EXCLUDED.tsv, tsv=EXCLUDED.tsv,
@@ -173,27 +170,36 @@ def upsert_sql() -> str:
updated_at=now(); updated_at=now();
""" """
async def main(): async def process_bible_file(bible_file_path: str, client):
print("Starting Bible embedding ingestion...") """Process a single Bible JSON file"""
print(f"\n=== Processing {os.path.basename(bible_file_path)} ===")
md_text = Path(BIBLE_MD_PATH).read_text(encoding="utf-8", errors="ignore") verses = list(parse_bible_json(bible_file_path))
verses = list(parse_bible_md(md_text)) if not verses:
print(f"Parsed verses: {len(verses)}") print(f"No verses found in {bible_file_path}, skipping...")
return
batch_size = 128 print(f"Parsed {len(verses):,} verses")
# First create the schema + table structure for this language/version # Get language and translation from first verse
first_verse = verses[0]
language = first_verse["language"]
translation = first_verse["translation"]
table_basename, table_fqn = get_table_info(language, translation)
# Create schema + table structure for this bible version
with psycopg.connect(DB_URL) as conn: with psycopg.connect(DB_URL) as conn:
with conn.cursor() as cur: with conn.cursor() as cur:
print(f"Creating schema '{VECTOR_SCHEMA}' and table {TABLE_FQN} ...") print(f"Creating table {table_fqn} ...")
cur.execute("CREATE EXTENSION IF NOT EXISTS vector;") cur.execute("CREATE EXTENSION IF NOT EXISTS vector;")
cur.execute(create_table_sql()) cur.execute(create_table_sql(table_fqn))
cur.execute(create_indexes_sql()) cur.execute(create_indexes_sql(table_fqn, table_basename))
conn.commit() conn.commit()
print("Schema/table ready") print("Schema/table ready")
# Now process embeddings # Process embeddings in batches
async with httpx.AsyncClient() as client: batch_size = 128
with psycopg.connect(DB_URL, autocommit=False) as conn: with psycopg.connect(DB_URL, autocommit=False) as conn:
with conn.cursor() as cur: with conn.cursor() as cur:
for i in range(0, len(verses), batch_size): for i in range(0, len(verses), batch_size):
@@ -205,13 +211,28 @@ async def main():
rows = [] rows = []
for v, e in zip(batch, embs): for v, e in zip(batch, embs):
# Determine text search language based on language code
ts_lang = "simple" # default
if v["language"].lower().startswith("ro"):
ts_lang = "romanian"
elif v["language"].lower().startswith("en"):
ts_lang = "english"
elif v["language"].lower().startswith("es"):
ts_lang = "spanish"
elif v["language"].lower().startswith("fr"):
ts_lang = "french"
elif v["language"].lower().startswith("de"):
ts_lang = "german"
elif v["language"].lower().startswith("it"):
ts_lang = "italian"
rows.append({ rows.append({
**v, **v,
"ts_lang": "romanian" if LANG_CODE.lower().startswith("ro") else ("english" if LANG_CODE.lower().startswith("en") else "simple"), "ts_lang": ts_lang,
"embedding": e "embedding": e
}) })
cur.executemany(upsert_sql(), rows) cur.executemany(upsert_sql(table_fqn), rows)
conn.commit() conn.commit()
print(f"Upserted {len(rows)} verses... {i+len(rows)}/{len(verses)}") print(f"Upserted {len(rows)} verses... {i+len(rows)}/{len(verses)}")
@@ -219,20 +240,118 @@ async def main():
print("Creating IVFFLAT index...") print("Creating IVFFLAT index...")
with psycopg.connect(DB_URL, autocommit=True) as conn: with psycopg.connect(DB_URL, autocommit=True) as conn:
with conn.cursor() as cur: with conn.cursor() as cur:
cur.execute(f"VACUUM ANALYZE {TABLE_FQN};") cur.execute(f"VACUUM ANALYZE {table_fqn};")
cur.execute(f""" cur.execute(f"""
CREATE INDEX IF NOT EXISTS idx_vec_ivfflat_{TABLE_BASENAME} CREATE INDEX IF NOT EXISTS idx_vec_ivfflat_{table_basename}
ON {TABLE_FQN} USING ivfflat (embedding vector_cosine_ops) ON {table_fqn} USING ivfflat (embedding vector_cosine_ops)
WITH (lists = 200); WITH (lists = 200);
""") """)
print("Bible embedding ingestion completed successfully!") print(f"{translation} ({language}) completed successfully! Total verses: {len(verses):,}")
# Helpful pgAdmin queries: def update_status(status_data):
print("\nRun these sample queries in pgAdmin:") """Update the status file for monitoring progress"""
print(f"SELECT count(*) FROM {TABLE_FQN};") status_file = "/root/biblical-guide/scripts/ingest_status.json"
print(f"SELECT book, chapter, verse, left(text_raw, 80) AS preview FROM {TABLE_FQN} ORDER BY book, chapter, verse LIMIT 10;") try:
print(f"SELECT * FROM {TABLE_FQN} WHERE book='Geneza' AND chapter=1 AND verse=1;") import json
from datetime import datetime
status_data["last_update"] = datetime.now().isoformat()
with open(status_file, 'w') as f:
json.dump(status_data, f, indent=2)
except Exception as e:
print(f"Warning: Could not update status file: {e}")
async def main():
    """Ingest every large Bible JSON file and record progress in the status file.

    Candidate files come from get_large_bible_files(). Each file is handed to
    process_bible_file() one at a time, sharing a single HTTP client, and the
    progress dictionary is persisted through update_status(). An exception
    raised while handling one file is printed and recorded in the status
    "errors" list, after which the run moves on to the next file.

    Returns:
        None. Returns early (without touching the status file) when no file
        meets the minimum size.
    """
    start_time = time.time()
    print("Starting Bible embedding ingestion for all large Bible files...")
    print(f"Timestamp: {time.strftime('%Y-%m-%d %H:%M:%S')}")

    # Get all Bible files larger than minimum size
    bible_files = get_large_bible_files()
    if not bible_files:
        print(f"No Bible files found larger than {MIN_FILE_SIZE/1024:.0f}KB in {BIBLE_JSON_DIR}")
        return

    print(f"Found {len(bible_files)} Bible files to process (>{MIN_FILE_SIZE/1024:.0f}KB each)")

    # Initialize status tracking
    status = {
        "status": "running",
        "start_time": time.strftime('%Y-%m-%d %H:%M:%S'),
        "total_files": len(bible_files),
        "processed": 0,
        "successful": 0,
        "failed": 0,
        "current_file": "",
        "errors": []
    }
    update_status(status)

    # Process files one by one to avoid memory issues
    async with httpx.AsyncClient(timeout=120.0) as client:
        successful = 0
        failed = 0
        failed_files = []

        for i, bible_file in enumerate(bible_files, 1):
            filename = os.path.basename(bible_file)
            try:
                file_size_mb = os.path.getsize(bible_file) / (1024 * 1024)
                print(f"\n[{i}/{len(bible_files)}] Processing {filename} ({file_size_mb:.1f}MB)")
                print(f"Progress: {(i-1)/len(bible_files)*100:.1f}% complete")

                # Announce which file is in flight before the long-running work starts
                status["current_file"] = filename
                status["processed"] = i - 1
                status["successful"] = successful
                status["failed"] = failed
                update_status(status)

                await process_bible_file(bible_file, client)
                successful += 1
                print(f"✅ Completed {filename}")

            except Exception as e:
                # Best-effort run: record the failure and keep going with the next file.
                error_msg = f"Failed to process {filename}: {str(e)}"
                print(f"{error_msg}")
                failed += 1
                failed_files.append(filename)
                status["errors"].append({"file": filename, "error": str(e), "timestamp": time.strftime('%Y-%m-%d %H:%M:%S')})

            # Publish the counters as soon as this file is finished so the status
            # file never shows an "errors" entry without the matching "failed"
            # count, and a success is visible before the next file starts.
            status["processed"] = i
            status["successful"] = successful
            status["failed"] = failed
            update_status(status)

    # Final summary
    elapsed_time = time.time() - start_time
    elapsed_hours = elapsed_time / 3600

    print(f"\n=== Final Summary ===")
    print(f"✅ Successfully processed: {successful} files")
    print(f"❌ Failed to process: {failed} files")
    print(f"📊 Total files: {len(bible_files)}")
    print(f"⏱️ Total time: {elapsed_hours:.2f} hours ({elapsed_time:.0f} seconds)")
    print(f"📈 Average: {elapsed_time/len(bible_files):.1f} seconds per file")

    if failed_files:
        print(f"\n❌ Failed files:")
        for filename in failed_files:
            print(f" - {filename}")

    # Final status update
    status.update({
        "status": "completed",
        "end_time": time.strftime('%Y-%m-%d %H:%M:%S'),
        "processed": len(bible_files),
        "successful": successful,
        "failed": failed,
        "duration_seconds": elapsed_time,
        "current_file": ""
    })
    update_status(status)

    print("\n🎉 All large Bible files have been processed!")
    print(f"📋 Status file: /root/biblical-guide/scripts/ingest_status.json")
if __name__ == "__main__": if __name__ == "__main__":
asyncio.run(main()) asyncio.run(main())

View File

@@ -1,169 +0,0 @@
import os, json, re, asyncio
from pathlib import Path
from typing import List, Dict
from dotenv import load_dotenv
import httpx
import psycopg
# Populate the process environment from a .env file (python-dotenv) before the
# os.getenv() lookups below are evaluated.
load_dotenv()
# --- Azure OpenAI embedding settings ---
# Trailing "/" is stripped so that EMBED_URL below does not end up with "//".
AZ_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT", "").rstrip("/")
AZ_API_KEY = os.getenv("AZURE_OPENAI_KEY")
AZ_API_VER = os.getenv("AZURE_OPENAI_API_VERSION", "2024-05-01-preview")
AZ_DEPLOYMENT = os.getenv("AZURE_OPENAI_EMBED_DEPLOYMENT", "embed-3")
# Width of the pgvector column created by create_table_sql().
# NOTE(review): presumably must match the output size of the embedding deployment — confirm.
EMBED_DIMS = int(os.getenv("EMBED_DIMS", "3072"))
# --- Database / target table settings ---
DB_URL = os.getenv("DATABASE_URL")
VECTOR_SCHEMA = os.getenv("VECTOR_SCHEMA", "ai_bible")
LANG_CODE = os.getenv("LANG_CODE", "en")
TRANSLATION = os.getenv("TRANSLATION_CODE", "WEB")
# Directory read by load_json(). The default path hard-codes "en_bible"
# regardless of LANG_CODE, so non-English runs need JSON_DIR set explicitly.
JSON_DIR = os.getenv("JSON_DIR", f"data/en_bible/{TRANSLATION}")
# Fail at import time if a required setting is empty. JSON_DIR always has a
# non-empty default, so it can only fail this check when explicitly set to "".
# NOTE(review): assert statements are removed when Python runs with -O, which
# would silently disable this validation.
assert AZ_ENDPOINT and AZ_API_KEY and DB_URL and JSON_DIR, "Missing required env vars"
# Full embeddings endpoint for the configured deployment and API version.
EMBED_URL = f"{AZ_ENDPOINT}/openai/deployments/{AZ_DEPLOYMENT}/embeddings?api-version={AZ_API_VER}"
def safe_ident(s: str) -> str:
    """Turn an arbitrary label into a lowercase identifier fragment.

    The input is lowercased, every run of characters outside ``[a-z0-9_]`` is
    collapsed into a single underscore, and leading/trailing underscores are
    removed. An input with no usable characters yields an empty string.
    """
    lowered = s.lower()
    collapsed = re.sub(r"[^a-z0-9_]+", "_", lowered)
    return collapsed.strip("_")
# Table name derived from language and translation,
# e.g. LANG_CODE="en", TRANSLATION="WEB" -> "bv_en_web".
TABLE_BASENAME = f"bv_{safe_ident(LANG_CODE)}_{safe_ident(TRANSLATION)}"
# Schema-qualified, double-quoted table name interpolated into the SQL built by
# this script.
# NOTE(review): VECTOR_SCHEMA is inserted as-is (it is not passed through
# safe_ident) — confirm it only ever comes from trusted configuration.
TABLE_FQN = f'"{VECTOR_SCHEMA}"."{TABLE_BASENAME}"'
def create_table_sql() -> str:
    """Return the DDL that creates the vector schema and the verse table.

    Both statements use ``IF NOT EXISTS``. The table stores one row per verse:
    its location (testament, book, chapter, verse), a generated ``ref`` column
    of the form ``book chapter:verse``, the raw and normalized text, a
    ``tsvector`` column, and an ``embedding`` column of width ``EMBED_DIMS``.
    """
    # The SQL text is kept flush-left so the emitted statement is unchanged.
    ddl = f"""
CREATE SCHEMA IF NOT EXISTS "{VECTOR_SCHEMA}";
CREATE TABLE IF NOT EXISTS {TABLE_FQN} (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
testament TEXT NOT NULL,
book TEXT NOT NULL,
chapter INT NOT NULL,
verse INT NOT NULL,
ref TEXT GENERATED ALWAYS AS (book || ' ' || chapter || ':' || verse) STORED,
text_raw TEXT NOT NULL,
text_norm TEXT NOT NULL,
tsv tsvector,
embedding vector({EMBED_DIMS}),
created_at TIMESTAMPTZ DEFAULT now(),
updated_at TIMESTAMPTZ DEFAULT now()
);
"""
    return ddl
def create_indexes_sql() -> str:
    """Return the DDL for the non-vector indexes on the verse table.

    Creates, if missing: a unique index on (book, chapter, verse) — the
    conflict target used by the upsert — a GIN index on ``tsv``, an index on
    (book, chapter), and an index on ``testament``.
    """
    # The SQL text is kept flush-left so the emitted statement is unchanged.
    ddl = f"""
CREATE UNIQUE INDEX IF NOT EXISTS ux_ref_{TABLE_BASENAME} ON {TABLE_FQN} (book, chapter, verse);
CREATE INDEX IF NOT EXISTS idx_tsv_{TABLE_BASENAME} ON {TABLE_FQN} USING GIN (tsv);
CREATE INDEX IF NOT EXISTS idx_book_ch_{TABLE_BASENAME} ON {TABLE_FQN} (book, chapter);
CREATE INDEX IF NOT EXISTS idx_testament_{TABLE_BASENAME} ON {TABLE_FQN} (testament);
"""
    return ddl
def upsert_sql() -> str:
    """Return the parameterized INSERT ... ON CONFLICT statement for one verse.

    Expected named parameters: ``testament``, ``book``, ``chapter``, ``verse``,
    ``text_raw``, ``text_norm``, ``ts_lang`` and ``embedding``. ``tsv`` is
    computed in SQL from ``text_norm`` using ``ts_lang`` as the text-search
    configuration (falling back to ``'simple'`` when it is NULL). When a row
    for the same (book, chapter, verse) exists, its text, tsv and embedding
    are overwritten and ``updated_at`` is refreshed.
    """
    # The SQL text is kept flush-left so the emitted statement is unchanged.
    statement = f"""
INSERT INTO {TABLE_FQN} (testament, book, chapter, verse, text_raw, text_norm, tsv, embedding)
VALUES (%(testament)s, %(book)s, %(chapter)s, %(verse)s, %(text_raw)s, %(text_norm)s,
to_tsvector(COALESCE(%(ts_lang)s,'simple')::regconfig, %(text_norm)s), %(embedding)s)
ON CONFLICT (book, chapter, verse) DO UPDATE
SET text_raw=EXCLUDED.text_raw,
text_norm=EXCLUDED.text_norm,
tsv=EXCLUDED.tsv,
embedding=EXCLUDED.embedding,
updated_at=now();
"""
    return statement
def normalize(s: str) -> str:
    """Trim the text and collapse every internal whitespace run to one space."""
    trimmed = s.strip()
    return re.sub(r"\s+", " ", trimmed)
async def embed_batch(client: httpx.AsyncClient, inputs: List[str]) -> List[List[float]]:
    """Request embeddings for a batch of texts, retrying transient failures.

    Args:
        client: Shared HTTP client used to POST to ``EMBED_URL``.
        inputs: Texts to embed, sent as a single request.

    Returns:
        One embedding per input, ordered by the ``index`` field of the
        response so the result lines up with ``inputs``.

    Raises:
        RuntimeError: Immediately, with status code and response body, when
            the service answers with a non-retryable status; or with a generic
            message after six attempts that all hit retryable statuses
            (429/500/502/503) or raised an exception.
    """
    payload = {"input": inputs}
    headers = {"api-key": AZ_API_KEY, "Content-Type": "application/json"}
    retryable_statuses = (429, 500, 502, 503)
    for attempt in range(6):
        # Exponential backoff with a small linear term: 1s, 2.25s, 4.5s, ...
        backoff = 2 ** attempt + (0.25 * attempt)
        fatal = None
        try:
            r = await client.post(EMBED_URL, headers=headers, json=payload, timeout=60)
            if r.status_code == 200:
                data = r.json()
                ordered = sorted(data["data"], key=lambda x: x["index"])
                return [d["embedding"] for d in ordered]
            if r.status_code in retryable_statuses:
                print(f"Rate/Server limited ({r.status_code}), waiting {backoff:.1f}s...")
            else:
                # Build the error here but raise it outside the try block:
                # raising inside would be swallowed by the broad handler
                # below and retried, hiding the real cause (bad key, bad
                # deployment name, malformed request, ...).
                fatal = RuntimeError(f"Embedding error {r.status_code}: {r.text}")
        except Exception as e:
            # Best-effort: transport errors, timeouts and malformed responses
            # are all treated as transient and retried.
            print(f"Error on attempt {attempt + 1}: {e}, waiting {backoff:.1f}s...")
        if fatal is not None:
            raise fatal
        await asyncio.sleep(backoff)
    raise RuntimeError("Failed to embed after retries")
def load_json() -> List[Dict]:
    """Read both testament files from ``JSON_DIR`` and flatten them into verses.

    ``old_testament.json`` is read first, then ``new_testament.json``. Each
    file is walked as testament -> books -> chapters -> verses. Verses whose
    text is missing or blank are skipped.

    Returns:
        A list of dicts with keys ``testament``, ``book``, ``chapter``,
        ``verse``, ``text_raw`` (stripped text) and ``text_norm``
        (whitespace-normalized text), in file order.
    """
    base_dir = Path(JSON_DIR)
    # Both files are parsed up front, before any verse is collected.
    documents = [
        json.loads((base_dir / file_name).read_text('utf-8'))
        for file_name in ('old_testament.json', 'new_testament.json')
    ]

    collected = []
    for document in documents:
        testament = document.get('testament')
        for book in document.get('books', []):
            book_name = book.get('name')
            for chapter in book.get('chapters', []):
                chapter_no = int(chapter.get('chapterNum'))
                for entry in chapter.get('verses', []):
                    verse_no = int(entry.get('verseNum'))
                    raw_text = str(entry.get('text') or '').strip()
                    if not raw_text:
                        continue
                    collected.append({
                        'testament': testament,
                        'book': book_name,
                        'chapter': chapter_no,
                        'verse': verse_no,
                        'text_raw': raw_text,
                        'text_norm': normalize(raw_text),
                    })
    return collected
async def main():
    """Run the full ingestion: load verses, embed them, upsert, then index.

    Steps:
      1. Load all verses from the JSON files.
      2. Ensure the pgvector extension, schema, table and basic indexes exist.
      3. Embed the verses in batches of ``BATCH_SIZE`` (env, default 128) and
         upsert each batch, committing after every batch so progress survives
         a later failure.
      4. VACUUM ANALYZE the table and try to build an IVFFLAT cosine index.

    Raises:
        ValueError: If ``BATCH_SIZE`` is not a positive integer.
        RuntimeError: Propagated from ``embed_batch`` when embedding fails.
    """
    print("Starting JSON embedding ingestion...", JSON_DIR)
    verses = load_json()
    print("Verses loaded:", len(verses))

    batch_size = int(os.getenv('BATCH_SIZE', '128'))
    if batch_size < 1:
        # A negative step would make range() empty and report success without
        # ingesting anything; zero would fail with an unrelated range() error.
        raise ValueError(f"BATCH_SIZE must be a positive integer, got {batch_size}")

    # Prepare schema/table
    with psycopg.connect(DB_URL) as conn:
        with conn.cursor() as cur:
            print(f"Ensuring schema/table {TABLE_FQN} ...")
            cur.execute("CREATE EXTENSION IF NOT EXISTS vector;")
            cur.execute(create_table_sql())
            cur.execute(create_indexes_sql())
            conn.commit()

    # Loop invariants: the statement text and text-search configuration do not
    # change between batches.
    upsert_stmt = upsert_sql()
    ts_lang = 'english' if LANG_CODE.lower().startswith('en') else 'simple'

    async with httpx.AsyncClient() as client:
        with psycopg.connect(DB_URL, autocommit=False) as conn:
            with conn.cursor() as cur:
                for i in range(0, len(verses), batch_size):
                    batch = verses[i:i+batch_size]
                    inputs = [v['text_norm'] for v in batch]
                    embs = await embed_batch(client, inputs)
                    rows = [
                        {**v, 'ts_lang': ts_lang, 'embedding': e}
                        for v, e in zip(batch, embs)
                    ]
                    cur.executemany(upsert_stmt, rows)
                    # Commit per batch so already-embedded verses are kept if
                    # a later batch fails.
                    conn.commit()
                    print(f"Upserted {len(rows)} verses... {i+len(rows)}/{len(verses)}")

    print("Creating IVFFLAT index...")
    # pgvector can only index `vector` columns with at most 2000 dimensions.
    ivfflat_max_dims = 2000
    # VACUUM cannot run inside a transaction block, hence autocommit=True.
    with psycopg.connect(DB_URL, autocommit=True) as conn:
        with conn.cursor() as cur:
            cur.execute(f"VACUUM ANALYZE {TABLE_FQN};")
            if EMBED_DIMS > ivfflat_max_dims:
                # Report the real reason instead of attempting a CREATE INDEX
                # that is guaranteed to fail and blaming maintenance_work_mem.
                print(
                    f"IVFFLAT creation skipped: pgvector cannot index vector columns "
                    f"with more than {ivfflat_max_dims} dimensions (EMBED_DIMS={EMBED_DIMS})"
                )
            else:
                try:
                    cur.execute(f"""
CREATE INDEX IF NOT EXISTS idx_vec_ivfflat_{TABLE_BASENAME}
ON {TABLE_FQN} USING ivfflat (embedding vector_cosine_ops)
WITH (lists = 200);
""")
                except Exception as e:
                    # Index creation is best-effort; the data is already stored.
                    print('IVFFLAT creation skipped (tune maintenance_work_mem):', e)
    print("✅ JSON embedding ingestion completed successfully!")
# Script entry point: run the async ingestion only when this file is executed
# directly, not when it is imported.
if __name__ == '__main__':
    asyncio.run(main())

View File

@@ -48,6 +48,8 @@ export interface PrayerRequest {
userId: string | null userId: string | null
content: string content: string
isAnonymous: boolean isAnonymous: boolean
isPublic: boolean
language: string
prayerCount: number prayerCount: number
createdAt: Date createdAt: Date
updatedAt: Date updatedAt: Date