Add Ollama embedding support and improve prayer system with public/private visibility
- Add Ollama embedding support in vector search: Ollama is tried first when configured, with Azure OpenAI as the fallback
- Enhance prayer system with public/private visibility options and language filtering
- Update OG image to use new biblical-guide-og-image.png
- Improve prayer request management with better categorization
- Remove deprecated ingest_json_pgvector.py script

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -16,6 +16,7 @@ export async function generateMetadata({ params }: { params: Promise<{ locale: s
|
|||||||
|
|
||||||
const currentUrl = locale === 'ro' ? 'https://biblical-guide.com/ro/' : 'https://biblical-guide.com/en/'
|
const currentUrl = locale === 'ro' ? 'https://biblical-guide.com/ro/' : 'https://biblical-guide.com/en/'
|
||||||
const alternateUrl = locale === 'ro' ? 'https://biblical-guide.com/en/' : 'https://biblical-guide.com/ro/'
|
const alternateUrl = locale === 'ro' ? 'https://biblical-guide.com/en/' : 'https://biblical-guide.com/ro/'
|
||||||
|
const ogImageUrl = 'https://biblical-guide.com/biblical-guide-og-image.png'
|
||||||
|
|
||||||
return {
|
return {
|
||||||
title: t('title'),
|
title: t('title'),
|
||||||
@@ -38,7 +39,7 @@ export async function generateMetadata({ params }: { params: Promise<{ locale: s
|
|||||||
type: 'website',
|
type: 'website',
|
||||||
images: [
|
images: [
|
||||||
{
|
{
|
||||||
url: `https://ghidulbiblic.ro/og-image-${locale}.jpg`,
|
url: ogImageUrl,
|
||||||
width: 1200,
|
width: 1200,
|
||||||
height: 630,
|
height: 630,
|
||||||
alt: t('ogTitle'),
|
alt: t('ogTitle'),
|
||||||
@@ -50,7 +51,7 @@ export async function generateMetadata({ params }: { params: Promise<{ locale: s
|
|||||||
site: '@ghidbiblic',
|
site: '@ghidbiblic',
|
||||||
title: t('twitterTitle'),
|
title: t('twitterTitle'),
|
||||||
description: t('twitterDescription'),
|
description: t('twitterDescription'),
|
||||||
images: [`https://ghidulbiblic.ro/og-image-${locale}.jpg`],
|
images: [ogImageUrl],
|
||||||
},
|
},
|
||||||
other: {
|
other: {
|
||||||
'application/ld+json': JSON.stringify({
|
'application/ld+json': JSON.stringify({
|
||||||
|
|||||||
@@ -15,9 +15,6 @@ import {
|
|||||||
DialogTitle,
|
DialogTitle,
|
||||||
DialogContent,
|
DialogContent,
|
||||||
DialogActions,
|
DialogActions,
|
||||||
List,
|
|
||||||
ListItem,
|
|
||||||
ListItemAvatar,
|
|
||||||
ListItemText,
|
ListItemText,
|
||||||
MenuItem,
|
MenuItem,
|
||||||
useTheme,
|
useTheme,
|
||||||
@@ -27,6 +24,10 @@ import {
|
|||||||
Tabs,
|
Tabs,
|
||||||
Tab,
|
Tab,
|
||||||
FormControlLabel,
|
FormControlLabel,
|
||||||
|
FormControl,
|
||||||
|
Select,
|
||||||
|
Checkbox,
|
||||||
|
SelectChangeEvent,
|
||||||
Switch,
|
Switch,
|
||||||
} from '@mui/material'
|
} from '@mui/material'
|
||||||
import {
|
import {
|
||||||
@@ -42,7 +43,7 @@ import {
|
|||||||
Edit,
|
Edit,
|
||||||
Login,
|
Login,
|
||||||
} from '@mui/icons-material'
|
} from '@mui/icons-material'
|
||||||
import { useState, useEffect } from 'react'
|
import { useState, useEffect, useMemo } from 'react'
|
||||||
import { useTranslations, useLocale, useFormatter } from 'next-intl'
|
import { useTranslations, useLocale, useFormatter } from 'next-intl'
|
||||||
import { useAuth } from '@/hooks/use-auth'
|
import { useAuth } from '@/hooks/use-auth'
|
||||||
|
|
||||||
@@ -55,6 +56,9 @@ interface PrayerRequest {
|
|||||||
timestamp: Date
|
timestamp: Date
|
||||||
prayerCount: number
|
prayerCount: number
|
||||||
isPrayedFor: boolean
|
isPrayedFor: boolean
|
||||||
|
isPublic: boolean
|
||||||
|
language: string
|
||||||
|
isOwner: boolean
|
||||||
}
|
}
|
||||||
|
|
||||||
export default function PrayersPage() {
|
export default function PrayersPage() {
|
||||||
@@ -72,10 +76,50 @@ export default function PrayersPage() {
|
|||||||
title: '',
|
title: '',
|
||||||
description: '',
|
description: '',
|
||||||
category: 'personal',
|
category: 'personal',
|
||||||
|
isPublic: false,
|
||||||
})
|
})
|
||||||
const [aiPrompt, setAiPrompt] = useState('')
|
const [aiPrompt, setAiPrompt] = useState('')
|
||||||
const [isGenerating, setIsGenerating] = useState(false)
|
const [isGenerating, setIsGenerating] = useState(false)
|
||||||
const [loading, setLoading] = useState(true)
|
const [loading, setLoading] = useState(true)
|
||||||
|
const [viewMode, setViewMode] = useState<'private' | 'public'>(user ? 'private' : 'public')
|
||||||
|
const [selectedLanguages, setSelectedLanguages] = useState<string[]>([locale])
|
||||||
|
|
||||||
|
const languagesKey = useMemo(() => selectedLanguages.slice().sort().join(','), [selectedLanguages])
|
||||||
|
const languageOptions = useMemo(() => ([
|
||||||
|
{ value: 'en', label: t('languageFilter.options.en') },
|
||||||
|
{ value: 'ro', label: t('languageFilter.options.ro') }
|
||||||
|
]), [t])
|
||||||
|
const languageLabelMap = useMemo(() => (
|
||||||
|
languageOptions.reduce((acc, option) => {
|
||||||
|
acc[option.value] = option.label
|
||||||
|
return acc
|
||||||
|
}, {} as Record<string, string>)
|
||||||
|
), [languageOptions])
|
||||||
|
|
||||||
|
useEffect(() => {
|
||||||
|
if (user) {
|
||||||
|
setViewMode(prev => (prev === 'private' ? prev : 'private'))
|
||||||
|
} else {
|
||||||
|
setViewMode('public')
|
||||||
|
}
|
||||||
|
}, [user])
|
||||||
|
|
||||||
|
useEffect(() => {
|
||||||
|
if (viewMode === 'public') {
|
||||||
|
setSelectedLanguages(prev => {
|
||||||
|
if (prev.includes(locale)) {
|
||||||
|
return prev
|
||||||
|
}
|
||||||
|
return [...prev, locale]
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}, [locale, viewMode])
|
||||||
|
|
||||||
|
useEffect(() => {
|
||||||
|
if (viewMode === 'public' && selectedLanguages.length === 0) {
|
||||||
|
setSelectedLanguages([locale])
|
||||||
|
}
|
||||||
|
}, [viewMode, selectedLanguages, locale])
|
||||||
|
|
||||||
const categories = [
|
const categories = [
|
||||||
{ value: 'personal', label: t('categories.personal'), color: 'primary' },
|
{ value: 'personal', label: t('categories.personal'), color: 'primary' },
|
||||||
@@ -88,6 +132,12 @@ export default function PrayersPage() {
|
|||||||
|
|
||||||
// Fetch prayers from API
|
// Fetch prayers from API
|
||||||
const fetchPrayers = async () => {
|
const fetchPrayers = async () => {
|
||||||
|
if (viewMode === 'private' && !user) {
|
||||||
|
setPrayers([])
|
||||||
|
setLoading(false)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
setLoading(true)
|
setLoading(true)
|
||||||
try {
|
try {
|
||||||
const params = new URLSearchParams()
|
const params = new URLSearchParams()
|
||||||
@@ -95,11 +145,25 @@ export default function PrayersPage() {
|
|||||||
params.append('category', selectedCategory)
|
params.append('category', selectedCategory)
|
||||||
}
|
}
|
||||||
params.append('limit', '50')
|
params.append('limit', '50')
|
||||||
if (user?.id) {
|
params.append('visibility', viewMode)
|
||||||
params.append('userId', user.id)
|
|
||||||
|
if (viewMode === 'public') {
|
||||||
|
const languagesToQuery = selectedLanguages.length > 0 ? selectedLanguages : [locale]
|
||||||
|
languagesToQuery.forEach(lang => params.append('languages', lang))
|
||||||
}
|
}
|
||||||
|
|
||||||
const response = await fetch(`/api/prayers?${params.toString()}`)
|
const headers: Record<string, string> = {}
|
||||||
|
if (typeof window !== 'undefined') {
|
||||||
|
const token = localStorage.getItem('authToken')
|
||||||
|
if (token) {
|
||||||
|
headers['Authorization'] = `Bearer ${token}`
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const response = await fetch(`/api/prayers?${params.toString()}`, {
|
||||||
|
headers
|
||||||
|
})
|
||||||
|
|
||||||
if (response.ok) {
|
if (response.ok) {
|
||||||
const data = await response.json()
|
const data = await response.json()
|
||||||
setPrayers(data.prayers.map((prayer: any) => ({
|
setPrayers(data.prayers.map((prayer: any) => ({
|
||||||
@@ -107,6 +171,9 @@ export default function PrayersPage() {
|
|||||||
timestamp: new Date(prayer.timestamp)
|
timestamp: new Date(prayer.timestamp)
|
||||||
})))
|
})))
|
||||||
} else {
|
} else {
|
||||||
|
if (response.status === 401) {
|
||||||
|
setPrayers([])
|
||||||
|
}
|
||||||
console.error('Failed to fetch prayers')
|
console.error('Failed to fetch prayers')
|
||||||
}
|
}
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
@@ -118,7 +185,7 @@ export default function PrayersPage() {
|
|||||||
|
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
fetchPrayers()
|
fetchPrayers()
|
||||||
}, [selectedCategory, user])
|
}, [selectedCategory, user, viewMode, languagesKey])
|
||||||
|
|
||||||
const handleGenerateAIPrayer = async () => {
|
const handleGenerateAIPrayer = async () => {
|
||||||
if (!aiPrompt.trim()) return
|
if (!aiPrompt.trim()) return
|
||||||
@@ -144,7 +211,8 @@ export default function PrayersPage() {
|
|||||||
setNewPrayer({
|
setNewPrayer({
|
||||||
title: data.title || '',
|
title: data.title || '',
|
||||||
description: data.prayer || '',
|
description: data.prayer || '',
|
||||||
category: newPrayer.category
|
category: newPrayer.category,
|
||||||
|
isPublic: newPrayer.isPublic
|
||||||
})
|
})
|
||||||
setTabValue(0) // Switch to write tab to review generated prayer
|
setTabValue(0) // Switch to write tab to review generated prayer
|
||||||
} else {
|
} else {
|
||||||
@@ -157,43 +225,41 @@ export default function PrayersPage() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const handleLanguageChange = (event: SelectChangeEvent<string[]>) => {
|
||||||
|
const value = event.target.value
|
||||||
|
const parsed = typeof value === 'string'
|
||||||
|
? value.split(',')
|
||||||
|
: (value as string[])
|
||||||
|
|
||||||
|
const uniqueValues = Array.from(new Set(parsed.filter(Boolean)))
|
||||||
|
setSelectedLanguages(uniqueValues)
|
||||||
|
}
|
||||||
|
|
||||||
const handleSubmitPrayer = async () => {
|
const handleSubmitPrayer = async () => {
|
||||||
if (!newPrayer.title.trim() || !newPrayer.description.trim()) return
|
if (!newPrayer.title.trim() || !newPrayer.description.trim()) return
|
||||||
if (!user) return
|
if (!user) return
|
||||||
|
|
||||||
const prayer: PrayerRequest = {
|
|
||||||
id: Date.now().toString(),
|
|
||||||
title: newPrayer.title,
|
|
||||||
description: newPrayer.description,
|
|
||||||
category: newPrayer.category,
|
|
||||||
author: user.name || (locale === 'en' ? 'You' : 'Tu'),
|
|
||||||
timestamp: new Date(),
|
|
||||||
prayerCount: 0,
|
|
||||||
isPrayedFor: false,
|
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
const token = localStorage.getItem('authToken')
|
||||||
const response = await fetch('/api/prayers', {
|
const response = await fetch('/api/prayers', {
|
||||||
method: 'POST',
|
method: 'POST',
|
||||||
headers: {
|
headers: {
|
||||||
'Content-Type': 'application/json',
|
'Content-Type': 'application/json',
|
||||||
'Authorization': `Bearer ${localStorage.getItem('authToken')}`
|
...(token ? { 'Authorization': `Bearer ${token}` } : {})
|
||||||
},
|
},
|
||||||
body: JSON.stringify({
|
body: JSON.stringify({
|
||||||
title: newPrayer.title,
|
title: newPrayer.title,
|
||||||
description: newPrayer.description,
|
description: newPrayer.description,
|
||||||
category: newPrayer.category,
|
category: newPrayer.category,
|
||||||
isAnonymous: false
|
isAnonymous: false,
|
||||||
|
isPublic: newPrayer.isPublic,
|
||||||
|
language: locale
|
||||||
}),
|
}),
|
||||||
})
|
})
|
||||||
|
|
||||||
if (response.ok) {
|
if (response.ok) {
|
||||||
const data = await response.json()
|
await fetchPrayers()
|
||||||
setPrayers([{
|
setNewPrayer({ title: '', description: '', category: 'personal', isPublic: false })
|
||||||
...data.prayer,
|
|
||||||
timestamp: new Date(data.prayer.timestamp)
|
|
||||||
}, ...prayers])
|
|
||||||
setNewPrayer({ title: '', description: '', category: 'personal' })
|
|
||||||
setAiPrompt('')
|
setAiPrompt('')
|
||||||
setTabValue(0)
|
setTabValue(0)
|
||||||
setOpenDialog(false)
|
setOpenDialog(false)
|
||||||
@@ -341,6 +407,36 @@ export default function PrayersPage() {
|
|||||||
))}
|
))}
|
||||||
</Box>
|
</Box>
|
||||||
|
|
||||||
|
{viewMode === 'public' && (
|
||||||
|
<Box sx={{ mt: 3 }}>
|
||||||
|
<Typography variant="h6" sx={{ mb: 1 }}>
|
||||||
|
{t('languageFilter.title')}
|
||||||
|
</Typography>
|
||||||
|
<FormControl fullWidth size="small">
|
||||||
|
<Select
|
||||||
|
multiple
|
||||||
|
value={selectedLanguages}
|
||||||
|
onChange={handleLanguageChange}
|
||||||
|
renderValue={(selected) =>
|
||||||
|
(selected as string[])
|
||||||
|
.map(code => languageLabelMap[code] || code.toUpperCase())
|
||||||
|
.join(', ')
|
||||||
|
}
|
||||||
|
>
|
||||||
|
{languageOptions.map(option => (
|
||||||
|
<MenuItem key={option.value} value={option.value}>
|
||||||
|
<Checkbox checked={selectedLanguages.includes(option.value)} />
|
||||||
|
<ListItemText primary={option.label} />
|
||||||
|
</MenuItem>
|
||||||
|
))}
|
||||||
|
</Select>
|
||||||
|
</FormControl>
|
||||||
|
<Typography variant="caption" color="text.secondary" sx={{ mt: 1 }}>
|
||||||
|
{t('languageFilter.helper')}
|
||||||
|
</Typography>
|
||||||
|
</Box>
|
||||||
|
)}
|
||||||
|
|
||||||
<Typography variant="h6" sx={{ mt: 3, mb: 1 }}>
|
<Typography variant="h6" sx={{ mt: 3, mb: 1 }}>
|
||||||
{t('stats.title')}
|
{t('stats.title')}
|
||||||
</Typography>
|
</Typography>
|
||||||
@@ -355,6 +451,30 @@ export default function PrayersPage() {
|
|||||||
|
|
||||||
{/* Prayer Requests */}
|
{/* Prayer Requests */}
|
||||||
<Box sx={{ flex: 1, width: { xs: '100%', md: '75%' } }}>
|
<Box sx={{ flex: 1, width: { xs: '100%', md: '75%' } }}>
|
||||||
|
{user && (
|
||||||
|
<Tabs
|
||||||
|
value={viewMode}
|
||||||
|
onChange={(_, newValue) => setViewMode(newValue as 'private' | 'public')}
|
||||||
|
sx={{ mb: 3 }}
|
||||||
|
variant="fullWidth"
|
||||||
|
>
|
||||||
|
<Tab value="private" label={t('viewModes.private')} />
|
||||||
|
<Tab value="public" label={t('viewModes.public')} />
|
||||||
|
</Tabs>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{viewMode === 'private' && (
|
||||||
|
<Alert severity="info" sx={{ mb: 3 }}>
|
||||||
|
{t('alerts.privateInfo')}
|
||||||
|
</Alert>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{viewMode === 'public' && !user && (
|
||||||
|
<Alert severity="info" sx={{ mb: 3 }}>
|
||||||
|
{t('alerts.publicInfo')}
|
||||||
|
</Alert>
|
||||||
|
)}
|
||||||
|
|
||||||
{loading ? (
|
{loading ? (
|
||||||
<Box>
|
<Box>
|
||||||
{Array.from({ length: 3 }).map((_, index) => (
|
{Array.from({ length: 3 }).map((_, index) => (
|
||||||
@@ -388,23 +508,43 @@ export default function PrayersPage() {
|
|||||||
</Box>
|
</Box>
|
||||||
) : (
|
) : (
|
||||||
<Box>
|
<Box>
|
||||||
{prayers.map((prayer) => {
|
{prayers.length === 0 ? (
|
||||||
|
<Paper sx={{ p: 3, textAlign: 'center' }}>
|
||||||
|
<Typography variant="body1" color="text.secondary">
|
||||||
|
{viewMode === 'private' ? t('empty.private') : t('empty.public')}
|
||||||
|
</Typography>
|
||||||
|
</Paper>
|
||||||
|
) : prayers.map((prayer) => {
|
||||||
const categoryInfo = getCategoryInfo(prayer.category)
|
const categoryInfo = getCategoryInfo(prayer.category)
|
||||||
|
const authorName = prayer.isOwner ? (locale === 'en' ? 'You' : 'Tu') : prayer.author
|
||||||
|
const languageLabel = languageLabelMap[prayer.language] || prayer.language.toUpperCase()
|
||||||
return (
|
return (
|
||||||
<Card key={prayer.id} sx={{ mb: 3 }}>
|
<Card key={prayer.id} sx={{ mb: 3 }}>
|
||||||
<CardContent>
|
<CardContent>
|
||||||
<Box sx={{ display: 'flex', justifyContent: 'space-between', alignItems: 'flex-start', mb: 2 }}>
|
<Box sx={{ display: 'flex', justifyContent: 'space-between', alignItems: 'flex-start', mb: 2 }}>
|
||||||
<Box sx={{ flexGrow: 1 }}>
|
<Box sx={{ flexGrow: 1 }}>
|
||||||
<Box sx={{ display: 'flex', alignItems: 'center', gap: 1, mb: 1 }}>
|
|
||||||
<Typography variant="h6" component="h3">
|
<Typography variant="h6" component="h3">
|
||||||
{prayer.title}
|
{prayer.title}
|
||||||
</Typography>
|
</Typography>
|
||||||
|
|
||||||
|
<Box sx={{ display: 'flex', flexWrap: 'wrap', gap: 1, mb: 1, mt: 1 }}>
|
||||||
<Chip
|
<Chip
|
||||||
label={categoryInfo.label}
|
label={categoryInfo.label}
|
||||||
color={categoryInfo.color as any}
|
color={categoryInfo.color as any}
|
||||||
size="small"
|
size="small"
|
||||||
variant="outlined"
|
variant="outlined"
|
||||||
/>
|
/>
|
||||||
|
<Chip
|
||||||
|
label={prayer.isPublic ? t('chips.public') : t('chips.private')}
|
||||||
|
size="small"
|
||||||
|
color={prayer.isPublic ? 'success' : 'default'}
|
||||||
|
variant={prayer.isPublic ? 'filled' : 'outlined'}
|
||||||
|
/>
|
||||||
|
<Chip
|
||||||
|
label={languageLabel}
|
||||||
|
size="small"
|
||||||
|
variant="outlined"
|
||||||
|
/>
|
||||||
</Box>
|
</Box>
|
||||||
|
|
||||||
<Box sx={{ display: 'flex', alignItems: 'center', gap: 2, mb: 2 }}>
|
<Box sx={{ display: 'flex', alignItems: 'center', gap: 2, mb: 2 }}>
|
||||||
@@ -413,7 +553,7 @@ export default function PrayersPage() {
|
|||||||
<Person sx={{ fontSize: 16 }} />
|
<Person sx={{ fontSize: 16 }} />
|
||||||
</Avatar>
|
</Avatar>
|
||||||
<Typography variant="body2" color="text.secondary">
|
<Typography variant="body2" color="text.secondary">
|
||||||
{prayer.author}
|
{authorName}
|
||||||
</Typography>
|
</Typography>
|
||||||
</Box>
|
</Box>
|
||||||
<Box sx={{ display: 'flex', alignItems: 'center', gap: 0.5 }}>
|
<Box sx={{ display: 'flex', alignItems: 'center', gap: 0.5 }}>
|
||||||
@@ -450,6 +590,7 @@ export default function PrayersPage() {
|
|||||||
variant="outlined"
|
variant="outlined"
|
||||||
size="small"
|
size="small"
|
||||||
startIcon={<Share />}
|
startIcon={<Share />}
|
||||||
|
disabled={!prayer.isPublic}
|
||||||
>
|
>
|
||||||
{t('buttons.share')}
|
{t('buttons.share')}
|
||||||
</Button>
|
</Button>
|
||||||
@@ -602,6 +743,21 @@ export default function PrayersPage() {
|
|||||||
)}
|
)}
|
||||||
</Box>
|
</Box>
|
||||||
)}
|
)}
|
||||||
|
|
||||||
|
<Box sx={{ mt: 3 }}>
|
||||||
|
<FormControlLabel
|
||||||
|
control={
|
||||||
|
<Switch
|
||||||
|
checked={newPrayer.isPublic}
|
||||||
|
onChange={(event) => setNewPrayer({ ...newPrayer, isPublic: event.target.checked })}
|
||||||
|
/>
|
||||||
|
}
|
||||||
|
label={t('dialog.makePublic')}
|
||||||
|
/>
|
||||||
|
<Typography variant="caption" color="text.secondary" display="block">
|
||||||
|
{newPrayer.isPublic ? t('dialog.visibilityPublic') : t('dialog.visibilityPrivate')}
|
||||||
|
</Typography>
|
||||||
|
</Box>
|
||||||
</DialogContent>
|
</DialogContent>
|
||||||
|
|
||||||
<DialogActions>
|
<DialogActions>
|
||||||
|
|||||||
@@ -50,6 +50,8 @@ export async function GET(request: Request) {
|
|||||||
category: true,
|
category: true,
|
||||||
author: true,
|
author: true,
|
||||||
isAnonymous: true,
|
isAnonymous: true,
|
||||||
|
isPublic: true,
|
||||||
|
language: true,
|
||||||
prayerCount: true,
|
prayerCount: true,
|
||||||
isActive: true,
|
isActive: true,
|
||||||
createdAt: true,
|
createdAt: true,
|
||||||
|
|||||||
@@ -52,6 +52,32 @@ export interface BibleVerse {
|
|||||||
}
|
}
|
||||||
|
|
||||||
export async function getEmbedding(text: string): Promise<number[]> {
|
export async function getEmbedding(text: string): Promise<number[]> {
|
||||||
|
// Try Ollama first (for local embeddings)
|
||||||
|
if (process.env.OLLAMA_API_URL && process.env.OLLAMA_EMBED_MODEL) {
|
||||||
|
try {
|
||||||
|
const response = await fetch(`${process.env.OLLAMA_API_URL}/api/embeddings`, {
|
||||||
|
method: 'POST',
|
||||||
|
headers: {
|
||||||
|
'Content-Type': 'application/json',
|
||||||
|
},
|
||||||
|
body: JSON.stringify({
|
||||||
|
model: process.env.OLLAMA_EMBED_MODEL,
|
||||||
|
prompt: text,
|
||||||
|
}),
|
||||||
|
})
|
||||||
|
|
||||||
|
if (response.ok) {
|
||||||
|
const data = await response.json()
|
||||||
|
return data.embedding
|
||||||
|
} else {
|
||||||
|
console.warn(`Ollama embedding failed: ${response.status}, falling back to Azure`)
|
||||||
|
}
|
||||||
|
} catch (error) {
|
||||||
|
console.warn('Ollama embedding error, falling back to Azure:', error)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fallback to Azure OpenAI
|
||||||
const response = await fetch(
|
const response = await fetch(
|
||||||
`${process.env.AZURE_OPENAI_ENDPOINT}/openai/deployments/${process.env.AZURE_OPENAI_EMBED_DEPLOYMENT}/embeddings?api-version=${process.env.AZURE_OPENAI_API_VERSION}`,
|
`${process.env.AZURE_OPENAI_ENDPOINT}/openai/deployments/${process.env.AZURE_OPENAI_EMBED_DEPLOYMENT}/embeddings?api-version=${process.env.AZURE_OPENAI_API_VERSION}`,
|
||||||
{
|
{
|
||||||
|
|||||||
BIN
public/biblical-guide-og-image.png
Normal file
BIN
public/biblical-guide-og-image.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 995 KiB |
@@ -1,4 +1,4 @@
|
|||||||
import os, re, json, math, time, asyncio
|
import os, re, json, math, time, asyncio, glob
|
||||||
from typing import List, Dict, Tuple, Iterable
|
from typing import List, Dict, Tuple, Iterable
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
@@ -13,30 +13,28 @@ AZ_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT", "").rstrip("/")
|
|||||||
AZ_API_KEY = os.getenv("AZURE_OPENAI_KEY")
|
AZ_API_KEY = os.getenv("AZURE_OPENAI_KEY")
|
||||||
AZ_API_VER = os.getenv("AZURE_OPENAI_API_VERSION", "2024-05-01-preview")
|
AZ_API_VER = os.getenv("AZURE_OPENAI_API_VERSION", "2024-05-01-preview")
|
||||||
AZ_DEPLOYMENT = os.getenv("AZURE_OPENAI_EMBED_DEPLOYMENT", "embed-3")
|
AZ_DEPLOYMENT = os.getenv("AZURE_OPENAI_EMBED_DEPLOYMENT", "embed-3")
|
||||||
EMBED_DIMS = int(os.getenv("EMBED_DIMS", "3072"))
|
EMBED_DIMS = int(os.getenv("EMBED_DIMS", "1536"))
|
||||||
DB_URL = os.getenv("DATABASE_URL")
|
DB_URL = os.getenv("DATABASE_URL")
|
||||||
BIBLE_MD_PATH = os.getenv("BIBLE_MD_PATH")
|
BIBLE_JSON_DIR = os.getenv("BIBLE_JSON_DIR", "/root/biblical-guide/bibles/json")
|
||||||
LANG_CODE = os.getenv("LANG_CODE", "ro")
|
|
||||||
TRANSLATION = os.getenv("TRANSLATION_CODE", "FIDELA")
|
|
||||||
VECTOR_SCHEMA = os.getenv("VECTOR_SCHEMA", "ai_bible")
|
VECTOR_SCHEMA = os.getenv("VECTOR_SCHEMA", "ai_bible")
|
||||||
|
MIN_FILE_SIZE = int(os.getenv("MIN_FILE_SIZE", "512000")) # 500KB in bytes
|
||||||
|
|
||||||
assert AZ_ENDPOINT and AZ_API_KEY and DB_URL and BIBLE_MD_PATH, "Missing required env vars"
|
assert AZ_ENDPOINT and AZ_API_KEY and DB_URL and BIBLE_JSON_DIR, "Missing required env vars"
|
||||||
|
|
||||||
EMBED_URL = f"{AZ_ENDPOINT}/openai/deployments/{AZ_DEPLOYMENT}/embeddings?api-version={AZ_API_VER}"
|
EMBED_URL = f"{AZ_ENDPOINT}/openai/deployments/{AZ_DEPLOYMENT}/embeddings?api-version={AZ_API_VER}"
|
||||||
|
|
||||||
BOOKS_OT = [
|
def get_large_bible_files():
|
||||||
"Geneza","Exodul","Leviticul","Numeri","Deuteronom","Iosua","Judecători","Rut",
|
"""Get all bible JSON files larger than MIN_FILE_SIZE"""
|
||||||
"1 Samuel","2 Samuel","1 Imparati","2 Imparati","1 Cronici","2 Cronici","Ezra","Neemia","Estera",
|
bible_files = []
|
||||||
"Iov","Psalmii","Proverbe","Eclesiastul","Cântarea Cântărilor","Isaia","Ieremia","Plângerile",
|
pattern = os.path.join(BIBLE_JSON_DIR, "*_bible.json")
|
||||||
"Ezechiel","Daniel","Osea","Ioel","Amos","Obadia","Iona","Mica","Naum","Habacuc","Țefania","Hagai","Zaharia","Maleahi"
|
|
||||||
]
|
|
||||||
BOOKS_NT = [
|
|
||||||
"Matei","Marcu","Luca","Ioan","Faptele Apostolilor","Romani","1 Corinteni","2 Corinteni",
|
|
||||||
"Galateni","Efeseni","Filipeni","Coloseni","1 Tesaloniceni","2 Tesaloniceni","1 Timotei","2 Timotei",
|
|
||||||
"Titus","Filimon","Evrei","Iacov","1 Petru","2 Petru","1 Ioan","2 Ioan","3 Ioan","Iuda","Revelaţia"
|
|
||||||
]
|
|
||||||
|
|
||||||
BOOK_CANON = {b:("OT" if b in BOOKS_OT else "NT") for b in BOOKS_OT + BOOKS_NT}
|
for filepath in glob.glob(pattern):
|
||||||
|
file_size = os.path.getsize(filepath)
|
||||||
|
if file_size >= MIN_FILE_SIZE:
|
||||||
|
bible_files.append(filepath)
|
||||||
|
|
||||||
|
bible_files.sort()
|
||||||
|
return bible_files
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class Verse:
|
class Verse:
|
||||||
@@ -52,59 +50,52 @@ def normalize_text(s: str) -> str:
|
|||||||
s = s.replace(" ", " ")
|
s = s.replace(" ", " ")
|
||||||
return s
|
return s
|
||||||
|
|
||||||
BOOK_RE = re.compile(r"^(?P<book>[A-ZĂÂÎȘȚ][^\n]+?)\s*$")
|
def parse_bible_json(json_file_path: str):
|
||||||
CH_RE = re.compile(r"^(?i:Capitolul|CApitoLuL)\s+(?P<ch>\d+)\b")
|
"""Parse a Bible JSON file and yield verse data"""
|
||||||
VERSE_RE = re.compile(r"^(?P<v>\d+)\s+(?P<body>.+)$")
|
try:
|
||||||
|
with open(json_file_path, 'r', encoding='utf-8') as f:
|
||||||
|
bible_data = json.load(f)
|
||||||
|
|
||||||
def parse_bible_md(md_text: str):
|
bible_name = bible_data.get('name', 'Unknown Bible')
|
||||||
cur_book, cur_ch = None, None
|
abbreviation = bible_data.get('abbreviation', 'UNKNOWN')
|
||||||
testament = None
|
language = bible_data.get('language', 'unknown')
|
||||||
is_in_bible_content = False
|
|
||||||
|
|
||||||
for line in md_text.splitlines():
|
print(f"Processing: {bible_name} ({abbreviation}, {language})")
|
||||||
line = line.rstrip()
|
|
||||||
|
|
||||||
# Start processing after "VECHIUL TESTAMENT" or when we find book markers
|
for book in bible_data.get('books', []):
|
||||||
if line == 'VECHIUL TESTAMENT' or line == 'TESTAMENT' or '…' in line:
|
book_name = book.get('name', 'Unknown Book')
|
||||||
is_in_bible_content = True
|
testament = book.get('testament', 'Unknown')
|
||||||
|
|
||||||
if not is_in_bible_content:
|
# Convert testament to short form for consistency
|
||||||
continue
|
if 'Old' in testament:
|
||||||
|
testament = 'OT'
|
||||||
|
elif 'New' in testament:
|
||||||
|
testament = 'NT'
|
||||||
|
|
||||||
# Book detection: … BookName …
|
for chapter in book.get('chapters', []):
|
||||||
book_match = re.match(r'^…\s*(.+?)\s*…$', line)
|
chapter_num = chapter.get('chapterNum', 1)
|
||||||
if book_match:
|
|
||||||
bname = book_match.group(1).strip()
|
|
||||||
if bname in BOOK_CANON:
|
|
||||||
cur_book = bname
|
|
||||||
testament = BOOK_CANON[bname]
|
|
||||||
cur_ch = None
|
|
||||||
print(f"Found book: {bname}")
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Chapter detection: Capitolul X or CApitoLuL X
|
for verse in chapter.get('verses', []):
|
||||||
m_ch = CH_RE.match(line)
|
verse_num = verse.get('verseNum', 1)
|
||||||
if m_ch and cur_book:
|
text_raw = verse.get('text', '')
|
||||||
cur_ch = int(m_ch.group("ch"))
|
|
||||||
print(f" Chapter {cur_ch}")
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Verse detection: starts with number
|
if text_raw: # Only process non-empty verses
|
||||||
m_v = VERSE_RE.match(line)
|
text_norm = normalize_text(text_raw)
|
||||||
if m_v and cur_book and cur_ch:
|
|
||||||
vnum = int(m_v.group("v"))
|
|
||||||
body = m_v.group("body").strip()
|
|
||||||
|
|
||||||
# Remove paragraph markers
|
|
||||||
body = re.sub(r'^¶\s*', '', body)
|
|
||||||
|
|
||||||
raw = body
|
|
||||||
norm = normalize_text(body)
|
|
||||||
yield {
|
yield {
|
||||||
"testament": testament, "book": cur_book, "chapter": cur_ch, "verse": vnum,
|
"testament": testament,
|
||||||
"text_raw": raw, "text_norm": norm
|
"book": book_name,
|
||||||
|
"chapter": chapter_num,
|
||||||
|
"verse": verse_num,
|
||||||
|
"text_raw": text_raw,
|
||||||
|
"text_norm": text_norm,
|
||||||
|
"language": language,
|
||||||
|
"translation": abbreviation
|
||||||
}
|
}
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
print(f"Error processing {json_file_path}: {e}")
|
||||||
|
return
|
||||||
|
|
||||||
async def embed_batch(client, inputs):
|
async def embed_batch(client, inputs):
|
||||||
payload = {"input": inputs}
|
payload = {"input": inputs}
|
||||||
headers = {"api-key": AZ_API_KEY, "Content-Type": "application/json"}
|
headers = {"api-key": AZ_API_KEY, "Content-Type": "application/json"}
|
||||||
@@ -130,18 +121,23 @@ async def embed_batch(client, inputs):
|
|||||||
def safe_ident(s: str) -> str:
|
def safe_ident(s: str) -> str:
|
||||||
return re.sub(r"[^a-z0-9_]+", "_", s.lower()).strip("_")
|
return re.sub(r"[^a-z0-9_]+", "_", s.lower()).strip("_")
|
||||||
|
|
||||||
TABLE_BASENAME = f"bv_{safe_ident(LANG_CODE)}_{safe_ident(TRANSLATION)}"
|
def get_table_info(language: str, translation: str):
|
||||||
TABLE_FQN = f'"{VECTOR_SCHEMA}"."{TABLE_BASENAME}"'
|
"""Get table name and fully qualified name for a specific bible version"""
|
||||||
|
table_basename = f"bv_{safe_ident(language)}_{safe_ident(translation)}"
|
||||||
|
table_fqn = f'"{VECTOR_SCHEMA}"."{table_basename}"'
|
||||||
|
return table_basename, table_fqn
|
||||||
|
|
||||||
def create_table_sql() -> str:
|
def create_table_sql(table_fqn: str) -> str:
|
||||||
return f"""
|
return f"""
|
||||||
CREATE SCHEMA IF NOT EXISTS "{VECTOR_SCHEMA}";
|
CREATE SCHEMA IF NOT EXISTS "{VECTOR_SCHEMA}";
|
||||||
CREATE TABLE IF NOT EXISTS {TABLE_FQN} (
|
CREATE TABLE IF NOT EXISTS {table_fqn} (
|
||||||
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||||
testament TEXT NOT NULL,
|
testament TEXT NOT NULL,
|
||||||
book TEXT NOT NULL,
|
book TEXT NOT NULL,
|
||||||
chapter INT NOT NULL,
|
chapter INT NOT NULL,
|
||||||
verse INT NOT NULL,
|
verse INT NOT NULL,
|
||||||
|
language TEXT NOT NULL,
|
||||||
|
translation TEXT NOT NULL,
|
||||||
ref TEXT GENERATED ALWAYS AS (book || ' ' || chapter || ':' || verse) STORED,
|
ref TEXT GENERATED ALWAYS AS (book || ' ' || chapter || ':' || verse) STORED,
|
||||||
text_raw TEXT NOT NULL,
|
text_raw TEXT NOT NULL,
|
||||||
text_norm TEXT NOT NULL,
|
text_norm TEXT NOT NULL,
|
||||||
@@ -152,20 +148,21 @@ def create_table_sql() -> str:
|
|||||||
);
|
);
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def create_indexes_sql() -> str:
|
def create_indexes_sql(table_fqn: str, table_basename: str) -> str:
|
||||||
return f"""
|
return f"""
|
||||||
CREATE UNIQUE INDEX IF NOT EXISTS ux_ref_{TABLE_BASENAME} ON {TABLE_FQN} (book, chapter, verse);
|
CREATE UNIQUE INDEX IF NOT EXISTS ux_ref_{table_basename} ON {table_fqn} (translation, language, book, chapter, verse);
|
||||||
CREATE INDEX IF NOT EXISTS idx_tsv_{TABLE_BASENAME} ON {TABLE_FQN} USING GIN (tsv);
|
CREATE INDEX IF NOT EXISTS idx_tsv_{table_basename} ON {table_fqn} USING GIN (tsv);
|
||||||
CREATE INDEX IF NOT EXISTS idx_book_ch_{TABLE_BASENAME} ON {TABLE_FQN} (book, chapter);
|
CREATE INDEX IF NOT EXISTS idx_book_ch_{table_basename} ON {table_fqn} (book, chapter);
|
||||||
CREATE INDEX IF NOT EXISTS idx_testament_{TABLE_BASENAME} ON {TABLE_FQN} (testament);
|
CREATE INDEX IF NOT EXISTS idx_testament_{table_basename} ON {table_fqn} (testament);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_lang_trans_{table_basename} ON {table_fqn} (language, translation);
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def upsert_sql() -> str:
|
def upsert_sql(table_fqn: str) -> str:
|
||||||
return f"""
|
return f"""
|
||||||
INSERT INTO {TABLE_FQN} (testament, book, chapter, verse, text_raw, text_norm, tsv, embedding)
|
INSERT INTO {table_fqn} (testament, book, chapter, verse, language, translation, text_raw, text_norm, tsv, embedding)
|
||||||
VALUES (%(testament)s, %(book)s, %(chapter)s, %(verse)s, %(text_raw)s, %(text_norm)s,
|
VALUES (%(testament)s, %(book)s, %(chapter)s, %(verse)s, %(language)s, %(translation)s, %(text_raw)s, %(text_norm)s,
|
||||||
to_tsvector(COALESCE(%(ts_lang)s,'simple')::regconfig, %(text_norm)s), %(embedding)s)
|
to_tsvector(COALESCE(%(ts_lang)s,'simple')::regconfig, %(text_norm)s), %(embedding)s)
|
||||||
ON CONFLICT (book, chapter, verse) DO UPDATE
|
ON CONFLICT (translation, language, book, chapter, verse) DO UPDATE
|
||||||
SET text_raw=EXCLUDED.text_raw,
|
SET text_raw=EXCLUDED.text_raw,
|
||||||
text_norm=EXCLUDED.text_norm,
|
text_norm=EXCLUDED.text_norm,
|
||||||
tsv=EXCLUDED.tsv,
|
tsv=EXCLUDED.tsv,
|
||||||
@@ -173,27 +170,36 @@ def upsert_sql() -> str:
|
|||||||
updated_at=now();
|
updated_at=now();
|
||||||
"""
|
"""
|
||||||
|
|
||||||
async def main():
|
async def process_bible_file(bible_file_path: str, client):
|
||||||
print("Starting Bible embedding ingestion...")
|
"""Process a single Bible JSON file"""
|
||||||
|
print(f"\n=== Processing {os.path.basename(bible_file_path)} ===")
|
||||||
|
|
||||||
md_text = Path(BIBLE_MD_PATH).read_text(encoding="utf-8", errors="ignore")
|
verses = list(parse_bible_json(bible_file_path))
|
||||||
verses = list(parse_bible_md(md_text))
|
if not verses:
|
||||||
print(f"Parsed verses: {len(verses)}")
|
print(f"No verses found in {bible_file_path}, skipping...")
|
||||||
|
return
|
||||||
|
|
||||||
batch_size = 128
|
print(f"Parsed {len(verses):,} verses")
|
||||||
|
|
||||||
# First create the schema + table structure for this language/version
|
# Get language and translation from first verse
|
||||||
|
first_verse = verses[0]
|
||||||
|
language = first_verse["language"]
|
||||||
|
translation = first_verse["translation"]
|
||||||
|
|
||||||
|
table_basename, table_fqn = get_table_info(language, translation)
|
||||||
|
|
||||||
|
# Create schema + table structure for this bible version
|
||||||
with psycopg.connect(DB_URL) as conn:
|
with psycopg.connect(DB_URL) as conn:
|
||||||
with conn.cursor() as cur:
|
with conn.cursor() as cur:
|
||||||
print(f"Creating schema '{VECTOR_SCHEMA}' and table {TABLE_FQN} ...")
|
print(f"Creating table {table_fqn} ...")
|
||||||
cur.execute("CREATE EXTENSION IF NOT EXISTS vector;")
|
cur.execute("CREATE EXTENSION IF NOT EXISTS vector;")
|
||||||
cur.execute(create_table_sql())
|
cur.execute(create_table_sql(table_fqn))
|
||||||
cur.execute(create_indexes_sql())
|
cur.execute(create_indexes_sql(table_fqn, table_basename))
|
||||||
conn.commit()
|
conn.commit()
|
||||||
print("Schema/table ready")
|
print("Schema/table ready")
|
||||||
|
|
||||||
# Now process embeddings
|
# Process embeddings in batches
|
||||||
async with httpx.AsyncClient() as client:
|
batch_size = 128
|
||||||
with psycopg.connect(DB_URL, autocommit=False) as conn:
|
with psycopg.connect(DB_URL, autocommit=False) as conn:
|
||||||
with conn.cursor() as cur:
|
with conn.cursor() as cur:
|
||||||
for i in range(0, len(verses), batch_size):
|
for i in range(0, len(verses), batch_size):
|
||||||
@@ -205,13 +211,28 @@ async def main():
|
|||||||
|
|
||||||
rows = []
|
rows = []
|
||||||
for v, e in zip(batch, embs):
|
for v, e in zip(batch, embs):
|
||||||
|
# Determine text search language based on language code
|
||||||
|
ts_lang = "simple" # default
|
||||||
|
if v["language"].lower().startswith("ro"):
|
||||||
|
ts_lang = "romanian"
|
||||||
|
elif v["language"].lower().startswith("en"):
|
||||||
|
ts_lang = "english"
|
||||||
|
elif v["language"].lower().startswith("es"):
|
||||||
|
ts_lang = "spanish"
|
||||||
|
elif v["language"].lower().startswith("fr"):
|
||||||
|
ts_lang = "french"
|
||||||
|
elif v["language"].lower().startswith("de"):
|
||||||
|
ts_lang = "german"
|
||||||
|
elif v["language"].lower().startswith("it"):
|
||||||
|
ts_lang = "italian"
|
||||||
|
|
||||||
rows.append({
|
rows.append({
|
||||||
**v,
|
**v,
|
||||||
"ts_lang": "romanian" if LANG_CODE.lower().startswith("ro") else ("english" if LANG_CODE.lower().startswith("en") else "simple"),
|
"ts_lang": ts_lang,
|
||||||
"embedding": e
|
"embedding": e
|
||||||
})
|
})
|
||||||
|
|
||||||
cur.executemany(upsert_sql(), rows)
|
cur.executemany(upsert_sql(table_fqn), rows)
|
||||||
conn.commit()
|
conn.commit()
|
||||||
print(f"Upserted {len(rows)} verses... {i+len(rows)}/{len(verses)}")
|
print(f"Upserted {len(rows)} verses... {i+len(rows)}/{len(verses)}")
|
||||||
|
|
||||||
@@ -219,20 +240,118 @@ async def main():
|
|||||||
print("Creating IVFFLAT index...")
|
print("Creating IVFFLAT index...")
|
||||||
with psycopg.connect(DB_URL, autocommit=True) as conn:
|
with psycopg.connect(DB_URL, autocommit=True) as conn:
|
||||||
with conn.cursor() as cur:
|
with conn.cursor() as cur:
|
||||||
cur.execute(f"VACUUM ANALYZE {TABLE_FQN};")
|
cur.execute(f"VACUUM ANALYZE {table_fqn};")
|
||||||
cur.execute(f"""
|
cur.execute(f"""
|
||||||
CREATE INDEX IF NOT EXISTS idx_vec_ivfflat_{TABLE_BASENAME}
|
CREATE INDEX IF NOT EXISTS idx_vec_ivfflat_{table_basename}
|
||||||
ON {TABLE_FQN} USING ivfflat (embedding vector_cosine_ops)
|
ON {table_fqn} USING ivfflat (embedding vector_cosine_ops)
|
||||||
WITH (lists = 200);
|
WITH (lists = 200);
|
||||||
""")
|
""")
|
||||||
|
|
||||||
print("✅ Bible embedding ingestion completed successfully!")
|
print(f"✅ {translation} ({language}) completed successfully! Total verses: {len(verses):,}")
|
||||||
|
|
||||||
# Helpful pgAdmin queries:
|
def update_status(status_data):
|
||||||
print("\nRun these sample queries in pgAdmin:")
|
"""Update the status file for monitoring progress"""
|
||||||
print(f"SELECT count(*) FROM {TABLE_FQN};")
|
status_file = "/root/biblical-guide/scripts/ingest_status.json"
|
||||||
print(f"SELECT book, chapter, verse, left(text_raw, 80) AS preview FROM {TABLE_FQN} ORDER BY book, chapter, verse LIMIT 10;")
|
try:
|
||||||
print(f"SELECT * FROM {TABLE_FQN} WHERE book='Geneza' AND chapter=1 AND verse=1;")
|
import json
|
||||||
|
from datetime import datetime
|
||||||
|
status_data["last_update"] = datetime.now().isoformat()
|
||||||
|
with open(status_file, 'w') as f:
|
||||||
|
json.dump(status_data, f, indent=2)
|
||||||
|
except Exception as e:
|
||||||
|
print(f"Warning: Could not update status file: {e}")
|
||||||
|
|
||||||
|
async def main():
|
||||||
|
start_time = time.time()
|
||||||
|
print("Starting Bible embedding ingestion for all large Bible files...")
|
||||||
|
print(f"Timestamp: {time.strftime('%Y-%m-%d %H:%M:%S')}")
|
||||||
|
|
||||||
|
# Get all Bible files larger than minimum size
|
||||||
|
bible_files = get_large_bible_files()
|
||||||
|
|
||||||
|
if not bible_files:
|
||||||
|
print(f"No Bible files found larger than {MIN_FILE_SIZE/1024:.0f}KB in {BIBLE_JSON_DIR}")
|
||||||
|
return
|
||||||
|
|
||||||
|
print(f"Found {len(bible_files)} Bible files to process (>{MIN_FILE_SIZE/1024:.0f}KB each)")
|
||||||
|
|
||||||
|
# Initialize status tracking
|
||||||
|
status = {
|
||||||
|
"status": "running",
|
||||||
|
"start_time": time.strftime('%Y-%m-%d %H:%M:%S'),
|
||||||
|
"total_files": len(bible_files),
|
||||||
|
"processed": 0,
|
||||||
|
"successful": 0,
|
||||||
|
"failed": 0,
|
||||||
|
"current_file": "",
|
||||||
|
"errors": []
|
||||||
|
}
|
||||||
|
update_status(status)
|
||||||
|
|
||||||
|
# Process files one by one to avoid memory issues
|
||||||
|
async with httpx.AsyncClient(timeout=120.0) as client:
|
||||||
|
successful = 0
|
||||||
|
failed = 0
|
||||||
|
failed_files = []
|
||||||
|
|
||||||
|
for i, bible_file in enumerate(bible_files, 1):
|
||||||
|
try:
|
||||||
|
file_size_mb = os.path.getsize(bible_file) / (1024 * 1024)
|
||||||
|
filename = os.path.basename(bible_file)
|
||||||
|
|
||||||
|
print(f"\n[{i}/{len(bible_files)}] Processing {filename} ({file_size_mb:.1f}MB)")
|
||||||
|
print(f"Progress: {(i-1)/len(bible_files)*100:.1f}% complete")
|
||||||
|
|
||||||
|
# Update status
|
||||||
|
status["current_file"] = filename
|
||||||
|
status["processed"] = i - 1
|
||||||
|
status["successful"] = successful
|
||||||
|
status["failed"] = failed
|
||||||
|
update_status(status)
|
||||||
|
|
||||||
|
await process_bible_file(bible_file, client)
|
||||||
|
successful += 1
|
||||||
|
print(f"✅ Completed {filename}")
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
error_msg = f"Failed to process {os.path.basename(bible_file)}: {str(e)}"
|
||||||
|
print(f"❌ {error_msg}")
|
||||||
|
failed += 1
|
||||||
|
failed_files.append(os.path.basename(bible_file))
|
||||||
|
status["errors"].append({"file": os.path.basename(bible_file), "error": str(e), "timestamp": time.strftime('%Y-%m-%d %H:%M:%S')})
|
||||||
|
update_status(status)
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Final summary
|
||||||
|
elapsed_time = time.time() - start_time
|
||||||
|
elapsed_hours = elapsed_time / 3600
|
||||||
|
|
||||||
|
print(f"\n=== Final Summary ===")
|
||||||
|
print(f"✅ Successfully processed: {successful} files")
|
||||||
|
print(f"❌ Failed to process: {failed} files")
|
||||||
|
print(f"📊 Total files: {len(bible_files)}")
|
||||||
|
print(f"⏱️ Total time: {elapsed_hours:.2f} hours ({elapsed_time:.0f} seconds)")
|
||||||
|
print(f"📈 Average: {elapsed_time/len(bible_files):.1f} seconds per file")
|
||||||
|
|
||||||
|
if failed_files:
|
||||||
|
print(f"\n❌ Failed files:")
|
||||||
|
for filename in failed_files:
|
||||||
|
print(f" - {filename}")
|
||||||
|
|
||||||
|
# Final status update
|
||||||
|
status.update({
|
||||||
|
"status": "completed",
|
||||||
|
"end_time": time.strftime('%Y-%m-%d %H:%M:%S'),
|
||||||
|
"processed": len(bible_files),
|
||||||
|
"successful": successful,
|
||||||
|
"failed": failed,
|
||||||
|
"duration_seconds": elapsed_time,
|
||||||
|
"current_file": ""
|
||||||
|
})
|
||||||
|
update_status(status)
|
||||||
|
|
||||||
|
print("\n🎉 All large Bible files have been processed!")
|
||||||
|
print(f"📋 Status file: /root/biblical-guide/scripts/ingest_status.json")
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
asyncio.run(main())
|
asyncio.run(main())
|
||||||
|
|||||||
@@ -1,169 +0,0 @@
|
|||||||
import os, json, re, asyncio
|
|
||||||
from pathlib import Path
|
|
||||||
from typing import List, Dict
|
|
||||||
from dotenv import load_dotenv
|
|
||||||
import httpx
|
|
||||||
import psycopg
|
|
||||||
|
|
||||||
load_dotenv()
|
|
||||||
|
|
||||||
AZ_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT", "").rstrip("/")
|
|
||||||
AZ_API_KEY = os.getenv("AZURE_OPENAI_KEY")
|
|
||||||
AZ_API_VER = os.getenv("AZURE_OPENAI_API_VERSION", "2024-05-01-preview")
|
|
||||||
AZ_DEPLOYMENT = os.getenv("AZURE_OPENAI_EMBED_DEPLOYMENT", "embed-3")
|
|
||||||
EMBED_DIMS = int(os.getenv("EMBED_DIMS", "3072"))
|
|
||||||
DB_URL = os.getenv("DATABASE_URL")
|
|
||||||
VECTOR_SCHEMA = os.getenv("VECTOR_SCHEMA", "ai_bible")
|
|
||||||
LANG_CODE = os.getenv("LANG_CODE", "en")
|
|
||||||
TRANSLATION = os.getenv("TRANSLATION_CODE", "WEB")
|
|
||||||
JSON_DIR = os.getenv("JSON_DIR", f"data/en_bible/{TRANSLATION}")
|
|
||||||
|
|
||||||
assert AZ_ENDPOINT and AZ_API_KEY and DB_URL and JSON_DIR, "Missing required env vars"
|
|
||||||
|
|
||||||
EMBED_URL = f"{AZ_ENDPOINT}/openai/deployments/{AZ_DEPLOYMENT}/embeddings?api-version={AZ_API_VER}"
|
|
||||||
|
|
||||||
def safe_ident(s: str) -> str:
|
|
||||||
return re.sub(r"[^a-z0-9_]+", "_", s.lower()).strip("_")
|
|
||||||
|
|
||||||
TABLE_BASENAME = f"bv_{safe_ident(LANG_CODE)}_{safe_ident(TRANSLATION)}"
|
|
||||||
TABLE_FQN = f'"{VECTOR_SCHEMA}"."{TABLE_BASENAME}"'
|
|
||||||
|
|
||||||
def create_table_sql() -> str:
|
|
||||||
return f"""
|
|
||||||
CREATE SCHEMA IF NOT EXISTS "{VECTOR_SCHEMA}";
|
|
||||||
CREATE TABLE IF NOT EXISTS {TABLE_FQN} (
|
|
||||||
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
|
||||||
testament TEXT NOT NULL,
|
|
||||||
book TEXT NOT NULL,
|
|
||||||
chapter INT NOT NULL,
|
|
||||||
verse INT NOT NULL,
|
|
||||||
ref TEXT GENERATED ALWAYS AS (book || ' ' || chapter || ':' || verse) STORED,
|
|
||||||
text_raw TEXT NOT NULL,
|
|
||||||
text_norm TEXT NOT NULL,
|
|
||||||
tsv tsvector,
|
|
||||||
embedding vector({EMBED_DIMS}),
|
|
||||||
created_at TIMESTAMPTZ DEFAULT now(),
|
|
||||||
updated_at TIMESTAMPTZ DEFAULT now()
|
|
||||||
);
|
|
||||||
"""
|
|
||||||
|
|
||||||
def create_indexes_sql() -> str:
|
|
||||||
return f"""
|
|
||||||
CREATE UNIQUE INDEX IF NOT EXISTS ux_ref_{TABLE_BASENAME} ON {TABLE_FQN} (book, chapter, verse);
|
|
||||||
CREATE INDEX IF NOT EXISTS idx_tsv_{TABLE_BASENAME} ON {TABLE_FQN} USING GIN (tsv);
|
|
||||||
CREATE INDEX IF NOT EXISTS idx_book_ch_{TABLE_BASENAME} ON {TABLE_FQN} (book, chapter);
|
|
||||||
CREATE INDEX IF NOT EXISTS idx_testament_{TABLE_BASENAME} ON {TABLE_FQN} (testament);
|
|
||||||
"""
|
|
||||||
|
|
||||||
def upsert_sql() -> str:
|
|
||||||
return f"""
|
|
||||||
INSERT INTO {TABLE_FQN} (testament, book, chapter, verse, text_raw, text_norm, tsv, embedding)
|
|
||||||
VALUES (%(testament)s, %(book)s, %(chapter)s, %(verse)s, %(text_raw)s, %(text_norm)s,
|
|
||||||
to_tsvector(COALESCE(%(ts_lang)s,'simple')::regconfig, %(text_norm)s), %(embedding)s)
|
|
||||||
ON CONFLICT (book, chapter, verse) DO UPDATE
|
|
||||||
SET text_raw=EXCLUDED.text_raw,
|
|
||||||
text_norm=EXCLUDED.text_norm,
|
|
||||||
tsv=EXCLUDED.tsv,
|
|
||||||
embedding=EXCLUDED.embedding,
|
|
||||||
updated_at=now();
|
|
||||||
"""
|
|
||||||
|
|
||||||
def normalize(s: str) -> str:
|
|
||||||
s = re.sub(r"\s+", " ", s.strip())
|
|
||||||
return s
|
|
||||||
|
|
||||||
async def embed_batch(client: httpx.AsyncClient, inputs: List[str]) -> List[List[float]]:
|
|
||||||
payload = {"input": inputs}
|
|
||||||
headers = {"api-key": AZ_API_KEY, "Content-Type": "application/json"}
|
|
||||||
for attempt in range(6):
|
|
||||||
try:
|
|
||||||
r = await client.post(EMBED_URL, headers=headers, json=payload, timeout=60)
|
|
||||||
if r.status_code == 200:
|
|
||||||
data = r.json()
|
|
||||||
ordered = sorted(data["data"], key=lambda x: x["index"])
|
|
||||||
return [d["embedding"] for d in ordered]
|
|
||||||
elif r.status_code in (429, 500, 502, 503):
|
|
||||||
backoff = 2 ** attempt + (0.25 * attempt)
|
|
||||||
print(f"Rate/Server limited ({r.status_code}), waiting {backoff:.1f}s...")
|
|
||||||
await asyncio.sleep(backoff)
|
|
||||||
else:
|
|
||||||
raise RuntimeError(f"Embedding error {r.status_code}: {r.text}")
|
|
||||||
except Exception as e:
|
|
||||||
backoff = 2 ** attempt + (0.25 * attempt)
|
|
||||||
print(f"Error on attempt {attempt + 1}: {e}, waiting {backoff:.1f}s...")
|
|
||||||
await asyncio.sleep(backoff)
|
|
||||||
raise RuntimeError("Failed to embed after retries")
|
|
||||||
|
|
||||||
def load_json() -> List[Dict]:
|
|
||||||
ot = json.loads(Path(Path(JSON_DIR)/'old_testament.json').read_text('utf-8'))
|
|
||||||
nt = json.loads(Path(Path(JSON_DIR)/'new_testament.json').read_text('utf-8'))
|
|
||||||
verses = []
|
|
||||||
for test in (ot, nt):
|
|
||||||
testament = test.get('testament')
|
|
||||||
for book in test.get('books', []):
|
|
||||||
bname = book.get('name')
|
|
||||||
for ch in book.get('chapters', []):
|
|
||||||
cnum = int(ch.get('chapterNum'))
|
|
||||||
for v in ch.get('verses', []):
|
|
||||||
vnum = int(v.get('verseNum'))
|
|
||||||
text = str(v.get('text') or '').strip()
|
|
||||||
if text:
|
|
||||||
verses.append({
|
|
||||||
'testament': testament,
|
|
||||||
'book': bname,
|
|
||||||
'chapter': cnum,
|
|
||||||
'verse': vnum,
|
|
||||||
'text_raw': text,
|
|
||||||
'text_norm': normalize(text),
|
|
||||||
})
|
|
||||||
return verses
|
|
||||||
|
|
||||||
async def main():
|
|
||||||
print("Starting JSON embedding ingestion...", JSON_DIR)
|
|
||||||
verses = load_json()
|
|
||||||
print("Verses loaded:", len(verses))
|
|
||||||
|
|
||||||
batch_size = int(os.getenv('BATCH_SIZE', '128'))
|
|
||||||
|
|
||||||
# Prepare schema/table
|
|
||||||
with psycopg.connect(DB_URL) as conn:
|
|
||||||
with conn.cursor() as cur:
|
|
||||||
print(f"Ensuring schema/table {TABLE_FQN} ...")
|
|
||||||
cur.execute("CREATE EXTENSION IF NOT EXISTS vector;")
|
|
||||||
cur.execute(create_table_sql())
|
|
||||||
cur.execute(create_indexes_sql())
|
|
||||||
conn.commit()
|
|
||||||
|
|
||||||
async with httpx.AsyncClient() as client:
|
|
||||||
with psycopg.connect(DB_URL, autocommit=False) as conn:
|
|
||||||
with conn.cursor() as cur:
|
|
||||||
for i in range(0, len(verses), batch_size):
|
|
||||||
batch = verses[i:i+batch_size]
|
|
||||||
inputs = [v['text_norm'] for v in batch]
|
|
||||||
embs = await embed_batch(client, inputs)
|
|
||||||
rows = []
|
|
||||||
ts_lang = 'english' if LANG_CODE.lower().startswith('en') else 'simple'
|
|
||||||
for v, e in zip(batch, embs):
|
|
||||||
rows.append({ **v, 'ts_lang': ts_lang, 'embedding': e })
|
|
||||||
cur.executemany(upsert_sql(), rows)
|
|
||||||
conn.commit()
|
|
||||||
print(f"Upserted {len(rows)} verses... {i+len(rows)}/{len(verses)}")
|
|
||||||
|
|
||||||
print("Creating IVFFLAT index...")
|
|
||||||
with psycopg.connect(DB_URL, autocommit=True) as conn:
|
|
||||||
with conn.cursor() as cur:
|
|
||||||
cur.execute(f"VACUUM ANALYZE {TABLE_FQN};")
|
|
||||||
try:
|
|
||||||
cur.execute(f"""
|
|
||||||
CREATE INDEX IF NOT EXISTS idx_vec_ivfflat_{TABLE_BASENAME}
|
|
||||||
ON {TABLE_FQN} USING ivfflat (embedding vector_cosine_ops)
|
|
||||||
WITH (lists = 200);
|
|
||||||
""")
|
|
||||||
except Exception as e:
|
|
||||||
print('IVFFLAT creation skipped (tune maintenance_work_mem):', e)
|
|
||||||
|
|
||||||
print("✅ JSON embedding ingestion completed successfully!")
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
asyncio.run(main())
|
|
||||||
|
|
||||||
@@ -48,6 +48,8 @@ export interface PrayerRequest {
|
|||||||
userId: string | null
|
userId: string | null
|
||||||
content: string
|
content: string
|
||||||
isAnonymous: boolean
|
isAnonymous: boolean
|
||||||
|
isPublic: boolean
|
||||||
|
language: string
|
||||||
prayerCount: number
|
prayerCount: number
|
||||||
createdAt: Date
|
createdAt: Date
|
||||||
updatedAt: Date
|
updatedAt: Date
|
||||||
|
|||||||
Reference in New Issue
Block a user