- Add pgvector support with bible_passages table for vector search
- Create Python ingestion script for Azure OpenAI embed-3 embeddings
- Implement hybrid search combining vector similarity and full-text search
- Update AI chat to use vector search with Azure OpenAI gpt-4o
- Add floating chat component with Material UI design
- Import complete Romanian Bible (FIDELA) with 30K+ verses
- Add vector search library for semantic Bible search
- Create multi-language implementation plan for future expansion

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
305 lines
12 KiB
TypeScript
305 lines
12 KiB
TypeScript
import { PrismaClient } from '@prisma/client'
|
|
import * as fs from 'fs'
|
|
import * as path from 'path'
|
|
|
|
// Prisma client used by the import routine in this file for every database
// call; that routine disconnects it when the import finishes or fails.
const prisma = new PrismaClient()
|
|
|
|
// Lookup from the Romanian book heading (used as the key) to the book's
// display name, three-letter abbreviation, testament and canonical position.
// Rows are listed in canonical order, so the testament and orderNum (1-66)
// are derived from where a row sits instead of being repeated on every row.
const BOOK_MAPPINGS: Record<string, { name: string; abbreviation: string; testament: string; orderNum: number }> = (() => {
  // [lookup key, display name, abbreviation]
  type BookRow = readonly [string, string, string]

  const oldTestament: readonly BookRow[] = [
    ['Geneza', 'Geneza', 'GEN'],
    ['Exodul', 'Exodul', 'EXO'],
    ['Leviticul', 'Leviticul', 'LEV'],
    ['Numeri', 'Numerii', 'NUM'],
    ['Deuteronom', 'Deuteronomul', 'DEU'],
    ['Iosua', 'Iosua', 'JOS'],
    ['Judecători', 'Judecătorii', 'JDG'],
    ['Rut', 'Rut', 'RUT'],
    ['1 Samuel', '1 Samuel', '1SA'],
    ['2 Samuel', '2 Samuel', '2SA'],
    ['1 Imparati', '1 Împărați', '1KI'],
    ['2 Imparati', '2 Împărați', '2KI'],
    ['1 Cronici', '1 Cronici', '1CH'],
    ['2 Cronici', '2 Cronici', '2CH'],
    ['Ezra', 'Ezra', 'EZR'],
    ['Neemia', 'Neemia', 'NEH'],
    ['Estera', 'Estera', 'EST'],
    ['Iov', 'Iov', 'JOB'],
    ['Psalmii', 'Psalmii', 'PSA'],
    ['Proverbe', 'Proverbele', 'PRO'],
    ['Eclesiastul', 'Eclesiastul', 'ECC'],
    ['Cântarea Cântărilor', 'Cântarea Cântărilor', 'SNG'],
    ['Isaia', 'Isaia', 'ISA'],
    ['Ieremia', 'Ieremia', 'JER'],
    ['Plângerile', 'Plângerile', 'LAM'],
    ['Ezechiel', 'Ezechiel', 'EZK'],
    ['Daniel', 'Daniel', 'DAN'],
    ['Osea', 'Osea', 'HOS'],
    ['Ioel', 'Ioel', 'JOL'],
    ['Amos', 'Amos', 'AMO'],
    ['Obadia', 'Obadia', 'OBA'],
    ['Iona', 'Iona', 'JON'],
    ['Mica', 'Mica', 'MIC'],
    ['Naum', 'Naum', 'NAM'],
    ['Habacuc', 'Habacuc', 'HAB'],
    ['Țefania', 'Țefania', 'ZEP'],
    ['Hagai', 'Hagai', 'HAG'],
    ['Zaharia', 'Zaharia', 'ZEC'],
    ['Maleahi', 'Maleahi', 'MAL'],
  ]

  // New Testament
  const newTestament: readonly BookRow[] = [
    ['Matei', 'Matei', 'MAT'],
    ['Marcu', 'Marcu', 'MRK'],
    ['Luca', 'Luca', 'LUK'],
    ['Ioan', 'Ioan', 'JHN'],
    ['Faptele Apostolilor', 'Faptele Apostolilor', 'ACT'],
    ['Romani', 'Romani', 'ROM'],
    ['1 Corinteni', '1 Corinteni', '1CO'],
    ['2 Corinteni', '2 Corinteni', '2CO'],
    ['Galateni', 'Galateni', 'GAL'],
    ['Efeseni', 'Efeseni', 'EPH'],
    ['Filipeni', 'Filipeni', 'PHP'],
    ['Coloseni', 'Coloseni', 'COL'],
    ['1 Tesaloniceni', '1 Tesaloniceni', '1TH'],
    ['2 Tesaloniceni', '2 Tesaloniceni', '2TH'],
    ['1 Timotei', '1 Timotei', '1TI'],
    ['2 Timotei', '2 Timotei', '2TI'],
    ['Titus', 'Titus', 'TIT'],
    ['Filimon', 'Filimon', 'PHM'],
    ['Evrei', 'Evrei', 'HEB'],
    ['Iacov', 'Iacov', 'JAS'],
    ['1 Petru', '1 Petru', '1PE'],
    ['2 Petru', '2 Petru', '2PE'],
    ['1 Ioan', '1 Ioan', '1JN'],
    ['2 Ioan', '2 Ioan', '2JN'],
    ['3 Ioan', '3 Ioan', '3JN'],
    ['Iuda', 'Iuda', 'JUD'],
    ['Revelaţia', 'Revelația', 'REV'],
  ]

  const mappings: Record<string, { name: string; abbreviation: string; testament: string; orderNum: number }> = {}
  let position = 0

  // Appends one testament's rows, numbering them consecutively after the
  // rows that were registered before.
  const register = (testament: string, rows: readonly BookRow[]): void => {
    for (const [heading, name, abbreviation] of rows) {
      position += 1
      mappings[heading] = { name, abbreviation, testament, orderNum: position }
    }
  }

  register('OT', oldTestament)
  register('NT', newTestament)
  return mappings
})()
|
|
|
|
/** One verse as extracted from the source text. */
interface ParsedVerse {
  /** Verse number, read from the digits that open the verse line. */
  verseNum: number
  /** Verse wording as a single line of text. */
  text: string
}
|
|
|
|
/** One chapter of a book together with the verses collected for it. */
interface ParsedChapter {
  /** Chapter number, read from the chapter heading line. */
  chapterNum: number
  /** Verses in the order they were encountered in the source text. */
  verses: ParsedVerse[]
}
|
|
|
|
/** One book of the Bible together with the chapters collected for it. */
interface ParsedBook {
  /** Book name exactly as captured from the book heading line. */
  name: string
  /** Chapters in the order they were encountered in the source text. */
  chapters: ParsedChapter[]
}
|
|
|
|
/**
 * Parses the Romanian Bible text file into books, chapters and verses.
 *
 * Recognised line shapes (each line is trimmed first):
 * - `VECHIUL TESTAMENT` or `TESTAMENT`: everything before the first such
 *   line is ignored.
 * - `… Name …`: starts a new book called `Name`.
 * - `Capitolul N` (any letter case): starts chapter N of the current book.
 * - `N text`: verse N of the current chapter. A leading `¶` is removed and
 *   the following lines are appended to the verse until a blank line, the
 *   next verse, a chapter heading, a book heading or a `TESTAMENT` line.
 *
 * Chapters without verses and books without chapters are left out.
 *
 * @param filePath Path of the UTF-8 text file to read.
 * @returns The parsed books in the order they appear in the file.
 * @throws If the file cannot be read.
 */
async function parseRomanianBible(filePath: string): Promise<ParsedBook[]> {
  console.log(`Reading Romanian Bible from: ${filePath}`)

  // Patterns are built once instead of on every loop iteration.
  const bookHeading = /^…\s*(.+?)\s*…$/
  const chapterHeading = /^capitolul\s+(\d+)$/i
  const verseLine = /^(\d+)\s+(.+)$/
  const nextVerseStart = /^\d+\s/
  const anyBookHeading = /^….*…$/

  const lines = fs.readFileSync(filePath, 'utf-8').split('\n')

  const books: ParsedBook[] = []
  let currentBook: ParsedBook | null = null
  let currentChapter: ParsedChapter | null = null
  let isInBibleContent = false

  // Closes a book: its still-open last chapter is attached first, otherwise
  // the final chapter of every book (and every single-chapter book) is lost.
  const finishBook = (book: ParsedBook | null, lastChapter: ParsedChapter | null): void => {
    if (!book) return
    if (lastChapter && lastChapter.verses.length > 0) {
      book.chapters.push(lastChapter)
    }
    if (book.chapters.length > 0) {
      books.push(book)
    }
  }

  for (let i = 0; i < lines.length; i++) {
    const line = lines[i].trim()

    // Start processing after "VECHIUL TESTAMENT"
    if (line === 'VECHIUL TESTAMENT' || line === 'TESTAMENT') {
      isInBibleContent = true
      continue
    }

    if (!isInBibleContent) continue

    // Book detection: … BookName …
    const bookMatch = line.match(bookHeading)
    if (bookMatch) {
      finishBook(currentBook, currentChapter)

      const bookName = bookMatch[1].trim()
      console.log(`Found book: ${bookName}`)

      currentBook = { name: bookName, chapters: [] }
      currentChapter = null
      continue
    }

    // Chapter detection: "Capitolul X" in any letter case
    const chapterMatch = line.match(chapterHeading)
    if (chapterMatch && currentBook) {
      if (currentChapter && currentChapter.verses.length > 0) {
        currentBook.chapters.push(currentChapter)
      }

      const chapterNum = parseInt(chapterMatch[1], 10)
      console.log(` Chapter ${chapterNum}`)

      currentChapter = { chapterNum, verses: [] }
      continue
    }

    // Verse detection: line starts with the verse number
    const verseMatch = line.match(verseLine)
    if (verseMatch && currentChapter) {
      const verseNum = parseInt(verseMatch[1], 10)

      // Drop a leading paragraph marker
      let verseText = verseMatch[2].trim().replace(/^¶\s*/, '')

      // Pull in continuation lines that belong to this verse
      let j = i + 1
      while (j < lines.length) {
        const nextLine = lines[j].trim()

        const endsVerse =
          !nextLine ||
          nextVerseStart.test(nextLine) || // New verse
          chapterHeading.test(nextLine) || // New chapter
          anyBookHeading.test(nextLine) || // New book
          nextLine === 'TESTAMENT' // Testament marker
        if (endsVerse) break

        verseText += ' ' + nextLine
        j++
      }

      // Collapse runs of whitespace left over from joining lines
      verseText = verseText.replace(/\s+/g, ' ').trim()

      currentChapter.verses.push({ verseNum, text: verseText })

      // Resume after the continuation lines that were consumed
      i = j - 1
      continue
    }
  }

  // Close whatever book and chapter were still open at end of file
  finishBook(currentBook, currentChapter)

  console.log(`Parsed ${books.length} books`)
  return books
}
|
|
|
|
/**
 * Replaces the contents of the Bible tables with the FIDELA Romanian Bible
 * read from `bibles/Biblia-Fidela-limba-romana.md` under the current working
 * directory.
 *
 * The source file is parsed before anything is deleted, so a missing or
 * unparseable file leaves the existing rows untouched. Books whose name has
 * no entry in BOOK_MAPPINGS are skipped with a warning. The Prisma client is
 * disconnected when the function finishes, whether it succeeded or not.
 *
 * @throws If no book can be parsed, or if reading the file or any database
 *   call fails; the error is logged and then rethrown.
 */
async function importRomanianBible(): Promise<void> {
  try {
    console.log('Starting Romanian Bible import...')

    // Parse first: the deletes below are destructive and must only run once
    // there is something to import.
    const filePath = path.join(process.cwd(), 'bibles', 'Biblia-Fidela-limba-romana.md')
    const books = await parseRomanianBible(filePath)
    if (books.length === 0) {
      throw new Error(`No books were parsed from ${filePath}; existing data was left untouched`)
    }

    // Clear existing data: verses, then chapters, then books (presumably so
    // child rows go before the rows they reference - confirm against schema).
    console.log('Clearing existing data...')
    await prisma.bibleVerse.deleteMany()
    await prisma.bibleChapter.deleteMany()
    await prisma.bibleBook.deleteMany()

    console.log(`Importing ${books.length} books into database...`)

    for (const book of books) {
      const bookInfo = BOOK_MAPPINGS[book.name]
      if (!bookInfo) {
        console.warn(`Warning: No mapping found for book "${book.name}", skipping...`)
        continue
      }

      console.log(`Creating book: ${bookInfo.name}`)

      // The canonical position doubles as the book's primary key.
      const createdBook = await prisma.bibleBook.create({
        data: {
          id: bookInfo.orderNum,
          name: bookInfo.name,
          testament: bookInfo.testament,
          orderNum: bookInfo.orderNum
        }
      })

      for (const chapter of book.chapters) {
        console.log(` Creating chapter ${chapter.chapterNum} with ${chapter.verses.length} verses`)

        const createdChapter = await prisma.bibleChapter.create({
          data: {
            bookId: createdBook.id,
            chapterNum: chapter.chapterNum
          }
        })

        // De-duplicate by verse number; when a number repeats, the verse
        // seen last replaces the earlier one.
        const latestByVerseNum = new Map<number, ParsedVerse>()
        for (const verse of chapter.verses) {
          latestByVerseNum.set(verse.verseNum, verse)
        }

        // Insert in ascending verse order.
        const versesData = Array.from(latestByVerseNum.values())
          .sort((a, b) => a.verseNum - b.verseNum)
          .map(verse => ({
            chapterId: createdChapter.id,
            verseNum: verse.verseNum,
            text: verse.text,
            version: 'FIDELA'
          }))

        if (versesData.length > 0) {
          await prisma.bibleVerse.createMany({
            data: versesData
          })
        }
      }
    }

    // Print summary from what is actually stored
    const bookCount = await prisma.bibleBook.count()
    const chapterCount = await prisma.bibleChapter.count()
    const verseCount = await prisma.bibleVerse.count()

    console.log('\n✅ Romanian Bible import completed successfully!')
    console.log(`📚 Books imported: ${bookCount}`)
    console.log(`📖 Chapters imported: ${chapterCount}`)
    console.log(`📝 Verses imported: ${verseCount}`)
  } catch (error) {
    console.error('❌ Error importing Romanian Bible:', error)
    throw error
  } finally {
    await prisma.$disconnect()
  }
}
|
|
|
|
// Start the import only when this file is the process entry point, not when
// it is loaded by another module; the exit code reports the outcome.
if (require.main === module) {
  const exitAfterSuccess = (): void => {
    console.log('Import completed successfully!')
    process.exit(0)
  }

  const exitAfterFailure = (error: unknown): void => {
    console.error('Import failed:', error)
    process.exit(1)
  }

  importRomanianBible().then(exitAfterSuccess).catch(exitAfterFailure)
}

export { importRomanianBible }