Fix authentication state persistence and admin role display
- Implement complete authentication system with JWT token validation
- Add auth provider with persistent login state across page refreshes
- Create multilingual login/register forms with Material-UI components
- Fix token validation using raw SQL queries to bypass Prisma sync issues
- Add comprehensive error handling for expired/invalid tokens
- Create profile and settings pages with full i18n support
- Add proper user role management (admin/user) with database sync
- Implement secure middleware with CSRF protection and auth checks
- Add debug endpoints for troubleshooting authentication issues
- Fix Zustand store persistence for authentication state

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
71
scripts/assemble-english-from-cache.ts
Normal file
71
scripts/assemble-english-from-cache.ts
Normal file
@@ -0,0 +1,71 @@
|
||||
#!/usr/bin/env tsx
|
||||
import fs from 'fs'
|
||||
import path from 'path'
|
||||
|
||||
// Version abbreviation taken from EN_ABBR (default "BSB"), always upper-cased.
const ABBR = (process.env.EN_ABBR || 'BSB').toUpperCase()

// Directory scanned for per-book sub-directories; the assembled JSON files are
// written here as well. INPUT_DIR overrides the default data/en_bible/<ABBR>.
const ROOT = process.env.INPUT_DIR || path.join('data', 'en_bible', ABBR)

// The 39 Old Testament titles; array position defines the output order.
const OT_ORDER = [
  'Genesis', 'Exodus', 'Leviticus', 'Numbers', 'Deuteronomy',
  'Joshua', 'Judges', 'Ruth', '1 Samuel', '2 Samuel',
  '1 Kings', '2 Kings', '1 Chronicles', '2 Chronicles', 'Ezra',
  'Nehemiah', 'Esther', 'Job', 'Psalms', 'Proverbs',
  'Ecclesiastes', 'Song of Songs', 'Isaiah', 'Jeremiah', 'Lamentations',
  'Ezekiel', 'Daniel', 'Hosea', 'Joel', 'Amos',
  'Obadiah', 'Jonah', 'Micah', 'Nahum', 'Habakkuk',
  'Zephaniah', 'Haggai', 'Zechariah', 'Malachi'
]

// The 27 New Testament titles; array position defines the output order.
const NT_ORDER = [
  'Matthew', 'Mark', 'Luke', 'John', 'Acts',
  'Romans', '1 Corinthians', '2 Corinthians', 'Galatians', 'Ephesians',
  'Philippians', 'Colossians', '1 Thessalonians', '2 Thessalonians', '1 Timothy',
  '2 Timothy', 'Titus', 'Philemon', 'Hebrews', 'James',
  '1 Peter', '2 Peter', '1 John', '2 John', '3 John',
  'Jude', 'Revelation'
]
|
||||
|
||||
/** Three-character book codes mapped to the English titles used in OT_ORDER / NT_ORDER. */
const BOOK_TITLE_BY_CODE = new Map<string, string>([
  ['GEN', 'Genesis'], ['EXO', 'Exodus'], ['LEV', 'Leviticus'], ['NUM', 'Numbers'], ['DEU', 'Deuteronomy'],
  ['JOS', 'Joshua'], ['JDG', 'Judges'], ['RUT', 'Ruth'], ['1SA', '1 Samuel'], ['2SA', '2 Samuel'],
  ['1KI', '1 Kings'], ['2KI', '2 Kings'], ['1CH', '1 Chronicles'], ['2CH', '2 Chronicles'], ['EZR', 'Ezra'],
  ['NEH', 'Nehemiah'], ['EST', 'Esther'], ['JOB', 'Job'], ['PSA', 'Psalms'], ['PRO', 'Proverbs'],
  ['ECC', 'Ecclesiastes'], ['SNG', 'Song of Songs'], ['ISA', 'Isaiah'], ['JER', 'Jeremiah'], ['LAM', 'Lamentations'],
  ['EZK', 'Ezekiel'], ['DAN', 'Daniel'], ['HOS', 'Hosea'], ['JOL', 'Joel'], ['AMO', 'Amos'],
  ['OBA', 'Obadiah'], ['JON', 'Jonah'], ['MIC', 'Micah'], ['NAM', 'Nahum'], ['HAB', 'Habakkuk'],
  ['ZEP', 'Zephaniah'], ['HAG', 'Haggai'], ['ZEC', 'Zechariah'], ['MAL', 'Malachi'],
  ['MAT', 'Matthew'], ['MRK', 'Mark'], ['LUK', 'Luke'], ['JHN', 'John'], ['ACT', 'Acts'],
  ['ROM', 'Romans'], ['1CO', '1 Corinthians'], ['2CO', '2 Corinthians'], ['GAL', 'Galatians'], ['EPH', 'Ephesians'],
  ['PHP', 'Philippians'], ['COL', 'Colossians'], ['1TH', '1 Thessalonians'], ['2TH', '2 Thessalonians'], ['1TI', '1 Timothy'],
  ['2TI', '2 Timothy'], ['TIT', 'Titus'], ['PHM', 'Philemon'], ['HEB', 'Hebrews'], ['JAS', 'James'],
  ['1PE', '1 Peter'], ['2PE', '2 Peter'], ['1JN', '1 John'], ['2JN', '2 John'], ['3JN', '3 John'],
  ['JUD', 'Jude'], ['REV', 'Revelation']
])

/**
 * Translate a book code such as `GEN` or `1SA` into its English title.
 *
 * The lookup is exact (case-sensitive) and uses a Map, so names that happen to
 * exist on `Object.prototype` (e.g. `constructor`) are treated like any other
 * unknown code instead of returning an inherited member.
 *
 * @param abbr Book code to resolve.
 * @returns The English title, or `abbr` unchanged when the code is not known.
 */
function titleFromAbbr(abbr: string): string {
  return BOOK_TITLE_BY_CODE.get(abbr) ?? abbr
}
|
||||
|
||||
/**
 * List the names of the immediate sub-directories of `dir`.
 *
 * @param dir Directory to scan.
 * @returns Entry names (not full paths) that are directories; an empty array
 *          when `dir` does not exist.
 */
function detectBooks(dir: string): string[] {
  const found: string[] = []
  if (fs.existsSync(dir)) {
    for (const entry of fs.readdirSync(dir)) {
      const info = fs.statSync(path.join(dir, entry))
      if (info.isDirectory()) found.push(entry)
    }
  }
  return found
}
|
||||
|
||||
/**
 * Load every chapter JSON file found directly inside `bookDir`.
 *
 * A file counts as a chapter when its name starts with `chapter-`, ends with
 * `.json` and does not contain `intro`.
 *
 * @param bookDir Directory holding one book's chapter files.
 * @returns The parsed objects, ordered by ascending `chapterNum`.
 */
function readChapters(bookDir: string) {
  const isChapterFile = (file: string) =>
    file.startsWith('chapter-') && file.endsWith('.json') && !file.includes('intro')

  const parsed: any[] = fs
    .readdirSync(bookDir)
    .filter(isChapterFile)
    .map(file => JSON.parse(fs.readFileSync(path.join(bookDir, file), 'utf-8')))

  parsed.sort((left, right) => left.chapterNum - right.chapterNum)
  return parsed
}
|
||||
|
||||
/**
 * Build old_testament.json and new_testament.json inside ROOT from the
 * per-book chapter directories found there.
 *
 * Directory names that are entirely upper-case are treated as book codes and
 * translated with titleFromAbbr; other names are used as the book title as-is.
 * Unless ALLOW_PARTIAL is "1", the process exits with status 1 when any of the
 * 66 expected titles is absent.
 */
function assemble() {
  // Keep only the book directories that actually contain chapter files.
  const books: { name: string; chapters: any[] }[] = []
  for (const dirName of detectBooks(ROOT)) {
    const looksLikeCode = dirName.toUpperCase() === dirName
    const name = looksLikeCode ? titleFromAbbr(dirName.toUpperCase()) : dirName
    const chapters = readChapters(path.join(ROOT, dirName))
    if (chapters.length === 0) continue
    books.push({ name, chapters })
  }

  // Select the books belonging to one testament and put them in list order.
  const inListOrder = (order: string[]) =>
    books
      .filter(book => order.includes(book.name))
      .sort((x, y) => order.indexOf(x.name) - order.indexOf(y.name))
  const ot = inListOrder(OT_ORDER)
  const nt = inListOrder(NT_ORDER)

  // Verify completeness: all 66 books must be present
  const present = new Set(books.map(book => book.name))
  const missing = OT_ORDER.concat(NT_ORDER).filter(title => !present.has(title))
  const partialAllowed = process.env.ALLOW_PARTIAL === '1'
  if (!partialAllowed && missing.length > 0) {
    console.error(`Missing ${missing.length} books. Full EN Bible not downloaded yet.`)
    const preview = missing.slice(0, 10).join(', ')
    console.error('First few missing:', preview + (missing.length > 10 ? '...' : ''))
    process.exit(1)
  }

  const otFile = path.join(ROOT, 'old_testament.json')
  const ntFile = path.join(ROOT, 'new_testament.json')
  fs.mkdirSync(ROOT, { recursive: true })
  fs.writeFileSync(otFile, JSON.stringify({ testament: 'Old Testament', books: ot }, null, 2), 'utf-8')
  fs.writeFileSync(ntFile, JSON.stringify({ testament: 'New Testament', books: nt }, null, 2), 'utf-8')
  console.log('Assembled:', otFile)
  console.log('Assembled:', ntFile)
}
|
||||
|
||||
assemble()
|
||||
63
scripts/cleanup-english-versions.ts
Normal file
63
scripts/cleanup-english-versions.ts
Normal file
@@ -0,0 +1,63 @@
|
||||
import { PrismaClient } from '@prisma/client'
|
||||
|
||||
const prisma = new PrismaClient()
|
||||
|
||||
/**
 * Delete every English Bible version except WEB and make WEB the only default
 * English version.
 *
 * For each non-WEB English version the verses, chapters, books and finally the
 * version row are deleted, in that order. Afterwards `isDefault` is cleared on
 * all English versions and set on WEB.
 *
 * Failures (including a missing WEB version) are reported through
 * `process.exitCode = 1` rather than `process.exit(1)`: exiting immediately
 * would terminate the process before the `finally` block disconnects Prisma.
 */
async function cleanup() {
  try {
    console.log('Starting cleanup of English Bible versions (keeping WEB)...')

    // Ensure WEB exists
    const web = await prisma.bibleVersion.findFirst({ where: { language: 'en', abbreviation: 'WEB' } })
    if (!web) {
      console.error('WEB version not found. Please import WEB first (via usfm-to-json + import). Aborting.')
      // Aborting is a failure: make it visible to shells and CI.
      process.exitCode = 1
      return
    }

    // Gather non-WEB English versions (e.g., BSB, BSB_MD, BSB_SAMPLES, etc.)
    const others = await prisma.bibleVersion.findMany({
      where: { language: 'en', NOT: { abbreviation: 'WEB' } },
      orderBy: { createdAt: 'asc' }
    })
    console.log('Found non-WEB EN versions:', others.map(v => v.abbreviation))

    for (const v of others) {
      console.log(`Deleting content for ${v.abbreviation} (${v.id}) ...`)

      // Children first: verses -> chapters -> books -> version.
      const delVerses = await prisma.bibleVerse.deleteMany({
        where: { chapter: { book: { versionId: v.id } } }
      })
      console.log('  Verses deleted:', delVerses.count)

      const delCh = await prisma.bibleChapter.deleteMany({
        where: { book: { versionId: v.id } }
      })
      console.log('  Chapters deleted:', delCh.count)

      const delBooks = await prisma.bibleBook.deleteMany({ where: { versionId: v.id } })
      console.log('  Books deleted:', delBooks.count)

      const delVer = await prisma.bibleVersion.delete({ where: { id: v.id } })
      console.log('  Version deleted:', delVer.abbreviation)
    }

    // Normalize defaults: set all EN isDefault=false then set WEB=true
    await prisma.bibleVersion.updateMany({ where: { language: 'en' }, data: { isDefault: false } })
    await prisma.bibleVersion.update({ where: { id: web.id }, data: { isDefault: true } })
    console.log('Set WEB as the sole default English version.')

    // Quick sanity: count WEB books
    const webBooks = await prisma.bibleBook.count({ where: { versionId: web.id } })
    console.log('WEB book count:', webBooks)
  } catch (e) {
    console.error('Cleanup failed:', e)
    process.exitCode = 1
  } finally {
    await prisma.$disconnect()
  }
}
|
||||
|
||||
cleanup()
|
||||
|
||||
74
scripts/clone_vector_table.ts
Normal file
74
scripts/clone_vector_table.ts
Normal file
@@ -0,0 +1,74 @@
|
||||
import 'dotenv/config'
|
||||
import { Pool } from 'pg'
|
||||
|
||||
/**
 * Clone the `bv_ro_fidela` vector table into `bv_ro_cornilescu` inside the
 * schema named by VECTOR_SCHEMA (default `ai_bible`).
 *
 * Steps, inside one transaction: ensure the `vector` extension and the schema
 * exist, create the target table with `LIKE ... INCLUDING ALL` if it is
 * missing, copy the rows when the target is empty, and create the regular
 * indexes. After the commit it runs `VACUUM ANALYZE` and tries to build the
 * IVFFLAT index; a failure of that last step is logged and tolerated.
 *
 * Any other failure is logged and reported through `process.exitCode = 1`.
 * The client is always released and the pool always ended.
 */
async function main() {
  const pool = new Pool({ connectionString: process.env.DATABASE_URL })
  const schema = (process.env.VECTOR_SCHEMA || 'ai_bible').replace(/[^a-zA-Z0-9_]/g, '')
  // Quote the schema everywhere so these references resolve to the same
  // (case-preserved) schema as CREATE SCHEMA "..." and format('%I') below.
  const source = `"${schema}".bv_ro_fidela`
  const target = `"${schema}".bv_ro_cornilescu`

  try {
    if (schema.length === 0) {
      throw new Error('VECTOR_SCHEMA contains no valid identifier characters (allowed: a-z, A-Z, 0-9, _)')
    }

    const client = await pool.connect()
    // Tracks whether a ROLLBACK is meaningful if something throws.
    let inTransaction = false
    try {
      console.log('Cloning vector table from', source, 'to', target)
      await client.query('BEGIN')
      inTransaction = true
      await client.query(`CREATE EXTENSION IF NOT EXISTS vector;`)
      await client.query(`CREATE SCHEMA IF NOT EXISTS "${schema}";`)

      // Create target table if not exists with same structure
      await client.query(`
        DO $$
        BEGIN
          IF NOT EXISTS (
            SELECT 1 FROM information_schema.tables
            WHERE table_schema = '${schema}' AND table_name = 'bv_ro_cornilescu') THEN
            EXECUTE format('CREATE TABLE %I.%I (LIKE %I.%I INCLUDING ALL)', '${schema}', 'bv_ro_cornilescu', '${schema}', 'bv_ro_fidela');
          END IF;
        END$$;`)

      // Insert rows if target empty
      const cnt = await client.query(`SELECT count(*)::int AS c FROM ${target}`)
      if ((cnt.rows?.[0]?.c ?? 0) === 0) {
        console.log('Copying rows...')
        await client.query(`
          INSERT INTO ${target} (testament, book, chapter, verse, text_raw, text_norm, tsv, embedding, created_at, updated_at)
          SELECT testament, book, chapter, verse, text_raw, text_norm, tsv, embedding, created_at, updated_at
          FROM ${source}
          ON CONFLICT DO NOTHING
        `)
      } else {
        console.log('Target already has rows, skipping copy')
      }

      // Create indexes if not exist
      await client.query(`
        CREATE UNIQUE INDEX IF NOT EXISTS ux_ref_bv_ro_cornilescu ON ${target} (book, chapter, verse);
        CREATE INDEX IF NOT EXISTS idx_tsv_bv_ro_cornilescu ON ${target} USING GIN (tsv);
        CREATE INDEX IF NOT EXISTS idx_book_ch_bv_ro_cornilescu ON ${target} (book, chapter);
        CREATE INDEX IF NOT EXISTS idx_testament_bv_ro_cornilescu ON ${target} (testament);
      `)

      await client.query('COMMIT')
      inTransaction = false
      console.log('Rows copied and indexes created. Running post-copy maintenance...')

      // Run maintenance commands outside of transaction
      await client.query(`VACUUM ANALYZE ${target};`)
      try {
        await client.query(`
          CREATE INDEX IF NOT EXISTS idx_vec_ivfflat_bv_ro_cornilescu
          ON ${target} USING ivfflat (embedding vector_cosine_ops)
          WITH (lists = 100);
        `)
      } catch (e) {
        // Best effort: keep going, but show the real cause instead of guessing.
        console.warn('IVFFLAT index creation failed; skipping for now. If this was a memory limit, you can create it later with higher maintenance_work_mem.', e)
      }
      console.log('Clone completed.')
    } catch (e) {
      if (inTransaction) {
        try {
          await client.query('ROLLBACK')
        } catch (rollbackError) {
          // Do not let a failed rollback hide the original error.
          console.error('Rollback failed:', rollbackError)
        }
      }
      throw e
    } finally {
      client.release()
    }
  } catch (e) {
    console.error('Clone error:', e)
    // exitCode (not exit) so the pool is still closed below.
    process.exitCode = 1
  } finally {
    await pool.end()
  }
}
|
||||
|
||||
main()
|
||||
140
scripts/import-english-json.ts
Normal file
140
scripts/import-english-json.ts
Normal file
@@ -0,0 +1,140 @@
|
||||
import { PrismaClient } from '@prisma/client'
|
||||
import fs from 'fs'
|
||||
import path from 'path'
|
||||
|
||||
const prisma = new PrismaClient()
|
||||
|
||||
/** One verse: its number within the chapter and its text. */
interface Verse {
  verseNum: number
  text: string
}

/** One chapter: its number within the book and its verses. */
interface Chapter {
  chapterNum: number
  verses: Verse[]
}

/** One book: its English title and its chapters. */
interface Book {
  name: string
  chapters: Chapter[]
}

/** Shape of old_testament.json / new_testament.json as read by this script. */
interface TestamentFile {
  testament: string
  books: Book[]
}
|
||||
|
||||
/**
 * Read and parse a testament JSON file, checking its top-level shape.
 *
 * Only the top level is validated (`testament` is a string, `books` is an
 * array); individual books, chapters and verses are not inspected.
 *
 * @param file Path of the JSON file to read.
 * @returns The parsed file content.
 * @throws Error when the file cannot be read or parsed, or when the top-level
 *         shape is wrong; shape errors name the offending file.
 */
function loadJson(file: string): TestamentFile {
  const parsed: unknown = JSON.parse(fs.readFileSync(file, 'utf-8'))
  if (typeof parsed !== 'object' || parsed === null) {
    throw new Error(`${file}: expected a JSON object with "testament" and "books"`)
  }
  const { testament, books } = parsed as { testament?: unknown; books?: unknown }
  if (typeof testament !== 'string' || !Array.isArray(books)) {
    throw new Error(`${file}: expected "testament" to be a string and "books" to be an array`)
  }
  return parsed as TestamentFile
}
|
||||
|
||||
/**
 * Derive the storage key for an English book title: lower-case, with each run
 * of whitespace replaced by a single underscore (`'1 Samuel'` -> `'1_samuel'`,
 * `'Song of Songs'` -> `'song_of_songs'`).
 *
 * This used to consult a 66-entry lookup table first, but every entry in that
 * table was exactly this normalisation, and the plain-object lookup returned
 * inherited members for names such as `constructor`. Normalising directly
 * yields the same keys for all book titles and always returns a string.
 *
 * @param name English book title.
 * @returns The normalised key.
 */
function getBookKeyEn(name: string): string {
  return name.toLowerCase().replace(/\s+/g, '_')
}
|
||||
|
||||
/**
 * Give the 1-based position of `name` within `list`.
 *
 * @param name Entry to look for.
 * @param list Ordered list to search.
 * @returns Position starting at 1, or 999 when `name` is not in `list`.
 */
function getOrderFromList(name: string, list: string[]): number {
  const position = list.indexOf(name)
  if (position < 0) return 999
  return position + 1
}
|
||||
|
||||
/**
 * Import old_testament.json and new_testament.json into the database for the
 * English version named by EN_ABBR (default `BSB`).
 *
 * The version, each book (keyed by version + order number) and each chapter
 * (keyed by book + chapter number) are upserted without modifying existing
 * rows. Verses are de-duplicated by verse number (first occurrence wins) and
 * inserted with `createMany({ skipDuplicates: true })`.
 *
 * The summary line counts books and chapters processed and verses actually
 * inserted. Failures are logged and reported via `process.exitCode = 1`;
 * `process.exit(1)` is avoided because it would end the process before the
 * `finally` block disconnects Prisma.
 */
async function main() {
  try {
    const abbr = (process.env.EN_ABBR || 'BSB').toUpperCase()
    const inputDir = process.env.INPUT_DIR || path.join('data', 'en_bible', abbr)
    const lang = 'en'

    const otPath = path.join(inputDir, 'old_testament.json')
    const ntPath = path.join(inputDir, 'new_testament.json')
    if (!fs.existsSync(otPath) || !fs.existsSync(ntPath)) {
      throw new Error(`Missing OT/NT JSON at ${inputDir}. Run fetch-english-bible.ts first.`)
    }

    // Upsert English version
    const englishVersion = await prisma.bibleVersion.upsert({
      where: { abbreviation_language: { abbreviation: abbr, language: lang } },
      update: {},
      create: {
        name: abbr,
        abbreviation: abbr,
        language: lang,
        description: `English Bible (${abbr})`,
        isDefault: true
      }
    })

    const ot = loadJson(otPath)
    const nt = loadJson(ntPath)
    // Book order numbers follow the order of appearance: OT books, then NT books.
    const canon = [...ot.books.map(b => b.name), ...nt.books.map(b => b.name)]

    let importedBooks = 0
    let importedChapters = 0
    let importedVerses = 0

    async function importTestament(test: TestamentFile) {
      for (const book of test.books) {
        const orderNum = getOrderFromList(book.name, canon)
        const testament = test.testament
        const bookKey = getBookKeyEn(book.name)

        const createdBook = await prisma.bibleBook.upsert({
          where: {
            versionId_orderNum: {
              versionId: englishVersion.id,
              orderNum
            }
          },
          update: {},
          create: {
            versionId: englishVersion.id,
            name: book.name,
            testament,
            orderNum,
            bookKey
          }
        })
        importedBooks++

        for (const chapter of book.chapters) {
          const createdChapter = await prisma.bibleChapter.upsert({
            where: {
              bookId_chapterNum: {
                bookId: createdBook.id,
                chapterNum: chapter.chapterNum
              }
            },
            update: {},
            create: { bookId: createdBook.id, chapterNum: chapter.chapterNum }
          })
          importedChapters++

          // Deduplicate verses by verseNum
          const unique = new Map<number, string>()
          for (const v of chapter.verses) {
            if (!unique.has(v.verseNum)) unique.set(v.verseNum, v.text)
          }
          const versesData = Array.from(unique.entries()).map(([num, text]) => ({
            chapterId: createdChapter.id,
            verseNum: num,
            text
          }))
          if (versesData.length > 0) {
            const inserted = await prisma.bibleVerse.createMany({ data: versesData, skipDuplicates: true })
            // Count rows actually written; skipped duplicates are not imports.
            importedVerses += inserted.count
          }
        }
      }
    }

    await importTestament(ot)
    await importTestament(nt)

    console.log(`Imported ${importedBooks} books, ${importedChapters} chapters, ${importedVerses} verses for ${abbr}.`)
  } catch (e) {
    console.error('English JSON import failed:', e)
    process.exitCode = 1
  } finally {
    await prisma.$disconnect()
  }
}
|
||||
|
||||
main()
|
||||
|
||||
103
scripts/parse-bsb-md-full.js
Normal file
103
scripts/parse-bsb-md-full.js
Normal file
@@ -0,0 +1,103 @@
|
||||
#!/usr/bin/env node
|
||||
const fs = require('fs')
|
||||
const path = require('path')
|
||||
|
||||
// Markdown source to parse; BSB_MD_PATH overrides the default bibles/bible-bsb.md.
const SRC = process.env.BSB_MD_PATH || path.join('bibles', 'bible-bsb.md')
// Upper-cased abbreviation used for the default output folder name.
const OUT_ABBR = (process.env.EN_ABBR || 'BSB_MD').toUpperCase()
// Output folder; OUTPUT_DIR overrides the default data/en_bible/<OUT_ABBR>.
const OUT_DIR = process.env.OUTPUT_DIR || path.join('data', 'en_bible', OUT_ABBR)

/** Create directory `p`, including any missing parent directories. */
function ensureDir(p) {
  fs.mkdirSync(p, { recursive: true })
}

/** Write `obj` to `file` as 2-space-indented UTF-8 JSON, creating the parent directory first. */
function writeJson(file, obj) {
  ensureDir(path.dirname(file))
  const payload = JSON.stringify(obj, null, 2)
  fs.writeFileSync(file, payload, 'utf-8')
}
|
||||
|
||||
/**
 * Canonical book table: [title, regex-source variants, testament].
 *
 * The variants are joined with `|` and passed to `new RegExp(...)`, so they are
 * regex *source* held in ordinary string literals. A backslash must therefore
 * be doubled: '1\\s+Samuel' yields the pattern 1\s+Samuel, whereas '1\s+Samuel'
 * is just the string "1s+Samuel" and would never match "1 Samuel".
 *
 * @type {Array<[string, string[], 'OT' | 'NT']>}
 */
const BOOKS = [
  ['Genesis', ['Genesis'], 'OT'], ['Exodus', ['Exodus'], 'OT'], ['Leviticus', ['Leviticus'], 'OT'], ['Numbers', ['Numbers'], 'OT'], ['Deuteronomy', ['Deuteronomy'], 'OT'],
  ['Joshua', ['Joshua'], 'OT'], ['Judges', ['Judges'], 'OT'], ['Ruth', ['Ruth'], 'OT'], ['1 Samuel', ['1\\s+Samuel'], 'OT'], ['2 Samuel', ['2\\s+Samuel'], 'OT'],
  ['1 Kings', ['1\\s+Kings'], 'OT'], ['2 Kings', ['2\\s+Kings'], 'OT'], ['1 Chronicles', ['1\\s+Chronicles'], 'OT'], ['2 Chronicles', ['2\\s+Chronicles'], 'OT'],
  ['Ezra', ['Ezra'], 'OT'], ['Nehemiah', ['Nehemiah'], 'OT'], ['Esther', ['Esther'], 'OT'], ['Job', ['Job'], 'OT'], ['Psalms', ['Psalms|Psalm'], 'OT'],
  ['Proverbs', ['Proverbs'], 'OT'], ['Ecclesiastes', ['Ecclesiastes'], 'OT'], ['Song of Songs', ['Song\\s+of\\s+Songs|Song\\s+of\\s+Solomon'], 'OT'], ['Isaiah', ['Isaiah'], 'OT'],
  ['Jeremiah', ['Jeremiah'], 'OT'], ['Lamentations', ['Lamentations'], 'OT'], ['Ezekiel', ['Ezekiel'], 'OT'], ['Daniel', ['Daniel'], 'OT'],
  ['Hosea', ['Hosea'], 'OT'], ['Joel', ['Joel'], 'OT'], ['Amos', ['Amos'], 'OT'], ['Obadiah', ['Obadiah'], 'OT'], ['Jonah', ['Jonah'], 'OT'], ['Micah', ['Micah'], 'OT'],
  ['Nahum', ['Nahum'], 'OT'], ['Habakkuk', ['Habakkuk'], 'OT'], ['Zephaniah', ['Zephaniah'], 'OT'], ['Haggai', ['Haggai'], 'OT'], ['Zechariah', ['Zechariah'], 'OT'], ['Malachi', ['Malachi'], 'OT'],
  ['Matthew', ['Matthew'], 'NT'], ['Mark', ['Mark'], 'NT'], ['Luke', ['Luke'], 'NT'], ['John', ['John'], 'NT'], ['Acts', ['Acts'], 'NT'],
  ['Romans', ['Romans'], 'NT'], ['1 Corinthians', ['1\\s+Corinthians'], 'NT'], ['2 Corinthians', ['2\\s+Corinthians'], 'NT'], ['Galatians', ['Galatians'], 'NT'], ['Ephesians', ['Ephesians'], 'NT'],
  ['Philippians', ['Philippians'], 'NT'], ['Colossians', ['Colossians'], 'NT'], ['1 Thessalonians', ['1\\s+Thessalonians'], 'NT'], ['2 Thessalonians', ['2\\s+Thessalonians'], 'NT'],
  ['1 Timothy', ['1\\s+Timothy'], 'NT'], ['2 Timothy', ['2\\s+Timothy'], 'NT'], ['Titus', ['Titus'], 'NT'], ['Philemon', ['Philemon'], 'NT'],
  ['Hebrews', ['Hebrews'], 'NT'], ['James', ['James'], 'NT'], ['1 Peter', ['1\\s+Peter'], 'NT'], ['2 Peter', ['2\\s+Peter'], 'NT'],
  ['1 John', ['1\\s+John'], 'NT'], ['2 John', ['2\\s+John'], 'NT'], ['3 John', ['3\\s+John'], 'NT'], ['Jude', ['Jude'], 'NT'], ['Revelation', ['Revelation'], 'NT']
]
|
||||
|
||||
/**
 * Locate every "<Book> <chapter>:<verse>" reference in `md`.
 *
 * Each book in BOOKS is searched separately, so one reference can be reported
 * twice when a title is the tail of another ("John" inside "1 John 3:16").
 * Markers that start inside an already accepted marker are dropped; otherwise
 * the shorter title would claim the verse text and the longer one would be
 * left with an empty slice.
 *
 * @param {string} md Full markdown text.
 * @returns {{book:string, chapter:number, verse:number, index:number, matchLen:number}[]}
 *          Non-overlapping markers in document order.
 */
function findVerseMarkers(md){
  const found = []
  for(const [name, variants] of BOOKS){
    const names = variants.join('|')
    // A reference must start the text or follow whitespace, "|" or "(".
    const re = new RegExp('(?:^|[\\n\\r\\f\\s\\|\\(])(?:'+names+')\\s+(\\d+):(\\d+)', 'gi')
    let m
    while((m=re.exec(md))!==null){
      found.push({ book:name, chapter:parseInt(m[1],10), verse:parseInt(m[2],10), index:m.index, matchLen:m[0].length })
    }
  }
  // Document order; when two markers start together the longer one goes first.
  found.sort((a,b)=>(a.index-b.index) || (b.matchLen-a.matchLen))

  const markers = []
  let coveredUntil = -1
  for(const mk of found){
    if(mk.index < coveredUntil) continue
    markers.push(mk)
    coveredUntil = mk.index + mk.matchLen
  }
  return markers
}

/**
 * Parse the markdown Bible at SRC into old_testament.json and
 * new_testament.json inside OUT_DIR.
 *
 * The text between one verse reference and the next becomes that verse's text
 * (whitespace collapsed, capped at 1500 characters). When a reference occurs
 * more than once, the first occurrence wins. Exits with status 1 when SRC is
 * missing or no reference is found.
 */
function main(){
  if(!fs.existsSync(SRC)) { console.error('Missing source:', SRC); process.exit(1) }
  const md = fs.readFileSync(SRC,'utf-8')

  // Collect all verse markers across the entire doc
  const markers = findVerseMarkers(md)
  if(markers.length===0){ console.error('No verse markers found'); process.exit(1) }

  // Build text segments per marker (chapter/verse)
  const entries = []
  for(let i=0;i<markers.length;i++){
    const cur = markers[i]
    const start = cur.index + cur.matchLen
    const end = (i+1<markers.length) ? markers[i+1].index : md.length
    let text = md.slice(start, end)
    text = text.replace(/[\u000c\r]+/g,'\n') // formfeed
    text = text.replace(/\s+/g,' ').trim()
    // Stop overly long spill
    if(text.length>1500) text = text.slice(0,1500).trim()
    entries.push({ ...cur, text })
  }

  // Aggregate into OT/NT JSON
  const bookIndex = new Map(BOOKS.map(([n,_,t],i)=>[n,{testament:t, order:i+1}]))
  const byBook = new Map()
  for(const e of entries){
    if(!byBook.has(e.book)) byBook.set(e.book, new Map())
    const chMap = byBook.get(e.book)
    if(!chMap.has(e.chapter)) chMap.set(e.chapter, new Map())
    const vMap = chMap.get(e.chapter)
    if(!vMap.has(e.verse)) vMap.set(e.verse, e.text)
  }

  const otBooks=[]; const ntBooks=[]
  for(const [name, chMap] of byBook){
    const meta = bookIndex.get(name)
    const chapters=[]
    for(const [ch, vMap] of Array.from(chMap.entries()).sort((a,b)=>a[0]-b[0])){
      const verses=[]
      for(const [vn, txt] of Array.from(vMap.entries()).sort((a,b)=>a[0]-b[0])){
        verses.push({ verseNum: vn, text: txt })
      }
      if(verses.length>0) chapters.push({ chapterNum: ch, verses })
    }
    if(meta?.testament==='OT') otBooks.push({name,chapters})
    else ntBooks.push({name,chapters})
  }

  // Sort books in canonical order
  otBooks.sort((a,b)=>bookIndex.get(a.name).order-bookIndex.get(b.name).order)
  ntBooks.sort((a,b)=>bookIndex.get(a.name).order-bookIndex.get(b.name).order)

  const ot={ testament:'Old Testament', books: otBooks }
  const nt={ testament:'New Testament', books: ntBooks }

  const otFile = path.join(OUT_DIR,'old_testament.json')
  const ntFile = path.join(OUT_DIR,'new_testament.json')
  writeJson(otFile, ot)
  writeJson(ntFile, nt)
  console.log('Wrote:', otFile)
  console.log('Wrote:', ntFile)
  console.log('Books parsed:', otBooks.length + ntBooks.length)
}
|
||||
|
||||
main()
|
||||
83
scripts/parse-bsb-md-samples.ts
Normal file
83
scripts/parse-bsb-md-samples.ts
Normal file
@@ -0,0 +1,83 @@
|
||||
#!/usr/bin/env tsx
|
||||
import fs from 'fs'
|
||||
import path from 'path'
|
||||
|
||||
/*
|
||||
Quick sample extractor from bibles/bible-bsb.md to our OT/NT JSON format.
|
||||
- Looks for Genesis 3:20–24 markers and builds a small sample JSON.
|
||||
- Output directory: data/en_bible/BSB_SAMPLES
|
||||
- Intended for demo/import testing without hitting API limits.
|
||||
*/
|
||||
|
||||
// Markdown source to sample from; BSB_MD_PATH overrides the default bibles/bible-bsb.md.
const SRC = process.env.BSB_MD_PATH || path.join('bibles', 'bible-bsb.md')
// Fixed output folder for the sample JSON files.
const OUT = path.join('data', 'en_bible', 'BSB_SAMPLES')

/** Create directory `p`, including any missing parent directories. */
function ensureDir(p: string) {
  fs.mkdirSync(p, { recursive: true })
}

/** Write `obj` to `file` as 2-space-indented UTF-8 JSON, creating the parent directory first. */
function writeJson(file: string, obj: any) {
  ensureDir(path.dirname(file))
  const payload = JSON.stringify(obj, null, 2)
  fs.writeFileSync(file, payload, 'utf-8')
}
|
||||
|
||||
/**
 * Extract Genesis 3:20-24 from the markdown text as a one-chapter sample.
 *
 * Each "Genesis 3:<n>" marker for verses 20..24 opens a window that runs to
 * the next such marker in the document (or at most 2000 characters for the
 * last one). The marker line itself and form-feed page headers are removed,
 * whitespace is collapsed, and the text is cut at the first isolated one- or
 * two-digit number found more than 40 characters in.
 *
 * Windows follow document order, so markers that appear out of verse order
 * still get their own text. When a verse is marked more than once, the first
 * occurrence that yields text is kept.
 *
 * @param md Full markdown text.
 * @returns Chapter 3 with the extracted verses sorted by verse number; if
 *          nothing could be extracted, a single hard-coded verse 20.
 */
function extractGenesis3Samples(md: string): { chapterNum: number; verses: { verseNum: number; text: string }[] } {
  const FIRST_VERSE = 20
  const LAST_VERSE = 24
  const MAX_WINDOW = 2000

  // Sample markers in document order (exec walks the text left to right).
  const markers: { verse: number; index: number }[] = []
  const markerRe = /Genesis\s+3:(\d+)/g
  let hit: RegExpExecArray | null
  while ((hit = markerRe.exec(md)) !== null) {
    const verse = parseInt(hit[1], 10)
    if (verse >= FIRST_VERSE && verse <= LAST_VERSE) markers.push({ verse, index: hit.index })
  }

  const textByVerse = new Map<number, string>()
  markers.forEach((cur, i) => {
    if (textByVerse.has(cur.verse)) return
    const next = markers[i + 1]
    const end = next ? next.index : Math.min(md.length, cur.index + MAX_WINDOW) // cap window
    let chunk = md.slice(cur.index, end)
    // Remove the marker itself and nearby page headers/footers and footnote junk
    chunk = chunk.replace(/Genesis\s+3:\d+.*\n?/, '')
    chunk = chunk.replace(/\f\d+\s*\|\s*Genesis\s*3:\d+.*\n?/g, '')
    chunk = chunk.replace(/[\u000c\r]+/g, '\n') // form feed cleanup
    chunk = chunk.replace(/\s+/g, ' ').trim()
    // Heuristic cut at an embedded verse number: any isolated 1-2 digit number.
    const stop = chunk.search(/\s\d{1,2}\s/)
    const clean = (stop > 40 ? chunk.slice(0, stop) : chunk).trim()
    if (clean.length > 0) textByVerse.set(cur.verse, clean)
  })

  const verses = Array.from(textByVerse, ([verseNum, text]) => ({ verseNum, text }))
    .sort((a, b) => a.verseNum - b.verseNum)

  // Fallback if nothing captured
  if (verses.length === 0) {
    verses.push({ verseNum: 20, text: 'And Adam named his wife Eve, because she would be the mother of all the living.' })
  }

  return { chapterNum: 3, verses }
}
|
||||
|
||||
/**
 * Read the markdown source at SRC and write the sample testament files to OUT:
 * old_testament.json containing Genesis with the extracted chapter 3 sample,
 * and new_testament.json with an empty book list. Exits with status 1 when
 * SRC does not exist.
 */
function main() {
  const sourceExists = fs.existsSync(SRC)
  if (!sourceExists) {
    console.error('Missing source file:', SRC)
    process.exit(1)
  }

  const markdown = fs.readFileSync(SRC, 'utf-8')
  const genesis = { name: 'Genesis', chapters: [extractGenesis3Samples(markdown)] }

  // Minimal NT placeholder for structure completeness
  const outputs: [string, { testament: string; books: any[] }][] = [
    ['old_testament.json', { testament: 'Old Testament', books: [genesis] }],
    ['new_testament.json', { testament: 'New Testament', books: [] as any[] }]
  ]
  for (const [fileName, payload] of outputs) {
    writeJson(path.join(OUT, fileName), payload)
  }
  console.log('Wrote samples to', OUT)
}
|
||||
|
||||
main()
|
||||
|
||||
163
scripts/usfm-to-json.ts
Normal file
163
scripts/usfm-to-json.ts
Normal file
@@ -0,0 +1,163 @@
|
||||
#!/usr/bin/env tsx
|
||||
import fs from 'fs'
|
||||
import path from 'path'
|
||||
|
||||
/*
|
||||
Convert a directory of USFM files (e.g., WEB/KJV) into our OT/NT JSON format.
|
||||
|
||||
Env:
|
||||
- INPUT_USFM_DIR: path to folder with *.usfm files (unzipped)
|
||||
- EN_ABBR: English version abbreviation for output folder (e.g., WEB or KJV)
|
||||
- OUTPUT_DIR (optional): defaults to data/en_bible/<EN_ABBR>
|
||||
|
||||
Output:
|
||||
- <OUTPUT_DIR>/old_testament.json
|
||||
- <OUTPUT_DIR>/new_testament.json
|
||||
|
||||
USFM markers parsed:
|
||||
- \id <BOOKID>
|
||||
- \h <Header/Book name> (optional)
|
||||
- \c <chapter number>
|
||||
- \v <verse number> <text>
|
||||
*/
|
||||
|
||||
// Folder containing the unzipped *.usfm files; required.
const INPUT = process.env.INPUT_USFM_DIR || ''
// Upper-cased version abbreviation used for the default output folder (default "WEB").
const ABBR = (process.env.EN_ABBR || 'WEB').toUpperCase()
// Output folder; OUTPUT_DIR overrides the default data/en_bible/<ABBR>.
const OUTPUT_DIR = process.env.OUTPUT_DIR || path.join('data', 'en_bible', ABBR)

// Refuse to run without a usable input folder.
const inputUsable = INPUT !== '' && fs.existsSync(INPUT)
if (!inputUsable) {
  console.error('Missing or invalid INPUT_USFM_DIR. Set INPUT_USFM_DIR to a folder containing *.usfm files (unzipped).')
  process.exit(1)
}
|
||||
|
||||
/** Create directory `p`, including any missing parent directories. */
function ensureDir(p: string) {
  fs.mkdirSync(p, { recursive: true })
}

/** Write `obj` to `file` as 2-space-indented UTF-8 JSON, creating the parent directory first. */
function writeJson(file: string, obj: any) {
  ensureDir(path.dirname(file))
  const payload = JSON.stringify(obj, null, 2)
  fs.writeFileSync(file, payload, 'utf-8')
}
|
||||
|
||||
// Canonical order + mapping from USFM book codes to English names + testament
|
||||
// Based on standard Protestant canon 66 books
|
||||
/** One book of the canon: USFM book code, English name and testament. */
type CanonEntry = { code: string; name: string; testament: 'OT'|'NT' }

/** Expand compact [code, name] rows into CanonEntry objects for one testament. */
const canonRows = (testament: 'OT' | 'NT', rows: [string, string][]): CanonEntry[] =>
  rows.map(([code, name]) => ({ code, name, testament }))

// Canonical order + mapping from USFM book codes to English names + testament
// Based on standard Protestant canon 66 books
const CANON: CanonEntry[] = [
  ...canonRows('OT', [
    ['GEN', 'Genesis'], ['EXO', 'Exodus'], ['LEV', 'Leviticus'], ['NUM', 'Numbers'], ['DEU', 'Deuteronomy'],
    ['JOS', 'Joshua'], ['JDG', 'Judges'], ['RUT', 'Ruth'], ['1SA', '1 Samuel'], ['2SA', '2 Samuel'],
    ['1KI', '1 Kings'], ['2KI', '2 Kings'], ['1CH', '1 Chronicles'], ['2CH', '2 Chronicles'], ['EZR', 'Ezra'],
    ['NEH', 'Nehemiah'], ['EST', 'Esther'], ['JOB', 'Job'], ['PSA', 'Psalms'], ['PRO', 'Proverbs'],
    ['ECC', 'Ecclesiastes'], ['SNG', 'Song of Songs'], ['ISA', 'Isaiah'], ['JER', 'Jeremiah'], ['LAM', 'Lamentations'],
    ['EZK', 'Ezekiel'], ['DAN', 'Daniel'], ['HOS', 'Hosea'], ['JOL', 'Joel'], ['AMO', 'Amos'],
    ['OBA', 'Obadiah'], ['JON', 'Jonah'], ['MIC', 'Micah'], ['NAM', 'Nahum'], ['HAB', 'Habakkuk'],
    ['ZEP', 'Zephaniah'], ['HAG', 'Haggai'], ['ZEC', 'Zechariah'], ['MAL', 'Malachi']
  ]),
  ...canonRows('NT', [
    ['MAT', 'Matthew'], ['MRK', 'Mark'], ['LUK', 'Luke'], ['JHN', 'John'], ['ACT', 'Acts'],
    ['ROM', 'Romans'], ['1CO', '1 Corinthians'], ['2CO', '2 Corinthians'], ['GAL', 'Galatians'], ['EPH', 'Ephesians'],
    ['PHP', 'Philippians'], ['COL', 'Colossians'], ['1TH', '1 Thessalonians'], ['2TH', '2 Thessalonians'], ['1TI', '1 Timothy'],
    ['2TI', '2 Timothy'], ['TIT', 'Titus'], ['PHM', 'Philemon'], ['HEB', 'Hebrews'], ['JAS', 'James'],
    ['1PE', '1 Peter'], ['2PE', '2 Peter'], ['1JN', '1 John'], ['2JN', '2 John'], ['3JN', '3 John'],
    ['JUD', 'Jude'], ['REV', 'Revelation']
  ])
]

// Lookup by book code; `order` is the 1-based position within CANON.
const CODE_TO_META = new Map(CANON.map((entry, position) => [entry.code, { ...entry, order: position + 1 }]))
|
||||
|
||||
/** One verse: its number within the chapter and its text. */
type Verse = {
  verseNum: number
  text: string
}

/** One chapter: its number within the book and its verses. */
type Chapter = {
  chapterNum: number
  verses: Verse[]
}

/** One parsed book: display name, USFM code, testament and chapters. */
type Book = {
  name: string
  code: string
  testament: 'OT'|'NT'
  chapters: Chapter[]
}
|
||||
|
||||
/**
 * Paragraph, poetry and list markers whose content is running scripture text
 * (as opposed to headings, titles or references). A line that starts with one
 * of these is treated as if the marker were not there.
 */
const USFM_TEXT_PARAGRAPH =
  /^\\(?:p|m|po|pr|cls|pmo|pm|pmc|pmr|pi[1-4]?|mi|nb|pc|ph[1-4]?|q[1-4]?|qr|qc|qm[1-4]?|li[1-4]?|lim[1-4]?)(?:\s+|$)/

/**
 * Reduce a raw USFM verse fragment to plain text.
 *
 * @param fragment Raw verse text, possibly containing inline USFM markup.
 * @returns The text with markup removed and whitespace collapsed to single spaces.
 */
function stripUsfmMarkup(fragment: string): string {
  return fragment
    // Footnotes / cross-references are notes, not verse text: drop marker AND content.
    .replace(/\\(f|fe|x)\s[\s\S]*?\\\1\*/g, ' ')
    // Word-level attributes (`\w word|strong="H1234"\w*`): keep only the word.
    .replace(/\|[^\\]*(?=\\\+?[a-z0-9-]+\*)/gi, '')
    // Closing markers (`\wj*`, `\+nd*`): remove them whole so no stray `*` is left behind.
    .replace(/\\\+?[a-z0-9-]+\*/gi, '')
    // Opening markers together with the whitespace that separates them from the text.
    .replace(/\\\+?[a-z0-9-]+\s*/gi, '')
    .replace(/\s+/g, ' ')
    .trim()
}

/**
 * Parse one USFM file into a Book.
 *
 * Recognised line markers: `\id` (book code), `\h` (display name), `\c`
 * (chapter start) and `\v` (verse start). Text on following lines is appended
 * to the most recent verse, including text carried by paragraph/poetry markers
 * such as `\q2 ...`; other marker lines (headings etc.) are ignored.
 *
 * Known limitation: only a `\v` at the start of a line (optionally after a
 * paragraph marker) opens a new verse; a second `\v` later on the same line is
 * not split out.
 *
 * @param file Path of the USFM file to read (decoded as UTF-8).
 * @returns The parsed book, or null when the `\id` code is missing from
 *          CODE_TO_META. Chapters without any verse are omitted.
 */
function parseUsfmFile(file: string): Book | null {
  const lines = fs.readFileSync(file, 'utf-8').split(/\r?\n/)
  let code = ''
  let name = ''
  let currentChapter = 0
  // Verse text is kept raw while scanning and cleaned once at the end, so
  // markup that spans wrapped lines is handled in a single pass.
  let currentVerses: Verse[] = []
  const chapters = new Map<number, Verse[]>()

  for (const raw of lines) {
    let line = raw.trim()

    if (/^\\id\s+/.test(line)) {
      const m = line.match(/^\\id\s+(\S+)/)
      if (m) code = m[1].toUpperCase()
      continue
    }
    if (/^\\h\s+/.test(line)) {
      // \h Genesis
      name = line.replace(/^\\h\s+/, '').trim()
      continue
    }
    if (/^\\c\s+/.test(line)) {
      // New chapter: flush the verses gathered for the previous one.
      if (currentChapter > 0) chapters.set(currentChapter, currentVerses)
      const parsed = parseInt(line.slice(3).trim(), 10)
      // A malformed chapter number must not become a NaN map key; 0 means "no open chapter".
      currentChapter = Number.isNaN(parsed) ? 0 : parsed
      currentVerses = []
      continue
    }

    // `\q1 \v 3 text` and `\q2 text` carry verse content after the paragraph marker.
    const paragraph = line.match(USFM_TEXT_PARAGRAPH)
    if (paragraph) line = line.slice(paragraph[0].length)

    if (/^\\v\s+/.test(line)) {
      // \v 1 In the beginning God...
      // The text part is optional (it may start on the next line) and the
      // number may be a bridge such as 1-2, in which case the first number is used.
      const m = line.match(/^\\v\s+(\d+)\S*(?:\s+(.*))?$/)
      if (m) currentVerses.push({ verseNum: parseInt(m[1], 10), text: m[2] ?? '' })
      continue
    }

    // Wrapped text: append to the last verse. Remaining marker lines are skipped.
    if (currentVerses.length > 0 && line && !line.startsWith('\\')) {
      const last = currentVerses[currentVerses.length - 1]
      last.text += ' ' + line
    }
  }
  if (currentChapter > 0) chapters.set(currentChapter, currentVerses)

  // Resolve name/code/testament
  const meta = CODE_TO_META.get(code)
  if (!meta) return null
  const finalName = name || meta.name
  const book: Book = { name: finalName, code, testament: meta.testament, chapters: [] }

  const ordered = Array.from(chapters.entries()).sort((a, b) => a[0] - b[0])
  for (const [chapterNum, verses] of ordered) {
    if (verses.length === 0) continue
    book.chapters.push({
      chapterNum,
      verses: verses.map(v => ({ verseNum: v.verseNum, text: stripUsfmMarkup(v.text) })),
    })
  }
  return book
}
|
||||
|
||||
/**
 * Script entry point: parse every `.usfm` file in INPUT, split the books by
 * testament, order them by their CODE_TO_META `order`, and write
 * `old_testament.json` / `new_testament.json` into OUTPUT_DIR via writeJson.
 * Exits with status 1 when INPUT contains no `.usfm` file.
 */
function main() {
  const usfmFiles = fs.readdirSync(INPUT).filter(entry => entry.toLowerCase().endsWith('.usfm'))
  console.log('USFM files found:', usfmFiles.length)
  if (usfmFiles.length === 0) {
    console.error('No .usfm files found in', INPUT)
    process.exit(1)
  }

  const books: Book[] = []
  for (const fileName of usfmFiles) {
    const parsed = parseUsfmFile(path.join(INPUT, fileName))
    // Files that do not resolve to a known book, or yield no chapters, are skipped silently.
    if (!parsed || parsed.chapters.length === 0) continue
    books.push(parsed)
  }

  // Partition by testament, each part sorted by the book's position in the canon table.
  const rank = (book: Book) => CODE_TO_META.get(book.code)!.order
  const inCanonOrder = (x: Book, y: Book) => rank(x) - rank(y)
  const otBooks = books.filter(book => book.testament === 'OT').sort(inCanonOrder)
  const ntBooks = books.filter(book => book.testament === 'NT').sort(inCanonOrder)

  // Only name and chapters are written out; code and testament stay internal.
  const slim = (book: Book) => ({ name: book.name, chapters: book.chapters })
  const ot = { testament: 'Old Testament', books: otBooks.map(slim) }
  const nt = { testament: 'New Testament', books: ntBooks.map(slim) }

  const otFile = path.join(OUTPUT_DIR, 'old_testament.json')
  const ntFile = path.join(OUTPUT_DIR, 'new_testament.json')
  writeJson(otFile, ot)
  writeJson(ntFile, nt)

  console.log('Wrote:', otFile)
  console.log('Wrote:', ntFile)
  console.log('Books:', books.length, 'OT:', otBooks.length, 'NT:', ntBooks.length)
}
|
||||
|
||||
main()
|
||||
143
scripts/validate-bsb-md.js
Normal file
143
scripts/validate-bsb-md.js
Normal file
@@ -0,0 +1,143 @@
|
||||
#!/usr/bin/env node
|
||||
const fs = require('fs')
|
||||
const path = require('path')
|
||||
|
||||
// Markdown file to validate: BSB_MD_PATH when set to a non-empty value, otherwise bibles/bible-bsb.md (relative path).
const SRC = process.env.BSB_MD_PATH || path.join('bibles', 'bible-bsb.md')
|
||||
|
||||
/**
 * Build the table of books the validator checks: 39 Old Testament rows
 * followed by 27 New Testament rows.
 *
 * @returns {{name: string, variants: string[], expectedChapters: number, testament: string}[]}
 *   One entry per book, in table order.
 */
function canon() {
  // Row layout: [display name, name variants searched for in the source, expected chapter count].
  // NOTE(review): several variant lists repeat the same string; presumably they once
  // differed in spacing - confirm against the repository copy before deduplicating.
  const oldTestament = [
    ['Genesis', ['Genesis'], 50],
    ['Exodus', ['Exodus'], 40],
    ['Leviticus', ['Leviticus'], 27],
    ['Numbers', ['Numbers'], 36],
    ['Deuteronomy', ['Deuteronomy'], 34],
    ['Joshua', ['Joshua'], 24],
    ['Judges', ['Judges'], 21],
    ['Ruth', ['Ruth'], 4],
    ['1 Samuel', ['1 Samuel', '1 Samuel'], 31],
    ['2 Samuel', ['2 Samuel', '2 Samuel'], 24],
    ['1 Kings', ['1 Kings', '1 Kings'], 22],
    ['2 Kings', ['2 Kings', '2 Kings'], 25],
    ['1 Chronicles', ['1 Chronicles', '1 Chronicles'], 29],
    ['2 Chronicles', ['2 Chronicles', '2 Chronicles'], 36],
    ['Ezra', ['Ezra'], 10],
    ['Nehemiah', ['Nehemiah'], 13],
    ['Esther', ['Esther'], 10],
    ['Job', ['Job'], 42],
    ['Psalms', ['Psalms', 'Psalm'], 150],
    ['Proverbs', ['Proverbs'], 31],
    ['Ecclesiastes', ['Ecclesiastes'], 12],
    ['Song of Songs', ['Song of Songs', 'Song of Solomon'], 8],
    ['Isaiah', ['Isaiah'], 66],
    ['Jeremiah', ['Jeremiah'], 52],
    ['Lamentations', ['Lamentations'], 5],
    ['Ezekiel', ['Ezekiel'], 48],
    ['Daniel', ['Daniel'], 12],
    ['Hosea', ['Hosea'], 14],
    ['Joel', ['Joel'], 3],
    ['Amos', ['Amos'], 9],
    ['Obadiah', ['Obadiah'], 1],
    ['Jonah', ['Jonah'], 4],
    ['Micah', ['Micah'], 7],
    ['Nahum', ['Nahum'], 3],
    ['Habakkuk', ['Habakkuk'], 3],
    ['Zephaniah', ['Zephaniah'], 3],
    ['Haggai', ['Haggai'], 2],
    ['Zechariah', ['Zechariah'], 14],
    ['Malachi', ['Malachi'], 4],
  ]
  const newTestament = [
    ['Matthew', ['Matthew'], 28],
    ['Mark', ['Mark'], 16],
    ['Luke', ['Luke'], 24],
    ['John', ['John'], 21],
    ['Acts', ['Acts'], 28],
    ['Romans', ['Romans'], 16],
    ['1 Corinthians', ['1 Corinthians', '1 Corinthians'], 16],
    ['2 Corinthians', ['2 Corinthians', '2 Corinthians'], 13],
    ['Galatians', ['Galatians'], 6],
    ['Ephesians', ['Ephesians'], 6],
    ['Philippians', ['Philippians'], 4],
    ['Colossians', ['Colossians'], 4],
    ['1 Thessalonians', ['1 Thessalonians', '1 Thessalonians'], 5],
    ['2 Thessalonians', ['2 Thessalonians', '2 Thessalonians'], 3],
    ['1 Timothy', ['1 Timothy', '1 Timothy'], 6],
    ['2 Timothy', ['2 Timothy', '2 Timothy'], 4],
    ['Titus', ['Titus'], 3],
    ['Philemon', ['Philemon'], 1],
    ['Hebrews', ['Hebrews'], 13],
    ['James', ['James'], 5],
    ['1 Peter', ['1 Peter', '1 Peter'], 5],
    ['2 Peter', ['2 Peter', '2 Peter'], 3],
    ['1 John', ['1 John', '1 John'], 5],
    ['2 John', ['2 John', '2 John'], 1],
    ['3 John', ['3 John', '3 John'], 1],
    ['Jude', ['Jude'], 1],
    ['Revelation', ['Revelation'], 22],
  ]

  // Turn positional rows into named records tagged with their testament.
  const toBooks = (rows, testament) =>
    rows.map(row => ({ name: row[0], variants: row[1], expectedChapters: row[2], testament }))

  return toBooks(oldTestament, 'OT').concat(toBooks(newTestament, 'NT'))
}
|
||||
|
||||
/**
 * Find "<Book> <chapter>:<verse>" references to one book in the markdown.
 *
 * A reference must sit at the start of the input or follow whitespace, `|`
 * or `(`. The pattern is built with String.raw so `\s` stays a regex escape;
 * in an ordinary template literal `\s` collapses to the letter "s" and
 * references preceded by a space are silently missed.
 *
 * The lookbehind rejects a name preceded by a standalone 1, 2 or 3 so that
 * "John" does not also match inside "1 John 3:4". A digit that belongs to a
 * longer number or a chapter:verse pair (e.g. "Luke 1:1 John 1:2") is not
 * treated as a book prefix.
 *
 * @param {string} md Full markdown text.
 * @param {string} names Regex alternation of the book's name variants.
 * @returns {{chapters: Set<number>, verseCount: number}} Distinct chapter
 *   numbers seen, and the number of references with a positive verse number.
 */
function scanBookReferences(md, names) {
  const re = new RegExp(
    String.raw`(?:^|[\s|(])(?<!(?<![\d:])[1-3][ \t]+)(?:${names})\s+(\d+):(\d+)`,
    'gi'
  )
  const chapters = new Set()
  let verseCount = 0
  let m
  while ((m = re.exec(md)) !== null) {
    const ch = parseInt(m[1], 10)
    const vs = parseInt(m[2], 10)
    if (ch > 0) chapters.add(ch)
    if (vs > 0) verseCount++
  }
  return { chapters, verseCount }
}

/**
 * Script entry point: read SRC, count chapter/verse references per book of
 * canon(), print a summary and write the full report to
 * data/en_bible/BSB_VALIDATION/report.json. Exits with status 1 when SRC
 * does not exist.
 */
function main() {
  if (!fs.existsSync(SRC)) {
    console.error('Missing source file:', SRC)
    process.exit(1)
  }
  const md = fs.readFileSync(SRC, 'utf-8')
  const books = canon()

  const report = { file: SRC, totals: { versesTagged: 0 }, books: [] }

  // Heuristic: some one-chapter books may lack inline verse references; accept header presence
  const oneChapterBooks = new Set(['Obadiah', 'Philemon', '2 John', '3 John', 'Jude'])

  for (const b of books) {
    // Any run of whitespace inside a book name matches any run of whitespace in the source.
    const names = b.variants.map(v => v.replace(/\s+/g, '\\s+')).join('|')
    const { chapters, verseCount } = scanBookReferences(md, names)

    if (chapters.size === 0 && oneChapterBooks.has(b.name)) {
      // The book name alone on a line. The variants are grouped so the line
      // anchors apply to every variant, not just the first and last one.
      const headerRe = new RegExp(String.raw`[\f\n\r]\s*(?:${names})\s*[\n\r]`, 'i')
      if (headerRe.test(md)) {
        chapters.add(1)
      }
    }

    report.totals.versesTagged += verseCount
    report.books.push({
      name: b.name,
      testament: b.testament,
      expectedChapters: b.expectedChapters,
      detectedChapters: Array.from(chapters).sort((x, y) => x - y),
      detectedCount: chapters.size,
      // Percentage of expected chapters seen, rounded to two decimals.
      coverage: b.expectedChapters > 0 ? +(100 * chapters.size / b.expectedChapters).toFixed(2) : null,
      verseMarkers: verseCount
    })
  }

  const missingBooks = report.books.filter(x => x.detectedCount === 0).map(x => x.name)
  const partialBooks = report.books
    .filter(x => x.detectedCount > 0 && x.detectedCount < x.expectedChapters)
    .map(x => ({ name: x.name, det: x.detectedCount, exp: x.expectedChapters }))

  console.log('Validation summary for', SRC)
  console.log('Total verse markers found:', report.totals.versesTagged)
  console.log('Books missing markers:', missingBooks.length ? missingBooks.join(', ') : 'None')
  // Only the first ten partially detected books are printed; the report file has all of them.
  console.log('Books partially detected (chapters):', partialBooks.length ? JSON.stringify(partialBooks.slice(0,10)) : 'None')

  const outDir = path.join('data','en_bible','BSB_VALIDATION')
  fs.mkdirSync(outDir, { recursive: true })
  fs.writeFileSync(path.join(outDir,'report.json'), JSON.stringify(report, null, 2), 'utf-8')
  console.log('Wrote detailed report to', path.join(outDir,'report.json'))
}
|
||||
|
||||
main()
|
||||
145
scripts/validate-bsb-md.ts
Normal file
145
scripts/validate-bsb-md.ts
Normal file
@@ -0,0 +1,145 @@
|
||||
#!/usr/bin/env tsx
|
||||
import fs from 'fs'
|
||||
import path from 'path'
|
||||
|
||||
/** Markdown file to validate: BSB_MD_PATH when set to a non-empty value, otherwise bibles/bible-bsb.md (relative path). */
const SRC = process.env.BSB_MD_PATH || path.join('bibles', 'bible-bsb.md')
|
||||
|
||||
/** One book to validate: display name, the name variants searched for in the source, the expected chapter count, and its testament. */
type BookInfo = { name: string; variants: string[]; expectedChapters: number; testament: 'OT'|'NT' }
|
||||
|
||||
/**
 * Build the table of books the validator checks: 39 Old Testament rows
 * followed by 27 New Testament rows.
 *
 * @returns One BookInfo per book, in table order.
 */
function canon(): BookInfo[] {
  // Row layout: [display name, name variants searched for in the source, expected chapter count].
  // NOTE(review): several variant lists repeat the same string; presumably they once
  // differed in spacing - confirm against the repository copy before deduplicating.
  type Row = [string, string[], number]

  const oldTestament: Row[] = [
    ['Genesis', ['Genesis'], 50],
    ['Exodus', ['Exodus'], 40],
    ['Leviticus', ['Leviticus'], 27],
    ['Numbers', ['Numbers'], 36],
    ['Deuteronomy', ['Deuteronomy'], 34],
    ['Joshua', ['Joshua'], 24],
    ['Judges', ['Judges'], 21],
    ['Ruth', ['Ruth'], 4],
    ['1 Samuel', ['1 Samuel', '1 Samuel'], 31],
    ['2 Samuel', ['2 Samuel', '2 Samuel'], 24],
    ['1 Kings', ['1 Kings', '1 Kings'], 22],
    ['2 Kings', ['2 Kings', '2 Kings'], 25],
    ['1 Chronicles', ['1 Chronicles', '1 Chronicles'], 29],
    ['2 Chronicles', ['2 Chronicles', '2 Chronicles', '2 Chronicles'], 36],
    ['Ezra', ['Ezra'], 10],
    ['Nehemiah', ['Nehemiah'], 13],
    ['Esther', ['Esther'], 10],
    ['Job', ['Job'], 42],
    ['Psalms', ['Psalms', 'Psalm'], 150],
    ['Proverbs', ['Proverbs'], 31],
    ['Ecclesiastes', ['Ecclesiastes'], 12],
    ['Song of Songs', ['Song of Songs', 'Song of Solomon'], 8],
    ['Isaiah', ['Isaiah'], 66],
    ['Jeremiah', ['Jeremiah'], 52],
    ['Lamentations', ['Lamentations'], 5],
    ['Ezekiel', ['Ezekiel'], 48],
    ['Daniel', ['Daniel'], 12],
    ['Hosea', ['Hosea'], 14],
    ['Joel', ['Joel'], 3],
    ['Amos', ['Amos'], 9],
    ['Obadiah', ['Obadiah'], 1],
    ['Jonah', ['Jonah'], 4],
    ['Micah', ['Micah'], 7],
    ['Nahum', ['Nahum'], 3],
    ['Habakkuk', ['Habakkuk'], 3],
    ['Zephaniah', ['Zephaniah'], 3],
    ['Haggai', ['Haggai'], 2],
    ['Zechariah', ['Zechariah'], 14],
    ['Malachi', ['Malachi'], 4],
  ]
  const newTestament: Row[] = [
    ['Matthew', ['Matthew'], 28],
    ['Mark', ['Mark'], 16],
    ['Luke', ['Luke'], 24],
    ['John', ['John'], 21],
    ['Acts', ['Acts'], 28],
    ['Romans', ['Romans'], 16],
    ['1 Corinthians', ['1 Corinthians', '1 Corinthians'], 16],
    ['2 Corinthians', ['2 Corinthians', '2 Corinthians'], 13],
    ['Galatians', ['Galatians'], 6],
    ['Ephesians', ['Ephesians'], 6],
    ['Philippians', ['Philippians'], 4],
    ['Colossians', ['Colossians'], 4],
    ['1 Thessalonians', ['1 Thessalonians', '1 Thessalonians'], 5],
    ['2 Thessalonians', ['2 Thessalonians', '2 Thessalonians'], 3],
    ['1 Timothy', ['1 Timothy', '1 Timothy'], 6],
    ['2 Timothy', ['2 Timothy', '2 Timothy'], 4],
    ['Titus', ['Titus'], 3],
    ['Philemon', ['Philemon'], 1],
    ['Hebrews', ['Hebrews'], 13],
    ['James', ['James'], 5],
    ['1 Peter', ['1 Peter', '1 Peter'], 5],
    ['2 Peter', ['2 Peter', '2 Peter'], 3],
    ['1 John', ['1 John', '1 John'], 5],
    ['2 John', ['2 John', '2 John'], 1],
    ['3 John', ['3 John', '3 John'], 1],
    ['Jude', ['Jude'], 1],
    ['Revelation', ['Revelation'], 22],
  ]

  // Turn positional rows into named records tagged with their testament.
  const toBooks = (rows: Row[], testament: 'OT' | 'NT') =>
    rows.map(row => ({ name: row[0], variants: row[1], expectedChapters: row[2], testament }))

  return toBooks(oldTestament, 'OT').concat(toBooks(newTestament, 'NT'))
}
|
||||
|
||||
/**
 * Escape a string so it can be embedded in a regular expression and match
 * itself literally.
 *
 * @param s Text to escape.
 * @returns `s` with a backslash placed before each of . * + ? ^ $ { } ( ) | [ ] \
 */
function escapeRegExp(s: string) {
  const metaChars = /[.*+?^${}()|[\]\\]/g
  return s.replace(metaChars, (ch) => '\\' + ch)
}
|
||||
|
||||
/** Per-book row of the validation report. */
type BookReport = {
  name: string
  testament: 'OT' | 'NT'
  expectedChapters: number
  detectedChapters: number[]
  detectedCount: number
  coverage: number | null
  verseMarkers: number
}

/** Shape of the object serialised to report.json. */
type ValidationReport = {
  file: string
  totals: { versesTagged: number }
  books: BookReport[]
}

/**
 * Find "<Book> <chapter>:<verse>" references to one book in the markdown.
 *
 * A reference must sit at the start of the input or follow whitespace or `|`,
 * e.g. "... | BookName 12:34" or just "BookName 12:34". The pattern is built
 * with String.raw so `\s` stays a regex escape; in an ordinary template
 * literal `\s` collapses to the letter "s" and the "| BookName" form (name
 * preceded by a space) is silently missed.
 *
 * The lookbehind rejects a name preceded by a standalone 1, 2 or 3 so that
 * "John" does not also match inside "1 John 3:4". A digit that belongs to a
 * longer number or a chapter:verse pair (e.g. "Luke 1:1 John 1:2") is not
 * treated as a book prefix.
 *
 * @param md Full markdown text.
 * @param names Regex alternation of the book's name variants.
 * @returns Distinct chapter numbers seen, and the number of references with a
 *          positive verse number.
 */
function scanBookReferences(md: string, names: string): { chapters: Set<number>; verseCount: number } {
  const re = new RegExp(
    String.raw`(?:^|[\s|])(?<!(?<![\d:])[1-3][ \t]+)(?:${names})\s+(\d+):(\d+)`,
    'gi'
  )
  const chapters = new Set<number>()
  let verseCount = 0
  let m: RegExpExecArray | null
  while ((m = re.exec(md)) !== null) {
    const ch = parseInt(m[1], 10)
    const vs = parseInt(m[2], 10)
    if (ch > 0) chapters.add(ch)
    if (vs > 0) verseCount++
  }
  return { chapters, verseCount }
}

/**
 * Script entry point: read SRC, count chapter/verse references per book of
 * canon(), print a summary and write the full report to
 * data/en_bible/BSB_VALIDATION/report.json. Exits with status 1 when SRC
 * does not exist.
 */
function main() {
  if (!fs.existsSync(SRC)) {
    console.error('Missing source file:', SRC)
    process.exit(1)
  }
  const md = fs.readFileSync(SRC, 'utf-8')
  const books = canon()

  const report: ValidationReport = { file: SRC, totals: { versesTagged: 0 }, books: [] }

  for (const b of books) {
    // Allow flexible whitespace inside book names (covers double-spaced variants in the source).
    const names = b.variants.map(v => v.replace(/\s+/g, '\\s+')).join('|')
    const { chapters, verseCount } = scanBookReferences(md, names)

    report.totals.versesTagged += verseCount
    report.books.push({
      name: b.name,
      testament: b.testament,
      expectedChapters: b.expectedChapters,
      detectedChapters: [...chapters].sort((x, y) => x - y),
      detectedCount: chapters.size,
      // Percentage of expected chapters seen, rounded to two decimals.
      coverage: b.expectedChapters > 0 ? +(100 * chapters.size / b.expectedChapters).toFixed(2) : null,
      verseMarkers: verseCount
    })
  }

  const missingBooks = report.books.filter(x => x.detectedCount === 0).map(x => x.name)
  const partialBooks = report.books
    .filter(x => x.detectedCount > 0 && x.detectedCount < x.expectedChapters)
    .map(x => ({ name: x.name, det: x.detectedCount, exp: x.expectedChapters }))

  console.log('Validation summary for', SRC)
  console.log('Total verse markers found:', report.totals.versesTagged)
  console.log('Books missing markers:', missingBooks.length ? missingBooks.join(', ') : 'None')
  // Only the first ten partially detected books are printed; the report file has all of them.
  console.log('Books partially detected (chapters):', partialBooks.length ? partialBooks.slice(0,10) : 'None')

  const outDir = path.join('data','en_bible','BSB_VALIDATION')
  fs.mkdirSync(outDir, { recursive: true })
  fs.writeFileSync(path.join(outDir,'report.json'), JSON.stringify(report, null, 2), 'utf-8')
  console.log('Wrote detailed report to', path.join(outDir,'report.json'))
}
|
||||
|
||||
main()
|
||||
|
||||
Reference in New Issue
Block a user