#!/usr/bin/env tsx
import fs from 'fs'
import path from 'path'

/*
  Quick sample extractor from bibles/bible-bsb.md to our OT/NT JSON format.
  - Looks for Genesis 3:20–24 markers and builds a small sample JSON.
  - Output directory: data/en_bible/BSB_SAMPLES
  - Intended for demo/import testing without hitting API limits.
*/

/** Markdown source path; the BSB_MD_PATH environment variable overrides the default. */
const SRC = process.env.BSB_MD_PATH || path.join('bibles', 'bible-bsb.md')

/** Directory that receives old_testament.json and new_testament.json. */
const OUT = path.join('data', 'en_bible', 'BSB_SAMPLES')

/** Verse numbers of Genesis 3 that make up the sample. */
const SAMPLE_VERSES: ReadonlySet<number> = new Set([20, 21, 22, 23, 24])

/** Upper bound, in characters, on the raw text window read after a marker. */
const MAX_WINDOW = 2000

interface Verse {
  verseNum: number
  text: string
}

interface Chapter {
  chapterNum: number
  verses: Verse[]
}

interface Book {
  name: string
  chapters: Chapter[]
}

/** Creates directory `p`, including any missing parents; a no-op if it already exists. */
function ensureDir(p: string): void {
  fs.mkdirSync(p, { recursive: true })
}

/**
 * Serializes `obj` as 2-space-indented JSON and writes it to `file` (UTF-8),
 * creating the parent directory first.
 */
function writeJson(file: string, obj: unknown): void {
  ensureDir(path.dirname(file))
  fs.writeFileSync(file, JSON.stringify(obj, null, 2), 'utf-8')
}

/**
 * Builds a sample chapter for Genesis 3 (verses 20-24) from the raw markdown.
 *
 * The text following each "Genesis 3:<verse>" marker is taken as that verse,
 * after stripping the marker line, page headers and form feeds. This is a
 * best-effort heuristic, not a real parser.
 *
 * @param md Full markdown text to scan.
 * @returns Chapter 3 with at most one entry per sampled verse, ordered by
 *   verse number. If nothing could be extracted, a single hard-coded verse 20
 *   is returned so downstream consumers always get a non-empty chapter.
 */
function extractGenesis3Samples(md: string): Chapter {
  // String offset of the FIRST marker seen for each sampled verse. Later
  // repeats of the same reference are ignored so that no verse is emitted twice.
  const firstOffset = new Map<number, number>()
  for (const match of md.matchAll(/Genesis\s+3:(\d+)/g)) {
    const verse = Number.parseInt(match[1] ?? '', 10)
    if (match.index === undefined) continue
    if (!SAMPLE_VERSES.has(verse) || firstOffset.has(verse)) continue
    firstOffset.set(verse, match.index)
  }

  const markers = [...firstOffset]
    .map(([verse, index]) => ({ verse, index }))
    .sort((a, b) => a.verse - b.verse)

  const verses: Verse[] = []
  for (let i = 0; i < markers.length; i++) {
    const cur = markers[i]
    if (cur === undefined) continue
    const next = markers[i + 1]

    const start = cur.index
    const windowEnd = Math.min(md.length, start + MAX_WINDOW)
    // Markers are ordered by verse number, not by offset. Only stop at the
    // next verse's marker when it really lies after this one; otherwise use
    // the capped window instead of producing an empty slice.
    const end =
      next !== undefined && next.index > start ? Math.min(next.index, windowEnd) : windowEnd

    let chunk = md.slice(start, end)
    // Remove the marker itself together with the rest of its line (first match only).
    chunk = chunk.replace(/Genesis\s+3:\d+.*\n?/, '')
    // Remove form-feed-prefixed page headers such as "\f4 | Genesis 3:22 ...".
    chunk = chunk.replace(/\f\d+\s*\|\s*Genesis\s*3:\d+.*\n?/g, '')
    // Turn leftover form feeds / carriage returns into newlines, then collapse
    // every whitespace run to a single space.
    chunk = chunk.replace(/[\u000c\r]+/g, '\n')
    chunk = chunk.replace(/\s+/g, ' ').trim()

    // Heuristic: the first isolated 1-2 digit number is assumed to be the next
    // embedded verse number. (The `2[1-9]` and `3\d` alternatives are already
    // covered by `\d{1,2}`.) It is only used as a cut point when it appears
    // more than 40 characters in; otherwise the chunk is kept whole.
    const stop = chunk.search(/\s(?:2[1-9]|3\d|\d{1,2})\s/)
    const clean = (stop > 40 ? chunk.slice(0, stop) : chunk).trim()

    if (clean.length > 0) verses.push({ verseNum: cur.verse, text: clean })
  }

  // Fallback if nothing captured
  if (verses.length === 0) {
    verses.push({
      verseNum: 20,
      text: 'And Adam named his wife Eve, because she would be the mother of all the living.',
    })
  }

  return { chapterNum: 3, verses }
}

/**
 * Script entry point: reads the markdown source, extracts the Genesis 3 sample
 * and writes the Old/New Testament JSON files into OUT. Exits with status 1
 * when the source file does not exist.
 */
function main(): void {
  if (!fs.existsSync(SRC)) {
    console.error('Missing source file:', SRC)
    process.exit(1)
  }

  const md = fs.readFileSync(SRC, 'utf-8')
  const gen3 = extractGenesis3Samples(md)

  const otBooks: Book[] = [{ name: 'Genesis', chapters: [gen3] }]
  const ot = { testament: 'Old Testament', books: otBooks }

  // Minimal NT placeholder for structure completeness
  const ntBooks: Book[] = []
  const nt = { testament: 'New Testament', books: ntBooks }

  writeJson(path.join(OUT, 'old_testament.json'), ot)
  writeJson(path.join(OUT, 'new_testament.json'), nt)
  console.log('Wrote samples to', OUT)
}

main()