#!/usr/bin/env tsx
import fs from 'fs'
import path from 'path'

/*
Convert a directory of USFM files (e.g., WEB/KJV) into our OT/NT JSON format.

Env:
- INPUT_USFM_DIR: path to folder with *.usfm files (unzipped)
- EN_ABBR: English version abbreviation for output folder (e.g., WEB or KJV); defaults to WEB
- OUTPUT_DIR (optional): defaults to data/en_bible/<EN_ABBR>

Output:
- <OUTPUT_DIR>/old_testament.json
- <OUTPUT_DIR>/new_testament.json

USFM markers parsed:
- \id <CODE>
- \h <name> (optional)
- \c <n>
- \v <n> <text>
- paragraph / poetry / list markers (\p, \m, \q1, \li1, ...) are treated as verse-text continuations
*/

const INPUT = process.env.INPUT_USFM_DIR || ''
const ABBR = (process.env.EN_ABBR || 'WEB').toUpperCase()
const OUTPUT_DIR = process.env.OUTPUT_DIR || path.join('data', 'en_bible', ABBR)

if (!INPUT || !fs.existsSync(INPUT)) {
  console.error('Missing or invalid INPUT_USFM_DIR. Set INPUT_USFM_DIR to a folder containing *.usfm files (unzipped).')
  process.exit(1)
}

/** Create directory `p` (and any missing parents); a no-op when it already exists. */
function ensureDir(p: string) {
  fs.mkdirSync(p, { recursive: true })
}

/** Serialize `obj` as 2-space-indented JSON into `file`, creating the parent directory first. */
function writeJson(file: string, obj: unknown) {
  ensureDir(path.dirname(file))
  fs.writeFileSync(file, JSON.stringify(obj, null, 2), 'utf-8')
}

// Canonical order + mapping from USFM book codes to English names + testament
// Based on standard Protestant canon 66 books
type CanonEntry = { code: string; name: string; testament: 'OT' | 'NT' }
type CanonMeta = CanonEntry & { order: number }

const CANON: CanonEntry[] = [
  { code: 'GEN', name: 'Genesis', testament: 'OT' },
  { code: 'EXO', name: 'Exodus', testament: 'OT' },
  { code: 'LEV', name: 'Leviticus', testament: 'OT' },
  { code: 'NUM', name: 'Numbers', testament: 'OT' },
  { code: 'DEU', name: 'Deuteronomy', testament: 'OT' },
  { code: 'JOS', name: 'Joshua', testament: 'OT' },
  { code: 'JDG', name: 'Judges', testament: 'OT' },
  { code: 'RUT', name: 'Ruth', testament: 'OT' },
  { code: '1SA', name: '1 Samuel', testament: 'OT' },
  { code: '2SA', name: '2 Samuel', testament: 'OT' },
  { code: '1KI', name: '1 Kings', testament: 'OT' },
  { code: '2KI', name: '2 Kings', testament: 'OT' },
  { code: '1CH', name: '1 Chronicles', testament: 'OT' },
  { code: '2CH', name: '2 Chronicles', testament: 'OT' },
  { code: 'EZR', name: 'Ezra', testament: 'OT' },
  { code: 'NEH', name: 'Nehemiah', testament: 'OT' },
  { code: 'EST', name: 'Esther', testament: 'OT' },
  { code: 'JOB', name: 'Job', testament: 'OT' },
  { code: 'PSA', name: 'Psalms', testament: 'OT' },
  { code: 'PRO', name: 'Proverbs', testament: 'OT' },
  { code: 'ECC', name: 'Ecclesiastes', testament: 'OT' },
  { code: 'SNG', name: 'Song of Songs', testament: 'OT' },
  { code: 'ISA', name: 'Isaiah', testament: 'OT' },
  { code: 'JER', name: 'Jeremiah', testament: 'OT' },
  { code: 'LAM', name: 'Lamentations', testament: 'OT' },
  { code: 'EZK', name: 'Ezekiel', testament: 'OT' },
  { code: 'DAN', name: 'Daniel', testament: 'OT' },
  { code: 'HOS', name: 'Hosea', testament: 'OT' },
  { code: 'JOL', name: 'Joel', testament: 'OT' },
  { code: 'AMO', name: 'Amos', testament: 'OT' },
  { code: 'OBA', name: 'Obadiah', testament: 'OT' },
  { code: 'JON', name: 'Jonah', testament: 'OT' },
  { code: 'MIC', name: 'Micah', testament: 'OT' },
  { code: 'NAM', name: 'Nahum', testament: 'OT' },
  { code: 'HAB', name: 'Habakkuk', testament: 'OT' },
  { code: 'ZEP', name: 'Zephaniah', testament: 'OT' },
  { code: 'HAG', name: 'Haggai', testament: 'OT' },
  { code: 'ZEC', name: 'Zechariah', testament: 'OT' },
  { code: 'MAL', name: 'Malachi', testament: 'OT' },
  { code: 'MAT', name: 'Matthew', testament: 'NT' },
  { code: 'MRK', name: 'Mark', testament: 'NT' },
  { code: 'LUK', name: 'Luke', testament: 'NT' },
  { code: 'JHN', name: 'John', testament: 'NT' },
  { code: 'ACT', name: 'Acts', testament: 'NT' },
  { code: 'ROM', name: 'Romans', testament: 'NT' },
  { code: '1CO', name: '1 Corinthians', testament: 'NT' },
  { code: '2CO', name: '2 Corinthians', testament: 'NT' },
  { code: 'GAL', name: 'Galatians', testament: 'NT' },
  { code: 'EPH', name: 'Ephesians', testament: 'NT' },
  { code: 'PHP', name: 'Philippians', testament: 'NT' },
  { code: 'COL', name: 'Colossians', testament: 'NT' },
  { code: '1TH', name: '1 Thessalonians', testament: 'NT' },
  { code: '2TH', name: '2 Thessalonians', testament: 'NT' },
  { code: '1TI', name: '1 Timothy', testament: 'NT' },
  { code: '2TI', name: '2 Timothy', testament: 'NT' },
  { code: 'TIT', name: 'Titus', testament: 'NT' },
  { code: 'PHM', name: 'Philemon', testament: 'NT' },
  { code: 'HEB', name: 'Hebrews', testament: 'NT' },
  { code: 'JAS', name: 'James', testament: 'NT' },
  { code: '1PE', name: '1 Peter', testament: 'NT' },
  { code: '2PE', name: '2 Peter', testament: 'NT' },
  { code: '1JN', name: '1 John', testament: 'NT' },
  { code: '2JN', name: '2 John', testament: 'NT' },
  { code: '3JN', name: '3 John', testament: 'NT' },
  { code: 'JUD', name: 'Jude', testament: 'NT' },
  { code: 'REV', name: 'Revelation', testament: 'NT' },
]

// Lookup from USFM book code to its canon entry plus 1-based canonical position.
const CODE_TO_META = new Map<string, CanonMeta>(
  CANON.map((c, i): [string, CanonMeta] => [c.code, { ...c, order: i + 1 }]),
)

type Verse = { verseNum: number; text: string }
type Chapter = { chapterNum: number; verses: Verse[] }
type Book = { name: string; code: string; testament: 'OT' | 'NT'; chapters: Chapter[] }

// A verse whose text has been accumulated across lines but not yet stripped of markup.
type RawVerse = { verseNum: number; raw: string }

// Paragraph, poetry and list markers that introduce (or continue) body text.
// Headings (\s, \ms, \r, \d, ...), titles and other structural markers are
// deliberately absent so their text is never merged into a verse.
const BODY_TEXT_MARKER =
  /^\\(?:p|m|po|pr|cls|pmo|pm|pmc|pmr|pi\d?|mi|nb|pc|ph\d?|b|q\d?|qr|qc|qm\d?|li\d?|lim\d?|lh|lf)(?:\s+|$)/

/**
 * Strip inline USFM markup from the accumulated raw text of one verse,
 * keeping the words themselves.
 */
function cleanVerseText(raw: string): string {
  return raw
    // Footnote / endnote / cross-reference blocks: \f ... \f*, \fe ... \fe*, \x ... \x*
    .replace(/\\(f|fe|x)\s.*?\\\1\*/gis, ' ')
    // Word attributes: everything from '|' up to the closing \w* or \+w*
    .replace(/\|[^\\]*(?=\\\+?w\*)/gi, '')
    // Closing markers (\w*, \nd*, \+add*, ...) - removed before the opening-marker
    // pass so that no stray '*' is left behind
    .replace(/\\\+?[a-z0-9-]+\*/gi, '')
    // Opening markers, including nested \+xx ones
    .replace(/\\\+?[a-z0-9-]+\s*/gi, ' ')
    // Collapse whitespace
    .replace(/\s+/g, ' ')
    .trim()
}

/**
 * Parse the text of one USFM book.
 *
 * @param content full contents of a USFM file
 * @returns the parsed book, or null when the \id code is missing or is not
 *          one of the 66 codes in CANON
 */
function parseUsfmText(content: string): Book | null {
  let code = ''
  let name = ''
  let currentChapter = 0
  let currentVerses: RawVerse[] = []
  const chapters = new Map<number, RawVerse[]>()

  for (const rawLine of content.split(/\r?\n/)) {
    const line = rawLine.trim()
    if (!line) continue

    const idMatch = line.match(/^\\id\s+(\S+)/)
    if (idMatch) {
      code = idMatch[1].toUpperCase()
      continue
    }

    if (/^\\h\s+/.test(line)) {
      // \h Genesis
      name = line.replace(/^\\h\s+/, '').trim()
      continue
    }

    if (/^\\c\s+/.test(line)) {
      // New chapter: store the one we were collecting, then start afresh.
      if (currentChapter > 0) chapters.set(currentChapter, currentVerses)
      const parsed = parseInt(line.slice(3).trim(), 10)
      // An unparseable chapter number disables collection until the next valid \c.
      currentChapter = Number.isNaN(parsed) ? 0 : parsed
      currentVerses = []
      continue
    }

    // Decide whether this line carries verse text.
    let rest: string
    const bodyMarker = line.match(BODY_TEXT_MARKER)
    if (bodyMarker) {
      // e.g. "\q2 nor stand on the path of sinners," or "\p \v 1 In the beginning"
      rest = line.slice(bodyMarker[0].length)
    } else if (!line.startsWith('\\') || /^\\v\s/.test(line)) {
      rest = line
    } else {
      // Headings, titles, remarks and other non-body markers.
      continue
    }
    if (!rest) continue

    // A line may hold several verses; split in front of each "\v <digit>".
    for (const segment of rest.split(/(?=\\v\s+\d)/)) {
      // \S* after the number swallows verse-bridge / sub-verse suffixes ("1-2", "3a").
      const verseMatch = segment.match(/^\\v\s+(\d+)\S*\s*(.*)$/)
      if (verseMatch) {
        currentVerses.push({ verseNum: parseInt(verseMatch[1], 10), raw: verseMatch[2] })
      } else if (!/^\\v\s/.test(segment) && currentVerses.length > 0) {
        // Wrapped text: append to the verse currently being collected.
        const last = currentVerses[currentVerses.length - 1]
        last.raw += ' ' + segment
      }
    }
  }
  if (currentChapter > 0) chapters.set(currentChapter, currentVerses)

  // Resolve name/code/testament
  const meta = CODE_TO_META.get(code)
  if (!meta) return null

  const book: Book = { name: name || meta.name, code, testament: meta.testament, chapters: [] }
  const ordered = Array.from(chapters.entries()).sort((a, b) => a[0] - b[0])
  for (const [chapterNum, rawVerses] of ordered) {
    if (rawVerses.length === 0) continue
    // Markup is stripped only now, after all continuation lines have been
    // joined, so notes that span several lines are removed as a whole.
    const verses = rawVerses.map(v => ({ verseNum: v.verseNum, text: cleanVerseText(v.raw) }))
    book.chapters.push({ chapterNum, verses })
  }
  return book
}

/**
 * Read and parse one USFM file.
 *
 * @param file path to the USFM file (read as UTF-8)
 * @returns the parsed book, or null when its \id code is not in CANON
 */
function parseUsfmFile(file: string): Book | null {
  return parseUsfmText(fs.readFileSync(file, 'utf-8'))
}

/** Canonical 1-based position of a book code; unknown codes sort last. */
function canonOrder(code: string): number {
  const meta = CODE_TO_META.get(code)
  return meta ? meta.order : Number.MAX_SAFE_INTEGER
}

/**
 * Entry point: parse every *.usfm file in INPUT, split the books into
 * Old/New Testament in canonical order and write the two JSON files.
 */
function main() {
  const files = fs.readdirSync(INPUT).filter(f => f.toLowerCase().endsWith('.usfm'))
  console.log('USFM files found:', files.length)
  if (files.length === 0) {
    console.error('No .usfm files found in', INPUT)
    process.exit(1)
  }

  const books: Book[] = []
  for (const f of files) {
    const b = parseUsfmFile(path.join(INPUT, f))
    if (b && b.chapters.length > 0) {
      books.push(b)
    } else {
      // Front matter, glossaries and non-canon books land here; report them
      // instead of dropping them silently.
      console.warn('Skipping', f, b ? '(no chapters with verses)' : '(missing or unrecognized \\id book code)')
    }
  }

  // Partition
  const byCanon = (a: Book, b: Book) => canonOrder(a.code) - canonOrder(b.code)
  const otBooks = books.filter(b => b.testament === 'OT').sort(byCanon)
  const ntBooks = books.filter(b => b.testament === 'NT').sort(byCanon)

  const ot = { testament: 'Old Testament', books: otBooks.map(b => ({ name: b.name, chapters: b.chapters })) }
  const nt = { testament: 'New Testament', books: ntBooks.map(b => ({ name: b.name, chapters: b.chapters })) }

  const otFile = path.join(OUTPUT_DIR, 'old_testament.json')
  const ntFile = path.join(OUTPUT_DIR, 'new_testament.json')
  writeJson(otFile, ot)
  writeJson(ntFile, nt)

  console.log('Wrote:', otFile)
  console.log('Wrote:', ntFile)
  console.log('Books:', books.length, 'OT:', otBooks.length, 'NT:', ntBooks.length)
}

main()