#!/usr/bin/env tsx
/*
  scripts/fetch-english-bible.ts

  Fetch the full English Bible from api.bible with careful rate limiting.
  - Requires env:
    - API_BIBLE_KEY: your api.scripture.api.bible key
    - API_BIBLE_BASE (optional): default https://api.scripture.api.bible
    - API_BIBLE_ID (optional): specific Bible ID to fetch (overrides abbr search)
    - API_BIBLE_ABBR (optional): preferred abbreviation to locate (e.g., BSB, NIV, KJV)
    - OUTPUT_DIR (optional): default data/en_bible

  Output: Creates OT/NT JSON in format compatible with scripts/import-romanian-versioned.ts
    data/en_bible/<ABBR>/old_testament.json
    data/en_bible/<ABBR>/new_testament.json
  Per-chapter cache files are written next to them:
    data/en_bible/<ABBR>/<book abbreviation or name>/chapter-<number>.json

  Notes on rate limits:
  - Serializes requests (concurrency 1) with a base delay between calls (baseDelayMs).
  - Handles 429 with Retry-After header or exponential backoff.
  - Supports resume by writing per-chapter JSON; if a chapter file exists, it skips re-fetching it.
*/

import 'dotenv/config'
import fs from 'fs'
import path from 'path'

const API_KEY = process.env.API_BIBLE_KEY || ''
// Strip a single trailing slash so URL templates below can always start with "/v1/...".
const API_BASE = (process.env.API_BIBLE_BASE || 'https://api.scripture.api.bible').replace(/\/$/, '')
const PREF_ABBR = process.env.API_BIBLE_ABBR || 'BSB'
const FORCE_BIBLE_ID = process.env.API_BIBLE_ID || ''
const OUTPUT_ROOT = process.env.OUTPUT_DIR || path.join('data', 'en_bible')

if (!API_KEY) {
  console.error('Missing API_BIBLE_KEY in environment')
  process.exit(1)
}

// Simple throttling and backoff
const baseDelayMs = 350 // ~3 requests/second baseline
const maxRateLimitRetries = 6 // retries allowed per request after a 429
const maxBackoffMs = 60000 // upper bound for the exponential backoff wait

/** Resolve after `ms` milliseconds. */
function sleep(ms: number) { return new Promise<void>(res => setTimeout(res, ms)) }

/**
 * GET `url` and return the parsed JSON body.
 *
 * Every call first waits `baseDelayMs`, then sends the request with the
 * `api-key` header. On HTTP 429 it waits for the number of seconds given by
 * `Retry-After` when that header holds a positive integer, otherwise it backs
 * off exponentially (1s, 2s, 4s, ... capped at `maxBackoffMs`), and retries
 * up to `maxRateLimitRetries` times.
 *
 * @param url     Absolute request URL.
 * @param init    Extra fetch options; its headers override the defaults.
 * @param attempt Number of 429 retries already made (callers leave this at 0).
 * @throws Error  For any non-OK response, including a 429 once retries are exhausted.
 */
async function requestJson(url: string, init: RequestInit = {}, attempt = 0): Promise<any> {
  await sleep(baseDelayMs)
  const res = await fetch(url, {
    ...init,
    headers: {
      'accept': 'application/json',
      'api-key': API_KEY,
      ...(init.headers || {})
    }
  })

  if (res.status === 429 && attempt < maxRateLimitRetries) {
    // parseInt yields NaN for a missing or non-numeric header (e.g. an HTTP date);
    // NaN > 0 is false, so those cases fall through to exponential backoff.
    const retryAfterSec = parseInt(res.headers.get('retry-after') || '0', 10)
    const wait = retryAfterSec > 0
      ? retryAfterSec * 1000
      : Math.min(maxBackoffMs, 1000 * 2 ** attempt)
    console.warn(`429 rate limited. Waiting ${wait}ms and retrying...`)
    await sleep(wait)
    return requestJson(url, init, attempt + 1)
  }

  if (!res.ok) {
    const body = await res.text()
    throw new Error(`HTTP ${res.status} for ${url}: ${body}`)
  }
  return res.json()
}

type BibleMeta = { id: string; name: string; abbreviation: string; language: { id: string; name: string } }

/**
 * Pick the Bible to download.
 *
 * API_BIBLE_ID, when set, is fetched directly. Otherwise the English Bible
 * list is searched for an abbreviation equal to API_BIBLE_ABBR
 * (case-insensitive), falling back to the first listed Bible.
 *
 * @throws Error when the English list is empty.
 */
async function resolveBible(): Promise<BibleMeta> {
  if (FORCE_BIBLE_ID) {
    const data = await requestJson(`${API_BASE}/v1/bibles/${FORCE_BIBLE_ID}`)
    return data.data as BibleMeta
  }
  const list = await requestJson(`${API_BASE}/v1/bibles?language=eng`) // English
  const bibles: BibleMeta[] = list.data || []
  const wanted = PREF_ABBR.toUpperCase()
  const chosen = bibles.find(b => (b.abbreviation || '').toUpperCase() === wanted) ?? bibles[0]
  if (!chosen) throw new Error('No English bibles found via API')
  return chosen
}

type Book = { id: string; name: string; abbreviation: string; ord: number }
type Chapter = { id: string; number: string }
type Verse = { verseNum: number; text: string }
type ChapterOut = { chapterNum: number; verses: Verse[] }

/**
 * List the books of a Bible. `ord` is the book's `order` field when the API
 * supplies a truthy one, otherwise its 1-based position in the response.
 */
async function fetchBooks(bibleId: string): Promise<Book[]> {
  const resp = await requestJson(`${API_BASE}/v1/bibles/${bibleId}/books`)
  const data = resp.data || []
  return data.map((b: any, i: number) => ({ id: b.id, name: b.name, abbreviation: b.abbreviation, ord: b.order || (i + 1) }))
}

/** List the chapters (id and number, as returned by the API) of one book. */
async function fetchChapters(bibleId: string, bookId: string): Promise<Chapter[]> {
  const resp = await requestJson(`${API_BASE}/v1/bibles/${bibleId}/books/${bookId}/chapters`)
  const data = resp.data || []
  return data.map((c: any) => ({ id: c.id, number: c.number }))
}

// Fetch all verse IDs for a chapter, then fetch each verse text (content-type=text).
// Returns the verses sorted by verse number. One request is made per verse, so
// this is the slowest part of a run.
async function fetchChapterVerses(bibleId: string, chapterId: string): Promise<{ verseNum: number; text: string }[]> {
  const resp = await requestJson(`${API_BASE}/v1/bibles/${bibleId}/chapters/${chapterId}/verses`)
  const verses: any[] = resp.data || []
  const results: { verseNum: number; text: string }[] = []
  for (const v of verses) {
    const vId = v.id
    // Respect rate limits while fetching verse content
    const vResp = await requestJson(`${API_BASE}/v1/bibles/${bibleId}/verses/${vId}?content-type=text&include-notes=false&include-titles=false&include-chapter-numbers=false&include-verse-numbers=false&include-verse-spans=false`)
    const text: string = vResp?.data?.content?.trim?.() || vResp?.data?.content || ''
    // Extract verse number from reference when available (the part after ':'),
    // fallback to other fields and finally to the sequence position.
    const fromReference = String(v.reference || '').split(':')[1]
    const candidate = fromReference || v.verseCount || v.position || results.length + 1
    const num = parseInt(String(candidate), 10)
    results.push({ verseNum: Number.isFinite(num) ? num : (results.length + 1), text })
  }
  // Sort by verse number just in case
  results.sort((a, b) => a.verseNum - b.verseNum)
  return results
}

/** Create directory `p` (and any missing parents); no-op when it already exists. */
function ensureDir(p: string) { fs.mkdirSync(p, { recursive: true }) }

/**
 * Write `obj` as pretty-printed JSON to `file`, creating parent directories.
 * The data is written to a sibling temp file and then renamed, so an
 * interrupted run does not leave a truncated file under the final name.
 */
function writeJson(file: string, obj: any) {
  ensureDir(path.dirname(file))
  const tmp = `${file}.tmp`
  fs.writeFileSync(tmp, JSON.stringify(obj, null, 2), 'utf-8')
  fs.renameSync(tmp, file)
}

/** True when path `p` is accessible. */
function exists(p: string) { try { fs.accessSync(p); return true } catch { return false } }

/**
 * Read a cached chapter file. Returns undefined (after a warning) when the
 * file cannot be parsed, so the caller re-fetches instead of aborting the run.
 */
function readCachedChapter(file: string): ChapterOut | undefined {
  try {
    return JSON.parse(fs.readFileSync(file, 'utf-8')) as ChapterOut
  } catch (err) {
    console.warn(`  Ignoring unreadable cache file ${file}:`, err)
    return undefined
  }
}

/**
 * Download every book/chapter/verse of the resolved Bible and write
 * old_testament.json and new_testament.json under <OUTPUT_ROOT>/<ABBR>/.
 */
async function main() {
  const bible = await resolveBible()
  console.log(`Using Bible: ${bible.name} (${bible.abbreviation}) [${bible.id}]`)
  const outDir = path.join(OUTPUT_ROOT, bible.abbreviation.toUpperCase())
  ensureDir(outDir)

  const books = await fetchBooks(bible.id)
  // Partition into OT/NT by order threshold (first 39 = OT).
  // NOTE(review): this assumes a 66-book canon in standard order — confirm for
  // Bibles that include additional books.
  const otBooks = books.filter(b => b.ord <= 39)
  const ntBooks = books.filter(b => b.ord > 39)

  const buildTestament = async (subset: Book[], label: 'Old Testament' | 'New Testament') => {
    const result: { testament: string; books: { name: string; chapters: ChapterOut[] }[] } = { testament: label, books: [] }
    for (const b of subset) {
      console.log(`Book: ${b.name}`)
      const bookOutDir = path.join(outDir, b.abbreviation || b.name)
      ensureDir(bookOutDir)
      const chs = await fetchChapters(bible.id, b.id)
      const chaptersArr: ChapterOut[] = []
      for (const ch of chs) {
        // Skip entries whose number is not a plain integer: parseInt would
        // yield NaN for them, which JSON.stringify writes as chapterNum: null.
        if (!/^\d+$/.test(String(ch.number))) {
          console.log(`  Skipping non-numeric chapter "${ch.number}"`)
          continue
        }
        const cacheFile = path.join(bookOutDir, `chapter-${ch.number}.json`)
        if (exists(cacheFile)) {
          const cached = readCachedChapter(cacheFile)
          if (cached) {
            chaptersArr.push(cached)
            continue
          }
        }
        console.log(`  Chapter ${ch.number}`)
        const verses = await fetchChapterVerses(bible.id, ch.id)
        const chapterObj: ChapterOut = { chapterNum: parseInt(ch.number, 10), verses }
        writeJson(cacheFile, chapterObj)
        chaptersArr.push(chapterObj)
      }
      result.books.push({ name: b.name, chapters: chaptersArr })
    }
    return result
  }

  const ot = await buildTestament(otBooks, 'Old Testament')
  const nt = await buildTestament(ntBooks, 'New Testament')

  const otFile = path.join(outDir, 'old_testament.json')
  const ntFile = path.join(outDir, 'new_testament.json')
  writeJson(otFile, ot)
  writeJson(ntFile, nt)
  console.log('Wrote:', otFile)
  console.log('Wrote:', ntFile)

  console.log('\nNext: import into the versioned schema using scripts/import-romanian-versioned.ts with LANG_CODE=en and TRANSLATION_CODE matching', bible.abbreviation)
}

main().catch(err => {
  console.error('Fetch failed:', err)
  process.exit(1)
})