Add rate-limited api.bible fetcher for English Bible (configurable Bible ID/abbr); outputs OT/NT JSON compatible with versioned import; resumable per-chapter cache.
This commit is contained in:
175
scripts/fetch-english-bible.ts
Normal file
175
scripts/fetch-english-bible.ts
Normal file
@@ -0,0 +1,175 @@
|
||||
#!/usr/bin/env tsx
|
||||
/*
  Fetch the full English Bible from api.bible with careful rate limiting.
  - Requires env:
    - API_BIBLE_KEY: your api.scripture.api.bible key
    - API_BIBLE_BASE (optional): default https://api.scripture.api.bible
    - API_BIBLE_ID (optional): specific Bible ID to fetch (overrides abbr search)
    - API_BIBLE_ABBR (optional): preferred abbreviation to locate (e.g., BSB, NIV, KJV); default BSB
    - OUTPUT_DIR (optional): default data/en_bible

  Output: Creates OT/NT JSON in a format compatible with scripts/import-romanian-versioned.ts
    data/en_bible/<ABBR>/old_testament.json
    data/en_bible/<ABBR>/new_testament.json

  Notes on rate limits:
  - Serializes requests (concurrency 1) with a base delay between calls (baseDelayMs).
  - Handles 429 by waiting (as long as the Retry-After header asks, when it is present) and retrying a limited number of times.
  - Supports resume by writing per-chapter JSON; if a chapter file exists, it is loaded from disk instead of being re-fetched.
*/
|
||||
|
||||
import 'dotenv/config'
|
||||
import fs from 'fs'
|
||||
import path from 'path'
|
||||
|
||||
// ---- Configuration, read once from the environment at startup ----

/** api.bible key; empty string when API_BIBLE_KEY is unset or empty. */
const API_KEY = process.env.API_BIBLE_KEY ? process.env.API_BIBLE_KEY : ''

/** API root with a single trailing slash (if any) removed, so paths can be appended with "/v1/...". */
const API_BASE = (process.env.API_BIBLE_BASE ? process.env.API_BIBLE_BASE : 'https://api.scripture.api.bible')
  .replace(/\/$/, '')

/** Abbreviation looked for in the English Bible list when no explicit ID is forced. */
const PREF_ABBR = process.env.API_BIBLE_ABBR ? process.env.API_BIBLE_ABBR : 'BSB'

/** Explicit Bible ID; a non-empty value bypasses the abbreviation lookup. */
const FORCE_BIBLE_ID = process.env.API_BIBLE_ID ? process.env.API_BIBLE_ID : ''

/** Directory under which the per-translation output folder is created. */
const OUTPUT_ROOT = process.env.OUTPUT_DIR ? process.env.OUTPUT_DIR : path.join('data', 'en_bible')
|
||||
|
||||
// Abort right away when no API key is configured: every request sends it in the `api-key` header.
if (API_KEY === '') {
  console.error('Missing API_BIBLE_KEY in environment')
  process.exit(1)
}
|
||||
|
||||
// Simple throttling and backoff
|
||||
// Pause (in milliseconds) taken before every request; 350 ms works out to roughly 3 requests/second.
const baseDelayMs = 350
|
||||
/** Returns a promise that resolves (with no value) once a timer of `ms` milliseconds fires. */
function sleep(ms: number) {
  return new Promise(resolve => {
    setTimeout(resolve, ms)
  })
}
|
||||
|
||||
/**
 * Performs one throttled, authenticated request and returns the parsed JSON body.
 *
 * Every call first sleeps `baseDelayMs`, so sequential callers stay near the baseline rate.
 * A 429 response is retried up to 6 times. The wait honors a positive numeric `Retry-After`
 * header (seconds, never less than 1 s); without one it backs off exponentially
 * (1 s, 2 s, 4 s, ...) up to a 60 s ceiling.
 *
 * @param url     Absolute URL to request.
 * @param init    Extra fetch options; headers given here take precedence over the defaults.
 * @param attempt Number of 429 retries already performed (advanced by the internal recursion).
 * @returns The parsed JSON response body.
 * @throws Error carrying the status, URL and response text for any non-OK response,
 *         including a 429 that persists after the retry budget is used up.
 */
async function requestJson(url: string, init: RequestInit = {}, attempt = 0): Promise<any> {
  const maxRetries = 6
  const maxBackoffMs = 60000

  await sleep(baseDelayMs)

  // Build the header set through `Headers` so caller headers supplied as a Headers instance
  // or as [name, value] tuples are merged correctly (an object spread would lose or mangle them).
  const headers = new Headers(init.headers)
  if (!headers.has('accept')) headers.set('accept', 'application/json')
  if (!headers.has('api-key')) headers.set('api-key', API_KEY)

  const res = await fetch(url, { ...init, headers })

  if (res.status === 429 && attempt < maxRetries) {
    // Retry-After may be absent or an HTTP date; both parse to NaN and select the backoff path.
    const retryAfterSec = Number.parseInt(res.headers.get('retry-after') ?? '', 10)
    const wait = Number.isFinite(retryAfterSec) && retryAfterSec > 0
      ? Math.max(1000, retryAfterSec * 1000)
      : Math.min(maxBackoffMs, 1000 * 2 ** attempt)
    console.warn(`429 rate limited. Waiting ${wait}ms and retrying...`)
    await sleep(wait)
    return requestJson(url, init, attempt + 1)
  }

  if (!res.ok) {
    const body = await res.text()
    throw new Error(`HTTP ${res.status} for ${url}: ${body}`)
  }
  return res.json()
}
|
||||
|
||||
/** The bible metadata fields this script relies on when picking a translation. */
type BibleMeta = {
  id: string
  name: string
  abbreviation: string
  language: {
    id: string
    name: string
  }
}
|
||||
|
||||
/**
 * Decides which Bible to download.
 *
 * When `FORCE_BIBLE_ID` is set, that Bible is fetched directly. Otherwise the English
 * Bible list is searched (case-insensitively) for the abbreviation `PREF_ABBR`; if no
 * entry matches, the first listed Bible is used and a warning is printed so the
 * substitution does not go unnoticed.
 *
 * @returns Metadata of the selected Bible.
 * @throws Error when the forced ID yields no Bible record, or when the English list is empty.
 */
async function resolveBible(): Promise<BibleMeta> {
  if (FORCE_BIBLE_ID) {
    const data = await requestJson(`${API_BASE}/v1/bibles/${encodeURIComponent(FORCE_BIBLE_ID)}`)
    const forced = data?.data as BibleMeta | undefined
    // Fail here with a clear message instead of handing `undefined` to the caller.
    if (!forced || !forced.id) {
      throw new Error(`Bible ${FORCE_BIBLE_ID} not found via API`)
    }
    return forced
  }

  const list = await requestJson(`${API_BASE}/v1/bibles?language=eng`) // English
  const bibles: BibleMeta[] = Array.isArray(list?.data) ? list.data : []

  const wanted = PREF_ABBR.toUpperCase()
  const preferred = bibles.find(b => (b.abbreviation || '').toUpperCase() === wanted)
  if (preferred) return preferred

  const fallback = bibles[0]
  if (!fallback) throw new Error('No English bibles found via API')
  console.warn(
    `No English Bible with abbreviation "${PREF_ABBR}" found; falling back to ${fallback.name} (${fallback.abbreviation})`
  )
  return fallback
}
|
||||
|
||||
/** A book of the selected Bible; `ord` is its position used for ordering (1-based when derived from list order). */
type Book = {
  id: string
  name: string
  abbreviation: string
  ord: number
}
|
||||
/** A chapter reference; `number` is kept as the string delivered by the API. */
type Chapter = {
  id: string
  number: string
}
|
||||
|
||||
/**
 * Lists the books of a Bible.
 *
 * @param bibleId ID of the Bible whose books are requested.
 * @returns One `Book` per API entry; `ord` is the entry's `order` when truthy,
 *          otherwise its 1-based position in the response.
 */
async function fetchBooks(bibleId: string): Promise<Book[]> {
  const endpoint = `${API_BASE}/v1/bibles/${bibleId}/books`
  const payload = await requestJson(endpoint)
  const entries = payload.data || []
  const toBook = (entry: any, index: number) => ({
    id: entry.id,
    name: entry.name,
    abbreviation: entry.abbreviation,
    ord: entry.order || (index + 1),
  })
  return entries.map(toBook)
}
|
||||
|
||||
/**
 * Lists the chapters of one book.
 *
 * @param bibleId ID of the Bible being downloaded.
 * @param bookId  ID of the book whose chapters are requested.
 * @returns The `id` and `number` of every chapter entry, in response order.
 */
async function fetchChapters(bibleId: string, bookId: string): Promise<Chapter[]> {
  const endpoint = `${API_BASE}/v1/bibles/${bibleId}/books/${bookId}/chapters`
  const payload = await requestJson(endpoint)
  const entries = payload.data || []
  const toChapter = (entry: any) => ({ id: entry.id, number: entry.number })
  return entries.map(toChapter)
}
|
||||
|
||||
/**
 * Downloads every verse of one chapter as plain text.
 *
 * Lists the chapter's verses first, then requests each verse individually with
 * `content-type=text` and notes, titles, chapter/verse numbers and verse spans switched off.
 * Requests are issued one after another through `requestJson`, so its delay and 429
 * handling apply to each of them.
 *
 * NOTE(review): this issues one request per verse, which adds up to a very large number
 * of calls for a whole Bible — confirm it fits the quota of the API key in use.
 *
 * @param bibleId   ID of the Bible being downloaded.
 * @param chapterId ID of the chapter whose verses are fetched.
 * @returns Verses sorted by ascending verse number; `text` is trimmed and is '' when the
 *          API returned no textual content.
 */
async function fetchChapterVerses(bibleId: string, chapterId: string): Promise<{ verseNum: number; text: string }[]> {
  const verseQuery = '?content-type=text&include-notes=false&include-titles=false&include-chapter-numbers=false&include-verse-numbers=false&include-verse-spans=false'

  const listing = await requestJson(`${API_BASE}/v1/bibles/${bibleId}/chapters/${chapterId}/verses`)
  const verses: any[] = listing.data || []
  const results: { verseNum: number; text: string }[] = []

  for (const v of verses) {
    // Respect rate limits while fetching verse content (requestJson throttles each call).
    const vResp = await requestJson(`${API_BASE}/v1/bibles/${bibleId}/verses/${v.id}${verseQuery}`)

    // Trim exactly once: whitespace-only content becomes '' instead of falling back to the
    // untrimmed string, and non-string content never leaks into the `text: string` field.
    const content: unknown = vResp?.data?.content
    const text = typeof content === 'string' ? content.trim() : ''

    // Verse number: the part after ':' in the reference when available, then the
    // verseCount/position fields, and finally the running sequence number.
    const sequence = results.length + 1
    const fromReference = typeof v.reference === 'string' ? v.reference.split(':')[1] : undefined
    const rawNum = fromReference || v.verseCount || v.position || sequence
    const num = parseInt(String(rawNum), 10)
    results.push({ verseNum: Number.isFinite(num) ? num : sequence, text })
  }

  // Sort by verse number just in case the API returned them out of order.
  results.sort((a, b) => a.verseNum - b.verseNum)
  return results
}
|
||||
|
||||
/** Creates directory `p` recursively, i.e. together with any missing parent directories. */
function ensureDir(p: string) {
  const options = { recursive: true }
  fs.mkdirSync(p, options)
}
|
||||
/**
 * Writes `obj` to `file` as pretty-printed JSON (2-space indent, UTF-8), creating the
 * parent directory when needed.
 *
 * The data is written to a temporary sibling file and then renamed onto the target, so an
 * interrupted run cannot leave a truncated file under the final name — files written here
 * are later trusted as a resume cache.
 *
 * @param file Destination path of the JSON file.
 * @param obj  Value to serialize.
 * @throws Whatever the underlying fs call throws; the temporary file is removed first.
 */
function writeJson(file: string, obj: any) {
  fs.mkdirSync(path.dirname(file), { recursive: true })
  const tmpFile = `${file}.${process.pid}.tmp`
  try {
    fs.writeFileSync(tmpFile, JSON.stringify(obj, null, 2), 'utf-8')
    fs.renameSync(tmpFile, file)
  } catch (err) {
    // Do not leave a stray temp file behind; the original error is still reported.
    fs.rmSync(tmpFile, { force: true })
    throw err
  }
}
|
||||
/** Reports whether the path `p` is present on disk. */
function exists(p: string) {
  return fs.existsSync(p)
}
|
||||
|
||||
/**
 * Script entry point.
 *
 * Resolves the Bible to download, fetches its books chapter by chapter (re-using any
 * per-chapter cache file already on disk), and writes `old_testament.json` and
 * `new_testament.json` into `<OUTPUT_ROOT>/<ABBR>/`.
 *
 * Books are assigned to a testament by their ID when it is one of the standard
 * three-letter codes listed below; books with any other ID fall back to the positional
 * rule (first 39 = Old Testament). Chapters whose number is not numeric are skipped, and
 * a cache file that cannot be read or parsed is re-fetched instead of aborting the run.
 *
 * @throws Any error raised by the API helpers or by file-system writes.
 */
async function main() {
  // Standard three-letter book codes of the 39 OT and 27 NT books.
  // NOTE(review): assumes the API's book ids use these codes — unknown ids fall back to order.
  const otBookIds = new Set([
    'GEN', 'EXO', 'LEV', 'NUM', 'DEU', 'JOS', 'JDG', 'RUT', '1SA', '2SA', '1KI', '2KI', '1CH',
    '2CH', 'EZR', 'NEH', 'EST', 'JOB', 'PSA', 'PRO', 'ECC', 'SNG', 'ISA', 'JER', 'LAM', 'EZK',
    'DAN', 'HOS', 'JOL', 'AMO', 'OBA', 'JON', 'MIC', 'NAM', 'HAB', 'ZEP', 'HAG', 'ZEC', 'MAL',
  ])
  const ntBookIds = new Set([
    'MAT', 'MRK', 'LUK', 'JHN', 'ACT', 'ROM', '1CO', '2CO', 'GAL', 'EPH', 'PHP', 'COL', '1TH',
    '2TH', '1TI', '2TI', 'TIT', 'PHM', 'HEB', 'JAS', '1PE', '2PE', '1JN', '2JN', '3JN', 'JUD', 'REV',
  ])

  const bible = await resolveBible()
  console.log(`Using Bible: ${bible.name} (${bible.abbreviation}) [${bible.id}]`)
  const outDir = path.join(OUTPUT_ROOT, bible.abbreviation.toUpperCase())
  ensureDir(outDir)

  const books = await fetchBooks(bible.id)

  // Partition into OT/NT. Position alone misfiles books when a translation is NT-only or
  // contains extra books, so the book id decides whenever it is recognized.
  const isNewTestament = (b: Book): boolean => {
    const id = String(b.id ?? '').toUpperCase()
    if (ntBookIds.has(id)) return true
    if (otBookIds.has(id)) return false
    return b.ord > 39
  }
  const otBooks = books.filter(b => !isNewTestament(b))
  const ntBooks = books.filter(b => isNewTestament(b))

  const buildTestament = async (subset: Book[], label: 'Old Testament' | 'New Testament') => {
    const result: any = { testament: label, books: [] as any[] }
    for (const b of subset) {
      console.log(`Book: ${b.name}`)
      const bookOutDir = path.join(outDir, b.abbreviation || b.name)
      ensureDir(bookOutDir)
      const chs = await fetchChapters(bible.id, b.id)
      const chaptersArr: any[] = []
      for (const ch of chs) {
        // A non-numeric chapter number (e.g. an introduction entry) would be stored as
        // chapterNum NaN, which JSON turns into null — skip such entries.
        const chapterNum = parseInt(ch.number, 10)
        if (!Number.isFinite(chapterNum)) {
          console.log(`  Skipping non-numeric chapter "${ch.number}"`)
          continue
        }

        const cacheFile = path.join(bookOutDir, `chapter-${ch.number}.json`)
        if (exists(cacheFile)) {
          try {
            const cached = JSON.parse(fs.readFileSync(cacheFile, 'utf-8'))
            chaptersArr.push(cached)
            continue
          } catch (err) {
            // A truncated or corrupt cache file must not abort the run: fetch it again.
            console.warn(`  Ignoring unreadable cache ${cacheFile}; re-fetching.`, err)
          }
        }

        console.log(`  Chapter ${ch.number}`)
        const verses = await fetchChapterVerses(bible.id, ch.id)
        const chapterObj = { chapterNum, verses }
        writeJson(cacheFile, chapterObj)
        chaptersArr.push(chapterObj)
      }
      result.books.push({ name: b.name, chapters: chaptersArr })
    }
    return result
  }

  const ot = await buildTestament(otBooks, 'Old Testament')
  const nt = await buildTestament(ntBooks, 'New Testament')

  const otFile = path.join(outDir, 'old_testament.json')
  const ntFile = path.join(outDir, 'new_testament.json')
  writeJson(otFile, ot)
  writeJson(ntFile, nt)
  console.log('Wrote:', otFile)
  console.log('Wrote:', ntFile)

  console.log('\nNext: import into the versioned schema using scripts/import-romanian-versioned.ts with LANG_CODE=en and TRANSLATION_CODE matching', bible.abbreviation)
}
|
||||
|
||||
// Run the script; any rejection is logged and converted into exit status 1.
main().catch((failure) => {
  console.error('Fetch failed:', failure)
  process.exit(1)
})
|
||||
|
||||
Reference in New Issue
Block a user