diff --git a/docs/guide/getting-started.md b/docs/guide/getting-started.md index f01a98d..6e23c4d 100644 --- a/docs/guide/getting-started.md +++ b/docs/guide/getting-started.md @@ -12,5 +12,8 @@ These files are available at the following paths: - [`https://bible.helloao.org/api/available_commentaries.json`](../reference/README.md#available-commentaries) - [`https://bible.helloao.org/api/c/{commentary}/books.json`](../reference/README.md#list-books-in-a-commentary) - [`https://bible.helloao.org/api/c/{commentary}/{book}/{chapter}.json`](../reference/README.md#get-a-chapter-from-a-commentary) +- [`https://bible.helloao.org/api/available_datasets.json`](../reference/README.md#available-datasets) +- [`https://bible.helloao.org/api/d/{dataset}/books.json`](../reference/README.md#list-books-in-a-dataset) +- [`https://bible.helloao.org/api/d/{dataset}/{book}/{chapter}.json`](../reference/README.md#get-a-chapter-from-a-dataset) For more information about each endpoint, see the [next page](./making-requests.md). 
diff --git a/docs/guide/making-requests.md b/docs/guide/making-requests.md index 0c81a35..c026cfb 100644 --- a/docs/guide/making-requests.md +++ b/docs/guide/making-requests.md @@ -104,7 +104,7 @@ fetch(`https://bible.helloao.org/api/c/${commentary}/${book}/${chapter}.json`) }); ``` -## List Profiles in a Commentary +### List Profiles in a Commentary ([reference](../reference/README.md#list-profiles-in-a-commentary)) @@ -119,7 +119,7 @@ fetch(`https://bible.helloao.org/api/c/${commentary}/profiles.json`) }); ``` -## Get a Profile in a Commentary +### Get a Profile in a Commentary ([reference](../reference/README.md#get-a-profile-in-a-commentary)) @@ -134,3 +134,47 @@ fetch(`https://bible.helloao.org/api/c/${commentary}/profiles/${profile}.json`) console.log('The Aaron tyndale commentary profile:', profile); }); ``` + +### Get the list of Available Datasets + +([reference](../reference/README.md#available-datasets)) + +```ts:no-line-numbers title="fetch-datasets.js" +fetch(`https://bible.helloao.org/api/available_datasets.json`) + .then(request => request.json()) + .then(availableDatasets => { + console.log('The API has the following datasets:', availableDatasets); + }); +``` + +### Get the list of books in a dataset + +([reference](../reference/README.md#list-books-in-a-dataset)) + +```ts:no-line-numbers title="fetch-dataset-books.js" +const dataset = 'open-cross-ref'; + +// Get the list of books for the open-cross-ref dataset +fetch(`https://bible.helloao.org/api/d/${dataset}/books.json`) + .then(request => request.json()) + .then(books => { + console.log('The open-cross-ref dataset has the following books:', books); + }); +``` + +### Get a Chapter from a Dataset + +([reference](../reference/README.md#get-a-chapter-from-a-dataset)) + +```ts:no-line-numbers title="fetch-dataset-chapter.js" +const dataset = 'open-cross-ref'; +const book = 'GEN'; +const chapter = 1; + +// Get Genesis 1 from the open-cross-ref dataset 
+fetch(`https://bible.helloao.org/api/d/${dataset}/${book}/${chapter}.json`) + .then(request => request.json()) + .then(chapter => { + console.log('Genesis 1 (open-cross-ref):', chapter); + }); +``` diff --git a/docs/reference/README.md b/docs/reference/README.md index 1249a24..7cc0586 100644 --- a/docs/reference/README.md +++ b/docs/reference/README.md @@ -1417,3 +1417,487 @@ export interface CommentaryProfileContent { ] } ``` + +## Available Datasets + +`GET https://bible.helloao.org/api/available_datasets.json` + +Gets the list of available Bible datasets in the API. + +### Code Example + +```ts:no-line-numbers title="fetch-datasets.js" +fetch(`https://bible.helloao.org/api/available_datasets.json`) + .then(request => request.json()) + .then(availableDatasets => { + console.log('The API has the following datasets:', availableDatasets); + }); +``` + +### Structure + +```typescript:no-line-numbers title="available-datasets.ts" +export interface AvailableDatasets { + /** + * The list of datasets. + */ + datasets: Dataset[]; +} + +export interface Dataset { + /** + * The ID of the dataset. + */ + id: string; + + /** + * The name of the dataset. + */ + name: string; + + /** + * The website for the dataset. + */ + website: string; + + /** + * The URL that the license for the dataset can be found. + */ + licenseUrl: string; + + /** + * The english name for the dataset. + */ + englishName: string; + + /** + * The ISO 639 3-letter language tag that the dataset is primarily in. + */ + language: string; + + /** + * The direction that the language is written in. + * "ltr" indicates that the text is written from the left side of the page to the right. + * "rtl" indicates that the text is written from the right side of the page to the left. + */ + textDirection: 'ltr' | 'rtl'; + + /** + * The API link for the list of available books for this dataset. + */ + listOfBooksApiLink: string; + + /** + * The available list of formats. 
+ */ + availableFormats: ('json' | 'usfm')[]; + + /** + * The number of books that are contained in this dataset. + */ + numberOfBooks: number; + + /** + * The total number of chapters that are contained in this dataset. + */ + totalNumberOfChapters: number; + + /** + * The total number of verses that are contained in this dataset. + */ + totalNumberOfVerses: number; + + /** + * The total number of cross references that are contained in this dataset. + */ + totalNumberOfReferences: number; + + /** + * Gets the name of the language that the dataset is in. + * Null or undefined if the name of the language is not known. + */ + languageName?: string; + + /** + * Gets the name of the language in English. + * Null or undefined if the language doesn't have an english name. + */ + languageEnglishName?: string; +} +``` + +### Example + +```json:no-line-numbers title="/api/available_datasets.json" +{ + "datasets": [ + { + "id": "open-cross-ref", + "name": "Bible Cross References", + "website": "https://www.openbible.info/labs/cross-references/", + "licenseUrl": "https://creativecommons.org/licenses/by/4.0/", + "licenseNotes": "Changes were made to the data to fit the Free Use Bible API format.", + "englishName": "Bible Cross References", + "language": "eng", + "textDirection": "ltr", + "availableFormats": [ + "json" + ], + "listOfBooksApiLink": "/api/d/open-cross-ref/books.json", + "numberOfBooks": 66, + "totalNumberOfChapters": 1189, + "totalNumberOfVerses": 29364, + "totalNumberOfReferences": 344799, + "languageName": "English", + "languageEnglishName": "English" + } + ] +} +``` + +## List Books in a Dataset + +`GET https://bible.helloao.org/api/d/{dataset}/books.json` + +Gets the list of books that are available for the given dataset. + +- `dataset` the ID of the dataset (e.g. `open-cross-ref`). 
+ +### Code Example + +```ts:no-line-numbers title="fetch-dataset-books.js" +const dataset = 'open-cross-ref'; + +// Get the list of books for the open-cross-ref dataset +fetch(`https://bible.helloao.org/api/d/${dataset}/books.json`) + .then(request => request.json()) + .then(books => { + console.log('The open-cross-ref dataset has the following books:', books); + }); +``` + +### Structure + +```typescript:no-line-numbers title="dataset-books.ts" +export interface DatasetBooks { + /** + * The dataset information for the books. + */ + dataset: Dataset; + + /** + * The list of books that are available for the dataset. + */ + books: DatasetBook[]; +} + +interface DatasetBook { + /** + * The ID of the book. + * Matches the ID of the corresponding book in the Bible (GEN, EXO, etc.). + */ + id: string; + + /** + * The order of the book in the Bible. + */ + order: number; + + /** + * The number of the first chapter in the book. + */ + firstChapterNumber: number; + + /** + * The link to the first chapter of the book. + */ + firstChapterApiLink: string | null; + + /** + * The number of the last chapter in the book. + */ + lastChapterNumber: number | null; + + /** + * The link to the last chapter of the book. + */ + lastChapterApiLink: string | null; + + /** + * The number of chapters that the book contains. + */ + numberOfChapters: number; + + /** + * The number of verses that the book contains. + */ + totalNumberOfVerses: number; + + /** + * The total number of cross references that this book contains. 
+ */ + totalNumberOfReferences: number; +} +``` + +### Example + +```json:no-line-numbers title="/api/d/open-cross-ref/books.json" +{ + "dataset": { + "id": "open-cross-ref", + "name": "Bible Cross References", + "website": "https://www.openbible.info/labs/cross-references/", + "licenseUrl": "https://creativecommons.org/licenses/by/4.0/", + "licenseNotes": "Changes were made to the data to fit the Free Use Bible API format.", + "englishName": "Bible Cross References", + "language": "eng", + "textDirection": "ltr", + "availableFormats": [ + "json" + ], + "listOfBooksApiLink": "/api/d/open-cross-ref/books.json", + "numberOfBooks": 66, + "totalNumberOfChapters": 1189, + "totalNumberOfVerses": 29364, + "totalNumberOfReferences": 344799, + "languageName": "English", + "languageEnglishName": "English" + }, + "books": [ + { + "id": "GEN", + "datasetId": "open-cross-ref", + "order": 1, + "numberOfChapters": 50, + "firstChapterNumber": 1, + "firstChapterApiLink": "/api/d/open-cross-ref/GEN/1.json", + "lastChapterNumber": 50, + "lastChapterApiLink": "/api/d/open-cross-ref/GEN/50.json", + "totalNumberOfVerses": 1382, + "totalNumberOfReferences": 13327 + }, + { + "id": "EXO", + "datasetId": "open-cross-ref", + "order": 2, + "numberOfChapters": 40, + "firstChapterNumber": 1, + "firstChapterApiLink": "/api/d/open-cross-ref/EXO/1.json", + "lastChapterNumber": 40, + "lastChapterApiLink": "/api/d/open-cross-ref/EXO/40.json", + "totalNumberOfVerses": 1084, + "totalNumberOfReferences": 9974 + }, + ] +} +``` + +## Get a Chapter from a Dataset + +`GET https://bible.helloao.org/api/d/{dataset}/{book}/{chapter}.json` + +Gets the content of a single chapter for a given book and dataset. + +- `dataset` the ID of the dataset (e.g. `open-cross-ref`). +- `book` is the ID of the book (e.g. `GEN` for Genesis). +- `chapter` is the numerical chapter number (e.g. `1` for the first chapter). 
+ +### Code Example + +```ts:no-line-numbers title="fetch-dataset-chapter.js" +const dataset = 'open-cross-ref'; +const book = 'GEN'; +const chapter = 1; + +// Get Genesis 1 from the open-cross-ref dataset +fetch(`https://bible.helloao.org/api/d/${dataset}/${book}/${chapter}.json`) + .then(request => request.json()) + .then(chapter => { + console.log('Genesis 1 (open-cross-ref):', chapter); + }); +``` + +### Structure + +```typescript:no-line-numbers title="dataset-chapter.ts" +export interface DatasetBookChapter { + /** + * The dataset information for the book chapter. + */ + dataset: Dataset; + + /** + * The book information for the book chapter. + */ + book: DatasetBook; + + /** + * The link to this chapter. + */ + thisChapterLink: string; + + /** + * The link to the next chapter. + * Null if this is the last chapter in the dataset. + */ + nextChapterApiLink: string | null; + + /** + * The link to the previous chapter. + * Null if this is the first chapter in the dataset. + */ + previousChapterApiLink: string | null; + + /** + * The number of verses that the chapter contains. + */ + numberOfVerses: number; + + /** + * The information for the chapter. + */ + chapter: DatasetChapterData; +} + +interface DatasetChapterData { + /** + * The number of the chapter. + */ + number: number; + + /** + * The content of the chapter. + */ + content: DatasetVerse[]; +} + +interface DatasetVerse { + /** + * The number of the verse. + */ + verse: number; + + /** + * The cross-references for the verse. + * + * Sorted by score, descending. + */ + references: DatasetReference[]; +} + +interface DatasetReference { + /** + * The ID of the book that is being referenced. + */ + book: string; + + /** + * The chapter number. + */ + chapter: number; + + /** + * The verse number. + * If `endVerse` is present, then this is the verse that the reference starts at. + */ + verse: number; + + /** + * The verse that the reference ends at. 
+ */ + endVerse?: number; + + /** + * The relevence score for the reference. + */ + score?: number; +} +``` + +### Example + +```json:no-line-numbers title="/api/d/open-cross-ref/REV/22.json" +{ + "dataset": { + "id": "open-cross-ref", + "name": "Bible Cross References", + "website": "https://www.openbible.info/labs/cross-references/", + "licenseUrl": "https://creativecommons.org/licenses/by/4.0/", + "licenseNotes": "Changes were made to the data to fit the Free Use Bible API format.", + "englishName": "Bible Cross References", + "language": "eng", + "textDirection": "ltr", + "availableFormats": [ + "json" + ], + "listOfBooksApiLink": "/api/d/open-cross-ref/books.json", + "numberOfBooks": 66, + "totalNumberOfChapters": 1189, + "totalNumberOfVerses": 29364, + "totalNumberOfReferences": 344799, + "languageName": "English", + "languageEnglishName": "English" + }, + "book": { + "id": "REV", + "datasetId": "open-cross-ref", + "order": 66, + "numberOfChapters": 22, + "firstChapterNumber": 1, + "firstChapterApiLink": "/api/d/open-cross-ref/REV/1.json", + "lastChapterNumber": 22, + "lastChapterApiLink": "/api/d/open-cross-ref/REV/22.json", + "totalNumberOfVerses": 402, + "totalNumberOfReferences": 6495 + }, + "chapter": { + "number": 22, + "content": [ + { + "verse": 1, + "references": [ + { + "book": "REV", + "chapter": 7, + "verse": 17, + "score": 74 + }, + { + "book": "JHN", + "chapter": 4, + "verse": 14, + "score": 62 + }, + { + "book": "PSA", + "chapter": 36, + "verse": 8, + "endVerse": 9, + "score": 59 + }, + { + "book": "JHN", + "chapter": 7, + "verse": 38, + "endVerse": 39, + "score": 59 + }, + { + "book": "JHN", + "chapter": 4, + "verse": 10, + "endVerse": 11, + "score": 55 + }, + ] + } + ] + }, + "thisChapterLink": "/api/d/open-cross-ref/REV/22.json", + "nextChapterApiLink": null, + "previousChapterApiLink": "/api/d/open-cross-ref/REV/21.json", + "numberOfVerses": 21, + "numberOfReferences": 360 +} +``` diff --git a/packages/helloao-cli/actions.ts 
b/packages/helloao-cli/actions.ts index 2a6a4ff..a717c15 100644 --- a/packages/helloao-cli/actions.ts +++ b/packages/helloao-cli/actions.ts @@ -1,21 +1,38 @@ import path, { basename, extname } from 'node:path'; import * as database from './db.js'; -import Sql from 'better-sqlite3'; +import Sql, { Database } from 'better-sqlite3'; import { DOMParser, Element, Node } from 'linkedom'; import { mkdir, readdir, rm, writeFile } from 'node:fs/promises'; -import { getFirstNonEmpty, normalizeLanguage } from '@helloao/tools/utils.js'; -import { InputTranslationMetadata } from '@helloao/tools/generation/index.js'; +import { + getBookId, + getFirstNonEmpty, + normalizeLanguage, +} from '@helloao/tools/utils.js'; +import { + bookOrder, + dataset, + InputTranslationMetadata, +} from '@helloao/tools/generation/index.js'; import { exists, readFile } from 'fs-extra'; import { KNOWN_AUDIO_TRANSLATIONS } from '@helloao/tools/generation/audio.js'; -import { bookChapterCountMap } from '@helloao/tools/generation/book-order.js'; +import { + bookChapterCountMap, + bookOrderMap, +} from '@helloao/tools/generation/book-order.js'; import { downloadFile, unzipToDirectory } from './downloads.js'; import { batch, toAsyncIterable } from '@helloao/tools/parser/iterators.js'; import { hashInputFiles, + loadDatasetsFromDirectory, loadTranslationFiles, loadTranslationsFiles, } from './files.js'; -import { generateDataset } from '@helloao/tools/generation/dataset.js'; +import { + DatasetDataset, + DatasetDatasetBook, + DatasetOutput, + generateDataset, +} from '@helloao/tools/generation/dataset.js'; import { serializeAndUploadDatasets, UploadApiFromDatabaseOptions, @@ -36,6 +53,7 @@ import { convertUsfmToUsx3, } from './conversion.js'; import { fetchEBibleMetadata } from './ebible.js'; +import { importDatasetOutput } from './db.js'; export interface GetTranslationsItem { id: string; @@ -327,6 +345,25 @@ export async function initDb( CREATE TABLE "CommentaryChapterVerse" AS SELECT * FROM 
source.CommentaryChapterVerse INNER JOIN source.Commentary ON source.Commentary.id = source.CommentaryChapterVerse.commentaryId WHERE source.Commentary.language IN ${languages}; + + CREATE TABLE "Dataset" AS SELECT * FROM source.Dataset + WHERE language IN ${languages}; + + CREATE TABLE "DatasetBook" AS SELECT * FROM source.DatasetBook + INNER JOIN source.Dataset ON source.Dataset.id = source.DatasetBook.datasetId + WHERE source.Dataset.language IN ${languages}; + + CREATE TABLE "DatasetChapter" AS SELECT * FROM source.DatasetChapter + INNER JOIN source.Dataset ON source.Dataset.id = source.DatasetChapter.datasetId + WHERE source.Dataset.language IN ${languages}; + + CREATE TABLE "DatasetChapterVerse" AS SELECT * FROM source.DatasetChapterVerse + INNER JOIN source.Dataset ON source.Dataset.id = source.DatasetChapterVerse.datasetId + WHERE source.Dataset.language IN ${languages}; + + CREATE TABLE "DatasetReference" AS SELECT * FROM source.DatasetReference + INNER JOIN source.Dataset ON source.Dataset.id = source.DatasetReference.datasetId + WHERE source.Dataset.language IN ${languages}; `); } else { db.exec(` @@ -343,6 +380,11 @@ export async function initDb( CREATE TABLE "CommentaryBook" AS SELECT * FROM source.CommentaryBook; CREATE TABLE "CommentaryChapter" AS SELECT * FROM source.CommentaryChapter; CREATE TABLE "CommentaryChapterVerse" AS SELECT * FROM source.CommentaryChapterVerse; + CREATE TABLE "Dataset" AS SELECT * FROM source.Dataset; + CREATE TABLE "DatasetBook" AS SELECT * FROM source.DatasetBook; + CREATE TABLE "DatasetChapter" AS SELECT * FROM source.DatasetChapter; + CREATE TABLE "DatasetChapterVerse" AS SELECT * FROM source.DatasetChapterVerse; + CREATE TABLE "DatasetReference" AS SELECT * FROM source.DatasetReference; `); } @@ -489,6 +531,28 @@ export async function importCommentaries( } } +/** + * Imports the API from the given directory into the database in the current working directory. + * @param dir The directory that the API is located in. 
+ * @param options The options. + */ +export async function importApi( + dir: string, + options: ImportTranslationOptions +) { + const db = await database.getDb(options.db); + try { + const datasets = await loadDatasetsFromDirectory(dir); + importDatasetOutput(db, { + commentaries: [], + translations: [], + datasets, + }); + } finally { + db.close(); + } +} + export interface FetchTranslationsOptions { /** * Fetch all translations. If omitted, only undownloaded translations will be fetched. diff --git a/packages/helloao-cli/cli.ts b/packages/helloao-cli/cli.ts index c1305ca..64462fb 100644 --- a/packages/helloao-cli/cli.ts +++ b/packages/helloao-cli/cli.ts @@ -11,6 +11,7 @@ import { fetchAudio, generateTranslationFiles, generateTranslationsFiles, + importApi, importCommentaries, importCommentary, importTranslation, @@ -145,6 +146,17 @@ async function start() { }); }); + program + .command('import-api ') + .description('Imports API files from the given directory into the DB.') + .option('--overwrite', 'Whether to overwrite existing files.') + .action(async (dir: string, options: any) => { + await importApi(dir, { + ...program.opts(), + ...options, + }); + }); + program .command('upload-test-translation ') .description( diff --git a/packages/helloao-cli/db.ts b/packages/helloao-cli/db.ts index c3777c2..e351ab1 100644 --- a/packages/helloao-cli/db.ts +++ b/packages/helloao-cli/db.ts @@ -7,6 +7,8 @@ import { DatasetCommentary, DatasetCommentaryBook, DatasetCommentaryProfile, + DatasetDataset, + DatasetDatasetBook, DatasetOutput, DatasetTranslation, DatasetTranslationBook, @@ -19,6 +21,10 @@ import { OutputFile, OutputFileContent, CommentaryBookChapter, + DatasetBookChapter, + DatasetChapterVerseContent, + Dataset, + DatasetBook, } from '@helloao/tools/generation/index.js'; import { generateApiForDataset, @@ -197,15 +203,31 @@ export async function importFileBatch( logger.log('Generated', output.translations.length, 'translations'); logger.log('Generated', 
output.commentaries.length, 'commentaries'); + importDatasetOutput(db, output); + insertFileMetadata(db, changedFiles); +} + +/** + * Imports the given dataset output into the database. + * @param db The database to import the dataset into. + * @param output The dataset output to import. + */ +export function importDatasetOutput(db: Database, output: DatasetOutput) { + const logger = log.getLogger(); + insertTranslations(db, output.translations); updateTranslationHashes(db, output.translations); insertCommentaries(db, output.commentaries); updateCommentaryHashes(db, output.commentaries); - insertFileMetadata(db, changedFiles); + insertDatasets(db, output.datasets ?? []); + updateDatasetHashes(db, output.datasets ?? []); insertWarningMetadata(db, output.parseMessages); logger.log(`Inserted ${output.translations.length} translations into DB`); logger.log(`Inserted ${output.commentaries.length} commentaries into DB`); + if (output.datasets) { + logger.log(`Inserted ${output.datasets.length} datasets into DB`); + } logger.log( `Produced ${output.parseMessages?.length ?? 
0} warnings/errors.` ); @@ -1173,6 +1195,326 @@ function updateCommentaryHashes( logger.log(`Updated.`); } +export function insertDatasets(db: Database, datasets: DatasetDataset[]) { + const translationUpsert = db.prepare(`INSERT INTO Dataset( + id, + name, + language, + textDirection, + licenseUrl, + licenseNotes, + website, + englishName + ) VALUES ( + @id, + @name, + @language, + @textDirection, + @licenseUrl, + @licenseNotes, + @website, + @englishName + ) ON CONFLICT(id) DO + UPDATE SET + name=excluded.name, + language=excluded.language, + textDirection=excluded.textDirection, + licenseUrl=excluded.licenseUrl, + licenseNotes=excluded.licenseNotes, + website=excluded.website, + englishName=excluded.englishName;`); + + const insertManyTranslations = db.transaction( + (datasets: DatasetDataset[]) => { + for (let dataset of datasets) { + translationUpsert.run({ + id: dataset.id, + name: dataset.name, + language: dataset.language, + textDirection: dataset.textDirection, + licenseUrl: dataset.licenseUrl, + licenseNotes: dataset.licenseNotes, + website: dataset.website, + englishName: dataset.englishName, + }); + } + } + ); + + insertManyTranslations(datasets); + + const deleteReferences = db.prepare(`DELETE FROM DatasetReference + WHERE datasetId = @datasetId;`); + + for (let dataset of datasets) { + deleteReferences.run({ + datasetId: dataset.id, + }); + insertDatasetBooks(db, dataset, dataset.books); + } +} + +export function insertDatasetBooks( + db: Database, + dataset: DatasetDataset, + datasetBooks: DatasetDatasetBook[] +) { + const bookUpsert = db.prepare(`INSERT INTO DatasetBook( + id, + datasetId, + numberOfChapters, + \`order\` + ) VALUES ( + @id, + @datasetId, + @numberOfChapters, + @bookOrder + ) ON CONFLICT(id,datasetId) DO + UPDATE SET + numberOfChapters=excluded.numberOfChapters;`); + + const insertMany = db.transaction((books: DatasetDatasetBook[]) => { + for (let book of books) { + if (!book) { + continue; + } + bookUpsert.run({ + id: book.id, + 
datasetId: dataset.id, + numberOfChapters: book.chapters.length, + bookOrder: book.order ?? 9999, + }); + } + }); + + insertMany(datasetBooks); + + for (let book of datasetBooks) { + insertDatasetContent(db, dataset, book, book.chapters); + } +} + +export function insertDatasetContent( + db: Database, + dataset: DatasetDataset, + book: DatasetDatasetBook, + chapters: DatasetBookChapter[] +) { + const logger = log.getLogger(); + + const chapterUpsert = db.prepare(`INSERT INTO DatasetChapter( + datasetId, + bookId, + number, + json + ) VALUES ( + @datasetId, + @bookId, + @number, + @json + ) ON CONFLICT(datasetId,bookId,number) DO + UPDATE SET + json=excluded.json;`); + const verseUpsert = db.prepare(`INSERT INTO DatasetChapterVerse( + datasetId, + bookId, + chapterNumber, + number, + contentJson + ) VALUES ( + @datasetId, + @bookId, + @chapterNumber, + @number, + @contentJson + ) ON CONFLICT(datasetId,bookId,chapterNumber,number) DO + UPDATE SET + contentJson=excluded.contentJson;`); + + const referenceInsert = db.prepare(`INSERT INTO DatasetReference( + datasetId, + bookId, + chapterNumber, + verseNumber, + referenceBookId, + referenceChapter, + referenceVerse, + endVerseNumber, + score + ) VALUES ( + @datasetId, + @bookId, + @chapterNumber, + @verseNumber, + @referenceBookId, + @referenceChapter, + @referenceVerse, + @endVerseNumber, + @score + );`); + + const insertChaptersAndVerses = db.transaction(() => { + for (let chapter of chapters) { + chapterUpsert.run({ + datasetId: dataset.id, + bookId: book.id, + number: chapter.chapter.number, + json: JSON.stringify(chapter.chapter), + }); + + for (let verse of chapter.chapter.content) { + verseUpsert.run({ + datasetId: dataset.id, + bookId: book.id, + chapterNumber: chapter.chapter.number, + number: verse.verse, + contentJson: JSON.stringify(verse), + }); + + for (let ref of verse.references) { + referenceInsert.run({ + datasetId: dataset.id, + bookId: book.id, + chapterNumber: chapter.chapter.number, + verseNumber: 
verse.verse, + referenceBookId: ref.book, + referenceChapter: ref.chapter, + referenceVerse: ref.verse, + endVerseNumber: ref.endVerse ?? null, + score: ref.score ?? null, + }); + } + } + } + }); + + insertChaptersAndVerses(); +} + +/** + * Updates the hashes for the datasets in the database. + * @param db The database to update the hashes in. + * @param datasets The datasets to update the hashes for. + */ +function updateDatasetHashes(db: Database, datasets: Dataset[]) { + const logger = log.getLogger(); + logger.log(`Updating hashes for ${datasets.length} datasets.`); + + const updateTranslationHash = db.prepare( + `UPDATE Dataset SET sha256 = @sha256 WHERE id = @datasetId;` + ); + const updateBookHash = db.prepare( + `UPDATE DatasetBook SET sha256 = @sha256 WHERE datasetId = @datasetId AND id = @bookId;` + ); + const updateChapterHash = db.prepare( + `UPDATE DatasetChapter SET sha256 = @sha256 WHERE datasetId = @datasetId AND bookId = @bookId AND number = @chapterNumber;` + ); + + const getBooks = db.prepare( + 'SELECT * FROM DatasetBook WHERE datasetId = ?;' + ); + const getChapters = db.prepare( + 'SELECT * FROM DatasetChapter WHERE datasetId = @datasetId AND bookId = @bookId;' + ); + + for (let dataset of datasets) { + const commentarySha = sha256() + .update(dataset.id) + .update(dataset.name) + .update(dataset.language) + .update(dataset.licenseUrl) + .update(dataset.textDirection) + .update(dataset.website) + .update(dataset.englishName); + + const books = getBooks.all(dataset.id) as { + id: string; + datasetId: string; + order: number; + numberOfChapters: number; + sha256: string; + }[]; + + for (let book of books) { + const chapters = getChapters.all({ + datasetId: dataset.id, + bookId: book.id, + }) as { + number: string; + bookId: string; + datasetId: string; + json: string; + sha256: string; + }[]; + + const bookSha = sha256() + .update(book.datasetId) + .update(book.id) + .update(book.numberOfChapters) + .update(book.order); + + for (let chapter of 
chapters) { + const hash = sha256() + .update(chapter.datasetId) + .update(chapter.bookId) + .update(chapter.number) + .update(chapter.json) + .digest('hex'); + + chapter.sha256 = hash; + + bookSha.update(hash); + } + + const updateChapters = db.transaction(() => { + for (let chapter of chapters) { + updateChapterHash.run({ + sha256: chapter.sha256, + datasetId: chapter.datasetId, + bookId: chapter.bookId, + chapterNumber: chapter.number, + }); + } + }); + + updateChapters(); + + const bookHash = bookSha.digest('hex'); + book.sha256 = bookHash; + + commentarySha.update(bookHash); + } + + const updateBooks = db.transaction(() => { + for (let book of books) { + updateBookHash.run({ + sha256: book.sha256, + datasetId: book.datasetId, + bookId: book.id, + }); + } + }); + + updateBooks(); + + const hash = commentarySha.digest('hex'); + (dataset as any).sha256 = hash; + } + + const updateDatasets = db.transaction(() => { + for (let dataset of datasets) { + updateTranslationHash.run({ + sha256: (dataset as any).sha256, + datasetId: dataset.id, + }); + } + }); + + updateDatasets(); + + logger.log(`Updated.`); +} + export function getDbPathFromDir(dir: string) { dir = dir || process.cwd(); return path.resolve(dir, 'bible-api.db'); @@ -1302,6 +1644,7 @@ export async function* loadDatasets( ): AsyncGenerator { yield* loadTranslationDatasets(db, perBatch, translationsToLoad); yield* loadCommentaryDatasets(db, perBatch, translationsToLoad); + yield* loadDatasetDatasets(db, perBatch, translationsToLoad); } /** @@ -1558,6 +1901,111 @@ export async function* loadCommentaryDatasets( } } +/** + * Loads the datasets from the database as a dataset. + * @param db The database. + * @param perBatch The number of translations to load per batch. + * @param datasetsToLoad The list of commentaries to load. If not provided, all commentaries will be loaded. 
+ */ +export async function* loadDatasetDatasets( + db: PrismaClient, + perBatch: number = 50, + datasetsToLoad?: string[] +) { + const logger = log.getLogger(); + let offset = 0; + let pageSize = perBatch; + + logger.log('Generating dataset datasets in batches of', pageSize); + const totalDatasets = await db.dataset.count(); + const totalBatches = Math.ceil(totalDatasets / pageSize); + let batchNumber = 1; + + while (true) { + logger.log('Generating dataset batch', batchNumber, 'of', totalBatches); + batchNumber++; + + const datasetQuery: Prisma.DatasetFindManyArgs = { + skip: offset, + take: pageSize, + }; + + if (datasetsToLoad && datasetsToLoad.length > 0) { + datasetQuery.where = { + id: { + in: datasetsToLoad, + }, + }; + } + + const datasets = await db.dataset.findMany(datasetQuery); + + if (datasets.length <= 0) { + break; + } + + const output: DatasetOutput = { + translations: [], + commentaries: [], + datasets: [], + }; + + for (let dataset of datasets) { + const datasetDataset: DatasetDataset = { + ...dataset, + textDirection: dataset.textDirection! 
as any, + books: [], + }; + output.datasets!.push(datasetDataset); + + const books = await db.datasetBook.findMany({ + where: { + datasetId: dataset.id, + }, + orderBy: { + order: 'asc', + }, + }); + + for (let book of books) { + const chapters = await db.datasetChapter.findMany({ + where: { + datasetId: dataset.id, + bookId: book.id, + }, + orderBy: { + number: 'asc', + }, + }); + + const bookChapters: DatasetBookChapter[] = chapters.map( + (chapter) => { + const bookChapter: DatasetBookChapter = { + chapter: JSON.parse(chapter.json), + }; + + for (let verse of bookChapter.chapter.content) { + verse.references.sort((a, b) => b.score - a.score); + } + + return bookChapter; + } + ); + + const datasetBook: DatasetDatasetBook = { + ...book, + chapters: bookChapters, + }; + datasetDataset.books.push(datasetBook); + } + } + + yield output; + + offset += pageSize; + } +} + export interface SerializeApiOptions extends GenerateApiOptions { /** * Whether the output should be pretty-printed. diff --git a/packages/helloao-cli/files.ts b/packages/helloao-cli/files.ts index f23d51a..351dc6d 100644 --- a/packages/helloao-cli/files.ts +++ b/packages/helloao-cli/files.ts @@ -6,7 +6,7 @@ import { readdir, writeFile, } from 'fs/promises'; -import { extname } from 'path'; +import { basename, extname } from 'path'; import * as path from 'path'; import { existsSync } from 'fs-extra'; import { @@ -25,6 +25,12 @@ import { PARSER_VERSION } from '@helloao/tools/parser/usx-parser.js'; import { mergeWith } from 'lodash'; import { fromByteArray } from 'base64-js'; import { log } from '@helloao/tools'; +import { bookOrderMap } from '@helloao/tools/generation/book-order.js'; +import { + DatasetDataset, + DatasetDatasetBook, +} from '@helloao/tools/generation/dataset.js'; +import { getBookId } from '@helloao/tools/utils.js'; /** * Defines an interface that contains information about a serialized file. 
@@ -366,6 +372,102 @@ export async function loadCommentaryFiles( return await Promise.all(promises); } +/** + * Loads all the datasets from the given directory and returns them. + * @param dir The directory that the datasets are located in. + * @returns The list of datasets that were loaded. + */ +export async function loadDatasetsFromDirectory( + dir: string +): Promise<DatasetDataset[]> { + const logger = log.getLogger(); + + let datasets: DatasetDataset[] = []; + + const apiDir = path.resolve(dir, 'api'); + + const availableDatasets = JSON.parse( + await readFile(path.resolve(apiDir, 'available_datasets.json'), 'utf-8') + ); + datasets.push( + ...availableDatasets.datasets.map((d: any) => ({ + ...d, + books: [], + })) + ); + + for (let dataset of datasets) { + const datasetDir = path.resolve(apiDir, 'd', dataset.id); + const booksList = await readdir(datasetDir); + + for (let bookId of booksList) { + if (bookId === 'books.json') { + continue; + } + const id = getBookId(bookId); + + if (!id) { + logger.warn(`Unknown book directory: ${bookId}`); + continue; + } + + const book: DatasetDatasetBook = { + id, + chapters: [], + order: bookOrderMap.get(id)!, + }; + dataset.books.push(book); + + const bookDir = path.resolve(datasetDir, bookId); + const chapters = await readdir(bookDir); + + for (let chapterFile of chapters) { + const chapterJson = JSON.parse( + await readFile(path.resolve(bookDir, chapterFile), 'utf-8') + ); + + if (chapterJson.chapter) { + book.chapters.push({ + chapter: chapterJson.chapter, + }); + } else if (chapterJson.content) { + const chapterNumber = parseInt( + basename(chapterFile, extname(chapterFile)) + ); + + if (isNaN(chapterNumber)) { + logger.warn(`Unknown chapter format: ${chapterFile}`); + continue; + } + + book.chapters.push({ + chapter: { + number: chapterNumber, + content: chapterJson.content.map((c: any) => ({ + verse: c.verse, + references: (c.references ?? 
[]).map( + (ref: any) => ({ + book: ref.book, + chapter: ref.chapter, + verse: ref.verse, + endVerse: ref.endVerse, + score: ref.score ?? ref.votes, + }) + ), + })), + }, + }); + } else { + logger.warn(`Unknown chapter format: ${chapterFile}`); + continue; + } + } + } + } + + return datasets; +} + /** * Loads the metadata for the given translation. * @param translation The translation that the metadata should be loaded for. diff --git a/packages/helloao-cli/migrations/20251028135135_add_datasets/migration.sql b/packages/helloao-cli/migrations/20251028135135_add_datasets/migration.sql new file mode 100644 index 0000000..b056014 --- /dev/null +++ b/packages/helloao-cli/migrations/20251028135135_add_datasets/migration.sql @@ -0,0 +1,72 @@ +-- CreateTable +CREATE TABLE "Dataset" ( + "id" TEXT NOT NULL PRIMARY KEY, + "name" TEXT NOT NULL, + "website" TEXT NOT NULL, + "licenseUrl" TEXT NOT NULL, + "licenseNotes" TEXT, + "englishName" TEXT NOT NULL, + "language" TEXT NOT NULL, + "textDirection" TEXT NOT NULL, + "sha256" TEXT +); + +-- CreateTable +CREATE TABLE "DatasetBook" ( + "id" TEXT NOT NULL, + "datasetId" TEXT NOT NULL, + "name" TEXT NOT NULL, + "commonName" TEXT NOT NULL, + "introduction" TEXT, + "introductionSummary" TEXT, + "order" INTEGER NOT NULL, + "numberOfChapters" INTEGER NOT NULL, + "sha256" TEXT, + + PRIMARY KEY ("datasetId", "id"), + CONSTRAINT "DatasetBook_datasetId_fkey" FOREIGN KEY ("datasetId") REFERENCES "Dataset" ("id") ON DELETE RESTRICT ON UPDATE CASCADE +); + +-- CreateTable +CREATE TABLE "DatasetChapter" ( + "number" INTEGER NOT NULL, + "bookId" TEXT NOT NULL, + "datasetId" TEXT NOT NULL, + "json" TEXT NOT NULL, + "sha256" TEXT, + + PRIMARY KEY ("datasetId", "bookId", "number"), + CONSTRAINT "DatasetChapter_datasetId_bookId_fkey" FOREIGN KEY ("datasetId", "bookId") REFERENCES "DatasetBook" ("datasetId", "id") ON DELETE RESTRICT ON UPDATE CASCADE, + CONSTRAINT "DatasetChapter_datasetId_fkey" FOREIGN KEY ("datasetId") REFERENCES "Dataset" ("id") 
ON DELETE RESTRICT ON UPDATE CASCADE +); + +-- CreateTable +CREATE TABLE "DatasetChapterVerse" ( + "number" INTEGER NOT NULL, + "chapterNumber" INTEGER NOT NULL, + "bookId" TEXT NOT NULL, + "datasetId" TEXT NOT NULL, + "contentJson" TEXT NOT NULL, + "sha256" TEXT, + + PRIMARY KEY ("datasetId", "bookId", "chapterNumber", "number"), + CONSTRAINT "DatasetChapterVerse_datasetId_bookId_chapterNumber_fkey" FOREIGN KEY ("datasetId", "bookId", "chapterNumber") REFERENCES "DatasetChapter" ("datasetId", "bookId", "number") ON DELETE RESTRICT ON UPDATE CASCADE, + CONSTRAINT "DatasetChapterVerse_datasetId_bookId_fkey" FOREIGN KEY ("datasetId", "bookId") REFERENCES "DatasetBook" ("datasetId", "id") ON DELETE RESTRICT ON UPDATE CASCADE, + CONSTRAINT "DatasetChapterVerse_datasetId_fkey" FOREIGN KEY ("datasetId") REFERENCES "Dataset" ("id") ON DELETE RESTRICT ON UPDATE CASCADE +); + +-- CreateTable +CREATE TABLE "DatasetReference" ( + "id" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT, + "datasetId" TEXT NOT NULL, + "bookId" TEXT NOT NULL, + "chapterNumber" INTEGER NOT NULL, + "verseNumber" INTEGER NOT NULL, + "endVerseNumber" INTEGER, + "score" INTEGER, + "sha256" TEXT, + CONSTRAINT "DatasetReference_datasetId_fkey" FOREIGN KEY ("datasetId") REFERENCES "Dataset" ("id") ON DELETE RESTRICT ON UPDATE CASCADE, + CONSTRAINT "DatasetReference_datasetId_bookId_fkey" FOREIGN KEY ("datasetId", "bookId") REFERENCES "DatasetBook" ("datasetId", "id") ON DELETE RESTRICT ON UPDATE CASCADE, + CONSTRAINT "DatasetReference_datasetId_bookId_chapterNumber_fkey" FOREIGN KEY ("datasetId", "bookId", "chapterNumber") REFERENCES "DatasetChapter" ("datasetId", "bookId", "number") ON DELETE RESTRICT ON UPDATE CASCADE, + CONSTRAINT "DatasetReference_datasetId_bookId_chapterNumber_verseNumber_fkey" FOREIGN KEY ("datasetId", "bookId", "chapterNumber", "verseNumber") REFERENCES "DatasetChapterVerse" ("datasetId", "bookId", "chapterNumber", "number") ON DELETE RESTRICT ON UPDATE CASCADE +); diff --git 
a/packages/helloao-cli/migrations/20251028144932_fix_dataset_types/migration.sql b/packages/helloao-cli/migrations/20251028144932_fix_dataset_types/migration.sql new file mode 100644 index 0000000..96390eb --- /dev/null +++ b/packages/helloao-cli/migrations/20251028144932_fix_dataset_types/migration.sql @@ -0,0 +1,50 @@ +/* + Warnings: + + - You are about to drop the column `commonName` on the `DatasetBook` table. All the data in the column will be lost. + - You are about to drop the column `introduction` on the `DatasetBook` table. All the data in the column will be lost. + - You are about to drop the column `introductionSummary` on the `DatasetBook` table. All the data in the column will be lost. + - You are about to drop the column `name` on the `DatasetBook` table. All the data in the column will be lost. + - You are about to drop the column `sha256` on the `DatasetReference` table. All the data in the column will be lost. + - Added the required column `referenceBookId` to the `DatasetReference` table without a default value. This is not possible if the table is not empty. + - Added the required column `referenceChapter` to the `DatasetReference` table without a default value. This is not possible if the table is not empty. + - Added the required column `referenceVerse` to the `DatasetReference` table without a default value. This is not possible if the table is not empty. 
+ +*/ +-- RedefineTables +PRAGMA defer_foreign_keys=ON; +PRAGMA foreign_keys=OFF; +CREATE TABLE "new_DatasetBook" ( + "id" TEXT NOT NULL, + "datasetId" TEXT NOT NULL, + "order" INTEGER NOT NULL, + "numberOfChapters" INTEGER NOT NULL, + "sha256" TEXT, + + PRIMARY KEY ("datasetId", "id"), + CONSTRAINT "DatasetBook_datasetId_fkey" FOREIGN KEY ("datasetId") REFERENCES "Dataset" ("id") ON DELETE RESTRICT ON UPDATE CASCADE +); +INSERT INTO "new_DatasetBook" ("datasetId", "id", "numberOfChapters", "order", "sha256") SELECT "datasetId", "id", "numberOfChapters", "order", "sha256" FROM "DatasetBook"; +DROP TABLE "DatasetBook"; +ALTER TABLE "new_DatasetBook" RENAME TO "DatasetBook"; +CREATE TABLE "new_DatasetReference" ( + "id" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT, + "datasetId" TEXT NOT NULL, + "bookId" TEXT NOT NULL, + "chapterNumber" INTEGER NOT NULL, + "verseNumber" INTEGER NOT NULL, + "referenceBookId" TEXT NOT NULL, + "referenceChapter" INTEGER NOT NULL, + "referenceVerse" INTEGER NOT NULL, + "endVerseNumber" INTEGER, + "score" INTEGER, + CONSTRAINT "DatasetReference_datasetId_fkey" FOREIGN KEY ("datasetId") REFERENCES "Dataset" ("id") ON DELETE RESTRICT ON UPDATE CASCADE, + CONSTRAINT "DatasetReference_datasetId_bookId_fkey" FOREIGN KEY ("datasetId", "bookId") REFERENCES "DatasetBook" ("datasetId", "id") ON DELETE RESTRICT ON UPDATE CASCADE, + CONSTRAINT "DatasetReference_datasetId_bookId_chapterNumber_fkey" FOREIGN KEY ("datasetId", "bookId", "chapterNumber") REFERENCES "DatasetChapter" ("datasetId", "bookId", "number") ON DELETE RESTRICT ON UPDATE CASCADE, + CONSTRAINT "DatasetReference_datasetId_bookId_chapterNumber_verseNumber_fkey" FOREIGN KEY ("datasetId", "bookId", "chapterNumber", "verseNumber") REFERENCES "DatasetChapterVerse" ("datasetId", "bookId", "chapterNumber", "number") ON DELETE RESTRICT ON UPDATE CASCADE +); +INSERT INTO "new_DatasetReference" ("bookId", "chapterNumber", "datasetId", "endVerseNumber", "id", "score", "verseNumber") SELECT 
"bookId", "chapterNumber", "datasetId", "endVerseNumber", "id", "score", "verseNumber" FROM "DatasetReference"; +DROP TABLE "DatasetReference"; +ALTER TABLE "new_DatasetReference" RENAME TO "DatasetReference"; +PRAGMA foreign_keys=ON; +PRAGMA defer_foreign_keys=OFF; diff --git a/packages/helloao-cli/schema.prisma b/packages/helloao-cli/schema.prisma index 9bb6b1b..ea55b59 100644 --- a/packages/helloao-cli/schema.prisma +++ b/packages/helloao-cli/schema.prisma @@ -321,3 +321,107 @@ model ChapterFootnote { @@id([translationId, bookId, chapterNumber, id]) } + + +model Dataset { + id String @id + name String + website String + licenseUrl String + licenseNotes String? + englishName String + language String + textDirection String + + sha256 String? + + books DatasetBook[] + chapters DatasetChapter[] + verses DatasetChapterVerse[] + references DatasetReference[] +} + +model DatasetBook { + id String + + datasetId String + dataset Dataset @relation(fields: [datasetId], references: [id]) + + order Int + + numberOfChapters Int + + // The SHA-256 hash of the book + sha256 String? + + chapters DatasetChapter[] + verses DatasetChapterVerse[] + references DatasetReference[] + + @@id([datasetId, id]) +} + +model DatasetChapter { + number Int + + bookId String + book DatasetBook @relation(fields: [datasetId, bookId], references: [datasetId, id]) + + datasetId String + dataset Dataset @relation(fields: [datasetId], references: [id]) + + json String // The JSON of the chapter + + // The SHA-256 hash of the chapter + sha256 String? 
+ + verses DatasetChapterVerse[] + references DatasetReference[] + + @@id([datasetId, bookId, number]) +} + +model DatasetChapterVerse { + number Int + + chapterNumber Int + chapter DatasetChapter @relation(fields: [datasetId, bookId, chapterNumber], references: [datasetId, bookId, number]) + + bookId String + book DatasetBook @relation(fields: [datasetId, bookId], references: [datasetId, id]) + + datasetId String + dataset Dataset @relation(fields: [datasetId], references: [id]) + + contentJson String // The JSON of the verse content + + // The SHA-256 hash of the verse + sha256 String? + + references DatasetReference[] + + @@id([datasetId, bookId, chapterNumber, number]) +} + +model DatasetReference { + id Int @id @default(autoincrement()) + + datasetId String + dataset Dataset @relation(fields: [datasetId], references: [id]) + + bookId String + book DatasetBook @relation(fields: [datasetId, bookId], references: [datasetId, id]) + + chapterNumber Int + chapter DatasetChapter @relation(fields: [datasetId, bookId, chapterNumber], references: [datasetId, bookId, number]) + + verseNumber Int + verse DatasetChapterVerse @relation(fields: [datasetId, bookId, chapterNumber, verseNumber], references: [datasetId, bookId, chapterNumber, number]) + + referenceBookId String + referenceChapter Int + referenceVerse Int + + endVerseNumber Int? + score Int? 
+} \ No newline at end of file diff --git a/packages/helloao-tools/generation/api.spec.ts b/packages/helloao-tools/generation/api.spec.ts index 9609408..c18b4bc 100644 --- a/packages/helloao-tools/generation/api.spec.ts +++ b/packages/helloao-tools/generation/api.spec.ts @@ -6,7 +6,7 @@ import { import Genesis from '../../../bible/bsb/01GENBSB.usfm'; import Exodus from '../../../bible/bsb/02EXOBSB.usfm'; import _1Chronicles from '../../../bible/bsb/131CHBSB.usfm'; -import { generateDataset } from './dataset.js'; +import { DatasetOutput, generateDataset } from './dataset.js'; import { InputCommentaryMetadata, InputFile, @@ -2487,6 +2487,141 @@ describe('generateApiForDataset', () => { // ] // }); }); + + it('should support datasets', () => { + const dataset: DatasetOutput = { + translations: [], + commentaries: [], + datasets: [ + { + id: 'default', + englishName: 'Default Dataset', + name: 'Default Dataset', + language: 'eng', + textDirection: 'ltr', + licenseUrl: 'https://example.com/terms.htm', + website: 'https://example.com', + books: [ + { + id: 'GEN', + order: 1, + chapters: [ + { + chapter: { + number: 1, + content: [ + { + verse: 1, + references: [ + { + book: 'JHN', + chapter: 1, + verse: 1, + endVerse: 3, + score: 500, + }, + ], + }, + ], + }, + }, + ], + }, + ], + }, + ], + }; + const generated = generateApiForDataset(dataset); + const files = generateFilesForApi(generated); + + const tree = fileTree(files); + + const expectedDataset = { + id: 'default', + englishName: 'Default Dataset', + name: 'Default Dataset', + language: 'eng', + textDirection: 'ltr', + licenseUrl: 'https://example.com/terms.htm', + website: 'https://example.com', + availableFormats: ['json'], + listOfBooksApiLink: '/api/d/default/books.json', + numberOfBooks: 1, + totalNumberOfChapters: 1, + totalNumberOfVerses: 1, + totalNumberOfReferences: 1, + }; + + expect(tree).toEqual({ + '/api/available_translations.json': { + translations: [], + }, + '/api/available_commentaries.json': { + 
commentaries: [], + }, + '/api/available_datasets.json': { + datasets: [expectedDataset], + }, + '/api/d/default/books.json': { + dataset: expectedDataset, + books: [ + { + id: 'GEN', + order: 1, + numberOfChapters: 1, + totalNumberOfVerses: 1, + totalNumberOfReferences: 1, + firstChapterNumber: 1, + lastChapterNumber: 1, + firstChapterApiLink: '/api/d/default/GEN/1.json', + lastChapterApiLink: '/api/d/default/GEN/1.json', + }, + ], + }, + '/api/d/default/GEN/1.json': { + dataset: expectedDataset, + book: { + id: 'GEN', + order: 1, + numberOfChapters: 1, + totalNumberOfVerses: 1, + totalNumberOfReferences: 1, + firstChapterNumber: 1, + lastChapterNumber: 1, + firstChapterApiLink: '/api/d/default/GEN/1.json', + lastChapterApiLink: '/api/d/default/GEN/1.json', + }, + thisChapterLink: '/api/d/default/GEN/1.json', + nextChapterApiLink: null, + previousChapterApiLink: null, + numberOfVerses: 1, + numberOfReferences: 1, + chapter: { + number: 1, + content: [ + { + verse: 1, + references: [ + { + book: 'JHN', + chapter: 1, + verse: 1, + endVerse: 3, + score: 500, + }, + ], + }, + ], + }, + }, + }); + + // expect(availableTranslations).toEqual({ + // translations: [ + // expectedTranslation + // ] + // }); + }); }); function firstXLines(content: string, x: number) { diff --git a/packages/helloao-tools/generation/api.ts b/packages/helloao-tools/generation/api.ts index 3dee8d8..ebaefe4 100644 --- a/packages/helloao-tools/generation/api.ts +++ b/packages/helloao-tools/generation/api.ts @@ -3,6 +3,9 @@ import { CommentaryBook, CommentaryBookChapter, CommentaryProfile, + Dataset, + DatasetBook, + DatasetBookChapter, OutputFile, Translation, TranslationBook, @@ -74,6 +77,25 @@ export interface ApiOutput { */ commentaryProfileContents: ApiCommentaryProfileContent[]; + /** + * The list of available datasets. + * This maps to the /api/available-datasets.json endpoint. + */ + availableDatasets?: ApiAvailableDatasets; + + /** + * The list of books for each dataset. 
+ * This maps to the /api/d/:datasetId/books.json endpoint. + */ + datasetBooks?: ApiDatasetBooks[]; + + /** + * The list of chapters for each dataset book. + * This maps to the following endpoint: + * - /api/d/:datasetId/:bookId/:chapterNumber.json + */ + datasetBookChapters?: ApiDatasetBookChapter[]; + /** * The path prefix that the API should use. */ @@ -102,6 +124,61 @@ export interface ApiAvailableCommentaries { commentaries: ApiCommentary[]; } +/** + * The list of available datasets. + * Maps to the /api/available-datasets.json endpoint. + */ +export interface ApiAvailableDatasets { + datasets: ApiDataset[]; +} + +/** + * Defines a dataset that is used in the API. + */ +export interface ApiDataset extends Dataset { + /** + * The API link for the list of books for this dataset. + */ + listOfBooksApiLink: string; + + /** + * The available list of formats. + */ + availableFormats: 'json'[]; + + /** + * The number of books that are contained in this dataset. + */ + numberOfBooks: number; + + /** + * The total number of chapters that are contained in this dataset. + */ + totalNumberOfChapters: number; + + /** + * The total number of verses that are contained in this dataset. + */ + totalNumberOfVerses: number; + + /** + * The total number of references that are contained in this dataset. + */ + totalNumberOfReferences: number; + + /** + * Gets the name of the language that the dataset is in. + * Null or undefined if the name of the language is not known. + */ + languageName?: string; + + /** + * Gets the name of the language in English. + * Null or undefined if the language doesn't have an English name. + */ + languageEnglishName?: string; +} + /** * Defines a translation that is used in the API. */ @@ -255,6 +332,21 @@ export interface ApiCommentaryBooks { books: ApiCommentaryBook[]; } +/** + * Defines an interface that contains information about the books that are available for a dataset. 
+ */ +export interface ApiDatasetBooks { + /** + * The dataset information for the books. + */ + dataset: ApiDataset; + + /** + * The list of books that are available for the dataset. + */ + books: ApiDatasetBook[]; +} + /** * Defines an interface that contains information about the profiles that are available for a commentary. */ @@ -363,6 +455,46 @@ export interface ApiCommentaryBook extends CommentaryBook { totalNumberOfVerses: number; } +/** + * Defines an interface that contains information about a book in a dataset. + */ +export interface ApiDatasetBook extends DatasetBook { + /** + * The number of the first chapter in the book. + */ + firstChapterNumber: number; + + /** + * The link to the first chapter of the book. + */ + firstChapterApiLink: string; + + /** + * The number of the last chapter in the book. + */ + lastChapterNumber: number; + + /** + * The link to the last chapter of the book. + */ + lastChapterApiLink: string; + + /** + * The number of chapters that the book contains. + */ + numberOfChapters: number; + + /** + * The number of verses that the book contains. + */ + totalNumberOfVerses: number; + + /** + * The number of references that the book contains. + */ + totalNumberOfReferences: number; +} + /** * Defines an interface that contains information about a book chapter. */ @@ -449,6 +581,48 @@ export interface ApiCommentaryBookChapter extends CommentaryBookChapter { numberOfVerses: number; } +/** + * Defines an interface that contains information about a book chapter. + */ +export interface ApiDatasetBookChapter extends DatasetBookChapter { + /** + * The dataset information for the book chapter. + */ + dataset: ApiDataset; + + /** + * The book information for the book chapter. + */ + book: ApiDatasetBook; + + /** + * The link to this chapter. + */ + thisChapterLink: string; + + /** + * The link to the next chapter. + * Null if this is the last chapter in the translation. 
+ */ + nextChapterApiLink: string | null; + + /** + * The link to the previous chapter. + * Null if this is the first chapter in the translation. + */ + previousChapterApiLink: string | null; + + /** + * The number of verses that the chapter contains. + */ + numberOfVerses: number; + + /** + * The number of references that the chapter contains. + */ + numberOfReferences: number; +} + export interface ApiTranslationBookChapterAudio { /** * The chapter that the audio is for. @@ -895,6 +1069,139 @@ export function generateApiForDataset( api.commentaryProfiles.push(commentaryProfiles); } + for (let { books, ...datasetInfo } of dataset.datasets ?? []) { + const apiDataset: ApiDataset = { + ...datasetInfo, + availableFormats: ['json'], + listOfBooksApiLink: listOfDatasetBooksApiLink( + datasetInfo.id, + apiPathPrefix + ), + numberOfBooks: books.length, + totalNumberOfChapters: 0, + totalNumberOfVerses: 0, + totalNumberOfReferences: 0, + languageName: getNativeName + ? (getNativeName(datasetInfo.language) ?? undefined) + : undefined, + languageEnglishName: getEnglishName + ? (getEnglishName(datasetInfo.language) ?? undefined) + : undefined, + }; + + const datasetBooks: ApiDatasetBooks = { + dataset: apiDataset, + books: [], + }; + + let datasetChapters: ApiDatasetBookChapter[] = []; + + for (let { chapters, ...book } of books) { + const firstChapterNumber = chapters[0]?.chapter.number ?? null; + const lastChapterNumber = + chapters[chapters.length - 1]?.chapter.number ?? 
null; + const apiBook: ApiDatasetBook = { + ...book, + firstChapterNumber, + firstChapterApiLink: bookDatasetChapterApiLink( + datasetInfo.id, + book.id, + firstChapterNumber, + 'json', + apiPathPrefix + ), + lastChapterNumber, + lastChapterApiLink: bookDatasetChapterApiLink( + datasetInfo.id, + book.id, + lastChapterNumber, + 'json', + apiPathPrefix + ), + numberOfChapters: chapters.length, + totalNumberOfVerses: 0, + totalNumberOfReferences: 0, + }; + + for (let { chapter } of chapters) { + const apiBookChapter: ApiDatasetBookChapter = { + dataset: apiDataset, + book: apiBook, + chapter: chapter, + thisChapterLink: bookDatasetChapterApiLink( + datasetInfo.id, + book.id, + chapter.number, + 'json', + apiPathPrefix + ), + nextChapterApiLink: null, + previousChapterApiLink: null, + numberOfVerses: chapter.content.length, + numberOfReferences: 0, + }; + + // apiBookChapter.numberOfVerses += ; + for (let verse of chapter.content) { + apiBookChapter.numberOfReferences += + verse.references.length; + } + + apiBook.totalNumberOfVerses += apiBookChapter.numberOfVerses; + apiBook.totalNumberOfReferences += + apiBookChapter.numberOfReferences; + + datasetChapters.push(apiBookChapter); + if (!api.datasetBookChapters) { + api.datasetBookChapters = []; + } + api.datasetBookChapters.push(apiBookChapter); + } + + datasetBooks.books.push(apiBook); + + apiDataset.totalNumberOfChapters += apiBook.numberOfChapters; + apiDataset.totalNumberOfVerses += apiBook.totalNumberOfVerses; + apiDataset.totalNumberOfReferences += + apiBook.totalNumberOfReferences; + } + + for (let i = 0; i < datasetChapters.length; i++) { + if (i > 0) { + datasetChapters[i].previousChapterApiLink = + bookDatasetChapterApiLink( + datasetInfo.id, + datasetChapters[i - 1].book.id, + datasetChapters[i - 1].chapter.number, + 'json', + apiPathPrefix + ); + } + + if (i < datasetChapters.length - 1) { + datasetChapters[i].nextChapterApiLink = + bookDatasetChapterApiLink( + datasetInfo.id, + datasetChapters[i + 
1].book.id, + datasetChapters[i + 1].chapter.number, + 'json', + apiPathPrefix + ); + } + } + + if (!api.availableDatasets) { + api.availableDatasets = { + datasets: [], + }; + } + api.availableDatasets.datasets.push(apiDataset); + if (!api.datasetBooks) { + api.datasetBooks = []; + } + api.datasetBooks.push(datasetBooks); + } + return api; function getBookLink(book: TranslationBook | CommentaryBook): string { @@ -968,6 +1275,32 @@ export function generateFilesForApi(api: ApiOutput): OutputFile[] { files.push(jsonFile(bookChapter.thisChapterLink, bookChapter)); } + if (api.availableDatasets) { + files.push( + jsonFile( + `${api.pathPrefix}/api/available_datasets.json`, + api.availableDatasets, + true + ) + ); + } + + if (api.datasetBooks) { + for (let datasetBook of api.datasetBooks) { + files.push( + jsonFile(datasetBook.dataset.listOfBooksApiLink, datasetBook) + ); + } + } + + if (api.datasetBookChapters) { + for (let datasetBookChapter of api.datasetBookChapters) { + files.push( + jsonFile(datasetBookChapter.thisChapterLink, datasetBookChapter) + ); + } + } + // for (let audio of api.translationBookChapterAudio) { // files.push(downloadedFile(audio.link, audio.originalUrl)); // } @@ -1016,6 +1349,18 @@ export function listOfCommentaryBooksApiLink( return `${prefix}/api/c/${commentaryId}/books.json`; } +/** + * Gets the API Link for the list of books endpoint for a dataset. + * @param datasetId The ID of the dataset. + * @returns + */ +export function listOfDatasetBooksApiLink( + datasetId: string, + prefix: string = '' +): string { + return `${prefix}/api/d/${datasetId}/books.json`; +} + /** * Getes the API link for a book chapter. * @param translationId The ID of the translation. @@ -1066,6 +1411,25 @@ export function bookChapterAudioApiLink( )}/${chapterNumber}.${reader}.mp3`; } +/** + * Gets the API link for a dataset book chapter. + * @param translationId The ID of the dataset. + * @param commonName The ID of the book
+ * @param chapterNumber The number of the chapter. + * @param extension The extension of the file. + */ +export function bookDatasetChapterApiLink( + translationId: string, + commonName: string, + chapterNumber: number, + extension: string, + prefix: string = '' +) { + return `${prefix}/api/d/${translationId}/${replaceSpacesWithUnderscores( + commonName )}/${chapterNumber}.${extension}`; +} + /** * Gets the API link for a profile. * @param translationId The ID of the translation. diff --git a/packages/helloao-tools/generation/common-types.ts b/packages/helloao-tools/generation/common-types.ts index c4eac6a..c5b8442 100644 --- a/packages/helloao-tools/generation/common-types.ts +++ b/packages/helloao-tools/generation/common-types.ts @@ -203,6 +203,48 @@ export interface Commentary { textDirection: 'ltr' | 'rtl'; } +export interface Dataset { + /** + * The ID of the dataset. + */ + id: string; + + /** + * The name of the dataset. + */ + name: string; + + /** + * The website for the dataset. + */ + website: string; + + /** + * The URL that the license for the dataset can be found. + */ + licenseUrl: string; + + /** + * The API-added notes for the license. + */ + licenseNotes?: string | null; + + /** + * The English name for the dataset. + */ + englishName: string; + + /** + * The ISO 639 3-letter language tag that the dataset is primarily in. + */ + language: string; + + /** + * The direction that the language is written in. + */ + textDirection: 'ltr' | 'rtl'; +} + /** * Defines an interface that contains information about a book. */ @@ -275,6 +317,68 @@ export interface CommentaryBook { order: number; } +/** + * Defines an interface that contains information about a dataset book. + */ +export interface DatasetBook { + /** + * The ID of the book. Should match the USFM book ID. + */ + id: string; + + /** + * The order of the book in the Bible. + */ + order: number; +} + +/** + * Defines an interface that contains information about a chapter in a dataset. 
+ */ +export interface DatasetBookChapter { + /** + * The data for the chapter. + */ + chapter: DatasetChapterData; +} + +export interface DatasetChapterData { + /** + * The number of the chapter. + */ + number: number; + + /** + * The content of the chapter. + */ + content: DatasetChapterVerseContent[]; +} + +/** + * Defines an interface that contains information about a verse in a dataset chapter. + */ +export interface DatasetChapterVerseContent { + /** + * The number of the verse. + */ + verse: number; + + /** + * The list of references for the verse. + */ + references: ScoredVerseRef[]; +} + +/** + * Defines an interface that contains information about a verse reference that has an arbitrary score attached to it. + */ +export interface ScoredVerseRef extends VerseRef { + /** + * The score of the verse reference. + */ + score: number; +} + /** * Defines an interface that contains information about a profile in a commentary. */ diff --git a/packages/helloao-tools/generation/dataset.ts b/packages/helloao-tools/generation/dataset.ts index 104407f..5d67a06 100644 --- a/packages/helloao-tools/generation/dataset.ts +++ b/packages/helloao-tools/generation/dataset.ts @@ -14,6 +14,9 @@ import { Translation, TranslationBook, TranslationBookChapter, + Dataset as CommonDataset, + DatasetBook, + DatasetBookChapter, } from './common-types.js'; import { bookIdMap as defaultBookIdMap, @@ -46,6 +49,11 @@ export interface DatasetOutput { */ commentaries: DatasetCommentary[]; + /** + * The list of datasets that are available in the dataset. 
+ */ + datasets?: DatasetDataset[]; + parseMessages?: { [key: string]: ParseMessage[]; }; @@ -103,6 +111,14 @@ export interface DatasetCommentaryProfile extends CommentaryProfile { content: string[]; } +export interface DatasetDataset extends CommonDataset { + books: DatasetDatasetBook[]; +} + +export interface DatasetDatasetBook extends DatasetBook { + chapters: DatasetBookChapter[]; +} + /** * Generates a list of output files from the given list of input files. * @param file The list of files.