diff --git a/docs/guide/getting-started.md b/docs/guide/getting-started.md
index f01a98d..6e23c4d 100644
--- a/docs/guide/getting-started.md
+++ b/docs/guide/getting-started.md
@@ -12,5 +12,8 @@ These files are available at the following paths:
- [`https://bible.helloao.org/api/available_commentaries.json`](../reference/README.md#available-commentaries)
- [`https://bible.helloao.org/api/c/{commentary}/books.json`](../reference/README.md#list-books-in-a-commentary)
- [`https://bible.helloao.org/api/c/{commentary}/{book}/{chapter}.json`](../reference/README.md#get-a-chapter-from-a-commentary)
+- [`https://bible.helloao.org/api/available_datasets.json`](../reference/README.md#available-datasets)
+- [`https://bible.helloao.org/api/d/{dataset}/books.json`](../reference/README.md#list-books-in-a-dataset)
+- [`https://bible.helloao.org/api/d/{dataset}/{book}/{chapter}.json`](../reference/README.md#get-a-chapter-from-a-dataset)
For more information about each endpoint, see the [next page](./making-requests.md).
diff --git a/docs/guide/making-requests.md b/docs/guide/making-requests.md
index 0c81a35..c026cfb 100644
--- a/docs/guide/making-requests.md
+++ b/docs/guide/making-requests.md
@@ -104,7 +104,7 @@ fetch(`https://bible.helloao.org/api/c/${commentary}/${book}/${chapter}.json`)
});
```
-## List Profiles in a Commentary
+### List Profiles in a Commentary
([reference](../reference/README.md#list-profiles-in-a-commentary))
@@ -119,7 +119,7 @@ fetch(`https://bible.helloao.org/api/c/${commentary}/profiles.json`)
});
```
-## Get a Profile in a Commentary
+### Get a Profile in a Commentary
([reference](../reference/README.md#get-a-profile-in-a-commentary))
@@ -134,3 +134,47 @@ fetch(`https://bible.helloao.org/api/c/${commentary}/profiles/${profile}.json`)
console.log('The Aaron tyndale commentary profile:', profile);
});
```
+
+### Get the list of Available Datasets
+
+([reference](../reference/README.md#available-datasets))
+
+```ts:no-line-numbers title="fetch-datasets.js"
+fetch(`https://bible.helloao.org/api/available_datasets.json`)
+ .then(request => request.json())
+ .then(availableDatasets => {
+ console.log('The API has the following datasets:', availableDatasets);
+ });
+```
+
+### Get the list of books in a dataset
+
+([reference](../reference/README.md#list-books-in-a-dataset))
+
+```ts:no-line-numbers title="fetch-dataset-books.js"
+const dataset = 'open-cross-ref';
+
+// Get the list of books for the open-cross-ref dataset
+fetch(`https://bible.helloao.org/api/d/${dataset}/books.json`)
+ .then(request => request.json())
+ .then(books => {
+ console.log('The open-cross-ref dataset has the following books:', books);
+ });
+```
+
+### Get a Chapter from a Dataset
+
+([reference](../reference/README.md#get-a-chapter-from-a-dataset))
+
+```ts:no-line-numbers title="fetch-dataset-chapter.js"
+const dataset = 'open-cross-ref';
+const book = 'GEN';
+const chapter = 1;
+
+// Get Genesis 1 from the open-cross-ref dataset
+fetch(`https://bible.helloao.org/api/d/${dataset}/${book}/${chapter}.json`)
+ .then(request => request.json())
+ .then(chapter => {
+ console.log('Genesis 1 (open-cross-ref):', chapter);
+ });
+```
diff --git a/docs/reference/README.md b/docs/reference/README.md
index 1249a24..7cc0586 100644
--- a/docs/reference/README.md
+++ b/docs/reference/README.md
@@ -1417,3 +1417,487 @@ export interface CommentaryProfileContent {
]
}
```
+
+## Available Datasets
+
+`GET https://bible.helloao.org/api/available_datasets.json`
+
+Gets the list of available Bible datasets in the API.
+
+### Code Example
+
+```ts:no-line-numbers title="fetch-datasets.js"
+fetch(`https://bible.helloao.org/api/available_datasets.json`)
+ .then(request => request.json())
+ .then(availableDatasets => {
+            console.log('The API has the following datasets:', availableDatasets);
+ });
+```
+
+### Structure
+
+```typescript:no-line-numbers title="available-datasets.ts"
+export interface AvailableDatasets {
+ /**
+ * The list of datasets.
+ */
+ datasets: Dataset[];
+}
+
+export interface Dataset {
+ /**
+ * The ID of the dataset.
+ */
+ id: string;
+
+ /**
+ * The name of the dataset.
+ */
+ name: string;
+
+ /**
+ * The website for the dataset.
+ */
+ website: string;
+
+ /**
+ * The URL that the license for the dataset can be found.
+ */
+    licenseUrl: string;
+
+    /**
+     * Notes regarding the license for the dataset.
+     * Omitted if there are no notes.
+     */
+    licenseNotes?: string;
+
+ /**
+ * The english name for the dataset.
+ */
+ englishName: string;
+
+ /**
+ * The ISO 639 3-letter language tag that the dataset is primarily in.
+ */
+ language: string;
+
+ /**
+ * The direction that the language is written in.
+ * "ltr" indicates that the text is written from the left side of the page to the right.
+ * "rtl" indicates that the text is written from the right side of the page to the left.
+ */
+ textDirection: 'ltr' | 'rtl';
+
+ /**
+ * The API link for the list of available books for this dataset.
+ */
+ listOfBooksApiLink: string;
+
+ /**
+ * The available list of formats.
+ */
+ availableFormats: ('json' | 'usfm')[];
+
+ /**
+ * The number of books that are contained in this dataset.
+ */
+ numberOfBooks: number;
+
+ /**
+ * The total number of chapters that are contained in this dataset.
+ */
+ totalNumberOfChapters: number;
+
+ /**
+ * The total number of verses that are contained in this dataset.
+ */
+ totalNumberOfVerses: number;
+
+ /**
+ * The total number of cross references that are contained in this dataset.
+ */
+ totalNumberOfReferences: number;
+
+ /**
+ * Gets the name of the language that the dataset is in.
+ * Null or undefined if the name of the language is not known.
+ */
+ languageName?: string;
+
+ /**
+ * Gets the name of the language in English.
+ * Null or undefined if the language doesn't have an english name.
+ */
+ languageEnglishName?: string;
+}
+```
+
+### Example
+
+```json:no-line-numbers title="/api/available_datasets.json"
+{
+ "datasets": [
+ {
+ "id": "open-cross-ref",
+ "name": "Bible Cross References",
+ "website": "https://www.openbible.info/labs/cross-references/",
+ "licenseUrl": "https://creativecommons.org/licenses/by/4.0/",
+ "licenseNotes": "Changes were made to the data to fit the Free Use Bible API format.",
+ "englishName": "Bible Cross References",
+ "language": "eng",
+ "textDirection": "ltr",
+ "availableFormats": [
+ "json"
+ ],
+ "listOfBooksApiLink": "/api/d/open-cross-ref/books.json",
+ "numberOfBooks": 66,
+ "totalNumberOfChapters": 1189,
+ "totalNumberOfVerses": 29364,
+ "totalNumberOfReferences": 344799,
+ "languageName": "English",
+ "languageEnglishName": "English"
+ }
+ ]
+}
+```
+
+## List Books in a Dataset
+
+`GET https://bible.helloao.org/api/d/{dataset}/books.json`
+
+Gets the list of books that are available for the given dataset.
+
+- `dataset` the ID of the dataset (e.g. `open-cross-ref`).
+
+### Code Example
+
+```ts:no-line-numbers title="fetch-dataset-books.js"
+const dataset = 'open-cross-ref';
+
+// Get the list of books for the open-cross-ref dataset
+fetch(`https://bible.helloao.org/api/d/${dataset}/books.json`)
+ .then(request => request.json())
+ .then(books => {
+ console.log('The open-cross-ref dataset has the following books:', books);
+ });
+```
+
+### Structure
+
+```typescript:no-line-numbers title="dataset-books.ts"
+export interface DatasetBooks {
+ /**
+ * The dataset information for the books.
+ */
+ dataset: Dataset;
+
+ /**
+ * The list of books that are available for the dataset.
+ */
+ books: DatasetBook[];
+}
+
+interface DatasetBook {
+ /**
+ * The ID of the book.
+ * Matches the ID of the corresponding book in the Bible (GEN, EXO, etc.).
+ */
+ id: string;
+
+ /**
+ * The order of the book in the Bible.
+ */
+ order: number;
+
+ /**
+ * The number of the first chapter in the book.
+ */
+ firstChapterNumber: number;
+
+ /**
+ * The link to the first chapter of the book.
+ */
+ firstChapterApiLink: string | null;
+
+ /**
+ * The number of the last chapter in the book.
+ */
+ lastChapterNumber: number | null;
+
+ /**
+ * The link to the last chapter of the book.
+ */
+ lastChapterApiLink: string | null;
+
+ /**
+ * The number of chapters that the book contains.
+ */
+ numberOfChapters: number;
+
+ /**
+ * The number of verses that the book contains.
+ */
+ totalNumberOfVerses: number;
+
+ /**
+ * The total number of cross references that this book contains.
+ */
+ totalNumberOfReferences: number;
+}
+```
+
+### Example
+
+```json:no-line-numbers title="/api/d/open-cross-ref/books.json"
+{
+ "dataset": {
+ "id": "open-cross-ref",
+ "name": "Bible Cross References",
+ "website": "https://www.openbible.info/labs/cross-references/",
+ "licenseUrl": "https://creativecommons.org/licenses/by/4.0/",
+ "licenseNotes": "Changes were made to the data to fit the Free Use Bible API format.",
+ "englishName": "Bible Cross References",
+ "language": "eng",
+ "textDirection": "ltr",
+ "availableFormats": [
+ "json"
+ ],
+ "listOfBooksApiLink": "/api/d/open-cross-ref/books.json",
+ "numberOfBooks": 66,
+ "totalNumberOfChapters": 1189,
+ "totalNumberOfVerses": 29364,
+ "totalNumberOfReferences": 344799,
+ "languageName": "English",
+ "languageEnglishName": "English"
+ },
+ "books": [
+ {
+ "id": "GEN",
+ "datasetId": "open-cross-ref",
+ "order": 1,
+ "numberOfChapters": 50,
+ "firstChapterNumber": 1,
+ "firstChapterApiLink": "/api/d/open-cross-ref/GEN/1.json",
+ "lastChapterNumber": 50,
+ "lastChapterApiLink": "/api/d/open-cross-ref/GEN/50.json",
+ "totalNumberOfVerses": 1382,
+ "totalNumberOfReferences": 13327
+ },
+ {
+ "id": "EXO",
+ "datasetId": "open-cross-ref",
+ "order": 2,
+ "numberOfChapters": 40,
+ "firstChapterNumber": 1,
+ "firstChapterApiLink": "/api/d/open-cross-ref/EXO/1.json",
+ "lastChapterNumber": 40,
+ "lastChapterApiLink": "/api/d/open-cross-ref/EXO/40.json",
+ "totalNumberOfVerses": 1084,
+ "totalNumberOfReferences": 9974
+        }
+ ]
+}
+```
+
+## Get a Chapter from a Dataset
+
+`GET https://bible.helloao.org/api/d/{dataset}/{book}/{chapter}.json`
+
+Gets the content of a single chapter for a given book and dataset.
+
+- `dataset` the ID of the dataset (e.g. `open-cross-ref`).
+- `book` is the ID of the book (e.g. `GEN` for Genesis).
+- `chapter` is the numerical chapter number (e.g. `1` for the first chapter).
+
+### Code Example
+
+```ts:no-line-numbers title="fetch-dataset-chapter.js"
+const dataset = 'open-cross-ref';
+const book = 'GEN';
+const chapter = 1;
+
+// Get Genesis 1 from the open-cross-ref dataset
+fetch(`https://bible.helloao.org/api/d/${dataset}/${book}/${chapter}.json`)
+ .then(request => request.json())
+ .then(chapter => {
+ console.log('Genesis 1 (open-cross-ref):', chapter);
+ });
+```
+
+### Structure
+
+```typescript:no-line-numbers title="dataset-chapter.ts"
+export interface DatasetBookChapter {
+ /**
+ * The dataset information for the book chapter.
+ */
+ dataset: Dataset;
+
+ /**
+ * The book information for the book chapter.
+ */
+ book: DatasetBook;
+
+ /**
+ * The link to this chapter.
+ */
+ thisChapterLink: string;
+
+ /**
+ * The link to the next chapter.
+ * Null if this is the last chapter in the dataset.
+ */
+ nextChapterApiLink: string | null;
+
+ /**
+ * The link to the previous chapter.
+ * Null if this is the first chapter in the dataset.
+ */
+ previousChapterApiLink: string | null;
+
+ /**
+ * The number of verses that the chapter contains.
+ */
+ numberOfVerses: number;
+
+ /**
+ * The information for the chapter.
+ */
+ chapter: DatasetChapterData;
+}
+
+interface DatasetChapterData {
+ /**
+ * The number of the chapter.
+ */
+ number: number;
+
+ /**
+ * The content of the chapter.
+ */
+ content: DatasetVerse[];
+}
+
+interface DatasetVerse {
+ /**
+ * The number of the verse.
+ */
+ verse: number;
+
+ /**
+ * The cross-references for the verse.
+ *
+ * Sorted by score, descending.
+ */
+ references: DatasetReference[];
+}
+
+interface DatasetReference {
+ /**
+ * The ID of the book that is being referenced.
+ */
+ book: string;
+
+ /**
+ * The chapter number.
+ */
+ chapter: number;
+
+ /**
+ * The verse number.
+ * If `endVerse` is present, then this is the verse that the reference starts at.
+ */
+ verse: number;
+
+ /**
+ * The verse that the reference ends at.
+ */
+ endVerse?: number;
+
+ /**
+ * The relevence score for the reference.
+ */
+ score?: number;
+}
+```
+
+### Example
+
+```json:no-line-numbers title="/api/d/open-cross-ref/REV/22.json"
+{
+ "dataset": {
+ "id": "open-cross-ref",
+ "name": "Bible Cross References",
+ "website": "https://www.openbible.info/labs/cross-references/",
+ "licenseUrl": "https://creativecommons.org/licenses/by/4.0/",
+ "licenseNotes": "Changes were made to the data to fit the Free Use Bible API format.",
+ "englishName": "Bible Cross References",
+ "language": "eng",
+ "textDirection": "ltr",
+ "availableFormats": [
+ "json"
+ ],
+ "listOfBooksApiLink": "/api/d/open-cross-ref/books.json",
+ "numberOfBooks": 66,
+ "totalNumberOfChapters": 1189,
+ "totalNumberOfVerses": 29364,
+ "totalNumberOfReferences": 344799,
+ "languageName": "English",
+ "languageEnglishName": "English"
+ },
+ "book": {
+ "id": "REV",
+ "datasetId": "open-cross-ref",
+ "order": 66,
+ "numberOfChapters": 22,
+ "firstChapterNumber": 1,
+ "firstChapterApiLink": "/api/d/open-cross-ref/REV/1.json",
+ "lastChapterNumber": 22,
+ "lastChapterApiLink": "/api/d/open-cross-ref/REV/22.json",
+ "totalNumberOfVerses": 402,
+ "totalNumberOfReferences": 6495
+ },
+ "chapter": {
+ "number": 22,
+ "content": [
+ {
+ "verse": 1,
+ "references": [
+ {
+ "book": "REV",
+ "chapter": 7,
+ "verse": 17,
+ "score": 74
+ },
+ {
+ "book": "JHN",
+ "chapter": 4,
+ "verse": 14,
+ "score": 62
+ },
+ {
+ "book": "PSA",
+ "chapter": 36,
+ "verse": 8,
+ "endVerse": 9,
+ "score": 59
+ },
+ {
+ "book": "JHN",
+ "chapter": 7,
+ "verse": 38,
+ "endVerse": 39,
+ "score": 59
+ },
+ {
+ "book": "JHN",
+ "chapter": 4,
+ "verse": 10,
+ "endVerse": 11,
+ "score": 55
+                }
+ ]
+ }
+ ]
+ },
+ "thisChapterLink": "/api/d/open-cross-ref/REV/22.json",
+ "nextChapterApiLink": null,
+ "previousChapterApiLink": "/api/d/open-cross-ref/REV/21.json",
+ "numberOfVerses": 21,
+ "numberOfReferences": 360
+}
+```
diff --git a/packages/helloao-cli/actions.ts b/packages/helloao-cli/actions.ts
index 2a6a4ff..a717c15 100644
--- a/packages/helloao-cli/actions.ts
+++ b/packages/helloao-cli/actions.ts
@@ -1,21 +1,38 @@
import path, { basename, extname } from 'node:path';
import * as database from './db.js';
-import Sql from 'better-sqlite3';
+import Sql, { Database } from 'better-sqlite3';
import { DOMParser, Element, Node } from 'linkedom';
import { mkdir, readdir, rm, writeFile } from 'node:fs/promises';
-import { getFirstNonEmpty, normalizeLanguage } from '@helloao/tools/utils.js';
-import { InputTranslationMetadata } from '@helloao/tools/generation/index.js';
+import {
+ getBookId,
+ getFirstNonEmpty,
+ normalizeLanguage,
+} from '@helloao/tools/utils.js';
+import {
+ bookOrder,
+ dataset,
+ InputTranslationMetadata,
+} from '@helloao/tools/generation/index.js';
import { exists, readFile } from 'fs-extra';
import { KNOWN_AUDIO_TRANSLATIONS } from '@helloao/tools/generation/audio.js';
-import { bookChapterCountMap } from '@helloao/tools/generation/book-order.js';
+import {
+ bookChapterCountMap,
+ bookOrderMap,
+} from '@helloao/tools/generation/book-order.js';
import { downloadFile, unzipToDirectory } from './downloads.js';
import { batch, toAsyncIterable } from '@helloao/tools/parser/iterators.js';
import {
hashInputFiles,
+ loadDatasetsFromDirectory,
loadTranslationFiles,
loadTranslationsFiles,
} from './files.js';
-import { generateDataset } from '@helloao/tools/generation/dataset.js';
+import {
+ DatasetDataset,
+ DatasetDatasetBook,
+ DatasetOutput,
+ generateDataset,
+} from '@helloao/tools/generation/dataset.js';
import {
serializeAndUploadDatasets,
UploadApiFromDatabaseOptions,
@@ -36,6 +53,7 @@ import {
convertUsfmToUsx3,
} from './conversion.js';
import { fetchEBibleMetadata } from './ebible.js';
+import { importDatasetOutput } from './db.js';
export interface GetTranslationsItem {
id: string;
@@ -327,6 +345,25 @@ export async function initDb(
CREATE TABLE "CommentaryChapterVerse" AS SELECT * FROM source.CommentaryChapterVerse
INNER JOIN source.Commentary ON source.Commentary.id = source.CommentaryChapterVerse.commentaryId
WHERE source.Commentary.language IN ${languages};
+
+ CREATE TABLE "Dataset" AS SELECT * FROM source.Dataset
+ WHERE language IN ${languages};
+
+ CREATE TABLE "DatasetBook" AS SELECT * FROM source.DatasetBook
+ INNER JOIN source.Dataset ON source.Dataset.id = source.DatasetBook.datasetId
+ WHERE source.Dataset.language IN ${languages};
+
+ CREATE TABLE "DatasetChapter" AS SELECT * FROM source.DatasetChapter
+ INNER JOIN source.Dataset ON source.Dataset.id = source.DatasetChapter.datasetId
+ WHERE source.Dataset.language IN ${languages};
+
+ CREATE TABLE "DatasetChapterVerse" AS SELECT * FROM source.DatasetChapterVerse
+ INNER JOIN source.Dataset ON source.Dataset.id = source.DatasetChapterVerse.datasetId
+ WHERE source.Dataset.language IN ${languages};
+
+ CREATE TABLE "DatasetReference" AS SELECT * FROM source.DatasetReference
+ INNER JOIN source.Dataset ON source.Dataset.id = source.DatasetReference.datasetId
+ WHERE source.Dataset.language IN ${languages};
`);
} else {
db.exec(`
@@ -343,6 +380,11 @@ export async function initDb(
CREATE TABLE "CommentaryBook" AS SELECT * FROM source.CommentaryBook;
CREATE TABLE "CommentaryChapter" AS SELECT * FROM source.CommentaryChapter;
CREATE TABLE "CommentaryChapterVerse" AS SELECT * FROM source.CommentaryChapterVerse;
+ CREATE TABLE "Dataset" AS SELECT * FROM source.Dataset;
+ CREATE TABLE "DatasetBook" AS SELECT * FROM source.DatasetBook;
+ CREATE TABLE "DatasetChapter" AS SELECT * FROM source.DatasetChapter;
+ CREATE TABLE "DatasetChapterVerse" AS SELECT * FROM source.DatasetChapterVerse;
+ CREATE TABLE "DatasetReference" AS SELECT * FROM source.DatasetReference;
`);
}
@@ -489,6 +531,28 @@ export async function importCommentaries(
}
}
+/**
+ * Imports the API from the given directory into the database in the current working directory.
+ * @param dir The directory that the API is located in.
+ * @param options The options.
+ */
+export async function importApi(
+ dir: string,
+ options: ImportTranslationOptions
+) {
+ const db = await database.getDb(options.db);
+ try {
+ const datasets = await loadDatasetsFromDirectory(dir);
+ importDatasetOutput(db, {
+ commentaries: [],
+ translations: [],
+ datasets,
+ });
+ } finally {
+ db.close();
+ }
+}
+
export interface FetchTranslationsOptions {
/**
* Fetch all translations. If omitted, only undownloaded translations will be fetched.
diff --git a/packages/helloao-cli/cli.ts b/packages/helloao-cli/cli.ts
index c1305ca..64462fb 100644
--- a/packages/helloao-cli/cli.ts
+++ b/packages/helloao-cli/cli.ts
@@ -11,6 +11,7 @@ import {
fetchAudio,
generateTranslationFiles,
generateTranslationsFiles,
+ importApi,
importCommentaries,
importCommentary,
importTranslation,
@@ -145,6 +146,17 @@ async function start() {
});
});
+ program
+        .command('import-api <dir>')
+ .description('Imports API files from the given directory into the DB.')
+ .option('--overwrite', 'Whether to overwrite existing files.')
+ .action(async (dir: string, options: any) => {
+ await importApi(dir, {
+ ...program.opts(),
+ ...options,
+ });
+ });
+
program
.command('upload-test-translation ')
.description(
diff --git a/packages/helloao-cli/db.ts b/packages/helloao-cli/db.ts
index c3777c2..e351ab1 100644
--- a/packages/helloao-cli/db.ts
+++ b/packages/helloao-cli/db.ts
@@ -7,6 +7,8 @@ import {
DatasetCommentary,
DatasetCommentaryBook,
DatasetCommentaryProfile,
+ DatasetDataset,
+ DatasetDatasetBook,
DatasetOutput,
DatasetTranslation,
DatasetTranslationBook,
@@ -19,6 +21,10 @@ import {
OutputFile,
OutputFileContent,
CommentaryBookChapter,
+ DatasetBookChapter,
+ DatasetChapterVerseContent,
+ Dataset,
+ DatasetBook,
} from '@helloao/tools/generation/index.js';
import {
generateApiForDataset,
@@ -197,15 +203,31 @@ export async function importFileBatch(
logger.log('Generated', output.translations.length, 'translations');
logger.log('Generated', output.commentaries.length, 'commentaries');
+ importDatasetOutput(db, output);
+ insertFileMetadata(db, changedFiles);
+}
+
+/**
+ * Imports the given dataset output into the database.
+ * @param db The database to import the dataset into.
+ * @param output The dataset output to import.
+ */
+export function importDatasetOutput(db: Database, output: DatasetOutput) {
+ const logger = log.getLogger();
+
insertTranslations(db, output.translations);
updateTranslationHashes(db, output.translations);
insertCommentaries(db, output.commentaries);
updateCommentaryHashes(db, output.commentaries);
- insertFileMetadata(db, changedFiles);
+ insertDatasets(db, output.datasets ?? []);
+ updateDatasetHashes(db, output.datasets ?? []);
insertWarningMetadata(db, output.parseMessages);
logger.log(`Inserted ${output.translations.length} translations into DB`);
logger.log(`Inserted ${output.commentaries.length} commentaries into DB`);
+ if (output.datasets) {
+ logger.log(`Inserted ${output.datasets.length} datasets into DB`);
+ }
logger.log(
`Produced ${output.parseMessages?.length ?? 0} warnings/errors.`
);
@@ -1173,6 +1195,326 @@ function updateCommentaryHashes(
logger.log(`Updated.`);
}
+export function insertDatasets(db: Database, datasets: DatasetDataset[]) {
+ const translationUpsert = db.prepare(`INSERT INTO Dataset(
+ id,
+ name,
+ language,
+ textDirection,
+ licenseUrl,
+ licenseNotes,
+ website,
+ englishName
+ ) VALUES (
+ @id,
+ @name,
+ @language,
+ @textDirection,
+ @licenseUrl,
+ @licenseNotes,
+ @website,
+ @englishName
+ ) ON CONFLICT(id) DO
+ UPDATE SET
+ name=excluded.name,
+ language=excluded.language,
+ textDirection=excluded.textDirection,
+ licenseUrl=excluded.licenseUrl,
+ licenseNotes=excluded.licenseNotes,
+ website=excluded.website,
+ englishName=excluded.englishName;`);
+
+ const insertManyTranslations = db.transaction(
+ (datasets: DatasetDataset[]) => {
+ for (let dataset of datasets) {
+ translationUpsert.run({
+ id: dataset.id,
+ name: dataset.name,
+ language: dataset.language,
+ textDirection: dataset.textDirection,
+ licenseUrl: dataset.licenseUrl,
+ licenseNotes: dataset.licenseNotes,
+ website: dataset.website,
+ englishName: dataset.englishName,
+ });
+ }
+ }
+ );
+
+ insertManyTranslations(datasets);
+
+ const deleteReferences = db.prepare(`DELETE FROM DatasetReference
+ WHERE datasetId = @datasetId;`);
+
+ for (let dataset of datasets) {
+ deleteReferences.run({
+ datasetId: dataset.id,
+ });
+ insertDatasetBooks(db, dataset, dataset.books);
+ }
+}
+
+export function insertDatasetBooks(
+ db: Database,
+ dataset: DatasetDataset,
+ datasetBooks: DatasetDatasetBook[]
+) {
+ const bookUpsert = db.prepare(`INSERT INTO DatasetBook(
+ id,
+ datasetId,
+ numberOfChapters,
+ \`order\`
+ ) VALUES (
+ @id,
+ @datasetId,
+ @numberOfChapters,
+ @bookOrder
+ ) ON CONFLICT(id,datasetId) DO
+ UPDATE SET
+ numberOfChapters=excluded.numberOfChapters;`);
+
+ const insertMany = db.transaction((books: DatasetDatasetBook[]) => {
+ for (let book of books) {
+ if (!book) {
+ continue;
+ }
+ bookUpsert.run({
+ id: book.id,
+ datasetId: dataset.id,
+ numberOfChapters: book.chapters.length,
+ bookOrder: book.order ?? 9999,
+ });
+ }
+ });
+
+ insertMany(datasetBooks);
+
+ for (let book of datasetBooks) {
+ insertDatasetContent(db, dataset, book, book.chapters);
+ }
+}
+
+export function insertDatasetContent(
+ db: Database,
+ dataset: DatasetDataset,
+ book: DatasetDatasetBook,
+ chapters: DatasetBookChapter[]
+) {
+ const logger = log.getLogger();
+
+ const chapterUpsert = db.prepare(`INSERT INTO DatasetChapter(
+ datasetId,
+ bookId,
+ number,
+ json
+ ) VALUES (
+ @datasetId,
+ @bookId,
+ @number,
+ @json
+ ) ON CONFLICT(datasetId,bookId,number) DO
+ UPDATE SET
+ json=excluded.json;`);
+ const verseUpsert = db.prepare(`INSERT INTO DatasetChapterVerse(
+ datasetId,
+ bookId,
+ chapterNumber,
+ number,
+ contentJson
+ ) VALUES (
+ @datasetId,
+ @bookId,
+ @chapterNumber,
+ @number,
+ @contentJson
+ ) ON CONFLICT(datasetId,bookId,chapterNumber,number) DO
+ UPDATE SET
+ contentJson=excluded.contentJson;`);
+
+ const referenceInsert = db.prepare(`INSERT INTO DatasetReference(
+ datasetId,
+ bookId,
+ chapterNumber,
+ verseNumber,
+ referenceBookId,
+ referenceChapter,
+ referenceVerse,
+ endVerseNumber,
+ score
+ ) VALUES (
+ @datasetId,
+ @bookId,
+ @chapterNumber,
+ @verseNumber,
+ @referenceBookId,
+ @referenceChapter,
+ @referenceVerse,
+ @endVerseNumber,
+ @score
+ );`);
+
+ const insertChaptersAndVerses = db.transaction(() => {
+ for (let chapter of chapters) {
+ chapterUpsert.run({
+ datasetId: dataset.id,
+ bookId: book.id,
+ number: chapter.chapter.number,
+ json: JSON.stringify(chapter.chapter),
+ });
+
+ for (let verse of chapter.chapter.content) {
+ verseUpsert.run({
+ datasetId: dataset.id,
+ bookId: book.id,
+ chapterNumber: chapter.chapter.number,
+ number: verse.verse,
+ contentJson: JSON.stringify(verse),
+ });
+
+ for (let ref of verse.references) {
+ referenceInsert.run({
+ datasetId: dataset.id,
+ bookId: book.id,
+ chapterNumber: chapter.chapter.number,
+ verseNumber: verse.verse,
+ referenceBookId: ref.book,
+ referenceChapter: ref.chapter,
+ referenceVerse: ref.verse,
+ endVerseNumber: ref.endVerse ?? null,
+ score: ref.score ?? null,
+ });
+ }
+ }
+ }
+ });
+
+ insertChaptersAndVerses();
+}
+
+/**
+ * Updates the hashes for the datasets in the database.
+ * @param db The database to update the hashes in.
+ * @param datasets The datasets to update the hashes for.
+ */
+function updateDatasetHashes(db: Database, datasets: Dataset[]) {
+ const logger = log.getLogger();
+ logger.log(`Updating hashes for ${datasets.length} datasets.`);
+
+ const updateTranslationHash = db.prepare(
+ `UPDATE Dataset SET sha256 = @sha256 WHERE id = @datasetId;`
+ );
+ const updateBookHash = db.prepare(
+ `UPDATE DatasetBook SET sha256 = @sha256 WHERE datasetId = @datasetId AND id = @bookId;`
+ );
+ const updateChapterHash = db.prepare(
+ `UPDATE DatasetChapter SET sha256 = @sha256 WHERE datasetId = @datasetId AND bookId = @bookId AND number = @chapterNumber;`
+ );
+
+ const getBooks = db.prepare(
+ 'SELECT * FROM DatasetBook WHERE datasetId = ?;'
+ );
+ const getChapters = db.prepare(
+ 'SELECT * FROM DatasetChapter WHERE datasetId = @datasetId AND bookId = @bookId;'
+ );
+
+ for (let dataset of datasets) {
+ const commentarySha = sha256()
+ .update(dataset.id)
+ .update(dataset.name)
+ .update(dataset.language)
+ .update(dataset.licenseUrl)
+ .update(dataset.textDirection)
+ .update(dataset.website)
+ .update(dataset.englishName);
+
+ const books = getBooks.all(dataset.id) as {
+ id: string;
+ datasetId: string;
+ order: number;
+ numberOfChapters: number;
+ sha256: string;
+ }[];
+
+ for (let book of books) {
+ const chapters = getChapters.all({
+ datasetId: dataset.id,
+ bookId: book.id,
+ }) as {
+ number: string;
+ bookId: string;
+ datasetId: string;
+ json: string;
+ sha256: string;
+ }[];
+
+ const bookSha = sha256()
+ .update(book.datasetId)
+ .update(book.id)
+ .update(book.numberOfChapters)
+ .update(book.order);
+
+ for (let chapter of chapters) {
+ const hash = sha256()
+ .update(chapter.datasetId)
+ .update(chapter.bookId)
+ .update(chapter.number)
+ .update(chapter.json)
+ .digest('hex');
+
+ chapter.sha256 = hash;
+
+ bookSha.update(hash);
+ }
+
+ const updateChapters = db.transaction(() => {
+ for (let chapter of chapters) {
+ updateChapterHash.run({
+ sha256: chapter.sha256,
+ datasetId: chapter.datasetId,
+ bookId: chapter.bookId,
+ chapterNumber: chapter.number,
+ });
+ }
+ });
+
+ updateChapters();
+
+ const bookHash = bookSha.digest('hex');
+ book.sha256 = bookHash;
+
+ commentarySha.update(bookHash);
+ }
+
+ const updateBooks = db.transaction(() => {
+ for (let book of books) {
+ updateBookHash.run({
+ sha256: book.sha256,
+ datasetId: book.datasetId,
+ bookId: book.id,
+ });
+ }
+ });
+
+ updateBooks();
+
+ const hash = commentarySha.digest('hex');
+ (dataset as any).sha256 = hash;
+ }
+
+ const updateDatasets = db.transaction(() => {
+ for (let dataset of datasets) {
+ updateTranslationHash.run({
+ sha256: (dataset as any).sha256,
+ datasetId: dataset.id,
+ });
+ }
+ });
+
+ updateDatasets();
+
+ logger.log(`Updated.`);
+}
+
export function getDbPathFromDir(dir: string) {
dir = dir || process.cwd();
return path.resolve(dir, 'bible-api.db');
@@ -1302,6 +1644,7 @@ export async function* loadDatasets(
): AsyncGenerator {
yield* loadTranslationDatasets(db, perBatch, translationsToLoad);
yield* loadCommentaryDatasets(db, perBatch, translationsToLoad);
+ yield* loadDatasetDatasets(db, perBatch, translationsToLoad);
}
/**
@@ -1558,6 +1901,111 @@ export async function* loadCommentaryDatasets(
}
}
+/**
+ * Loads the datasets from the database as a dataset.
+ * @param db The database.
+ * @param perBatch The number of datasets to load per batch.
+ * @param datasetsToLoad The list of datasets to load. If not provided, all datasets will be loaded.
+ */
+export async function* loadDatasetDatasets(
+ db: PrismaClient,
+ perBatch: number = 50,
+ datasetsToLoad?: string[]
+) {
+ const logger = log.getLogger();
+ let offset = 0;
+ let pageSize = perBatch;
+
+ logger.log('Generating dataset datasets in batches of', pageSize);
+ const totalDatasets = await db.dataset.count();
+ const totalBatches = Math.ceil(totalDatasets / pageSize);
+ let batchNumber = 1;
+
+ while (true) {
+ logger.log('Generating dataset batch', batchNumber, 'of', totalBatches);
+ batchNumber++;
+
+ const datasetQuery: Prisma.DatasetFindManyArgs = {
+ skip: offset,
+ take: pageSize,
+ };
+
+ if (datasetsToLoad && datasetsToLoad.length > 0) {
+ datasetQuery.where = {
+ id: {
+ in: datasetsToLoad,
+ },
+ };
+ }
+
+ const datasets = await db.dataset.findMany(datasetQuery);
+
+ if (datasets.length <= 0) {
+ break;
+ }
+
+ const output: DatasetOutput = {
+ translations: [],
+ commentaries: [],
+ datasets: [],
+ };
+
+ for (let dataset of datasets) {
+ const datasetDataset: DatasetDataset = {
+ ...dataset,
+ textDirection: dataset.textDirection! as any,
+ books: [],
+ };
+ output.datasets!.push(datasetDataset);
+
+ const books = await db.datasetBook.findMany({
+ where: {
+ datasetId: dataset.id,
+ },
+ orderBy: {
+ order: 'asc',
+ },
+ });
+
+ for (let book of books) {
+ const chapters = await db.datasetChapter.findMany({
+ where: {
+ datasetId: dataset.id,
+ bookId: book.id,
+ },
+ orderBy: {
+ number: 'asc',
+ },
+ });
+
+ const bookChapters: DatasetBookChapter[] = chapters.map(
+ (chapter) => {
+ const bookChapter: DatasetBookChapter = {
+ chapter: JSON.parse(chapter.json),
+ };
+
+ for (let verse of bookChapter.chapter.content) {
+ verse.references.sort((a, b) => b.score - a.score);
+ }
+
+ return bookChapter;
+ }
+ );
+
+ const datasetBook: DatasetDatasetBook = {
+ ...book,
+ chapters: bookChapters,
+ };
+ datasetDataset.books.push(datasetBook);
+ }
+ }
+
+ yield output;
+
+ offset += pageSize;
+ }
+}
+
export interface SerializeApiOptions extends GenerateApiOptions {
/**
* Whether the output should be pretty-printed.
diff --git a/packages/helloao-cli/files.ts b/packages/helloao-cli/files.ts
index f23d51a..351dc6d 100644
--- a/packages/helloao-cli/files.ts
+++ b/packages/helloao-cli/files.ts
@@ -6,7 +6,7 @@ import {
readdir,
writeFile,
} from 'fs/promises';
-import { extname } from 'path';
+import { basename, extname } from 'path';
import * as path from 'path';
import { existsSync } from 'fs-extra';
import {
@@ -25,6 +25,12 @@ import { PARSER_VERSION } from '@helloao/tools/parser/usx-parser.js';
import { mergeWith } from 'lodash';
import { fromByteArray } from 'base64-js';
import { log } from '@helloao/tools';
+import { bookOrderMap } from '@helloao/tools/generation/book-order.js';
+import {
+ DatasetDataset,
+ DatasetDatasetBook,
+} from '@helloao/tools/generation/dataset.js';
+import { getBookId } from '@helloao/tools/utils.js';
/**
* Defines an interface that contains information about a serialized file.
@@ -366,6 +372,102 @@ export async function loadCommentaryFiles(
return await Promise.all(promises);
}
+/**
+ * Loads all the datasets from the given API directory structure.
+ * @param dir The directory that the datasets are located in.
+ * @returns The list of loaded datasets.
+ */
+export async function loadDatasetsFromDirectory(
+ dir: string
+): Promise<DatasetDataset[]> {
+ const logger = log.getLogger();
+
+ let datasets: DatasetDataset[] = [];
+
+ const apiDir = path.resolve(dir, 'api');
+
+ const availableDatasets = JSON.parse(
+ await readFile(path.resolve(apiDir, 'available_datasets.json'), 'utf-8')
+ );
+ datasets.push(
+ ...availableDatasets.datasets.map((d: any) => ({
+ ...d,
+ books: [],
+ }))
+ );
+
+ for (let dataset of datasets) {
+ const datasetDir = path.resolve(apiDir, 'd', dataset.id);
+ const booksList = await readdir(datasetDir);
+
+ for (let bookId of booksList) {
+ if (bookId === 'books.json') {
+ continue;
+ }
+ const id = getBookId(bookId);
+
+ if (!id) {
+ logger.warn(`Unknown book directory: ${bookId}`);
+ continue;
+ }
+
+ const book: DatasetDatasetBook = {
+ id,
+ chapters: [],
+ order: bookOrderMap.get(id)!,
+ };
+ dataset.books.push(book);
+
+ const bookDir = path.resolve(datasetDir, bookId);
+ const chapters = await readdir(bookDir);
+
+ for (let chapterFile of chapters) {
+ const chapterJson = JSON.parse(
+ await readFile(path.resolve(bookDir, chapterFile), 'utf-8')
+ );
+
+ if (chapterJson.chapter) {
+ book.chapters.push({
+ chapter: chapterJson.chapter,
+ });
+ } else if (chapterJson.content) {
+ const chapterNumber = parseInt(
+ basename(chapterFile, extname(chapterFile))
+ );
+
+ if (isNaN(chapterNumber)) {
+ logger.warn(`Unknown chapter format: ${chapterFile}`);
+ continue;
+ }
+
+ book.chapters.push({
+ chapter: {
+ number: chapterNumber,
+ content: chapterJson.content.map((c: any) => ({
+ verse: c.verse,
+ references: (c.references ?? []).map(
+ (ref: any) => ({
+ book: ref.book,
+ chapter: ref.chapter,
+ verse: ref.verse,
+ endVerse: ref.endVerse,
+ score: ref.score ?? ref.votes,
+ })
+ ),
+ })),
+ },
+ });
+ } else {
+ logger.warn(`Unknown chapter format: ${chapterFile}`);
+ continue;
+ }
+ }
+ }
+ }
+
+ return datasets;
+}
+
/**
* Loads the metadata for the given translation.
* @param translation The translation that the metadata should be loaded for.
diff --git a/packages/helloao-cli/migrations/20251028135135_add_datasets/migration.sql b/packages/helloao-cli/migrations/20251028135135_add_datasets/migration.sql
new file mode 100644
index 0000000..b056014
--- /dev/null
+++ b/packages/helloao-cli/migrations/20251028135135_add_datasets/migration.sql
@@ -0,0 +1,72 @@
+-- CreateTable
+CREATE TABLE "Dataset" (
+ "id" TEXT NOT NULL PRIMARY KEY,
+ "name" TEXT NOT NULL,
+ "website" TEXT NOT NULL,
+ "licenseUrl" TEXT NOT NULL,
+ "licenseNotes" TEXT,
+ "englishName" TEXT NOT NULL,
+ "language" TEXT NOT NULL,
+ "textDirection" TEXT NOT NULL,
+ "sha256" TEXT
+);
+
+-- CreateTable
+CREATE TABLE "DatasetBook" (
+ "id" TEXT NOT NULL,
+ "datasetId" TEXT NOT NULL,
+ "name" TEXT NOT NULL,
+ "commonName" TEXT NOT NULL,
+ "introduction" TEXT,
+ "introductionSummary" TEXT,
+ "order" INTEGER NOT NULL,
+ "numberOfChapters" INTEGER NOT NULL,
+ "sha256" TEXT,
+
+ PRIMARY KEY ("datasetId", "id"),
+ CONSTRAINT "DatasetBook_datasetId_fkey" FOREIGN KEY ("datasetId") REFERENCES "Dataset" ("id") ON DELETE RESTRICT ON UPDATE CASCADE
+);
+
+-- CreateTable
+CREATE TABLE "DatasetChapter" (
+ "number" INTEGER NOT NULL,
+ "bookId" TEXT NOT NULL,
+ "datasetId" TEXT NOT NULL,
+ "json" TEXT NOT NULL,
+ "sha256" TEXT,
+
+ PRIMARY KEY ("datasetId", "bookId", "number"),
+ CONSTRAINT "DatasetChapter_datasetId_bookId_fkey" FOREIGN KEY ("datasetId", "bookId") REFERENCES "DatasetBook" ("datasetId", "id") ON DELETE RESTRICT ON UPDATE CASCADE,
+ CONSTRAINT "DatasetChapter_datasetId_fkey" FOREIGN KEY ("datasetId") REFERENCES "Dataset" ("id") ON DELETE RESTRICT ON UPDATE CASCADE
+);
+
+-- CreateTable
+CREATE TABLE "DatasetChapterVerse" (
+ "number" INTEGER NOT NULL,
+ "chapterNumber" INTEGER NOT NULL,
+ "bookId" TEXT NOT NULL,
+ "datasetId" TEXT NOT NULL,
+ "contentJson" TEXT NOT NULL,
+ "sha256" TEXT,
+
+ PRIMARY KEY ("datasetId", "bookId", "chapterNumber", "number"),
+ CONSTRAINT "DatasetChapterVerse_datasetId_bookId_chapterNumber_fkey" FOREIGN KEY ("datasetId", "bookId", "chapterNumber") REFERENCES "DatasetChapter" ("datasetId", "bookId", "number") ON DELETE RESTRICT ON UPDATE CASCADE,
+ CONSTRAINT "DatasetChapterVerse_datasetId_bookId_fkey" FOREIGN KEY ("datasetId", "bookId") REFERENCES "DatasetBook" ("datasetId", "id") ON DELETE RESTRICT ON UPDATE CASCADE,
+ CONSTRAINT "DatasetChapterVerse_datasetId_fkey" FOREIGN KEY ("datasetId") REFERENCES "Dataset" ("id") ON DELETE RESTRICT ON UPDATE CASCADE
+);
+
+-- CreateTable
+CREATE TABLE "DatasetReference" (
+ "id" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
+ "datasetId" TEXT NOT NULL,
+ "bookId" TEXT NOT NULL,
+ "chapterNumber" INTEGER NOT NULL,
+ "verseNumber" INTEGER NOT NULL,
+ "endVerseNumber" INTEGER,
+ "score" INTEGER,
+ "sha256" TEXT,
+ CONSTRAINT "DatasetReference_datasetId_fkey" FOREIGN KEY ("datasetId") REFERENCES "Dataset" ("id") ON DELETE RESTRICT ON UPDATE CASCADE,
+ CONSTRAINT "DatasetReference_datasetId_bookId_fkey" FOREIGN KEY ("datasetId", "bookId") REFERENCES "DatasetBook" ("datasetId", "id") ON DELETE RESTRICT ON UPDATE CASCADE,
+ CONSTRAINT "DatasetReference_datasetId_bookId_chapterNumber_fkey" FOREIGN KEY ("datasetId", "bookId", "chapterNumber") REFERENCES "DatasetChapter" ("datasetId", "bookId", "number") ON DELETE RESTRICT ON UPDATE CASCADE,
+ CONSTRAINT "DatasetReference_datasetId_bookId_chapterNumber_verseNumber_fkey" FOREIGN KEY ("datasetId", "bookId", "chapterNumber", "verseNumber") REFERENCES "DatasetChapterVerse" ("datasetId", "bookId", "chapterNumber", "number") ON DELETE RESTRICT ON UPDATE CASCADE
+);
diff --git a/packages/helloao-cli/migrations/20251028144932_fix_dataset_types/migration.sql b/packages/helloao-cli/migrations/20251028144932_fix_dataset_types/migration.sql
new file mode 100644
index 0000000..96390eb
--- /dev/null
+++ b/packages/helloao-cli/migrations/20251028144932_fix_dataset_types/migration.sql
@@ -0,0 +1,50 @@
+/*
+ Warnings:
+
+ - You are about to drop the column `commonName` on the `DatasetBook` table. All the data in the column will be lost.
+ - You are about to drop the column `introduction` on the `DatasetBook` table. All the data in the column will be lost.
+ - You are about to drop the column `introductionSummary` on the `DatasetBook` table. All the data in the column will be lost.
+ - You are about to drop the column `name` on the `DatasetBook` table. All the data in the column will be lost.
+ - You are about to drop the column `sha256` on the `DatasetReference` table. All the data in the column will be lost.
+ - Added the required column `referenceBookId` to the `DatasetReference` table without a default value. This is not possible if the table is not empty.
+ - Added the required column `referenceChapter` to the `DatasetReference` table without a default value. This is not possible if the table is not empty.
+ - Added the required column `referenceVerse` to the `DatasetReference` table without a default value. This is not possible if the table is not empty.
+
+*/
+-- RedefineTables
+PRAGMA defer_foreign_keys=ON;
+PRAGMA foreign_keys=OFF;
+CREATE TABLE "new_DatasetBook" (
+ "id" TEXT NOT NULL,
+ "datasetId" TEXT NOT NULL,
+ "order" INTEGER NOT NULL,
+ "numberOfChapters" INTEGER NOT NULL,
+ "sha256" TEXT,
+
+ PRIMARY KEY ("datasetId", "id"),
+ CONSTRAINT "DatasetBook_datasetId_fkey" FOREIGN KEY ("datasetId") REFERENCES "Dataset" ("id") ON DELETE RESTRICT ON UPDATE CASCADE
+);
+INSERT INTO "new_DatasetBook" ("datasetId", "id", "numberOfChapters", "order", "sha256") SELECT "datasetId", "id", "numberOfChapters", "order", "sha256" FROM "DatasetBook";
+DROP TABLE "DatasetBook";
+ALTER TABLE "new_DatasetBook" RENAME TO "DatasetBook";
+CREATE TABLE "new_DatasetReference" (
+ "id" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
+ "datasetId" TEXT NOT NULL,
+ "bookId" TEXT NOT NULL,
+ "chapterNumber" INTEGER NOT NULL,
+ "verseNumber" INTEGER NOT NULL,
+ "referenceBookId" TEXT NOT NULL,
+ "referenceChapter" INTEGER NOT NULL,
+ "referenceVerse" INTEGER NOT NULL,
+ "endVerseNumber" INTEGER,
+ "score" INTEGER,
+ CONSTRAINT "DatasetReference_datasetId_fkey" FOREIGN KEY ("datasetId") REFERENCES "Dataset" ("id") ON DELETE RESTRICT ON UPDATE CASCADE,
+ CONSTRAINT "DatasetReference_datasetId_bookId_fkey" FOREIGN KEY ("datasetId", "bookId") REFERENCES "DatasetBook" ("datasetId", "id") ON DELETE RESTRICT ON UPDATE CASCADE,
+ CONSTRAINT "DatasetReference_datasetId_bookId_chapterNumber_fkey" FOREIGN KEY ("datasetId", "bookId", "chapterNumber") REFERENCES "DatasetChapter" ("datasetId", "bookId", "number") ON DELETE RESTRICT ON UPDATE CASCADE,
+ CONSTRAINT "DatasetReference_datasetId_bookId_chapterNumber_verseNumber_fkey" FOREIGN KEY ("datasetId", "bookId", "chapterNumber", "verseNumber") REFERENCES "DatasetChapterVerse" ("datasetId", "bookId", "chapterNumber", "number") ON DELETE RESTRICT ON UPDATE CASCADE
+);
+INSERT INTO "new_DatasetReference" ("bookId", "chapterNumber", "datasetId", "endVerseNumber", "id", "score", "verseNumber") SELECT "bookId", "chapterNumber", "datasetId", "endVerseNumber", "id", "score", "verseNumber" FROM "DatasetReference";
+DROP TABLE "DatasetReference";
+ALTER TABLE "new_DatasetReference" RENAME TO "DatasetReference";
+PRAGMA foreign_keys=ON;
+PRAGMA defer_foreign_keys=OFF;
diff --git a/packages/helloao-cli/schema.prisma b/packages/helloao-cli/schema.prisma
index 9bb6b1b..ea55b59 100644
--- a/packages/helloao-cli/schema.prisma
+++ b/packages/helloao-cli/schema.prisma
@@ -321,3 +321,107 @@ model ChapterFootnote {
@@id([translationId, bookId, chapterNumber, id])
}
+
+
+model Dataset {
+ id String @id
+ name String
+ website String
+ licenseUrl String
+ licenseNotes String?
+ englishName String
+ language String
+ textDirection String
+
+ sha256 String?
+
+ books DatasetBook[]
+ chapters DatasetChapter[]
+ verses DatasetChapterVerse[]
+ references DatasetReference[]
+}
+
+model DatasetBook {
+ id String
+
+ datasetId String
+ dataset Dataset @relation(fields: [datasetId], references: [id])
+
+ order Int
+
+ numberOfChapters Int
+
+ // The SHA-256 hash of the book
+ sha256 String?
+
+ chapters DatasetChapter[]
+ verses DatasetChapterVerse[]
+ references DatasetReference[]
+
+ @@id([datasetId, id])
+}
+
+model DatasetChapter {
+ number Int
+
+ bookId String
+ book DatasetBook @relation(fields: [datasetId, bookId], references: [datasetId, id])
+
+ datasetId String
+ dataset Dataset @relation(fields: [datasetId], references: [id])
+
+ json String // The JSON of the chapter
+
+ // The SHA-256 hash of the chapter
+ sha256 String?
+
+ verses DatasetChapterVerse[]
+ references DatasetReference[]
+
+ @@id([datasetId, bookId, number])
+}
+
+model DatasetChapterVerse {
+ number Int
+
+ chapterNumber Int
+ chapter DatasetChapter @relation(fields: [datasetId, bookId, chapterNumber], references: [datasetId, bookId, number])
+
+ bookId String
+ book DatasetBook @relation(fields: [datasetId, bookId], references: [datasetId, id])
+
+ datasetId String
+ dataset Dataset @relation(fields: [datasetId], references: [id])
+
+ contentJson String // The JSON of the verse content
+
+ // The SHA-256 hash of the verse
+ sha256 String?
+
+ references DatasetReference[]
+
+ @@id([datasetId, bookId, chapterNumber, number])
+}
+
+model DatasetReference {
+ id Int @id @default(autoincrement())
+
+ datasetId String
+ dataset Dataset @relation(fields: [datasetId], references: [id])
+
+ bookId String
+ book DatasetBook @relation(fields: [datasetId, bookId], references: [datasetId, id])
+
+ chapterNumber Int
+ chapter DatasetChapter @relation(fields: [datasetId, bookId, chapterNumber], references: [datasetId, bookId, number])
+
+ verseNumber Int
+ verse DatasetChapterVerse @relation(fields: [datasetId, bookId, chapterNumber, verseNumber], references: [datasetId, bookId, chapterNumber, number])
+
+ referenceBookId String
+ referenceChapter Int
+ referenceVerse Int
+
+ endVerseNumber Int?
+ score Int?
+}
\ No newline at end of file
diff --git a/packages/helloao-tools/generation/api.spec.ts b/packages/helloao-tools/generation/api.spec.ts
index 9609408..c18b4bc 100644
--- a/packages/helloao-tools/generation/api.spec.ts
+++ b/packages/helloao-tools/generation/api.spec.ts
@@ -6,7 +6,7 @@ import {
import Genesis from '../../../bible/bsb/01GENBSB.usfm';
import Exodus from '../../../bible/bsb/02EXOBSB.usfm';
import _1Chronicles from '../../../bible/bsb/131CHBSB.usfm';
-import { generateDataset } from './dataset.js';
+import { DatasetOutput, generateDataset } from './dataset.js';
import {
InputCommentaryMetadata,
InputFile,
@@ -2487,6 +2487,141 @@ describe('generateApiForDataset', () => {
// ]
// });
});
+
+ it('should support datasets', () => {
+ const dataset: DatasetOutput = {
+ translations: [],
+ commentaries: [],
+ datasets: [
+ {
+ id: 'default',
+ englishName: 'Default Dataset',
+ name: 'Default Dataset',
+ language: 'eng',
+ textDirection: 'ltr',
+ licenseUrl: 'https://example.com/terms.htm',
+ website: 'https://example.com',
+ books: [
+ {
+ id: 'GEN',
+ order: 1,
+ chapters: [
+ {
+ chapter: {
+ number: 1,
+ content: [
+ {
+ verse: 1,
+ references: [
+ {
+ book: 'JHN',
+ chapter: 1,
+ verse: 1,
+ endVerse: 3,
+ score: 500,
+ },
+ ],
+ },
+ ],
+ },
+ },
+ ],
+ },
+ ],
+ },
+ ],
+ };
+ const generated = generateApiForDataset(dataset);
+ const files = generateFilesForApi(generated);
+
+ const tree = fileTree(files);
+
+ const expectedDataset = {
+ id: 'default',
+ englishName: 'Default Dataset',
+ name: 'Default Dataset',
+ language: 'eng',
+ textDirection: 'ltr',
+ licenseUrl: 'https://example.com/terms.htm',
+ website: 'https://example.com',
+ availableFormats: ['json'],
+ listOfBooksApiLink: '/api/d/default/books.json',
+ numberOfBooks: 1,
+ totalNumberOfChapters: 1,
+ totalNumberOfVerses: 1,
+ totalNumberOfReferences: 1,
+ };
+
+ expect(tree).toEqual({
+ '/api/available_translations.json': {
+ translations: [],
+ },
+ '/api/available_commentaries.json': {
+ commentaries: [],
+ },
+ '/api/available_datasets.json': {
+ datasets: [expectedDataset],
+ },
+ '/api/d/default/books.json': {
+ dataset: expectedDataset,
+ books: [
+ {
+ id: 'GEN',
+ order: 1,
+ numberOfChapters: 1,
+ totalNumberOfVerses: 1,
+ totalNumberOfReferences: 1,
+ firstChapterNumber: 1,
+ lastChapterNumber: 1,
+ firstChapterApiLink: '/api/d/default/GEN/1.json',
+ lastChapterApiLink: '/api/d/default/GEN/1.json',
+ },
+ ],
+ },
+ '/api/d/default/GEN/1.json': {
+ dataset: expectedDataset,
+ book: {
+ id: 'GEN',
+ order: 1,
+ numberOfChapters: 1,
+ totalNumberOfVerses: 1,
+ totalNumberOfReferences: 1,
+ firstChapterNumber: 1,
+ lastChapterNumber: 1,
+ firstChapterApiLink: '/api/d/default/GEN/1.json',
+ lastChapterApiLink: '/api/d/default/GEN/1.json',
+ },
+ thisChapterLink: '/api/d/default/GEN/1.json',
+ nextChapterApiLink: null,
+ previousChapterApiLink: null,
+ numberOfVerses: 1,
+ numberOfReferences: 1,
+ chapter: {
+ number: 1,
+ content: [
+ {
+ verse: 1,
+ references: [
+ {
+ book: 'JHN',
+ chapter: 1,
+ verse: 1,
+ endVerse: 3,
+ score: 500,
+ },
+ ],
+ },
+ ],
+ },
+ },
+ });
+
+ // expect(availableTranslations).toEqual({
+ // translations: [
+ // expectedTranslation
+ // ]
+ // });
+ });
});
function firstXLines(content: string, x: number) {
diff --git a/packages/helloao-tools/generation/api.ts b/packages/helloao-tools/generation/api.ts
index 3dee8d8..ebaefe4 100644
--- a/packages/helloao-tools/generation/api.ts
+++ b/packages/helloao-tools/generation/api.ts
@@ -3,6 +3,9 @@ import {
CommentaryBook,
CommentaryBookChapter,
CommentaryProfile,
+ Dataset,
+ DatasetBook,
+ DatasetBookChapter,
OutputFile,
Translation,
TranslationBook,
@@ -74,6 +77,25 @@ export interface ApiOutput {
*/
commentaryProfileContents: ApiCommentaryProfileContent[];
+ /**
+ * The list of available datasets.
+ * This maps to the /api/available_datasets.json endpoint.
+ */
+ availableDatasets?: ApiAvailableDatasets;
+
+ /**
+ * The list of books for each dataset.
+ * This maps to the /api/d/:datasetId/books.json endpoint.
+ */
+ datasetBooks?: ApiDatasetBooks[];
+
+ /**
+ * The list of chapters for each dataset book.
+ * This maps to the following endpoint:
+ * - /api/d/:datasetId/:bookId/:chapterNumber.json
+ */
+ datasetBookChapters?: ApiDatasetBookChapter[];
+
/**
* The path prefix that the API should use.
*/
@@ -102,6 +124,61 @@ export interface ApiAvailableCommentaries {
commentaries: ApiCommentary[];
}
+/**
+ * The list of available datasets.
+ * Maps to the /api/available_datasets.json endpoint.
+ */
+export interface ApiAvailableDatasets {
+ datasets: ApiDataset[];
+}
+
+/**
+ * Defines a dataset that is used in the API.
+ */
+export interface ApiDataset extends Dataset {
+ /**
+ * The API link for the list of books for this dataset.
+ */
+ listOfBooksApiLink: string;
+
+ /**
+ * The available list of formats.
+ */
+ availableFormats: 'json'[];
+
+ /**
+ * The number of books that are contained in this dataset.
+ */
+ numberOfBooks: number;
+
+ /**
+ * The total number of chapters that are contained in this dataset.
+ */
+ totalNumberOfChapters: number;
+
+ /**
+ * The total number of verses that are contained in this dataset.
+ */
+ totalNumberOfVerses: number;
+
+ /**
+ * The total number of references that are contained in this dataset.
+ */
+ totalNumberOfReferences: number;
+
+ /**
+ * Gets the name of the language that the dataset is in.
+ * Null or undefined if the name of the language is not known.
+ */
+ languageName?: string;
+
+ /**
+ * Gets the name of the language in English.
+ * Null or undefined if the language doesn't have an english name.
+ */
+ languageEnglishName?: string;
+}
+
/**
* Defines a translation that is used in the API.
*/
@@ -255,6 +332,21 @@ export interface ApiCommentaryBooks {
books: ApiCommentaryBook[];
}
+/**
+ * Defines an interface that contains information about the books that are available for a dataset.
+ */
+export interface ApiDatasetBooks {
+ /**
+ * The dataset information for the books.
+ */
+ dataset: ApiDataset;
+
+ /**
+ * The list of books that are available for the dataset.
+ */
+ books: ApiDatasetBook[];
+}
+
/**
* Defines an interface that contains information about the profiles that are available for a commentary.
*/
@@ -363,6 +455,46 @@ export interface ApiCommentaryBook extends CommentaryBook {
totalNumberOfVerses: number;
}
+/**
+ * Defines an interface that contains information about a book in a dataset.
+ */
+export interface ApiDatasetBook extends DatasetBook {
+ /**
+ * The number of the first chapter in the book.
+ */
+ firstChapterNumber: number;
+
+ /**
+ * The link to the first chapter of the book.
+ */
+ firstChapterApiLink: string;
+
+ /**
+ * The number of the last chapter in the book.
+ */
+ lastChapterNumber: number;
+
+ /**
+ * The link to the last chapter of the book.
+ */
+ lastChapterApiLink: string;
+
+ /**
+ * The number of chapters that the book contains.
+ */
+ numberOfChapters: number;
+
+ /**
+ * The number of verses that the book contains.
+ */
+ totalNumberOfVerses: number;
+
+ /**
+ * The number of references that the book contains.
+ */
+ totalNumberOfReferences: number;
+}
+
/**
* Defines an interface that contains information about a book chapter.
*/
@@ -449,6 +581,48 @@ export interface ApiCommentaryBookChapter extends CommentaryBookChapter {
numberOfVerses: number;
}
+/**
+ * Defines an interface that contains information about a book chapter.
+ */
+export interface ApiDatasetBookChapter extends DatasetBookChapter {
+ /**
+ * The dataset information for the book chapter.
+ */
+ dataset: ApiDataset;
+
+ /**
+ * The book information for the book chapter.
+ */
+ book: ApiDatasetBook;
+
+ /**
+ * The link to this chapter.
+ */
+ thisChapterLink: string;
+
+ /**
+ * The link to the next chapter.
+ * Null if this is the last chapter in the dataset.
+ */
+ nextChapterApiLink: string | null;
+
+ /**
+ * The link to the previous chapter.
+ * Null if this is the first chapter in the dataset.
+ */
+ previousChapterApiLink: string | null;
+
+ /**
+ * The number of verses that the chapter contains.
+ */
+ numberOfVerses: number;
+
+ /**
+ * The number of references that the chapter contains.
+ */
+ numberOfReferences: number;
+}
+
export interface ApiTranslationBookChapterAudio {
/**
* The chapter that the audio is for.
@@ -895,6 +1069,139 @@ export function generateApiForDataset(
api.commentaryProfiles.push(commentaryProfiles);
}
+ for (let { books, ...datasetInfo } of dataset.datasets ?? []) {
+ const apiDataset: ApiDataset = {
+ ...datasetInfo,
+ availableFormats: ['json'],
+ listOfBooksApiLink: listOfDatasetBooksApiLink(
+ datasetInfo.id,
+ apiPathPrefix
+ ),
+ numberOfBooks: books.length,
+ totalNumberOfChapters: 0,
+ totalNumberOfVerses: 0,
+ totalNumberOfReferences: 0,
+ languageName: getNativeName
+ ? (getNativeName(datasetInfo.language) ?? undefined)
+ : undefined,
+ languageEnglishName: getEnglishName
+ ? (getEnglishName(datasetInfo.language) ?? undefined)
+ : undefined,
+ };
+
+ const datasetBooks: ApiDatasetBooks = {
+ dataset: apiDataset,
+ books: [],
+ };
+
+ let datasetChapters: ApiDatasetBookChapter[] = [];
+
+ for (let { chapters, ...book } of books) {
+ const firstChapterNumber = chapters[0]?.chapter.number ?? null;
+ const lastChapterNumber =
+ chapters[chapters.length - 1]?.chapter.number ?? null;
+ const apiBook: ApiDatasetBook = {
+ ...book,
+ firstChapterNumber,
+ firstChapterApiLink: bookDatasetChapterApiLink(
+ datasetInfo.id,
+ book.id,
+ firstChapterNumber,
+ 'json',
+ apiPathPrefix
+ ),
+ lastChapterNumber,
+ lastChapterApiLink: bookDatasetChapterApiLink(
+ datasetInfo.id,
+ book.id,
+ lastChapterNumber,
+ 'json',
+ apiPathPrefix
+ ),
+ numberOfChapters: chapters.length,
+ totalNumberOfVerses: 0,
+ totalNumberOfReferences: 0,
+ };
+
+ for (let { chapter } of chapters) {
+ const apiBookChapter: ApiDatasetBookChapter = {
+ dataset: apiDataset,
+ book: apiBook,
+ chapter: chapter,
+ thisChapterLink: bookDatasetChapterApiLink(
+ datasetInfo.id,
+ book.id,
+ chapter.number,
+ 'json',
+ apiPathPrefix
+ ),
+ nextChapterApiLink: null,
+ previousChapterApiLink: null,
+ numberOfVerses: chapter.content.length,
+ numberOfReferences: 0,
+ };
+
+                // Count the references across all verses in the chapter.
+ for (let verse of chapter.content) {
+ apiBookChapter.numberOfReferences +=
+ verse.references.length;
+ }
+
+ apiBook.totalNumberOfVerses += apiBookChapter.numberOfVerses;
+ apiBook.totalNumberOfReferences +=
+ apiBookChapter.numberOfReferences;
+
+ datasetChapters.push(apiBookChapter);
+ if (!api.datasetBookChapters) {
+ api.datasetBookChapters = [];
+ }
+ api.datasetBookChapters.push(apiBookChapter);
+ }
+
+ datasetBooks.books.push(apiBook);
+
+ apiDataset.totalNumberOfChapters += apiBook.numberOfChapters;
+ apiDataset.totalNumberOfVerses += apiBook.totalNumberOfVerses;
+ apiDataset.totalNumberOfReferences +=
+ apiBook.totalNumberOfReferences;
+ }
+
+ for (let i = 0; i < datasetChapters.length; i++) {
+ if (i > 0) {
+ datasetChapters[i].previousChapterApiLink =
+ bookDatasetChapterApiLink(
+ datasetInfo.id,
+ datasetChapters[i - 1].book.id,
+ datasetChapters[i - 1].chapter.number,
+ 'json',
+ apiPathPrefix
+ );
+ }
+
+ if (i < datasetChapters.length - 1) {
+ datasetChapters[i].nextChapterApiLink =
+ bookDatasetChapterApiLink(
+ datasetInfo.id,
+ datasetChapters[i + 1].book.id,
+ datasetChapters[i + 1].chapter.number,
+ 'json',
+ apiPathPrefix
+ );
+ }
+ }
+
+ if (!api.availableDatasets) {
+ api.availableDatasets = {
+ datasets: [],
+ };
+ }
+ api.availableDatasets.datasets.push(apiDataset);
+ if (!api.datasetBooks) {
+ api.datasetBooks = [];
+ }
+ api.datasetBooks.push(datasetBooks);
+ }
+
return api;
function getBookLink(book: TranslationBook | CommentaryBook): string {
@@ -968,6 +1275,32 @@ export function generateFilesForApi(api: ApiOutput): OutputFile[] {
files.push(jsonFile(bookChapter.thisChapterLink, bookChapter));
}
+ if (api.availableDatasets) {
+ files.push(
+ jsonFile(
+ `${api.pathPrefix}/api/available_datasets.json`,
+ api.availableDatasets,
+ true
+ )
+ );
+ }
+
+ if (api.datasetBooks) {
+ for (let datasetBook of api.datasetBooks) {
+ files.push(
+ jsonFile(datasetBook.dataset.listOfBooksApiLink, datasetBook)
+ );
+ }
+ }
+
+ if (api.datasetBookChapters) {
+ for (let datasetBookChapter of api.datasetBookChapters) {
+ files.push(
+ jsonFile(datasetBookChapter.thisChapterLink, datasetBookChapter)
+ );
+ }
+ }
+
// for (let audio of api.translationBookChapterAudio) {
// files.push(downloadedFile(audio.link, audio.originalUrl));
// }
@@ -1016,6 +1349,18 @@ export function listOfCommentaryBooksApiLink(
return `${prefix}/api/c/${commentaryId}/books.json`;
}
+/**
+ * Gets the API Link for the list of books endpoint for a dataset.
+ * @param datasetId The ID of the dataset.
+ * @returns
+ */
+export function listOfDatasetBooksApiLink(
+ datasetId: string,
+ prefix: string = ''
+): string {
+ return `${prefix}/api/d/${datasetId}/books.json`;
+}
+
/**
* Getes the API link for a book chapter.
* @param translationId The ID of the translation.
@@ -1066,6 +1411,25 @@ export function bookChapterAudioApiLink(
)}/${chapterNumber}.${reader}.mp3`;
}
+/**
+ * Gets the API link for a dataset book chapter.
+ * @param translationId The ID of the dataset.
+ * @param commonName The ID of the book.
+ * @param chapterNumber The number of the chapter.
+ * @param extension The extension of the file.
+ */
+export function bookDatasetChapterApiLink(
+ translationId: string,
+ commonName: string,
+ chapterNumber: number,
+ extension: string,
+ prefix: string = ''
+) {
+ return `${prefix}/api/d/${translationId}/${replaceSpacesWithUnderscores(
+ commonName
+ )}/${chapterNumber}.${extension}`;
+}
+
/**
* Gets the API link for a profile.
* @param translationId The ID of the translation.
diff --git a/packages/helloao-tools/generation/common-types.ts b/packages/helloao-tools/generation/common-types.ts
index c4eac6a..c5b8442 100644
--- a/packages/helloao-tools/generation/common-types.ts
+++ b/packages/helloao-tools/generation/common-types.ts
@@ -203,6 +203,48 @@ export interface Commentary {
textDirection: 'ltr' | 'rtl';
}
+export interface Dataset {
+ /**
+ * The ID of the dataset.
+ */
+ id: string;
+
+ /**
+ * The name of the dataset.
+ */
+ name: string;
+
+ /**
+ * The website for the dataset.
+ */
+ website: string;
+
+ /**
+ * The URL that the license for the dataset can be found.
+ */
+ licenseUrl: string;
+
+ /**
+ * The API-added notes for the license.
+ */
+ licenseNotes?: string | null;
+
+ /**
+ * The English name for the dataset.
+ */
+ englishName: string;
+
+ /**
+ * The ISO 639 3-letter language tag that the dataset is primarily in.
+ */
+ language: string;
+
+ /**
+ * The direction that the language is written in.
+ */
+ textDirection: 'ltr' | 'rtl';
+}
+
/**
* Defines an interface that contains information about a book.
*/
@@ -275,6 +317,68 @@ export interface CommentaryBook {
order: number;
}
+/**
+ * Defines an interface that contains information about a dataset book.
+ */
+export interface DatasetBook {
+ /**
+ * The ID of the book. Should match the USFM book ID.
+ */
+ id: string;
+
+ /**
+ * The order of the book in the Bible.
+ */
+ order: number;
+}
+
+/**
+ * Defines an interface that contains information about a chapter in a dataset.
+ */
+export interface DatasetBookChapter {
+ /**
+ * The data for the chapter.
+ */
+ chapter: DatasetChapterData;
+}
+
+export interface DatasetChapterData {
+ /**
+ * The number of the chapter.
+ */
+ number: number;
+
+ /**
+ * The content of the chapter.
+ */
+ content: DatasetChapterVerseContent[];
+}
+
+/**
+ * Defines an interface that contains information about a verse in a dataset chapter.
+ */
+export interface DatasetChapterVerseContent {
+ /**
+ * The number of the verse.
+ */
+ verse: number;
+
+ /**
+ * The list of references for the verse.
+ */
+ references: ScoredVerseRef[];
+}
+
+/**
+ * Defines an interface that contains information about a verse reference that has an arbitrary score attached to it.
+ */
+export interface ScoredVerseRef extends VerseRef {
+ /**
+ * The score of the verse reference.
+ */
+ score: number;
+}
+
/**
* Defines an interface that contains information about a profile in a commentary.
*/
diff --git a/packages/helloao-tools/generation/dataset.ts b/packages/helloao-tools/generation/dataset.ts
index 104407f..5d67a06 100644
--- a/packages/helloao-tools/generation/dataset.ts
+++ b/packages/helloao-tools/generation/dataset.ts
@@ -14,6 +14,9 @@ import {
Translation,
TranslationBook,
TranslationBookChapter,
+ Dataset as CommonDataset,
+ DatasetBook,
+ DatasetBookChapter,
} from './common-types.js';
import {
bookIdMap as defaultBookIdMap,
@@ -46,6 +49,11 @@ export interface DatasetOutput {
*/
commentaries: DatasetCommentary[];
+ /**
+ * The list of datasets that are available in the output.
+ */
+ datasets?: DatasetDataset[];
+
parseMessages?: {
[key: string]: ParseMessage[];
};
@@ -103,6 +111,14 @@ export interface DatasetCommentaryProfile extends CommentaryProfile {
content: string[];
}
+export interface DatasetDataset extends CommonDataset {
+ books: DatasetDatasetBook[];
+}
+
+export interface DatasetDatasetBook extends DatasetBook {
+ chapters: DatasetBookChapter[];
+}
+
/**
* Generates a list of output files from the given list of input files.
* @param file The list of files.