diff --git a/.env b/.env
index e73693c0b..41ce69bf7 100644
--- a/.env
+++ b/.env
@@ -68,8 +68,16 @@ CHECK_ALL_BOOKS_FOR_LANGUAGE=true
# When true the system will acquire each git repo by downloading its
# master.zip file. When false it will clone the repo. Both approaches
# are heavily optimized by taking advantage of curl and git options.
+# In practice, not every resource is available by download, whereas
+# all of them are available by cloning, hence the default of false.
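+# For example, cloning can use shallow options like "git clone --depth 1",
+# while downloading fetches the repo's master.zip via curl.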
DOWNLOAD_ASSETS=false
+# When false, the interleave-by-verse layout renders the TW words
+# associated with a verse as a horizontal, comma-delimited list;
+# when true, it renders them as a vertical list with one word per
+# line.
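+# For example (hypothetical words): false renders "grace, mercy, faith";
+# true renders the same words one per line in an HTML list.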
+TW_WORD_LIST_VERTICAL=false
+
# * http://localhost:3000 covers requests originating from the case
# where 'npm run dev' is invoked to run vite (to run svelte js frontend)
# outside Docker. This results in vite's development mode which runs on
diff --git a/backend/doc/config.py b/backend/doc/config.py
index fb1db2de7..2c1b6c323 100755
--- a/backend/doc/config.py
+++ b/backend/doc/config.py
@@ -2,7 +2,7 @@
import logging
from logging import config as lc
-from typing import Sequence, final
+from typing import Mapping, Sequence, final
import yaml
from pydantic import EmailStr, HttpUrl
@@ -35,17 +35,55 @@ class Settings(BaseSettings):
"usfm",
]
+ # This can be expanded to include any additional resource types (if
+ # there are any) that we want to make available to users. These are
+ # all the relevant types I found in the data API.
+ RESOURCE_TYPE_CODES_AND_NAMES: Mapping[str, str] = {
+ "ayt": "Bahasa Indonesian Bible",
+ "bc": "Bible Commentary",
+ "blv": "Portuguese Bíblia Livre",
+ "cuv": "新标点和合本",
+ "f10": "French Louis Segond 1910 Bible",
+ "nav": "New Arabic Version (Ketab El Hayat)",
+ "reg": "Regular",
+ "rg": "NT Survey Reviewers' Guide",
+ "tn": "Translation Notes",
+ "tn-condensed": "Condensed Translation Notes",
+ "tq": "Translation Questions",
+ "tw": "Translation Words",
+ # "udb": "Unlocked Dynamic Bible", # Content team doesn't want udb used TODO (just for English or ?)
+ "ugnt": "unfoldingWord® Greek New Testament",
+ "uhb": "unfoldingWord® Hebrew Bible",
+ "ulb": "Unlocked Literal Bible",
+ }
+
SHOW_TN_BOOK_INTRO: bool = True
+ # SHOW_BC_BOOK_INTRO: bool = True
+ # SHOW_TN_CHAPTER_INTRO: bool = True
+ # SHOW_TN_BOOK_INTRO_IN_VERSIFIED_CONTEXT: bool = True
+ # SHOW_BC_BOOK_INTRO_IN_VERSIFIED_CONTEXT: bool = True
+ # SHOW_TN_CHAPTER_INTRO_IN_VERSIFIED_CONTEXT: bool = True
+ # SHOW_BC_CHAPTER_COMMENTARY_IN_VERSIFIED_CONTEXT: bool = True
+ # SHOW_RG_CHAPTER_COMMENTARY_IN_VERSIFIED_CONTEXT: bool = True
CHECK_USFM: bool
USE_LOCALIZED_BOOK_NAME: bool
CHECK_ALL_BOOKS_FOR_LANGUAGE: bool
- BOOK_NAME_FMT_STR: str = '<h2 style="text-align: center;">{}</h2>'
+ TRANSLATION_WORD_VERSE_SECTION_HEADER_STR: str = "<h4>Uses:</h4>"
+ TRANSLATION_WORD_VERSE_REF_ITEM_FMT_STR: str = (
+ '<li>{} {}:{}</li>'
+ )
+ UNORDERED_LIST_BEGIN_STR: str = "<ul>"
+ UNORDERED_LIST_END_STR: str = "</ul>"
+ VERSE_SPAN_FMT_STR: str = '<span>{}</span>'
+ BOOK_NAME_FMT_STR: str = "<h2>{}</h2>"
+ LEFT_ALIGNED_HEADER_FMT_STR: str = '<h3 style="text-align: left;">{}</h3>'
END_OF_CHAPTER_HTML: str = '<div class="end-of-chapter"></div>'
HR: str = "<hr/>"
+ TW_WORD_LIST_VERTICAL: bool = True
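+ # Default when unset; the .env file overrides this to false.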
- DOWNLOAD_ASSETS: bool # If true then download assets, else clone assets
+ DOWNLOAD_ASSETS: bool = False # If true then download assets, else clone assets
def logger(self, name: str) -> logging.Logger:
"""
diff --git a/backend/doc/domain/assembly_strategies/assembly_strategies_book_then_lang_by_chapter.py b/backend/doc/domain/assembly_strategies/assembly_strategies_book_then_lang_by_chapter.py
index cf094a48c..61f6a7783 100644
--- a/backend/doc/domain/assembly_strategies/assembly_strategies_book_then_lang_by_chapter.py
+++ b/backend/doc/domain/assembly_strategies/assembly_strategies_book_then_lang_by_chapter.py
@@ -26,8 +26,11 @@
TWBook,
USFMBook,
)
+from doc.domain.parsing import handle_split_chapter_into_verses
from doc.reviewers_guide.model import RGBook
+from doc.utils.list_utils import unique_list_of_strings
from doc.utils.number_utils import is_even
+from doc.utils.tw_utils import translation_words_for_content
logger = settings.logger(__name__)
@@ -40,7 +43,7 @@
HTML_COLUMN_RIGHT_BEGIN: str = '<div class="column-right">'
-def assemble_content_by_book_then_lang(
+def assemble_content_by_chapter(
usfm_books: Sequence[USFMBook],
tn_books: Sequence[TNBook],
tq_books: Sequence[TQBook],
@@ -51,17 +54,16 @@ def assemble_content_by_book_then_lang(
use_section_visual_separator: bool,
use_two_column_layout_for_tn_notes: bool,
use_two_column_layout_for_tq_notes: bool,
+ show_tn_book_intro: bool,
+ show_bc_book_intro: bool,
+ show_tn_chapter_intro: bool,
book_names: Mapping[str, str] = BOOK_NAMES,
book_id_map: dict[str, int] = BOOK_ID_MAP,
) -> str:
- """
- Assemble by book then by language in alphabetic order before
- delegating more atomic ordering/interleaving to an assembly
- sub-strategy.
- """
content = []
# Collect and deduplicate book codes
- all_book_codes = (
+ all_book_codes = list(
{usfm_book.book_code for usfm_book in usfm_books}
.union(tn_book.book_code for tn_book in tn_books)
.union(tq_book.book_code for tq_book in tq_books)
@@ -69,10 +71,9 @@ def assemble_content_by_book_then_lang(
.union(bc_book.book_code for bc_book in bc_books)
.union(rg_book.book_code for rg_book in rg_books)
)
- most_book_codes = list(all_book_codes)
# Cache book_id_map lookup
book_codes_sorted = sorted(
- most_book_codes, key=lambda book_code: book_id_map[book_code]
+ all_book_codes, key=lambda book_code: book_id_map[book_code]
)
for book_code in book_codes_sorted:
selected_usfm_books = [
@@ -108,6 +109,9 @@ def assemble_content_by_book_then_lang(
use_section_visual_separator,
use_two_column_layout_for_tn_notes,
use_two_column_layout_for_tq_notes,
+ show_tn_book_intro,
+ show_bc_book_intro,
+ show_tn_chapter_intro,
)
)
elif (
@@ -129,6 +133,9 @@ def assemble_content_by_book_then_lang(
use_section_visual_separator,
use_two_column_layout_for_tn_notes,
use_two_column_layout_for_tq_notes,
+ show_tn_book_intro,
+ show_bc_book_intro,
+ show_tn_chapter_intro,
)
)
elif (
@@ -150,6 +157,9 @@ def assemble_content_by_book_then_lang(
selected_rg_books,
use_section_visual_separator,
use_two_column_layout_for_tq_notes,
+ show_tn_book_intro,
+ show_bc_book_intro,
+ show_tn_chapter_intro,
)
)
elif (
@@ -195,6 +205,69 @@ def assemble_content_by_book_then_lang(
return "".join(content)
+def assemble_content_by_verse_chapter_at_a_time(
+ usfm_books: Sequence[USFMBook],
+ tn_books: Sequence[TNBook],
+ tq_books: Sequence[TQBook],
+ tw_books: Sequence[TWBook],
+ bc_books: Sequence[BCBook],
+ rg_books: Sequence[RGBook],
+ assembly_layout_kind: AssemblyLayoutEnum,
+ use_section_visual_separator: bool,
+ use_two_column_layout_for_tn_notes: bool,
+ use_two_column_layout_for_tq_notes: bool,
+ show_tn_book_intro: bool,
+ show_bc_book_intro: bool,
+ show_tn_chapter_intro: bool,
+ show_bc_chapter_commentary: bool,
+ show_rg_chapter_commentary: bool,
+ book_names: Mapping[str, str] = BOOK_NAMES,
+ book_id_map: dict[str, int] = BOOK_ID_MAP,
+) -> str:
+ content = []
+ if not usfm_books:  # USFM not provided, so versification does not apply
+ content.extend(
+ assemble_usfm_by_chapter(
+ usfm_books,
+ tn_books,
+ tq_books,
+ tw_books,
+ bc_books,
+ rg_books,
+ use_section_visual_separator,
+ use_two_column_layout_for_tn_notes,
+ use_two_column_layout_for_tq_notes,
+ show_tn_book_intro,
+ show_bc_book_intro,
+ show_tn_chapter_intro,
+ # show_bc_chapter_commentary,
+ )
+ )
+ elif usfm_books and (
+ assembly_layout_kind == AssemblyLayoutEnum.ONE_COLUMN
+ or assembly_layout_kind == AssemblyLayoutEnum.ONE_COLUMN_COMPACT
+ ):
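+ # Versified, verse-interleaved output is only produced for one-column layouts.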
+ content.extend(
+ assemble_usfm_by_verse_chapter_at_a_time(
+ usfm_books,
+ tn_books,
+ tq_books,
+ tw_books,
+ bc_books,
+ rg_books,
+ use_section_visual_separator,
+ use_two_column_layout_for_tn_notes,
+ use_two_column_layout_for_tq_notes,
+ show_tn_book_intro,
+ show_bc_book_intro,
+ show_tn_chapter_intro,
+ show_bc_chapter_commentary,
+ show_rg_chapter_commentary,
+ )
+ )
+ return "".join(content)
+
+
def assemble_usfm_by_chapter(
usfm_books: Sequence[USFMBook],
tn_books: Sequence[TNBook],
@@ -205,118 +278,430 @@ def assemble_usfm_by_chapter(
use_section_visual_separator: bool,
use_two_column_layout_for_tn_notes: bool,
use_two_column_layout_for_tq_notes: bool,
+ show_tn_book_intro: bool,
+ show_bc_book_intro: bool,
+ show_tn_chapter_intro: bool,
end_of_chapter_html: str = settings.END_OF_CHAPTER_HTML,
close_direction_html: str = "
",
hr: str = settings.HR,
book_chapters: Mapping[str, int] = BOOK_CHAPTERS,
- show_tn_book_intro: bool = settings.SHOW_TN_BOOK_INTRO,
fmt_str: str = settings.BOOK_NAME_FMT_STR,
+ resource_type_name_fmt_str: str = settings.LEFT_ALIGNED_HEADER_FMT_STR,
+ tw_word_list_vertical: bool = settings.TW_WORD_LIST_VERTICAL,
) -> list[str]:
"""
Construct the HTML for the case where at least one USFM resource
exists, using a one-column layout.
"""
-
content = []
-
-
-
-
-
-
if show_tn_book_intro:
for tn_book in tn_books:
content.append(tn_language_direction_html(tn_book))
book_intro_ = tn_book_intro(tn_book, use_section_visual_separator)
book_intro_adj = adjust_book_intro_headings(book_intro_)
- content.append(book_intro_adj)
+ if book_intro_adj:
+ content.append(
+ resource_type_name_fmt_str.format(tn_book.resource_type_name)
+ )
+ content.append(book_intro_adj)
content.append(close_direction_html)
- for bc_book in bc_books:
- content.append(bc_book_intro(bc_book, use_section_visual_separator))
+ if show_bc_book_intro:
+ for bc_book in bc_books:
+ bc_book_intro_ = bc_book_intro(bc_book, use_section_visual_separator)
+ if bc_book_intro_:
+ content.append(
+ resource_type_name_fmt_str.format(bc_book.resource_type_name)
+ )
+ content.append(bc_book_intro_)
book_codes = {usfm_book.book_code for usfm_book in usfm_books}
for book_code in book_codes:
num_chapters = book_chapters[book_code]
for chapter_num in range(1, num_chapters + 1):
- for tn_book in [
- tn_book for tn_book in tn_books if tn_book.book_code == book_code
- ]:
- if chapter_num in tn_book.chapters:
- content.append(tn_language_direction_html(tn_book))
- content.append(
- chapter_intro(
- tn_book, chapter_num, use_section_visual_separator
- )
+ if show_tn_chapter_intro:
+ for tn_book in [
+ tn_book for tn_book in tn_books if tn_book.book_code == book_code
+ ]:
+ tn_chapter_intro_ = chapter_intro(
+ tn_book, chapter_num, use_section_visual_separator
)
- content.append(close_direction_html)
+ if tn_chapter_intro_:
+ content.append(tn_language_direction_html(tn_book))
+ content.append(
+ resource_type_name_fmt_str.format(
+ tn_book.resource_type_name
+ )
+ )
+ content.append(tn_chapter_intro_)
+ content.append(close_direction_html)
for bc_book in [
bc_book for bc_book in bc_books if bc_book.book_code == book_code
]:
- if chapter_num in bc_book.chapters:
+ chapter_commentary_ = chapter_commentary(
+ bc_book, chapter_num, use_section_visual_separator
+ )
+ if chapter_commentary_:
content.append(
- chapter_commentary(
- bc_book, chapter_num, use_section_visual_separator
- )
+ resource_type_name_fmt_str.format(bc_book.resource_type_name)
)
+ content.append(chapter_commentary_)
for usfm_book in [
usfm_book
for usfm_book in usfm_books
if usfm_book.book_code == book_code
]:
- # Add the book title, e.g., 1 Peter
content.append(fmt_str.format(usfm_book.national_book_name))
if chapter_num in usfm_book.chapters:
content.append(usfm_language_direction_html(usfm_book))
+ content.append(
+ resource_type_name_fmt_str.format(usfm_book.resource_type_name)
+ )
content.append(usfm_book.chapters[chapter_num].content)
content.append(close_direction_html)
- if not has_footnotes(usfm_book.chapters[chapter_num].content):
+ if (
+ not has_footnotes(usfm_book.chapters[chapter_num].content)
+ and use_section_visual_separator
+ ):
content.append(hr)
+ # Add list of tw words used in chapter
+ selected_tw_books = [
+ tw_book
+ for tw_book in tw_books
+ if tw_book.book_code == book_code
+ ]
+ if selected_tw_books:
+ tw_book = selected_tw_books[0]
+ words = translation_words_for_content(
+ tw_book, usfm_book.chapters[chapter_num].content
+ )
+ unique_words = unique_list_of_strings(words)
+ if unique_words:
+ content.append(fmt_str.format(tw_book.resource_type_name))
+ if tw_word_list_vertical:
+ html = (
+ "<ul>\n"
+ + "\n".join(
+ [
+ f"<li>{localized_word}</li>"
+ for localized_word, word in unique_words
+ ]
+ )
+ + "</ul>"
+ )
+ else:
+ html = ", ".join(
+ [
+ f"{localized_word}"
+ for localized_word, word in unique_words
+ ]
+ )
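+ # e.g., vertical yields "<ul>\n<li>grace</li>\n<li>mercy</li>\n</ul>";
+ # horizontal yields "grace, mercy" (hypothetical words).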
+ logger.debug("tw links html: %s", html)
+ content.append(html)
# Add the interleaved tn notes
tn_verses = None
for tn_book in [
tn_book for tn_book in tn_books if tn_book.book_code == book_code
]:
- if chapter_num in tn_book.chapters:
- tn_verses = tn_chapter_verses(
- tn_book,
- chapter_num,
- use_section_visual_separator,
- use_two_column_layout_for_tn_notes,
+ tn_verses = tn_chapter_verses(
+ tn_book,
+ chapter_num,
+ use_section_visual_separator,
+ use_two_column_layout_for_tn_notes,
+ )
+ if tn_verses:
+ content.append(tn_language_direction_html(tn_book))
+ content.append(
+ resource_type_name_fmt_str.format(tn_book.resource_type_name)
)
- if tn_verses:
- content.append(tn_language_direction_html(tn_book))
- content.append(tn_verses)
- content.append(close_direction_html)
+ content.append(tn_verses)
+ content.append(close_direction_html)
tq_verses = None
for tq_book in [
tq_book for tq_book in tq_books if tq_book.book_code == book_code
]:
- if chapter_num in tq_book.chapters:
- tq_verses = tq_chapter_verses(
- tq_book,
- chapter_num,
- use_section_visual_separator,
- use_two_column_layout_for_tq_notes,
+ tq_verses = tq_chapter_verses(
+ tq_book,
+ chapter_num,
+ use_section_visual_separator,
+ use_two_column_layout_for_tq_notes,
+ )
+ if tq_verses:
+ content.append(tq_language_direction_html(tq_book))
+ content.append(
+ resource_type_name_fmt_str.format(tq_book.resource_type_name)
)
- if tq_verses:
- content.append(tq_language_direction_html(tq_book))
- content.append(tq_verses)
- content.append(close_direction_html)
+ content.append(tq_verses)
+ content.append(close_direction_html)
rg_verses = None
for rg_book in [
- rg_book
- for rg_book in rg_books
- if rg_book.book_code == book_code
- # if rg_book.lang_code == lang_code
- # and rg_book.book_code == book_code
+ rg_book for rg_book in rg_books if rg_book.book_code == book_code
]:
- if chapter_num in rg_book.chapters:
- rg_verses = rg_chapter_verses(
- rg_book, chapter_num, use_section_visual_separator
+ rg_verses = rg_chapter_verses(
+ rg_book, chapter_num, use_section_visual_separator
+ )
+ if rg_verses:
+ content.append(rg_language_direction_html(rg_book))
+ content.append(rg_verses)
+ content.append(close_direction_html)
+ content.append(end_of_chapter_html)
+ return content
+
+
+def assemble_usfm_by_verse_chapter_at_a_time(
+ usfm_books: Sequence[USFMBook],
+ tn_books: Sequence[TNBook],
+ tq_books: Sequence[TQBook],
+ tw_books: Sequence[TWBook],
+ bc_books: Sequence[BCBook],
+ rg_books: Sequence[RGBook],
+ use_section_visual_separator: bool,
+ use_two_column_layout_for_tn_notes: bool,
+ use_two_column_layout_for_tq_notes: bool,
+ show_tn_book_intro: bool,
+ show_bc_book_intro: bool,
+ show_tn_chapter_intro: bool,
+ show_bc_chapter_commentary: bool,
+ show_rg_chapter_commentary: bool,
+ end_of_chapter_html: str = settings.END_OF_CHAPTER_HTML,
+ close_direction_html: str = "</div>",
+ hr: str = settings.HR,
+ book_chapters: Mapping[str, int] = BOOK_CHAPTERS,
+ fmt_str: str = settings.LEFT_ALIGNED_HEADER_FMT_STR,
+ verse_span_fmt_str: str = settings.VERSE_SPAN_FMT_STR,
+ tw_word_list_vertical: bool = settings.TW_WORD_LIST_VERTICAL,
+ resource_type_codes_and_names: Mapping[
+ str, str
+ ] = settings.RESOURCE_TYPE_CODES_AND_NAMES,
+) -> list[str]:
+ content = []
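+ # Deduplicate language codes while preserving their original order.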
+ lang_codes = list(dict.fromkeys(usfm_book.lang_code for usfm_book in usfm_books))
+ for lang_code in lang_codes:
+ if show_tn_book_intro:
+ for tn_book in [
+ tn_book for tn_book in tn_books if tn_book.lang_code == lang_code
+ ]:
+ content.append(tn_language_direction_html(tn_book))
+ book_intro_ = tn_book_intro(tn_book, use_section_visual_separator)
+ book_intro_adj = adjust_book_intro_headings(book_intro_)
+ if book_intro_adj:
+ content.append(fmt_str.format(tn_book.resource_type_name))
+ content.append(book_intro_adj)
+ content.append(close_direction_html)
+ if show_bc_book_intro:
+ for bc_book in [
+ bc_book for bc_book in bc_books if bc_book.lang_code == lang_code
+ ]:
+ bc_book_intro_ = bc_book_intro(bc_book, use_section_visual_separator)
+ if bc_book_intro_:
+ content.append(fmt_str.format(bc_book.resource_type_name))
+ content.append(bc_book_intro_)
+ book_codes = list(dict.fromkeys(usfm_book.book_code for usfm_book in usfm_books))
+ for book_code in book_codes:
+ num_chapters = book_chapters[book_code]
+ for chapter_num in range(1, num_chapters + 1):
+ for lang_code in lang_codes:
+ if show_tn_chapter_intro:
+ for tn_book in [
+ tn_book
+ for tn_book in tn_books
+ if tn_book.book_code == book_code
+ and tn_book.lang_code == lang_code
+ ]:
+ chapter_intro_ = chapter_intro(
+ tn_book, chapter_num, use_section_visual_separator
+ )
+ if chapter_intro_:
+ content.append(tn_language_direction_html(tn_book))
+ content.append(fmt_str.format(tn_book.resource_type_name))
+ content.append(chapter_intro_)
+ content.append(close_direction_html)
+ if show_bc_chapter_commentary:
+ for bc_book in [
+ bc_book
+ for bc_book in bc_books
+ if bc_book.book_code == book_code
+ and bc_book.lang_code == lang_code
+ ]:
+ chapter_commentary_ = chapter_commentary(
+ bc_book, chapter_num, use_section_visual_separator
+ )
+ if chapter_commentary_:
+ content.append(fmt_str.format(bc_book.resource_type_name))
+ content.append(chapter_commentary_)
+ if show_rg_chapter_commentary:
+ rg_verses = None
+ for rg_book in [
+ rg_book
+ for rg_book in rg_books
+ if rg_book.book_code == book_code
+ and rg_book.lang_code == lang_code
+ ]:
+ rg_verses = rg_chapter_verses(
+ rg_book, chapter_num, use_section_visual_separator
+ )
+ if rg_verses:
+ content.append(rg_language_direction_html(rg_book))
+ content.append(fmt_str.format(rg_book.resource_type_name))
+ content.append(rg_verses)
+ content.append(close_direction_html)
+ selected_usfm_books = [
+ usfm_book
+ for usfm_book in usfm_books
+ if usfm_book.book_code == book_code
+ and usfm_book.lang_code == lang_code
+ ]
+ selected_tn_books = [
+ tn_book
+ for tn_book in tn_books
+ if tn_book.book_code == book_code and tn_book.lang_code == lang_code
+ ]
+ selected_tq_books = [
+ tq_book
+ for tq_book in tq_books
+ if tq_book.book_code == book_code and tq_book.lang_code == lang_code
+ ]
+ selected_tw_books = [
+ tw_book
+ for tw_book in tw_books
+ if tw_book.book_code == book_code and tw_book.lang_code == lang_code
+ ]
+ usfm_book = None
+ usfm_book2 = None
+ usfm_chapter = None
+ usfm_chapter2 = None
+ if len(selected_usfm_books) == 1:
+ usfm_book = selected_usfm_books[0]
+ usfm_chapter = (
+ usfm_book.chapters[chapter_num]
+ if chapter_num in usfm_book.chapters
+ else None
)
- if rg_verses:
- content.append(rg_language_direction_html(rg_book))
- content.append(rg_verses)
- content.append(close_direction_html)
+ elif len(selected_usfm_books) == 2: # Second USFM chosen, e.g., fr f10
+ # TODO Later we might order resource types by the order in which they
+ # were clicked, at which point we would likely just use the else clause below.
+ # Assuming f10 should be treated as secondary to ulb for fr
+ if selected_usfm_books[0].resource_type_name in [
+ resource_type_codes_and_names.get("f10", ""),
+ resource_type_codes_and_names.get("udb", ""),
+ ]:
+ usfm_book2 = selected_usfm_books[0]
+ usfm_chapter2 = (
+ usfm_book2.chapters[chapter_num]
+ if chapter_num in usfm_book2.chapters
+ else None
+ )
+ usfm_book = selected_usfm_books[1]
+ usfm_chapter = (
+ usfm_book.chapters[chapter_num]
+ if chapter_num in usfm_book.chapters
+ else None
+ )
+ else:
+ usfm_book = selected_usfm_books[0]
+ usfm_chapter = (
+ usfm_book.chapters[chapter_num]
+ if chapter_num in usfm_book.chapters
+ else None
+ )
+ usfm_book2 = selected_usfm_books[1]
+ usfm_chapter2 = (
+ usfm_book2.chapters[chapter_num]
+ if chapter_num in usfm_book2.chapters
+ else None
+ )
+ tn_chapter = (
+ selected_tn_books[0].chapters[chapter_num]
+ if selected_tn_books
+ else None
+ )
+ tq_chapter = (
+ selected_tq_books[0].chapters[chapter_num]
+ if selected_tq_books
+ else None
+ )
+ if usfm_book and usfm_chapter:
+ content.append(usfm_language_direction_html(usfm_book))
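+ # Split the chapter's USFM content into per-verse fragments keyed by verse reference.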
+ usfm_chapter.verses = handle_split_chapter_into_verses(
+ usfm_book, usfm_chapter
+ )
+ content.append(fmt_str.format(usfm_book.national_book_name))
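+ # Interleave resources per verse: the verse text, then TN, TQ, TW words, and any second USFM.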
+ for verse_ref, verse in usfm_chapter.verses.items():
+ content.append(
+ fmt_str.format(
+ f"{usfm_book.national_book_name} {chapter_num}:{verse_ref}"
+ )
+ )
+ content.append(fmt_str.format(usfm_book.resource_type_name))
+ content.append(verse_span_fmt_str.format(verse))
+ if (
+ selected_tn_books
+ and tn_chapter
+ and tn_chapter.verses
+ and verse_ref in tn_chapter.verses
+ ):
+ content.append(
+ fmt_str.format(selected_tn_books[0].resource_type_name)
+ )
+ content.append(tn_chapter.verses[verse_ref])
+ if (
+ selected_tq_books
+ and tq_chapter
+ and tq_chapter.verses
+ and verse_ref in tq_chapter.verses
+ ):
+ content.append(
+ fmt_str.format(selected_tq_books[0].resource_type_name)
+ )
+ content.append(tq_chapter.verses[verse_ref])
+ if selected_tw_books:
+ tw_book = selected_tw_books[0]
+ words = translation_words_for_content(tw_book, verse)
+ unique_words = unique_list_of_strings(words)
+ if unique_words:
+ content.append(
+ fmt_str.format(tw_book.resource_type_name)
+ )
+ if tw_word_list_vertical:
+ html = (
+ "<ul>\n"
+ + "\n".join(
+ [
+ f"<li>{localized_word}</li>"
+ for localized_word, word in unique_words
+ ]
+ )
+ + "</ul>"
+ )
+ else:
+ html = ", ".join(
+ [
+ f"{localized_word}"
+ for localized_word, word in unique_words
+ ]
+ )
+ content.append(html)
+ if usfm_book2 and usfm_chapter2:
+ content.append(
+ fmt_str.format(usfm_book2.resource_type_name)
+ )
+ usfm_chapter2.verses = handle_split_chapter_into_verses(
+ usfm_book2, usfm_chapter2
+ )
+ if (
+ usfm_chapter2.verses
+ and verse_ref in usfm_chapter2.verses
+ ):
+ content.append(
+ fmt_str.format(
+ f"{usfm_book2.national_book_name} {chapter_num}:{verse_ref}"
+ )
+ )
+ content.append(
+ verse_span_fmt_str.format(
+ usfm_chapter2.verses[verse_ref]
+ )
+ )
+ content.append(close_direction_html)
content.append(end_of_chapter_html)
return content
@@ -331,91 +716,101 @@ def assemble_tn_by_chapter(
use_section_visual_separator: bool,
use_two_column_layout_for_tn_notes: bool,
use_two_column_layout_for_tq_notes: bool,
+ show_tn_book_intro: bool,
+ show_bc_book_intro: bool,
+ show_tn_chapter_intro: bool,
end_of_chapter_html: str = settings.END_OF_CHAPTER_HTML,
close_direction_html: str = "</div>",
book_chapters: Mapping[str, int] = BOOK_CHAPTERS,
- show_tn_book_intro: bool = settings.SHOW_TN_BOOK_INTRO,
+ fmt_str: str = settings.LEFT_ALIGNED_HEADER_FMT_STR,
) -> list[str]:
"""
Construct the HTML for a 'by chapter' strategy wherein at least
one TN book exists.
"""
content = []
-
-
-
-
-
+ # TODO Should we use lang_codes again here to ensure order?
if show_tn_book_intro:
for tn_book in tn_books:
content.append(tn_language_direction_html(tn_book))
book_intro_ = tn_book_intro(tn_book, use_section_visual_separator)
book_intro_adj = adjust_book_intro_headings(book_intro_)
- content.append(book_intro_adj)
+ if book_intro_adj:
+ content.append(fmt_str.format(tn_book.resource_type_name))
+ content.append(book_intro_adj)
content.append(close_direction_html)
- for bc_book in bc_books:
- content.append(bc_book_intro(bc_book, use_section_visual_separator))
+ if show_bc_book_intro:
+ for bc_book in bc_books:
+ bc_book_intro_ = bc_book_intro(bc_book, use_section_visual_separator)
+ if bc_book_intro_:
+ # TODO add lang direction?
+ content.append(fmt_str.format(bc_book.resource_type_name))
+ content.append(bc_book_intro_)
+ # TODO add lang direction close?
book_codes = {tn_book.book_code for tn_book in tn_books}
for book_code in book_codes:
num_chapters = book_chapters[book_code]
for chapter_num in range(1, num_chapters + 1):
- for tn_book in [
- tn_book for tn_book in tn_books if tn_book.book_code == book_code
- ]:
- if chapter_num in tn_book.chapters:
- content.append(tn_language_direction_html(tn_book))
- content.append(
- chapter_intro(
- tn_book, chapter_num, use_section_visual_separator
- )
+ if show_tn_chapter_intro:
+ for tn_book in [
+ tn_book for tn_book in tn_books if tn_book.book_code == book_code
+ ]:
+ tn_chapter_intro = chapter_intro(
+ tn_book, chapter_num, use_section_visual_separator
)
- content.append(close_direction_html)
+ if tn_chapter_intro:
+ content.append(tn_language_direction_html(tn_book))
+ content.append(fmt_str.format(tn_book.resource_type_name))
+ content.append(tn_chapter_intro)
+ content.append(close_direction_html)
for bc_book in [
bc_book for bc_book in bc_books if bc_book.book_code == book_code
]:
- if chapter_num in bc_book.chapters:
- content.append(
- chapter_commentary(
- bc_book, chapter_num, use_section_visual_separator
- )
- )
+ chapter_commentary_ = chapter_commentary(
+ bc_book, chapter_num, use_section_visual_separator
+ )
+ if chapter_commentary_:
+ content.append(fmt_str.format(bc_book.resource_type_name))
+ content.append(chapter_commentary_)
for tn_book in [
tn_book for tn_book in tn_books if tn_book.book_code == book_code
]:
- if chapter_num in tn_book.chapters:
- tn_verses = tn_chapter_verses(
- tn_book,
- chapter_num,
- use_section_visual_separator,
- use_two_column_layout_for_tn_notes,
- )
+ tn_verses = tn_chapter_verses(
+ tn_book,
+ chapter_num,
+ use_section_visual_separator,
+ use_two_column_layout_for_tn_notes,
+ )
+ if tn_verses:
content.append(tn_language_direction_html(tn_book))
+ content.append(fmt_str.format(tn_book.resource_type_name))
content.append(tn_verses)
content.append(close_direction_html)
for tq_book in [
tq_book for tq_book in tq_books if tq_book.book_code == book_code
]:
- if chapter_num in tq_book.chapters:
- tq_verses = tq_chapter_verses(
- tq_book,
- chapter_num,
- use_section_visual_separator,
- use_two_column_layout_for_tq_notes,
- )
+ tq_verses = tq_chapter_verses(
+ tq_book,
+ chapter_num,
+ use_section_visual_separator,
+ use_two_column_layout_for_tq_notes,
+ )
+ if tq_verses:
content.append(tq_language_direction_html(tq_book))
+ content.append(fmt_str.format(tq_book.resource_type_name))
content.append(tq_verses)
content.append(close_direction_html)
for rg_book in [
rg_book for rg_book in rg_books if rg_book.book_code == book_code
]:
- if chapter_num in rg_book.chapters:
- rg_verses = rg_chapter_verses(
- rg_book, chapter_num, use_section_visual_separator
- )
- if rg_verses:
- content.append(rg_language_direction_html(rg_book))
- content.append(rg_verses)
- content.append(close_direction_html)
+ rg_verses = rg_chapter_verses(
+ rg_book, chapter_num, use_section_visual_separator
+ )
+ if rg_verses:
+ content.append(rg_language_direction_html(rg_book))
+ content.append(fmt_str.format(rg_book.resource_type_name))
+ content.append(rg_verses)
+ content.append(close_direction_html)
content.append(end_of_chapter_html)
return content
@@ -429,19 +824,19 @@ def assemble_tq_by_chapter(
rg_books: Sequence[RGBook],
use_section_visual_separator: bool,
use_two_column_layout_for_tq_notes: bool,
+ show_tn_book_intro: bool,
+ show_bc_book_intro: bool,
+ show_tn_chapter_intro: bool,
end_of_chapter_html: str = settings.END_OF_CHAPTER_HTML,
close_direction_html: str = "</div>",
book_chapters: Mapping[str, int] = BOOK_CHAPTERS,
+ fmt_str: str = settings.LEFT_ALIGNED_HEADER_FMT_STR,
) -> list[str]:
"""
Construct the HTML for a 'by chapter' strategy wherein at least
one TQ book exists.
"""
content = []
-
-
-
-
book_codes = {tq_book.book_code for tq_book in tq_books}
for book_code in book_codes:
num_chapters = book_chapters[book_code]
@@ -450,37 +845,37 @@ def assemble_tq_by_chapter(
for bc_book in [
bc_book for bc_book in bc_books if bc_book.book_code == book_code
]:
- if chapter_num in bc_book.chapters:
- content.append(
- chapter_commentary(
- bc_book, chapter_num, use_section_visual_separator
- )
- )
+ chapter_commentary_ = chapter_commentary(
+ bc_book, chapter_num, use_section_visual_separator
+ )
+ if chapter_commentary_:
+ content.append(fmt_str.format(bc_book.resource_type_name))
+ content.append(chapter_commentary_)
for tq_book in [
tq_book for tq_book in tq_books if tq_book.book_code == book_code
]:
- if chapter_num in tq_book.chapters:
- tq_verses = tq_chapter_verses(
- tq_book,
- chapter_num,
- use_section_visual_separator,
- use_two_column_layout_for_tq_notes,
- )
- if tq_verses:
- content.append(tq_language_direction_html(tq_book))
- content.append(tq_verses)
- content.append(close_direction_html)
+ tq_verses = tq_chapter_verses(
+ tq_book,
+ chapter_num,
+ use_section_visual_separator,
+ use_two_column_layout_for_tq_notes,
+ )
+ if tq_verses:
+ content.append(tq_language_direction_html(tq_book))
+ content.append(fmt_str.format(tq_book.resource_type_name))
+ content.append(tq_verses)
+ content.append(close_direction_html)
for rg_book in [
rg_book for rg_book in rg_books if rg_book.book_code == book_code
]:
- if chapter_num in rg_book.chapters:
- rg_verses = rg_chapter_verses(
- rg_book, chapter_num, use_section_visual_separator
- )
- if rg_verses:
- content.append(rg_language_direction_html(rg_book))
- content.append(rg_verses)
- content.append(close_direction_html)
+ rg_verses = rg_chapter_verses(
+ rg_book, chapter_num, use_section_visual_separator
+ )
+ if rg_verses:
+ content.append(rg_language_direction_html(rg_book))
+ content.append(fmt_str.format(rg_book.resource_type_name))
+ content.append(rg_verses)
+ content.append(close_direction_html)
content.append(end_of_chapter_html)
return content
@@ -501,16 +896,19 @@ def assemble_tw_by_chapter(
rg_books: Sequence[RGBook],
use_section_visual_separator: bool,
end_of_chapter_html: str = settings.END_OF_CHAPTER_HTML,
+ fmt_str: str = settings.LEFT_ALIGNED_HEADER_FMT_STR,
) -> list[str]:
content = []
-
-
for bc_book in bc_books:
content.append(bc_book_intro(bc_book, use_section_visual_separator))
for chapter_num, chapter in bc_book.chapters.items():
- content.append(
- chapter_commentary(bc_book, chapter_num, use_section_visual_separator)
+
+ chapter_commentary_ = chapter_commentary(
+ bc_book, chapter_num, use_section_visual_separator
)
+ if chapter_commentary_:
+ content.append(fmt_str.format(bc_book.resource_type_name))
+ content.append(chapter_commentary_)
content.append(end_of_chapter_html)
return content
@@ -636,11 +1034,6 @@ def assemble_usfm_by_chapter_2c_sl_sr(
content = []
-
-
-
-
-
# Order USFM book content units so that they are in language pairs
# for side by side display.
zipped_usfm_books = ensure_primary_usfm_books_for_different_languages_are_adjacent(
diff --git a/backend/doc/domain/assembly_strategies/assembly_strategies_lang_then_book_by_chapter.py b/backend/doc/domain/assembly_strategies/assembly_strategies_lang_then_book_by_chapter.py
index 22de3c59e..2bf607f82 100755
--- a/backend/doc/domain/assembly_strategies/assembly_strategies_lang_then_book_by_chapter.py
+++ b/backend/doc/domain/assembly_strategies/assembly_strategies_lang_then_book_by_chapter.py
@@ -25,13 +25,16 @@
TWBook,
USFMBook,
)
+from doc.domain.parsing import handle_split_chapter_into_verses
from doc.reviewers_guide.model import RGBook
+from doc.utils.list_utils import unique_list_of_strings
+from doc.utils.tw_utils import translation_words_for_content
logger = settings.logger(__name__)
-def assemble_content_by_lang_then_book(
+def assemble_content_by_book(
usfm_books: Sequence[USFMBook],
tn_books: Sequence[TNBook],
tq_books: Sequence[TQBook],
@@ -42,27 +45,30 @@ def assemble_content_by_lang_then_book(
use_section_visual_separator: bool,
use_two_column_layout_for_tn_notes: bool,
use_two_column_layout_for_tq_notes: bool,
+ show_tn_book_intro: bool,
+ show_bc_book_intro: bool,
+ show_tn_chapter_intro: bool,
book_names: Mapping[str, str] = BOOK_NAMES,
book_id_map: dict[str, int] = BOOK_ID_MAP,
+ resource_type_codes_and_names: Mapping[
+ str, str
+ ] = settings.RESOURCE_TYPE_CODES_AND_NAMES,
) -> list[str]:
- """
- Assemble by language then by book in lexicographical order before
- delegating more atomic ordering/interleaving to an assembly
- sub-strategy.
- """
content = []
- # Collect and deduplicate language codes
- all_lang_codes = (
- {usfm_book.lang_code for usfm_book in usfm_books}
- .union(tn_book.lang_code for tn_book in tn_books)
- .union(tq_book.lang_code for tq_book in tq_books)
- .union(tw_book.lang_code for tw_book in tw_books)
- .union(bc_book.lang_code for bc_book in bc_books)
- .union(rg_book.lang_code for rg_book in rg_books)
+ # Collect and deduplicate language codes, preserving encounter order
+ lang_codes = list(
+ dict.fromkeys(
+ [
+ *[usfm_book.lang_code for usfm_book in usfm_books],
+ *[tn_book.lang_code for tn_book in tn_books],
+ *[tq_book.lang_code for tq_book in tq_books],
+ *[tw_book.lang_code for tw_book in tw_books],
+ *[bc_book.lang_code for bc_book in bc_books],
+ *[rg_book.lang_code for rg_book in rg_books],
+ ]
+ )
)
- lang_codes = list(all_lang_codes)
- # Collect and deduplicate book codes
- all_book_codes = (
+ book_codes = list(
{usfm_book.book_code for usfm_book in usfm_books}
.union(tn_book.book_code for tn_book in tn_books)
.union(tq_book.book_code for tq_book in tq_books)
@@ -70,7 +76,6 @@ def assemble_content_by_lang_then_book(
.union(bc_book.book_code for bc_book in bc_books)
.union(rg_book.book_code for rg_book in rg_books)
)
- book_codes = list(all_book_codes)
book_codes_sorted = sorted(book_codes, key=lambda book_code: book_id_map[book_code])
for lang_code in lang_codes:
for book_code in book_codes_sorted:
@@ -79,12 +84,24 @@ def assemble_content_by_lang_then_book(
for usfm_book in usfm_books
if usfm_book.lang_code == lang_code and usfm_book.book_code == book_code
]
- usfm_book = selected_usfm_books[0] if selected_usfm_books else None
- usfm_book2 = (
- selected_usfm_books[1]
- if selected_usfm_books and len(selected_usfm_books) > 1
- else None
- )
+ usfm_book = None
+ usfm_book2 = None
+ if len(selected_usfm_books) == 1:
+ usfm_book = selected_usfm_books[0]
+ elif (
+ len(selected_usfm_books) == 2
+ ): # Second USFM chosen, e.g., fr f10. Assuming f10 should be treated as secondary to ulb for fr
+ # TODO Later we might order resource types by the order in which they were
+ # clicked, at which point we would likely just use the body of the else clause below.
+ if selected_usfm_books[0].resource_type_name in [
+ resource_type_codes_and_names.get("f10", ""),
+ resource_type_codes_and_names.get("udb", ""),
+ ]:
+ usfm_book = selected_usfm_books[1]
+ usfm_book2 = selected_usfm_books[0]
+ else:
+ usfm_book = selected_usfm_books[0]
+ usfm_book2 = selected_usfm_books[1]
selected_tn_books = [
tn_book
for tn_book in tn_books
@@ -128,6 +145,9 @@ def assemble_content_by_lang_then_book(
use_section_visual_separator,
use_two_column_layout_for_tn_notes,
use_two_column_layout_for_tq_notes,
+ show_tn_book_intro,
+ show_bc_book_intro,
+ show_tn_chapter_intro,
)
)
elif usfm_book is None and tn_book is not None:
@@ -143,6 +163,9 @@ def assemble_content_by_lang_then_book(
use_section_visual_separator,
use_two_column_layout_for_tn_notes,
use_two_column_layout_for_tq_notes,
+ show_tn_book_intro,
+ show_bc_book_intro,
+ show_tn_chapter_intro,
)
)
elif usfm_book is None and tn_book is None and tq_book is not None:
@@ -157,6 +180,9 @@ def assemble_content_by_lang_then_book(
rg_book,
use_section_visual_separator,
use_two_column_layout_for_tq_notes,
+ show_tn_book_intro,
+ show_bc_book_intro,
+ show_tn_chapter_intro,
)
)
elif (
@@ -180,6 +206,153 @@ def assemble_content_by_lang_then_book(
return content
+def assemble_content_by_verse_book_at_a_time(
+ usfm_books: Sequence[USFMBook],
+ tn_books: Sequence[TNBook],
+ tq_books: Sequence[TQBook],
+ tw_books: Sequence[TWBook],
+ bc_books: Sequence[BCBook],
+ rg_books: Sequence[RGBook],
+ assembly_layout_kind: AssemblyLayoutEnum,
+ use_section_visual_separator: bool,
+ use_two_column_layout_for_tn_notes: bool,
+ use_two_column_layout_for_tq_notes: bool,
+ show_tn_book_intro: bool,
+ show_bc_book_intro: bool,
+ show_tn_chapter_intro: bool,
+ show_bc_chapter_commentary: bool,
+ show_rg_chapter_commentary: bool,
+ book_names: Mapping[str, str] = BOOK_NAMES,
+ book_id_map: dict[str, int] = BOOK_ID_MAP,
+ resource_type_codes_and_names: Mapping[
+ str, str
+ ] = settings.RESOURCE_TYPE_CODES_AND_NAMES,
+) -> list[str]:
+ content = []
+ # Collect and deduplicate language codes, preserving encounter order
+ lang_codes = list(
+ dict.fromkeys(
+ [
+ *[usfm_book.lang_code for usfm_book in usfm_books],
+ *[tn_book.lang_code for tn_book in tn_books],
+ *[tq_book.lang_code for tq_book in tq_books],
+ *[tw_book.lang_code for tw_book in tw_books],
+ *[bc_book.lang_code for bc_book in bc_books],
+ *[rg_book.lang_code for rg_book in rg_books],
+ ]
+ )
+ )
+ # Collect and deduplicate book codes
+ book_codes = list(
+ {usfm_book.book_code for usfm_book in usfm_books}
+ .union(tn_book.book_code for tn_book in tn_books)
+ .union(tq_book.book_code for tq_book in tq_books)
+ .union(tw_book.book_code for tw_book in tw_books)
+ .union(bc_book.book_code for bc_book in bc_books)
+ .union(rg_book.book_code for rg_book in rg_books)
+ )
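+ # Sort book codes into canonical book order via their BOOK_ID_MAP indices.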
+ book_codes_sorted = sorted(book_codes, key=lambda book_code: book_id_map[book_code])
+ for lang_code in lang_codes:
+ for book_code in book_codes_sorted:
+ selected_usfm_books = [
+ usfm_book
+ for usfm_book in usfm_books
+ if usfm_book.lang_code == lang_code and usfm_book.book_code == book_code
+ ]
+ usfm_book = None
+ usfm_book2 = None
+ if len(selected_usfm_books) == 1:
+ usfm_book = selected_usfm_books[0]
+ elif len(selected_usfm_books) == 2: # Second USFM chosen, e.g., fr f10
+ # TODO Later we might order resource types by the order in which they
+ # were clicked, at which point we would likely just use the else clause below.
+ # Assuming f10 should be treated as secondary to ulb for fr
+ if selected_usfm_books[0].resource_type_name in [
+ resource_type_codes_and_names.get("f10", ""),
+ resource_type_codes_and_names.get("udb", ""),
+ ]:
+ usfm_book = selected_usfm_books[1]
+ usfm_book2 = selected_usfm_books[0]
+ logger.info(
+ "inside fr branch for initializing usfm_book and usfm_book2"
+ )
+ else:
+ usfm_book = selected_usfm_books[0]
+ usfm_book2 = selected_usfm_books[1]
+ selected_tn_books = [
+ tn_book
+ for tn_book in tn_books
+ if tn_book.lang_code == lang_code and tn_book.book_code == book_code
+ ]
+ tn_book = selected_tn_books[0] if selected_tn_books else None
+ selected_tq_books = [
+ tq_book
+ for tq_book in tq_books
+ if tq_book.lang_code == lang_code and tq_book.book_code == book_code
+ ]
+ tq_book = selected_tq_books[0] if selected_tq_books else None
+ selected_tw_books = [
+ tw_book
+ for tw_book in tw_books
+ if tw_book.lang_code == lang_code and tw_book.book_code == book_code
+ ]
+ tw_book = selected_tw_books[0] if selected_tw_books else None
+ selected_bc_books = [
+ bc_book
+ for bc_book in bc_books
+ if bc_book.lang_code == lang_code and bc_book.book_code == book_code
+ ]
+ bc_book = selected_bc_books[0] if selected_bc_books else None
+ selected_rg_books = [
+ rg_book
+ for rg_book in rg_books
+ if rg_book.lang_code == lang_code and rg_book.book_code == book_code
+ ]
+ rg_book = selected_rg_books[0] if selected_rg_books else None
+ if not usfm_books:
+ content.extend(
+ assemble_content_by_book(
+ usfm_books,
+ tn_books,
+ tq_books,
+ tw_books,
+ bc_books,
+ rg_books,
+ assembly_layout_kind,
+ use_section_visual_separator,
+ use_two_column_layout_for_tn_notes,
+ use_two_column_layout_for_tq_notes,
+ show_tn_book_intro,
+ show_bc_book_intro,
+ show_tn_chapter_intro,
+ )
+ )
+ elif usfm_books and (
+ assembly_layout_kind == AssemblyLayoutEnum.ONE_COLUMN
+ or assembly_layout_kind == AssemblyLayoutEnum.ONE_COLUMN_COMPACT
+ ):
+ content.extend(
+ assemble_usfm_by_verse_book_at_a_time(
+ usfm_book,
+ tn_book,
+ tq_book,
+ tw_book,
+ usfm_book2,
+ bc_book,
+ rg_book,
+ use_section_visual_separator,
+ use_two_column_layout_for_tn_notes,
+ use_two_column_layout_for_tq_notes,
+ show_tn_book_intro,
+ show_bc_book_intro,
+ show_tn_chapter_intro,
+ show_bc_chapter_commentary,
+ show_rg_chapter_commentary,
+ )
+ )
+ return content
+
+
def assemble_usfm_by_book(
usfm_book: Optional[USFMBook],
tn_book: Optional[TNBook],
@@ -191,22 +364,81 @@ def assemble_usfm_by_book(
use_section_visual_separator: bool,
use_two_column_layout_for_tn_notes: bool,
use_two_column_layout_for_tq_notes: bool,
+ show_tn_book_intro: bool,
+ show_bc_book_intro: bool,
+ show_tn_chapter_intro: bool,
end_of_chapter_html: str = settings.END_OF_CHAPTER_HTML,
hr: str = settings.HR,
close_direction_html: str = "</div>",
fmt_str: str = settings.BOOK_NAME_FMT_STR,
+ resource_type_name_fmt_str: str = settings.LEFT_ALIGNED_HEADER_FMT_STR,
+ tw_word_list_vertical: bool = settings.TW_WORD_LIST_VERTICAL,
) -> list[str]:
content = []
content.append(usfm_language_direction_html(usfm_book))
- content.append(tn_book_intro(tn_book, use_section_visual_separator))
- content.append(bc_book_intro(bc_book, use_section_visual_separator))
+ tn_book_intro_ = tn_book_intro(tn_book, use_section_visual_separator)
+ if show_tn_book_intro and tn_book and tn_book_intro_:
+ content.append(resource_type_name_fmt_str.format(tn_book.resource_type_name))
+ content.append(tn_book_intro_)
+ bc_book_intro_ = bc_book_intro(bc_book, use_section_visual_separator)
+ if show_bc_book_intro and bc_book and bc_book_intro_:
+ content.append(resource_type_name_fmt_str.format(bc_book.resource_type_name))
+ content.append(bc_book_intro_)
if usfm_book:
content.append(fmt_str.format(usfm_book.national_book_name))
+ if (
+ tn_book is None
+ and tq_book is None
+ and bc_book is None
+ and rg_book is None
+ and usfm_book2 is None
+ ):
+ content.append(
+ resource_type_name_fmt_str.format(usfm_book.resource_type_name)
+ )
for (
chapter_num,
chapter,
) in usfm_book.chapters.items():
+ if not (
+ tn_book is None
+ and tq_book is None
+ and bc_book is None
+ and rg_book is None
+ and usfm_book2 is None
+ ):
+ content.append(
+ resource_type_name_fmt_str.format(usfm_book.resource_type_name)
+ )
content.append(chapter.content)
+ if use_section_visual_separator:
+ content.append(hr)
+ if tw_book:
+ words = translation_words_for_content(tw_book, chapter.content)
+ unique_words = unique_list_of_strings(words)
+ if unique_words:
+ content.append(
+ resource_type_name_fmt_str.format(tw_book.resource_type_name)
+ )
+ if tw_word_list_vertical:
+ html = (
+ "<ul>\n"
+ + "\n".join(
+ [
+ f"<li>{localized_word}</li>"
+ for localized_word, word in unique_words
+ ]
+ )
+ + "</ul>"
+ )
+ else:
+ html = ", ".join(
+ [
+ f"{localized_word}"
+ for localized_word, word in unique_words
+ ]
+ )
+ content.append(html)
if (
not has_footnotes(chapter.content)
and (
@@ -219,36 +451,207 @@ def assemble_usfm_by_book(
and use_section_visual_separator
):
content.append(hr)
- content.append(
- chapter_intro(tn_book, chapter_num, use_section_visual_separator)
+ tn_chapter_intro = chapter_intro(
+ tn_book, chapter_num, use_section_visual_separator
)
- content.append(
- chapter_commentary(bc_book, chapter_num, use_section_visual_separator)
+ if show_tn_chapter_intro and tn_book and tn_chapter_intro:
+ content.append(
+ resource_type_name_fmt_str.format(tn_book.resource_type_name)
+ )
+ content.append(tn_chapter_intro)
+ bc_chapter_commentary = chapter_commentary(
+ bc_book, chapter_num, use_section_visual_separator
)
- content.append(
- tn_chapter_verses(
- tn_book,
- chapter_num,
- use_section_visual_separator,
- use_two_column_layout_for_tn_notes,
+ if bc_book and bc_chapter_commentary:
+ content.append(
+ resource_type_name_fmt_str.format(bc_book.resource_type_name)
)
+ content.append(bc_chapter_commentary)
+ tn_verses = tn_chapter_verses(
+ tn_book,
+ chapter_num,
+ use_section_visual_separator,
+ use_two_column_layout_for_tn_notes,
)
- content.append(
- tq_chapter_verses(
- tq_book,
- chapter_num,
- use_section_visual_separator,
- use_two_column_layout_for_tq_notes,
+ if tn_book and tn_verses:
+ content.append(
+ resource_type_name_fmt_str.format(tn_book.resource_type_name)
)
+ content.append(tn_verses)
+ tq_verses = tq_chapter_verses(
+ tq_book,
+ chapter_num,
+ use_section_visual_separator,
+ use_two_column_layout_for_tq_notes,
)
- content.append(
- rg_chapter_verses(rg_book, chapter_num, use_section_visual_separator)
+ if tq_book and tq_verses:
+ content.append(
+ resource_type_name_fmt_str.format(tq_book.resource_type_name)
+ )
+ content.append(tq_verses)
+ rg_verses = rg_chapter_verses(
+ rg_book, chapter_num, use_section_visual_separator
)
+ if rg_book and rg_verses:
+ content.append(
+ resource_type_name_fmt_str.format(rg_book.resource_type_name)
+ )
+ content.append(rg_verses)
# If the user chose two USFM resource types for a language, e.g., fr:
- # ulb, f10, show the second USFM content here
+ # ulb and f10, then show the second USFM content here
if usfm_book2:
if chapter_num in usfm_book2.chapters:
+ content.append(
+ resource_type_name_fmt_str.format(usfm_book2.resource_type_name)
+ )
content.append(usfm_book2.chapters[chapter_num].content)
+ if use_section_visual_separator:
+ content.append(hr)
+ content.append(end_of_chapter_html)
+ content.append(close_direction_html)
+ return content
+
+
+def assemble_usfm_by_verse_book_at_a_time(
+ usfm_book: Optional[USFMBook],
+ tn_book: Optional[TNBook],
+ tq_book: Optional[TQBook],
+ tw_book: Optional[TWBook],
+ usfm_book2: Optional[USFMBook],
+ bc_book: Optional[BCBook],
+ rg_book: Optional[RGBook],
+ use_section_visual_separator: bool,
+ use_two_column_layout_for_tn_notes: bool,
+ use_two_column_layout_for_tq_notes: bool,
+ show_tn_book_intro: bool,
+ show_bc_book_intro: bool,
+ show_tn_chapter_intro: bool,
+ show_bc_chapter_commentary: bool,
+ show_rg_chapter_commentary: bool,
+ end_of_chapter_html: str = settings.END_OF_CHAPTER_HTML,
+ hr: str = settings.HR,
+ close_direction_html: str = "</div>",
+ fmt_str: str = settings.LEFT_ALIGNED_HEADER_FMT_STR,
+ verse_span_fmt_str: str = settings.VERSE_SPAN_FMT_STR,
+ tw_word_list_vertical: bool = settings.TW_WORD_LIST_VERTICAL,
+ resource_type_codes_and_names: Mapping[
+ str, str
+ ] = settings.RESOURCE_TYPE_CODES_AND_NAMES,
+) -> list[str]:
+ content = []
+ content.append(usfm_language_direction_html(usfm_book))
+ tn_book_intro_ = tn_book_intro(tn_book, use_section_visual_separator)
+ if show_tn_book_intro and tn_book and tn_book_intro_:
+ content.append(fmt_str.format(tn_book.resource_type_name))
+ content.append(tn_book_intro_)
+ bc_book_intro_ = bc_book_intro(bc_book, use_section_visual_separator)
+ if show_bc_book_intro and bc_book and bc_book_intro_:
+ content.append(fmt_str.format(bc_book.resource_type_name))
+ content.append(bc_book_intro_)
+ if usfm_book:
+ for (
+ chapter_num,
+ chapter,
+ ) in usfm_book.chapters.items():
+ chapter.verses = handle_split_chapter_into_verses(usfm_book, chapter)
+ tn_chapter = tn_book.chapters[chapter_num] if tn_book else None
+ tq_chapter = tq_book.chapters[chapter_num] if tq_book else None
+ tn_chapter_intro = chapter_intro(
+ tn_book, chapter_num, use_section_visual_separator
+ )
+ if show_tn_chapter_intro and tn_book and tn_chapter_intro:
+ content.append(fmt_str.format(tn_book.resource_type_name))
+ content.append(tn_chapter_intro)
+ bc_chapter_commentary = chapter_commentary(
+ bc_book, chapter_num, use_section_visual_separator
+ )
+ if show_bc_chapter_commentary and bc_book and bc_chapter_commentary:
+ content.append(fmt_str.format(bc_book.resource_type_name))
+ content.append(bc_chapter_commentary)
+ rg_verses = rg_chapter_verses(
+ rg_book, chapter_num, use_section_visual_separator
+ )
+ if show_rg_chapter_commentary and rg_book and rg_verses:
+ content.append(fmt_str.format(rg_book.resource_type_name))
+ content.append(rg_verses)
+ if chapter.verses:
+ for verse_ref, verse in chapter.verses.items():
+ content.append(
+ fmt_str.format(
+ f"{usfm_book.national_book_name} {chapter_num}:{verse_ref}"
+ )
+ )
+ content.append(fmt_str.format(usfm_book.resource_type_name))
+ content.append(verse_span_fmt_str.format(verse))
+ if (
+ tn_book
+ and tn_chapter
+ and tn_chapter.verses
+ and verse_ref in tn_chapter.verses
+ ):
+ content.append(fmt_str.format(tn_book.resource_type_name))
+ content.append(tn_chapter.verses[verse_ref])
+ if (
+ tq_book
+ and tq_chapter
+ and tq_chapter.verses
+ and verse_ref in tq_chapter.verses
+ ):
+ content.append(fmt_str.format(tq_book.resource_type_name))
+ content.append(tq_chapter.verses[verse_ref])
+ if tw_book:
+ words = translation_words_for_content(tw_book, verse)
+ unique_words = unique_list_of_strings(words)
+ if unique_words:
+ content.append(fmt_str.format(tw_book.resource_type_name))
+ if tw_word_list_vertical:
+ html = (
+ "<ul>\n"
+ + "\n".join(
+ [
+ f"<li>{localized_word}</li>"
+ for localized_word, word in unique_words
+ ]
+ )
+ + "</ul>"
+ )
+ else:
+ html = ", ".join(
+ [
+ f"{localized_word}"
+ for localized_word, word in unique_words
+ ]
+ )
+ content.append(html)
+ # If the user chose two USFM resource types for a language, e.g., fr:
+ # ulb and f10, then show the second USFM content here
+ if usfm_book2:
+ usfm_book2_chapter = usfm_book2.chapters[chapter_num]
+ usfm_book2_chapter.verses = handle_split_chapter_into_verses(
+ usfm_book2, usfm_book2_chapter
+ )
+ if usfm_book2_chapter.verses:
+ content.append(
+ fmt_str.format(usfm_book2.resource_type_name)
+ )
+ content.append(
+ verse_span_fmt_str.format(
+ usfm_book2_chapter.verses[verse_ref]
+ )
+ )
+ # TODO How should we handle footnotes in a versified output?
+ # if (
+ # not has_footnotes(chapter.content)
+ # and (
+ # usfm_book2 is not None
+ # or tn_book is not None
+ # or tq_book is not None
+ # or rg_book is not None
+ # or tw_book is not None
+ # )
+ # and use_section_visual_separator
+ # ):
+ # content.append(hr)
content.append(end_of_chapter_html)
content.append(close_direction_html)
return content
@@ -265,41 +668,63 @@ def assemble_tn_by_book(
use_section_visual_separator: bool,
use_two_column_layout_for_tn_notes: bool,
use_two_column_layout_for_tq_notes: bool,
+ show_tn_book_intro: bool,
+ show_bc_book_intro: bool,
+ show_tn_chapter_intro: bool,
+ # show_bc_chapter_commentary: bool,
+ fmt_str: str = settings.LEFT_ALIGNED_HEADER_FMT_STR,
end_of_chapter_html: str = settings.END_OF_CHAPTER_HTML,
+ # show_tn_book_intro: bool = settings.SHOW_TN_BOOK_INTRO,
+ # show_tn_chapter_intro: bool = settings.SHOW_TN_CHAPTER_INTRO,
close_direction_html: str = "</div>",
) -> list[str]:
content = []
content.append(tn_language_direction_html(tn_book))
- content.append(tn_book_intro(tn_book, use_section_visual_separator))
+ tn_book_intro_ = tn_book_intro(tn_book, use_section_visual_separator)
+ if show_tn_book_intro and tn_book and tn_book_intro_:
+ content.append(fmt_str.format(tn_book.resource_type_name))
+ content.append(tn_book_intro_)
if tn_book:
for chapter_num in tn_book.chapters:
content.append(chapter_heading(chapter_num))
- content.append(
- chapter_intro(tn_book, chapter_num, use_section_visual_separator)
+ tn_chapter_intro_ = chapter_intro(
+ tn_book, chapter_num, use_section_visual_separator
)
- content.append(
- chapter_commentary(bc_book, chapter_num, use_section_visual_separator)
+ if show_tn_chapter_intro and tn_chapter_intro_:
+ content.append(fmt_str.format(tn_book.resource_type_name))
+ content.append(tn_chapter_intro_)
+ bc_chapter_commentary = chapter_commentary(
+ bc_book, chapter_num, use_section_visual_separator
)
- content.append(
- tn_chapter_verses(
- tn_book,
- chapter_num,
- use_section_visual_separator,
- use_two_column_layout_for_tn_notes,
- )
+ if bc_book and bc_chapter_commentary:
+ content.append(fmt_str.format(bc_book.resource_type_name))
+ content.append(bc_chapter_commentary)
+ tn_chapter_verses_ = tn_chapter_verses(
+ tn_book,
+ chapter_num,
+ use_section_visual_separator,
+ use_two_column_layout_for_tn_notes,
)
- content.append(
- tq_chapter_verses(
- tq_book,
- chapter_num,
- use_section_visual_separator,
- use_two_column_layout_for_tq_notes,
- )
+ if tn_chapter_verses_:
+ content.append(fmt_str.format(tn_book.resource_type_name))
+ content.append(tn_chapter_verses_)
+
+ tq_chapter_verses_ = tq_chapter_verses(
+ tq_book,
+ chapter_num,
+ use_section_visual_separator,
+ use_two_column_layout_for_tq_notes,
)
- content.append(
- rg_chapter_verses(rg_book, chapter_num, use_section_visual_separator)
+ if tq_book and tq_chapter_verses_:
+ content.append(fmt_str.format(tq_book.resource_type_name))
+ content.append(tq_chapter_verses_)
+ rg_chapter_verses_ = rg_chapter_verses(
+ rg_book, chapter_num, use_section_visual_separator
)
- content.append(end_of_chapter_html)
+ if rg_book and rg_chapter_verses_:
+ content.append(fmt_str.format(rg_book.resource_type_name))
+ content.append(rg_chapter_verses_)
+ content.append(end_of_chapter_html)
content.append(close_direction_html)
return content
@@ -314,6 +739,11 @@ def assemble_tq_by_book(
rg_book: Optional[RGBook],
use_section_visual_separator: bool,
use_two_column_layout_for_tq_notes: bool,
+ show_tn_book_intro: bool,
+ show_bc_book_intro: bool,
+ show_tn_chapter_intro: bool,
+ # show_bc_chapter_commentary: bool,
+ fmt_str: str = settings.LEFT_ALIGNED_HEADER_FMT_STR,
end_of_chapter_html: str = settings.END_OF_CHAPTER_HTML,
close_direction_html: str = "</div>",
) -> list[str]:
@@ -321,21 +751,30 @@ def assemble_tq_by_book(
content.append(tq_language_direction_html(tq_book))
if tq_book:
for chapter_num in tq_book.chapters:
- content.append(
- chapter_commentary(bc_book, chapter_num, use_section_visual_separator)
+
+ chapter_commentary_ = chapter_commentary(
+ bc_book, chapter_num, use_section_visual_separator
)
+ if bc_book and chapter_commentary_:
+ content.append(fmt_str.format(bc_book.resource_type_name))
+ content.append(chapter_commentary_)
content.append(chapter_heading(chapter_num))
- content.append(
- tq_chapter_verses(
- tq_book,
- chapter_num,
- use_section_visual_separator,
- use_two_column_layout_for_tq_notes,
- )
+
+ tq_chapter_verses_ = tq_chapter_verses(
+ tq_book,
+ chapter_num,
+ use_section_visual_separator,
+ use_two_column_layout_for_tq_notes,
)
- content.append(
- rg_chapter_verses(rg_book, chapter_num, use_section_visual_separator)
+ if tq_chapter_verses_:
+ content.append(fmt_str.format(tq_book.resource_type_name))
+ content.append(tq_chapter_verses_)
+ rg_chapter_verses_ = rg_chapter_verses(
+ rg_book, chapter_num, use_section_visual_separator
)
+ if rg_book and rg_chapter_verses_:
+ content.append(fmt_str.format(rg_book.resource_type_name))
+ content.append(rg_chapter_verses_)
content.append(end_of_chapter_html)
content.append(close_direction_html)
return content
@@ -349,6 +788,7 @@ def assemble_rg_by_chapter(
bc_books: Sequence[BCBook],
rg_books: Sequence[RGBook],
use_section_visual_separator: bool,
+ fmt_str: str = settings.LEFT_ALIGNED_HEADER_FMT_STR,
end_of_chapter_html: str = settings.END_OF_CHAPTER_HTML,
close_direction_html: str = "</div>",
) -> list[str]:
@@ -375,25 +815,26 @@ def rg_sort_key(resource: RGBook) -> str:
if bc_book.lang_code == rg_book_.lang_code
and bc_book.book_code == rg_book_.book_code
]:
- if chapter_num in bc_book.chapters:
- content.append(
- chapter_commentary(
- bc_book, chapter_num, use_section_visual_separator
- )
- )
+
+ chapter_commentary_ = chapter_commentary(
+ bc_book, chapter_num, use_section_visual_separator
+ )
+ if chapter_commentary_:
+ content.append(fmt_str.format(bc_book.resource_type_name))
+ content.append(chapter_commentary_)
for rg_book in [
rg_book
for rg_book in rg_books
if rg_book.lang_code == rg_book_.lang_code
and rg_book.book_code == rg_book_.book_code
]:
- if chapter_num in rg_book.chapters:
+ rg_chapter_verses_ = rg_chapter_verses(
+ rg_book, chapter_num, use_section_visual_separator
+ )
+ if rg_chapter_verses_:
content.append(rg_language_direction_html(rg_book))
- content.append(
- rg_chapter_verses(
- rg_book, chapter_num, use_section_visual_separator
- )
- )
+ content.append(fmt_str.format(rg_book.resource_type_name))
+ content.append(rg_chapter_verses_)
content.append(close_direction_html)
return content
@@ -409,21 +850,28 @@ def assemble_tw_by_book(
bc_book: Optional[BCBook],
rg_book: Optional[RGBook],
use_section_visual_separator: bool,
+ fmt_str: str = settings.LEFT_ALIGNED_HEADER_FMT_STR,
end_of_chapter_html: str = settings.END_OF_CHAPTER_HTML,
close_direction_html: str = "</div>",
) -> list[str]:
content = []
if bc_book:
for chapter_num in bc_book.chapters:
- content.append(
- chapter_commentary(bc_book, chapter_num, use_section_visual_separator)
+ chapter_commentary_ = chapter_commentary(
+ bc_book, chapter_num, use_section_visual_separator
)
- content.append(end_of_chapter_html)
+ if chapter_commentary_:
+ # TODO lang direction?
+ content.append(fmt_str.format(bc_book.resource_type_name))
+ content.append(chapter_commentary_)
+ content.append(end_of_chapter_html)
if rg_book:
for chapter_num in rg_book.chapters:
- content.append(
- rg_chapter_verses(rg_book, chapter_num, use_section_visual_separator)
+ rg_verses = rg_chapter_verses(
+ rg_book, chapter_num, use_section_visual_separator
)
- content.append(end_of_chapter_html)
-
+ if rg_verses:
+ content.append(fmt_str.format(rg_book.resource_type_name))
+ content.append(rg_verses)
+ content.append(end_of_chapter_html)
return content
diff --git a/backend/doc/domain/assembly_strategies/assembly_strategy_utils.py b/backend/doc/domain/assembly_strategies/assembly_strategy_utils.py
index ece56803f..0cf7564c0 100755
--- a/backend/doc/domain/assembly_strategies/assembly_strategy_utils.py
+++ b/backend/doc/domain/assembly_strategies/assembly_strategy_utils.py
@@ -185,7 +185,7 @@ def tn_chapter_verses(
Return the HTML for verses that are in the chapter with
chapter_num.
"""
- tn_verse_notes_enclosing_div_fmt_str: str = (
+ tn_verse_notes_enclosing_div_fmt_str = (
"{}
"
if use_two_column_layout_for_tn_notes
else "{}
"
diff --git a/backend/doc/domain/assembly_strategies_docx/assembly_strategies_book_then_lang_by_chapter.py b/backend/doc/domain/assembly_strategies_docx/assembly_strategies_book_then_lang_by_chapter.py
index 8ea36c09f..89aeee6fe 100644
--- a/backend/doc/domain/assembly_strategies_docx/assembly_strategies_book_then_lang_by_chapter.py
+++ b/backend/doc/domain/assembly_strategies_docx/assembly_strategies_book_then_lang_by_chapter.py
@@ -22,13 +22,16 @@
TWBook,
USFMBook,
)
+from doc.domain.parsing import handle_split_chapter_into_verses
from doc.reviewers_guide.model import RGBook
+from doc.utils.list_utils import unique_list_of_strings
+from doc.utils.tw_utils import translation_words_for_content
logger = settings.logger(__name__)
-def assemble_content_by_book_then_lang(
+def assemble_content_by_chapter(
usfm_books: Sequence[USFMBook],
tn_books: Sequence[TNBook],
tq_books: Sequence[TQBook],
@@ -40,6 +43,9 @@ def assemble_content_by_book_then_lang(
use_section_visual_separator: bool,
use_two_column_layout_for_tn_notes: bool,
use_two_column_layout_for_tq_notes: bool,
+ show_tn_book_intro: bool,
+ show_bc_book_intro: bool,
+ show_tn_chapter_intro: bool,
book_names: Mapping[str, str] = BOOK_NAMES,
book_id_map: dict[str, int] = BOOK_ID_MAP,
) -> list[DocumentPart]:
@@ -94,6 +100,9 @@ def assemble_content_by_book_then_lang(
use_section_visual_separator,
use_two_column_layout_for_tn_notes,
use_two_column_layout_for_tq_notes,
+ show_tn_book_intro,
+ show_bc_book_intro,
+ show_tn_chapter_intro,
)
)
elif not selected_usfm_books and selected_tn_books:
@@ -108,6 +117,9 @@ def assemble_content_by_book_then_lang(
use_section_visual_separator,
use_two_column_layout_for_tn_notes,
use_two_column_layout_for_tq_notes,
+ show_tn_book_intro,
+ show_bc_book_intro,
+ show_tn_chapter_intro,
)
)
elif not selected_usfm_books and not selected_tn_books and selected_tq_books:
@@ -121,6 +133,9 @@ def assemble_content_by_book_then_lang(
selected_rg_books,
use_section_visual_separator,
use_two_column_layout_for_tq_notes,
+ show_tn_book_intro,
+ show_bc_book_intro,
+ show_tn_chapter_intro,
)
)
elif (
@@ -138,11 +153,72 @@ def assemble_content_by_book_then_lang(
selected_bc_books,
selected_rg_books,
use_section_visual_separator,
+ show_bc_book_intro,
)
)
return document_parts
+def assemble_content_by_verse_chapter_at_a_time(
+ usfm_books: Sequence[USFMBook],
+ tn_books: Sequence[TNBook],
+ tq_books: Sequence[TQBook],
+ tw_books: Sequence[TWBook],
+ bc_books: Sequence[BCBook],
+ rg_books: Sequence[RGBook],
+ assembly_layout_kind: AssemblyLayoutEnum,
+ use_section_visual_separator: bool,
+ use_two_column_layout_for_tn_notes: bool,
+ use_two_column_layout_for_tq_notes: bool,
+ show_tn_book_intro: bool,
+ show_bc_book_intro: bool,
+ show_tn_chapter_intro: bool,
+ show_bc_chapter_commentary: bool,
+ show_rg_chapter_commentary: bool,
+ book_names: Mapping[str, str] = BOOK_NAMES,
+ book_id_map: dict[str, int] = BOOK_ID_MAP,
+) -> list[DocumentPart]:
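+    """
+    Interleave content verse by verse, one chapter at a time, when USFM
+    books are selected; otherwise fall back to chapter-at-a-time
+    assembly, since verse-level interleaving requires USFM versification.
+    """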
+ document_parts: list[DocumentPart] = []
+ if usfm_books:
+ document_parts.extend(
+ assemble_usfm_by_verse_chapter_at_a_time(
+ usfm_books,
+ tn_books,
+ tq_books,
+ tw_books,
+ bc_books,
+ rg_books,
+ use_section_visual_separator,
+ use_two_column_layout_for_tn_notes,
+ use_two_column_layout_for_tq_notes,
+ show_tn_book_intro,
+ show_bc_book_intro,
+ show_tn_chapter_intro,
+ show_bc_chapter_commentary,
+ show_rg_chapter_commentary,
+ )
+ )
+    else:  # USFM not provided, so versification doesn't apply
+ document_parts.extend(
+ assemble_usfm_by_chapter(
+ usfm_books,
+ tn_books,
+ tq_books,
+ tw_books,
+ bc_books,
+ rg_books,
+ use_section_visual_separator,
+ use_two_column_layout_for_tn_notes,
+ use_two_column_layout_for_tq_notes,
+ show_tn_book_intro,
+ show_bc_book_intro,
+ show_tn_chapter_intro,
+ # show_bc_chapter_commentary,
+ )
+ )
+ return document_parts
+
+
def assemble_usfm_by_chapter(
usfm_books: Sequence[USFMBook],
tn_books: Sequence[TNBook],
@@ -153,40 +229,54 @@ def assemble_usfm_by_chapter(
use_section_visual_separator: bool,
use_two_column_layout_for_tn_notes: bool,
use_two_column_layout_for_tq_notes: bool,
+ show_tn_book_intro: bool,
+ show_bc_book_intro: bool,
+ show_tn_chapter_intro: bool,
book_chapters: Mapping[str, int] = BOOK_CHAPTERS,
- show_tn_book_intro: bool = settings.SHOW_TN_BOOK_INTRO,
fmt_str: str = settings.BOOK_NAME_FMT_STR,
+ resource_type_name_fmt_str: str = settings.LEFT_ALIGNED_HEADER_FMT_STR,
) -> list[DocumentPart]:
"""
Construct the Docx wherein at least one USFM resource exists, one column
layout.
"""
-
-
-
-
-
-
document_parts: list[DocumentPart] = []
- if show_tn_book_intro:
- for tn_book in tn_books:
- if tn_book.book_intro:
- book_intro_ = tn_book.book_intro
- book_intro_adj = adjust_book_intro_headings(book_intro_)
- document_parts.append(
- DocumentPart(
- content=book_intro_adj,
- is_rtl=tn_book and tn_book.lang_direction == LangDirEnum.RTL,
- use_section_visual_separator=use_section_visual_separator,
- )
+ for tn_book in tn_books:
+ if show_tn_book_intro and tn_book.book_intro:
+ document_parts.append(
+ DocumentPart(
+ content=resource_type_name_fmt_str.format(
+ tn_book.resource_type_name
+ ),
+ is_rtl=tn_book and tn_book.lang_direction == LangDirEnum.RTL,
+ use_section_visual_separator=False,
+ )
+ )
+ book_intro_adj = adjust_book_intro_headings(tn_book.book_intro)
+ document_parts.append(
+ DocumentPart(
+ content=book_intro_adj,
+ is_rtl=tn_book and tn_book.lang_direction == LangDirEnum.RTL,
+ use_section_visual_separator=use_section_visual_separator,
)
+ )
for bc_book in bc_books:
- document_parts.append(
- DocumentPart(
- content=bc_book.book_intro,
- use_section_visual_separator=use_section_visual_separator,
+ if show_bc_book_intro and bc_book.book_intro:
+ document_parts.append(
+ DocumentPart(
+ content=resource_type_name_fmt_str.format(
+ bc_book.resource_type_name
+ ),
+ is_rtl=bc_book and bc_book.lang_direction == LangDirEnum.RTL,
+ use_section_visual_separator=False,
+ )
+ )
+ document_parts.append(
+ DocumentPart(
+ content=bc_book.book_intro,
+ use_section_visual_separator=use_section_visual_separator,
+ )
)
- )
book_codes = {usfm_book.book_code for usfm_book in usfm_books}
for book_code in book_codes:
num_chapters = book_chapters[book_code]
@@ -194,26 +284,48 @@ def assemble_usfm_by_chapter(
for tn_book in [
tn_book for tn_book in tn_books if tn_book.book_code == book_code
]:
- if chapter_num in tn_book.chapters:
+ tn_chapter_intro = chapter_intro(
+ tn_book, chapter_num, use_section_visual_separator
+ )
+ if show_tn_chapter_intro and tn_chapter_intro:
document_parts.append(
DocumentPart(
- content=chapter_intro(
- tn_book, chapter_num, use_section_visual_separator
+ content=resource_type_name_fmt_str.format(
+ tn_book.resource_type_name
),
is_rtl=tn_book
and tn_book.lang_direction == LangDirEnum.RTL,
+ use_section_visual_separator=False,
+ )
+ )
+ document_parts.append(
+ DocumentPart(
+ content=tn_chapter_intro,
+ is_rtl=tn_book
+ and tn_book.lang_direction == LangDirEnum.RTL,
use_section_visual_separator=use_section_visual_separator,
)
)
for bc_book in [
bc_book for bc_book in bc_books if bc_book.book_code == book_code
]:
- if chapter_num in bc_book.chapters:
+ chapter_commentary_ = chapter_commentary(
+ bc_book, chapter_num, use_section_visual_separator
+ )
+ if chapter_commentary_:
document_parts.append(
DocumentPart(
- content=chapter_commentary(
- bc_book, chapter_num, use_section_visual_separator
+ content=resource_type_name_fmt_str.format(
+ bc_book.resource_type_name
),
+ is_rtl=bc_book
+ and bc_book.lang_direction == LangDirEnum.RTL,
+ use_section_visual_separator=False,
+ )
+ )
+ document_parts.append(
+ DocumentPart(
+ content=chapter_commentary_,
use_section_visual_separator=use_section_visual_separator,
)
)
@@ -226,10 +338,20 @@ def assemble_usfm_by_chapter(
DocumentPart(
content=fmt_str.format(usfm_book.national_book_name),
add_hr_p=False,
- use_section_visual_separator=use_section_visual_separator,
+ use_section_visual_separator=False,
)
)
if chapter_num in usfm_book.chapters:
+ document_parts.append(
+ DocumentPart(
+ content=resource_type_name_fmt_str.format(
+ usfm_book.resource_type_name
+ ),
+ is_rtl=usfm_book
+ and usfm_book.lang_direction == LangDirEnum.RTL,
+ use_section_visual_separator=False,
+ )
+ )
document_parts.append(
DocumentPart(
content=usfm_book.chapters[chapter_num].content,
@@ -244,81 +366,505 @@ def assemble_usfm_by_chapter(
for tn_book in tn_books
if tn_book.book_code == usfm_book.book_code
]:
- if chapter_num in tn_book.chapters:
- tn_verses = tn_chapter_verses(
- tn_book,
- chapter_num,
- use_section_visual_separator,
- use_two_column_layout_for_tn_notes,
+ tn_verses = tn_chapter_verses(
+ tn_book,
+ chapter_num,
+ use_section_visual_separator,
+ use_two_column_layout_for_tn_notes,
+ )
+ if tn_verses:
+ document_parts.append(
+ DocumentPart(
+ content=resource_type_name_fmt_str.format(
+ tn_book.resource_type_name
+ ),
+ is_rtl=tn_book
+ and tn_book.lang_direction == LangDirEnum.RTL,
+ use_section_visual_separator=False,
+ )
)
- if tn_verses:
- document_parts.append(
- DocumentPart(
- content=tn_verses,
- is_rtl=tn_book
- and tn_book.lang_direction == LangDirEnum.RTL,
- contained_in_two_column_section=use_two_column_layout_for_tn_notes,
- add_hr_p=False,
- use_section_visual_separator=use_section_visual_separator,
- )
+ document_parts.append(
+ DocumentPart(
+ content=tn_verses,
+ is_rtl=tn_book
+ and tn_book.lang_direction == LangDirEnum.RTL,
+ contained_in_two_column_section=use_two_column_layout_for_tn_notes,
+ add_hr_p=False,
+ use_section_visual_separator=False,
)
- document_parts.append(
- DocumentPart(
- content="",
- use_section_visual_separator=use_section_visual_separator,
- )
+ )
+                    # Trick the HTML-to-docx parser into keeping this part, and
+                    # hence the hr after a two-column section, by giving it an
+                    # HTML non-breaking space rather than an empty string.
+                    document_parts.append(
+                        DocumentPart(
+                            content="&nbsp;",
+ use_section_visual_separator=use_section_visual_separator,
)
+ )
for tq_book in [
tq_book
for tq_book in tq_books
if tq_book.book_code == usfm_book.book_code
]:
- if chapter_num in tq_book.chapters:
- tq_verses = tq_chapter_verses(
- tq_book,
- chapter_num,
- use_section_visual_separator,
- use_two_column_layout_for_tq_notes,
+ tq_verses = tq_chapter_verses(
+ tq_book,
+ chapter_num,
+ use_section_visual_separator,
+ use_two_column_layout_for_tq_notes,
+ )
+ if tq_verses:
+ document_parts.append(
+ DocumentPart(
+ content=resource_type_name_fmt_str.format(
+ tq_book.resource_type_name
+ ),
+ is_rtl=tq_book
+ and tq_book.lang_direction == LangDirEnum.RTL,
+ use_section_visual_separator=False,
+ )
)
- if tq_verses:
- document_parts.append(
- DocumentPart(
- content=tq_verses,
- is_rtl=tq_book
- and tq_book.lang_direction == LangDirEnum.RTL,
- contained_in_two_column_section=use_two_column_layout_for_tq_notes,
- add_hr_p=False,
- use_section_visual_separator=use_section_visual_separator,
- )
+ document_parts.append(
+ DocumentPart(
+ content=tq_verses,
+ is_rtl=tq_book
+ and tq_book.lang_direction == LangDirEnum.RTL,
+ contained_in_two_column_section=use_two_column_layout_for_tq_notes,
+ use_section_visual_separator=False,
)
- document_parts.append(
- DocumentPart(
- content="",
- use_section_visual_separator=use_section_visual_separator,
- )
+ )
+                    # Trick the HTML-to-docx parser into keeping this part, and
+                    # hence the hr after a two-column section, by giving it an
+                    # HTML non-breaking space rather than an empty string.
+                    document_parts.append(
+                        DocumentPart(
+                            content="&nbsp;",
+ use_section_visual_separator=use_section_visual_separator,
)
+ )
for rg_book in [
rg_book
for rg_book in rg_books
if rg_book.book_code == usfm_book.book_code
]:
- if chapter_num in rg_book.chapters:
+ rg_verses = rg_chapter_verses(
+ rg_book, chapter_num, use_section_visual_separator
+ )
+ if rg_verses:
document_parts.append(
DocumentPart(
- content=rg_chapter_verses(
- rg_book, chapter_num, use_section_visual_separator
+ content=resource_type_name_fmt_str.format(
+ rg_book.resource_type_name
),
+ is_rtl=rg_book
+ and rg_book.lang_direction == LangDirEnum.RTL,
+ use_section_visual_separator=False,
+ )
+ )
+ document_parts.append(
+ DocumentPart(
+ content=rg_verses,
use_section_visual_separator=use_section_visual_separator,
),
)
+ # document_parts.append(
+ # DocumentPart(
+ # content="",
+ # add_hr_p=False,
+ # add_page_break=True,
+ # use_section_visual_separator=use_section_visual_separator,
+ # )
+ # )
+ return document_parts
+
+
+def assemble_usfm_by_verse_chapter_at_a_time(
+ usfm_books: Sequence[USFMBook],
+ tn_books: Sequence[TNBook],
+ tq_books: Sequence[TQBook],
+ tw_books: Sequence[TWBook],
+ bc_books: Sequence[BCBook],
+ rg_books: Sequence[RGBook],
+ use_section_visual_separator: bool,
+ use_two_column_layout_for_tn_notes: bool,
+ use_two_column_layout_for_tq_notes: bool,
+ show_tn_book_intro: bool,
+ show_bc_book_intro: bool,
+ show_tn_chapter_intro: bool,
+ show_bc_chapter_commentary: bool,
+ show_rg_chapter_commentary: bool,
+ end_of_chapter_html: str = settings.END_OF_CHAPTER_HTML,
+ close_direction_html: str = "",
+ hr: str = settings.HR,
+ book_chapters: Mapping[str, int] = BOOK_CHAPTERS,
+ fmt_str: str = settings.LEFT_ALIGNED_HEADER_FMT_STR,
+ verse_span_fmt_str: str = settings.VERSE_SPAN_FMT_STR,
+ tw_word_list_vertical: bool = settings.TW_WORD_LIST_VERTICAL,
+ resource_type_codes_and_names: Mapping[
+ str, str
+ ] = settings.RESOURCE_TYPE_CODES_AND_NAMES,
+) -> list[DocumentPart]:
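+    """
+    Construct the Docx parts for a verse-interleaved layout. For each
+    chapter, emit the optional TN chapter intro, BC chapter commentary,
+    and RG chapter content, then each USFM verse followed by its
+    associated TN, TQ, and TW content, and finally the corresponding
+    verse from a secondary USFM resource if one was selected.
+    """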
+ document_parts: list[DocumentPart] = []
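+    # Deduplicate lang_codes while preserving the order in which they appear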
+ lang_codes = list(dict.fromkeys(usfm_book.lang_code for usfm_book in usfm_books))
+ for tn_book in tn_books:
+ if show_tn_book_intro and tn_book.book_intro:
+ book_intro_adj = adjust_book_intro_headings(tn_book.book_intro)
+ document_parts.append(
+ DocumentPart(
+ content=fmt_str.format(tn_book.resource_type_name),
+ is_rtl=tn_book and tn_book.lang_direction == LangDirEnum.RTL,
+ use_section_visual_separator=False,
+ )
+ )
+ document_parts.append(
+ DocumentPart(
+ content=book_intro_adj,
+ is_rtl=tn_book and tn_book.lang_direction == LangDirEnum.RTL,
+ use_section_visual_separator=use_section_visual_separator,
+ )
+ )
+ for bc_book in bc_books:
+ if show_bc_book_intro and bc_book.book_intro:
+ document_parts.append(
+ DocumentPart(
+ content=fmt_str.format(bc_book.resource_type_name),
+ is_rtl=bc_book and bc_book.lang_direction == LangDirEnum.RTL,
+ use_section_visual_separator=False,
+ )
+ )
document_parts.append(
DocumentPart(
- content="",
- add_hr_p=False,
- add_page_break=True,
+ content=bc_book.book_intro,
+ is_rtl=bc_book and bc_book.lang_direction == LangDirEnum.RTL,
use_section_visual_separator=use_section_visual_separator,
)
)
+ book_codes = list(dict.fromkeys(usfm_book.book_code for usfm_book in usfm_books))
+ for book_code in book_codes:
+ num_chapters = book_chapters[book_code]
+ for chapter_num in range(1, num_chapters + 1):
+ for lang_code in lang_codes:
+ if show_tn_chapter_intro:
+ for tn_book in [
+ tn_book
+ for tn_book in tn_books
+ if tn_book.book_code == book_code
+ ]:
+ tn_chapter_intro = chapter_intro(
+ tn_book,
+ chapter_num,
+ use_section_visual_separator,
+ )
+ if tn_chapter_intro:
+ document_parts.append(
+ DocumentPart(
+ content=fmt_str.format(tn_book.resource_type_name),
+ is_rtl=tn_book
+ and tn_book.lang_direction == LangDirEnum.RTL,
+ use_section_visual_separator=False,
+ )
+ )
+ document_parts.append(
+ DocumentPart(
+ content=tn_chapter_intro,
+ is_rtl=tn_book
+ and tn_book.lang_direction == LangDirEnum.RTL,
+ use_section_visual_separator=use_section_visual_separator,
+ )
+ )
+ if show_bc_chapter_commentary:
+ for bc_book in [
+ bc_book
+ for bc_book in bc_books
+ if bc_book.book_code == book_code
+ ]:
+ bc_chapter_commentary = chapter_commentary(
+ bc_book, chapter_num, False
+ )
+ if bc_chapter_commentary:
+ document_parts.append(
+ DocumentPart(
+ content=fmt_str.format(bc_book.resource_type_name),
+ is_rtl=bc_book
+ and bc_book.lang_direction == LangDirEnum.RTL,
+ use_section_visual_separator=False,
+ )
+ )
+ document_parts.append(
+ DocumentPart(
+ content=bc_chapter_commentary,
+ is_rtl=bc_book
+ and bc_book.lang_direction == LangDirEnum.RTL,
+ use_section_visual_separator=use_section_visual_separator,
+ )
+ )
+ if show_rg_chapter_commentary:
+ rg_verses = None
+ for rg_book in [
+ rg_book
+ for rg_book in rg_books
+ if rg_book.book_code == book_code
+ ]:
+ rg_verses = rg_chapter_verses(
+ rg_book, chapter_num, use_section_visual_separator
+ )
+ if rg_verses:
+ document_parts.append(
+ DocumentPart(
+ content=fmt_str.format(rg_book.resource_type_name),
+ is_rtl=rg_book
+ and rg_book.lang_direction == LangDirEnum.RTL,
+ use_section_visual_separator=False,
+ )
+ )
+ document_parts.append(
+ DocumentPart(
+ content=rg_verses,
+ is_rtl=rg_book
+ and rg_book.lang_direction == LangDirEnum.RTL,
+ use_section_visual_separator=use_section_visual_separator,
+ )
+ )
+ selected_usfm_books = [
+ usfm_book
+ for usfm_book in usfm_books
+ if usfm_book.book_code == book_code
+ and usfm_book.lang_code == lang_code
+ ]
+ selected_tn_books = [
+ tn_book
+ for tn_book in tn_books
+ if tn_book.book_code == book_code and tn_book.lang_code == lang_code
+ ]
+ selected_tq_books = [
+ tq_book
+ for tq_book in tq_books
+ if tq_book.book_code == book_code and tq_book.lang_code == lang_code
+ ]
+ selected_tw_books = [
+ tw_book
+ for tw_book in tw_books
+ if tw_book.book_code == book_code and tw_book.lang_code == lang_code
+ ]
+ usfm_book = None
+ usfm_book2 = None
+ usfm_chapter = None
+ usfm_chapter2 = None
+ if len(selected_usfm_books) == 1:
+ usfm_book = selected_usfm_books[0]
+ usfm_chapter = (
+ usfm_book.chapters[chapter_num]
+ if chapter_num in usfm_book.chapters
+ else None
+ )
+                elif len(selected_usfm_books) == 2:  # Second USFM chosen, e.g., fr f10
+                    # TODO Later we might order resource types by the order in which
+                    # they were clicked, at which point we would likely just use the
+                    # else clause below.
+                    # Assuming f10 should be treated as secondary to ulb for fr.
+ if selected_usfm_books[0].resource_type_name in [
+ resource_type_codes_and_names.get("f10", ""),
+ resource_type_codes_and_names.get("udb", ""),
+ ]:
+ usfm_book2 = selected_usfm_books[0]
+ usfm_chapter2 = (
+ usfm_book2.chapters[chapter_num]
+ if chapter_num in usfm_book2.chapters
+ else None
+ )
+ usfm_book = selected_usfm_books[1]
+ usfm_chapter = (
+ usfm_book.chapters[chapter_num]
+ if chapter_num in usfm_book.chapters
+ else None
+ )
+ else:
+ usfm_book = selected_usfm_books[0]
+ usfm_chapter = (
+ usfm_book.chapters[chapter_num]
+ if chapter_num in usfm_book.chapters
+ else None
+ )
+ usfm_book2 = selected_usfm_books[1]
+ usfm_chapter2 = (
+ usfm_book2.chapters[chapter_num]
+ if chapter_num in usfm_book2.chapters
+ else None
+ )
+                tn_chapter = (
+                    selected_tn_books[0].chapters.get(chapter_num)
+                    if selected_tn_books
+                    else None
+                )
+                tq_chapter = (
+                    selected_tq_books[0].chapters.get(chapter_num)
+                    if selected_tq_books
+                    else None
+                )
+ if usfm_book and usfm_chapter:
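+                    # Split the chapter content into a verse_ref -> verse HTML
+                    # mapping so the other selected resources can be
+                    # interleaved per verse.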
+ usfm_chapter.verses = handle_split_chapter_into_verses(
+ usfm_book, usfm_chapter
+ )
+ document_parts.append(
+ DocumentPart(
+ content=fmt_str.format(usfm_book.resource_type_name),
+ is_rtl=usfm_book
+ and usfm_book.lang_direction == LangDirEnum.RTL,
+ use_section_visual_separator=False,
+ )
+ )
+ for verse_ref, verse in usfm_chapter.verses.items():
+ document_parts.append(
+ DocumentPart(
+ content=fmt_str.format(
+ f"{usfm_book.national_book_name} {chapter_num}:{verse_ref}"
+ ),
+ is_rtl=usfm_book
+ and usfm_book.lang_direction == LangDirEnum.RTL,
+ use_section_visual_separator=False,
+ )
+ )
+ document_parts.append(
+ DocumentPart(
+ content=verse_span_fmt_str.format(verse),
+ is_rtl=usfm_book
+ and usfm_book.lang_direction == LangDirEnum.RTL,
+ use_section_visual_separator=use_section_visual_separator,
+ )
+ )
+ if (
+ selected_tn_books
+ and tn_chapter
+ and tn_chapter.verses
+ and verse_ref in tn_chapter.verses
+ ):
+ document_parts.append(
+ DocumentPart(
+ content=fmt_str.format(
+ selected_tn_books[0].resource_type_name
+ ),
+ is_rtl=selected_tn_books[0]
+ and selected_tn_books[0].lang_direction
+ == LangDirEnum.RTL,
+ use_section_visual_separator=False,
+ )
+ )
+ document_parts.append(
+ DocumentPart(
+ content=tn_chapter.verses[verse_ref],
+ is_rtl=selected_tn_books[0]
+ and selected_tn_books[0].lang_direction
+ == LangDirEnum.RTL,
+ use_section_visual_separator=use_section_visual_separator,
+ )
+ )
+ if (
+ selected_tq_books
+ and tq_chapter
+ and tq_chapter.verses
+ and verse_ref in tq_chapter.verses
+ ):
+ document_parts.append(
+ DocumentPart(
+ content=fmt_str.format(
+ selected_tq_books[0].resource_type_name
+ ),
+ is_rtl=selected_tq_books[0]
+ and selected_tq_books[0].lang_direction
+ == LangDirEnum.RTL,
+ use_section_visual_separator=False,
+ )
+ )
+ document_parts.append(
+ DocumentPart(
+ content=tq_chapter.verses[verse_ref],
+ is_rtl=selected_tq_books[0]
+ and selected_tq_books[0].lang_direction
+ == LangDirEnum.RTL,
+ use_section_visual_separator=use_section_visual_separator,
+ )
+ )
+ if selected_tw_books:
+ tw_book = selected_tw_books[0]
+ words = translation_words_for_content(tw_book, verse)
+ unique_words = unique_list_of_strings(words)
+ if unique_words:
+ document_parts.append(
+ DocumentPart(
+ content=fmt_str.format(
+ tw_book.resource_type_name
+ ),
+ is_rtl=tw_book
+ and tw_book.lang_direction == LangDirEnum.RTL,
+ use_section_visual_separator=False,
+ )
+ )
+ if tw_word_list_vertical:
+                                    html = (
+                                        "<ul>\n"
+                                        + "\n".join(
+                                            [
+                                                f'<li><a href="#{word}">{localized_word}</a></li>'
+                                                for localized_word, word in unique_words
+                                            ]
+                                        )
+                                        + "</ul>"
+                                    )
+                                else:
+                                    html = ", ".join(
+                                        [
+                                            f'<a href="#{word}">{localized_word}</a>'
+                                            for localized_word, word in unique_words
+                                        ]
+                                    )
+ logger.debug("tw links html: %s", html)
+ document_parts.append(
+ DocumentPart(
+ content=html,
+ is_rtl=tw_book
+ and tw_book.lang_direction == LangDirEnum.RTL,
+ use_section_visual_separator=use_section_visual_separator,
+ )
+ )
+ if usfm_book2 and usfm_chapter2:
+ document_parts.append(
+ DocumentPart(
+ content=fmt_str.format(
+ usfm_book2.resource_type_name
+ ),
+ is_rtl=usfm_book2
+ and usfm_book2.lang_direction == LangDirEnum.RTL,
+ use_section_visual_separator=False,
+ )
+ )
+ usfm_chapter2.verses = handle_split_chapter_into_verses(
+ usfm_book2, usfm_chapter2
+ )
+ if (
+ usfm_chapter2.verses
+ and verse_ref in usfm_chapter2.verses
+ ):
+ document_parts.append(
+ DocumentPart(
+ content=fmt_str.format(
+ f"{usfm_book2.national_book_name} {chapter_num}:{verse_ref}"
+ ),
+ is_rtl=usfm_book2
+ and usfm_book2.lang_direction
+ == LangDirEnum.RTL,
+ use_section_visual_separator=False,
+ )
+ )
+ document_parts.append(
+ DocumentPart(
+ content=verse_span_fmt_str.format(
+ usfm_chapter2.verses[verse_ref]
+ ),
+ is_rtl=usfm_book2
+ and usfm_book2.lang_direction
+ == LangDirEnum.RTL,
+ use_section_visual_separator=use_section_visual_separator,
+ )
+ )
return document_parts
@@ -332,25 +878,28 @@ def assemble_tn_by_chapter(
use_section_visual_separator: bool,
use_two_column_layout_for_tn_notes: bool,
use_two_column_layout_for_tq_notes: bool,
+ show_tn_book_intro: bool,
+ show_bc_book_intro: bool,
+ show_tn_chapter_intro: bool,
book_chapters: Mapping[str, int] = BOOK_CHAPTERS,
- show_tn_book_intro: bool = settings.SHOW_TN_BOOK_INTRO,
+ fmt_str: str = settings.LEFT_ALIGNED_HEADER_FMT_STR,
) -> list[DocumentPart]:
"""
Construct the HTML for a 'by chapter' strategy wherein at least
tn_book_content_units exists.
"""
-
-
-
-
-
document_parts: list[DocumentPart] = []
if show_tn_book_intro:
- # Add book intros for each tn_book
for tn_book in tn_books:
if tn_book.book_intro:
- book_intro_ = tn_book.book_intro
- book_intro_adj = adjust_book_intro_headings(book_intro_)
+ book_intro_adj = adjust_book_intro_headings(tn_book.book_intro)
+ document_parts.append(
+ DocumentPart(
+ content=fmt_str.format(tn_book.resource_type_name),
+ is_rtl=tn_book and tn_book.lang_direction == LangDirEnum.RTL,
+ use_section_visual_separator=False,
+ )
+ )
document_parts.append(
DocumentPart(
content=book_intro_adj,
@@ -358,46 +907,74 @@ def assemble_tn_by_chapter(
use_section_visual_separator=use_section_visual_separator,
)
)
- for bc_book in bc_books:
- document_parts.append(
- DocumentPart(
- content=bc_book_intro(bc_book, use_section_visual_separator),
- use_section_visual_separator=use_section_visual_separator,
- )
- )
+ if show_bc_book_intro:
+ for bc_book in bc_books:
+ bc_book_intro_ = bc_book_intro(bc_book, use_section_visual_separator)
+ if bc_book_intro_:
+ document_parts.append(
+ DocumentPart(
+ content=fmt_str.format(bc_book.resource_type_name),
+ is_rtl=bc_book and bc_book.lang_direction == LangDirEnum.RTL,
+ use_section_visual_separator=False,
+ )
+ )
+ document_parts.append(
+ DocumentPart(
+ content=bc_book_intro_,
+ use_section_visual_separator=use_section_visual_separator,
+ )
+ )
book_codes = {tn_book.book_code for tn_book in tn_books}
for book_code in book_codes:
num_chapters = book_chapters[book_code]
for chapter_num in range(1, num_chapters + 1):
- for tn_book in [
- tn_book for tn_book in tn_books if tn_book.book_code == book_code
- ]:
- one_column_html = []
- if chapter_num in tn_book.chapters:
- one_column_html.append(
- chapter_intro(
- tn_book, chapter_num, use_section_visual_separator
- )
- )
- one_column_html_ = "".join(one_column_html)
- if one_column_html_:
- document_parts.append(
- DocumentPart(
- content=one_column_html_,
- is_rtl=tn_book
- and tn_book.lang_direction == LangDirEnum.RTL,
- use_section_visual_separator=use_section_visual_separator,
+ if show_tn_chapter_intro:
+ for tn_book in [
+ tn_book for tn_book in tn_books if tn_book.book_code == book_code
+ ]:
+ one_column_html = []
+ if chapter_num in tn_book.chapters:
+ one_column_html.append(
+ chapter_intro(
+ tn_book, chapter_num, use_section_visual_separator
)
)
+ one_column_html_ = "".join(one_column_html)
+ if one_column_html_:
+ document_parts.append(
+ DocumentPart(
+ content=fmt_str.format(tn_book.resource_type_name),
+ is_rtl=tn_book
+ and tn_book.lang_direction == LangDirEnum.RTL,
+ use_section_visual_separator=False,
+ )
+ )
+ document_parts.append(
+ DocumentPart(
+ content=one_column_html_,
+ is_rtl=tn_book
+ and tn_book.lang_direction == LangDirEnum.RTL,
+ use_section_visual_separator=use_section_visual_separator,
+ )
+ )
for bc_book in [
bc_book for bc_book in bc_books if bc_book.book_code == book_code
]:
- if chapter_num in bc_book.chapters:
+ bc_chapter_commentary = chapter_commentary(
+ bc_book, chapter_num, use_section_visual_separator
+ )
+ if bc_chapter_commentary:
document_parts.append(
DocumentPart(
- content=chapter_commentary(
- bc_book, chapter_num, use_section_visual_separator
- ),
+ content=fmt_str.format(bc_book.resource_type_name),
+ is_rtl=bc_book
+ and bc_book.lang_direction == LangDirEnum.RTL,
+ use_section_visual_separator=False,
+ )
+ )
+ document_parts.append(
+ DocumentPart(
+ content=bc_chapter_commentary,
use_section_visual_separator=use_section_visual_separator,
)
)
@@ -412,6 +989,14 @@ def assemble_tn_by_chapter(
use_two_column_layout_for_tn_notes,
)
if tn_verses:
+ document_parts.append(
+ DocumentPart(
+ content=fmt_str.format(tn_book.resource_type_name),
+ is_rtl=tn_book
+ and tn_book.lang_direction == LangDirEnum.RTL,
+ use_section_visual_separator=False,
+ )
+ )
document_parts.append(
DocumentPart(
content=tn_verses,
@@ -419,12 +1004,15 @@ def assemble_tn_by_chapter(
and tn_book.lang_direction == LangDirEnum.RTL,
contained_in_two_column_section=use_two_column_layout_for_tn_notes,
add_hr_p=False,
- use_section_visual_separator=use_section_visual_separator,
+ use_section_visual_separator=False,
)
)
+                    # Trick the HTML-to-docx parser into keeping this part, and
+                    # hence the hr after a two-column section, by giving it an
+                    # HTML non-breaking space rather than an empty string.
                     document_parts.append(
                         DocumentPart(
-                            content="",
+                            content="&nbsp;",
use_section_visual_separator=use_section_visual_separator,
)
)
@@ -438,6 +1026,14 @@ def assemble_tn_by_chapter(
use_two_column_layout_for_tq_notes,
)
if tq_verses:
+ document_parts.append(
+ DocumentPart(
+ content=fmt_str.format(tq_book.resource_type_name),
+ is_rtl=tq_book
+ and tq_book.lang_direction == LangDirEnum.RTL,
+ use_section_visual_separator=False,
+ )
+ )
document_parts.append(
DocumentPart(
content=tq_verses,
@@ -445,12 +1041,15 @@ def assemble_tn_by_chapter(
and tq_book.lang_direction == LangDirEnum.RTL,
contained_in_two_column_section=use_two_column_layout_for_tq_notes,
add_hr_p=False,
- use_section_visual_separator=use_section_visual_separator,
+ use_section_visual_separator=False,
)
)
+                    # Trick the HTML-to-docx parser into keeping this part, and
+                    # hence the hr after a two-column section, by giving it an
+                    # HTML non-breaking space rather than an empty string.
                     document_parts.append(
                         DocumentPart(
-                            content="",
+                            content="&nbsp;",
use_section_visual_separator=use_section_visual_separator,
)
)
@@ -461,6 +1060,14 @@ def assemble_tn_by_chapter(
rg_book, chapter_num, use_section_visual_separator
)
if rg_verses:
+ document_parts.append(
+ DocumentPart(
+ content=fmt_str.format(rg_book.resource_type_name),
+ is_rtl=rg_book
+ and rg_book.lang_direction == LangDirEnum.RTL,
+ use_section_visual_separator=False,
+ )
+ )
document_parts.append(
DocumentPart(
content=rg_verses,
@@ -471,14 +1078,14 @@ def assemble_tn_by_chapter(
use_section_visual_separator=use_section_visual_separator,
)
)
- document_parts.append(
- DocumentPart(
- content="",
- add_hr_p=False,
- add_page_break=True,
- use_section_visual_separator=use_section_visual_separator,
- )
- )
+ # document_parts.append(
+ # DocumentPart(
+ # content="",
+ # add_hr_p=False,
+ # add_page_break=True,
+ # use_section_visual_separator=use_section_visual_separator,
+ # )
+ # )
return document_parts
@@ -491,16 +1098,16 @@ def assemble_tq_by_chapter(
rg_books: Sequence[RGBook],
use_section_visual_separator: bool,
use_two_column_layout_for_tq_notes: bool,
+ show_tn_book_intro: bool,
+ show_bc_book_intro: bool,
+ show_tn_chapter_intro: bool,
book_chapters: Mapping[str, int] = BOOK_CHAPTERS,
+ fmt_str: str = settings.LEFT_ALIGNED_HEADER_FMT_STR,
) -> list[DocumentPart]:
"""
Construct the HTML for a 'by chapter' strategy wherein at least
tq_book_content_units exists.
"""
-
-
-
-
document_parts: list[DocumentPart] = []
book_codes = {tq_book.book_code for tq_book in tq_books}
for book_code in book_codes:
@@ -511,11 +1118,19 @@ def assemble_tq_by_chapter(
for bc_book in [
bc_book for bc_book in bc_books if bc_book.book_code == book_code
]:
- one_column_html.append(
- chapter_commentary(
- bc_book, chapter_num, use_section_visual_separator
- )
+ chapter_commentary_ = chapter_commentary(
+ bc_book, chapter_num, use_section_visual_separator
)
+ if chapter_commentary_:
+ document_parts.append(
+ DocumentPart(
+ content=fmt_str.format(bc_book.resource_type_name),
+ is_rtl=bc_book
+ and bc_book.lang_direction == LangDirEnum.RTL,
+ use_section_visual_separator=False,
+ )
+ )
+ one_column_html.append(chapter_commentary_)
if one_column_html:
document_parts.append(DocumentPart(content="".join(one_column_html)))
for tq_book in [
@@ -530,6 +1145,14 @@ def assemble_tq_by_chapter(
use_two_column_layout_for_tq_notes,
)
if tq_verses:
+ document_parts.append(
+ DocumentPart(
+ content=fmt_str.format(tq_book.resource_type_name),
+ is_rtl=tq_book
+ and tq_book.lang_direction == LangDirEnum.RTL,
+ use_section_visual_separator=False,
+ )
+ )
document_parts.append(
DocumentPart(
content=tq_verses,
@@ -537,12 +1160,15 @@ def assemble_tq_by_chapter(
and tq_book.lang_direction == LangDirEnum.RTL,
contained_in_two_column_section=use_two_column_layout_for_tq_notes,
add_hr_p=False,
- use_section_visual_separator=use_section_visual_separator,
+ use_section_visual_separator=False,
)
)
+                    # Trick the HTML-to-docx parser into keeping this part, and
+                    # hence the hr after a two-column section, by giving it an
+                    # HTML non-breaking space rather than an empty string.
                     document_parts.append(
                         DocumentPart(
-                            content="",
+                            content="&nbsp;",
use_section_visual_separator=use_section_visual_separator,
)
)
@@ -555,6 +1181,14 @@ def assemble_tq_by_chapter(
rg_book, chapter_num, use_section_visual_separator
)
if rg_verses:
+ document_parts.append(
+ DocumentPart(
+ content=fmt_str.format(rg_book.resource_type_name),
+ is_rtl=rg_book
+ and rg_book.lang_direction == LangDirEnum.RTL,
+ use_section_visual_separator=False,
+ )
+ )
document_parts.append(
DocumentPart(
content=rg_verses,
@@ -562,9 +1196,9 @@ def assemble_tq_by_chapter(
and rg_book.lang_direction == LangDirEnum.RTL,
)
)
- document_parts.append(
- DocumentPart(content="", add_hr_p=False, add_page_break=True)
- )
+ # document_parts.append(
+ # DocumentPart(content="", add_hr_p=False, add_page_break=True)
+ # )
return document_parts
@@ -583,32 +1217,39 @@ def assemble_tw_by_chapter(
bc_books: Sequence[BCBook],
rg_books: Sequence[RGBook],
use_section_visual_separator: bool,
+ show_bc_book_intro: bool,
+ fmt_str: str = settings.LEFT_ALIGNED_HEADER_FMT_STR,
) -> list[DocumentPart]:
"""Construct the HTML for BC and TW."""
document_parts: list[DocumentPart] = []
-
-
-
for bc_book in bc_books:
- document_parts.append(
- DocumentPart(
- content=bc_book.book_intro,
- use_section_visual_separator=use_section_visual_separator,
+ if show_bc_book_intro and bc_book.book_intro:
+ document_parts.append(
+ DocumentPart(
+ content=fmt_str.format(bc_book.resource_type_name),
+ is_rtl=bc_book and bc_book.lang_direction == LangDirEnum.RTL,
+ use_section_visual_separator=False,
+ )
)
- )
- for chapter in bc_book.chapters.values():
document_parts.append(
DocumentPart(
- content=chapter.commentary,
+ content=bc_book.book_intro,
use_section_visual_separator=use_section_visual_separator,
)
)
+ for chapter in bc_book.chapters.values():
document_parts.append(
DocumentPart(
- content="",
- add_hr_p=False,
- add_page_break=True,
+ content=chapter.commentary,
use_section_visual_separator=use_section_visual_separator,
)
)
+ # document_parts.append(
+ # DocumentPart(
+ # content="",
+ # add_hr_p=False,
+ # add_page_break=True,
+ # use_section_visual_separator=use_section_visual_separator,
+ # )
+ # )
return document_parts
diff --git a/backend/doc/domain/assembly_strategies_docx/assembly_strategies_lang_then_book_by_chapter.py b/backend/doc/domain/assembly_strategies_docx/assembly_strategies_lang_then_book_by_chapter.py
index 54ac74b4c..a0a166193 100755
--- a/backend/doc/domain/assembly_strategies_docx/assembly_strategies_lang_then_book_by_chapter.py
+++ b/backend/doc/domain/assembly_strategies_docx/assembly_strategies_lang_then_book_by_chapter.py
@@ -21,13 +21,16 @@
TWBook,
USFMBook,
)
+from doc.domain.parsing import handle_split_chapter_into_verses
from doc.reviewers_guide.model import RGBook
+from doc.utils.list_utils import unique_list_of_strings
+from doc.utils.tw_utils import translation_words_for_content
logger = settings.logger(__name__)
-def assemble_content_by_lang_then_book(
+def assemble_content_by_book(
usfm_books: Sequence[USFMBook],
tn_books: Sequence[TNBook],
tq_books: Sequence[TQBook],
@@ -39,26 +42,30 @@ def assemble_content_by_lang_then_book(
use_section_visual_separator: bool,
use_two_column_layout_for_tn_notes: bool,
use_two_column_layout_for_tq_notes: bool,
+ show_tn_book_intro: bool,
+ show_bc_book_intro: bool,
+ show_tn_chapter_intro: bool,
+ show_bc_chapter_commentary: bool,
book_names: Mapping[str, str] = BOOK_NAMES,
book_id_map: dict[str, int] = BOOK_ID_MAP,
+ resource_type_codes_and_names: Mapping[
+ str, str
+ ] = settings.RESOURCE_TYPE_CODES_AND_NAMES,
) -> list[DocumentPart]:
- """
- Group content by language and then by book and then pass content
- and a couple other parameters, assembly_layout_kind and
- chunk_size, to interleaving strategy to do the actual
- interleaving.
- """
document_parts: list[DocumentPart] = []
- all_lang_codes = (
- {usfm_book.lang_code for usfm_book in usfm_books}
- .union(tn_book.lang_code for tn_book in tn_books)
- .union(tq_book.lang_code for tq_book in tq_books)
- .union(tw_book.lang_code for tw_book in tw_books)
- .union(bc_book.lang_code for bc_book in bc_books)
- .union(rg_book.lang_code for rg_book in rg_books)
+    # Collect lang_codes from all resource types and deduplicate, preserving order
+ lang_codes = list(
+ dict.fromkeys(
+ [
+ *[usfm_book.lang_code for usfm_book in usfm_books],
+ *[tn_book.lang_code for tn_book in tn_books],
+ *[tq_book.lang_code for tq_book in tq_books],
+ *[tw_book.lang_code for tw_book in tw_books],
+ *[bc_book.lang_code for bc_book in bc_books],
+ *[rg_book.lang_code for rg_book in rg_books],
+ ]
+ )
)
- most_lang_codes = list(all_lang_codes)
- # Collect and deduplicate book codes
all_book_codes = (
{usfm_book.book_code for usfm_book in usfm_books}
.union(tn_book.book_code for tn_book in tn_books)
@@ -67,28 +74,40 @@ def assemble_content_by_lang_then_book(
.union(bc_book.book_code for bc_book in bc_books)
.union(rg_book.book_code for rg_book in rg_books)
)
- most_book_codes = list(all_book_codes)
- book_codes_sorted = sorted(
- most_book_codes, key=lambda book_code: book_id_map[book_code]
- )
- for lang_code in most_lang_codes:
+ book_codes = list(all_book_codes)
+ book_codes_sorted = sorted(book_codes, key=lambda book_code: book_id_map[book_code])
+ for lang_code in lang_codes:
for book_code in book_codes_sorted:
selected_usfm_books = [
usfm_book
for usfm_book in usfm_books
if usfm_book.lang_code == lang_code and usfm_book.book_code == book_code
]
- usfm_book = selected_usfm_books[0] if selected_usfm_books else None
- usfm_book2 = (
- selected_usfm_books[1]
- if selected_usfm_books and len(selected_usfm_books) > 1
- else None
- )
+ usfm_book = None
+ usfm_book2 = None
+ if len(selected_usfm_books) == 1:
+ usfm_book = selected_usfm_books[0]
+            elif len(selected_usfm_books) == 2:
+                # Second USFM chosen, e.g., fr f10. Assuming f10 should be
+                # treated as secondary to ulb for fr.
+                # TODO Later we might order resource types by the order in which
+                # they were clicked, at which point we would likely just use the
+                # body of the else clause below.
+ if selected_usfm_books[0].resource_type_name in [
+ resource_type_codes_and_names.get("f10", ""),
+ resource_type_codes_and_names.get("udb", ""),
+ ]:
+ usfm_book = selected_usfm_books[1]
+ usfm_book2 = selected_usfm_books[0]
+ else:
+ usfm_book = selected_usfm_books[0]
+ usfm_book2 = selected_usfm_books[1]
selected_tn_books = [
tn_book
for tn_book in tn_books
if tn_book.lang_code == lang_code and tn_book.book_code == book_code
]
+            # TODO For en, both tn and tn-condensed exist, so we need to admit
+            # the possibility of two tn_books, not just one.
tn_book = selected_tn_books[0] if selected_tn_books else None
selected_tq_books = [
tq_book
@@ -96,7 +115,7 @@ def assemble_content_by_lang_then_book(
if tq_book.lang_code == lang_code and tq_book.book_code == book_code
]
tq_book = selected_tq_books[0] if selected_tq_books else None
- # TWBook doesn't really need to have a book_code attribute
+            # TODO TWBook doesn't really need to have a book_code attribute
             # because TW resources are language-centric, not book-centric.
             # We could do something about that later for the sake of design
             # cleanliness.
@@ -131,6 +150,9 @@ def assemble_content_by_lang_then_book(
use_section_visual_separator,
use_two_column_layout_for_tn_notes,
use_two_column_layout_for_tq_notes,
+ show_tn_book_intro,
+ show_bc_book_intro,
+ show_tn_chapter_intro,
)
)
elif usfm_book is None and tn_book is not None:
@@ -146,6 +168,9 @@ def assemble_content_by_lang_then_book(
use_section_visual_separator,
use_two_column_layout_for_tn_notes,
use_two_column_layout_for_tq_notes,
+ show_tn_book_intro,
+ show_bc_book_intro,
+ show_tn_chapter_intro,
)
)
elif usfm_book is None and tn_book is None and tq_book is not None:
@@ -160,6 +185,7 @@ def assemble_content_by_lang_then_book(
rg_book,
use_section_visual_separator,
use_two_column_layout_for_tq_notes,
+ show_bc_book_intro,
)
)
elif (
@@ -178,6 +204,153 @@ def assemble_content_by_lang_then_book(
bc_book,
rg_book,
use_section_visual_separator,
+ show_bc_book_intro,
+ )
+ )
+ return document_parts
+
+
+def assemble_content_by_verse_book_at_a_time(
+ usfm_books: Sequence[USFMBook],
+ tn_books: Sequence[TNBook],
+ tq_books: Sequence[TQBook],
+ tw_books: Sequence[TWBook],
+ bc_books: Sequence[BCBook],
+ rg_books: Sequence[RGBook],
+ assembly_layout_kind: AssemblyLayoutEnum,
+ chunk_size: ChunkSizeEnum,
+ use_section_visual_separator: bool,
+ use_two_column_layout_for_tn_notes: bool,
+ use_two_column_layout_for_tq_notes: bool,
+ show_tn_book_intro: bool,
+ show_bc_book_intro: bool,
+ show_tn_chapter_intro: bool,
+ show_bc_chapter_commentary: bool,
+ show_rg_chapter_commentary: bool,
+ book_names: Mapping[str, str] = BOOK_NAMES,
+ book_id_map: dict[str, int] = BOOK_ID_MAP,
+ resource_type_codes_and_names: Mapping[
+ str, str
+ ] = settings.RESOURCE_TYPE_CODES_AND_NAMES,
+) -> list[DocumentPart]:
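+    """
+    Group resources by language and then by book, interleaving verse by
+    verse whenever a USFM book exists for the language-book combination;
+    otherwise fall back to book-at-a-time assembly.
+    """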
+ document_parts: list[DocumentPart] = []
+    # Collect lang_codes from all resource types and deduplicate, preserving order
+ lang_codes = list(
+ dict.fromkeys(
+ [
+ *[usfm_book.lang_code for usfm_book in usfm_books],
+ *[tn_book.lang_code for tn_book in tn_books],
+ *[tq_book.lang_code for tq_book in tq_books],
+ *[tw_book.lang_code for tw_book in tw_books],
+ *[bc_book.lang_code for bc_book in bc_books],
+ *[rg_book.lang_code for rg_book in rg_books],
+ ]
+ )
+ )
+ book_codes = list(
+ {usfm_book.book_code for usfm_book in usfm_books}
+ .union(tn_book.book_code for tn_book in tn_books)
+ .union(tq_book.book_code for tq_book in tq_books)
+ .union(tw_book.book_code for tw_book in tw_books)
+ .union(bc_book.book_code for bc_book in bc_books)
+ .union(rg_book.book_code for rg_book in rg_books)
+ )
+ book_codes_sorted = sorted(book_codes, key=lambda book_code: book_id_map[book_code])
+ for lang_code in lang_codes:
+ for book_code in book_codes_sorted:
+ selected_usfm_books = [
+ usfm_book
+ for usfm_book in usfm_books
+ if usfm_book.lang_code == lang_code and usfm_book.book_code == book_code
+ ]
+ usfm_book = None
+ usfm_book2 = None
+ if len(selected_usfm_books) == 1:
+ usfm_book = selected_usfm_books[0]
+            elif len(selected_usfm_books) == 2:  # Second USFM chosen, e.g., fr f10
+                # TODO Later we might order resource types by the order in which
+                # they were clicked, at which point we would likely just use the
+                # else clause below.
+                # Assuming f10 should be treated as secondary to ulb for fr.
+ if selected_usfm_books[0].resource_type_name in [
+ resource_type_codes_and_names.get("f10", ""),
+ resource_type_codes_and_names.get("udb", ""),
+ ]:
+ usfm_book = selected_usfm_books[1]
+ usfm_book2 = selected_usfm_books[0]
+ else:
+ usfm_book = selected_usfm_books[0]
+ usfm_book2 = selected_usfm_books[1]
+ selected_tn_books = [
+ tn_book
+ for tn_book in tn_books
+ if tn_book.lang_code == lang_code and tn_book.book_code == book_code
+ ]
+ tn_book = selected_tn_books[0] if selected_tn_books else None
+ selected_tq_books = [
+ tq_book
+ for tq_book in tq_books
+ if tq_book.lang_code == lang_code and tq_book.book_code == book_code
+ ]
+ tq_book = selected_tq_books[0] if selected_tq_books else None
+            # TODO TWBook doesn't really need to have a book_code attribute
+            # because TW resources are language-centric, not book-centric.
+            # We could do something about that later for the sake of design
+            # cleanliness.
+ selected_tw_books = [
+ tw_book
+ for tw_book in tw_books
+ if tw_book.lang_code == lang_code and tw_book.book_code == book_code
+ ]
+ tw_book = selected_tw_books[0] if selected_tw_books else None
+ selected_bc_books = [
+ bc_book
+ for bc_book in bc_books
+ if bc_book.lang_code == lang_code and bc_book.book_code == book_code
+ ]
+ bc_book = selected_bc_books[0] if selected_bc_books else None
+ selected_rg_books = [
+ rg_book
+ for rg_book in rg_books
+ if rg_book.lang_code == lang_code and rg_book.book_code == book_code
+ ]
+ rg_book = selected_rg_books[0] if selected_rg_books else None
+ if usfm_book:
+ document_parts.extend(
+ assemble_usfm_by_verse_book_at_a_time(
+ usfm_book,
+ tn_book,
+ tq_book,
+ tw_book,
+ usfm_book2,
+ bc_book,
+ rg_book,
+ use_section_visual_separator,
+ use_two_column_layout_for_tn_notes,
+ use_two_column_layout_for_tq_notes,
+ show_tn_book_intro,
+ show_bc_book_intro,
+ show_tn_chapter_intro,
+ show_bc_chapter_commentary,
+ )
+ )
+ else:
+ document_parts.extend(
+ assemble_content_by_book(
+ usfm_books,
+ tn_books,
+ tq_books,
+ tw_books,
+ bc_books,
+ rg_books,
+ assembly_layout_kind,
+ chunk_size,
+ use_section_visual_separator,
+ use_two_column_layout_for_tn_notes,
+ use_two_column_layout_for_tq_notes,
+ show_tn_book_intro,
+ show_bc_book_intro,
+ show_tn_chapter_intro,
+ show_bc_chapter_commentary,
)
)
return document_parts
@@ -194,8 +367,12 @@ def assemble_usfm_by_book(
use_section_visual_separator: bool,
use_two_column_layout_for_tn_notes: bool,
use_two_column_layout_for_tq_notes: bool,
- show_tn_book_intro: bool = settings.SHOW_TN_BOOK_INTRO,
+ show_tn_book_intro: bool,
+ show_bc_book_intro: bool,
+ show_tn_chapter_intro: bool,
fmt_str: str = settings.BOOK_NAME_FMT_STR,
+ resource_type_name_fmt_str: str = settings.LEFT_ALIGNED_HEADER_FMT_STR,
+ tw_word_list_vertical: bool = settings.TW_WORD_LIST_VERTICAL,
) -> list[DocumentPart]:
"""
Construct the HTML for a 'by book' strategy wherein at least
@@ -203,6 +380,13 @@ def assemble_usfm_by_book(
"""
document_parts: list[DocumentPart] = []
if show_tn_book_intro and tn_book and tn_book.book_intro:
+ document_parts.append(
+ DocumentPart(
+ content=resource_type_name_fmt_str.format(tn_book.resource_type_name),
+ is_rtl=tn_book and tn_book.lang_direction == LangDirEnum.RTL,
+ use_section_visual_separator=False,
+ )
+ )
document_parts.append(
DocumentPart(
content=tn_book.book_intro,
@@ -210,14 +394,20 @@ def assemble_usfm_by_book(
use_section_visual_separator=use_section_visual_separator,
)
)
- if bc_book:
- if bc_book.book_intro:
- document_parts.append(
- DocumentPart(
- content=bc_book.book_intro,
- use_section_visual_separator=use_section_visual_separator,
- )
+ if show_bc_book_intro and bc_book and bc_book.book_intro:
+ document_parts.append(
+ DocumentPart(
+ content=resource_type_name_fmt_str.format(bc_book.resource_type_name),
+ is_rtl=bc_book and bc_book.lang_direction == LangDirEnum.RTL,
+ use_section_visual_separator=False,
)
+ )
+ document_parts.append(
+ DocumentPart(
+ content=bc_book.book_intro,
+ use_section_visual_separator=use_section_visual_separator,
+ )
+ )
if usfm_book:
is_rtl = usfm_book and usfm_book.lang_direction == LangDirEnum.RTL
# Add book name
@@ -226,47 +416,27 @@ def assemble_usfm_by_book(
content=fmt_str.format(usfm_book.national_book_name),
is_rtl=is_rtl,
add_hr_p=False,
- use_section_visual_separator=use_section_visual_separator,
+ use_section_visual_separator=False,
)
)
for (
chapter_num,
chapter,
) in usfm_book.chapters.items():
- tn_verses: str = ""
- tq_verses: str = ""
- rg_verses: str = ""
- chapter_intro_ = ""
- chapter_commentary_ = ""
chapter_intro_ = chapter_intro(
tn_book, chapter_num, use_section_visual_separator
)
- tn_verses = tn_chapter_verses(
- tn_book,
- chapter_num,
- use_section_visual_separator,
- use_two_column_layout_for_tn_notes,
- )
- chapter_commentary_ = chapter_commentary(
- bc_book, chapter_num, use_section_visual_separator
- )
- tq_verses = tq_chapter_verses(
- tq_book,
- chapter_num,
- use_section_visual_separator,
- use_two_column_layout_for_tq_notes,
- )
- rg_verses = rg_chapter_verses(
- rg_book, chapter_num, use_section_visual_separator
- )
- document_parts.append(
- DocumentPart(
- content=chapter.content,
- is_rtl=is_rtl,
- use_section_visual_separator=use_section_visual_separator,
+ if chapter_intro_ and tn_book:
+ document_parts.append(
+ DocumentPart(
+ content=resource_type_name_fmt_str.format(
+ tn_book.resource_type_name
+ ),
+ is_rtl=usfm_book
+ and usfm_book.lang_direction == LangDirEnum.RTL,
+ use_section_visual_separator=False,
+ )
)
- )
- if chapter_intro_:
document_parts.append(
DocumentPart(
content=chapter_intro_,
@@ -274,48 +444,162 @@ def assemble_usfm_by_book(
use_section_visual_separator=use_section_visual_separator,
)
)
- if chapter_commentary_:
+ chapter_commentary_ = chapter_commentary(bc_book, chapter_num, False)
+ if chapter_commentary_ and bc_book:
+ document_parts.append(
+ DocumentPart(
+ content=resource_type_name_fmt_str.format(
+ bc_book.resource_type_name
+ ),
+ is_rtl=usfm_book
+ and usfm_book.lang_direction == LangDirEnum.RTL,
+ use_section_visual_separator=False,
+ )
+ )
document_parts.append(
DocumentPart(
content=chapter_commentary_,
is_rtl=is_rtl,
- add_hr_p=False,
+ # add_hr_p=False,
use_section_visual_separator=use_section_visual_separator,
)
)
- if tn_verses:
+ document_parts.append(
+ DocumentPart(
+ content=resource_type_name_fmt_str.format(
+ usfm_book.resource_type_name
+ ),
+ is_rtl=usfm_book and usfm_book.lang_direction == LangDirEnum.RTL,
+ use_section_visual_separator=False,
+ )
+ )
+ document_parts.append(
+ DocumentPart(
+ content=chapter.content,
+ is_rtl=is_rtl,
+ use_section_visual_separator=use_section_visual_separator,
+ )
+ )
+ if tw_book:
+ words = translation_words_for_content(tw_book, chapter.content)
+ unique_words = unique_list_of_strings(words)
+ if unique_words:
+ document_parts.append(
+ DocumentPart(
+ content=resource_type_name_fmt_str.format(
+ tw_book.resource_type_name
+ ),
+ is_rtl=tw_book
+ and tw_book.lang_direction == LangDirEnum.RTL,
+ use_section_visual_separator=False,
+ )
+ )
+ if tw_word_list_vertical:
+                        html = (
+                            "<ul>\n"
+                            + "\n".join(
+                                [
+                                    f'<li><a href="#{word}">{localized_word}</a></li>'
+                                    for localized_word, word in unique_words
+                                ]
+                            )
+                            + "</ul>"
+                        )
+                    else:
+                        html = ", ".join(
+                            [
+                                f'<a href="#{word}">{localized_word}</a>'
+                                for localized_word, word in unique_words
+                            ]
+                        )
+ document_parts.append(
+ DocumentPart(
+ content=html,
+ is_rtl=tw_book
+ and tw_book.lang_direction == LangDirEnum.RTL,
+ use_section_visual_separator=use_section_visual_separator,
+ )
+ )
+ tn_verses = tn_chapter_verses(
+ tn_book,
+ chapter_num,
+ False,
+ use_two_column_layout_for_tn_notes,
+ )
+ if tn_book and tn_verses:
+ document_parts.append(
+ DocumentPart(
+ content=resource_type_name_fmt_str.format(
+ tn_book.resource_type_name
+ ),
+ is_rtl=usfm_book
+ and usfm_book.lang_direction == LangDirEnum.RTL,
+ use_section_visual_separator=False,
+ )
+ )
document_parts.append(
DocumentPart(
content=tn_verses,
is_rtl=is_rtl,
- add_hr_p=False,
contained_in_two_column_section=use_two_column_layout_for_tn_notes,
- use_section_visual_separator=use_section_visual_separator,
+ use_section_visual_separator=False,
)
)
+            # Trick the HTML-to-docx parser into keeping this part, and hence
+            # the hr after a two-column section, by giving it an HTML
+            # non-breaking space rather than an empty string.
             document_parts.append(
                 DocumentPart(
-                    content="",
+                    content="&nbsp;",
use_section_visual_separator=use_section_visual_separator,
)
)
- if tq_verses:
+ tq_verses = tq_chapter_verses(
+ tq_book,
+ chapter_num,
+ False,
+ use_two_column_layout_for_tq_notes,
+ )
+ if tq_verses and tq_book:
+ document_parts.append(
+ DocumentPart(
+ content=resource_type_name_fmt_str.format(
+ tq_book.resource_type_name
+ ),
+ is_rtl=usfm_book
+ and usfm_book.lang_direction == LangDirEnum.RTL,
+ use_section_visual_separator=False,
+ )
+ )
document_parts.append(
DocumentPart(
content=tq_verses,
is_rtl=is_rtl,
- add_hr_p=False,
contained_in_two_column_section=use_two_column_layout_for_tq_notes,
- use_section_visual_separator=use_section_visual_separator,
+ use_section_visual_separator=False,
)
)
+            # Trick the HTML-to-docx parser into keeping this part, and hence
+            # the hr after a two-column section, by giving it an HTML
+            # non-breaking space rather than an empty string.
             document_parts.append(
                 DocumentPart(
-                    content="",
+                    content="&nbsp;",
use_section_visual_separator=use_section_visual_separator,
)
)
- if rg_verses:
+ rg_verses = rg_chapter_verses(rg_book, chapter_num, False)
+ if rg_verses and rg_book:
+ document_parts.append(
+ DocumentPart(
+ content=resource_type_name_fmt_str.format(
+ rg_book.resource_type_name
+ ),
+ is_rtl=usfm_book
+ and usfm_book.lang_direction == LangDirEnum.RTL,
+ use_section_visual_separator=False,
+ )
+ )
document_parts.append(
DocumentPart(
content=rg_verses,
@@ -324,13 +608,17 @@ def assemble_usfm_by_book(
use_section_visual_separator=use_section_visual_separator,
)
)
- # TODO Get feedback on whether we should allow a user to select a primary _and_
- # a secondary USFM resource. If we want to limit the user to only one USFM per
- # document then we would want to control that in the UI and maybe also at the API
- # level. The API level control would be implemented in the DocumentRequest
- # validation.
if usfm_book2:
- # Here we add the whole chapter's worth of verses for the secondary usfm
+ document_parts.append(
+ DocumentPart(
+ content=resource_type_name_fmt_str.format(
+ usfm_book2.resource_type_name
+ ),
+ is_rtl=usfm_book
+ and usfm_book.lang_direction == LangDirEnum.RTL,
+ use_section_visual_separator=False,
+ )
+ )
document_parts.append(
DocumentPart(
content=usfm_book2.chapters[chapter_num].content,
@@ -340,14 +628,283 @@ def assemble_usfm_by_book(
use_section_visual_separator=use_section_visual_separator,
)
)
- document_parts.append(
- DocumentPart(
- content="",
- add_hr_p=False,
- add_page_break=True,
- use_section_visual_separator=use_section_visual_separator,
+ # document_parts.append(
+ # DocumentPart(
+ # content="",
+ # add_hr_p=False,
+ # add_page_break=True,
+ # use_section_visual_separator=use_section_visual_separator,
+ # )
+ # )
+ return document_parts
+
+
+def assemble_usfm_by_verse_book_at_a_time(
+ usfm_book: Optional[USFMBook],
+ tn_book: Optional[TNBook],
+ tq_book: Optional[TQBook],
+ tw_book: Optional[TWBook],
+ usfm_book2: Optional[USFMBook],
+ bc_book: Optional[BCBook],
+ rg_book: Optional[RGBook],
+ use_section_visual_separator: bool,
+ use_two_column_layout_for_tn_notes: bool,
+ use_two_column_layout_for_tq_notes: bool,
+ show_tn_book_intro: bool,
+ show_bc_book_intro: bool,
+ show_tn_chapter_intro: bool,
+ show_bc_chapter_commentary: bool,
+ fmt_str: str = settings.LEFT_ALIGNED_HEADER_FMT_STR,
+ verse_span_fmt_str: str = settings.VERSE_SPAN_FMT_STR,
+ tw_word_list_vertical: bool = settings.TW_WORD_LIST_VERTICAL,
+ resource_type_codes_and_names: Mapping[
+ str, str
+ ] = settings.RESOURCE_TYPE_CODES_AND_NAMES,
+) -> list[DocumentPart]:
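+    """
+    Construct the Docx parts for one language-book combination with a
+    verse-interleaved layout: optional TN and BC book intros, then per
+    chapter the TN chapter intro, BC chapter commentary, and RG content,
+    then each USFM verse followed by its associated TN, TQ, and TW
+    content.
+    """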
+ document_parts: list[DocumentPart] = []
+ if show_tn_book_intro and tn_book and tn_book.book_intro:
+ document_parts.append(
+ DocumentPart(
+ content=fmt_str.format(tn_book.resource_type_name),
+ is_rtl=tn_book and tn_book.lang_direction == LangDirEnum.RTL,
+ use_section_visual_separator=False,
+ )
+ )
+ document_parts.append(
+ DocumentPart(
+ content=tn_book.book_intro,
+ is_rtl=tn_book and tn_book.lang_direction == LangDirEnum.RTL,
+ use_section_visual_separator=use_section_visual_separator,
+ )
+ )
+ if show_bc_book_intro and bc_book and bc_book.book_intro:
+ document_parts.append(
+ DocumentPart(
+ content=fmt_str.format(bc_book.resource_type_name),
+ is_rtl=bc_book and bc_book.lang_direction == LangDirEnum.RTL,
+ use_section_visual_separator=False,
+ )
+ )
+ document_parts.append(
+ DocumentPart(
+ content=bc_book.book_intro,
+ is_rtl=bc_book and bc_book.lang_direction == LangDirEnum.RTL,
+ use_section_visual_separator=use_section_visual_separator,
+ )
+ )
+ if usfm_book:
+ for (
+ chapter_num,
+ chapter,
+ ) in usfm_book.chapters.items():
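+            # Split the chapter content into a verse_ref -> verse HTML mapping
+            # so the other selected resources can be interleaved per verse.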
+ chapter.verses = handle_split_chapter_into_verses(usfm_book, chapter)
+            tn_chapter = tn_book.chapters.get(chapter_num) if tn_book else None
+            tq_chapter = tq_book.chapters.get(chapter_num) if tq_book else None
+ tn_chapter_intro = chapter_intro(tn_book, chapter_num, False)
+ if show_tn_chapter_intro and tn_book and tn_chapter_intro:
+ document_parts.append(
+ DocumentPart(
+ content=fmt_str.format(tn_book.resource_type_name),
+ is_rtl=tn_book and tn_book.lang_direction == LangDirEnum.RTL,
+ use_section_visual_separator=False,
+ )
)
+ document_parts.append(
+ DocumentPart(
+ content=tn_chapter_intro,
+ is_rtl=tn_book and tn_book.lang_direction == LangDirEnum.RTL,
+ use_section_visual_separator=use_section_visual_separator,
+ )
+ )
+ chapter_commentary_ = chapter_commentary(
+ bc_book, chapter_num, use_section_visual_separator
)
+ if show_bc_chapter_commentary and bc_book and chapter_commentary_:
+ document_parts.append(
+ DocumentPart(
+ content=fmt_str.format(bc_book.resource_type_name),
+ is_rtl=bc_book and bc_book.lang_direction == LangDirEnum.RTL,
+ use_section_visual_separator=False,
+ )
+ )
+ document_parts.append(
+ DocumentPart(
+ content=chapter_commentary_,
+ is_rtl=bc_book and bc_book.lang_direction == LangDirEnum.RTL,
+ use_section_visual_separator=use_section_visual_separator,
+ )
+ )
+ rg_verses = rg_chapter_verses(
+ rg_book, chapter_num, use_section_visual_separator
+ )
+ if rg_book and rg_verses:
+ document_parts.append(
+ DocumentPart(
+ content=fmt_str.format(rg_book.resource_type_name),
+ is_rtl=rg_book and rg_book.lang_direction == LangDirEnum.RTL,
+ use_section_visual_separator=False,
+ )
+ )
+ document_parts.append(
+ DocumentPart(
+ content=rg_verses,
+ is_rtl=rg_book and rg_book.lang_direction == LangDirEnum.RTL,
+ use_section_visual_separator=use_section_visual_separator,
+ )
+ )
+ if chapter.verses:
+ for verse_ref, verse in chapter.verses.items():
+ document_parts.append(
+ DocumentPart(
+ content=fmt_str.format(
+ f"{usfm_book.national_book_name} {chapter_num}:{verse_ref}"
+ ),
+ is_rtl=usfm_book
+ and usfm_book.lang_direction == LangDirEnum.RTL,
+ use_section_visual_separator=False,
+ )
+ )
+ document_parts.append(
+ DocumentPart(
+ content=fmt_str.format(usfm_book.resource_type_name),
+ is_rtl=usfm_book
+ and usfm_book.lang_direction == LangDirEnum.RTL,
+ use_section_visual_separator=False,
+ )
+ )
+ document_parts.append(
+ DocumentPart(
+ content=verse_span_fmt_str.format(verse),
+ is_rtl=usfm_book
+ and usfm_book.lang_direction == LangDirEnum.RTL,
+ use_section_visual_separator=use_section_visual_separator,
+ )
+ )
+ if (
+ tn_book
+ and tn_chapter
+ and tn_chapter.verses
+ and verse_ref in tn_chapter.verses
+ ):
+ document_parts.append(
+ DocumentPart(
+ content=fmt_str.format(tn_book.resource_type_name),
+ is_rtl=tn_book
+ and tn_book.lang_direction == LangDirEnum.RTL,
+ use_section_visual_separator=False,
+ )
+ )
+ document_parts.append(
+ DocumentPart(
+ content=tn_chapter.verses[verse_ref],
+ is_rtl=tn_book
+ and tn_book.lang_direction == LangDirEnum.RTL,
+ use_section_visual_separator=use_section_visual_separator,
+ )
+ )
+ if (
+ tq_book
+ and tq_chapter
+ and tq_chapter.verses
+ and verse_ref in tq_chapter.verses
+ ):
+ document_parts.append(
+ DocumentPart(
+ content=fmt_str.format(tq_book.resource_type_name),
+ is_rtl=tq_book
+ and tq_book.lang_direction == LangDirEnum.RTL,
+ use_section_visual_separator=False,
+ )
+ )
+ document_parts.append(
+ DocumentPart(
+ content=tq_chapter.verses[verse_ref],
+ is_rtl=tq_book
+ and tq_book.lang_direction == LangDirEnum.RTL,
+ use_section_visual_separator=use_section_visual_separator,
+ )
+ )
+ if tw_book:
+ words = translation_words_for_content(tw_book, verse)
+ unique_words = unique_list_of_strings(words)
+ if unique_words:
+ document_parts.append(
+ DocumentPart(
+ content=fmt_str.format(tw_book.resource_type_name),
+ is_rtl=tw_book
+ and tw_book.lang_direction == LangDirEnum.RTL,
+ use_section_visual_separator=False,
+ )
+ )
+ if tw_word_list_vertical:
+                                html = (
+                                    "<ul>\n"
+                                    + "\n".join(
+                                        [
+                                            f'<li><a href="#{tw_book.lang_code}-{word}">{localized_word}</a></li>'
+                                            for localized_word, word in unique_words
+                                        ]
+                                    )
+                                    + "</ul>"
+                                )
+                            else:
+                                html = ", ".join(
+                                    [
+                                        f'<a href="#{tw_book.lang_code}-{word}">{localized_word}</a>'
+                                        for localized_word, word in unique_words
+                                    ]
+                                )
+ logger.debug("tw links html: %s", html)
+ document_parts.append(
+ DocumentPart(
+ content=html,
+ is_rtl=tw_book
+ and tw_book.lang_direction == LangDirEnum.RTL,
+ use_section_visual_separator=use_section_visual_separator,
+ )
+ )
+                    # If the user chose two USFM resource types for a
+                    # language, e.g., ulb and f10 for fr, show the second
+                    # USFM book's content here.
+ if usfm_book2:
+ usfm_book2_chapter = usfm_book2.chapters[chapter_num]
+ usfm_book2_chapter.verses = handle_split_chapter_into_verses(
+ usfm_book2, usfm_book2_chapter
+ )
+ if usfm_book2_chapter.verses:
+ document_parts.append(
+ DocumentPart(
+ content=fmt_str.format(
+ usfm_book2.resource_type_name
+ ),
+ is_rtl=usfm_book2
+ and usfm_book2.lang_direction == LangDirEnum.RTL,
+ use_section_visual_separator=False,
+ )
+ )
+ document_parts.append(
+ DocumentPart(
+ content=verse_span_fmt_str.format(
+ usfm_book2_chapter.verses[verse_ref]
+ ),
+ is_rtl=usfm_book2
+ and usfm_book2.lang_direction == LangDirEnum.RTL,
+ use_section_visual_separator=use_section_visual_separator,
+ )
+ )
+ # TODO How should we handle footnotes in a versified output?
+ # if (
+ # not has_footnotes(chapter.content)
+ # and (
+ # usfm_book2 is not None
+ # or tn_book is not None
+ # or tq_book is not None
+ # or rg_book is not None
+ # or tw_book is not None
+ # )
+ # and use_section_visual_separator
+ # ):
+ # content.append(hr)
+ # content.append(end_of_chapter_html)
return document_parts
@@ -362,15 +919,24 @@ def assemble_tn_by_book(
use_section_visual_separator: bool,
use_two_column_layout_for_tn_notes: bool,
use_two_column_layout_for_tq_notes: bool,
- show_tn_book_intro: bool = settings.SHOW_TN_BOOK_INTRO,
+ show_tn_book_intro: bool,
+ show_bc_book_intro: bool,
+ show_tn_chapter_intro: bool,
+ # show_tn_book_intro: bool = settings.SHOW_TN_BOOK_INTRO,
+ resource_type_name_fmt_str: str = settings.LEFT_ALIGNED_HEADER_FMT_STR,
) -> list[DocumentPart]:
- """
- Construct the HTML for a 'by book' strategy wherein at least
- tn_book exists.
- """
document_parts: list[DocumentPart] = []
if tn_book:
if show_tn_book_intro and tn_book.book_intro:
+ document_parts.append(
+ DocumentPart(
+ content=resource_type_name_fmt_str.format(
+ tn_book.resource_type_name
+ ),
+ is_rtl=tn_book and tn_book.lang_direction == LangDirEnum.RTL,
+ use_section_visual_separator=False,
+ )
+ )
document_parts.append(
DocumentPart(
content=tn_book.book_intro,
@@ -378,29 +944,60 @@ def assemble_tn_by_book(
use_section_visual_separator=use_section_visual_separator,
)
)
- if bc_book and bc_book.book_intro:
+ if show_bc_book_intro and bc_book and bc_book.book_intro:
+ document_parts.append(
+ DocumentPart(
+ content=resource_type_name_fmt_str.format(
+ bc_book.resource_type_name
+ ),
+                is_rtl=bc_book and bc_book.lang_direction == LangDirEnum.RTL,
+ use_section_visual_separator=False,
+ )
+ )
document_parts.append(DocumentPart(content=bc_book.book_intro))
for chapter_num in tn_book.chapters:
- one_column_html = []
- one_column_html.append(chapter_heading(chapter_num))
- one_column_html.append(
- chapter_intro(tn_book, chapter_num, use_section_visual_separator)
+ if show_tn_chapter_intro:
+ one_column_html = []
+ one_column_html.append(chapter_heading(chapter_num))
+ one_column_html.append(
+ chapter_intro(tn_book, chapter_num, use_section_visual_separator)
+ )
+ one_column_html_ = "".join(one_column_html)
+ if one_column_html_:
+ document_parts.append(
+ DocumentPart(
+ content=resource_type_name_fmt_str.format(
+ tn_book.resource_type_name
+ ),
+ is_rtl=tn_book
+ and tn_book.lang_direction == LangDirEnum.RTL,
+ use_section_visual_separator=False,
+ )
+ )
+ document_parts.append(
+ DocumentPart(
+ content=one_column_html_,
+ is_rtl=tn_book
+ and tn_book.lang_direction == LangDirEnum.RTL,
+ use_section_visual_separator=use_section_visual_separator,
+ )
+ )
+ chapter_commentary_ = chapter_commentary(
+ bc_book, chapter_num, use_section_visual_separator
)
- one_column_html_ = "".join(one_column_html)
- if one_column_html_:
+ if bc_book and chapter_commentary_:
document_parts.append(
DocumentPart(
- content=one_column_html_,
+ content=resource_type_name_fmt_str.format(
+ bc_book.resource_type_name
+ ),
-                        is_rtl=tn_book and tn_book.lang_direction == LangDirEnum.RTL,
+                        is_rtl=bc_book and bc_book.lang_direction == LangDirEnum.RTL,
use_section_visual_separator=use_section_visual_separator,
)
)
- if bc_book:
document_parts.append(
DocumentPart(
- content=chapter_commentary(
- bc_book, chapter_num, use_section_visual_separator
- ),
+ content=chapter_commentary_,
use_section_visual_separator=use_section_visual_separator,
)
)
@@ -411,16 +1008,34 @@ def assemble_tn_by_book(
use_two_column_layout_for_tn_notes,
)
if tn_verses:
+ document_parts.append(
+ DocumentPart(
+ content=resource_type_name_fmt_str.format(
+ tn_book.resource_type_name
+ ),
+ is_rtl=tn_book and tn_book.lang_direction == LangDirEnum.RTL,
+ use_section_visual_separator=use_section_visual_separator,
+ )
+ )
document_parts.append(
DocumentPart(
content=tn_verses,
is_rtl=tn_book and tn_book.lang_direction == LangDirEnum.RTL,
add_hr_p=False,
contained_in_two_column_section=use_two_column_layout_for_tn_notes,
+ use_section_visual_separator=False,
+ )
+ )
+ # document_parts.append(DocumentPart(content=""))
+            # To get an hr after a two column section, trick the HTML to
+            # docx parser into keeping this part by using an HTML
+            # non-breaking space rather than an empty string.
+            document_parts.append(
+                DocumentPart(
+                    content="&nbsp;",
use_section_visual_separator=use_section_visual_separator,
)
)
- document_parts.append(DocumentPart(content=""))
tq_verses = tq_chapter_verses(
tq_book,
chapter_num,
@@ -428,40 +1043,67 @@ def assemble_tn_by_book(
use_two_column_layout_for_tq_notes,
)
if tq_book and tq_verses:
+ document_parts.append(
+ DocumentPart(
+ content=resource_type_name_fmt_str.format(
+ tq_book.resource_type_name
+ ),
+ is_rtl=tq_book and tq_book.lang_direction == LangDirEnum.RTL,
+ use_section_visual_separator=use_section_visual_separator,
+ )
+ )
document_parts.append(
DocumentPart(
content=tq_verses,
is_rtl=tq_book and tq_book.lang_direction == LangDirEnum.RTL,
contained_in_two_column_section=use_two_column_layout_for_tq_notes,
+ use_section_visual_separator=False,
+ )
+ )
+ # document_parts.append(DocumentPart(content=""))
+            # To get an hr after a two column section, trick the HTML to
+            # docx parser into keeping this part by using an HTML
+            # non-breaking space rather than an empty string.
+            document_parts.append(
+                DocumentPart(
+                    content="&nbsp;",
use_section_visual_separator=use_section_visual_separator,
)
)
- document_parts.append(DocumentPart(content=""))
rg_verses = rg_chapter_verses(
rg_book, chapter_num, use_section_visual_separator
)
if rg_book and rg_verses:
document_parts.append(
DocumentPart(
- content=rg_verses,
+ content=resource_type_name_fmt_str.format(
+ rg_book.resource_type_name
+ ),
is_rtl=rg_book and rg_book.lang_direction == LangDirEnum.RTL,
use_section_visual_separator=use_section_visual_separator,
)
)
document_parts.append(
DocumentPart(
- content="",
+ content=rg_verses,
+ is_rtl=rg_book and rg_book.lang_direction == LangDirEnum.RTL,
use_section_visual_separator=use_section_visual_separator,
)
)
- document_parts.append(
- DocumentPart(
- content="",
- add_hr_p=False,
- add_page_break=True,
- use_section_visual_separator=use_section_visual_separator,
- )
- )
+ # document_parts.append(
+ # DocumentPart(
+ # content="",
+ # use_section_visual_separator=use_section_visual_separator,
+ # )
+ # )
+ # document_parts.append(
+ # DocumentPart(
+ # content="",
+ # add_hr_p=False,
+ # add_page_break=True,
+ # use_section_visual_separator=use_section_visual_separator,
+ # )
+ # )
return document_parts
@@ -475,23 +1117,41 @@ def assemble_tq_by_book(
rg_book: Optional[RGBook],
use_section_visual_separator: bool,
use_two_column_layout_for_tq_notes: bool,
+ show_bc_book_intro: bool,
+ resource_type_name_fmt_str: str = settings.LEFT_ALIGNED_HEADER_FMT_STR,
) -> list[DocumentPart]:
- """
- Construct the HTML for a 'by book' strategy wherein at least
- tq_book exists.
- """
document_parts: list[DocumentPart] = []
if tq_book:
for chapter_num in tq_book.chapters:
- if bc_book:
+ chapter_commentary_ = chapter_commentary(
+ bc_book, chapter_num, use_section_visual_separator
+ )
+ if bc_book and chapter_commentary_:
document_parts.append(
DocumentPart(
- content=chapter_commentary(
- bc_book, chapter_num, use_section_visual_separator
+ content=resource_type_name_fmt_str.format(
+ bc_book.resource_type_name
),
+ is_rtl=bc_book and bc_book.lang_direction == LangDirEnum.RTL,
+ use_section_visual_separator=False,
+ )
+ )
+ document_parts.append(
+ DocumentPart(
+ content=chapter_commentary_,
use_section_visual_separator=use_section_visual_separator,
)
)
+ document_parts.append(
+ DocumentPart(
+ content=resource_type_name_fmt_str.format(
+ tq_book.resource_type_name
+ ),
+ is_rtl=tq_book and tq_book.lang_direction == LangDirEnum.RTL,
+ use_section_visual_separator=False,
+ )
+ )
document_parts.append(
DocumentPart(
content=chapter_heading(chapter_num),
@@ -506,11 +1166,29 @@ def assemble_tq_by_book(
use_two_column_layout_for_tq_notes,
)
if tq_verses:
+ document_parts.append(
+ DocumentPart(
+ content=resource_type_name_fmt_str.format(
+ tq_book.resource_type_name
+ ),
+ is_rtl=tq_book and tq_book.lang_direction == LangDirEnum.RTL,
+ use_section_visual_separator=False,
+ )
+ )
document_parts.append(
DocumentPart(
content=tq_verses,
is_rtl=tq_book and tq_book.lang_direction == LangDirEnum.RTL,
contained_in_two_column_section=use_two_column_layout_for_tq_notes,
+ use_section_visual_separator=False,
+ )
+ )
+            # To get an hr after a two column section, trick the HTML to
+            # docx parser into keeping this part by using an HTML
+            # non-breaking space rather than an empty string.
+            document_parts.append(
+                DocumentPart(
+                    content="&nbsp;",
use_section_visual_separator=use_section_visual_separator,
)
)
@@ -518,6 +1196,15 @@ def assemble_tq_by_book(
rg_book, chapter_num, use_section_visual_separator
)
if rg_book and rg_verses:
+ document_parts.append(
+ DocumentPart(
+ content=resource_type_name_fmt_str.format(
+ rg_book.resource_type_name
+ ),
+ is_rtl=rg_book and rg_book.lang_direction == LangDirEnum.RTL,
+ use_section_visual_separator=False,
+ )
+ )
document_parts.append(
DocumentPart(
content=rg_verses,
@@ -525,14 +1212,14 @@ def assemble_tq_by_book(
use_section_visual_separator=use_section_visual_separator,
)
)
- document_parts.append(
- DocumentPart(
- content="",
- add_hr_p=False,
- add_page_break=True,
- use_section_visual_separator=use_section_visual_separator,
- )
- )
+ # document_parts.append(
+ # DocumentPart(
+ # content="",
+ # add_hr_p=False,
+ # add_page_break=True,
+ # use_section_visual_separator=use_section_visual_separator,
+ # )
+ # )
return document_parts
@@ -545,27 +1232,45 @@ def assemble_tw_by_book(
bc_book: Optional[BCBook],
rg_book: Optional[RGBook],
use_section_visual_separator: bool,
+ show_bc_book_intro: bool,
+ resource_type_name_fmt_str: str = settings.LEFT_ALIGNED_HEADER_FMT_STR,
) -> list[DocumentPart]:
- """
- TW is handled outside this module, that is why no
- code for TW is explicitly included here.
- """
document_parts: list[DocumentPart] = []
if bc_book:
- document_parts.append(DocumentPart(content=bc_book.book_intro))
- for chapter in bc_book.chapters.values():
+ if show_bc_book_intro and bc_book.book_intro:
document_parts.append(
DocumentPart(
- content=chapter.commentary,
- use_section_visual_separator=use_section_visual_separator,
+ content=resource_type_name_fmt_str.format(
+ bc_book.resource_type_name
+ ),
+ is_rtl=bc_book and bc_book.lang_direction == LangDirEnum.RTL,
+ use_section_visual_separator=False,
)
)
- document_parts.append(
- DocumentPart(
- content="",
- add_hr_p=False,
- add_page_break=True,
- use_section_visual_separator=use_section_visual_separator,
+ document_parts.append(DocumentPart(content=bc_book.book_intro))
+ for chapter in bc_book.chapters.values():
+ if chapter.commentary:
+ document_parts.append(
+ DocumentPart(
+ content=resource_type_name_fmt_str.format(
+ bc_book.resource_type_name
+ ),
+ is_rtl=bc_book and bc_book.lang_direction == LangDirEnum.RTL,
+ use_section_visual_separator=False,
+ )
)
- )
+ document_parts.append(
+ DocumentPart(
+ content=chapter.commentary,
+ use_section_visual_separator=use_section_visual_separator,
+ )
+ )
+ # document_parts.append(
+ # DocumentPart(
+ # content="",
+ # add_hr_p=False,
+ # add_page_break=True,
+ # use_section_visual_separator=use_section_visual_separator,
+ # )
+ # )
return document_parts
diff --git a/backend/doc/domain/assembly_strategies_docx/assembly_strategy_utils.py b/backend/doc/domain/assembly_strategies_docx/assembly_strategy_utils.py
index 4fa0498af..5ead84855 100644
--- a/backend/doc/domain/assembly_strategies_docx/assembly_strategy_utils.py
+++ b/backend/doc/domain/assembly_strategies_docx/assembly_strategy_utils.py
@@ -11,9 +11,9 @@
from docx import Document # type: ignore
from docx.enum.section import WD_SECTION # type: ignore
from docx.enum.text import WD_BREAK # type: ignore
+from docx.oxml import parse_xml # type: ignore
from docx.oxml.ns import qn # type: ignore
from docx.oxml.shared import OxmlElement # type: ignore
-from docx.text.paragraph import Paragraph # type: ignore
logger = settings.logger(__name__)
@@ -183,47 +183,50 @@ def rg_chapter_verses(
return "".join(content)
-def add_hr(paragraph: Paragraph) -> None:
- """Add a horizontal line at the end of the given paragraph."""
- p = paragraph._p # p is the XML element
- pPr = p.get_or_add_pPr()
- pBdr = OxmlElement("w:pBdr")
- pPr.insert_element_before(
- pBdr,
- "w:shd",
- "w:tabs",
- "w:suppressAutoHyphens",
- "w:kinsoku",
- "w:wordWrap",
- "w:overflowPunct",
- "w:topLinePunct",
- "w:autoSpaceDE",
- "w:autoSpaceDN",
- "w:bidi",
- "w:adjustRightInd",
- "w:snapToGrid",
- "w:spacing",
- "w:ind",
- "w:contextualSpacing",
- "w:mirrorIndents",
- "w:suppressOverlap",
- "w:jc",
- "w:textDirection",
- "w:textAlignment",
- "w:textboxTightWrap",
- "w:outlineLvl",
- "w:divId",
- "w:cnfStyle",
- "w:rPr",
- "w:sectPr",
- "w:pPrChange",
- )
- bottom = OxmlElement("w:bottom")
- bottom.set(qn("w:val"), "single")
- bottom.set(qn("w:sz"), "6")
- bottom.set(qn("w:space"), "1")
- bottom.set(qn("w:color"), "auto")
- pBdr.append(bottom)
+def add_full_width_hr(doc: Document) -> None:
+ """Add a full-width horizontal rule that spans the entire page width."""
+ p = doc.add_paragraph()
+ run = p.add_run()
+ # Adjust this width to your page layout; 6.5" = 8.5" page minus 1" margins on each side
+ width_inches = 6.5
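+    # Word's DrawingML measures drawing extents in EMUs: 914400 per inch.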
+    emu_width = int(width_inches * 914400)
+    hr_xml = f"""
+    <w:drawing xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main"
+               xmlns:wp="http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing"
+               xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main"
+               xmlns:wps="http://schemas.microsoft.com/office/word/2010/wordprocessingShape">
+      <wp:inline distT="0" distB="0" distL="0" distR="0">
+        <wp:extent cx="{emu_width}" cy="12700"/>
+        <wp:docPr id="1" name="Horizontal Rule"/>
+        <a:graphic>
+          <a:graphicData uri="http://schemas.microsoft.com/office/word/2010/wordprocessingShape">
+            <wps:wsp>
+              <wps:cNvSpPr/>
+              <wps:spPr>
+                <a:xfrm>
+                  <a:off x="0" y="0"/>
+                  <a:ext cx="{emu_width}" cy="12700"/>
+                </a:xfrm>
+                <a:prstGeom prst="line">
+                  <a:avLst/>
+                </a:prstGeom>
+                <a:ln w="9525">
+                  <a:solidFill>
+                    <a:srgbClr val="000000"/>
+                  </a:solidFill>
+                </a:ln>
+              </wps:spPr>
+              <wps:bodyPr/>
+            </wps:wsp>
+          </a:graphicData>
+        </a:graphic>
+      </wp:inline>
+    </w:drawing>
+    """
+ drawing = parse_xml(hr_xml)
+ run._r.append(drawing)
def set_docx_language(
diff --git a/backend/doc/domain/document_generator.py b/backend/doc/domain/document_generator.py
index 679dd9775..cb4734f02 100755
--- a/backend/doc/domain/document_generator.py
+++ b/backend/doc/domain/document_generator.py
@@ -13,17 +13,19 @@
from doc.config import settings
from doc.domain import parsing, resource_lookup, worker
from doc.domain.assembly_strategies.assembly_strategies_book_then_lang_by_chapter import (
- assemble_content_by_book_then_lang,
+ assemble_content_by_chapter,
+ assemble_content_by_verse_chapter_at_a_time,
)
from doc.domain.assembly_strategies.assembly_strategies_lang_then_book_by_chapter import (
- assemble_content_by_lang_then_book,
+ assemble_content_by_book,
+ assemble_content_by_verse_book_at_a_time,
)
from doc.domain.assembly_strategies_docx import (
assembly_strategies_book_then_lang_by_chapter as book_then_lang,
assembly_strategies_lang_then_book_by_chapter as lang_then_book,
)
from doc.domain.assembly_strategies_docx.assembly_strategy_utils import (
- add_hr,
+ add_full_width_hr,
add_one_column_section,
add_page_break,
add_two_column_section,
@@ -47,7 +49,11 @@
USFMBook,
)
from doc.reviewers_guide.model import RGBook
-from doc.utils.docx_util import generate_docx_toc
+from doc.utils.docx_util import (
+ generate_docx_toc,
+ preprocess_html_for_internal_docx_links,
+ add_internal_docx_links,
+)
from doc.utils.file_utils import (
docx_filepath,
epub_filepath,
@@ -60,7 +66,7 @@
from doc.utils.tw_utils import (
contains_tw,
filter_unique_by_lang_code,
- translation_words_section,
+ translation_words_section_for_book,
)
from docx import Document # type: ignore
from docx.enum.section import WD_SECTION # type: ignore
@@ -92,6 +98,11 @@ def initialize_document_request_and_key(
document_request.use_section_visual_separator,
document_request.use_two_column_layout_for_tn_notes,
document_request.use_two_column_layout_for_tq_notes,
+ document_request.show_tn_book_intro,
+ document_request.show_tn_chapter_intro,
+ document_request.show_bc_book_intro,
+ document_request.show_bc_chapter_commentary,
+ document_request.show_rg_chapter_commentary,
)
return document_request, document_request_key_
@@ -142,6 +153,7 @@ def locate_acquire_and_build_resource_objects(
document_request.resource_requests,
document_request.layout_for_print,
document_request.use_chapter_labels,
+ document_request.generate_docx,
)
t1 = time.time()
logger.info("Time to parse all resource content: %s", t1 - t0)
@@ -169,7 +181,7 @@ def generate_document(
"""
This is the main entry point for this module for non-docx generation.
>>> from doc.domain import document_generator
- >>> document_request_json = '{"email_address":null,"assembly_strategy_kind":"lbo","assembly_layout_kind":"1c","layout_for_print":false,"resource_requests":[{"lang_code":"es-419","resource_type":"ulb","book_code":"mat"}],"generate_pdf":true,"generate_epub":false,"generate_docx":false,"chunk_size":"chapter","limit_words":false,"include_tn_book_intros":false,"document_request_source":"ui"}'
+ >>> document_request_json = '{"email_address":null,"assembly_strategy_kind":"lbo","assembly_layout_kind":"1c","layout_for_print":false,"resource_requests":[{"lang_code":"es-419","resource_type":"ulb","book_code":"mat"}],"generate_pdf":true,"generate_epub":false,"generate_docx":false,"chunk_size":"chapter","limit_words":false,"show_tn_book_intro":false,"document_request_source":"ui"}'
>>> document_generator.generate_document(document_request_json)
"""
current_task.update_state(state="Receiving request")
@@ -292,7 +304,6 @@ def generate_docx_document(
docx_filepath_,
document_parts,
document_request.layout_for_print,
- document_request.use_section_visual_separator,
title1,
title2,
)
@@ -327,6 +338,11 @@ def document_request_key(
use_section_visual_separator: bool,
use_two_column_layout_for_tn_notes: bool,
use_two_column_layout_for_tq_notes: bool,
+ show_tn_book_intro: bool,
+ show_bc_book_intro: bool,
+ show_tn_chapter_intro: bool,
+ show_bc_chapter_commentary: bool,
+ show_rg_chapter_commentary: bool,
max_filename_len: int = 240,
underscore: str = "_",
hyphen: str = "-",
@@ -359,9 +375,9 @@ def document_request_key(
]
)
if any(contains_tw(resource_request) for resource_request in resource_requests):
- document_request_key = f'{resource_request_keys}_{assembly_strategy_kind.value}_{assembly_layout_kind.value}_{chunk_size.value}_{"clt" if use_chapter_labels else "clf"}_{"lwt" if limit_words else "lwf"}_{"sst" if use_section_visual_separator else "ssf"}_{"2ctn" if use_two_column_layout_for_tn_notes else "1ctn"}_{"2ctq" if use_two_column_layout_for_tq_notes else "1ctq"}'
+ document_request_key = f'{resource_request_keys}_{assembly_strategy_kind.value}_{assembly_layout_kind.value}_{chunk_size.value}_{"clt" if use_chapter_labels else "clf"}_{"lwt" if limit_words else "lwf"}_{"sst" if use_section_visual_separator else "ssf"}_{"2ctn" if use_two_column_layout_for_tn_notes else "1ctn"}_{"2ctq" if use_two_column_layout_for_tq_notes else "1ctq"}_{"tnbt" if show_tn_book_intro else "tnbf"}_{"bcbt" if show_bc_book_intro else "bcbf"}_{"tnct" if show_tn_chapter_intro else "tncf"}_{"bcct" if show_bc_chapter_commentary else "bccf"}_{"rgct" if show_rg_chapter_commentary else "rgcf"}'
else:
- document_request_key = f'{resource_request_keys}_{assembly_strategy_kind.value}_{assembly_layout_kind.value}_{chunk_size.value}_{"clt" if use_chapter_labels else "clf"}_{"sst" if use_section_visual_separator else "ssf"}_{"2ctn" if use_two_column_layout_for_tn_notes else "1ctn"}_{"2ctq" if use_two_column_layout_for_tq_notes else "1ctq"}'
+ document_request_key = f'{resource_request_keys}_{assembly_strategy_kind.value}_{assembly_layout_kind.value}_{chunk_size.value}_{"clt" if use_chapter_labels else "clf"}_{"sst" if use_section_visual_separator else "ssf"}_{"2ctn" if use_two_column_layout_for_tn_notes else "1ctn"}_{"2ctq" if use_two_column_layout_for_tq_notes else "1ctq"}_{"tnbt" if show_tn_book_intro else "tnbf"}_{"bcbt" if show_bc_book_intro else "bcbf"}_{"tnct" if show_tn_chapter_intro else "tncf"}_{"bcct" if show_bc_chapter_commentary else "bccf"}_{"rgct" if show_rg_chapter_commentary else "rgcf"}'
if len(document_request_key) >= max_filename_len:
# The generated filename could be too long for the OS where this is
# running. Therefore, use the current time as a document_request_key
@@ -435,6 +451,7 @@ def assemble_content(
rg_books: Sequence[RGBook],
found_resource_lookup_dtos: Sequence[ResourceLookupDto],
    hr: str = "<hr/>",
+ resource_assets_dir: str = settings.RESOURCE_ASSETS_DIR,
) -> list[str]:
"""
Assemble and return the content from all requested resources according to the
@@ -445,10 +462,54 @@ def assemble_content(
content = []
if (
document_request.assembly_strategy_kind
- == AssemblyStrategyEnum.LANGUAGE_BOOK_ORDER
+ == AssemblyStrategyEnum.INTERLEAVE_BY_BOOK
+ ):
+ content.extend(
+ assemble_content_by_book(
+ usfm_books,
+ tn_books,
+ tq_books,
+ tw_books,
+ bc_books,
+ rg_books,
+ cast(AssemblyLayoutEnum, document_request.assembly_layout_kind),
+ document_request.use_section_visual_separator,
+ document_request.use_two_column_layout_for_tn_notes,
+ document_request.use_two_column_layout_for_tq_notes,
+ document_request.show_tn_book_intro,
+ document_request.show_bc_book_intro,
+ document_request.show_tn_chapter_intro,
+ )
+ )
+ elif (
+ document_request.assembly_strategy_kind
+ == AssemblyStrategyEnum.INTERLEAVE_BY_VERSE_BOOK_AT_A_TIME
+ ):
+ content.extend(
+ assemble_content_by_verse_book_at_a_time(
+ usfm_books,
+ tn_books,
+ tq_books,
+ tw_books,
+ bc_books,
+ rg_books,
+ cast(AssemblyLayoutEnum, document_request.assembly_layout_kind),
+ document_request.use_section_visual_separator,
+ document_request.use_two_column_layout_for_tn_notes,
+ document_request.use_two_column_layout_for_tq_notes,
+ document_request.show_tn_book_intro,
+ document_request.show_bc_book_intro,
+ document_request.show_tn_chapter_intro,
+ document_request.show_bc_chapter_commentary,
+ document_request.show_rg_chapter_commentary,
+ )
+ )
+ elif (
+ document_request.assembly_strategy_kind
+ == AssemblyStrategyEnum.INTERLEAVE_BY_CHAPTER
):
content.extend(
- assemble_content_by_lang_then_book(
+ assemble_content_by_chapter(
usfm_books,
tn_books,
tq_books,
@@ -459,14 +520,17 @@ def assemble_content(
document_request.use_section_visual_separator,
document_request.use_two_column_layout_for_tn_notes,
document_request.use_two_column_layout_for_tq_notes,
+ document_request.show_tn_book_intro,
+ document_request.show_bc_book_intro,
+ document_request.show_tn_chapter_intro,
)
)
elif (
document_request.assembly_strategy_kind
- == AssemblyStrategyEnum.BOOK_LANGUAGE_ORDER
+ == AssemblyStrategyEnum.INTERLEAVE_BY_VERSE
):
content.extend(
- assemble_content_by_book_then_lang(
+ assemble_content_by_verse_chapter_at_a_time(
usfm_books,
tn_books,
tq_books,
@@ -477,6 +541,11 @@ def assemble_content(
document_request.use_section_visual_separator,
document_request.use_two_column_layout_for_tn_notes,
document_request.use_two_column_layout_for_tq_notes,
+ document_request.show_tn_book_intro,
+ document_request.show_bc_book_intro,
+ document_request.show_tn_chapter_intro,
+ document_request.show_bc_chapter_commentary,
+ document_request.show_rg_chapter_commentary,
)
)
t1 = time.time()
@@ -486,10 +555,12 @@ def assemble_content(
unique_tw_books = filter_unique_by_lang_code(tw_books)
for tw_book in unique_tw_books:
content.extend(
- translation_words_section(
+ translation_words_section_for_book(
tw_book,
usfm_books,
- document_request.limit_words,
+            # Not currently using the limit-TW-words feature because we
+            # want all interdocument TW links to work.
+ False,
document_request.resource_requests,
)
)
@@ -540,9 +611,9 @@ def assemble_docx_content(
document_parts: list[DocumentPart] = []
if (
document_request.assembly_strategy_kind
- == AssemblyStrategyEnum.LANGUAGE_BOOK_ORDER
+ == AssemblyStrategyEnum.INTERLEAVE_BY_BOOK
):
- document_parts = lang_then_book.assemble_content_by_lang_then_book(
+ document_parts = lang_then_book.assemble_content_by_book(
usfm_books,
tn_books,
tq_books,
@@ -554,12 +625,16 @@ def assemble_docx_content(
document_request.use_section_visual_separator,
document_request.use_two_column_layout_for_tn_notes,
document_request.use_two_column_layout_for_tq_notes,
+ document_request.show_tn_book_intro,
+ document_request.show_bc_book_intro,
+ document_request.show_tn_chapter_intro,
+ document_request.show_bc_chapter_commentary,
)
elif (
document_request.assembly_strategy_kind
- == AssemblyStrategyEnum.BOOK_LANGUAGE_ORDER
+ == AssemblyStrategyEnum.INTERLEAVE_BY_VERSE_BOOK_AT_A_TIME
):
- document_parts = book_then_lang.assemble_content_by_book_then_lang(
+ document_parts = lang_then_book.assemble_content_by_verse_book_at_a_time(
usfm_books,
tn_books,
tq_books,
@@ -571,6 +646,53 @@ def assemble_docx_content(
document_request.use_section_visual_separator,
document_request.use_two_column_layout_for_tn_notes,
document_request.use_two_column_layout_for_tq_notes,
+ document_request.show_tn_book_intro,
+ document_request.show_bc_book_intro,
+ document_request.show_tn_chapter_intro,
+ document_request.show_bc_chapter_commentary,
+ document_request.show_rg_chapter_commentary,
+ )
+ elif (
+ document_request.assembly_strategy_kind
+ == AssemblyStrategyEnum.INTERLEAVE_BY_CHAPTER
+ ):
+ document_parts = book_then_lang.assemble_content_by_chapter(
+ usfm_books,
+ tn_books,
+ tq_books,
+ tw_books,
+ bc_books,
+ rg_books,
+ cast(AssemblyLayoutEnum, document_request.assembly_layout_kind),
+ document_request.chunk_size,
+ document_request.use_section_visual_separator,
+ document_request.use_two_column_layout_for_tn_notes,
+ document_request.use_two_column_layout_for_tq_notes,
+ document_request.show_tn_book_intro,
+ document_request.show_bc_book_intro,
+ document_request.show_tn_chapter_intro,
+ )
+ elif (
+ document_request.assembly_strategy_kind
+ == AssemblyStrategyEnum.INTERLEAVE_BY_VERSE
+ ):
+ document_parts = book_then_lang.assemble_content_by_verse_chapter_at_a_time(
+ usfm_books,
+ tn_books,
+ tq_books,
+ tw_books,
+ bc_books,
+ rg_books,
+ cast(AssemblyLayoutEnum, document_request.assembly_layout_kind),
+ # document_request.chunk_size,
+ document_request.use_section_visual_separator,
+ document_request.use_two_column_layout_for_tn_notes,
+ document_request.use_two_column_layout_for_tq_notes,
+ document_request.show_tn_book_intro,
+ document_request.show_bc_book_intro,
+ document_request.show_tn_chapter_intro,
+ document_request.show_bc_chapter_commentary,
+ document_request.show_rg_chapter_commentary,
)
t1 = time.time()
logger.info("Time for interleaving document: %s", t1 - t0)
@@ -581,11 +703,14 @@ def assemble_docx_content(
for tw_book in unique_tw_books:
document_parts.append(
DocumentPart(
- content=translation_words_section(
- tw_book,
- usfm_books,
- document_request.limit_words,
- document_request.resource_requests,
+ content="".join(
+ translation_words_section_for_book(
+ tw_book,
+ usfm_books,
+ False,
+ # document_request.limit_words,
+ document_request.resource_requests,
+ )
),
use_section_visual_separator=document_request.use_section_visual_separator,
)
@@ -672,32 +797,31 @@ def convert_html_to_epub(
def compose_docx_document(
- document_parts: list[DocumentPart], use_section_visual_separator: bool
+ document_parts: list[DocumentPart],
) -> Document:
+ """
+    Convert a sequence of HTML parts into one DOCX Document,
+    preprocessing each part's HTML for internal links and adding
+    optional visual separators and page breaks.
+ """
doc = Document()
html_to_docx = HtmlToDocx()
t0 = time.time()
for part in document_parts:
if part.contained_in_two_column_section:
add_two_column_section(doc)
- try:
- html_to_docx.add_html_to_document(part.content, doc)
- except ValueError as e:
- logger.exception(e)
else:
add_one_column_section(doc)
- try:
- html_to_docx.add_html_to_document(part.content, doc)
- except ValueError as e:
- logger.exception(e)
- # Set the language for spellcheck
- # set_docx_language(doc, lang_code)
- if use_section_visual_separator and part.add_hr_p:
- add_hr(doc.paragraphs[-1])
+ try:
+ processed_html = preprocess_html_for_internal_docx_links(part.content)
+ html_to_docx.add_html_to_document(processed_html, doc)
+ except ValueError as e:
+ logger.exception("Error converting HTML to docx: %s", e)
+ if part.use_section_visual_separator and part.add_hr_p:
+ add_full_width_hr(doc)
if part.add_page_break:
add_page_break(doc)
t1 = time.time()
- logger.info("Time for converting HTML to Docx: %s", t1 - t0)
+ logger.info("Time for converting HTML to Docx: %.2f seconds", t1 - t0)
return doc
@@ -706,7 +830,6 @@ def convert_html_to_docx(
docx_filepath: str,
document_parts: list[DocumentPart],
layout_for_print: bool,
- use_section_visual_separator: bool,
title1: str = "title1",
title2: str = "title2",
title3: str = "",
@@ -736,7 +859,9 @@ def convert_html_to_docx(
new_section = doc.add_section(WD_SECTION.CONTINUOUS)
new_section.start_type
master = Composer(doc)
- master.append(compose_docx_document(document_parts, use_section_visual_separator))
+ doc2 = compose_docx_document(document_parts)
+ add_internal_docx_links(doc2)
+ master.append(doc2)
master.save(docx_filepath)
t1 = time.time()
logger.info("Time for converting HTML to Docx: %s", t1 - t0)
@@ -752,8 +877,8 @@ def cover_filepath(
def select_assembly_layout_kind(
document_request: DocumentRequest,
usfm_resource_types: Sequence[str] = settings.USFM_RESOURCE_TYPES,
- language_book_order: AssemblyStrategyEnum = AssemblyStrategyEnum.LANGUAGE_BOOK_ORDER,
- book_language_order: AssemblyStrategyEnum = AssemblyStrategyEnum.BOOK_LANGUAGE_ORDER,
+ language_book_order: AssemblyStrategyEnum = AssemblyStrategyEnum.INTERLEAVE_BY_BOOK,
+ book_language_order: AssemblyStrategyEnum = AssemblyStrategyEnum.INTERLEAVE_BY_CHAPTER,
stet_strategy: AssemblyStrategyEnum = AssemblyStrategyEnum.STET_STRATEGY,
one_column_compact: AssemblyLayoutEnum = AssemblyLayoutEnum.ONE_COLUMN_COMPACT,
sl_sr: AssemblyLayoutEnum = AssemblyLayoutEnum.TWO_COLUMN_SCRIPTURE_LEFT_SCRIPTURE_RIGHT,
diff --git a/backend/doc/domain/model.py b/backend/doc/domain/model.py
index 99688e3a9..fec5c5293 100644
--- a/backend/doc/domain/model.py
+++ b/backend/doc/domain/model.py
@@ -14,6 +14,7 @@
from pydantic import BaseModel, EmailStr, HttpUrl
from pydantic.functional_validators import model_validator
+
# These type aliases give us more self-documenting code, but of course
# aren't strictly necessary.
VerseRef = str
@@ -37,19 +38,10 @@
@final
class AssemblyStrategyEnum(str, Enum):
- """
- * LANGUAGE_BOOK_ORDER
- - This enum value signals to use the high level strategy that orders
- by language and then by book before delegating to an assembly
- sub-strategy.
- * BOOK_LANGUAGE_ORDER
- - This enum value signals to use the high level strategy that orders
- by book and then by language before delegating to an assembly
- sub-strategy.
- """
-
- LANGUAGE_BOOK_ORDER = "lbo"
- BOOK_LANGUAGE_ORDER = "blo"
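+    # The "lbo" and "blo" wire values are carried over from the old
+    # language-book-order and book-language-order names so existing
+    # document requests keep working.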
+ INTERLEAVE_BY_BOOK = "lbo"
+ INTERLEAVE_BY_CHAPTER = "blo"
+ INTERLEAVE_BY_VERSE_BOOK_AT_A_TIME = "lvo" # Interleave by verse one book at a time
+ INTERLEAVE_BY_VERSE = "bvo" # Interleave by verse one chapter at a time
STET_STRATEGY = "stet"
@@ -218,12 +210,13 @@ class DocumentRequest(BaseModel):
use_two_column_layout_for_tn_notes: bool = False
# Some languages, e.g., Khmer, don't layout well in 2 column
use_two_column_layout_for_tq_notes: bool = False
-
# Indicate whether to show visual separator between sections, e.g., hr element
use_section_visual_separator: bool = False
- # Indicate whether TN book intros should be included. Currently,
- # the content team does not want them included.
- include_tn_book_intros: bool = False
+ show_tn_book_intro: bool = True
+ show_bc_book_intro: bool = True
+ show_tn_chapter_intro: bool = True
+ show_bc_chapter_commentary: bool = True
+ show_rg_chapter_commentary: bool = True
# Indicate where the document request originated from. We default to
# TEST so that tests don't have to specify and every other client, e.g.,
# UI, should specify in order for
@@ -297,7 +290,7 @@ def ensure_valid_document_request(self) -> "DocumentRequest":
# )
)
if (
- self.assembly_strategy_kind != AssemblyStrategyEnum.BOOK_LANGUAGE_ORDER
+ self.assembly_strategy_kind != AssemblyStrategyEnum.INTERLEAVE_BY_CHAPTER
and self.assembly_layout_kind
== AssemblyLayoutEnum.TWO_COLUMN_SCRIPTURE_LEFT_SCRIPTURE_RIGHT
):
@@ -305,7 +298,7 @@ def ensure_valid_document_request(self) -> "DocumentRequest":
"Two column scripture left, scripture right layout is only compatible with book language order assembly strategy."
)
elif (
- self.assembly_strategy_kind == AssemblyStrategyEnum.BOOK_LANGUAGE_ORDER
+ self.assembly_strategy_kind == AssemblyStrategyEnum.INTERLEAVE_BY_CHAPTER
and self.assembly_layout_kind
== AssemblyLayoutEnum.TWO_COLUMN_SCRIPTURE_LEFT_SCRIPTURE_RIGHT
# Because book content for different languages will be side by side for
@@ -318,7 +311,7 @@ def ensure_valid_document_request(self) -> "DocumentRequest":
"Two column scripture left, scripture right layout requires a non-zero even number of languages. For an uneven number of languages you'll want to use the one column layout kind."
)
elif (
- self.assembly_strategy_kind == AssemblyStrategyEnum.BOOK_LANGUAGE_ORDER
+ self.assembly_strategy_kind == AssemblyStrategyEnum.INTERLEAVE_BY_CHAPTER
and self.assembly_layout_kind
== AssemblyLayoutEnum.TWO_COLUMN_SCRIPTURE_LEFT_SCRIPTURE_RIGHT
# Because book content for different languages will be side by side for
@@ -445,8 +438,9 @@ class TWNameContentPair:
HTML content.
"""
- def __init__(self, localized_word: str, content: str):
+ def __init__(self, localized_word: str, path: str, content: str):
self.localized_word = localized_word
+ self.path = path
self.content = content
@@ -458,7 +452,7 @@ class TWBook(NamedTuple):
resource_type_name: str
lang_direction: LangDirEnum
name_content_pairs: list[TWNameContentPair] = []
- # uses: dict[str, list[TWUse]] = {}
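+    # Maps a translation word to the places (TWUse) where it is used.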
+ uses: dict[str, list[TWUse]] = {}
@final
diff --git a/backend/doc/domain/parsing.py b/backend/doc/domain/parsing.py
index ef32ecbb6..10234005d 100644
--- a/backend/doc/domain/parsing.py
+++ b/backend/doc/domain/parsing.py
@@ -12,6 +12,7 @@
import mistune
import requests
+from bs4 import BeautifulSoup
from doc.config import settings
from doc.domain.assembly_strategies.assembly_strategy_utils import (
adjust_commentary_headings,
@@ -46,8 +47,10 @@
from doc.markdown_transforms import markdown_transformer
from doc.reviewers_guide.model import RGBook
from doc.reviewers_guide.parser import get_rg_books
+from doc.utils.docx_util import preprocess_html_for_internal_docx_links
from doc.utils.file_utils import read_file
from doc.utils.text_utils import (
+ maybe_correct_book_name,
chapter_label_numeric_part,
chapter_label_sans_numeric_part,
normalize_localized_book_name,
@@ -60,7 +63,7 @@
)
from doc.utils.url_utils import (
get_last_segment,
- get_book_names_from_title_file,
+ get_book_name_from_title_file,
book_codes_and_names_from_manifest,
)
from pydantic import HttpUrl
@@ -70,6 +73,7 @@
H1, H2, H3, H4, H5 = "h1", "h2", "h3", "h4", "h5"
+
# fmt: off
BC_ARTICLE_URL_FMT_STR: str = "https://content.bibletranslationtools.org/WycliffeAssociates/en_bc/src/branch/master/{}"
# fmt: on
@@ -467,6 +471,10 @@ def usfm_book_content(
localized_book_name = get_localized_book_name(
frontmatter, resource_dir, resource_lookup_dto
)
+ localized_book_name = maybe_correct_book_name(
+ resource_lookup_dto.lang_code, localized_book_name
+ )
+ logger.debug("localized_book_name: %s", localized_book_name)
for chapter_marker, chapter_usfm in zip(chapter_markers, chapters_usfm):
chapter_num = get_chapter_num(chapter_usfm)
if chapter_num == -1:
@@ -538,12 +546,9 @@ def get_localized_book_name(
len(repo_components) > 2
and resource_lookup_dto.resource_type in usfm_resource_types
):
- book_names_from_title_file = get_book_names_from_title_file(
+ localized_book_name = get_book_name_from_title_file(
resource_dir, resource_lookup_dto.lang_code, repo_components
)
- localized_book_name = book_names_from_title_file.get(
- resource_lookup_dto.book_code, ""
- )
return localized_book_name
@@ -584,6 +589,7 @@ def tn_chapter_verses(
chapter_intro = tn_chapter_intro(chapter_dir)
chapter_intro_html = ""
if chapter_intro:
+ chapter_intro = markdown_transformer.remove_sections(chapter_intro)
tw_resource_dir_ = tw_resource_dir(lang_code)
translation_words_dict_ = translation_words_dict(tw_resource_dir_)
chapter_intro = markdown_transformer.transform_tw_links(
@@ -790,30 +796,55 @@ def tw_name_content_pairs(
resource_dir: str,
lang_code: str,
resource_requests: Sequence[ResourceRequest],
+ generate_docx: bool,
h1: str = H1,
h2: str = H2,
h3: str = H3,
h4: str = H4,
) -> list[TWNameContentPair]:
- translation_word_filepaths_: list[str] = translation_word_filepaths(resource_dir)
- name_content_pairs: list[TWNameContentPair] = []
+ translation_word_filepaths_ = translation_word_filepaths(resource_dir)
+ name_content_pairs = []
+ translation_words_dict_ = translation_words_dict(resource_dir)
for translation_word_filepath in translation_word_filepaths_:
translation_word_content = read_file(translation_word_filepath)
+ # if "daughtersofzion" in translation_word_filepath:
+ # logger.debug("translation_word_content: %s", translation_word_content)
+        # French has a stray double quote at the start of some
+        # translation word headings which disturbs the expected
+        # alphabetization; remove it if present. Other languages may
+        # have the same defect.
+ if translation_word_content.startswith('# "'):
+            translation_word_content = f"# {translation_word_content[3:]}"
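+            # e.g., a heading like '# "abba' (hypothetical word) becomes '# abba'.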
localized_translation_word_ = localized_translation_word(
translation_word_content
)
+ if not localized_translation_word_: # language doesn't provide data
+ continue
translation_word_content = markdown_transformer.remove_sections(
translation_word_content
)
translation_word_content = markdown_transformer.transform_ta_and_tn_links(
translation_word_content, lang_code, resource_requests
)
+ translation_word_content = markdown_transformer.transform_tw_links(
+ translation_word_content,
+ lang_code,
+ resource_requests,
+ translation_words_dict_,
+ )
html_word_content = mistune.markdown(translation_word_content)
html_word_content = re.sub(h2, h4, html_word_content)
html_word_content = re.sub(h1, h3, html_word_content)
- name_content_pairs.append(
- TWNameContentPair(localized_translation_word_, html_word_content)
+ if generate_docx:
+ html_word_content = preprocess_html_for_internal_docx_links(
+ html_word_content
+ )
+ pair = TWNameContentPair(
+ localized_translation_word_,
+ translation_word_filepath,
+ html_word_content,
)
+ # logger.debug("tw_name_content_pair: %s", f"{pair.localized_word}, {pair.path}")
+ name_content_pairs.append(pair)
return sorted(name_content_pairs, key=tw_sort_key)
@@ -822,9 +853,10 @@ def tw_book_content(
resource_dir: str,
resource_requests: Sequence[ResourceRequest],
layout_for_print: bool,
+ generate_docx: bool,
) -> TWBook:
name_content_pairs = tw_name_content_pairs(
- resource_dir, resource_lookup_dto.lang_code, resource_requests
+ resource_dir, resource_lookup_dto.lang_code, resource_requests, generate_docx
)
return TWBook(
lang_code=resource_lookup_dto.lang_code,
@@ -958,6 +990,7 @@ def books(
resource_requests: Sequence[ResourceRequest],
layout_for_print: bool,
use_chapter_labels: bool,
+ generate_docx: bool,
usfm_resource_types: Sequence[str] = settings.USFM_RESOURCE_TYPES,
tn_resource_type: str = TN_RESOURCE_TYPE,
en_tn_condensed_resource_type: str = EN_TN_CONDENSED_RESOURCE_TYPE,
@@ -1007,7 +1040,11 @@ def books(
tq_books.append(tq_book)
elif resource_lookup_dto.resource_type == tw_resource_type:
tw_book = tw_book_content(
- resource_lookup_dto, resource_dir, resource_requests, layout_for_print
+ resource_lookup_dto,
+ resource_dir,
+ resource_requests,
+ layout_for_print,
+ generate_docx,
)
tw_books.append(tw_book)
elif resource_lookup_dto.resource_type == bc_resource_type:
@@ -1274,3 +1311,131 @@ def split_chapter_into_verses(chapter: USFMChapter) -> dict[str, str]:
# Add to the dictionary with verse number as the key and verse text as the value
verse_dict[verse_number_] = verse_text
return verse_dict
+
+
+def handle_split_chapter_into_verses(
+ usfm_book: USFMBook,
+ usfm_chapter: USFMChapter,
+ resource_type_codes_and_names: Mapping[
+ str, str
+ ] = settings.RESOURCE_TYPE_CODES_AND_NAMES,
+) -> dict[VerseRef, str]:
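+    """
+    Dispatch to the f10-specific verse splitter for the French Louis
+    Segond 1910 Bible, whose HTML needs word-entry unwrapping and
+    whitespace cleanup; otherwise use the generic formatted splitter.
+    """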
+ if (
+ usfm_book.lang_code == "fr"
+ and usfm_book.resource_type_name == resource_type_codes_and_names["f10"]
+ ):
+ return split_chapter_into_verses_with_formatting_for_f10(usfm_chapter)
+ else:
+ return split_chapter_into_verses_with_formatting(usfm_chapter)
+
+
+def split_chapter_into_verses_with_formatting(
+ chapter: USFMChapter,
+) -> dict[VerseRef, str]:
+ """
+    Given a USFMChapter instance, return a dictionary mapping each
+    verse number to that verse's HTML fragment, formatting markup
+    included.
+
+ Sample HTML content with multiple verse elements:
+
+    >>> html_content = '''
+    ... <span class="verse">
+    ... <sup class="versemarker">19</sup>
+    ... For through the law I died to the law, so that I might live for God. I have been crucified with Christ.
+    ... </span>
+    ... <span class="verse">
+    ... <sup class="versemarker">20</sup>
+    ... I have been crucified with Christ and I no longer live, but Christ lives in me. The life I now live in the body, I live by faith in the Son of God, who loved me and gave himself for me.
+    ... </span>
+    ... '''
+    >>> from doc.domain.model import USFMChapter
+    >>> from doc.domain.parsing import split_chapter_into_verses_with_formatting
+    >>> chapter = USFMChapter(content=html_content)
+    >>> chapter.verses = split_chapter_into_verses_with_formatting(chapter)
+    >>> print(chapter.verses["19"])
+    <span class="verse">
+    <sup class="versemarker">19</sup>
+    For through the law I died to the law, so that I might live for God. I have been crucified with Christ.
+    </span>
+ """
+ # TODO What to do about footnote targets? Perhaps have the value be a
+ # tuple with first element of the verse HTML (which includes the
+ # footnote callers) and the second element the target footnotes HTML?
+ verse_dict = {}
+ # Find all verse spans
+ verse_spans = re.findall(
+        r'(<span class="verse">.*?</span>)', chapter.content, re.DOTALL
+ )
+ for verse_span in verse_spans:
+ # Extract the verse number from the versemarker
+        verse_number = re.search(r'<sup class="versemarker">(\d+)</sup>', verse_span)
+ if verse_number:
+ verse_number_ = verse_number.group(1)
+            # Add to the dictionary with the verse number as the key and
+            # the verse HTML (formatting included) as the value
+            verse_dict[verse_number_] = verse_span
+ return verse_dict
+
+
+def split_chapter_into_verses_with_formatting_for_f10(
+ chapter: USFMChapter,
+) -> dict[str, str]:
+ """
+    Parse chapter.content as HTML, extract each <span class="verse">,
+    unwrap <span class="word-entry"> elements (preserving their text),
+    and return a dict mapping verse number -> cleaned HTML fragment for
+    that verse.
+ """
+ soup = BeautifulSoup(chapter.content, "html.parser")
+ verse_dict: dict[str, str] = {}
+ # find all verse spans (parser handles nesting correctly)
+ for verse_span in soup.find_all("span", class_="verse"):
+        # Find the verse number from <sup class="versemarker">NN</sup>
+ sup = verse_span.find("sup", class_="versemarker")
+ if not sup or not sup.string:
+ continue
+ verse_number = sup.string.strip()
+        # Unwrap all word-entry spans: replace <span class="word-entry">X</span>
+        # with X (preserving whitespace/punctuation)
+ for we in verse_span.find_all("span", class_="word-entry"):
+ we.unwrap()
+        # Optionally normalize whitespace; skip this to preserve the
+        # original spacing/punctuation exactly.
+ # cleaned_html = "".join(str(c) for c in verse_span.contents)
+ cleaned_html = str(verse_span)
+ # Fix spacing issues introduced by inner spans
+ cleaned_html = re.sub(
+ r"\s+([,;:.!?])", r"\1", cleaned_html
+ ) # remove space before punctuation
+ cleaned_html = re.sub(
+ r"\s+'", "'", cleaned_html
+ ) # remove space before apostrophe
+ cleaned_html = re.sub(
+ r"'\s+", "'", cleaned_html
+ ) # remove space after apostrophe
+ cleaned_html = re.sub(
+ r"\s*-\s*", "-", cleaned_html
+ ) # normalize spaces around hyphens
+ cleaned_html = re.sub(r"\s{2,}", " ", cleaned_html) # collapse double spaces
+ cleaned_html = cleaned_html.strip()
+ # if you want plain text instead, use: cleaned_text = verse_span.get_text(" ", strip=True)
+        # Store the cleaned HTML fragment (it still contains <sup class="versemarker"> etc.)
+ verse_dict[verse_number] = cleaned_html
+ return verse_dict
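+# A rough sketch of the cleanup above, given hypothetical f10 input:
+#   <span class="verse"><sup class="versemarker">3</sup>
+#   <span class="word-entry">Dieu </span><span class="word-entry">dit </span>: ...</span>
+# the word-entry spans are unwrapped and the space before ':' dropped,
+# yielding roughly:
+#   <span class="verse"><sup class="versemarker">3</sup> Dieu dit: ...</span>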
+
+
+if __name__ == "__main__":
+
+ # To run the doctests in this module, in the root of the project do:
+    # python backend/doc/domain/parsing.py
+    # or
+    # python backend/doc/domain/parsing.py -v
+ # See https://docs.python.org/3/library/doctest.html
+ # for more details.
+ import doctest
+
+ doctest.testmod()
diff --git a/backend/doc/domain/resource_lookup.py b/backend/doc/domain/resource_lookup.py
index 20c42a462..a88104438 100644
--- a/backend/doc/domain/resource_lookup.py
+++ b/backend/doc/domain/resource_lookup.py
@@ -39,10 +39,10 @@
read_file,
)
from doc.utils.list_utils import unique_tuples, unique_book_codes
-from doc.utils.text_utils import normalize_localized_book_name
+from doc.utils.text_utils import maybe_correct_book_name, normalize_localized_book_name
from doc.utils.url_utils import (
get_last_segment,
- get_book_names_from_title_file,
+ get_book_name_from_title_file,
book_codes_and_names_from_manifest,
)
from fastapi import HTTPException, status
@@ -54,28 +54,6 @@
fetch_source_data_cache: TTLCache[str, SourceData] = TTLCache(maxsize=1, ttl=180)
-# This can be expanded to include any additional types (if
-# there are any) that we want to be available to users. These are all
-# that I found of relevance in the data API.
-RESOURCE_TYPE_CODES_AND_NAMES: Mapping[str, str] = {
- "ayt": "Bahasa Indonesian Bible",
- "bc": "Bible Commentary",
- "blv": "Portuguese Bíblia Livre",
- "cuv": "新标点和合本",
- "f10": "French Louis Segond 1910 Bible",
- "nav": "New Arabic Version (Ketab El Hayat)",
- "reg": "Regular",
- "rg": "NT Survey Reviewers' Guide",
- "tn": "Translation Notes",
- "tn-condensed": "Condensed Translation Notes",
- "tq": "Translation Questions",
- "tw": "Translation Words",
- # "udb": "Unlocked Dynamic Bible", # Content team doesn't want udb used
- "ugnt": "unfoldingWord® Greek New Testament",
- "uhb": "unfoldingWord® Hebrew Bible",
- "ulb": "Unlocked Literal Bible",
-}
-
# This is only used to see if a lang_code is in the collection
# otherwise it is a heart language. Eventually the graphql data api may
# provide gateway/heart boolean value.
@@ -178,14 +156,6 @@
"zlm",
]
-BOOK_NAME_CORRECTION_TABLE: dict[tuple[str, str], str] = {
- ("es-419", "I juan"): "1 Juan",
- ("fr", "Ephésiens"): "Éphésiens",
- ("pt-br", "1 Corintios"): "1 Coríntios",
- ("sw", "Matendo ya mitume"): "Matendo ya Mitume",
- ("sw", "Luke"): "Luka",
- ("sw", "Waraka wa yakobo"): "Yakobo",
-}
# List of languages which do not have USFM available for any books. We use this
# to filter these out of STET's list of source and target
@@ -355,7 +325,7 @@ def repos_to_clone(
resource_assets_dir: str = settings.RESOURCE_ASSETS_DIR,
dcs_mirror_git_username: str = "DCS-Mirror",
resource_type_codes_and_names: Sequence[str] = list(
- RESOURCE_TYPE_CODES_AND_NAMES.keys()
+ settings.RESOURCE_TYPE_CODES_AND_NAMES.keys()
),
) -> list[tuple[HttpUrl, str, str]]:
repo_clone_list: list[tuple[HttpUrl, str, str]] = []
@@ -394,7 +364,9 @@ def get_resource_types(
usfm_resource_types: Sequence[str] = settings.USFM_RESOURCE_TYPES,
docx_file_path: str = "en_rg_nt_survey.docx",
en_rg: str = settings.EN_RG_DIR,
- resource_type_codes_and_names: Mapping[str, str] = RESOURCE_TYPE_CODES_AND_NAMES,
+ resource_type_codes_and_names: Mapping[
+ str, str
+ ] = settings.RESOURCE_TYPE_CODES_AND_NAMES,
) -> list[tuple[str, str]]:
resource_types = []
for url, resource_filepath, resource_type in repo_clone_list:
@@ -696,7 +668,9 @@ def update_repo_components(
repo_components: list[str],
usfm_resource_types: Sequence[str] = settings.USFM_RESOURCE_TYPES,
non_usfm_resource_types: Sequence[str] = NON_USFM_RESOURCE_TYPES,
- resource_type_codes_and_names: Mapping[str, str] = RESOURCE_TYPE_CODES_AND_NAMES,
+ resource_type_codes_and_names: Mapping[
+ str, str
+ ] = settings.RESOURCE_TYPE_CODES_AND_NAMES,
) -> list[str]:
last_component = repo_components[-1]
# Some DCS-Mirror URLs have an unusual pattern wherein a non resource type is the last component
@@ -790,20 +764,6 @@ def make_entry(url: HttpUrl, resource_type: str, lang: Language) -> RepoEntry:
return repos_info
-def maybe_correct_book_name(
- lang_code: str,
- book_name: str,
- book_name_correction_table: dict[tuple[str, str], str] = BOOK_NAME_CORRECTION_TABLE,
-) -> str:
- """
- Translate incorrect or undesirable book names to a preferred form.
- """
- book_name_ = BOOK_NAME_CORRECTION_TABLE.get((lang_code, book_name), "")
- if not book_name_:
- book_name_ = book_name
- return book_name_
-
-
def get_book_codes_for_lang(
lang_code: str,
usfm_only: bool = False,
@@ -901,26 +861,28 @@ def get_book_codes_for_lang_(
and len(repo_components) > 2
and resource_type in usfm_resource_types
):
- book_codes_and_names_localized_from_title_file = (
- get_book_names_from_title_file(
- resource_filepath,
- lang_code,
- repo_components,
- )
+ book_name_ = get_book_name_from_title_file(
+ resource_filepath,
+ lang_code,
+ repo_components,
)
logger.debug(
"book_codes_and_names_localized_from_title_file: %s",
- book_codes_and_names_localized_from_title_file,
+ book_name_,
)
- for code, name in book_codes_and_names_localized_from_title_file.items():
- book_codes_and_names_localized.append(
- (
- code,
- maybe_correct_book_name(
- lang_code, normalize_localized_book_name(name)
- ),
- )
+ logger.debug("book_code: %s", repo_components[1])
+ logger.debug(
+ "normalize_localized_book_name(book_name_): %s",
+ normalize_localized_book_name(book_name_),
+ )
+ book_codes_and_names_localized.append(
+ (
+ repo_components[1],
+ maybe_correct_book_name(
+ lang_code, normalize_localized_book_name(book_name_)
+ ),
)
+ )
if (
not usfm_only
or not book_codes_and_names_localized
@@ -1067,7 +1029,9 @@ def resource_lookup_dto(
book_code: str,
dcs_mirror_git_username: str = "DCS-Mirror",
zmq_git_username: str = "faustin_azaza",
- resource_type_codes_and_names: Mapping[str, str] = RESOURCE_TYPE_CODES_AND_NAMES,
+ resource_type_codes_and_names: Mapping[
+ str, str
+ ] = settings.RESOURCE_TYPE_CODES_AND_NAMES,
) -> Optional[ResourceLookupDto]:
"""
>>> from doc.domain import resource_lookup
@@ -1180,9 +1144,7 @@ def prepare_resource_filepath(
working_dir: str = settings.RESOURCE_ASSETS_DIR,
) -> str:
resource_filepath = ""
- if (
- resource_lookup_dto.url is not None
- ): # We know that resource_url is not None because of how we got here, but mypy isn't convinced. Let's convince mypy.
+ if resource_lookup_dto.url is not None:
resource_filepath = join(
working_dir,
get_last_segment(resource_lookup_dto.url, resource_lookup_dto.lang_code),
diff --git a/backend/doc/markdown_transforms/link_regexes.py b/backend/doc/markdown_transforms/link_regexes.py
index ac979811c..cf3c3d03f 100644
--- a/backend/doc/markdown_transforms/link_regexes.py
+++ b/backend/doc/markdown_transforms/link_regexes.py
@@ -13,6 +13,18 @@
)
)
+# ceb language erroneously uses obe in its rc links in TW content
+TW_OBE_RC_LINK_RE = re.compile(
+    r"\[\[rc:\/\/(?P<lang_code>[^\[\]\(\)\/]+?)\/obe\/(?:kt|names|other)\/(?P<word>[^\[\]\(\)]+?)\]\]"
+)
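+# e.g., [[rc://ceb/obe/kt/grace]] (hypothetical word) captures
+# lang_code "ceb" and word "grace".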
+
+# RC_QUESTION_LINK_RE = re.compile(
+#     r"\[\[rc:\/\/(?P<lang_code>[^\[\]\(\)\/]+?)\/bible\/questions\/comprehension\/[^\[\]\(\)\/]+?\/[^\[\]\(\)\/]+?\]\]"
+# )
+RC_QUESTION_LINK_RE = re.compile(
+    r"\[\[rc:\/\/(?P<lang_code>[^\[\]\(\)\/]+)\/bible\/questions\/comprehension\/(?P<book>[^\[\]\(\)\/]+)\/(?P<chapter>[^\[\]\(\)\/]+)\]\]"
+)
+
# Regex pattern to match TW STAR markdown style links and capture the last segment
TW_STAR_RC_LINK_RE = re.compile(r"\[\[rc://[^/]+/[^/]+/[^/]+/[^/]+/(?P<word>[^/]+)\]\]")
@@ -51,7 +63,7 @@
# e.g., [foo](../kt/foo.md) links.
# NOTE See id:regex_transformation_order above
TW_MARKDOWN_LINK_RE = re.compile(
-    r"\[(?P<text>[^\[\]\(\)]+?)\]\(\.+\/(?:kt|names|other)\/(?P<word>[^\[\]\(\)]+?)\.md\)"
+    r"\[(?P<text>[^\]]+?)\]\(\.*/(?:kt|names|other)/(?P<word>[^\)]+?)\.md\)"
)
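+# The loosened pattern accepts zero or more leading dots before the
+# slash, so e.g. [grace](../kt/grace.md), [grace](./kt/grace.md), and
+# [grace](/kt/grace.md) (hypothetical word) all match with word "grace".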
diff --git a/backend/doc/markdown_transforms/markdown_transformer.py b/backend/doc/markdown_transforms/markdown_transformer.py
index 2f7f5e589..b0d1e66e9 100644
--- a/backend/doc/markdown_transforms/markdown_transformer.py
+++ b/backend/doc/markdown_transforms/markdown_transformer.py
@@ -7,6 +7,7 @@
from doc.domain.bible_books import BOOK_NUMBERS
from doc.domain.model import ResourceRequest
from doc.markdown_transforms.link_regexes import (
+ RC_QUESTION_LINK_RE,
TA_MARKDOWN_HTTPS_LINK_RE,
TA_PREFIXED_MARKDOWN_HTTPS_LINK_RE,
TA_PREFIXED_MARKDOWN_LINK_RE,
@@ -19,6 +20,7 @@
TN_MARKDOWN_SCRIPTURE_LINK_RE,
TN_OBS_MARKDOWN_LINK_RE,
TW_MARKDOWN_LINK_RE,
+ TW_OBE_RC_LINK_RE,
TW_RC_LINK_RE,
TW_STAR_RC_LINK_RE,
TW_WIKI_PREFIXED_RC_LINK_RE,
@@ -46,6 +48,7 @@
MARKDOWN_SECTIONS_TO_REMOVE: list[str] = [
"Examples from the Bible stories",
"Links",
+    "Tautan",  # "Links" in the bi language
"Picture of",
"Pictures",
]
@@ -97,8 +100,7 @@ def transform_tw_links(
# Transform the '...PREFIXED...' version of regexes in each
# resource_type group first before its non-'...PREFIXED...' version
# of regex otherwise we could orphan the prefix portion of the
- # phrase, e.g., you could be left with (Veja: ) or (See: ) or
- # (Blah blah blah: ).
+ # phrase, e.g., you could be left with (Veja: ) or (See: ).
for wiki_link in wiki_link_parser(source):
source = transform_tw_rc_link(
wiki_link, source, lang_code, resource_requests, translation_words_dict
@@ -119,6 +121,9 @@ def transform_tw_links(
source = transform_tw_markdown_links(
source, lang_code, resource_requests, translation_words_dict
)
+ source = transform_rc_obe_tw_links(
+ source, lang_code, resource_requests, translation_words_dict
+ )
return source
@@ -152,6 +157,19 @@ def transform_ta_and_tn_links(
)
source = transform_tn_missing_book_code_markdown_links_no_paren(source)
source = transform_tn_obs_markdown_links(source)
+ source = transform_rc_question_links(source)
+ return source
+
+
+def transform_rc_question_links(
+ source: str, rc_question_link_re: re.Pattern[str] = RC_QUESTION_LINK_RE
+) -> str:
+ """
+    Remove comprehension question links in the ceb language (and any
+    other languages that have them).
+ """
+ for match in finditer(rc_question_link_re, source):
+        # For now, remove the match text from the source text.
+ source = source.replace(match.group(0), "")
return source
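For instance (hypothetical input), a comprehension question link is dropped wholesale, leaving any surrounding punctuation in place:

    transform_rc_question_links(
        "Intro ([[rc://ceb/bible/questions/comprehension/luk/01]])"
    )
    # -> "Intro ()"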
@@ -163,6 +181,7 @@ def transform_tw_rc_link(
translation_words_dict: dict[str, str],
tw: str = "tw",
fmt_str: str = TRANSLATION_WORD_ANCHOR_LINK_FMT_STR,
+ tw_rc_link_re: re.Pattern[str] = TW_RC_LINK_RE,
) -> str:
"""
Transform the translation word rc wikilink into a Markdown
@@ -170,7 +189,7 @@ def transform_tw_rc_link(
the translation word definition if it exists or replace the
link with the non-localized word if it doesn't.
"""
- match = search(TW_RC_LINK_RE, wikilink.url)
+ match = search(tw_rc_link_re, wikilink.url)
if match:
# Determine if resource_type TW was one of the requested
# resources.
@@ -224,34 +243,33 @@ def transform_tw_markdown_links(
translation_words_dict: dict[str, str],
tw: str = "tw",
fmt_str: str = TRANSLATION_WORD_ANCHOR_LINK_FMT_STR,
+ tw_markdown_link_re: re.Pattern[str] = TW_MARKDOWN_LINK_RE,
) -> str:
"""
Transform the translation word relative file link into a
source anchor link pointing to a destination anchor link for
the translation word definition.
"""
- # Determine if resource_type TW was one of the requested
- # resources.
tw_resources_requests = [
resource_request
for resource_request in resource_requests
if tw in resource_request.resource_type
]
- for match in finditer(TW_MARKDOWN_LINK_RE, source):
+ for match in finditer(tw_markdown_link_re, source):
match_text = match.group(0)
filename_sans_suffix = match.group("word")
if filename_sans_suffix in translation_words_dict and tw_resources_requests:
- # Localize non-English languages.
file_content = read_file(translation_words_dict[filename_sans_suffix])
- # Get the localized name for the translation word
localized_translation_word_ = localized_translation_word(file_content)
- # Build the anchor links
+ # logger.debug("filename_sans_suffix: %s", filename_sans_suffix)
+ # logger.debug("localized_translation_word_: %s", localized_translation_word_)
source = source.replace(
match_text,
fmt_str.format(
- localized_translation_word_,
+ localized_translation_word_, # e.g., Jewish authorities
lang_code,
- localized_translation_word_,
+ filename_sans_suffix, # e.g., jewishleaders
+ # "".join(localized_translation_word_.split()),
),
)
else:
@@ -266,8 +284,61 @@ def transform_tw_markdown_links(
# NOTE Theoretically, this will leave a trailing comma after the link
# if the link is not the last link in a list of links. I haven't
# yet seen such a case in practice though.
- match_text_plus_preceding_dot_utf8_char = "· {}".format(match_text)
- source = source.replace(match_text_plus_preceding_dot_utf8_char, "")
+ # match_text_plus_preceding_dot_utf8_char = "· {}".format(match_text)
+ # source = source.replace(match_text_plus_preceding_dot_utf8_char, "")
+ return source
+
+
+def transform_rc_obe_tw_links(
+ source: str,
+ lang_code: str,
+ resource_requests: Sequence[ResourceRequest],
+ translation_words_dict: dict[str, str],
+ tw: str = "tw",
+ fmt_str: str = TRANSLATION_WORD_ANCHOR_LINK_FMT_STR,
+ tw_link_re: re.Pattern[str] = TW_OBE_RC_LINK_RE,
+) -> str:
+ """
+    Transform the translation word OBE rc link into a source anchor
+    link pointing to a destination anchor link for the translation
+    word definition.
+ """
+ tw_resources_requests = [
+ resource_request
+ for resource_request in resource_requests
+ if tw in resource_request.resource_type
+ ]
+ for match in finditer(tw_link_re, source):
+ match_text = match.group(0)
+ filename_sans_suffix = match.group("word")
+ if filename_sans_suffix in translation_words_dict and tw_resources_requests:
+ file_content = read_file(translation_words_dict[filename_sans_suffix])
+ localized_translation_word_ = localized_translation_word(file_content)
+ # logger.debug("filename_sans_suffix: %s", filename_sans_suffix)
+ # logger.debug("localized_translation_word_: %s", localized_translation_word_)
+ source = source.replace(
+ match_text,
+ fmt_str.format(
+ localized_translation_word_, # e.g., Jewish authorities
+ lang_code,
+ filename_sans_suffix, # e.g., jewishleaders
+ # "".join(localized_translation_word_.split()),
+ ),
+ )
+ else:
+ logger.debug(
+ "TW file for filename_sans_suffix: %s not found for lang_code: %s",
+ filename_sans_suffix,
+ lang_code,
+ )
+ # Search for translation word relative link
+ # and remove it along with any trailing comma from
+ # the source text.
+ # NOTE Theoretically, this will leave a trailing comma after the link
+ # if the link is not the last link in a list of links. I haven't
+ # yet seen such a case in practice though.
+ # match_text_plus_preceding_dot_utf8_char = "· {}".format(match_text)
+ # source = source.replace(match_text_plus_preceding_dot_utf8_char, "")
return source
@@ -278,6 +349,7 @@ def transform_tw_wiki_rc_links(
translation_words_dict: dict[str, str],
tw: str = "tw",
fmt_str: str = TRANSLATION_WORD_ANCHOR_LINK_FMT_STR,
+ tw_wiki_rc_link_re: re.Pattern[str] = TW_WIKI_RC_LINK_RE,
) -> str:
"""
Transform the translation word rc link into source anchor link
@@ -291,7 +363,7 @@ def transform_tw_wiki_rc_links(
for resource_request in resource_requests
if tw in resource_request.resource_type
]
- for match in finditer(TW_WIKI_RC_LINK_RE, source):
+ for match in finditer(tw_wiki_rc_link_re, source):
filename_sans_suffix = match.group("word")
if filename_sans_suffix in translation_words_dict and tw_resources_requests:
# Localize non-English languages.
@@ -330,6 +402,7 @@ def transform_tw_wiki_rc_links2(
translation_words_dict: dict[str, str],
tw: str = "tw",
fmt_str: str = TRANSLATION_WORD_ANCHOR_LINK_FMT_STR,
+ tw_wiki_rc_link_re2: re.Pattern[str] = TW_WIKI_RC_LINK_RE2,
) -> str:
"""
Transform the translation word rc link into source anchor link
@@ -343,7 +416,7 @@ def transform_tw_wiki_rc_links2(
for resource_request in resource_requests
if tw in resource_request.resource_type
]
- for match in finditer(TW_WIKI_RC_LINK_RE2, source):
+ for match in finditer(tw_wiki_rc_link_re2, source):
filename_sans_suffix = match.group("word")
if filename_sans_suffix in translation_words_dict and tw_resources_requests:
# Localize non-English languages.
@@ -383,6 +456,7 @@ def transform_tw_star_rc_link(
translation_words_dict: dict[str, str],
tw: str = "tw",
fmt_str: str = TRANSLATION_WORD_ANCHOR_LINK_FMT_STR,
+ tw_star_rc_link_re: re.Pattern[str] = TW_STAR_RC_LINK_RE,
) -> str:
"""
Transform the translation word rc wikilink into a Markdown
@@ -390,7 +464,7 @@ def transform_tw_star_rc_link(
the translation word definition if it exists or replace the
link with the non-localized word if it doesn't.
"""
- match = search(TW_STAR_RC_LINK_RE, wikilink.url)
+ match = search(tw_star_rc_link_re, wikilink.url)
if match:
# Determine if resource_type TW was one of the requested
# resources.
@@ -427,7 +501,9 @@ def transform_tw_star_rc_link(
url = url.replace(match.group(0), filename_sans_suffix)
regexp = r"\[\[{}\]\]".format(wikilink.url)
for match2 in finditer(regexp, source):
- source = source.replace(match2.group(0), url)
+            source = source.replace(
+                match2.group(0), fmt_str.format(f"#{lang_code}-{url}")
+            )
return source
@@ -490,6 +566,7 @@ def transform_tw_wiki_prefixed_rc_links(
translation_words_dict: dict[str, str],
tw: str = "tw",
fmt_str: str = TRANSLATION_WORD_PREFIX_ANCHOR_LINK_FMT_STR,
+ tw_wiki_prefixed_rc_link_re: re.Pattern[str] = TW_WIKI_PREFIXED_RC_LINK_RE,
) -> str:
"""
Transform the translation word rc TW wikilink into source anchor link
@@ -503,7 +580,7 @@ def transform_tw_wiki_prefixed_rc_links(
for resource_request in resource_requests
if tw in resource_request.resource_type
]
- for match in finditer(TW_WIKI_PREFIXED_RC_LINK_RE, source):
+ for match in finditer(tw_wiki_prefixed_rc_link_re, source):
filename_sans_suffix = match.group("word")
if filename_sans_suffix in translation_words_dict and tw_resources_requests:
# Need to localize non-English languages.
@@ -532,7 +609,10 @@ def transform_tw_wiki_prefixed_rc_links(
return source
-def transform_ta_prefixed_wiki_rc_links(source: str) -> str:
+def transform_ta_prefixed_wiki_rc_links(
+ source: str,
+ ta_wiki_prefixed_rc_link_re: re.Pattern[str] = TA_WIKI_PREFIXED_RC_LINK_RE,
+) -> str:
"""
Transform the translation academy rc wikilink into source anchor link
pointing to a destination anchor link for the translation academy
@@ -540,13 +620,15 @@ def transform_ta_prefixed_wiki_rc_links(source: str) -> str:
"""
# FIXME When TA gets implemented we'll need to actually build
# the anchor link.
- for match in finditer(TA_WIKI_PREFIXED_RC_LINK_RE, source):
+ for match in finditer(ta_wiki_prefixed_rc_link_re, source):
# For now, remove match text
source = source.replace(match.group(0), "")
return source
-def transform_ta_wiki_rc_links(source: str) -> str:
+def transform_ta_wiki_rc_links(
+ source: str, ta_wiki_rc_link_re: re.Pattern[str] = TA_WIKI_RC_LINK_RE
+) -> str:
"""
Transform the translation academy rc wikilink into source anchor link
pointing to a destination anchor link for the translation academy
@@ -554,7 +636,7 @@ def transform_ta_wiki_rc_links(source: str) -> str:
"""
# FIXME When TA gets implemented we'll need to actually build
# the anchor link.
- for match in finditer(TA_WIKI_RC_LINK_RE, source):
+ for match in finditer(ta_wiki_rc_link_re, source):
        # For now, remove the match text from the source text.
source = source.replace(match.group(0), "")
return source
@@ -562,7 +644,9 @@ def transform_ta_wiki_rc_links(source: str) -> str:
# TODO zh gen, e.g., 1:20 you end up with things like:(参:). We
# should probably remove the whole parenthesized expression.
-def transform_ta_star_rc_links(source: str) -> str:
+def transform_ta_star_rc_links(
+ source: str, ta_star_rc_link_re: re.Pattern[str] = TA_STAR_RC_LINK_RE
+) -> str:
"""
Transform the translation academy rc wikilink into source anchor link
pointing to a destination anchor link for the translation academy
@@ -578,7 +662,10 @@ def transform_ta_star_rc_links(source: str) -> str:
# TODO zh gen, e.g., 1:20 you end up with things like:(参:). We
# should probably remove the whole parenthesized expression.
-def transform_ta_markdown_links(source: str) -> str:
+def transform_ta_markdown_links(
+ source: str,
+ ta_prefixed_markdown_link_re: re.Pattern[str] = TA_PREFIXED_MARKDOWN_LINK_RE,
+) -> str:
"""
Transform the translation academy markdown link into source anchor link
pointing to a destination anchor link for the translation
@@ -586,13 +673,18 @@ def transform_ta_markdown_links(source: str) -> str:
"""
# FIXME When TA gets implemented we'll need to actually build
# the anchor link.
- for match in finditer(TA_PREFIXED_MARKDOWN_LINK_RE, source):
+ for match in finditer(ta_prefixed_markdown_link_re, source):
        # For now, remove the match text from the source text.
source = source.replace(match.group(0), "")
return source
-def transform_ta_prefixed_markdown_https_links(source: str) -> str:
+def transform_ta_prefixed_markdown_https_links(
+ source: str,
+ ta_prefixed_markdown_https_link_re: re.Pattern[
+ str
+ ] = TA_PREFIXED_MARKDOWN_HTTPS_LINK_RE,
+) -> str:
"""
Transform the translation academy markdown link into source anchor link
pointing to a destination anchor link for the translation
@@ -600,13 +692,15 @@ def transform_ta_prefixed_markdown_https_links(source: str) -> str:
"""
# FIXME When TA gets implemented we'll need to actually build
# the anchor link.
- for match in finditer(TA_PREFIXED_MARKDOWN_HTTPS_LINK_RE, source):
+ for match in finditer(ta_prefixed_markdown_https_link_re, source):
        # For now, remove the match text from the source text.
source = source.replace(match.group(0), "")
return source
-def transform_ta_markdown_https_links(source: str) -> str:
+def transform_ta_markdown_https_links(
+ source: str, ta_markdown_https_link_re: re.Pattern[str] = TA_MARKDOWN_HTTPS_LINK_RE
+) -> str:
"""
Transform the translation academy markdown link into source anchor link
pointing to a destination anchor link for the translation
@@ -614,7 +708,7 @@ def transform_ta_markdown_https_links(source: str) -> str:
"""
# FIXME When TA gets implemented we'll need to actually build
# the anchor link.
- for match in finditer(TA_MARKDOWN_HTTPS_LINK_RE, source):
+ for match in finditer(ta_markdown_https_link_re, source):
        # For now, remove the match text from the source text.
source = source.replace(match.group(0), "")
return source
@@ -626,6 +720,7 @@ def transform_tn_prefixed_markdown_links(
working_dir: str = settings.RESOURCE_ASSETS_DIR,
tn: str = "tn",
fmt_str: str = TRANSLATION_NOTE_ANCHOR_LINK_FMT_STR,
+ tn_markdown_scripture_link_re: re.Pattern[str] = TN_MARKDOWN_SCRIPTURE_LINK_RE,
) -> str:
"""
Transform the translation note rc link into a link pointing to
@@ -670,6 +765,7 @@ def transform_tn_prefixed_markdown_links(
)
)
if exists(path): # file path to TN note exists
+ # TODO Is this still good with new USFM parser?
# Create anchor link to translation note
new_link = fmt_str.format(
scripture_ref,
@@ -699,6 +795,9 @@ def transform_tn_markdown_links(
tn: str = "tn",
working_dir: str = settings.RESOURCE_ASSETS_DIR,
fmt_str: str = TRANSLATION_NOTE_ANCHOR_LINK_FMT_STR,
+ tn_markdown_relative_scripture_link_re: re.Pattern[
+ str
+ ] = TN_MARKDOWN_RELATIVE_SCRIPTURE_LINK_RE,
) -> str:
"""
Transform the translation note rc link into a link pointing to
@@ -707,7 +806,7 @@ def transform_tn_markdown_links(
"""
matching_resource_requests: list[ResourceRequest]
matching_resource_request: ResourceRequest
- for match in finditer(TN_MARKDOWN_RELATIVE_SCRIPTURE_LINK_RE, source):
+ for match in finditer(tn_markdown_relative_scripture_link_re, source):
scripture_ref = match.group("scripture_ref")
book_code = match.group("book_code")
chapter_num = match.group("chapter_num")
@@ -774,6 +873,9 @@ def transform_tn_missing_book_code_markdown_links(
tn: str = "tn",
working_dir: str = settings.RESOURCE_ASSETS_DIR,
fmt_str: str = TRANSLATION_NOTE_ANCHOR_LINK_FMT_STR,
+ tn_markdown_relative_to_current_book_scripture_link_re: re.Pattern[
+ str
+ ] = TN_MARKDOWN_RELATIVE_TO_CURRENT_BOOK_SCRIPTURE_LINK_RE,
) -> str:
"""
Transform the translation note rc link into a link pointing to
@@ -783,7 +885,7 @@ def transform_tn_missing_book_code_markdown_links(
matching_resource_requests: list[ResourceRequest]
matching_resource_request: ResourceRequest
for match in finditer(
- TN_MARKDOWN_RELATIVE_TO_CURRENT_BOOK_SCRIPTURE_LINK_RE, source
+ tn_markdown_relative_to_current_book_scripture_link_re, source
):
scripture_ref = match.group("scripture_ref")
chapter_num = match.group("chapter_num")
@@ -847,6 +949,9 @@ def transform_tn_missing_book_code_markdown_links_no_paren(
source: str,
# tn: str = "tn",
# working_dir: str = settings.RESOURCE_ASSETS_DIR,
+ tn_markdown_relative_to_current_book_scripture_link_re_no_parens: re.Pattern[
+ str
+ ] = TN_MARKDOWN_RELATIVE_TO_CURRENT_BOOK_SCRIPTURE_LINK_RE_NO_PARENS,
) -> str:
"""
Transform the translation note rc link into a non-linked scripture reference only.
@@ -856,7 +961,7 @@ def transform_tn_missing_book_code_markdown_links_no_paren(
# resource_requests = self._resource_requests
# lang_code = self._lang_code
for match in finditer(
- TN_MARKDOWN_RELATIVE_TO_CURRENT_BOOK_SCRIPTURE_LINK_RE_NO_PARENS, source
+ tn_markdown_relative_to_current_book_scripture_link_re_no_parens, source
):
scripture_ref = match.group("scripture_ref")
# chapter_num = match.group("chapter_num")
@@ -914,12 +1019,14 @@ def transform_tn_missing_book_code_markdown_links_no_paren(
return source
-def transform_tn_obs_markdown_links(source: str) -> str:
+def transform_tn_obs_markdown_links(
+ source: str, tn_obs_markdown_link_re: re.Pattern[str] = TN_OBS_MARKDOWN_LINK_RE
+) -> str:
"""
Until OBS is supported, replace OBS TN link with just its link
text.
"""
- for match in finditer(TN_OBS_MARKDOWN_LINK_RE, source):
+ for match in finditer(tn_obs_markdown_link_re, source):
# Build the anchor links
# FIXME Actually create a meaningful link rather than just
# link text
@@ -927,12 +1034,14 @@ def transform_tn_obs_markdown_links(source: str) -> str:
return source
-def wiki_link_parser(source: str) -> list[WikiLink]:
+def wiki_link_parser(
+ source: str, wiki_link_re: re.Pattern[str] = WIKI_LINK_RE
+) -> list[WikiLink]:
"""Return a list of all Wiki links in source."""
links = [
WikiLink(
url=link.group("url"),
)
- for link in finditer(WIKI_LINK_RE, source)
+ for link in finditer(wiki_link_re, source)
]
return links
diff --git a/backend/doc/utils/docx_util.py b/backend/doc/utils/docx_util.py
index dc05e2761..180d35a3f 100644
--- a/backend/doc/utils/docx_util.py
+++ b/backend/doc/utils/docx_util.py
@@ -1,8 +1,11 @@
+import re
from pathlib import Path
from docx import Document # type: ignore
from docx.oxml import OxmlElement # type: ignore
from docx.oxml.ns import qn # type: ignore
+from docx.text.paragraph import Paragraph # type: ignore
+from docx.text.run import Run # type: ignore
def generate_docx_toc(docx_filepath: str) -> str:
@@ -37,3 +40,151 @@ def generate_docx_toc(docx_filepath: str) -> str:
r_element.append(fldChar4)
document.save(toc_path)
return toc_path
+
+
+def preprocess_html_for_internal_docx_links(html: str) -> str:
+ """
+ Replace internal HTML anchors and headings with markers that survive HTML→DOCX conversion.
+ Example:
+        <a id="intro"></a> → {{BOOKMARK:intro}}
+        <a href="#intro">Christ</a> → {{LINK_START:intro}}Christ{{LINK_END}}
+ """
+ # Mark bookmarks
+ html = re.sub(
+        r'<a\s+id="([^"]+)"\s*>\s*</a>',
+ r"{{BOOKMARK:\1}}",
+ html,
+ flags=re.IGNORECASE,
+ )
+ # Replace links
+ html = re.sub(
+        r'<a\s+href="#([^"]+)"\s*>(.*?)</a>',
+ r"{{LINK_START:\1}}\2{{LINK_END}}",
+ html,
+ flags=re.IGNORECASE | re.DOTALL,
+ )
+ html = re.sub(
+ r'(.*?)',
+ r"{{LINK_START:\1}}\2{{LINK_END}}",
+ html,
+ flags=re.IGNORECASE | re.DOTALL,
+ )
+ return html
+
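A sketch (hypothetical HTML) of the marker substitution this performs:

    preprocess_html_for_internal_docx_links(
        '<a id="intro"></a>Intro <a href="#intro">Christ</a>'
    )
    # -> '{{BOOKMARK:intro}}Intro {{LINK_START:intro}}Christ{{LINK_END}}'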
+
+def _make_text_run(text: str) -> OxmlElement:
+ """Create a plain text run that preserves spaces."""
+ r = OxmlElement("w:r")
+ t = OxmlElement("w:t")
+ t.text = text
+ # Ensure Word preserves leading/trailing spaces
+ t.set("{http://www.w3.org/XML/1998/namespace}space", "preserve")
+ r.append(t)
+ return r
+
+
+def _make_internal_hyperlink_element(text: str, bookmark_name: str) -> OxmlElement:
+ """Create an internal hyperlink element to an existing bookmark."""
+ hyperlink = OxmlElement("w:hyperlink")
+ hyperlink.set(qn("w:anchor"), bookmark_name) # internal anchor, no '#'
+ hyperlink.set(qn("w:history"), "1")
+ r = OxmlElement("w:r")
+ r_pr = OxmlElement("w:rPr")
+ r_style = OxmlElement("w:rStyle")
+ r_style.set(qn("w:val"), "Hyperlink")
+ r_pr.append(r_style)
+ r.append(r_pr)
+ t = OxmlElement("w:t")
+ t.text = text
+ t.set("{http://www.w3.org/XML/1998/namespace}space", "preserve")
+ r.append(t)
+ hyperlink.append(r)
+ return hyperlink
+
+
+def _add_bookmark_to_run(run: Run, bookmark_name: str) -> None:
+ """Add a DOCX bookmark around the given run in-place."""
+ r = run._r
+ p = r.getparent()
+ start = OxmlElement("w:bookmarkStart")
+ start.set(qn("w:id"), "0")
+ start.set(qn("w:name"), bookmark_name)
+ end = OxmlElement("w:bookmarkEnd")
+ end.set(qn("w:id"), "0")
+ p.insert(p.index(r), start)
+ p.insert(p.index(r) + 1, end)
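For a bookmark named "intro", the run's XML ends up wrapped roughly like this:

    <w:bookmarkStart w:id="0" w:name="intro"/>
    <w:r>...original run...</w:r>
    <w:bookmarkEnd w:id="0"/>

Word resolves internal hyperlinks by bookmark name rather than id, so the fixed w:id is tolerated in practice.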
+
+
+def _replace_runs(para: Paragraph, new_elems: list[OxmlElement]) -> None:
+ """
+ Replace all runs in a paragraph with the provided XML elements.
+ (Keeps paragraph element intact and appends supplied elements.)
+ """
+ # remove existing run XML elements
+ for run in list(para.runs):
+ p_r = run._r
+ p = p_r.getparent()
+ if p is not None and p_r in p:
+ p.remove(p_r)
+ # append new elements
+ for elem in new_elems:
+ para._p.append(elem)
+
+
+def add_internal_docx_links(doc: Document) -> None:
+ """
+ Convert {{BOOKMARK:name}} markers into bookmarks, and
+ convert {{LINK_START:name}}...{{LINK_END}} sequences into internal links.
+ Operates paragraph-by-paragraph using paragraph text aggregated
+ from runs so punctuation/spaces are preserved.
+ """
+ bookmark_map: dict[str, str] = {}
+ # Pass 1 — find and create bookmarks inside runs (remove marker text)
+ bookmark_pattern = re.compile(r"\{\{BOOKMARK:([^}]+)\}\}")
+ for para in doc.paragraphs:
+ for run in para.runs:
+ m = bookmark_pattern.search(run.text)
+ if not m:
+ continue
+ name = m.group(1)
+ # remove marker from run text
+ run.text = bookmark_pattern.sub("", run.text).strip()
+ # add bookmark around this run
+ _add_bookmark_to_run(run, name)
+ bookmark_map[name] = name
+ # Pass 2 — replace LINK_START/LINK_END sequences at paragraph level
+ # pattern matches sequences like {{LINK_START:name}}...{{LINK_END}}
+ link_pattern = re.compile(
+ r"\{\{LINK_START:([^}]+)\}\}(.*?)\{\{LINK_END\}\}", flags=re.DOTALL
+ )
+ for para in doc.paragraphs:
+ # combine paragraph text from runs to preserve exact punctuation/spacing
+ combined_text = "".join(run.text for run in para.runs)
+ if "{{LINK_START:" not in combined_text:
+ # nothing to do for this paragraph
+ continue
+ new_elements: list[OxmlElement] = []
+ cursor = 0
+ for m in link_pattern.finditer(combined_text):
+ start, end = m.span()
+ target = m.group(1)
+ link_text = m.group(2)
+ # add literal text before this link (commas, spaces, etc.)
+ if start > cursor:
+ literal = combined_text[cursor:start]
+ if literal:
+ new_elements.append(_make_text_run(literal))
+ # add hyperlink element or fallback to plain text if bookmark missing
+ if target in bookmark_map:
+ new_elements.append(_make_internal_hyperlink_element(link_text, target))
+ else:
+ new_elements.append(_make_text_run(link_text))
+ cursor = end
+ # trailing text after last link
+ if cursor < len(combined_text):
+ tail = combined_text[cursor:]
+ if tail:
+ new_elements.append(_make_text_run(tail))
+ # replace the paragraph's runs with our constructed elements
+ if new_elements:
+ _replace_runs(para, new_elements)
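A plausible end-to-end usage, assuming the htmldocx converter already imported elsewhere in this diff and a hypothetical raw_html string produced by the existing HTML pipeline:

    from docx import Document
    from htmldocx import HtmlToDocx

    html = preprocess_html_for_internal_docx_links(raw_html)
    document = Document()
    HtmlToDocx().add_html_to_document(html, document)  # markers survive as plain text
    add_internal_docx_links(document)  # markers become bookmarks and live links
    document.save("output.docx")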
diff --git a/backend/doc/utils/list_utils.py b/backend/doc/utils/list_utils.py
index 7243aa070..0c75d22c4 100644
--- a/backend/doc/utils/list_utils.py
+++ b/backend/doc/utils/list_utils.py
@@ -1,4 +1,4 @@
-from typing import TypeVar
+from typing import Sequence, TypeVar
T = TypeVar("T", tuple[str, str], tuple[str, str, bool])
@@ -26,3 +26,15 @@ def unique_book_codes(lst: list[T]) -> list[T]:
if key not in seen:
seen[key] = item
return list(seen.values())
+
+
+def unique_list_of_strings(
+ elements: Sequence[tuple[str, str]]
+) -> Sequence[tuple[str, str]]:
+    """
+    Return the given pairs de-duplicated by their first member,
+    preserving first-occurrence order.
+    """
+ unique_strs = []
+ added_strs = set()
+ for key, val in elements:
+ if key not in added_strs:
+ unique_strs.append((key, val))
+ added_strs.add(key)
+ return unique_strs
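For example, duplicate keys keep their first value:

    unique_list_of_strings([("gen", "Genesis"), ("gen", "Génesis"), ("exo", "Exodus")])
    # -> [("gen", "Genesis"), ("exo", "Exodus")]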
diff --git a/backend/doc/utils/text_utils.py b/backend/doc/utils/text_utils.py
index f0d65699b..97be64880 100644
--- a/backend/doc/utils/text_utils.py
+++ b/backend/doc/utils/text_utils.py
@@ -16,6 +16,19 @@ def normalize_spaces(text: str) -> str:
"III": "3",
}
+BOOK_NAME_CORRECTION_TABLE: dict[tuple[str, str], str] = {
+ ("es-419", "I juan"): "1 Juan",
+ ("fr", "Ephésiens"): "Éphésiens",
+ ("pt-br", "1 Corintios"): "1 Coríntios",
+ (
+ "rmp",
+ "Galasians sapta 1. v/1 da-h pol. dal goad phi da-h dululan, ne dal mai-h phi da-h apostel ipais ag mayaib. phi. je-su krais mai-h mam gad pha, nug krais matmat ag mau sen pha, nug da-h ipais ag malan. v/2 da-h ayaid amayaid da-h pha dade, hit jain hen ohvu iu- an sios galesia e-h hagaug. v/3 gad mam hita, hayaug je-su krais pha, nug-te hagaug he-eh phadu ne mab hogad nauha-h da-h-du. v/4",
+ ): "Galasians",
+ ("sw", "Matendo ya mitume"): "Matendo ya Mitume",
+ ("sw", "Luke"): "Luka",
+ ("sw", "Waraka wa yakobo"): "Yakobo",
+}
+
def normalize_localized_book_name(localized_book_name: str) -> str:
"""
@@ -87,6 +100,20 @@ def chapter_label_numeric_part(s: str) -> int:
return result
+def maybe_correct_book_name(
+ lang_code: str,
+ book_name: str,
+ book_name_correction_table: dict[tuple[str, str], str] = BOOK_NAME_CORRECTION_TABLE,
+) -> str:
+ """
+ Translate incorrect or undesirable book names to a preferred form.
+ """
+    book_name_ = book_name_correction_table.get((lang_code, book_name), "")
+ if not book_name_:
+ book_name_ = book_name
+ return book_name_
+
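For example, against the correction table above:

    maybe_correct_book_name("sw", "Luke")    # -> "Luka"
    maybe_correct_book_name("sw", "Mwanzo")  # -> "Mwanzo" (no entry, passed through)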
+
if __name__ == "__main__":
# To run the doctests in this module, in the root of the project do:
diff --git a/backend/doc/utils/tw_utils.py b/backend/doc/utils/tw_utils.py
index b919ea829..ae0e102a1 100644
--- a/backend/doc/utils/tw_utils.py
+++ b/backend/doc/utils/tw_utils.py
@@ -3,23 +3,21 @@
resources that we use in multiple places.
"""
-import os
-import pathlib
import re
import time
from glob import glob
-from typing import Optional, Sequence
+from os.path import basename
+from pathlib import Path
+from typing import Mapping, Optional, Sequence
from doc.config import settings
-from doc.domain import parsing, resource_lookup
-from doc.domain.model import ResourceRequest, TWBook, TWNameContentPair, USFMBook
+from doc.domain import parsing, resource_lookup, bible_books
+from doc.domain.model import ResourceRequest, TWBook, TWNameContentPair, TWUse, USFMBook
logger = settings.logger(__name__)
TW = "tw"
-OPENING_H3_FMT_STR: str = "<h3>{}</h3>"
-OPENING_H3_WITH_ID_FMT_STR: str = '<h3 id="{}-{}">{}</h3>'
def translation_word_filepaths(resource_dir: str) -> list[str]:
@@ -106,38 +104,71 @@ def translation_words_dict(tw_resource_dir: Optional[str]) -> dict[str, str]:
if tw_resource_dir is not None:
filepaths = translation_word_filepaths(tw_resource_dir)
translation_words_dict = {
- pathlib.Path(os.path.basename(word_filepath)).stem: word_filepath
+ Path(basename(word_filepath)).stem: word_filepath
for word_filepath in filepaths
}
return translation_words_dict
-def translation_words_section(
+def translation_words_section_for_book(
tw_book: TWBook,
usfm_books: Optional[Sequence[USFMBook]],
limit_words: bool,
resource_requests: Sequence[ResourceRequest],
+ include_uses_section: bool = True,
    resource_type_name_fmt_str: str = "<h2>{}</h2>",
-) -> str:
+) -> Sequence[str]:
"""
Build and return the translation words definition section, i.e.,
the list of all translation words for this language, book combination.
Limit the translation words to only those that appear in the USFM
- resouce chosen if limit_words is True and a USFM resource was also
+ resource chosen if limit_words is True and a USFM resource was also
chosen otherwise include all the translation words for the language.
"""
content = []
if tw_book.name_content_pairs:
- content.append(resource_type_name_fmt_str.format(tw_book.resource_type_name))
- selected_name_content_pairs = get_selected_name_content_pairs(
+ lang_codes = list(
+ dict.fromkeys(
+ resource_request.lang_code for resource_request in resource_requests
+ )
+ )
+ if len(lang_codes) > 1:
+ # More than one language was requested so we should
+ # differentiate translation word sections by adding the IETF code to the
+ # resource type name used as a header.
+ heading = resource_type_name_fmt_str.format(
+ f"{tw_book.resource_type_name} ({tw_book.lang_code})"
+ )
+ else:
+ heading = resource_type_name_fmt_str.format(tw_book.resource_type_name)
+ content.append(heading)
+ selected_name_content_pairs = get_selected_name_content_pairs_for_book(
tw_book, usfm_books, limit_words, resource_requests
)
for name_content_pair in selected_name_content_pairs:
- content.append(name_content_pair_content(name_content_pair, tw_book))
- return "".join(content)
+ content.append(
+ name_content_pair_content_for_book(
+ name_content_pair, tw_book, include_uses_section
+ )
+ )
+ return content
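The dict.fromkeys trick de-duplicates the requested language codes while preserving request order, e.g. (hypothetical requests):

    list(dict.fromkeys(["en", "en", "sw"]))  # -> ["en", "sw"]
    # two languages -> headings "Translation Words (en)" and "Translation Words (sw)";
    # one language  -> the plain resource type name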
+
+
+def translation_words_for_content(
+ tw_book: TWBook,
+ content: str,
+    resource_type_name_fmt_str: str = "<h2>{}</h2>",
+) -> Sequence[tuple[str, str]]:
+ selected_name_content_pairs = get_selected_name_content_pairs_for_content(
+ tw_book, content
+ )
+ return [
+ (pair.localized_word, Path(pair.path).stem)
+ for pair in selected_name_content_pairs
+ ]
-def get_selected_name_content_pairs(
+def get_selected_name_content_pairs_for_book(
tw_book: TWBook,
usfm_books: Optional[Sequence[USFMBook]],
limit_words: bool,
@@ -146,13 +177,22 @@ def get_selected_name_content_pairs(
) -> list[TWNameContentPair]:
selected_name_content_pairs = []
if usfm_books and limit_words:
- selected_name_content_pairs = filter_name_content_pairs(tw_book, usfm_books)
+ selected_name_content_pairs = filter_name_content_pairs_for_book(
+ tw_book, usfm_books
+ )
elif (
not usfm_books and limit_words
): # This branch is necessarily expensive computationally and in IO
t0 = time.time()
- usfm_books = fetch_usfm_book_content_units(resource_requests)
- selected_name_content_pairs = filter_name_content_pairs(tw_book, usfm_books)
+        # A USFM resource was not requested by the user, but they chose
+        # resources that are still associated with books, e.g., TN. If
+        # limit_words was chosen we must still fetch the USFM to see
+        # which words occur in it, so that we can limit the translation
+        # words to that collection rather than including all of them.
+ usfm_books = fetch_usfm_books(resource_requests)
+ selected_name_content_pairs = filter_name_content_pairs_for_book(
+ tw_book, usfm_books
+ )
t1 = time.time()
logger.info(
"Time for acquiring and filtering TW content based on books chosen: %s",
@@ -163,23 +203,50 @@ def get_selected_name_content_pairs(
return selected_name_content_pairs
-def filter_name_content_pairs(
- tw_book: TWBook, usfm_books: Optional[Sequence[USFMBook]]
+def get_selected_name_content_pairs_for_content(
+ tw_book: TWBook,
+ content: str,
+) -> list[TWNameContentPair]:
+ selected_name_content_pairs = []
+ selected_name_content_pairs = filter_name_content_pairs_for_content(
+ tw_book, content
+ )
+ return selected_name_content_pairs
+
+
+def filter_name_content_pairs_for_book(
+ tw_book: TWBook, usfm_books: Sequence[USFMBook]
+) -> list[TWNameContentPair]:
+ selected_name_content_pairs = []
+ added_pairs = set()
+ for name_content_pair in tw_book.name_content_pairs:
+ for usfm_book in usfm_books:
+ for chapter in usfm_book.chapters.values():
+ if re.search(
+ re.escape(name_content_pair.localized_word),
+ chapter.content,
+ ):
+ if name_content_pair not in added_pairs:
+ selected_name_content_pairs.append(name_content_pair)
+ added_pairs.add(name_content_pair)
+ break
+ return selected_name_content_pairs
+
+
+def filter_name_content_pairs_for_content(
+ tw_book: TWBook, content: str
) -> list[TWNameContentPair]:
selected_name_content_pairs = []
added_pairs = set()
- if usfm_books:
- for name_content_pair in tw_book.name_content_pairs:
- for usfm_book in usfm_books:
- for chapter in usfm_book.chapters.values():
- if re.search(
- re.escape(name_content_pair.localized_word),
- chapter.content,
- ):
- if name_content_pair not in added_pairs:
- selected_name_content_pairs.append(name_content_pair)
- added_pairs.add(name_content_pair)
- break
+ for name_content_pair in tw_book.name_content_pairs:
+ if re.search(
+ r"\b" + re.escape(name_content_pair.localized_word) + r"\b",
+ content,
+ flags=re.IGNORECASE,
+ ):
+ if name_content_pair not in added_pairs:
+ selected_name_content_pairs.append(name_content_pair)
+ added_pairs.add(name_content_pair)
return selected_name_content_pairs
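The content-level filter adds word boundaries and case-insensitivity that the book-level filter above does not use, which stops substring false positives; for instance:

    import re
    re.search(r"\b" + re.escape("ark") + r"\b", "It was dark", re.IGNORECASE)     # None
    re.search(r"\b" + re.escape("ark") + r"\b", "the Ark rested", re.IGNORECASE)  # match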
@@ -189,21 +256,31 @@ def contains_tw(resource_request: ResourceRequest, tw_regex: str = "tw.*") -> bo
return value
-def name_content_pair_content(
+def name_content_pair_content_for_book(
+ name_content_pair: TWNameContentPair,
+ tw_book: TWBook,
+ include_uses_section: bool = False,
+) -> str:
+ name_content_pair.content = modify_content_for_anchors(name_content_pair, tw_book)
+ uses_section_ = ""
+ # TODO tw_book.uses is no longer populated - look at git history to see how I used to do it
+ if include_uses_section and name_content_pair.localized_word in tw_book.uses:
+ uses_section_ = uses_section(tw_book.uses[name_content_pair.localized_word])
+ name_content_pair.content = f"{name_content_pair.content}{uses_section_}"
+ return name_content_pair.content
+
+
+def name_content_pair_content_for_verse(
name_content_pair: TWNameContentPair,
tw_book: TWBook,
- # include_uses_section: bool,
+ include_uses_section: bool = False,
) -> str:
name_content_pair.content = modify_content_for_anchors(name_content_pair, tw_book)
- # uses_section_ = ""
- # if (
- # include_uses_section
- # and name_content_pair.localized_word in book_content_unit.uses
- # ):
- # uses_section_ = uses_section(
- # book_content_unit.uses[name_content_pair.localized_word]
- # )
- # name_content_pair.content = f"{name_content_pair.content}{uses_section_}"
+ uses_section_ = ""
+ # TODO
+ if include_uses_section and name_content_pair.localized_word in tw_book.uses:
+ uses_section_ = uses_section(tw_book.uses[name_content_pair.localized_word])
+ name_content_pair.content = f"{name_content_pair.content}{uses_section_}"
return name_content_pair.content
@@ -221,20 +298,23 @@ def filter_unique_by_lang_code(tw_books: Sequence[TWBook]) -> list[TWBook]:
def modify_content_for_anchors(
name_content_pair: TWNameContentPair,
book_content_unit: TWBook,
- opening_h3_fmt_str: str = OPENING_H3_FMT_STR,
- opening_h3_with_id_fmt_str: str = OPENING_H3_WITH_ID_FMT_STR,
+    opening_h3_fmt_str: str = "<h3>{}</h3>",
+    opening_h3_with_id_fmt_str: str = '<h3 id="{}-{}">{}</h3>',
) -> str:
- return name_content_pair.content.replace(
+ content = name_content_pair.content.replace(
opening_h3_fmt_str.format(name_content_pair.localized_word),
opening_h3_with_id_fmt_str.format(
book_content_unit.lang_code,
- name_content_pair.localized_word,
+ Path(name_content_pair.path).stem,
+ # "".join(name_content_pair.localized_word.split()).lower(),
name_content_pair.localized_word,
),
)
+ # logger.debug("tw content: %s", content)
+ return content
-def fetch_usfm_book_content_units(
+def fetch_usfm_books(
resource_requests: Sequence[ResourceRequest],
usfm_resource_types: Sequence[str] = settings.USFM_RESOURCE_TYPES,
) -> list[USFMBook]:
@@ -276,35 +356,59 @@ def fetch_usfm_book_content_units(
return usfm_book_content_units
-# def uses_section(
-# uses: Sequence[TWUse],
-# translation_word_verse_section_header_str: str = settings.TRANSLATION_WORD_VERSE_SECTION_HEADER_STR,
-# unordered_list_begin_str: str = settings.UNORDERED_LIST_BEGIN_STR,
-# translation_word_verse_ref_item_fmt_str: str = settings.TRANSLATION_WORD_VERSE_REF_ITEM_FMT_STR,
-# unordered_list_end_str: str = settings.UNORDERED_LIST_END_STR,
-# book_numbers: Mapping[str, str] = bible_books.BOOK_NUMBERS,
-# book_names: Mapping[str, str] = bible_books.BOOK_NAMES,
-# num_zeros: int = 3,
-# ) -> str:
-# """
-# Construct and return the 'Uses:' section which comes at the end of
-# a translation word definition and wherein each item points to
-# verses (as targeted by lang_code, book_id, chapter_num, and
-# verse_num) wherein the word occurs.
-# """
-# html: list[str] = []
-# html.append(translation_word_verse_section_header_str)
-# html.append(unordered_list_begin_str)
-# for use in uses:
-# html_content_str = translation_word_verse_ref_item_fmt_str.format(
-# use.lang_code,
-# book_numbers[use.book_id].zfill(num_zeros),
-# str(use.chapter_num).zfill(num_zeros),
-# str(use.verse_num).zfill(num_zeros),
-# book_names[use.book_id],
-# use.chapter_num,
-# use.verse_num,
-# )
-# html.append(html_content_str)
-# html.append(unordered_list_end_str)
-# return "\n".join(html)
+def fetch_usfm_book(
+ lang_code: str,
+ book_code: str,
+ resource_type: str,
+) -> Optional[USFMBook]:
+ usfm_book = None
+ resource_lookup_dto = resource_lookup.resource_lookup_dto(
+ lang_code,
+ resource_type,
+ book_code,
+ )
+ if resource_lookup_dto and resource_lookup_dto.url:
+ t0 = time.time()
+ resource_dir = resource_lookup.prepare_resource_filepath(resource_lookup_dto)
+ resource_lookup.provision_asset_files(resource_lookup_dto.url, resource_dir)
+ t1 = time.time()
+ logger.debug(
+ "Time to provision USFM asset files (acquire and write to disk) for TW resource: %s",
+ t1 - t0,
+ )
+ usfm_book = parsing.usfm_book_content(resource_lookup_dto, resource_dir, False)
+ return usfm_book
+
+
+def uses_section(
+ uses: Sequence[TWUse],
+ translation_word_verse_section_header_str: str = settings.TRANSLATION_WORD_VERSE_SECTION_HEADER_STR,
+ unordered_list_begin_str: str = settings.UNORDERED_LIST_BEGIN_STR,
+ translation_word_verse_ref_item_fmt_str: str = settings.TRANSLATION_WORD_VERSE_REF_ITEM_FMT_STR,
+ unordered_list_end_str: str = settings.UNORDERED_LIST_END_STR,
+ book_numbers: Mapping[str, str] = bible_books.BOOK_NUMBERS,
+ book_names: Mapping[str, str] = bible_books.BOOK_NAMES,
+ num_zeros: int = 3,
+) -> str:
+ """
+    Construct and return the 'Uses:' section that comes at the end of
+    a translation word definition, in which each item links to a verse
+    (identified by lang_code, book_id, chapter_num, and verse_num)
+    where the word occurs.
+ """
+ html: list[str] = []
+ html.append(translation_word_verse_section_header_str)
+ html.append(unordered_list_begin_str)
+ for use in uses:
+ html_content_str = translation_word_verse_ref_item_fmt_str.format(
+ use.lang_code,
+ book_numbers[use.book_id].zfill(num_zeros),
+ str(use.chapter_num).zfill(num_zeros),
+ str(use.verse_num).zfill(num_zeros),
+ book_names[use.book_id],
+ use.chapter_num,
+ use.verse_num,
+ )
+ html.append(html_content_str)
+ html.append(unordered_list_end_str)
+ return "\n".join(html)
diff --git a/backend/doc/utils/url_utils.py b/backend/doc/utils/url_utils.py
index 28d0cbb48..71a249c21 100644
--- a/backend/doc/utils/url_utils.py
+++ b/backend/doc/utils/url_utils.py
@@ -129,30 +129,31 @@ def get_last_segment(url: HttpUrl, lang_code: str) -> str:
return normalize_last_segment(lang_code, last_segment)
-def get_book_names_from_title_file(
+def get_book_name_from_title_file(
resource_filepath: str,
lang_code: str,
repo_components: list[str],
-) -> dict[str, str]:
+) -> str:
"""
    Book names in front/title.txt files may or may not be localized;
    it depends on the translation work done for lang_code.
"""
- book_codes_and_names_localized: dict[str, str] = {}
+ # book_codes_and_names_localized: dict[str, str] = {}
+ book_name = ""
book_name_file = join(resource_filepath, "front", "title.txt")
if exists(book_name_file):
with open(book_name_file, "r") as fin:
book_name = fin.read()
logger.debug("book_name: %s", book_name)
- if book_name:
- # Moved this code to the caller
- # localized_book_name_ = normalize_localized_book_name(book_name)
- # localized_book_name = maybe_correct_book_name(
- # lang_code, localized_book_name_
- # )
- book_code = repo_components[1]
- book_codes_and_names_localized[book_code] = book_name
- return book_codes_and_names_localized
+ # if book_name:
+ # Moved this code to the caller
+ # localized_book_name_ = normalize_localized_book_name(book_name)
+ # localized_book_name = maybe_correct_book_name(
+ # lang_code, localized_book_name_
+ # )
+ # book_code = repo_components[1]
+ # book_codes_and_names_localized[book_code] = book_name
+ return book_name
def load_manifest(file_path: str) -> str:
diff --git a/backend/passages/domain/document_generator.py b/backend/passages/domain/document_generator.py
index ae4c4a5b5..a19b774db 100644
--- a/backend/passages/domain/document_generator.py
+++ b/backend/passages/domain/document_generator.py
@@ -10,9 +10,7 @@
from doc.domain.model import Attachment
from doc.domain.parsing import split_chapter_into_verses, usfm_book_content
from doc.domain.resource_lookup import (
- RESOURCE_TYPE_CODES_AND_NAMES,
book_codes_for_lang_from_usfm_only,
- maybe_correct_book_name,
prepare_resource_filepath,
provision_asset_files,
resource_lookup_dto,
@@ -20,9 +18,9 @@
)
from doc.reviewers_guide.model import BibleReference
from doc.utils.file_utils import docx_filepath, file_needs_update
+from doc.utils.text_utils import maybe_correct_book_name
from docx import Document # type: ignore
-from docx.oxml import OxmlElement # type: ignore
-from docx.oxml import parse_xml
+from docx.oxml import OxmlElement, parse_xml # type: ignore
from docx.shared import Inches # type: ignore
from docx.table import _Cell # type: ignore
from htmldocx import HtmlToDocx # type: ignore
@@ -32,6 +30,7 @@
from passages.utils.docx_utils import add_footer, add_header
from pydantic import Json
+
logger = settings.logger(__name__)
@@ -44,7 +43,9 @@ def generate_docx_document(
working_dir: str = settings.WORKING_DIR,
output_dir: str = settings.DOCUMENT_OUTPUT_DIR,
usfm_resource_types: Sequence[str] = settings.USFM_RESOURCE_TYPES,
- resource_type_codes_and_names: Mapping[str, str] = RESOURCE_TYPE_CODES_AND_NAMES,
+ resource_type_codes_and_names: Mapping[
+ str, str
+ ] = settings.RESOURCE_TYPE_CODES_AND_NAMES,
) -> str:
"""
Generate the scriptural terms evaluation document.
diff --git a/backend/passages/domain/model.py b/backend/passages/domain/model.py
index d7af948e5..13dce47ae 100644
--- a/backend/passages/domain/model.py
+++ b/backend/passages/domain/model.py
@@ -1,6 +1,7 @@
+from typing import Optional, NamedTuple, final
+
from doc.domain.model import ChapterNum
from pydantic import BaseModel, EmailStr
-from typing import Optional, NamedTuple, final
@final
diff --git a/backend/passages/domain/parser.py b/backend/passages/domain/parser.py
index aeb327211..455d9ec24 100644
--- a/backend/passages/domain/parser.py
+++ b/backend/passages/domain/parser.py
@@ -1,9 +1,11 @@
from typing import Mapping
-from passages.domain.model import BibleReference
-from doc.domain.parsing import lookup_verse_text
-from doc.domain.model import USFMBook
-from doc.domain.bible_books import BOOK_CHAPTER_VERSES
+
from doc.config import settings
+from doc.domain.bible_books import BOOK_CHAPTER_VERSES
+from doc.domain.model import USFMBook
+from doc.domain.parsing import lookup_verse_text
+from passages.domain.model import BibleReference
+
logger = settings.logger(__name__)
diff --git a/backend/passages/domain/stet_verse_list_parser.py b/backend/passages/domain/stet_verse_list_parser.py
index 22bfe84ab..bb983937f 100644
--- a/backend/passages/domain/stet_verse_list_parser.py
+++ b/backend/passages/domain/stet_verse_list_parser.py
@@ -1,5 +1,6 @@
import json
import re
+
from doc.domain.bible_books import BOOK_NAMES
diff --git a/backend/passages/entrypoints/routes.py b/backend/passages/entrypoints/routes.py
index a06111eaa..636e4dc24 100644
--- a/backend/passages/entrypoints/routes.py
+++ b/backend/passages/entrypoints/routes.py
@@ -10,6 +10,7 @@
from passages.domain import document_generator, model
from passages.domain.document_generator import stet_exhaustive_verse_list
+
router = APIRouter()
logger = settings.logger(__name__)
diff --git a/backend/requirements.in b/backend/requirements.in
index 43b0dcbf2..85426b563 100644
--- a/backend/requirements.in
+++ b/backend/requirements.in
@@ -5,6 +5,7 @@
# cython # For pydantic: https://pydantic-docs.helpmanual.io/install/
# TODO do we still need aiofiles?
aiofiles
+beautifulsoup4
cachetools
celery
celery-types
diff --git a/backend/stet/data/stet_en.docx b/backend/stet/data/stet_en.docx
index 97456ad4e..b471ae863 100644
Binary files a/backend/stet/data/stet_en.docx and b/backend/stet/data/stet_en.docx differ
diff --git a/backend/stet/data/stet_es-419.docx b/backend/stet/data/stet_es-419.docx
index cbdd41ba9..a1faade42 100644
Binary files a/backend/stet/data/stet_es-419.docx and b/backend/stet/data/stet_es-419.docx differ
diff --git a/backend/stet/data/stet_fr.docx b/backend/stet/data/stet_fr.docx
index 985562b41..5de990f00 100644
Binary files a/backend/stet/data/stet_fr.docx and b/backend/stet/data/stet_fr.docx differ
diff --git a/backend/stet/data/stet_pt-br.docx b/backend/stet/data/stet_pt-br.docx
index 8f5e48ee2..d1201cd15 100644
Binary files a/backend/stet/data/stet_pt-br.docx and b/backend/stet/data/stet_pt-br.docx differ
diff --git a/backend/stet/data/stet_sw.docx b/backend/stet/data/stet_sw.docx
index 5ee9bc2b7..ff1992d4c 100644
Binary files a/backend/stet/data/stet_sw.docx and b/backend/stet/data/stet_sw.docx differ
diff --git a/backend/stet/domain/document_generator.py b/backend/stet/domain/document_generator.py
index 49aec8883..616e99d46 100644
--- a/backend/stet/domain/document_generator.py
+++ b/backend/stet/domain/document_generator.py
@@ -13,14 +13,13 @@
usfm_book_content,
)
from doc.domain.resource_lookup import (
- RESOURCE_TYPE_CODES_AND_NAMES,
- maybe_correct_book_name,
prepare_resource_filepath,
provision_asset_files,
resource_lookup_dto,
resource_types,
)
from doc.utils.file_utils import docx_filepath, file_needs_update
+from doc.utils.text_utils import maybe_correct_book_name
from docx import Document # type: ignore
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT # type: ignore
from docx.oxml import OxmlElement # type: ignore
@@ -46,6 +45,7 @@
)
from stet.utils.util import extract_chapter_and_beyond
+
logger = settings.logger(__name__)
@@ -57,7 +57,9 @@ def generate_docx_document(
working_dir: str = settings.WORKING_DIR,
output_dir: str = settings.DOCUMENT_OUTPUT_DIR,
usfm_resource_types: Sequence[str] = settings.USFM_RESOURCE_TYPES,
- resource_type_codes_and_names: Mapping[str, str] = RESOURCE_TYPE_CODES_AND_NAMES,
+ resource_type_codes_and_names: Mapping[
+ str, str
+ ] = settings.RESOURCE_TYPE_CODES_AND_NAMES,
) -> str:
"""
Generate the scriptural terms evaluation document.
@@ -328,7 +330,7 @@ def generate_docx(
add_highlighted_html_to_docx_for_words(
verse.source_text, source_paragraph, word_entry.bolded_phrases
)
- else: # Bolded phrases in 3rd column were not provided
+ else: # Bolded phrases in 4th column were not provided
add_highlighted_html_to_docx_for_words(
verse.source_text, source_paragraph, word_entry.words
)
diff --git a/backend/stet/domain/model.py b/backend/stet/domain/model.py
index e5556ce54..8ae61adf8 100644
--- a/backend/stet/domain/model.py
+++ b/backend/stet/domain/model.py
@@ -1,5 +1,6 @@
from dataclasses import dataclass, field
from typing import NamedTuple, Optional, final
+
from pydantic import BaseModel, EmailStr
diff --git a/backend/stet/domain/parser.py b/backend/stet/domain/parser.py
index d218808cf..de49f80e9 100644
--- a/backend/stet/domain/parser.py
+++ b/backend/stet/domain/parser.py
@@ -3,9 +3,10 @@
from doc.config import settings
from doc.domain.bible_books import BOOK_NAMES
from doc.domain.resource_lookup import book_codes_for_lang_from_usfm_only
+from docx import Document # type: ignore
from stet.domain.model import VerseReferenceDto, WordEntryDto
from stet.utils.util import is_valid_int
-from docx import Document # type: ignore
+
logger = settings.logger(__name__)
diff --git a/backend/templates/html/header_enclosing.html b/backend/templates/html/header_enclosing.html
index c427a9e10..5fc5f71f3 100644
--- a/backend/templates/html/header_enclosing.html
+++ b/backend/templates/html/header_enclosing.html
@@ -220,9 +220,9 @@
page-break-before: always;
clear: both;
}
- .chapter:not(:first-of-type) {
- page-break-before: always;
- }
+ /* .chapter:not(:first-of-type) {
+ page-break-before: always;
+ } */
}
.single-space {
diff --git a/frontend/src/lib/ResourceTypeBasket.svelte b/frontend/src/lib/ResourceTypeBasket.svelte
index 572030881..a2dcb948b 100644
--- a/frontend/src/lib/ResourceTypeBasket.svelte
+++ b/frontend/src/lib/ResourceTypeBasket.svelte
@@ -1,6 +1,6 @@