diff --git a/backend/doc/domain/assembly_strategies/assemble_by_chapter.py b/backend/doc/domain/assembly_strategies/assemble_by_chapter.py
index 3a15bba6..e141fb4d 100644
--- a/backend/doc/domain/assembly_strategies/assemble_by_chapter.py
+++ b/backend/doc/domain/assembly_strategies/assemble_by_chapter.py
@@ -7,8 +7,6 @@
chapter_commentary_parts,
collect_unique_book_codes,
collect_unique_lang_codes,
- demote_headings_by_one,
- demote_headings_by_two,
filter_books_by_book_code,
filter_books_by_lang_code,
get_book_intros,
@@ -26,6 +24,7 @@
tq_verses_parts,
rg_verses_parts,
)
+from doc.utils.text_utils import demote_headings_by_one
from doc.domain.bible_books import BOOK_CHAPTERS, BOOK_ID_MAP, BOOK_NAMES
from doc.domain.model import (
AssemblyLayoutEnum,
diff --git a/backend/doc/domain/assembly_strategies/assembly_strategy_utils.py b/backend/doc/domain/assembly_strategies/assembly_strategy_utils.py
index 53704d2f..3751ff45 100644
--- a/backend/doc/domain/assembly_strategies/assembly_strategy_utils.py
+++ b/backend/doc/domain/assembly_strategies/assembly_strategy_utils.py
@@ -2,8 +2,8 @@
Utility functions used by assembly_strategies.
"""
-from re import compile, IGNORECASE, Match, search
-from typing import Optional, Sequence, TypeVar
+from re import search
+from typing import Optional, Sequence
from doc.config import settings
from doc.domain.bible_books import BOOK_ID_MAP
@@ -19,6 +19,7 @@
from doc.reviewers_guide.model import RGBook
from doc.reviewers_guide.render_to_html import render_chapter
from doc.utils.tw_utils import translation_words_content
+from doc.utils.text_utils import demote_headings_by_one
from docx.document import Document as DocxDocument
from docx.enum.section import WD_SECTION
from docx.enum.text import WD_BREAK
@@ -29,11 +30,6 @@
logger = settings.logger(__name__)
-HEADING_RE = compile(r"</?h([1-6])\b", IGNORECASE)
-
-T = TypeVar("T")
-
-
OXML_LANGUAGE_LIST: list[str] = [
"ar-SA",
"bg-BG",
@@ -137,7 +133,6 @@ def tn_chapter_verses(
tn_book: Optional[TNBook],
chapter_num: int,
use_two_column_layout_for_tn_notes: bool,
- # fmt_str: str = TN_VERSE_NOTES_ENCLOSING_DIV_FMT_STR,
) -> str:
"""
Return the HTML for verses that are in the chapter with
@@ -970,27 +965,6 @@ def get_non_usfm_resources_verse(
return document_parts
-def _demote_heading(match: Match[str], levels: int) -> str:
- tag = match.group(0)
- level = int(match.group(1))
- new_level = min(level + levels, 6)
- return tag.replace(f"h{level}", f"h{new_level}", 1)
-
-
-def demote_headings_by_one(content: str) -> str:
- return HEADING_RE.sub(
- lambda m: _demote_heading(m, levels=1),
- content,
- )
-
-
-def demote_headings_by_two(content: str) -> str:
- return HEADING_RE.sub(
- lambda m: _demote_heading(m, levels=2),
- content,
- )
-
-
def tnc_chapter_intro(
tnc_book: Optional[TNCBook],
chapter_num: int,
diff --git a/backend/doc/domain/document_generator.py b/backend/doc/domain/document_generator.py
index 7bff28d1..4429b0bf 100755
--- a/backend/doc/domain/document_generator.py
+++ b/backend/doc/domain/document_generator.py
@@ -775,7 +775,7 @@ def compose_docx_document(
add_full_width_hr(doc)
if part.add_page_break:
add_page_break(doc)
- style_superscripts(doc, lift_half_points=4, color=RGBColor(0x99, 0x99, 0x99))
+ style_superscripts(doc, lift_half_points=6, color=RGBColor(0x99, 0x99, 0x99))
t1 = time.time()
logger.info("Time for converting HTML to Docx: %.2f seconds", t1 - t0)
return doc
diff --git a/backend/doc/domain/parsing.py b/backend/doc/domain/parsing.py
index af30d26b..eb5cf122 100644
--- a/backend/doc/domain/parsing.py
+++ b/backend/doc/domain/parsing.py
@@ -14,10 +14,7 @@
import requests
from bs4 import BeautifulSoup
from doc.config import settings
-from doc.domain.assembly_strategies.assembly_strategy_utils import (
- demote_headings_by_one,
- demote_headings_by_two,
-)
+from doc.utils.text_utils import demote_headings_by_one
from doc.domain.bible_books import BOOK_ID_MAP, BOOK_NAMES
from doc.domain.model import (
BC_RESOURCE_TYPE,
@@ -80,6 +77,11 @@
H1, H2, H3, H4, H5 = "h1", "h2", "h3", "h4", "h5"
+_SECTIONHEAD5_RE = re.compile(
+ r'
\s*
',
+ re.MULTILINE,
+)
+
BC_ARTICLE_URL_FMT_STR: str = (
"https://content.bibletranslationtools.org/WycliffeAssociates/en_bc/src/branch/master/{}"
@@ -447,6 +449,10 @@ def ensure_chapter_marker(
return f"\\c {chapter_num}\n" + chapter_usfm_text
+def remove_sectionhead5_elements(content: str) -> str:
+ return _SECTIONHEAD5_RE.sub(" ", content)
+
+
def usfm_book_content(
resource_lookup_dto: ResourceLookupDto,
resource_dir: str,
@@ -511,9 +517,12 @@ def usfm_book_content(
cleaned_chapter_html_content = remove_null_bytes_and_control_characters(
chapter_html_content
)
+ chapter_html_content_sans_s5 = remove_sectionhead5_elements(
+ cleaned_chapter_html_content
+ )
usfm_chapters[chapter_num] = USFMChapter(
content=(
- cleaned_chapter_html_content if cleaned_chapter_html_content else ""
+ chapter_html_content_sans_s5 if chapter_html_content_sans_s5 else ""
),
verses=None,
)
@@ -1278,7 +1287,7 @@ def assemble_chapter_usfm(
verse_content = read_verse_file(usfm_file)
cleaned_verse_content = clean_verse_content(verse_content)
verse_content = ensure_paragraph_before_verses(usfm_file, cleaned_verse_content)
- chapter_usfm_content.append(cleaned_verse_content)
+ chapter_usfm_content.append(verse_content)
chapter_usfm_content.append(
" \n"
) # Make sure a space before next chunk, e.g., auh, mat, ch 9, v 14
@@ -1456,6 +1465,8 @@ def handle_split_chapter_into_verses(
def split_chapter_into_verses_with_formatting(
chapter: USFMChapter,
+ empty_paragraph: str = "",
+ sectionhead5_element: str = '',
) -> dict[VerseRef, str]:
"""
Given a USFMChapter instance, return the same instance with its
@@ -1465,29 +1476,27 @@ def split_chapter_into_verses_with_formatting(
Sample HTML content with multiple verse elements:
>>> html_content = '''
- >>>
- >>> 19
- >>> For through the law I died to the law, so that I might live for God. I have been crucified with Christ.
- >>>
- >>>
- >>>
- >>>
- >>> 20
- >>> I have been crucified with Christ and I no longer live, but Christ lives in me. The life I now live in the body, I live by faith in the Son of God, who loved me and gave himself for me.
- >>>
- >>>
- >>>
- >>> '''
+ ...
+ ... 19
+ ... For through the law I died to the law, so that I might live for God. I have been crucified with Christ.
+ ...
+ ...
+ ...
+ ...
+ ... 20
+ ... I have been crucified with Christ and I no longer live, but Christ lives in me. The life I now live in the body, I live by faith in the Son of God, who loved me and gave himself for me.
+ ...
+ ...
+ ...
+ ... '''
>>> from doc.domain.parsing import split_chapter_into_verses_with_formatting
- >>> chapter = USFMChapter(content=html_content)
+ >>> chapter = USFMChapter(content=html_content, verses=None)
>>> chapter.verses = split_chapter_into_verses_with_formatting(chapter)
- >>> chapter.verses["19"]
-
+ >>> print(chapter.verses["19"])
19
For through the law I died to the law, so that I might live for God. I have been crucified with Christ.
-
-
+
"""
# TODO What to do about footnote targets? Perhaps have the value be a
# tuple with first element of the verse HTML (which includes the
@@ -1505,14 +1514,16 @@ def split_chapter_into_verses_with_formatting(
# Add to the dictionary with verse number as the key and verse text as the value
verse_dict[verse_number_] = (
verse_span.strip()
- .replace("", "")
- .replace('', "")
+ .replace(empty_paragraph, "")
+ .replace(sectionhead5_element, "")
)
return verse_dict
def split_chapter_into_verses_with_formatting_for_f10(
chapter: USFMChapter,
+ empty_paragraph: str = "",
+ sectionhead5_element: str = '',
) -> dict[str, str]:
"""
Parse chapter.content as HTML, extract each ,
@@ -1555,8 +1566,8 @@ def split_chapter_into_verses_with_formatting_for_f10(
# store cleaned HTML fragment (still contains etc.)
verse_dict[verse_number] = (
cleaned_html.strip()
- .replace("", "")
- .replace('', "")
+ .replace(empty_paragraph, "")
+ .replace(sectionhead5_element, "")
)
return verse_dict
diff --git a/backend/doc/domain/resource_lookup.py b/backend/doc/domain/resource_lookup.py
index 60503393..20878848 100644
--- a/backend/doc/domain/resource_lookup.py
+++ b/backend/doc/domain/resource_lookup.py
@@ -16,7 +16,7 @@
import requests
from cachetools import TTLCache, cached
from doc.config import settings
-from doc.domain import worker, parsing
+from doc.domain import worker
from doc.domain.bible_books import BOOK_CHAPTERS, BOOK_ID_MAP, BOOK_NAMES
from doc.domain.model import (
NON_USFM_RESOURCE_TYPES,
@@ -36,7 +36,6 @@
from doc.utils.file_utils import (
delete_tree,
file_needs_update,
- read_file,
)
from doc.utils.list_utils import unique_tuples, unique_book_codes
from doc.utils.text_utils import maybe_correct_book_name, normalize_localized_book_name
@@ -79,7 +78,7 @@ def fetch_source_data(
>>> ();result = resource_lookup.fetch_source_data();() # doctest: +ELLIPSIS
(...)
>>> result.git_repo[0]
- RepoEntry(repo_url=HttpUrl('https://content.bibletranslationtools.org/klero/ach-SS-acholi_rev_text_reg'), content=Content(resource_type='reg', language=Language(english_name='Acholi', ietf_code='ach-SS-acholi', national_name='Acholi', direction=)))
+ RepoEntry(repo_url=HttpUrl('https://content.bibletranslationtools.org/0success/cli_1jn_text_reg'), content=Content(resource_type='reg', language=Language(english_name='Chakali', ietf_code='cli', national_name='Chakali', direction=)))
"""
graphql_query = """
query MyQuery {
@@ -111,6 +110,8 @@ def fetch_source_data(
for repo in data_payload["git_repo"]
if repo.get("content", {}).get("resource_type") is not None
]
+ # Sort for test stability - ensures consistent ordering
+ valid_repos.sort(key=lambda repo: repo["repo_url"])
return SourceData.model_validate({"git_repo": valid_repos})
else:
logger.info("Invalid payload structure, no data.")
@@ -263,6 +264,8 @@ def get_resource_types(
str, str
] = settings.RESOURCE_TYPE_CODES_AND_NAMES,
) -> list[tuple[str, str]]:
+ from doc.domain.parsing import find_usfm_files
+
resource_types = []
for url, resource_filepath, resource_type in repo_clone_list:
if resource_type:
@@ -285,7 +288,7 @@ def get_resource_types(
and file.name.split("-")[1].lower() in book_codes
]
elif resource_type in usfm_resource_types:
- book_assets = parsing.find_usfm_files(resource_filepath)
+ book_assets = find_usfm_files(resource_filepath)
elif resource_type == "rg":
between_texts, bible_reference_strs = find_bible_references(
join(en_rg, docx_file_path)
@@ -574,6 +577,8 @@ def usfm_resource_types_and_book_tuples(
>>> sorted(tuples, key=lambda value: value[1])
[('reg', '1co'), ('reg', '1jn'), ('reg', '1pe'), ('reg', '1th'), ('reg', '1ti'), ('reg', '2co'), ('reg', '2jn'), ('reg', '2pe'), ('reg', '2th'), ('reg', '2ti'), ('reg', '3jn'), ('reg', 'act'), ('reg', 'col'), ('reg', 'eph'), ('reg', 'gal'), ('reg', 'heb'), ('reg', 'jas'), ('reg', 'jhn'), ('reg', 'jud'), ('reg', 'luk'), ('reg', 'mat'), ('reg', 'mrk'), ('reg', 'phm'), ('reg', 'php'), ('reg', 'rev'), ('reg', 'rom'), ('reg', 'tit')]
"""
+ from doc.domain.parsing import usfm_asset_file
+
book_codes = book_codes_str.split(",")
data: SourceData | None = fetch_source_data()
resource_type_and_book_tuples = set()
@@ -602,9 +607,7 @@ def usfm_resource_types_and_book_tuples(
resource_filepath = prepare_resource_filepath(dto)
if file_needs_update(resource_filepath):
provision_asset_files(dto.url, resource_filepath)
- content_file = parsing.usfm_asset_file(
- dto, resource_filepath, False
- )
+ content_file = usfm_asset_file(dto, resource_filepath, False)
if content_file:
resource_type_and_book_tuples.add((resource_type, book_code))
return sorted(resource_type_and_book_tuples, key=lambda value: value[0])
@@ -942,18 +945,24 @@ def get_book_names_from_usfm_metadata(
be localized, it depends on the translation work done for language
lang_code.
"""
+ from doc.domain.parsing import (
+ find_usfm_files,
+ split_usfm_by_chapters,
+ maybe_localized_book_name,
+ )
+
book_codes_and_names_localized: dict[str, str] = {}
- usfm_files = parsing.find_usfm_files(resource_filepath)
+ usfm_files = find_usfm_files(resource_filepath)
for usfm_file in usfm_files:
usfm = ""
usfm_file_components = Path(usfm_file).stem.lower().split("-")
book_code = usfm_file_components[1]
with open(usfm_file, "r") as f:
usfm = f.read()
- frontmatter, _, _ = parsing.split_usfm_by_chapters(
+ frontmatter, _, _ = split_usfm_by_chapters(
lang_code, resource_type, book_code, usfm
)
- localized_book_name = parsing.maybe_localized_book_name(frontmatter)
+ localized_book_name = maybe_localized_book_name(frontmatter)
# localized_book_name = maybe_correct_book_name(lang_code, localized_book_name)
book_codes_and_names_localized[book_code] = localized_book_name
logger.debug("book_codes_and_names_localized: %s", book_codes_and_names_localized)
@@ -1204,7 +1213,8 @@ def nt_survey_rg_passages(
) -> list[BibleReference]:
"""
>>> from doc.domain import resource_lookup
- >>> rg_books = resource_lookup.nt_survey_rg_passages()
+ >>> ();rg_books = resource_lookup.nt_survey_rg_passages() ;() # doctest: +ELLIPSIS
+ (...)
>>> rg_books[0]
BibleReference(book_code='mat', book_name='Matthew', start_chapter=2, start_chapter_verse_ref='1-12', end_chapter=None, end_chapter_verse_ref=None)
"""
@@ -1245,9 +1255,10 @@ def ot_survey_rg1_passages(
) -> list[BibleReference]:
"""
>>> from doc.domain import resource_lookup
- >>> rg_books = resource_lookup.ot_survey_rg1_passages()
+ >>> ();rg_books = resource_lookup.ot_survey_rg1_passages();() # doctest: +ELLIPSIS
+ (...)
>>> rg_books[0]
- BibleReference(book_code='gen', book_name='Genesis', start_chapter=2, start_chapter_verse_ref='1-12', end_chapter=None, end_chapter_verse_ref=None)
+ BibleReference(book_code='gen', book_name='Genesis', start_chapter=1, start_chapter_verse_ref='1', end_chapter=2, end_chapter_verse_ref='3')
"""
path = join(resource_dir, docx_file_path)
rg_books = get_rg_books(
@@ -1286,9 +1297,10 @@ def ot_survey_rg2_passages(
) -> list[BibleReference]:
"""
>>> from doc.domain import resource_lookup
- >>> rg_books = resource_lookup.ot_survey_rg2_passages()
+ >>> ();rg_books = resource_lookup.ot_survey_rg2_passages();() # doctest: +ELLIPSIS
+ (...)
>>> rg_books[0]
- BibleReference(book_code='jos', book_name='Joshua', start_chapter=2, start_chapter_verse_ref='1-12', end_chapter=None, end_chapter_verse_ref=None)
+ BibleReference(book_code='jos', book_name='Joshua', start_chapter=1, start_chapter_verse_ref='1-9', end_chapter=None, end_chapter_verse_ref=None)
"""
path = join(resource_dir, docx_file_path)
rg_books = get_rg_books(
@@ -1327,9 +1339,10 @@ def ot_survey_rg3_passages(
) -> list[BibleReference]:
"""
>>> from doc.domain import resource_lookup
- >>> rg_books = resource_lookup.ot_survey_rg3_passages()
+ >>> ();rg_books = resource_lookup.ot_survey_rg3_passages();() # doctest: +ELLIPSIS
+ (...)
>>> rg_books[0]
- BibleReference(book_code='job', book_name='Job', start_chapter=2, start_chapter_verse_ref='1-12', end_chapter=None, end_chapter_verse_ref=None)
+ BibleReference(book_code='job', book_name='Job', start_chapter=1, start_chapter_verse_ref='6-22', end_chapter=None, end_chapter_verse_ref=None)
"""
path = join(resource_dir, docx_file_path)
rg_books = get_rg_books(
@@ -1368,9 +1381,10 @@ def ot_survey_rg4_passages(
) -> list[BibleReference]:
"""
>>> from doc.domain import resource_lookup
- >>> rg_books = resource_lookup.ot_survey_rg4_passages()
+ >>> ();rg_books = resource_lookup.ot_survey_rg4_passages();() # doctest: +ELLIPSIS
+ (...)
>>> rg_books[0]
- BibleReference(book_code='isa', book_name='Isaiah', start_chapter=2, start_chapter_verse_ref='1-12', end_chapter=None, end_chapter_verse_ref=None)
+ BibleReference(book_code='isa', book_name='Isaiah', start_chapter=1, start_chapter_verse_ref='1-9', end_chapter=None, end_chapter_verse_ref=None)
"""
path = join(resource_dir, docx_file_path)
rg_books = get_rg_books(
diff --git a/backend/doc/domain/usfm_error_detection_and_fixes.py b/backend/doc/domain/usfm_error_detection_and_fixes.py
index 2c02fb61..28f14f94 100644
--- a/backend/doc/domain/usfm_error_detection_and_fixes.py
+++ b/backend/doc/domain/usfm_error_detection_and_fixes.py
@@ -37,7 +37,7 @@
("bem-x-kabenmushi", "reg", "sng"),
("bem-x-kabenmushi", "reg", "deu"),
("bem-x-kabenmushi", "reg", "1ki"),
- ("bem-x-kabenmushi", "reg", "1ch"),
+ # ("bem-x-kabenmushi", "reg", "1ch"), # unavailable from data API
("bem-x-kabenmushi", "reg", "ecc"),
("bem-x-kabenmushi", "reg", "2ki"),
("bi", "reg", "act"),
@@ -129,7 +129,7 @@
("kod", "reg", "phm"),
# ("kqi", "reg", "2th"), # unavailable from data API
# ("kqi", "reg", "2ti"), # unavailable from data API
- # ("kqi", "reg", "mrk"), # book no longer available from data api
+ # ("kqi", "reg", "mrk"), # unavailable from data API
("kqi", "reg", "heb"),
# ("kqi", "reg", "1pe"), # unavailable from data API
# ("kqi", "reg", "tit"), # unavailable from data API
@@ -160,7 +160,7 @@
("mhi-x-burolo", "reg", "2th"),
("mhy-x-benualima", "reg", "mrk"),
# ("mwe", "reg", "tit"), # book is available as choice, but resource not cloned?
- # ("mxo", "reg", "mrk"), # not currently unavailable from data API
+ # ("mxo", "reg", "mrk"), # unavailable from data API
("nak-x-bileki", "reg", "mat"),
("nak-x-bileki", "reg", "1ti"),
("nak-x-bileki", "reg", "eph"),
@@ -174,7 +174,7 @@
("nfd", "reg", "2th"),
("nfd", "reg", "2ti"),
("nfd", "reg", "heb"),
- # ("nhx", "reg", "jos"), # not currently available from data API
+ # ("nhx", "reg", "jos"), # unavailable from data API
("nnb-x-kishula", "reg", "mrk"),
("not", "reg", "jos"),
("now", "reg", "mic"),
@@ -246,10 +246,10 @@
("txy", "reg", "2jn"),
("tyn", "reg", "jud"),
("tyn", "reg", "mrk"),
- # ("vin", "reg", "2co"), # not currently available from data API
- # ("vin", "reg", "1th"),
- # ("vin", "reg", "1ti"),
- # ("vin", "reg", "gal"),
+ # ("vin", "reg", "2co"), # unavailable from data API
+ # ("vin", "reg", "1th"), # unavailable from data API
+ # ("vin", "reg", "1ti"), # unavailable from data API
+ # ("vin", "reg", "gal"), # unavailable from data API
("wbj", "reg", "luk"),
("wbj", "reg", "tit"),
("wbj", "reg", "2jn"),
diff --git a/backend/doc/utils/docx_util.py b/backend/doc/utils/docx_util.py
index ee4da674..fa3e3078 100644
--- a/backend/doc/utils/docx_util.py
+++ b/backend/doc/utils/docx_util.py
@@ -192,10 +192,6 @@ def add_internal_docx_links(doc: DocxDocument) -> None:
_replace_runs(para, new_elements)
-# from docx.oxml import OxmlElement
-# from docx.oxml.ns import qn
-
-
def style_superscripts(
doc: DocxDocument,
*,
diff --git a/backend/doc/utils/text_utils.py b/backend/doc/utils/text_utils.py
index 97be6488..05d6486f 100644
--- a/backend/doc/utils/text_utils.py
+++ b/backend/doc/utils/text_utils.py
@@ -1,13 +1,15 @@
-import re
+from re import compile, match, sub, IGNORECASE, Match
from doc.config import settings
logger = settings.logger(__name__)
+HEADING_RE = compile(r"</?h([1-6])\b", IGNORECASE)
+
def normalize_spaces(text: str) -> str:
- return re.sub(r"\s+", " ", text).strip()
+ return sub(r"\s+", " ", text).strip()
_ROMAN_TO_INT = {
@@ -50,9 +52,9 @@ def normalize_localized_book_name(localized_book_name: str) -> str:
'IsaĆas'
"""
name = localized_book_name.strip()
- match = re.match(r"^(1|2|3|i{1,3})", name, re.IGNORECASE)
- if match:
- numeral_raw = match.group(1)
+ match_ = match(r"^(1|2|3|i{1,3})", name, IGNORECASE)
+ if match_:
+ numeral_raw = match_.group(1)
numeral_upper = numeral_raw.upper()
next_char = name[len(numeral_raw) : len(numeral_raw) + 1]
# Special case: single "I" must be followed by space or uppercase to count as numeral
@@ -114,6 +116,27 @@ def maybe_correct_book_name(
return book_name_
+def _demote_heading(match: Match[str], levels: int) -> str:
+ tag = match.group(0)
+ level = int(match.group(1))
+ new_level = min(level + levels, 6)
+ return tag.replace(f"h{level}", f"h{new_level}", 1)
+
+
+def demote_headings_by_one(content: str) -> str:
+ return HEADING_RE.sub(
+ lambda m: _demote_heading(m, levels=1),
+ content,
+ )
+
+
+def demote_headings_by_two(content: str) -> str:
+ return HEADING_RE.sub(
+ lambda m: _demote_heading(m, levels=2),
+ content,
+ )
+
+
if __name__ == "__main__":
# To run the doctests in this module, in the root of the project do:
diff --git a/backend/doc/utils/tw_utils.py b/backend/doc/utils/tw_utils.py
index a5da944b..23c36471 100644
--- a/backend/doc/utils/tw_utils.py
+++ b/backend/doc/utils/tw_utils.py
@@ -11,7 +11,7 @@
from typing import Mapping, Optional, Sequence
from doc.config import settings
-from doc.domain import parsing, resource_lookup, bible_books
+from doc.domain import resource_lookup, bible_books
from doc.domain.model import (
DocumentPart,
LangDirEnum,
@@ -363,6 +363,8 @@ def fetch_usfm_books(
resource_requests: Sequence[ResourceRequest],
usfm_resource_types: Sequence[str] = settings.USFM_RESOURCE_TYPES,
) -> list[USFMBook]:
+ from doc.domain.parsing import usfm_book_content
+
usfm_resource_lookup_dtos = []
for resource_request in resource_requests:
for usfm_type in usfm_resource_types:
@@ -393,7 +395,7 @@ def fetch_usfm_books(
)
# Initialize found resources from their provisioned assets.
usfm_book_content_units = [
- parsing.usfm_book_content(resource_lookup_dto, resource_dir, False) # , False
+ usfm_book_content(resource_lookup_dto, resource_dir, False)
for resource_lookup_dto, resource_dir in zip(
found_usfm_resource_lookup_dtos, resource_dirs
)
@@ -406,6 +408,8 @@ def fetch_usfm_book(
book_code: str,
resource_type: str,
) -> Optional[USFMBook]:
+ from doc.domain.parsing import usfm_book_content
+
usfm_book = None
resource_lookup_dto = resource_lookup.resource_lookup_dto(
lang_code,
@@ -421,7 +425,7 @@ def fetch_usfm_book(
"Time to provision USFM asset files (acquire and write to disk) for TW resource: %s",
t1 - t0,
)
- usfm_book = parsing.usfm_book_content(resource_lookup_dto, resource_dir, False)
+ usfm_book = usfm_book_content(resource_lookup_dto, resource_dir, False)
return usfm_book
diff --git a/backend/passages/domain/document_generator.py b/backend/passages/domain/document_generator.py
index c9065e68..6981955f 100644
--- a/backend/passages/domain/document_generator.py
+++ b/backend/passages/domain/document_generator.py
@@ -142,8 +142,7 @@ def generate_docx_document(
else:
non_book_name_portion_of_reference = f"{passage_ref_dto.start_chapter}:{passage_ref_dto.start_chapter_verse_ref}"
# NOTE We want the document to show references even if there is no
- # content for it. Will these continue to be the desired behavior, we
- # will see.
+ # content for it.
# localized_reference = (
# f"{selected_usfm_book.national_book_name} {non_book_name_portion_of_reference}"
# if selected_usfm_book and non_book_name_portion_of_reference
@@ -286,6 +285,7 @@ def generate_passages_docx_document(
lang_name: str,
passage_reference_dtos_json: str,
email_address: str,
+ docx_filepath_prefix: str = "passages_",
book_names: dict[str, str] = BOOK_NAMES,
) -> Json[str]:
passage_reference_dtos_list = json.loads(passage_reference_dtos_json)
@@ -299,7 +299,7 @@ def generate_passages_docx_document(
email_address,
)
document_request_key_ = document_request_key(lang_code, passage_reference_dtos)
- docx_filepath_ = docx_filepath(document_request_key_)
+ docx_filepath_ = f"{docx_filepath_prefix}{docx_filepath(document_request_key_)}"
if file_needs_update(docx_filepath_):
generate_docx_document(
lang_code,