WycliffeAssociates · linearcombination · Jan 20, 2026 · Jan 19, 2026 · Jan 19, 2026 · Jan 19, 2026
diff --git a/backend/doc/domain/assembly_strategies/assemble_by_chapter.py b/backend/doc/domain/assembly_strategies/assemble_by_chapter.py
@@ -7,8 +7,6 @@
     chapter_commentary_parts,
     collect_unique_book_codes,
     collect_unique_lang_codes,
-    demote_headings_by_one,
-    demote_headings_by_two,
     filter_books_by_book_code,
     filter_books_by_lang_code,
     get_book_intros,
@@ -26,6 +24,7 @@
     tq_verses_parts,
     rg_verses_parts,
 )
+from doc.utils.text_utils import demote_headings_by_one
 from doc.domain.bible_books import BOOK_CHAPTERS, BOOK_ID_MAP, BOOK_NAMES
 from doc.domain.model import (
     AssemblyLayoutEnum,

diff --git a/backend/doc/domain/assembly_strategies/assembly_strategy_utils.py b/backend/doc/domain/assembly_strategies/assembly_strategy_utils.py
@@ -2,8 +2,8 @@
 Utility functions used by assembly_strategies.
 """
 
-from re import compile, IGNORECASE, Match, search
-from typing import Optional, Sequence, TypeVar
+from re import search
+from typing import Optional, Sequence
 
 from doc.config import settings
 from doc.domain.bible_books import BOOK_ID_MAP
@@ -19,6 +19,7 @@
 from doc.reviewers_guide.model import RGBook
 from doc.reviewers_guide.render_to_html import render_chapter
 from doc.utils.tw_utils import translation_words_content
+from doc.utils.text_utils import demote_headings_by_one
 from docx.document import Document as DocxDocument
 from docx.enum.section import WD_SECTION
 from docx.enum.text import WD_BREAK
@@ -29,11 +30,6 @@
 logger = settings.logger(__name__)
 
 
-HEADING_RE = compile(r"</?h([1-6])\b", IGNORECASE)
-
-T = TypeVar("T")
-
-
 OXML_LANGUAGE_LIST: list[str] = [
     "ar-SA",
     "bg-BG",
@@ -137,7 +133,6 @@ def tn_chapter_verses(
     tn_book: Optional[TNBook],
     chapter_num: int,
     use_two_column_layout_for_tn_notes: bool,
-    # fmt_str: str = TN_VERSE_NOTES_ENCLOSING_DIV_FMT_STR,
 ) -> str:
     """
     Return the HTML for verses that are in the chapter with
@@ -970,27 +965,6 @@ def get_non_usfm_resources_verse(
     return document_parts
 
 
-def _demote_heading(match: Match[str], levels: int) -> str:
-    tag = match.group(0)
-    level = int(match.group(1))
-    new_level = min(level + levels, 6)
-    return tag.replace(f"h{level}", f"h{new_level}", 1)
-
-
-def demote_headings_by_one(content: str) -> str:
-    return HEADING_RE.sub(
-        lambda m: _demote_heading(m, levels=1),
-        content,
-    )
-
-
-def demote_headings_by_two(content: str) -> str:
-    return HEADING_RE.sub(
-        lambda m: _demote_heading(m, levels=2),
-        content,
-    )
-
-
 def tnc_chapter_intro(
     tnc_book: Optional[TNCBook],
     chapter_num: int,

diff --git a/backend/doc/domain/document_generator.py b/backend/doc/domain/document_generator.py
@@ -775,7 +775,7 @@ def compose_docx_document(
             add_full_width_hr(doc)
         if part.add_page_break:
             add_page_break(doc)
-    style_superscripts(doc, lift_half_points=4, color=RGBColor(0x99, 0x99, 0x99))
+    style_superscripts(doc, lift_half_points=6, color=RGBColor(0x99, 0x99, 0x99))
     t1 = time.time()
     logger.info("Time for converting HTML to Docx: %.2f seconds", t1 - t0)
     return doc

diff --git a/backend/doc/domain/parsing.py b/backend/doc/domain/parsing.py
@@ -14,10 +14,7 @@
 import requests
 from bs4 import BeautifulSoup
 from doc.config import settings
-from doc.domain.assembly_strategies.assembly_strategy_utils import (
-    demote_headings_by_one,
-    demote_headings_by_two,
-)
+from doc.utils.text_utils import demote_headings_by_one
 from doc.domain.bible_books import BOOK_ID_MAP, BOOK_NAMES
 from doc.domain.model import (
     BC_RESOURCE_TYPE,
@@ -80,6 +77,11 @@
 
 H1, H2, H3, H4, H5 = "h1", "h2", "h3", "h4", "h5"
 
+_SECTIONHEAD5_RE = re.compile(
+    r'<div\s+class="sectionhead-5">\s*</div>',  # empty sectionhead-5 div; inner whitespace allowed
+    re.MULTILINE,  # NOTE(review): no effect here, the pattern has no ^ or $ anchors
+)
+
 
 BC_ARTICLE_URL_FMT_STR: str = (
     "https://content.bibletranslationtools.org/WycliffeAssociates/en_bc/src/branch/master/{}"
@@ -447,6 +449,10 @@ def ensure_chapter_marker(
     return f"\\c {chapter_num}\n" + chapter_usfm_text
 
 
+def remove_sectionhead5_elements(content: str) -> str:
+    return _SECTIONHEAD5_RE.sub(" ", content)  # each empty sectionhead-5 div becomes one space
+
+
 def usfm_book_content(
     resource_lookup_dto: ResourceLookupDto,
     resource_dir: str,
@@ -511,9 +517,12 @@ def usfm_book_content(
         cleaned_chapter_html_content = remove_null_bytes_and_control_characters(
             chapter_html_content
         )
+        chapter_html_content_sans_s5 = remove_sectionhead5_elements(
+            cleaned_chapter_html_content
+        )
         usfm_chapters[chapter_num] = USFMChapter(
             content=(
-                cleaned_chapter_html_content if cleaned_chapter_html_content else ""
+                chapter_html_content_sans_s5 if chapter_html_content_sans_s5 else ""
             ),
             verses=None,
         )
@@ -1278,7 +1287,7 @@ def assemble_chapter_usfm(
         verse_content = read_verse_file(usfm_file)
         cleaned_verse_content = clean_verse_content(verse_content)
         verse_content = ensure_paragraph_before_verses(usfm_file, cleaned_verse_content)
-        chapter_usfm_content.append(cleaned_verse_content)
+        chapter_usfm_content.append(verse_content)
         chapter_usfm_content.append(
             " \n"
         )  # Make sure a space before next chunk, e.g., auh, mat, ch 9, v 14
@@ -1456,6 +1465,8 @@ def handle_split_chapter_into_verses(
 
 def split_chapter_into_verses_with_formatting(
     chapter: USFMChapter,
+    empty_paragraph: str = "<p></p>",
+    sectionhead5_element: str = '<div class="sectionhead-5"></div>',
 ) -> dict[VerseRef, str]:
     """
     Given a USFMChapter instance, return the same instance with its
@@ -1465,29 +1476,27 @@ def split_chapter_into_verses_with_formatting(
     Sample HTML content with multiple verse elements:
 
     >>> html_content = '''
-    >>> <span class="verse">
-    >>> <sup class="versemarker">19</sup>
-    >>> For through the law I died to the law, so that I might live for God. I have been crucified with Christ.
-    >>> <sup id="footnote-caller-1" class="caller"><a href="#footnote-target-1">1</a></sup>
-    >>> <div class="sectionhead-5"></div>
-    >>> </span>
-    >>> <span class="verse">
-    >>> <sup class="versemarker">20</sup>
-    >>> I have been crucified with Christ and I no longer live, but Christ lives in me. The life I now live in the body, I live by faith in the Son of God, who loved me and gave himself for me.
-    >>> <sup id="footnote-caller-2" class="caller"><a href="#footnote-target-2">2</a></sup>
-    >>> <div class="sectionhead-5"></div>
-    >>> </span>
-    >>> '''
+    ... <span class="verse">
+    ... <sup class="versemarker">19</sup>
+    ... For through the law I died to the law, so that I might live for God. I have been crucified with Christ.
+    ... <sup id="footnote-caller-1" class="caller"><a href="#footnote-target-1">1</a></sup>
+    ... <div class="sectionhead-5"></div>
+    ... </span>
+    ... <span class="verse">
+    ... <sup class="versemarker">20</sup>
+    ... I have been crucified with Christ and I no longer live, but Christ lives in me. The life I now live in the body, I live by faith in the Son of God, who loved me and gave himself for me.
+    ... <sup id="footnote-caller-2" class="caller"><a href="#footnote-target-2">2</a></sup>
+    ... <div class="sectionhead-5"></div>
+    ... </span>
+    ... '''
     >>> from doc.domain.parsing import split_chapter_into_verses_with_formatting
-    >>> chapter = USFMChapter(content=html_content)
+    >>> chapter = USFMChapter(content=html_content, verses=None)
     >>> chapter.verses = split_chapter_into_verses_with_formatting(chapter)
-    >>> chapter.verses["19"]
-    <span class="verse">
+    >>> print(chapter.verses["19"])
     <sup class="versemarker">19</sup>
     For through the law I died to the law, so that I might live for God. I have been crucified with Christ.
     <sup id="footnote-caller-1" class="caller"><a href="#footnote-target-1">1</a></sup>
-    <div class="sectionhead-5"></div>
-    </span>
+    <BLANKLINE>
     """
     # TODO What to do about footnote targets? Perhaps have the value be a
     # tuple with first element of the verse HTML (which includes the
@@ -1505,14 +1514,16 @@ def split_chapter_into_verses_with_formatting(
             # Add to the dictionary with verse number as the key and verse text as the value
             verse_dict[verse_number_] = (
                 verse_span.strip()
-                .replace("<p></p>", "")
-                .replace('<div class="sectionhead-5"></div>', "")
+                .replace(empty_paragraph, "")
+                .replace(sectionhead5_element, "")
             )
     return verse_dict
 
 
 def split_chapter_into_verses_with_formatting_for_f10(
     chapter: USFMChapter,
+    empty_paragraph: str = "<p></p>",
+    sectionhead5_element: str = '<div class="sectionhead-5"></div>',
 ) -> dict[str, str]:
     """
     Parse chapter.content as HTML, extract each <span class="verse">,
@@ -1555,8 +1566,8 @@ def split_chapter_into_verses_with_formatting_for_f10(
         # store cleaned HTML fragment (still contains <sup> etc.)
         verse_dict[verse_number] = (
             cleaned_html.strip()
-            .replace("<p></p>", "")
-            .replace('<div class="sectionhead-5"></div>', "")
+            .replace(empty_paragraph, "")
+            .replace(sectionhead5_element, "")
         )
     return verse_dict
 

diff --git a/backend/doc/domain/resource_lookup.py b/backend/doc/domain/resource_lookup.py
@@ -16,7 +16,7 @@
 import requests
 from cachetools import TTLCache, cached
 from doc.config import settings
-from doc.domain import worker, parsing
+from doc.domain import worker
 from doc.domain.bible_books import BOOK_CHAPTERS, BOOK_ID_MAP, BOOK_NAMES
 from doc.domain.model import (
     NON_USFM_RESOURCE_TYPES,
@@ -36,7 +36,6 @@
 from doc.utils.file_utils import (
     delete_tree,
     file_needs_update,
-    read_file,
 )
 from doc.utils.list_utils import unique_tuples, unique_book_codes
 from doc.utils.text_utils import maybe_correct_book_name, normalize_localized_book_name
@@ -79,7 +78,7 @@ def fetch_source_data(
     >>> ();result = resource_lookup.fetch_source_data();() # doctest: +ELLIPSIS
     (...)
     >>> result.git_repo[0]
-    RepoEntry(repo_url=HttpUrl('https://content.bibletranslationtools.org/klero/ach-SS-acholi_rev_text_reg'), content=Content(resource_type='reg', language=Language(english_name='Acholi', ietf_code='ach-SS-acholi', national_name='Acholi', direction=<LangDirEnum.LTR: 'ltr'>)))
+    RepoEntry(repo_url=HttpUrl('https://content.bibletranslationtools.org/0success/cli_1jn_text_reg'), content=Content(resource_type='reg', language=Language(english_name='Chakali', ietf_code='cli', national_name='Chakali', direction=<LangDirEnum.LTR: 'ltr'>)))
     """
     graphql_query = """
 query MyQuery {
@@ -111,6 +110,8 @@ def fetch_source_data(
                     for repo in data_payload["git_repo"]
                     if repo.get("content", {}).get("resource_type") is not None
                 ]
+                # Sort by repo_url so the returned order is deterministic (keeps doctests stable)
+                valid_repos.sort(key=lambda repo: repo["repo_url"])
                 return SourceData.model_validate({"git_repo": valid_repos})
             else:
                 logger.info("Invalid payload structure, no data.")
@@ -263,6 +264,8 @@ def get_resource_types(
         str, str
     ] = settings.RESOURCE_TYPE_CODES_AND_NAMES,
 ) -> list[tuple[str, str]]:
+    from doc.domain.parsing import find_usfm_files
+
     resource_types = []
     for url, resource_filepath, resource_type in repo_clone_list:
         if resource_type:
@@ -285,7 +288,7 @@ def get_resource_types(
                     and file.name.split("-")[1].lower() in book_codes
                 ]
             elif resource_type in usfm_resource_types:
-                book_assets = parsing.find_usfm_files(resource_filepath)
+                book_assets = find_usfm_files(resource_filepath)
             elif resource_type == "rg":
                 between_texts, bible_reference_strs = find_bible_references(
                     join(en_rg, docx_file_path)
@@ -574,6 +577,8 @@ def usfm_resource_types_and_book_tuples(
     >>> sorted(tuples, key=lambda value: value[1])
     [('reg', '1co'), ('reg', '1jn'), ('reg', '1pe'), ('reg', '1th'), ('reg', '1ti'), ('reg', '2co'), ('reg', '2jn'), ('reg', '2pe'), ('reg', '2th'), ('reg', '2ti'), ('reg', '3jn'), ('reg', 'act'), ('reg', 'col'), ('reg', 'eph'), ('reg', 'gal'), ('reg', 'heb'), ('reg', 'jas'), ('reg', 'jhn'), ('reg', 'jud'), ('reg', 'luk'), ('reg', 'mat'), ('reg', 'mrk'), ('reg', 'phm'), ('reg', 'php'), ('reg', 'rev'), ('reg', 'rom'), ('reg', 'tit')]
     """
+    from doc.domain.parsing import usfm_asset_file
+
     book_codes = book_codes_str.split(",")
     data: SourceData | None = fetch_source_data()
     resource_type_and_book_tuples = set()
@@ -602,9 +607,7 @@ def usfm_resource_types_and_book_tuples(
                     resource_filepath = prepare_resource_filepath(dto)
                     if file_needs_update(resource_filepath):
                         provision_asset_files(dto.url, resource_filepath)
-                    content_file = parsing.usfm_asset_file(
-                        dto, resource_filepath, False
-                    )
+                    content_file = usfm_asset_file(dto, resource_filepath, False)
                     if content_file:
                         resource_type_and_book_tuples.add((resource_type, book_code))
     return sorted(resource_type_and_book_tuples, key=lambda value: value[0])
@@ -942,18 +945,24 @@ def get_book_names_from_usfm_metadata(
     be localized, it depends on the translation work done for language
     lang_code.
     """
+    from doc.domain.parsing import (
+        find_usfm_files,
+        split_usfm_by_chapters,
+        maybe_localized_book_name,
+    )
+
     book_codes_and_names_localized: dict[str, str] = {}
-    usfm_files = parsing.find_usfm_files(resource_filepath)
+    usfm_files = find_usfm_files(resource_filepath)
     for usfm_file in usfm_files:
         usfm = ""
         usfm_file_components = Path(usfm_file).stem.lower().split("-")
         book_code = usfm_file_components[1]
         with open(usfm_file, "r") as f:
             usfm = f.read()
-        frontmatter, _, _ = parsing.split_usfm_by_chapters(
+        frontmatter, _, _ = split_usfm_by_chapters(
             lang_code, resource_type, book_code, usfm
         )
-        localized_book_name = parsing.maybe_localized_book_name(frontmatter)
+        localized_book_name = maybe_localized_book_name(frontmatter)
         # localized_book_name = maybe_correct_book_name(lang_code, localized_book_name)
         book_codes_and_names_localized[book_code] = localized_book_name
     logger.debug("book_codes_and_names_localized: %s", book_codes_and_names_localized)
@@ -1204,7 +1213,8 @@ def nt_survey_rg_passages(
 ) -> list[BibleReference]:
     """
     >>> from doc.domain import resource_lookup
-    >>> rg_books = resource_lookup.nt_survey_rg_passages()
+    >>> ();rg_books = resource_lookup.nt_survey_rg_passages();() # doctest: +ELLIPSIS
+    (...)
     >>> rg_books[0]
     BibleReference(book_code='mat', book_name='Matthew', start_chapter=2, start_chapter_verse_ref='1-12', end_chapter=None, end_chapter_verse_ref=None)
     """
@@ -1245,9 +1255,10 @@ def ot_survey_rg1_passages(
 ) -> list[BibleReference]:
     """
     >>> from doc.domain import resource_lookup
-    >>> rg_books = resource_lookup.ot_survey_rg1_passages()
+    >>> ();rg_books = resource_lookup.ot_survey_rg1_passages();() # doctest: +ELLIPSIS
+    (...)
     >>> rg_books[0]
-    BibleReference(book_code='gen', book_name='Genesis', start_chapter=2, start_chapter_verse_ref='1-12', end_chapter=None, end_chapter_verse_ref=None)
+    BibleReference(book_code='gen', book_name='Genesis', start_chapter=1, start_chapter_verse_ref='1', end_chapter=2, end_chapter_verse_ref='3')
     """
     path = join(resource_dir, docx_file_path)
     rg_books = get_rg_books(
@@ -1286,9 +1297,10 @@ def ot_survey_rg2_passages(
 ) -> list[BibleReference]:
     """
     >>> from doc.domain import resource_lookup
-    >>> rg_books = resource_lookup.ot_survey_rg2_passages()
+    >>> ();rg_books = resource_lookup.ot_survey_rg2_passages();() # doctest: +ELLIPSIS
+    (...)
     >>> rg_books[0]
-    BibleReference(book_code='jos', book_name='Joshua', start_chapter=2, start_chapter_verse_ref='1-12', end_chapter=None, end_chapter_verse_ref=None)
+    BibleReference(book_code='jos', book_name='Joshua', start_chapter=1, start_chapter_verse_ref='1-9', end_chapter=None, end_chapter_verse_ref=None)
     """
     path = join(resource_dir, docx_file_path)
     rg_books = get_rg_books(
@@ -1327,9 +1339,10 @@ def ot_survey_rg3_passages(
 ) -> list[BibleReference]:
     """
     >>> from doc.domain import resource_lookup
-    >>> rg_books = resource_lookup.ot_survey_rg3_passages()
+    >>> ();rg_books = resource_lookup.ot_survey_rg3_passages();() # doctest: +ELLIPSIS
+    (...)
     >>> rg_books[0]
-    BibleReference(book_code='job', book_name='Job', start_chapter=2, start_chapter_verse_ref='1-12', end_chapter=None, end_chapter_verse_ref=None)
+    BibleReference(book_code='job', book_name='Job', start_chapter=1, start_chapter_verse_ref='6-22', end_chapter=None, end_chapter_verse_ref=None)
     """
     path = join(resource_dir, docx_file_path)
     rg_books = get_rg_books(
@@ -1368,9 +1381,10 @@ def ot_survey_rg4_passages(
 ) -> list[BibleReference]:
     """
     >>> from doc.domain import resource_lookup
-    >>> rg_books = resource_lookup.ot_survey_rg4_passages()
+    >>> ();rg_books = resource_lookup.ot_survey_rg4_passages();() # doctest: +ELLIPSIS
+    (...)
     >>> rg_books[0]
-    BibleReference(book_code='isa', book_name='Isaiah', start_chapter=2, start_chapter_verse_ref='1-12', end_chapter=None, end_chapter_verse_ref=None)
+    BibleReference(book_code='isa', book_name='Isaiah', start_chapter=1, start_chapter_verse_ref='1-9', end_chapter=None, end_chapter_verse_ref=None)
     """
     path = join(resource_dir, docx_file_path)
     rg_books = get_rg_books(