Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion backend/doc/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -253,7 +253,7 @@ def api_test_url(self) -> str:
# Used by docker
IMAGE_TAG: str

model_config = SettingsConfigDict(env_file=".env", case_sensitive=True)
model_config = SettingsConfigDict(env_file=".env", case_sensitive=True, frozen=True)


# mypy with pydantic v2 doesn't understand that defaults will be
Expand Down
159 changes: 90 additions & 69 deletions backend/doc/domain/assembly_strategies/assemble_by_book.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,40 +87,51 @@ def assemble_content_by_book(
else:
usfm_book = selected_usfm_books[0]
usfm_book2 = selected_usfm_books[1]
selected_tn_books = [
tn_book
for tn_book in tn_books
if tn_book.lang_code == lang_code and tn_book.book_code == book_code
]
tn_book = selected_tn_books[0] if selected_tn_books else None
selected_tnc_books = [
tnc_book
for tnc_book in tnc_books
if tnc_book.lang_code == lang_code and tnc_book.book_code == book_code
]
tnc_book = selected_tnc_books[0] if selected_tnc_books else None
selected_tq_books = [
tq_book
for tq_book in tq_books
if tq_book.lang_code == lang_code and tq_book.book_code == book_code
]
tq_book = selected_tq_books[0] if selected_tq_books else None
selected_tw_books = [
tw_book for tw_book in tw_books if tw_book.lang_code == lang_code
]
tw_book = selected_tw_books[0] if selected_tw_books else None
selected_bc_books = [
bc_book
for bc_book in bc_books
if bc_book.lang_code == lang_code and bc_book.book_code == book_code
]
bc_book = selected_bc_books[0] if selected_bc_books else None
selected_rg_books = [
rg_book
for rg_book in rg_books
if rg_book.lang_code == lang_code and rg_book.book_code == book_code
]
rg_book = selected_rg_books[0] if selected_rg_books else None
tn_book = next(
(
tn_book
for tn_book in tn_books
if tn_book.lang_code == lang_code and tn_book.book_code == book_code
),
None,
)
tnc_book = next(
(
tnc_book
for tnc_book in tnc_books
if tnc_book.lang_code == lang_code
and tnc_book.book_code == book_code
),
None,
)
tq_book = next(
(
tq_book
for tq_book in tq_books
if tq_book.lang_code == lang_code and tq_book.book_code == book_code
),
None,
)
tw_book = next(
(tw_book for tw_book in tw_books if tw_book.lang_code == lang_code),
None,
)
bc_book = next(
(
bc_book
for bc_book in bc_books
if bc_book.lang_code == lang_code and bc_book.book_code == book_code
),
None,
)
rg_book = next(
(
rg_book
for rg_book in rg_books
if rg_book.lang_code == lang_code and rg_book.book_code == book_code
),
None,
)
if usfm_book is not None:
document_parts.extend(
assemble_usfm_by_book(
Expand Down Expand Up @@ -253,40 +264,51 @@ def assemble_content_by_verse_book_at_a_time(
else:
usfm_book = selected_usfm_books[0]
usfm_book2 = selected_usfm_books[1]
selected_tn_books = [
tn_book
for tn_book in tn_books
if tn_book.lang_code == lang_code and tn_book.book_code == book_code
]
tn_book = selected_tn_books[0] if selected_tn_books else None
selected_tnc_books = [
tnc_book
for tnc_book in tnc_books
if tnc_book.lang_code == lang_code and tnc_book.book_code == book_code
]
tnc_book = selected_tnc_books[0] if selected_tnc_books else None
selected_tq_books = [
tq_book
for tq_book in tq_books
if tq_book.lang_code == lang_code and tq_book.book_code == book_code
]
tq_book = selected_tq_books[0] if selected_tq_books else None
selected_tw_books = [
tw_book for tw_book in tw_books if tw_book.lang_code == lang_code
]
tw_book = selected_tw_books[0] if selected_tw_books else None
selected_bc_books = [
bc_book
for bc_book in bc_books
if bc_book.lang_code == lang_code and bc_book.book_code == book_code
]
bc_book = selected_bc_books[0] if selected_bc_books else None
selected_rg_books = [
rg_book
for rg_book in rg_books
if rg_book.lang_code == lang_code and rg_book.book_code == book_code
]
rg_book = selected_rg_books[0] if selected_rg_books else None
tn_book = next(
(
tn_book
for tn_book in tn_books
if tn_book.lang_code == lang_code and tn_book.book_code == book_code
),
None,
)
tnc_book = next(
(
tnc_book
for tnc_book in tnc_books
if tnc_book.lang_code == lang_code
and tnc_book.book_code == book_code
),
None,
)
tq_book = next(
(
tq_book
for tq_book in tq_books
if tq_book.lang_code == lang_code and tq_book.book_code == book_code
),
None,
)
tw_book = next(
(tw_book for tw_book in tw_books if tw_book.lang_code == lang_code),
None,
)
bc_book = next(
(
bc_book
for bc_book in bc_books
if bc_book.lang_code == lang_code and bc_book.book_code == book_code
),
None,
)
rg_book = next(
(
rg_book
for rg_book in rg_books
if rg_book.lang_code == lang_code and rg_book.book_code == book_code
),
None,
)
if usfm_book:
document_parts.extend(
assemble_usfm_by_verse_book_at_a_time(
Expand Down Expand Up @@ -500,7 +522,6 @@ def assemble_usfm_by_verse_book_at_a_time(
tnc_book,
tq_book,
bc_book,
# rg_book,
verse_ref,
chapter_num,
is_rtl,
Expand Down
129 changes: 63 additions & 66 deletions backend/doc/domain/document_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import time
from datetime import datetime
from os.path import exists, join
from typing import Optional, Sequence, cast
from typing import Final, Mapping, Optional, Sequence, TypeAlias, cast

# import regex as re  # The `regex` package is not yet supported on Python 3.13; it is used for Unicode word boundaries in RTL languages.
import re
Expand Down Expand Up @@ -43,11 +43,8 @@
ResourceLookupDto,
ResourceRequest,
TNBook,
TNChapter,
TNCBook,
TNCChapter,
TQBook,
TQChapter,
TWBook,
USFMBook,
)
Expand Down Expand Up @@ -84,6 +81,16 @@
logger = settings.logger(__name__)


LangCode: TypeAlias = str
BookCode: TypeAlias = str
Key: TypeAlias = tuple[LangCode, BookCode]

ReplacementEntry: TypeAlias = tuple[re.Pattern[str], str] # (pattern, replacement)
ReplacementMap: TypeAlias = dict[Key, ReplacementEntry]

BOOK_NAMES: Final[Mapping[BookCode, str]] = BOOK_NAMES


def initialize_document_request_and_key(
document_request_json: str,
) -> tuple[DocumentRequest, str]:
Expand Down Expand Up @@ -117,86 +124,76 @@ def localize_non_usfm_book_names(
tn_books: list[TNBook],
tnc_books: list[TNCBook],
tq_books: list[TQBook],
book_names: Mapping[BookCode, str] = BOOK_NAMES,
) -> None:
replacement_map: dict[
tuple[str, str],
tuple[re.Pattern[str], str],
] = {}
"""
In-place replacement of English book names with localized (national) names
in Translation Notes (TN), Condensed TN (TNC), and Translation Questions (TQ).

If the user has not chosen USFM for a given language/book, the localized
name is looked up from the USFM-only book list for that language, when an
entry for the book is available there.
"""
replacement_map: ReplacementMap = {}
# ── Phase 1: Collect known national names from chosen USFM books ──────────
for usfm in usfm_books:
english_name = BOOK_NAMES.get(usfm.book_code)
english_name = book_names.get(usfm.book_code)
if not english_name:
continue

pattern = re.compile(rf"\b{re.escape(english_name)}\b")

pattern = re.compile(rf"\b{re.escape(english_name)}\b", re.IGNORECASE)
replacement_map[(usfm.lang_code, usfm.book_code)] = (
pattern,
usfm.national_book_name,
)

def localize_tn_chapter(
chapter: TNChapter,
pattern: re.Pattern[str],
replacement: str,
) -> None:
chapter.intro_html = pattern.sub(replacement, chapter.intro_html)
for verse_ref, html in chapter.verses.items():
chapter.verses[verse_ref] = pattern.sub(replacement, html)

def localize_tnc_chapter(
chapter: TNCChapter,
def apply_replacement(
text: str,
pattern: re.Pattern[str],
replacement: str,
) -> None:
chapter.intro_html = pattern.sub(replacement, chapter.intro_html)
for verse_ref, html in chapter.verses.items():
chapter.verses[verse_ref] = pattern.sub(replacement, html)
) -> str:
return pattern.sub(replacement, text)

def localize_tq_chapter(
chapter: TQChapter,
pattern: re.Pattern[str],
replacement: str,
def localize_book(
book: TNBook | TNCBook | TQBook,
has_book_intro: bool,
has_chapter_intro: bool,
) -> None:
for verse_ref, html in chapter.verses.items():
chapter.verses[verse_ref] = pattern.sub(replacement, html)

# --- TN ---
for tn_book in tn_books:
key = (tn_book.lang_code, tn_book.book_code)
key: Key = (book.lang_code, book.book_code)
entry = replacement_map.get(key)
if not entry:
continue

if entry is None:
usfm_names = resource_lookup.book_codes_for_lang_from_usfm_only(
book.lang_code
)
match = next(
(item for item in usfm_names if item[0] == book.book_code), None
)
english_name = book_names.get(book.book_code)
if english_name is None or match is None:
return
pattern = re.compile(rf"\b{re.escape(english_name)}\b", re.IGNORECASE)
replacement = match[1].strip()
if not replacement:
return
replacement_map[key] = (pattern, replacement)
entry = (pattern, replacement)
pattern, replacement = entry
tn_book.book_intro = pattern.sub(replacement, tn_book.book_intro)

for tn_chapter in tn_book.chapters.values():
localize_tn_chapter(tn_chapter, pattern, replacement)
if has_book_intro and hasattr(book, "book_intro"):
book.book_intro = apply_replacement(book.book_intro, pattern, replacement)
for chapter in book.chapters.values():
if has_chapter_intro and hasattr(chapter, "intro_html"):
chapter.intro_html = apply_replacement(
chapter.intro_html, pattern, replacement
)
for verse_ref, html in chapter.verses.items():
chapter.verses[verse_ref] = apply_replacement(
html, pattern, replacement
)

# --- TNC ---
for tn_book in tn_books:
localize_book(tn_book, has_book_intro=True, has_chapter_intro=True)
for tnc_book in tnc_books:
key = (tnc_book.lang_code, tnc_book.book_code)
entry = replacement_map.get(key)
if not entry:
continue

pattern, replacement = entry
tnc_book.book_intro = pattern.sub(replacement, tnc_book.book_intro)

for tnc_chapter in tnc_book.chapters.values():
localize_tnc_chapter(tnc_chapter, pattern, replacement)

# --- TQ ---
for book in tq_books:
key = (book.lang_code, book.book_code)
entry = replacement_map.get(key)
if not entry:
continue

pattern, replacement = entry
for tq_chapter in book.chapters.values():
localize_tq_chapter(tq_chapter, pattern, replacement)
localize_book(tnc_book, has_book_intro=True, has_chapter_intro=True)
for tq_book in tq_books:
localize_book(tq_book, has_book_intro=False, has_chapter_intro=False)


def locate_acquire_and_build_resource_objects(
Expand Down