Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
782815a
chore: remove load_by_title from terms_api, as it should be based on …
YishaiGlasner Feb 4, 2026
f2ef5e5
chore: change term load in hok_leyisrael to rely on term name.
YishaiGlasner Feb 4, 2026
467ac49
chore: change term normalize to rely on name (checked the only occurr…
YishaiGlasner Feb 4, 2026
2213e8a
chore: remove unused check_term function (was used in the category ed…
YishaiGlasner Feb 4, 2026
eceed09
fix: check if 'path' is a key in j BEFORE using it.
YishaiGlasner Feb 5, 2026
b033a07
feat(Term):
YishaiGlasner Feb 5, 2026
98ca067
refactor(category api):
YishaiGlasner Feb 5, 2026
ef5804b
chore(TitledTreeNode):
YishaiGlasner Feb 5, 2026
6aeba1c
refactor(Term):
YishaiGlasner Feb 5, 2026
c4dc101
refactor(terms api): change `term` into `name`.
YishaiGlasner Feb 5, 2026
275f71d
refactor(terms api): remove checking if name exists as it's guarantee…
YishaiGlasner Feb 5, 2026
aa182b4
refactor(terms api): remove add/terms endpoint - it's unreachable (ca…
YishaiGlasner Feb 5, 2026
4ad5dcf
refactor(Term): remove `load_by_title` since now more than one term c…
YishaiGlasner Feb 5, 2026
aea84ca
refactor(Term): refactor `load_by_primary_title` to `load_by_primary_…
YishaiGlasner Feb 5, 2026
df3eb12
chore(Term): newlines and indentation.
YishaiGlasner Feb 5, 2026
3198f75
refactor(Term): add validation ensuing uniqueness of primary titles t…
YishaiGlasner Feb 5, 2026
abe7562
chore(text.py): remove unused imports
YishaiGlasner Feb 5, 2026
28d87ea
feat(Term): dependencies for term changing.
YishaiGlasner Feb 8, 2026
c275e51
fix: import form settings
YishaiGlasner Feb 8, 2026
c5b045a
fix(term api): add empty list titles to data when term doesnt exist.
YishaiGlasner Feb 8, 2026
ff5dec9
fix(term tests): remove test for name doesn't equal primary en.
YishaiGlasner Feb 8, 2026
4a62c22
refactor(term): change setting name logic not to count docs.
YishaiGlasner Feb 8, 2026
ecdae5c
refactor(term): validate terms with same titles doesn't exist also fo…
YishaiGlasner Feb 8, 2026
7f58384
refactor(term): track primary titles as scalars instead of mutable li…
YishaiGlasner Feb 8, 2026
d375f94
refactor(term): use InputError rather than ValidationError.
YishaiGlasner Feb 8, 2026
b183ad6
test(term): fix tests to allow dupliacte title but not of primary tit…
YishaiGlasner Feb 8, 2026
3692fe5
test(mongo indexes): remove unique from term's titles index.
YishaiGlasner Feb 8, 2026
4c4f352
fix(term): use f string rather than plus to concaterate int to str
YishaiGlasner Feb 9, 2026
51ca59a
chore: update gunicorn version to 25.0.3
nsantacruz Feb 9, 2026
0e9b37b
chore: downgrade gunicorn version to 23.0.0
nsantacruz Feb 9, 2026
cfd4bab
chore: downgrade gunicorn version to 23.0.0
nsantacruz Feb 9, 2026
b111d42
Merge branch 'fix-setup-tools-missing-bug' into chore/sc-41467/refact…
YishaiGlasner Feb 10, 2026
5484258
fix(links api): change english collective title to be the terms' prim…
YishaiGlasner Feb 10, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 9 additions & 19 deletions reader/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -1409,22 +1409,16 @@ def edit_text_info(request, title=None, new_title=None):

@ensure_csrf_cookie
@staff_member_required
def terms_editor(request, term=None):
def terms_editor(request, name):
"""
Add/edit a term using the JSON Editor.
"""
if term is not None:
existing_term = Term().load_by_title(term)
data = existing_term.contents() if existing_term else {"name": term, "titles": []}
else:
return render_template(request,'static/generic.html', None, {
"title": "Terms Editor",
"content": "Please include the primary Term name in the URL to uses the Terms Editor."
})
existing_term = Term().load({'name': name})
data = existing_term.contents() if existing_term else {"titles": []}

dataJSON = json.dumps(data)
return render_template(request,'edit_term.html', None, {
'term': term,
'term': name,
'dataJSON': dataJSON,
'is_update': "true" if existing_term else "false"
})
Expand Down Expand Up @@ -2631,9 +2625,9 @@ def _internal_do_post(request, update, cat, uid, **kwargs):
return jsonResponse({"error": "Missing data in POST request."})
j = json.loads(j)
update = int(request.GET.get("update", False))
new_category = Category().load({"path": j["path"]})
if "path" not in j:
return jsonResponse({"error": "'path' is a required attribute"})
new_category = Category().load({"path": j["path"]})
if not update and new_category is not None:
return jsonResponse({"error": "Category {} already exists.".format(", ".join(j["path"]))})

Expand All @@ -2652,14 +2646,10 @@ def _internal_do_post(request, update, cat, uid, **kwargs):
return {"error": f"Merging two categories named {last_path} is not supported."}
elif "heSharedTitle" in j:
# if heSharedTitle provided, make sure sharedTitle and heSharedTitle correspond to same Term
en_term = Term().load_by_title(last_path)
he_term = Term().load_by_title(he_last_path)
if en_term and en_term == he_term:
pass # both titles are found in an existing Term object
else:
existing_term = Term().load_by_primary_titles(last_path, he_last_path)
if not existing_term:
# titles weren't found in same Term object, so try to create a new Term
t = Term()
t.name = last_path
t.add_primary_titles(last_path, he_last_path)
t.save()

Expand Down Expand Up @@ -2743,15 +2733,15 @@ def terms_api(request, name):
This is mainly to be used for adding hebrew internationalization language for section names, categories and commentators
"""
if request.method == "GET":
term = Term().load({'name': name}) or Term().load_by_title(name)
term = Term().load({'name': name})
if term is None:
return jsonResponse({"error": "Term does not exist."})
else:
return jsonResponse(term.contents(), callback=request.GET.get("callback", None))

if request.method in ("POST", "DELETE"):
def _internal_do_post(request, uid):
t = Term().load({'name': name}) or Term().load_by_title(name)
t = Term().load({'name': name})
if request.method == "POST":
if "json" in request.POST:
term = request.POST.get("json")
Expand Down
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,8 @@ google-auth==1.24.0
google-cloud-logging==1.15.1
google-cloud-storage==1.32.0
google-re2
gunicorn==20.0.4
gunicorn==23.0.0
setuptools==69.5.1
html5lib==0.9999999
httplib2==0.18.1
ipython==7.34.*
Expand Down
5 changes: 3 additions & 2 deletions sefaria/client/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,9 +72,10 @@ def format_link_object_for_client(link, with_text, ref, pos=None):
# if the link is commentary, strip redundant info (e.g. "Rashi on Genesis 4:2" -> "Rashi")
# this is now simpler, and there is explicit data on the index record for it.
if com["type"] == "commentary":
collective_title = getattr(linkRef.index, 'collective_title', None)
term = library.get_simple_term_mapping().get(collective_title) or {}
com["collectiveTitle"] = {
'en': getattr(linkRef.index, 'collective_title', linkRef.index.title),
'he': hebrew_term(getattr(linkRef.index, 'collective_title', linkRef.index.get_title("he")))
lang: term.get(lang) or linkRef.index.get_title(lang) for lang in ('en', 'he')
}
else:
com["collectiveTitle"] = {'en': linkRef.index.title, 'he': linkRef.index.get_title("he")}
Expand Down
29 changes: 0 additions & 29 deletions sefaria/helper/category.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,32 +197,3 @@ def update_order_of_category_children(cat, uid, subcategoriesAndBooks):
result = tracker.update(uid, Category, cat)
results.append(result.contents())
return results





def check_term(last_path, he_last_path):
"""
if Category Editor is used, make sure English and Hebrew titles correspond to the same term.
if neither of the titles correspond to a term, create the appropriate term
:param last_path: (str) Corresponds to lastPath of Category and english title of Term
:param he_last_path: (str) Corresponds to a hebrew title of Term
"""

error_msg = ""
en_term = Term().load_by_title(last_path)
he_term = Term().load_by_title(he_last_path)

if en_term == he_term:
pass
if (en_term and he_term != en_term) or (he_term and he_term != en_term):
# they do not correspond, either because both terms exist but are not the same, or one term already
# exists but the other one doesn't exist
error_msg = f"English and Hebrew titles, {last_path} and {he_last_path}, do not correspond to the same term. Please use the term editor."
elif en_term is None and he_term is None:
t = Term()
t.name = last_path
t.add_primary_titles(last_path, he_last_path)
t.save()
return error_msg
46 changes: 40 additions & 6 deletions sefaria/helper/schema.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
# -*- coding: utf-8 -*-

from sefaria.settings import MULTISERVER_ENABLED
from sefaria.model import *
from sefaria.model import library
from sefaria.model.abstract import AbstractMongoRecord
from sefaria.model.marked_up_text_chunk import MarkedUpTextChunkSet
from sefaria.model.schema import DictionaryNode
from sefaria.system.exceptions import InputError
from sefaria.system.database import db
from sefaria.sheets import save_sheet
from sefaria.system.multiserver.coordinator import server_coordinator
from sefaria.utils.util import list_depth, traverse_dict_tree

import re
Expand Down Expand Up @@ -705,7 +707,7 @@ def construct_query(attribute, queries):
generic_rewrite(RefDataSet(construct_query('ref', identifier)))
print('Updating Topic Links')
generic_rewrite(RefTopicLinkSet(construct_query('ref', identifier)))
generic_rewrite(RefTopicLinkSet(construct_query('expandedRefs', identifier)))
generic_rewrite(RefTopicLinkSet(construct_query('expandedRefs', identifier)), attr_name='expandedRefs')
print('Updating Garden Stops')
generic_rewrite(GardenStopSet(construct_query('ref', identifier)))
print('Updating Sheets')
Expand All @@ -715,11 +717,11 @@ def construct_query(attribute, queries):
print('Updating Marked Up Text Chunks')
generic_rewrite(MarkedUpTextChunkSet(construct_query('ref', identifier)))
print('Updating Manuscripts')
generic_rewrite(ManuscriptSet(construct_query('contained_refs', identifier)))
generic_rewrite(ManuscriptSet(construct_query('expanded_refs', identifier)))
generic_rewrite(ManuscriptPageSet(construct_query('contained_refs', identifier)), attr_name='contained_refs')
generic_rewrite(ManuscriptPageSet(construct_query('expanded_refs', identifier)), attr_name='expanded_refs')
print('Updating WebPages')
generic_rewrite(WebPageSet(construct_query('refs', identifier)))
generic_rewrite(ManuscriptSet(construct_query('expandedRefs', identifier)))
generic_rewrite(WebPageSet(construct_query('refs', identifier)), attr_name='refs')
generic_rewrite(WebPageSet(construct_query('expandedRefs', identifier)), attr_name='expandedRefs')
if not skip_history:
print('Updating History')
generic_rewrite(HistorySet(construct_query('ref', identifier), sort=[('ref', 1)]))
Expand Down Expand Up @@ -1119,3 +1121,35 @@ def update_headwords_map(dictionary_node):
if quoted:
print(f'Other entries in this lexicon with this old headword as ref: {", ".join(quoted)}')
print('Warning: old ref can appear as wrapped ref in other places in the library.')


def cascade_node_shared_title_change(node, old):
    """
    Rewrite stored refs after the title of `node` changed from `old`.

    The node's previous address is reconstructed by replacing the last element
    of `node.address()` with `old`. Every ref string that starts with that old
    address is rewritten to start with the node's current full title.
    :param node: schema node whose title has already been changed
    :param old: (str) the title the node had before the change
    """
    old_address = node.address()[:-1] + [old]
    # Raw f-string: `\d` has to reach the regex engine verbatim. In a non-raw
    # literal it is an invalid escape sequence (SyntaxWarning on Python 3.12+).
    # The lookahead keeps the match on a whole address: end of string, a deeper
    # node (", "), a colon, or a space followed by a digit.
    old_pattern = rf"^{re.escape(', '.join(old_address))}(?=$|, |:| \d)"
    old_regex = re.compile(old_pattern)
    new_replacement = node.full_title()

    def needs_rewrite(ref_str, *args):
        return old_regex.search(ref_str)

    def rewriter(ref_str):
        # A callable replacement inserts the new title literally, so a
        # backslash in a title is never read as a group reference.
        return old_regex.sub(lambda _match: new_replacement, ref_str)

    print(f'Cascading from {old_pattern} to {new_replacement}')
    cascade(node.index.title, rewriter, needs_rewrite)


def process_term_primary_title_change(term, **kwargs):
    """
    When a Term's primary title (en or he) changes, rebuild library caches.
    This updates term mapping, categories, and indexes that reference this term.

    When the changed attribute is the English primary title, the refs of every
    schema node whose sharedTitle equals the old title are cascaded as well.
    :param term: the Term that changed (not read by this function)
    :param kwargs: "attr" is the name of the changed attribute, "old" its previous value
    """
    old = kwargs.get("old")
    attr = kwargs.get("attr")

    library.rebuild(include_toc=True)

    if MULTISERVER_ENABLED:
        server_coordinator.publish_event("library", "rebuild", [True])

    # Refs are cascaded only for a change of the English primary title.
    # Without an old title there is nothing to rewrite, and comparing against
    # None would match every node that has no sharedTitle.
    if attr != "_primary_en" or not old:
        return

    # Now new refs are available and can be cascaded
    for index in library.all_index_records():
        for node in [index.nodes] + index.nodes.all_children():
            if getattr(node, "sharedTitle", None) == old:
                cascade_node_shared_title_change(node, old)
17 changes: 8 additions & 9 deletions sefaria/model/dependencies.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""
dependencies.py -- list cross model dependencies and subscribe listeners to changes.
"""

import sefaria.helper.schema
from . import abstract, link, note, history, schema, text, layer, version_state, timeperiod, garden, notification, collection, library, category, ref_data, user_profile, manuscript, topic, place, marked_up_text_chunk

from .abstract import subscribe, cascade, cascade_to_list, cascade_delete, cascade_delete_to_list
Expand Down Expand Up @@ -80,17 +80,16 @@ def process_version_title_change_in_search(ver, **kwargs):


# Terms
# TODO cascade change to Term.name.
# TODO Current locations where we know terms are used [Index, Categories]
# TODO Use Sefaria-Project/scripts/search_for_indexes_that_use_terms.py for now
# TermScheme name change cascades to Term.scheme field
subscribe(cascade(schema.TermSet, "scheme"), schema.TermScheme, "attributeChange", "name")

# Term save/delete rebuilds the term mapping cache
subscribe(text.reset_simple_term_mapping, schema.Term, "delete")
subscribe(text.reset_simple_term_mapping, schema.Term, "save")
"""
Notes on where Terms are used
Index (alt structs and schema)
Category
"""

# Term primary title change rebuilds library (term mapping, categories, indexes)
subscribe(sefaria.helper.schema.process_term_primary_title_change, schema.Term, "attributeChange", "_primary_en")
subscribe(sefaria.helper.schema.process_term_primary_title_change, schema.Term, "attributeChange", "_primary_he")

# Time
subscribe(cascade(topic.PersonTopicSet, "properties.era.value"), timeperiod.TimePeriod, "attributeChange", "symbol")
Expand Down
72 changes: 43 additions & 29 deletions sefaria/model/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@
from sefaria.system.database import db
from sefaria.model.lexicon import LexiconEntrySet
from sefaria.model.linker.has_match_template import MatchTemplateMixin
from sefaria.system.exceptions import InputError, IndexSchemaError, DictionaryEntryNotFoundError, SheetNotFoundError
from sefaria.system.exceptions import InputError, IndexSchemaError, DictionaryEntryNotFoundError, SheetNotFoundError, \
DuplicateRecordError
from sefaria.utils.hebrew import decode_hebrew_numeral, encode_small_hebrew_numeral, encode_hebrew_numeral, encode_hebrew_daf, hebrew_term, sanitize
from sefaria.utils.talmud import daf_to_section

Expand Down Expand Up @@ -241,7 +242,7 @@ class Term(abst.AbstractMongoRecord, AbstractTitledObject):
"""
collection = 'term'
track_pkeys = True
pkeys = ["name"]
pkeys = ["name", "_primary_en", "_primary_he"]
title_group = None
history_noun = "term"

Expand All @@ -258,35 +259,65 @@ class Term(abst.AbstractMongoRecord, AbstractTitledObject):
"description"
]

def load_by_title(self, title):
query = {'titles.text': title}
def load_by_primary_titles(self, en_title, he_title):
    """
    Load the record whose `titles` array contains both given texts as primary titles.

    Each text must match an element of `titles` that has that `text` and
    `primary: True`. The language of the matching element is not part of the query.
    :param en_title: (str) text of the first primary title to match
    :param he_title: (str) text of the second primary title to match
    :return: whatever `self.load` returns for the query
    """
    primary_matchers = []
    for title_text in [en_title, he_title]:
        primary_matchers.append({'$elemMatch': {'text': title_text, 'primary': True}})
    return self.load(query={'titles': {'$all': primary_matchers}})

def _update_tracked_primary_titles(self):
    """Copy the current English and Hebrew primary titles into `_primary_en` and `_primary_he`."""
    primary_en = self.get_primary_title("en")
    self._primary_en = primary_en
    primary_he = self.get_primary_title("he")
    self._primary_he = primary_he

def _set_pkeys(self):
    """
    Refresh the title group and the tracked primary titles, then defer to the
    parent `_set_pkeys`.
    """
    current_titles = getattr(self, "titles", None)
    self.set_titles(current_titles)
    # The tracked scalars must be current before the parent implementation runs.
    self._update_tracked_primary_titles()
    super()._set_pkeys()

def _set_derived_attributes(self):
    """Build the title group from `self.titles`, or from None when the attribute is absent."""
    current_titles = getattr(self, "titles", None)
    self.set_titles(current_titles)

def set_titles(self, titles):
    """
    Wrap `titles` in a TitleGroup and store it on `self.title_group`.
    :param titles: value handed to the TitleGroup constructor
    """
    group = TitleGroup(titles)
    self.title_group = group

def _set_name(self):
    """
    Derive a unique `name` from the primary title and store it on the record.

    The primary title is used as-is when no existing Term carries it as a name.
    Otherwise the integers 1, 2, ... are appended in turn until the result is
    not an existing name.
    """
    base_name = self.get_primary_title()
    # Raw f-string: `\d` has to reach the regex engine verbatim. In a non-raw
    # literal it is an invalid escape sequence (SyntaxWarning on Python 3.12+).
    name_query = {'name': {'$regex': rf'^{re.escape(base_name)}\d*$'}}
    existing_names = {t.name for t in TermSet(name_query)}

    name = base_name
    suffix = 1
    while name in existing_names:
        name = f"{base_name}{suffix}"
        suffix += 1
    self.name = name

def _normalize(self):
    """
    Sync `titles` from the title group, refresh the tracked primary titles,
    and generate a `name` when the record does not have one yet.
    """
    self.titles = self.title_group.titles
    self._update_tracked_primary_titles()
    if hasattr(self, 'name'):
        return
    self._set_name()

def _validate(self):
super(Term, self)._validate()
# do not allow duplicates:
for title in self.get_titles():
other_term = Term().load_by_title(title)
if other_term and not self.same_record(other_term):
raise InputError("A Term with the title {} in it already exists".format(title))
# ensure uniqueness of primary titles together
same_titles_term = Term().load_by_primary_titles(self.get_primary_title(), self.get_primary_title('he'))
if same_titles_term and not self.same_record(same_titles_term):
raise DuplicateRecordError(f"A Term with the primary titles {self.get_primary_title()} and {self.get_primary_title('he')} already exists")
# do not allow duplicate names
if self.is_new() and Term().load({'name': self.name}):
raise DuplicateRecordError(f"A Term with the name {self.name} already exists")
elif not self.is_new() and self.is_key_changed('name'):
raise InputError("The 'name' field of a Term cannot be changed.")
self.title_group.validate()
if self.name != self.get_primary_title():
raise InputError("Term name {} does not match primary title {}".format(self.name, self.get_primary_title()))

@staticmethod
def normalize(term, lang="en"):
""" Returns the primary title for of 'term' if it exists in the terms collection
otherwise return 'term' unchanged """
t = Term().load_by_title(term)
t = Term().load({'name': term})
return t.get_primary_title(lang=lang) if t else term


Expand Down Expand Up @@ -861,23 +892,6 @@ def validate(self):
if self.sharedTitle and Term().load({"name": self.sharedTitle}).titles != self.get_titles_object():
raise IndexSchemaError("Schema node {} with sharedTitle can not have explicit titles".format(self))

# disable this check while data is still not conforming to validation
if not self.sharedTitle and False:
special_book_cases = ["Genesis", "Exodus", "Leviticus", "Numbers", "Deuteronomy", "Judges"]
for title in self.title_group.titles:
title = title["text"]
if self.get_primary_title() in special_book_cases:
break
term = Term().load_by_title(title)
if term:
if "scheme" in list(vars(term).keys()):
if vars(term)["scheme"] == "Parasha":
raise InputError(
"Nodes that represent Parashot must contain the corresponding sharedTitles.")

# if not self.default and not self.primary_title("he"):
# raise IndexSchemaError("Schema node {} missing primary Hebrew title".format(self.key))

def serialize(self, **kwargs):
d = super(TitledTreeNode, self).serialize(**kwargs)
if self.default:
Expand Down
Loading
Loading