diff --git a/reader/views.py b/reader/views.py index d481a2be7d..7e4d2241c5 100644 --- a/reader/views.py +++ b/reader/views.py @@ -1409,22 +1409,16 @@ def edit_text_info(request, title=None, new_title=None): @ensure_csrf_cookie @staff_member_required -def terms_editor(request, term=None): +def terms_editor(request, name): """ Add/Editor a term using the JSON Editor. """ - if term is not None: - existing_term = Term().load_by_title(term) - data = existing_term.contents() if existing_term else {"name": term, "titles": []} - else: - return render_template(request,'static/generic.html', None, { - "title": "Terms Editor", - "content": "Please include the primary Term name in the URL to uses the Terms Editor." - }) + existing_term = Term().load({'name': name}) + data = existing_term.contents() if existing_term else {"titles": []} dataJSON = json.dumps(data) return render_template(request,'edit_term.html', None, { - 'term': term, + 'term': name, 'dataJSON': dataJSON, 'is_update': "true" if existing_term else "false" }) @@ -2631,9 +2625,9 @@ def _internal_do_post(request, update, cat, uid, **kwargs): return jsonResponse({"error": "Missing data in POST request."}) j = json.loads(j) update = int(request.GET.get("update", False)) - new_category = Category().load({"path": j["path"]}) if "path" not in j: return jsonResponse({"error": "'path' is a required attribute"}) + new_category = Category().load({"path": j["path"]}) if not update and new_category is not None: return jsonResponse({"error": "Category {} already exists.".format(", ".join(j["path"]))}) @@ -2652,14 +2646,10 @@ def _internal_do_post(request, update, cat, uid, **kwargs): return {"error": f"Merging two categories named {last_path} is not supported."} elif "heSharedTitle" in j: # if heSharedTitle provided, make sure sharedTitle and heSharedTitle correspond to same Term - en_term = Term().load_by_title(last_path) - he_term = Term().load_by_title(he_last_path) - if en_term and en_term == he_term: - pass # both titles 
are found in an existing Term object - else: + existing_term = Term().load_by_primary_titles(last_path, he_last_path) + if not existing_term: # titles weren't found in same Term object, so try to create a new Term t = Term() - t.name = last_path t.add_primary_titles(last_path, he_last_path) t.save() @@ -2743,7 +2733,7 @@ def terms_api(request, name): This is mainly to be used for adding hebrew internationalization language for section names, categories and commentators """ if request.method == "GET": - term = Term().load({'name': name}) or Term().load_by_title(name) + term = Term().load({'name': name}) if term is None: return jsonResponse({"error": "Term does not exist."}) else: @@ -2751,7 +2741,7 @@ def terms_api(request, name): if request.method in ("POST", "DELETE"): def _internal_do_post(request, uid): - t = Term().load({'name': name}) or Term().load_by_title(name) + t = Term().load({'name': name}) if request.method == "POST": if "json" in request.POST: term = request.POST.get("json") diff --git a/requirements.txt b/requirements.txt index 18ed1cf5f1..8d02bfc30f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -45,7 +45,8 @@ google-auth==1.24.0 google-cloud-logging==1.15.1 google-cloud-storage==1.32.0 google-re2 -gunicorn==20.0.4 +gunicorn==23.0.0 +setuptools==69.5.1 html5lib==0.9999999 httplib2==0.18.1 ipython==7.34.* diff --git a/sefaria/client/wrapper.py b/sefaria/client/wrapper.py index 1f29cab4d2..b69adb4ff5 100644 --- a/sefaria/client/wrapper.py +++ b/sefaria/client/wrapper.py @@ -72,9 +72,10 @@ def format_link_object_for_client(link, with_text, ref, pos=None): # if the link is commentary, strip redundant info (e.g. "Rashi on Genesis 4:2" -> "Rashi") # this is now simpler, and there is explicit data on the index record for it. 
if com["type"] == "commentary": + collective_title = getattr(linkRef.index, 'collective_title', None) + term = library.get_simple_term_mapping().get(collective_title) or {} com["collectiveTitle"] = { - 'en': getattr(linkRef.index, 'collective_title', linkRef.index.title), - 'he': hebrew_term(getattr(linkRef.index, 'collective_title', linkRef.index.get_title("he"))) + lang: term.get(lang) or linkRef.index.get_title(lang) for lang in ('en', 'he') } else: com["collectiveTitle"] = {'en': linkRef.index.title, 'he': linkRef.index.get_title("he")} diff --git a/sefaria/helper/category.py b/sefaria/helper/category.py index 68e9abfd14..bb5ba25582 100644 --- a/sefaria/helper/category.py +++ b/sefaria/helper/category.py @@ -197,32 +197,3 @@ def update_order_of_category_children(cat, uid, subcategoriesAndBooks): result = tracker.update(uid, Category, cat) results.append(result.contents()) return results - - - - - -def check_term(last_path, he_last_path): - """ - if Category Editor is used, make sure English and Hebrew titles correspond to the same term. - if neither of the titles correspond to a term, create the appropriate term - :param last_path: (str) Corresponds to lastPath of Category and english title of Term - :param he_last_path: (str) Corresponds to a hebrew title of Term - """ - - error_msg = "" - en_term = Term().load_by_title(last_path) - he_term = Term().load_by_title(he_last_path) - - if en_term == he_term: - pass - if (en_term and he_term != en_term) or (he_term and he_term != en_term): - # they do not correspond, either because both terms exist but are not the same, or one term already - # exists but the other one doesn't exist - error_msg = f"English and Hebrew titles, {last_path} and {he_last_path}, do not correspond to the same term. Please use the term editor." 
- elif en_term is None and he_term is None: - t = Term() - t.name = last_path - t.add_primary_titles(last_path, he_last_path) - t.save() - return error_msg diff --git a/sefaria/helper/schema.py b/sefaria/helper/schema.py index 76ee3f9230..a41a49c91f 100644 --- a/sefaria/helper/schema.py +++ b/sefaria/helper/schema.py @@ -1,12 +1,14 @@ # -*- coding: utf-8 -*- - +from sefaria.settings import MULTISERVER_ENABLED from sefaria.model import * +from sefaria.model import library from sefaria.model.abstract import AbstractMongoRecord from sefaria.model.marked_up_text_chunk import MarkedUpTextChunkSet from sefaria.model.schema import DictionaryNode from sefaria.system.exceptions import InputError from sefaria.system.database import db from sefaria.sheets import save_sheet +from sefaria.system.multiserver.coordinator import server_coordinator from sefaria.utils.util import list_depth, traverse_dict_tree import re @@ -705,7 +707,7 @@ def construct_query(attribute, queries): generic_rewrite(RefDataSet(construct_query('ref', identifier))) print('Updating Topic Links') generic_rewrite(RefTopicLinkSet(construct_query('ref', identifier))) - generic_rewrite(RefTopicLinkSet(construct_query('expandedRefs', identifier))) + generic_rewrite(RefTopicLinkSet(construct_query('expandedRefs', identifier)), attr_name='expandedRefs') print('Updating Garden Stops') generic_rewrite(GardenStopSet(construct_query('ref', identifier))) print('Updating Sheets') @@ -715,11 +717,11 @@ def construct_query(attribute, queries): print('Updating Marked Up Text Chunks') generic_rewrite(MarkedUpTextChunkSet(construct_query('ref', identifier))) print('Updating Manuscripts') - generic_rewrite(ManuscriptSet(construct_query('contained_refs', identifier))) - generic_rewrite(ManuscriptSet(construct_query('expanded_refs', identifier))) + generic_rewrite(ManuscriptPageSet(construct_query('contained_refs', identifier)), attr_name='contained_refs') + generic_rewrite(ManuscriptPageSet(construct_query('expanded_refs', 
identifier)), attr_name='expanded_refs') print('Updating WebPages') - generic_rewrite(WebPageSet(construct_query('refs', identifier))) - generic_rewrite(ManuscriptSet(construct_query('expandedRefs', identifier))) + generic_rewrite(WebPageSet(construct_query('refs', identifier)), attr_name='refs') + generic_rewrite(WebPageSet(construct_query('expandedRefs', identifier)), attr_name='expandedRefs') if not skip_history: print('Updating History') generic_rewrite(HistorySet(construct_query('ref', identifier), sort=[('ref', 1)])) @@ -1119,3 +1121,35 @@ def update_headwords_map(dictionary_node): if quoted: print(f'Other entries in this lexicon with this old headword as ref: {", ".join(quoted)}') print('Warning: old ref can appear as wrapped ref in other places in the library.') + + +def cascade_node_shared_title_change(node, old): + old_address = node.address()[:-1] + [old] + old_pattern = rf"^{re.escape(', '.join(old_address))}(?=$|, |:| \d)" + new_replacement = node.full_title() + + needs_rewrite = lambda ref_str, *args: re.search(old_pattern, ref_str) + rewriter = lambda ref_str: re.sub(old_pattern, new_replacement, ref_str) + + print(f'Cascading from {old_pattern} to {new_replacement}') + cascade(node.index.title, rewriter, needs_rewrite) + + +def process_term_primary_title_change(term, **kwargs): + """ + When a Term's primary title (en or he) changes, rebuild library caches. + This updates term mapping, categories, and indexes that reference this term.
+ """ + old = kwargs.get("old") + attr = kwargs.get("attr") + + library.rebuild(include_toc=True) + + if MULTISERVER_ENABLED: + server_coordinator.publish_event("library", "rebuild", [True]) + + if attr == "_primary_en": # Now new refs are available and can be cascaded + for index in library.all_index_records(): + for node in [index.nodes] + index.nodes.all_children(): + if getattr(node, "sharedTitle", None) == old: + cascade_node_shared_title_change(node, old) diff --git a/sefaria/model/dependencies.py b/sefaria/model/dependencies.py index 7e95115119..a84e06a60f 100644 --- a/sefaria/model/dependencies.py +++ b/sefaria/model/dependencies.py @@ -1,7 +1,7 @@ """ dependencies.py -- list cross model dependencies and subscribe listeners to changes. """ - +import sefaria.helper.schema from . import abstract, link, note, history, schema, text, layer, version_state, timeperiod, garden, notification, collection, library, category, ref_data, user_profile, manuscript, topic, place, marked_up_text_chunk from .abstract import subscribe, cascade, cascade_to_list, cascade_delete, cascade_delete_to_list @@ -80,17 +80,16 @@ def process_version_title_change_in_search(ver, **kwargs): # Terms -# TODO cascade change to Term.name. 
-# TODO Current locations where we know terms are used [Index, Categories] -# TODO Use Sefaria-Project/scripts/search_for_indexes_that_use_terms.py for now +# TermScheme name change cascades to Term.scheme field subscribe(cascade(schema.TermSet, "scheme"), schema.TermScheme, "attributeChange", "name") + +# Term save/delete rebuilds the term mapping cache subscribe(text.reset_simple_term_mapping, schema.Term, "delete") subscribe(text.reset_simple_term_mapping, schema.Term, "save") -""" -Notes on where Terms are used -Index (alt structs and schema) -Category -""" + +# Term primary title change rebuilds library (term mapping, categories, indexes) +subscribe(sefaria.helper.schema.process_term_primary_title_change, schema.Term, "attributeChange", "_primary_en") +subscribe(sefaria.helper.schema.process_term_primary_title_change, schema.Term, "attributeChange", "_primary_he") # Time subscribe(cascade(topic.PersonTopicSet, "properties.era.value"), timeperiod.TimePeriod, "attributeChange", "symbol") diff --git a/sefaria/model/schema.py b/sefaria/model/schema.py index 4ced240d8c..ced5345e76 100644 --- a/sefaria/model/schema.py +++ b/sefaria/model/schema.py @@ -15,7 +15,8 @@ from sefaria.system.database import db from sefaria.model.lexicon import LexiconEntrySet from sefaria.model.linker.has_match_template import MatchTemplateMixin -from sefaria.system.exceptions import InputError, IndexSchemaError, DictionaryEntryNotFoundError, SheetNotFoundError +from sefaria.system.exceptions import InputError, IndexSchemaError, DictionaryEntryNotFoundError, SheetNotFoundError, \ + DuplicateRecordError from sefaria.utils.hebrew import decode_hebrew_numeral, encode_small_hebrew_numeral, encode_hebrew_numeral, encode_hebrew_daf, hebrew_term, sanitize from sefaria.utils.talmud import daf_to_section @@ -241,7 +242,7 @@ class Term(abst.AbstractMongoRecord, AbstractTitledObject): """ collection = 'term' track_pkeys = True - pkeys = ["name"] + pkeys = ["name", "_primary_en", "_primary_he"] 
title_group = None history_noun = "term" @@ -258,35 +259,65 @@ class Term(abst.AbstractMongoRecord, AbstractTitledObject): "description" ] - def load_by_title(self, title): - query = {'titles.text': title} + def load_by_primary_titles(self, en_title, he_title): + query = { + 'titles': { + '$all': [{'$elemMatch': { + 'text': t, 'primary': True + }} for t in [en_title, he_title]] + } + } return self.load(query=query) + def _update_tracked_primary_titles(self): + self._primary_en = self.get_primary_title("en") + self._primary_he = self.get_primary_title("he") + + def _set_pkeys(self): + self.set_titles(getattr(self, "titles", None)) + self._update_tracked_primary_titles() + super()._set_pkeys() + def _set_derived_attributes(self): self.set_titles(getattr(self, "titles", None)) def set_titles(self, titles): self.title_group = TitleGroup(titles) + def _set_name(self): + name = base_name = self.get_primary_title() + terms = TermSet({'name': {'$regex': rf'^{re.escape(name)}\d*$'}}) + existing_names = {t.name for t in terms} + i = 1 + while name in existing_names: + name = f"{base_name}{i}" + i += 1 + self.name = name + def _normalize(self): self.titles = self.title_group.titles + self._update_tracked_primary_titles() + if not hasattr(self, 'name'): + self._set_name() def _validate(self): super(Term, self)._validate() - # do not allow duplicates: - for title in self.get_titles(): - other_term = Term().load_by_title(title) - if other_term and not self.same_record(other_term): - raise InputError("A Term with the title {} in it already exists".format(title)) + # ensure uniqueness of primary titles together + same_titles_term = Term().load_by_primary_titles(self.get_primary_title(), self.get_primary_title('he')) + if same_titles_term and not self.same_record(same_titles_term): + raise DuplicateRecordError(f"A Term with the primary titles {self.get_primary_title()} and {self.get_primary_title('he')} already exists") + # do not allow duplicate names + if self.is_new() and
Term().load({'name': self.name}): + raise DuplicateRecordError(f"A Term with the name {self.name} already exists") + elif not self.is_new() and self.is_key_changed('name'): + raise InputError("The 'name' field of a Term cannot be changed.") self.title_group.validate() - if self.name != self.get_primary_title(): - raise InputError("Term name {} does not match primary title {}".format(self.name, self.get_primary_title())) @staticmethod def normalize(term, lang="en"): """ Returns the primary title for of 'term' if it exists in the terms collection otherwise return 'term' unchanged """ - t = Term().load_by_title(term) + t = Term().load({'name': term}) return t.get_primary_title(lang=lang) if t else term @@ -861,23 +892,6 @@ def validate(self): if self.sharedTitle and Term().load({"name": self.sharedTitle}).titles != self.get_titles_object(): raise IndexSchemaError("Schema node {} with sharedTitle can not have explicit titles".format(self)) - # disable this check while data is still not conforming to validation - if not self.sharedTitle and False: - special_book_cases = ["Genesis", "Exodus", "Leviticus", "Numbers", "Deuteronomy", "Judges"] - for title in self.title_group.titles: - title = title["text"] - if self.get_primary_title() in special_book_cases: - break - term = Term().load_by_title(title) - if term: - if "scheme" in list(vars(term).keys()): - if vars(term)["scheme"] == "Parasha": - raise InputError( - "Nodes that represent Parashot must contain the corresponding sharedTitles.") - - # if not self.default and not self.primary_title("he"): - # raise IndexSchemaError("Schema node {} missing primary Hebrew title".format(self.key)) - def serialize(self, **kwargs): d = super(TitledTreeNode, self).serialize(**kwargs) if self.default: diff --git a/sefaria/model/tests/terms_test.py b/sefaria/model/tests/terms_test.py index 90cfe368af..8a41b7ea52 100644 --- a/sefaria/model/tests/terms_test.py +++ b/sefaria/model/tests/terms_test.py @@ -2,7 +2,8 @@ import pytest from 
sefaria.model import * -from sefaria.system.exceptions import InputError +from sefaria.system.exceptions import InputError, DuplicateRecordError + class Test_Terms_Validation(object): @classmethod @@ -19,10 +20,6 @@ def test_existing_term(self): Term().load({"name": 'Torah'}).title_group.validate() Term().load({"name": 'Verse'}).title_group.validate() - def test_load_by_non_primary_title(self): - assert Term().load_by_title('Nachmanides') is not None - assert Term().load_by_title('פרשת לך לך') is not None - def test_add_duplicate_primary(self): with pytest.raises(InputError): term = Term({ @@ -90,7 +87,7 @@ def test_add_new_term(self): }).save() def test_duplicate_terms(self): - with pytest.raises(InputError): + with pytest.raises(DuplicateRecordError): Term({ "scheme": "commentary_works", "titles": [ @@ -108,51 +105,28 @@ def test_duplicate_terms(self): "name": "Ramban" }).save() - with pytest.raises(InputError): - Term({ - "scheme": "commentary_works", - "titles": [ - { - "lang": "en", - "text": "New Ramban", - "primary": True - }, - { - "lang": "en", - "text": "Ramban", - }, - { - "lang": "he", - "text": "רמב\"ן חדש", - "primary": True - }, - ], - "name": "New Ramban" - }).save() - with pytest.raises(InputError): - Term({"name" : "Parashat Nitzavim", - "titles" : [ - { - "lang" : "en", - "text" : "Parashat Nitzavim", - "primary" : True - }, - { - "lang" : "he", - "text" : "נצבים", - "primary" : True - }, - { - "lang" : "en", - "text" : "Nitzavim" - }, - { - "lang" : "he", - "text" : "פרשת נצבים" - } - ], - "scheme" : "Parasha"}).save() + def test_valid_duplicate_title(self): + Term({ + "scheme": "commentary_works", + "titles": [ + { + "lang": "en", + "text": "New Ramban", + "primary": True + }, + { + "lang": "en", + "text": "Ramban", + }, + { + "lang": "he", + "text": "רמב\"ן חדש", + "primary": True + }, + ], + "name": "New Ramban" + }).save() def test_add_invalid_terms(self): with pytest.raises(InputError): # no heb title at all @@ -215,24 +189,6 @@ def 
test_add_invalid_terms(self): ] }).save() - with pytest.raises(InputError): - Term({ - "name": "Test Fail Five", # name not the same as primary - "scheme": "testing_terms", - "titles" : [ - { - "lang": "en", - "text": "alalalalalala", - "primary": True - }, - { - "lang": "he", - "text": "גלדכחשדף", - "primary": True - } - ] - }).save() - # for ascii validation with pytest.raises(InputError): Term({ diff --git a/sefaria/model/text.py b/sefaria/model/text.py index 7771bb649b..e62dd9f403 100644 --- a/sefaria/model/text.py +++ b/sefaria/model/text.py @@ -6,7 +6,7 @@ import time import structlog from functools import reduce, partial -from typing import Optional, Union +from typing import Union from remote_config.keys import REF_CACHE_LIMIT_KEY logger = structlog.get_logger(__name__) @@ -18,17 +18,17 @@ import json import itertools from collections import defaultdict, OrderedDict -from bs4 import BeautifulSoup, Tag +from bs4 import BeautifulSoup import re2 as re from . import abstract as abst -from django_topics.models.topic import Topic as DjangoTopic -from .schema import deserialize_tree, AltStructNode, VirtualNode, DictionaryNode, JaggedArrayNode, TitledTreeNode, DictionaryEntryNode, SheetNode, AddressTalmud, Term, TermSet, TitleGroup, AddressType +from .schema import deserialize_tree, AltStructNode, VirtualNode, JaggedArrayNode, TitledTreeNode, DictionaryEntryNode, SheetNode, AddressTalmud, Term, TermSet, \ + AddressType from sefaria.system.database import db import sefaria.system.cache as scache from sefaria.system.cache import in_memory_cache from sefaria.system.exceptions import InputError, BookNameError, PartialRefInputError, IndexSchemaError, \ - NoVersionFoundError, DictionaryEntryNotFoundError, MissingKeyError, ComplexBookLevelRefError + NoVersionFoundError, MissingKeyError, ComplexBookLevelRefError from sefaria.utils.hebrew import has_hebrew, is_all_hebrew, hebrew_term from sefaria.utils.util import list_depth, truncate_string from 
sefaria.datatype.jagged_array import JaggedTextArray, JaggedArray @@ -5671,7 +5671,7 @@ def _build_topic_mapping(self): value of the topic's Hebrew primary title. :returns: topic map for the given slug Dictionary """ - from .topic import Topic, TopicSet + from .topic import TopicSet self._topic_mapping = {t.slug: {"en": t.get_primary_title("en"), "he": t.get_primary_title("he")} for t in TopicSet()} return self._topic_mapping diff --git a/sefaria/system/database.py b/sefaria/system/database.py index c0d7cdf15e..9cc5e393a4 100644 --- a/sefaria/system/database.py +++ b/sefaria/system/database.py @@ -149,7 +149,8 @@ def ensure_indices(active_db=None): ('word_form', ["form"],{}), ('word_form', ["c_form"],{}), ('word_form', ["refs"], {}), - ('term', ["titles.text"], {'unique': True}), + ('term', ["titles.text"], {}), + ('term', ["name"], {'unique': True}), ('term', ["category"],{}), ('lexicon_entry', [[("headword", pymongo.ASCENDING), ("parent_lexicon", pymongo.ASCENDING)]],{}), ('user_story', ["uid"],{}), diff --git a/sefaria/urls_library.py b/sefaria/urls_library.py index bc797786cc..8b4f5857da 100644 --- a/sefaria/urls_library.py +++ b/sefaria/urls_library.py @@ -55,8 +55,7 @@ url(r'^add/new/?$', reader_views.edit_text), url(r'^add/(?P<new_title>.+)$', reader_views.edit_text), url(r'^translate/(?P<ref>.+)$', reader_views.edit_text), - url(r'^edit/terms/(?P<term>.+)$', reader_views.terms_editor), - url(r'^add/terms/(?P<term>.+)$', reader_views.terms_editor), + url(r'^edit/terms/(?P<name>.+)$', reader_views.terms_editor), url(r'^edit/(?P<ref>.+)/(?P<lang>\w\w)/(?P<version>.+)$', reader_views.edit_text), url(r'^edit/(?P<ref>.+)$', reader_views.edit_text), diff --git a/sefaria/utils/calendars.py b/sefaria/utils/calendars.py index 7f3a596e90..95a347e6db 100644 --- a/sefaria/utils/calendars.py +++ b/sefaria/utils/calendars.py @@ -364,7 +364,7 @@ def get_hok_parasha(datetime_obj, diaspora=diaspora): parasha = parasha.split('-')[0] if parasha == 'Shmini Atzeret': parasha = "V'Zot HaBerachah" - parasha_term = Term().load({'category': 'Torah 
Portions', 'titles': {'$elemMatch': {'text': parasha}}}) + parasha_term = Term().load({'category': 'Torah Portions', 'name': parasha}) if not parasha_term: parasha_term = get_hok_parasha(datetime_obj + datetime.timedelta(7), diaspora=diaspora) return parasha_term