Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 19 additions & 11 deletions openedx/core/djangoapps/content/search/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,11 @@
from meilisearch.errors import MeilisearchApiError, MeilisearchError
from meilisearch.models.task import TaskInfo
from opaque_keys.edx.keys import UsageKey, OpaqueKey
from opaque_keys.edx.locator import LibraryContainerLocator, LibraryLocatorV2, LibraryCollectionLocator
from opaque_keys.edx.locator import (
LibraryCollectionLocator,
LibraryContainerLocator,
LibraryLocatorV2,
)
from openedx_learning.api import authoring as authoring_api
from common.djangoapps.student.roles import GlobalStaff
from rest_framework.request import Request
Expand Down Expand Up @@ -461,8 +465,9 @@ def index_collection_batch(batch, num_done, library_key) -> int:
docs = []
for collection in batch:
try:
doc = searchable_doc_for_collection(library_key, collection.key, collection=collection)
doc.update(searchable_doc_tags_for_collection(library_key, collection.key))
collection_key = lib_api.library_collection_locator(library_key, collection.key)
doc = searchable_doc_for_collection(collection_key, collection=collection)
doc.update(searchable_doc_tags_for_collection(collection_key))
docs.append(doc)
except Exception as err: # pylint: disable=broad-except
status_cb(f"Error indexing collection {collection}: {err}")
Expand Down Expand Up @@ -702,13 +707,13 @@ def _get_document_from_index(document_id: str) -> dict:
return document


def upsert_library_collection_index_doc(library_key: LibraryLocatorV2, collection_key: str) -> None:
def upsert_library_collection_index_doc(collection_key: LibraryCollectionLocator) -> None:
"""
Creates, updates, or deletes the document for the given Library Collection in the search index.

If the Collection is not found or disabled (i.e. soft-deleted), then delete it from the search index.
"""
doc = searchable_doc_for_collection(library_key, collection_key)
doc = searchable_doc_for_collection(collection_key)
update_components = False

# Soft-deleted/disabled collections are removed from the index
Expand Down Expand Up @@ -738,21 +743,24 @@ def upsert_library_collection_index_doc(library_key: LibraryLocatorV2, collectio
if update_components:
from .tasks import update_library_components_collections as update_task

update_task.delay(str(library_key), collection_key)
update_task.delay(str(collection_key))


def update_library_components_collections(
library_key: LibraryLocatorV2,
collection_key: str,
collection_key: LibraryCollectionLocator,
batch_size: int = 1000,
) -> None:
"""
Updates the "collections" field for all components associated with a given Library Collection.

Because there may be a lot of components, we send these updates to Meilisearch in batches.
"""
library_key = collection_key.library_key
library = lib_api.get_library(library_key)
components = authoring_api.get_collection_components(library.learning_package_id, collection_key)
components = authoring_api.get_collection_components(
library.learning_package_id,
collection_key.collection_id,
)

paginator = Paginator(components, batch_size)
for page in paginator.page_range:
Expand Down Expand Up @@ -828,12 +836,12 @@ def upsert_block_collections_index_docs(usage_key: UsageKey):
_update_index_docs([doc])


def upsert_collection_tags_index_docs(collection_usage_key: LibraryCollectionLocator):
def upsert_collection_tags_index_docs(collection_key: LibraryCollectionLocator):
"""
Updates the tags data in documents for the given library collection
"""

doc = searchable_doc_tags_for_collection(collection_usage_key.library_key, collection_usage_key.collection_id)
doc = searchable_doc_tags_for_collection(collection_key)
_update_index_docs([doc])


Expand Down
35 changes: 12 additions & 23 deletions openedx/core/djangoapps/content/search/documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from django.core.exceptions import ObjectDoesNotExist
from django.utils.text import slugify
from opaque_keys.edx.keys import LearningContextKey, UsageKey, OpaqueKey
from opaque_keys.edx.locator import LibraryContainerLocator, LibraryLocatorV2
from opaque_keys.edx.locator import LibraryCollectionLocator, LibraryContainerLocator
from openedx_learning.api import authoring as authoring_api
from openedx_learning.api.authoring_models import Collection
from rest_framework.exceptions import NotFound
Expand Down Expand Up @@ -450,19 +450,14 @@ def searchable_doc_collections(usage_key: UsageKey) -> dict:


def searchable_doc_tags_for_collection(
library_key: LibraryLocatorV2,
collection_key: str,
collection_key: LibraryCollectionLocator
) -> dict:
"""
Generate a dictionary document suitable for ingestion into a search engine
like Meilisearch or Elasticsearch, with the tags data for the given library collection.
"""
collection_usage_key = lib_api.get_library_collection_usage_key(
library_key,
collection_key,
)
doc = searchable_doc_for_key(collection_usage_key)
doc.update(_tags_for_content_object(collection_usage_key))
doc = searchable_doc_for_key(collection_key)
doc.update(_tags_for_content_object(collection_key))

return doc

Expand All @@ -484,8 +479,7 @@ def searchable_doc_for_course_block(block) -> dict:


def searchable_doc_for_collection(
library_key: LibraryLocatorV2,
collection_key: str,
collection_key: LibraryCollectionLocator,
*,
# Optionally provide the collection if we've already fetched one
collection: Collection | None = None,
Expand All @@ -498,21 +492,16 @@ def searchable_doc_for_collection(
If no collection is found for the given library_key + collection_key, the returned document will contain only basic
information derived from the collection usage key, and no Fields.type value will be included in the returned dict.
"""
collection_usage_key = lib_api.get_library_collection_usage_key(
library_key,
collection_key,
)

doc = searchable_doc_for_key(collection_usage_key)
doc = searchable_doc_for_key(collection_key)

try:
collection = collection or lib_api.get_library_collection_from_usage_key(collection_usage_key)
collection = collection or lib_api.get_library_collection_from_locator(collection_key)
except lib_api.ContentLibraryCollectionNotFound:
# Collection not found, so we can only return the base doc
pass

if collection:
assert collection.key == collection_key
assert collection.key == collection_key.collection_id

draft_num_children = authoring_api.filter_publishable_entities(
collection.entities,
Expand All @@ -524,9 +513,9 @@ def searchable_doc_for_collection(
).count()

doc.update({
Fields.context_key: str(library_key),
Fields.org: str(library_key.org),
Fields.usage_key: str(collection_usage_key),
Fields.context_key: str(collection_key.library_key),
Fields.org: str(collection_key.org),
Fields.usage_key: str(collection_key),
Fields.block_id: collection.key,
Fields.type: DocType.collection,
Fields.display_name: collection.title,
Expand All @@ -537,7 +526,7 @@ def searchable_doc_for_collection(
Fields.published: {
Fields.published_num_children: published_num_children,
},
Fields.access_id: _meili_access_id_from_context_key(library_key),
Fields.access_id: _meili_access_id_from_context_key(collection_key.library_key),
Fields.breadcrumbs: [{"display_name": collection.learning_package.title}],
})

Expand Down
12 changes: 4 additions & 8 deletions openedx/core/djangoapps/content/search/handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,16 +187,14 @@ def library_collection_updated_handler(**kwargs) -> None:

if library_collection.background:
update_library_collection_index_doc.delay(
str(library_collection.library_key),
library_collection.collection_key,
str(library_collection.collection_key),
)
else:
# Update collection index synchronously to make sure that search index is updated before
# the frontend invalidates/refetches index.
# See content_library_updated_handler for more details.
update_library_collection_index_doc.apply(args=[
str(library_collection.library_key),
library_collection.collection_key,
str(library_collection.collection_key),
])


Expand Down Expand Up @@ -252,16 +250,14 @@ def library_container_updated_handler(**kwargs) -> None:

if library_container.background:
update_library_container_index_doc.delay(
str(library_container.library_key),
library_container.container_key,
str(library_container.container_key),
)
else:
# Update container index synchronously to make sure that search index is updated before
# the frontend invalidates/refetches index.
# See content_library_updated_handler for more details.
update_library_container_index_doc.apply(args=[
str(library_container.library_key),
library_container.container_key,
str(library_container.container_key),
])


Expand Down
19 changes: 11 additions & 8 deletions openedx/core/djangoapps/content/search/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from meilisearch.errors import MeilisearchError
from opaque_keys.edx.keys import UsageKey
from opaque_keys.edx.locator import (
LibraryCollectionLocator,
LibraryContainerLocator,
LibraryLocatorV2,
LibraryUsageLocatorV2,
Expand Down Expand Up @@ -92,38 +93,40 @@ def update_content_library_index_docs(library_key_str: str) -> None:

@shared_task(base=LoggedTask, autoretry_for=(MeilisearchError, ConnectionError))
@set_code_owner_attribute
def update_library_collection_index_doc(library_key_str: str, collection_key: str) -> None:
def update_library_collection_index_doc(collection_key_str: str) -> None:
"""
Celery task to update the content index document for a library collection
"""
library_key = LibraryLocatorV2.from_string(library_key_str)
collection_key = LibraryCollectionLocator.from_string(collection_key_str)
library_key = collection_key.library_key

log.info("Updating content index documents for collection %s in library%s", collection_key, library_key)

api.upsert_library_collection_index_doc(library_key, collection_key)
api.upsert_library_collection_index_doc(collection_key)


@shared_task(base=LoggedTask, autoretry_for=(MeilisearchError, ConnectionError))
@set_code_owner_attribute
def update_library_components_collections(library_key_str: str, collection_key: str) -> None:
def update_library_components_collections(collection_key_str: str) -> None:
"""
Celery task to update the "collections" field for components in the given content library collection.
"""
library_key = LibraryLocatorV2.from_string(library_key_str)
collection_key = LibraryCollectionLocator.from_string(collection_key_str)
library_key = collection_key.library_key

log.info("Updating document.collections for library %s collection %s components", library_key, collection_key)

api.update_library_components_collections(library_key, collection_key)
api.update_library_components_collections(collection_key)


@shared_task(base=LoggedTask, autoretry_for=(MeilisearchError, ConnectionError))
@set_code_owner_attribute
def update_library_container_index_doc(library_key_str: str, container_key_str: str) -> None:
def update_library_container_index_doc(container_key_str: str) -> None:
"""
Celery task to update the content index document for a library container
"""
library_key = LibraryLocatorV2.from_string(library_key_str)
container_key = LibraryContainerLocator.from_string(container_key_str)
library_key = container_key.library_key

log.info("Updating content index documents for container %s in library%s", container_key, library_key)

Expand Down
11 changes: 7 additions & 4 deletions openedx/core/djangoapps/content/search/tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from datetime import datetime, timezone
from unittest.mock import MagicMock, Mock, call, patch
from opaque_keys.edx.keys import UsageKey
from opaque_keys.edx.locator import LibraryCollectionLocator

import ddt
import pytest
Expand Down Expand Up @@ -188,11 +189,13 @@ def setUp(self):
created_by=None,
description="my collection description"
)
self.collection_usage_key = "lib-collection:org1:lib:MYCOL"
self.collection_key = LibraryCollectionLocator.from_string(
"lib-collection:org1:lib:MYCOL",
)
self.collection_dict = {
"id": "lib-collectionorg1libmycol-5b647617",
"block_id": self.collection.key,
"usage_key": self.collection_usage_key,
"usage_key": str(self.collection_key),
"type": "collection",
"display_name": "my_collection",
"description": "my collection description",
Expand Down Expand Up @@ -737,8 +740,8 @@ def test_delete_all_drafts(self, mock_meilisearch):
@override_settings(MEILISEARCH_ENABLED=True)
def test_index_tags_in_collections(self, mock_meilisearch):
# Tag collection
tagging_api.tag_object(self.collection_usage_key, self.taxonomyA, ["one", "two"])
tagging_api.tag_object(self.collection_usage_key, self.taxonomyB, ["three", "four"])
tagging_api.tag_object(str(self.collection_key), self.taxonomyA, ["one", "two"])
tagging_api.tag_object(str(self.collection_key), self.taxonomyB, ["three", "four"])

# Build expected docs with tags at each stage
doc_collection_with_tags1 = {
Expand Down
25 changes: 15 additions & 10 deletions openedx/core/djangoapps/content/search/tests/test_documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from datetime import datetime, timezone

from freezegun import freeze_time
from opaque_keys.edx.locator import LibraryCollectionLocator, LibraryContainerLocator
from openedx_learning.api import authoring as authoring_api
from organizations.models import Organization

Expand Down Expand Up @@ -81,7 +82,9 @@ def setUpClass(cls):
created_by=None,
description="my toy collection description"
)
cls.collection_usage_key = "lib-collection:edX:2012_Fall:TOY_COLLECTION"
cls.collection_key = LibraryCollectionLocator.from_string(
"lib-collection:edX:2012_Fall:TOY_COLLECTION",
)
cls.library_block = library_api.create_library_block(
cls.library.key,
"html",
Expand All @@ -94,7 +97,9 @@ def setUpClass(cls):
title="A Unit in the Search Index",
user_id=None,
)
cls.container_usage_key = "lct:edX:2012_Fall:unit:unit1"
cls.container_key = LibraryContainerLocator.from_string(
"lct:edX:2012_Fall:unit:unit1",
)

# Add the problem block to the collection
library_api.update_library_collection_components(
Expand Down Expand Up @@ -123,8 +128,8 @@ def setUpClass(cls):
tagging_api.tag_object(str(cls.html_block_key), cls.subject_tags, tags=["Chinese", "Jump Links"])
tagging_api.tag_object(str(cls.html_block_key), cls.difficulty_tags, tags=["Normal"])
tagging_api.tag_object(str(cls.library_block.usage_key), cls.difficulty_tags, tags=["Normal"])
tagging_api.tag_object(cls.collection_usage_key, cls.difficulty_tags, tags=["Normal"])
tagging_api.tag_object(cls.container_usage_key, cls.difficulty_tags, tags=["Normal"])
tagging_api.tag_object(str(cls.collection_key), cls.difficulty_tags, tags=["Normal"])
tagging_api.tag_object(str(cls.container_key), cls.difficulty_tags, tags=["Normal"])

@property
def toy_course_access_id(self):
Expand Down Expand Up @@ -451,13 +456,13 @@ def test_html_published_library_block(self):
assert doc["publish_status"] == "modified"

def test_collection_with_library(self):
doc = searchable_doc_for_collection(self.library.key, self.collection.key)
doc.update(searchable_doc_tags_for_collection(self.library.key, self.collection.key))
doc = searchable_doc_for_collection(self.collection_key)
doc.update(searchable_doc_tags_for_collection(self.collection_key))

assert doc == {
"id": "lib-collectionedx2012_falltoy_collection-d1d907a4",
"block_id": self.collection.key,
"usage_key": self.collection_usage_key,
"usage_key": str(self.collection_key),
"type": "collection",
"org": "edX",
"display_name": "Toy Collection",
Expand All @@ -480,13 +485,13 @@ def test_collection_with_library(self):
def test_collection_with_published_library(self):
library_api.publish_changes(self.library.key)

doc = searchable_doc_for_collection(self.library.key, self.collection.key)
doc.update(searchable_doc_tags_for_collection(self.library.key, self.collection.key))
doc = searchable_doc_for_collection(self.collection_key)
doc.update(searchable_doc_tags_for_collection(self.collection_key))

assert doc == {
"id": "lib-collectionedx2012_falltoy_collection-d1d907a4",
"block_id": self.collection.key,
"usage_key": self.collection_usage_key,
"usage_key": str(self.collection_key),
"type": "collection",
"org": "edX",
"display_name": "Toy Collection",
Expand Down
Loading
Loading