Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 56 additions & 16 deletions openedx/core/djangoapps/content/search/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,37 +17,39 @@
from meilisearch import Client as MeilisearchClient
from meilisearch.errors import MeilisearchApiError, MeilisearchError
from meilisearch.models.task import TaskInfo
from opaque_keys.edx.keys import UsageKey, OpaqueKey
from opaque_keys import OpaqueKey
from opaque_keys.edx.keys import UsageKey
from opaque_keys.edx.locator import (
LibraryCollectionLocator,
LibraryContainerLocator,
LibraryLocatorV2,
)
from openedx_learning.api import authoring as authoring_api
from common.djangoapps.student.roles import GlobalStaff
from rest_framework.request import Request

from common.djangoapps.student.role_helpers import get_course_roles
from common.djangoapps.student.roles import GlobalStaff
from openedx.core.djangoapps.content.course_overviews.models import CourseOverview
from openedx.core.djangoapps.content.search.models import get_access_ids_for_request, IncrementalIndexCompleted
from openedx.core.djangoapps.content.search.index_config import (
INDEX_DISTINCT_ATTRIBUTE,
INDEX_FILTERABLE_ATTRIBUTES,
INDEX_SEARCHABLE_ATTRIBUTES,
INDEX_SORTABLE_ATTRIBUTES,
INDEX_RANKING_RULES,
INDEX_SEARCHABLE_ATTRIBUTES,
INDEX_SORTABLE_ATTRIBUTES
)
from openedx.core.djangoapps.content.search.models import IncrementalIndexCompleted, get_access_ids_for_request
from openedx.core.djangoapps.content_libraries import api as lib_api
from xmodule.modulestore.django import modulestore

from .documents import (
Fields,
meili_id_from_opaque_key,
searchable_doc_for_course_block,
searchable_doc_collections,
searchable_doc_for_collection,
searchable_doc_for_container,
searchable_doc_for_course_block,
searchable_doc_for_library_block,
searchable_doc_for_key,
searchable_doc_collections,
searchable_doc_tags,
searchable_doc_tags_for_collection,
)
Expand Down Expand Up @@ -492,6 +494,7 @@ def index_container_batch(batch, num_done, library_key) -> int:
)
doc = searchable_doc_for_container(container_key)
doc.update(searchable_doc_tags(container_key))
doc.update(searchable_doc_collections(container_key))
docs.append(doc)
except Exception as err: # pylint: disable=broad-except
status_cb(f"Error indexing container {container.key}: {err}")
Expand Down Expand Up @@ -722,7 +725,7 @@ def upsert_library_collection_index_doc(collection_key: LibraryCollectionLocator

_delete_index_doc(doc[Fields.id])

update_components = True
update_items = True

# Hard-deleted collections are also deleted from the index,
# but their components are automatically updated as part of the deletion process, so we don't have to.
Expand All @@ -735,15 +738,17 @@ def upsert_library_collection_index_doc(collection_key: LibraryCollectionLocator
else:
already_indexed = _get_document_from_index(doc[Fields.id])
if not already_indexed:
update_components = True
update_items = True

_update_index_docs([doc])

# Asynchronously update the collection's components "collections" field
if update_components:
from .tasks import update_library_components_collections as update_task
if update_items:
from .tasks import update_library_components_collections as update_components_task
from .tasks import update_library_containers_collections as update_containers_task

update_task.delay(str(collection_key))
update_components_task.delay(str(collection_key))
update_containers_task.delay(str(collection_key))


def update_library_components_collections(
Expand Down Expand Up @@ -781,6 +786,41 @@ def update_library_components_collections(
_update_index_docs(docs)


def update_library_containers_collections(
    collection_key: LibraryCollectionLocator,
    batch_size: int = 1000,
) -> None:
    """
    Refresh the "collections" search field for every container in a Library Collection.

    Containers are paged through in groups of ``batch_size`` so that a large
    collection does not produce oversized Meilisearch update requests.
    """
    lib_key = collection_key.library_key
    learning_package_id = lib_api.get_library(lib_key).learning_package_id
    collection_containers = authoring_api.get_collection_containers(
        learning_package_id,
        collection_key.collection_id,
    )

    pages = Paginator(collection_containers, batch_size)
    for page_num in pages.page_range:
        # Build one index document per container on this page; each doc
        # carries the container's up-to-date "collections" field.
        batch_docs = [
            searchable_doc_collections(
                lib_api.library_container_locator(lib_key, container)
            )
            for container in pages.page(page_num).object_list
        ]

        log.info(
            f"Updating document.collections for library {lib_key} containers"
            f" page {page_num} / {pages.num_pages}"
        )
        _update_index_docs(batch_docs)


def upsert_library_container_index_doc(container_key: LibraryContainerLocator) -> None:
"""
Creates, updates, or deletes the document for the given Library Container in the search index.
Expand Down Expand Up @@ -827,12 +867,12 @@ def upsert_content_object_tags_index_doc(key: OpaqueKey):
_update_index_docs([doc])


def upsert_block_collections_index_docs(usage_key: UsageKey):
def upsert_item_collections_index_docs(opaque_key: OpaqueKey):
"""
Updates the collections data in documents for the given Course/Library block
Updates the collections data in documents for the given Course/Library block, or Container
"""
doc = {Fields.id: meili_id_from_opaque_key(usage_key)}
doc.update(searchable_doc_collections(usage_key))
doc = {Fields.id: meili_id_from_opaque_key(opaque_key)}
doc.update(searchable_doc_collections(opaque_key))
_update_index_docs([doc])


Expand Down
30 changes: 20 additions & 10 deletions openedx/core/djangoapps/content/search/documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -309,7 +309,7 @@ def _tags_for_content_object(object_id: OpaqueKey) -> dict:
return {Fields.tags: result}


def _collections_for_content_object(object_id: UsageKey | LearningContextKey) -> dict:
def _collections_for_content_object(object_id: OpaqueKey) -> dict:
"""
Given an XBlock, course, library, etc., get the collections for its index doc.

Expand Down Expand Up @@ -340,13 +340,23 @@ def _collections_for_content_object(object_id: UsageKey | LearningContextKey) ->
# Gather the collections associated with this object
collections = None
try:
component = lib_api.get_component_from_usage_key(object_id)
collections = authoring_api.get_entity_collections(
component.learning_package_id,
component.key,
)
if isinstance(object_id, UsageKey):
component = lib_api.get_component_from_usage_key(object_id)
collections = authoring_api.get_entity_collections(
component.learning_package_id,
component.key,
)
elif isinstance(object_id, LibraryContainerLocator):
container = lib_api.get_container_from_key(object_id)
collections = authoring_api.get_entity_collections(
container.publishable_entity.learning_package_id,
container.key,
)
else:
log.warning(f"Unexpected key type for {object_id}")

except ObjectDoesNotExist:
log.warning(f"No component found for {object_id}")
log.warning(f"No library item found for {object_id}")

if not collections:
return result
Expand Down Expand Up @@ -438,13 +448,13 @@ def searchable_doc_tags(key: OpaqueKey) -> dict:
return doc


def searchable_doc_collections(usage_key: UsageKey) -> dict:
def searchable_doc_collections(opaque_key: OpaqueKey) -> dict:
"""
Generate a dictionary document suitable for ingestion into a search engine
like Meilisearch or Elasticsearch, with the collections data for the given content object.
"""
doc = searchable_doc_for_key(usage_key)
doc.update(_collections_for_content_object(usage_key))
doc = searchable_doc_for_key(opaque_key)
doc.update(_collections_for_content_object(opaque_key))

return doc

Expand Down
16 changes: 8 additions & 8 deletions openedx/core/djangoapps/content/search/handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,9 @@

from .api import (
only_if_meilisearch_enabled,
upsert_block_collections_index_docs,
upsert_content_object_tags_index_doc,
upsert_collection_tags_index_docs,
upsert_item_collections_index_docs,
)
from .tasks import (
delete_library_block_index_doc,
Expand Down Expand Up @@ -211,15 +211,15 @@ def content_object_associations_changed_handler(**kwargs) -> None:

try:
# Check if valid course or library block
usage_key = UsageKey.from_string(str(content_object.object_id))
opaque_key = UsageKey.from_string(str(content_object.object_id))
except InvalidKeyError:
try:
# Check if valid library collection
usage_key = LibraryCollectionLocator.from_string(str(content_object.object_id))
opaque_key = LibraryCollectionLocator.from_string(str(content_object.object_id))
except InvalidKeyError:
try:
# Check if valid library container
usage_key = LibraryContainerLocator.from_string(str(content_object.object_id))
opaque_key = LibraryContainerLocator.from_string(str(content_object.object_id))
except InvalidKeyError:
# Invalid content object id
log.error("Received invalid content object id")
Expand All @@ -228,12 +228,12 @@ def content_object_associations_changed_handler(**kwargs) -> None:
# This event's changes may contain both "tags" and "collections", but this will happen rarely, if ever.
# So we allow a potential double "upsert" here.
if not content_object.changes or "tags" in content_object.changes:
if isinstance(usage_key, LibraryCollectionLocator):
upsert_collection_tags_index_docs(usage_key)
if isinstance(opaque_key, LibraryCollectionLocator):
upsert_collection_tags_index_docs(opaque_key)
else:
upsert_content_object_tags_index_doc(usage_key)
upsert_content_object_tags_index_doc(opaque_key)
if not content_object.changes or "collections" in content_object.changes:
upsert_block_collections_index_docs(usage_key)
upsert_item_collections_index_docs(opaque_key)


@receiver(LIBRARY_CONTAINER_CREATED)
Expand Down
14 changes: 14 additions & 0 deletions openedx/core/djangoapps/content/search/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,20 @@ def update_library_components_collections(collection_key_str: str) -> None:
api.update_library_components_collections(collection_key)


@shared_task(base=LoggedTask, autoretry_for=(MeilisearchError, ConnectionError))
@set_code_owner_attribute
def update_library_containers_collections(collection_key_str: str) -> None:
    """
    Celery task to update the "collections" field for containers in the given content library collection.

    Args:
        collection_key_str: serialized ``LibraryCollectionLocator`` naming the collection.
    """
    key = LibraryCollectionLocator.from_string(collection_key_str)

    # Lazy %-style args so the message is only formatted when the level is enabled.
    log.info(
        "Updating document.collections for library %s collection %s containers",
        key.library_key,
        key,
    )

    api.update_library_containers_collections(key)


@shared_task(base=LoggedTask, autoretry_for=(MeilisearchError, ConnectionError))
@set_code_owner_attribute
def update_library_container_index_doc(container_key_str: str) -> None:
Expand Down
Loading
Loading