From 496b6a17a3b1700231347fbf03c6adf6df2fb816 Mon Sep 17 00:00:00 2001 From: kemalatgithub Date: Mon, 4 Aug 2025 10:02:45 +0300 Subject: [PATCH 1/3] The rebased branch supports both QueryDict and regular dictionaries, enabling modern JSON-style payloads. It also correctly processes multiple values per key, producing clean, predictable lists. This aligns it with the multi-organization course filtering features planned in this PR. --- search/api.py | 26 +++++++++ search/elastic.py | 12 ++++ search/filter_generator.py | 36 +++++++++++- search/views.py | 109 +++++++++++++++++++++++++------------ 4 files changed, 144 insertions(+), 39 deletions(-) diff --git a/search/api.py b/search/api.py index 87586cd1..f95bcc96 100644 --- a/search/api.py +++ b/search/api.py @@ -171,3 +171,29 @@ def course_discovery_search( ) return results + +# helper function for building filters from dicts (optional): +# def build_filters(field_dictionary): +# """ +# Convert field_dictionary into Elasticsearch filter clauses, +# using 'terms' for multi-value filters and 'term' for single-value filters. 
+# """ +# filters = [] +# for field, values in field_dictionary.items(): +# if not isinstance(values, list): +# values = [values] + +# if len(values) > 1: +# filters.append({ +# "terms": { +# field: values +# } +# }) +# else: +# filters.append({ +# "term": { +# field: values[0] +# } +# }) + +# return filters \ No newline at end of file diff --git a/search/elastic.py b/search/elastic.py index 4607abd7..44a1e823 100644 --- a/search/elastic.py +++ b/search/elastic.py @@ -211,6 +211,18 @@ def _process_filters(filter_dictionary): }, } + # --- update to handle multiple fields --- +# def _process_filters(filter_dictionary): +# for field, value in filter_dictionary.items(): +# if value: +# field_filter = _get_filter_field(field, value) +# # If the helper returns a list of filters, yield each filter one by one +# if isinstance(field_filter, list): +# for multi_filter in field_filter: +# yield multi_filter +# else: +# # If a single filter dictionary is returned, yield it directly +# yield field_filter def _process_exclude_dictionary(exclude_dictionary): """ diff --git a/search/filter_generator.py b/search/filter_generator.py index 5adacb86..186da1b4 100644 --- a/search/filter_generator.py +++ b/search/filter_generator.py @@ -6,6 +6,7 @@ from .utils import _load_class, DateRange +from typing import Any, Dict, List, Optional, Union class SearchFilterGenerator: @@ -14,9 +15,38 @@ class SearchFilterGenerator: Users of this search app will override this class and update setting for SEARCH_FILTER_GENERATOR """ - def filter_dictionary(self, **kwargs): - """ base implementation which filters via start_date """ - return {"start_date": DateRange(None, datetime.utcnow())} + # def filter_dictionary(self, **kwargs): + # """ base implementation which filters via start_date """ + # return {"start_date": DateRange(None, datetime.utcnow())} + + +# changed filter_dictionary function to support the following +# Starts with a default start_date filter. 
+ +# Then dynamically adds more filters based on the provided field_filters dictionary. + +# Uses term or terms queries depending on whether the value is a single item or a list. + @staticmethod + def _normalise_to_list(value: Any) -> List[Any]: + """ + Return *value* as a list without mutating it if it already is one. + """ + if isinstance(value, (list, tuple, set)): + return list(value) + return [value] + + def filter_dictionary(self, *, field_filters=None, **_kwargs): + filters = { + "start_date": DateRange(None, datetime.utcnow()) + } + if field_filters: + for field, raw in field_filters.items(): + values = self._normalise_to_list(raw) + if len(values) == 1: + filters[field] = {"term": {f"{field}.keyword": values[0]}} + else: + filters[field] = {"terms": {f"{field}.keyword": values}} + return filters def field_dictionary(self, **kwargs): """ base implementation which add course if provided """ diff --git a/search/views.py b/search/views.py index 4d0499e9..b1389b3c 100644 --- a/search/views.py +++ b/search/views.py @@ -1,48 +1,83 @@ """ handle requests for courseware search http requests """ # This contains just the url entry points to use if desired, which currently has only one +import json import logging from django.conf import settings -from django.http import JsonResponse +from django.http import JsonResponse, QueryDict from django.utils.translation import gettext as _ from django.views.decorators.http import require_POST from eventtracking import tracker as track from .api import perform_search, course_discovery_search, course_discovery_filter_fields from .initializer import SearchInitializer - +from django.views.decorators.csrf import csrf_exempt # log appears to be standard name used for logger log = logging.getLogger(__name__) - -def _process_pagination_values(request): - """ process pagination requests from request parameter """ - size = 20 - page = 0 - from_ = 0 - if "page_size" in request.POST: - size = int(request.POST["page_size"]) - max_page_size 
= getattr(settings, "SEARCH_MAX_PAGE_SIZE", 100) - # The parens below are superfluous, but make it much clearer to the reader what is going on - if not (0 < size <= max_page_size): # pylint: disable=superfluous-parens - raise ValueError(_('Invalid page size of {page_size}').format(page_size=size)) - - if "page_index" in request.POST: - page = int(request.POST["page_index"]) - from_ = page * size +def parse_post_data(request): + """Support both JSON and form-encoded input.""" + if request.content_type == 'application/json': + try: + body = json.loads(request.body.decode('utf-8')) + except json.JSONDecodeError: + log.warning("⚠️ Malformed JSON received") + return QueryDict('', mutable=True) + + qdict = QueryDict('', mutable=True) + for key, value in body.items(): + if isinstance(value, list): + # ensure qdict.getlist("org") returns ['astu', 'openedx'], by appending lists + for item in value: + qdict.appendlist(key, item) + else: + qdict.update({key: value}) + return qdict + # else return request.post + return request.POST +# def _process_pagination_values(request): +# """ process pagination requests from request parameter """ +# size = 20 +# page = 0 +# from_ = 0 +# if "page_size" in request.POST: +# size = int(request.POST["page_size"]) +# max_page_size = getattr(settings, "SEARCH_MAX_PAGE_SIZE", 100) +# # The parens below are superfluous, but make it much clearer to the reader what is going on +# if not (0 < size <= max_page_size): # pylint: disable=superfluous-parens +# raise ValueError(_('Invalid page size of {page_size}').format(page_size=size)) + +# if "page_index" in request.POST: +# page = int(request.POST["page_index"]) +# from_ = page * size +# return size, from_, page + +def _process_pagination_values(data): + """Extract pagination info from data.""" + size = int(data.get("page_size", 20)) + page = int(data.get("page_index", 0)) + max_page_size = getattr(settings, "SEARCH_MAX_PAGE_SIZE", 100) + + if not (0 < size <= max_page_size): + raise 
ValueError(_('Invalid page size of {page_size}').format(page_size=size)) + + from_ = page * size return size, from_, page - -def _process_field_values(request, is_multivalue=False): - """ Create separate dictionary of supported filter values provided """ - get_value = request.POST.getlist if is_multivalue else request.POST.get - return { - field_key: get_value(field_key) - for field_key in request.POST - if field_key in course_discovery_filter_fields() - } - +# ----to support multiple values per key as QueryDict and regular dict like "org=astu&org=openedx" ---- +def _process_field_values(data): + filters = {} + for key in course_discovery_filter_fields(): + if isinstance(data, QueryDict): + values = data.getlist(key) + else: + values = data.get(key, []) + if not isinstance(values, list): + values = [values] + if values: + filters[key] = values + return filters @require_POST def do_search(request, course_id=None): @@ -138,6 +173,7 @@ def do_search(request, course_id=None): @require_POST +@csrf_exempt def course_discovery(request): """ Legacy single-value search endpoint """ return _course_discovery(request, is_multivalue=False) @@ -179,14 +215,15 @@ def _course_discovery(request, is_multivalue=False): "error": _("Nothing to search") } status_code = 500 - - search_term = request.POST.get("search_string", None) - enable_course_sorting_by_start_date = request.POST.get("enable_course_sorting_by_start_date", False) - + # search_term = request.POST.get("search_string", None) + post_data = parse_post_data(request) + search_term = post_data.get("search_string", "").strip() try: - size, from_, page = _process_pagination_values(request) - field_dictionary = _process_field_values(request, is_multivalue=is_multivalue) - + size, from_, page = _process_pagination_values(post_data) + field_dictionary = _process_field_values(post_data) + # ✅ Allow searches even if search_string is empty, as long as filters are applied + if not search_term and not field_dictionary: + search_term = 
None # Analytics - log search request track.emit( 'edx.course_discovery.search.initiated', @@ -194,6 +231,7 @@ def _course_discovery(request, is_multivalue=False): "search_term": search_term, "page_size": size, "page_number": page, + "filters": field_dictionary, #track filters information } ) @@ -202,7 +240,6 @@ def _course_discovery(request, is_multivalue=False): size=size, from_=from_, field_dictionary=field_dictionary, - enable_course_sorting_by_start_date=enable_course_sorting_by_start_date, is_multivalue=is_multivalue, ) From c840d6d59a8a309fc1b3d236d53d82426a27cad1 Mon Sep 17 00:00:00 2001 From: kemalatgithub Date: Thu, 7 Aug 2025 14:24:26 +0300 Subject: [PATCH 2/3] The rebased branch supports both QueryDict and regular dictionaries, enabling modern JSON-style payloads. It also correctly processes multiple values per key, producing clean, predictable lists. This aligns it with the multi-organization course filtering features planned in this PR. --- search/api.py | 26 -------------------------- search/elastic.py | 13 ------------- search/filter_generator.py | 12 ------------ search/views.py | 20 -------------------- 4 files changed, 71 deletions(-) diff --git a/search/api.py b/search/api.py index f95bcc96..87586cd1 100644 --- a/search/api.py +++ b/search/api.py @@ -171,29 +171,3 @@ def course_discovery_search( ) return results - -# helper function for building filters from dicts (optional): -# def build_filters(field_dictionary): -# """ -# Convert field_dictionary into Elasticsearch filter clauses, -# using 'terms' for multi-value filters and 'term' for single-value filters. 
-# """ -# filters = [] -# for field, values in field_dictionary.items(): -# if not isinstance(values, list): -# values = [values] - -# if len(values) > 1: -# filters.append({ -# "terms": { -# field: values -# } -# }) -# else: -# filters.append({ -# "term": { -# field: values[0] -# } -# }) - -# return filters \ No newline at end of file diff --git a/search/elastic.py b/search/elastic.py index 44a1e823..e7e60f6a 100644 --- a/search/elastic.py +++ b/search/elastic.py @@ -211,19 +211,6 @@ def _process_filters(filter_dictionary): }, } - # --- update to handle multiple fields --- -# def _process_filters(filter_dictionary): -# for field, value in filter_dictionary.items(): -# if value: -# field_filter = _get_filter_field(field, value) -# # If the helper returns a list of filters, yield each filter one by one -# if isinstance(field_filter, list): -# for multi_filter in field_filter: -# yield multi_filter -# else: -# # If a single filter dictionary is returned, yield it directly -# yield field_filter - def _process_exclude_dictionary(exclude_dictionary): """ Build a list of term fields which will be excluded from result set. diff --git a/search/filter_generator.py b/search/filter_generator.py index 186da1b4..f5d531fa 100644 --- a/search/filter_generator.py +++ b/search/filter_generator.py @@ -14,18 +14,6 @@ class SearchFilterGenerator: Class to provide a set of filters for the search. Users of this search app will override this class and update setting for SEARCH_FILTER_GENERATOR """ - - # def filter_dictionary(self, **kwargs): - # """ base implementation which filters via start_date """ - # return {"start_date": DateRange(None, datetime.utcnow())} - - -# changed filter_dictionary function to support the following -# Starts with a default start_date filter. - -# Then dynamically adds more filters based on the provided field_filters dictionary. - -# Uses term or terms queries depending on whether the value is a single item or a list. 
@staticmethod def _normalise_to_list(value: Any) -> List[Any]: """ diff --git a/search/views.py b/search/views.py index b1389b3c..e82cefab 100644 --- a/search/views.py +++ b/search/views.py @@ -12,7 +12,6 @@ from eventtracking import tracker as track from .api import perform_search, course_discovery_search, course_discovery_filter_fields from .initializer import SearchInitializer -from django.views.decorators.csrf import csrf_exempt # log appears to be standard name used for logger log = logging.getLogger(__name__) @@ -28,7 +27,6 @@ def parse_post_data(request): qdict = QueryDict('', mutable=True) for key, value in body.items(): if isinstance(value, list): - # ensure qdict.getlist("org") returns ['astu', 'openedx'], by appending lists for item in value: qdict.appendlist(key, item) else: @@ -36,23 +34,6 @@ def parse_post_data(request): return qdict # else return request.post return request.POST -# def _process_pagination_values(request): -# """ process pagination requests from request parameter """ -# size = 20 -# page = 0 -# from_ = 0 -# if "page_size" in request.POST: -# size = int(request.POST["page_size"]) -# max_page_size = getattr(settings, "SEARCH_MAX_PAGE_SIZE", 100) -# # The parens below are superfluous, but make it much clearer to the reader what is going on -# if not (0 < size <= max_page_size): # pylint: disable=superfluous-parens -# raise ValueError(_('Invalid page size of {page_size}').format(page_size=size)) - -# if "page_index" in request.POST: -# page = int(request.POST["page_index"]) -# from_ = page * size -# return size, from_, page - def _process_pagination_values(data): """Extract pagination info from data.""" size = int(data.get("page_size", 20)) @@ -173,7 +154,6 @@ def do_search(request, course_id=None): @require_POST -@csrf_exempt def course_discovery(request): """ Legacy single-value search endpoint """ return _course_discovery(request, is_multivalue=False) From 934cbd717942b668eda543af1dd5d482b059c844 Mon Sep 17 00:00:00 2001 From: 
kemalatgithub Date: Fri, 14 Nov 2025 06:29:34 +0300 Subject: [PATCH 3/3] Based on the review and update requests, the following edx-search codebase files were updated: - search/filter_generator.py - search/views.py --- search/filter_generator.py | 17 +++++++++++++++++ search/views.py | 12 ++++++++---- 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/search/filter_generator.py b/search/filter_generator.py index f5d531fa..5d716a9f 100644 --- a/search/filter_generator.py +++ b/search/filter_generator.py @@ -24,6 +24,23 @@ def _normalise_to_list(value: Any) -> List[Any]: return [value] def filter_dictionary(self, *, field_filters=None, **_kwargs): + """ + Build search filters by adding a default start-date range and converting + provided field values into term or terms filters, depending on whether + each field has one or multiple values. + + Parameters + ---------- + field_filters : dict, optional + A mapping of field names to raw filter values. Each value may be + singular or an iterable; values are normalized to lists. + + Returns + ------- + dict + A dictionary containing date and field-based filters suitable for use + in search queries. 
+ """ filters = { "start_date": DateRange(None, datetime.utcnow()) } diff --git a/search/views.py b/search/views.py index e82cefab..5b5d920c 100644 --- a/search/views.py +++ b/search/views.py @@ -32,13 +32,17 @@ def parse_post_data(request): else: qdict.update({key: value}) return qdict - # else return request.post return request.POST + def _process_pagination_values(data): """Extract pagination info from data.""" - size = int(data.get("page_size", 20)) - page = int(data.get("page_index", 0)) - max_page_size = getattr(settings, "SEARCH_MAX_PAGE_SIZE", 100) + DEFAULT_PAGE_SIZE = 20 + DEFAULT_PAGE_INDEX = 0 + DEFAULT_MAX_PAGE_SIZE = 100 + max_page_size = getattr(settings, "SEARCH_MAX_PAGE_SIZE", DEFAULT_MAX_PAGE_SIZE) + + size = int(data.get("page_size", DEFAULT_PAGE_SIZE)) + page = int(data.get("page_index", DEFAULT_PAGE_INDEX)) if not (0 < size <= max_page_size): raise ValueError(_('Invalid page size of {page_size}').format(page_size=size))