From 753fa9c0f96b320fc73965b2064f4f296f812946 Mon Sep 17 00:00:00 2001 From: Clinton Blackburn Date: Wed, 30 Dec 2015 01:55:09 -0500 Subject: [PATCH 01/15] Added support for Elasticsearch 1.0+ Elasticsearch 0.90.x is quite out-of-date, and this requirement is preventing us from sharing an ES cluster with newer IDAs. ECOM-3261 and SOL-295 --- .travis.yml | 6 ++-- search/elastic.py | 55 +++++++++++++++--------------------- search/tests/test_engines.py | 23 +++++++++------ search/tests/tests.py | 11 +------- setup.py | 4 +-- 5 files changed, 43 insertions(+), 56 deletions(-) diff --git a/.travis.yml b/.travis.yml index 3b4f7ec3..4111006a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -10,9 +10,9 @@ cache: - $HOME/.cache/pip before_install: - - curl -O https://download.elasticsearch.org/elasticsearch/elasticsearch/elasticsearch-0.90.13.zip - - unzip elasticsearch-0.90.13.zip - - elasticsearch-0.90.13/bin/elasticsearch + - curl -O https://download.elasticsearch.org/elasticsearch/elasticsearch/elasticsearch-1.5.2.zip + - unzip elasticsearch-1.5.2.zip + - elasticsearch-1.5.2/bin/elasticsearch -d install: diff --git a/search/elastic.py b/search/elastic.py index f56f23aa..8aabcd0c 100644 --- a/search/elastic.py +++ b/search/elastic.py @@ -5,7 +5,7 @@ from django.conf import settings from django.core.cache import cache from elasticsearch import Elasticsearch, exceptions -from elasticsearch.helpers import bulk +from elasticsearch.helpers import bulk, BulkIndexError from search.search_engine_base import SearchEngine from search.utils import ValueRange, _is_iterable @@ -244,25 +244,25 @@ def _get_mappings(self, doc_type): We cache the properties of each doc_type, if they are not available, we'll load them again from Elasticsearch """ - doc_mappings = ElasticSearchEngine.get_mappings(self.index_name, doc_type) - if not doc_mappings: - try: - doc_mappings = self._es.indices.get_mapping( - index=self.index_name, - doc_type=doc_type, - )[doc_type] + # Try loading the mapping from the cache. + mapping = ElasticSearchEngine.get_mappings(self.index_name, doc_type) + + # Fall back to Elasticsearch + if not mapping: + mapping = self._es.indices.get_mapping( + index=self.index_name, + doc_type=doc_type, + ).get(self.index_name, {}).get('mappings', {}).get(doc_type, {}) + + # Cache the mapping, if one was retrieved + if mapping: ElasticSearchEngine.set_mappings( self.index_name, doc_type, - doc_mappings + mapping ) - except exceptions.NotFoundError: - # In this case there are no mappings for this doc_type on the elasticsearch server - # This is a normal case when a new doc_type is being created, and it is expected that - # we'll hit it for new doc_type s - return {} - return doc_mappings + return mapping def _clear_mapping(self, doc_type): """ Remove the cached mappings, so that they get loaded from ES next time they are requested """ @@ -393,7 +393,7 @@ def remove(self, doc_type, doc_ids, **kwargs): # pylint: disable=unexpected-keyword-arg actions = [] for doc_id in doc_ids: - log.debug("remove index for %s object with id %s", doc_type, doc_id) + log.debug("Removing document of type %s and index %s", doc_type, doc_id) action = { '_op_type': 'delete', "_index": self.index_name, @@ -401,22 +401,13 @@ def remove(self, doc_type, doc_ids, **kwargs): "_id": doc_id } actions.append(action) - # bulk() returns a tuple with summary information - # number of successfully executed actions and number of errors if stats_only is set to True. 
- _, indexing_errors = bulk( - self._es, - actions, - # let notfound not cause error - ignore=[404], - **kwargs - ) - if indexing_errors: - ElasticSearchEngine.log_indexing_error(indexing_errors) - # Broad exception handler to protect around bulk call - except Exception as ex: - # log information and re-raise - log.exception("error while deleting document from index - %s", ex.message) - raise ex + bulk(self._es, actions, **kwargs) + except BulkIndexError as ex: + valid_errors = [error for error in ex.errors if error['delete']['status'] != 404] + + if valid_errors: + log.exception("An error occurred while removing documents from the index.") + raise # A few disabled pylint violations here: # This procedure takes each of the possible input parameters and builds the query with each argument diff --git a/search/tests/test_engines.py b/search/tests/test_engines.py index 8e4af411..1f2dd29e 100644 --- a/search/tests/test_engines.py +++ b/search/tests/test_engines.py @@ -3,22 +3,22 @@ # Some of the subclasses that get used as settings-overrides will yield this pylint # error, but they do get used when included as part of the override_settings # pylint: disable=too-few-public-methods -""" Tests for search functionalty """ -from datetime import datetime +""" Tests for search functionality """ import json import os +from datetime import datetime -from mock import patch from django.test import TestCase from django.test.utils import override_settings from elasticsearch import exceptions +from elasticsearch.helpers import BulkIndexError +from mock import patch +from search.api import perform_search, NoSearchEngineError from search.elastic import RESERVED_CHARACTERS +from search.tests.mock_search_engine import MockSearchEngine, json_date_to_datetime +from search.tests.tests import MockSearchTests from search.tests.utils import ErroringElasticImpl, SearcherMixin -from search.api import perform_search, NoSearchEngineError - -from .mock_search_engine import MockSearchEngine, json_date_to_datetime -from .tests import MockSearchTests @override_settings(SEARCH_ENGINE="search.tests.utils.ForceRefreshElasticSearchEngine") @@ -201,8 +201,13 @@ def test_search_failure(self): def test_remove_failure_bulk(self): """ the remove operation should fail """ - with patch('search.elastic.bulk', return_value=[0, [exceptions.ElasticsearchException()]]): - with self.assertRaises(exceptions.ElasticsearchException): + doc_id = 'test_id' + doc_type = 'test_doc' + error = {'delete': { + 'status': 500, '_type': doc_type, '_index': 'test_index', '_version': 1, 'found': True, '_id': doc_id + }} + with patch('search.elastic.bulk', side_effect=BulkIndexError('Simulated error', [error])): + with self.assertRaises(BulkIndexError): self.searcher.remove("test_doc", ["test_id"]) def test_remove_failure_general(self): diff --git a/search/tests/tests.py b/search/tests/tests.py index 16e9fef2..002e10e1 100644 --- a/search/tests/tests.py +++ b/search/tests/tests.py @@ -369,16 +369,7 @@ def test_delete_item_slashes(self): def test_delete_item_not_present(self): """ make sure that we get no error removing an item that does not exist """ - test_string = "This is a test of the emergency broadcast system" - self.searcher.index("test_doc", [{"id": "FAKE_ID", "content": {"name": "abc"}}]) - self.searcher.remove("test_doc", ["FAKE_ID"]) - - response = self.searcher.search(test_string) - self.assertEqual(response["total"], 0) - - self.searcher.remove("test_doc", ["FAKE_ID"]) - response = self.searcher.search(test_string) - 
self.assertEqual(response["total"], 0) + self.searcher.remove("test_doc", ["TOTALLY_FAKE_ID"]) def test_filter_items(self): """ Make sure that filters work """ diff --git a/setup.py b/setup.py index d7364250..e9b2bd3f 100755 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setup( name='edx-search', - version='0.1.1', + version='1.0.0', description='Search and Index routines for index access', author='edX', url='https://github.com/edx/edx-search', @@ -22,6 +22,6 @@ packages=['search'], install_requires=[ "django >= 1.8, < 1.9", - "elasticsearch<1.0.0" + "elasticsearch>=1.0.0,<2.0.0" ] ) From d7976c7d3b4bd2207cb7aa403f8e34e24844e42e Mon Sep 17 00:00:00 2001 From: Clinton Blackburn Date: Tue, 5 Jan 2016 14:12:58 -0500 Subject: [PATCH 02/15] Publishing new releases to PyPI --- .travis.yml | 9 +++++++++ setup.py | 3 ++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 4111006a..766c9fc5 100644 --- a/.travis.yml +++ b/.travis.yml @@ -23,3 +23,12 @@ install: script: make validate after_success: coveralls + +deploy: + provider: pypi + user: edx + password: + secure: cMZzzg6zBrw55qlznVKb/3AaRVI9yeN4HhQOOUrGtcsEvamuOGyOXdYwNamKRqTy3VJIjRKVder+kSnEwNGoFPUBUdNIEN70IxyiGcnhLppoIvCw3FO3RI0J2N08oUSoK0nM+ACvNRT+Q2GQj3awyPJyb6J20QixCYvwax/O3v8= + distributions: "sdist bdist_wheel" + on: + tags: true diff --git a/setup.py b/setup.py index e9b2bd3f..0076bf53 100755 --- a/setup.py +++ b/setup.py @@ -6,8 +6,9 @@ setup( name='edx-search', version='1.0.0', - description='Search and Index routines for index access', + description='Search and index routines for index access', author='edX', + author_email='oscm@edx.org', url='https://github.com/edx/edx-search', license='AGPL', classifiers=[ From afb3d4444f4709ace7dd5df4a462109e77a6c26e Mon Sep 17 00:00:00 2001 From: Calen Pennington Date: Thu, 17 Mar 2016 14:40:00 -0400 Subject: [PATCH 03/15] Update the pypi password (ECOM-3912) --- .travis.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 766c9fc5..08ff45f5 100644 --- a/.travis.yml +++ b/.travis.yml @@ -28,7 +28,7 @@ deploy: provider: pypi user: edx password: - secure: cMZzzg6zBrw55qlznVKb/3AaRVI9yeN4HhQOOUrGtcsEvamuOGyOXdYwNamKRqTy3VJIjRKVder+kSnEwNGoFPUBUdNIEN70IxyiGcnhLppoIvCw3FO3RI0J2N08oUSoK0nM+ACvNRT+Q2GQj3awyPJyb6J20QixCYvwax/O3v8= - distributions: "sdist bdist_wheel" + secure: FIBjU6/5WPzUHqNWO9OqPdt3YipxSs7WPTnKMTJwlEvixXCIRkmAXd1CBd7kSYa0GvCfLSei7xLKsgUKaCe+OBsnw/ZDBllZv5EvLJwdKn/EKrPxhxeQ6/SNtqafWQ3mLL1+gosh0RHQdy0HlwwS+m/Qsf+51ohIJVpt+5jwxFA= + distributions: sdist bdist_wheel on: tags: true From 28cfaf0e06bed08735ace9a8ff176ad2489b58a1 Mon Sep 17 00:00:00 2001 From: Calen Pennington Date: Tue, 23 Aug 2016 10:42:23 -0400 Subject: [PATCH 04/15] Add an OEP-2 compliant openedx.yaml file --- openedx.yaml | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 openedx.yaml diff --git a/openedx.yaml b/openedx.yaml new file mode 100644 index 00000000..decfa11b --- /dev/null +++ b/openedx.yaml @@ -0,0 +1,8 @@ +# This file describes this Open edX repo, as described in OEP-2: +# http://open-edx-proposals.readthedocs.io/en/latest/oeps/oep-0002.html#specification + +nick: srch +oeps: {} +owner: MUST FILL IN OWNER +tags: [dedx] +track-pulls: true From 6009faa33248947b51df630ed00900787cefea51 Mon Sep 17 00:00:00 2001 From: Calen Pennington Date: Tue, 23 Aug 2016 14:19:20 -0400 Subject: [PATCH 05/15] Specify an owner --- openedx.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openedx.yaml 
b/openedx.yaml index decfa11b..1635868e 100644 --- a/openedx.yaml +++ b/openedx.yaml @@ -3,6 +3,6 @@ nick: srch oeps: {} -owner: MUST FILL IN OWNER +owner: edx/ecommerce tags: [dedx] track-pulls: true From 37b770e6edcee403788d3a68bb6c7c6888986b1d Mon Sep 17 00:00:00 2001 From: Calen Pennington Date: Tue, 23 Aug 2016 15:21:34 -0400 Subject: [PATCH 06/15] Update owner --- openedx.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openedx.yaml b/openedx.yaml index 1635868e..6b64a1b4 100644 --- a/openedx.yaml +++ b/openedx.yaml @@ -3,6 +3,6 @@ nick: srch oeps: {} -owner: edx/ecommerce +owner: edx/solutions-team tags: [dedx] track-pulls: true From ed56b29f32ca297aabc1d14778856d47d1e6732d Mon Sep 17 00:00:00 2001 From: Diana Huang Date: Fri, 3 Feb 2017 13:50:50 -0500 Subject: [PATCH 07/15] Update version and use search test packages. --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 0076bf53..174f1af2 100755 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setup( name='edx-search', - version='1.0.0', + version='1.0.1', description='Search and index routines for index access', author='edX', author_email='oscm@edx.org', @@ -20,7 +20,7 @@ 'Programming Language :: Python', 'Framework :: Django', ], - packages=['search'], + packages=['search', 'search.tests'], install_requires=[ "django >= 1.8, < 1.9", "elasticsearch>=1.0.0,<2.0.0" From e111f34927b988abd4f7f39cdb274b1883e905bd Mon Sep 17 00:00:00 2001 From: Diana Huang Date: Mon, 6 Feb 2017 13:17:25 -0500 Subject: [PATCH 08/15] Switch to using CodeCov for coverage checking. --- .coveragerc | 4 ++++ .travis.yml | 3 +-- codecov.yml | 12 ++++++++++++ test_requirements.txt | 1 + 4 files changed, 18 insertions(+), 2 deletions(-) create mode 100644 .coveragerc create mode 100644 codecov.yml diff --git a/.coveragerc b/.coveragerc new file mode 100644 index 00000000..6d7c0a99 --- /dev/null +++ b/.coveragerc @@ -0,0 +1,4 @@ +[run] +branch = True +data_file = .coverage +source=search diff --git a/.travis.yml b/.travis.yml index 08ff45f5..4723f1bf 100644 --- a/.travis.yml +++ b/.travis.yml @@ -16,13 +16,12 @@ before_install: install: - - "pip install -U pip wheel coveralls" - make requirements script: make validate -after_success: coveralls +after_success: codecov deploy: provider: pypi diff --git a/codecov.yml b/codecov.yml new file mode 100644 index 00000000..4da47686 --- /dev/null +++ b/codecov.yml @@ -0,0 +1,12 @@ +coverage: + status: + project: + default: + enabled: yes + target: auto + patch: + default: + enabled: yes + target: 100% + +comment: false diff --git a/test_requirements.txt b/test_requirements.txt index 27f30fcb..0365c350 100644 --- a/test_requirements.txt +++ b/test_requirements.txt @@ -4,6 +4,7 @@ edx-lint==0.4.0 mock==1.3.0 pep8==1.6.2 pytz +codecov # edX libraries From 1a6926de996755ae2402cd437089ce968a0aeb0b Mon Sep 17 00:00:00 2001 From: Ahsan Ulhaq Date: Mon, 10 Jul 2017 15:55:53 +0500 Subject: [PATCH 09/15] Upgrade to django 1.11 and implemented tox LEARNER-1529 --- .travis.yml | 9 +++++++- Makefile | 7 ++---- edxsearch/urls.py | 5 ++-- search/tests/test_search_result_processor.py | 24 ++++++++++---------- search/urls.py | 7 +++--- setup.py | 9 ++++++-- test_requirements.txt | 1 + tox.ini | 23 +++++++++++++++++++ 8 files changed, 59 insertions(+), 26 deletions(-) create mode 100644 tox.ini diff --git a/.travis.yml b/.travis.yml index 4723f1bf..e6e09ad5 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,7 +2,14 @@ sudo: false language: python -python: "2.7" 
+python: + - 2.7 + +env: + - TOXENV=django18 + - TOXENV=django19 + - TOXENV=django110 + - TOXENV=django111 # Cache the pip directory. "cache: pip" doesn't work due to install override. See https://github.com/travis-ci/travis-ci/issues/3239. cache: diff --git a/Makefile b/Makefile index 368d16eb..95d57d74 100644 --- a/Makefile +++ b/Makefile @@ -4,16 +4,13 @@ clean: rm -rf coverage htmlcov quality: - pep8 --config=.pep8 search - pylint --rcfile=pylintrc search + tox -e quality requirements: pip install -r test_requirements.txt validate: clean - DJANGO_SETTINGS_MODULE=settings coverage run --source=search ./manage.py test - coverage report - make quality + tox .PHONY: clean, quality, requirements, validate diff --git a/edxsearch/urls.py b/edxsearch/urls.py index f0a26dfc..2b4419bd 100644 --- a/edxsearch/urls.py +++ b/edxsearch/urls.py @@ -1,5 +1,6 @@ """ import urls from search component to test it's operation when included within other django projects """ -from django.conf.urls import patterns, include, url +import django +from django.conf.urls import include, url # from django.contrib import admin # admin.autodiscover() @@ -8,4 +9,4 @@ # urlpatterns is the standard name to use here # pylint: disable=invalid-name -urlpatterns = patterns('', url(r'^search/', include(search.urls)),) +urlpatterns = [url(r'^search/', include(search.urls))] diff --git a/search/tests/test_search_result_processor.py b/search/tests/test_search_result_processor.py index 18d84e64..ef4e552a 100644 --- a/search/tests/test_search_result_processor.py +++ b/search/tests/test_search_result_processor.py @@ -25,8 +25,8 @@ def test_strings_in_dictionary(self): }) get_strings = SearchResultProcessor.strings_in_dictionary(test_dict) self.assertEqual(len(get_strings), 2) - self.assertEqual(get_strings[0], test_dict["a"]) - self.assertEqual(get_strings[1], test_dict["b"]) + self.assertIn(test_dict["a"], get_strings) + self.assertIn(test_dict["b"], get_strings) test_dict.update({ "CASCADE": { @@ -35,9 +35,9 @@ def test_strings_in_dictionary(self): }) get_strings = SearchResultProcessor.strings_in_dictionary(test_dict) self.assertEqual(len(get_strings), 3) - self.assertEqual(get_strings[0], test_dict["a"]) - self.assertEqual(get_strings[1], test_dict["b"]) - self.assertEqual(get_strings[2], test_dict["CASCADE"]["z"]) + self.assertIn(test_dict["a"], get_strings) + self.assertIn(test_dict["b"], get_strings) + self.assertIn(test_dict["CASCADE"]["z"], get_strings) test_dict.update({ "DEEP": { @@ -52,10 +52,10 @@ def test_strings_in_dictionary(self): }) get_strings = SearchResultProcessor.strings_in_dictionary(test_dict) self.assertEqual(len(get_strings), 4) - self.assertEqual(get_strings[0], test_dict["a"]) - self.assertEqual(get_strings[1], test_dict["b"]) - self.assertEqual(get_strings[2], test_dict["CASCADE"]["z"]) - self.assertEqual(get_strings[3], test_dict["DEEP"]["DEEPER"]["STILL_GOING"]["MORE"]["here"]) + self.assertIn(test_dict["a"], get_strings) + self.assertIn(test_dict["b"], get_strings) + self.assertIn(test_dict["CASCADE"]["z"], get_strings) + self.assertIn(test_dict["DEEP"]["DEEPER"]["STILL_GOING"]["MORE"]["here"], get_strings) def test_find_matches(self): """ test finding matches """ @@ -127,9 +127,9 @@ def test_excerpt(self): self.assertEqual(srp.excerpt, u"Here is a الاستحسان about edx") srp = SearchResultProcessor(test_result, u"edx") - self.assertEqual( - srp.excerpt, - u'Here is a الاستحسان about edxedX search a lot' + self.assertIn( + u"Here is a الاستحسان about edx", + srp.excerpt ) def 
test_too_long_excerpt(self): diff --git a/search/urls.py b/search/urls.py index f0225a55..2dc3f482 100644 --- a/search/urls.py +++ b/search/urls.py @@ -1,6 +1,6 @@ """ expose courseware search http interface """ from django.conf import settings -from django.conf.urls import patterns, url +from django.conf.urls import url from . import views @@ -8,9 +8,8 @@ # urlpatterns is the standard name to use here # pylint: disable=invalid-name -urlpatterns = patterns( - '', +urlpatterns = [ url(r'^$', views.do_search, name='do_search'), url(r'^{}$'.format(COURSE_ID_PATTERN), views.do_search, name='do_search'), url(r'^course_discovery/$', views.course_discovery, name='course_discovery'), -) +] diff --git a/setup.py b/setup.py index 174f1af2..dbed2bd5 100755 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setup( name='edx-search', - version='1.0.1', + version='1.1.0', description='Search and index routines for index access', author='edX', author_email='oscm@edx.org', @@ -18,11 +18,16 @@ 'License :: OSI Approved :: GNU Affero General Public License v3', 'Operating System :: OS Independent', 'Programming Language :: Python', + 'Programming Language :: Python :: 2.7', 'Framework :: Django', + 'Framework :: Django :: 1.8', + 'Framework :: Django :: 1.9', + 'Framework :: Django :: 1.10', + 'Framework :: Django :: 1.11', ], packages=['search', 'search.tests'], install_requires=[ - "django >= 1.8, < 1.9", + "django >= 1.8, < 2.0", "elasticsearch>=1.0.0,<2.0.0" ] ) diff --git a/test_requirements.txt b/test_requirements.txt index 0365c350..baf449b8 100644 --- a/test_requirements.txt +++ b/test_requirements.txt @@ -5,6 +5,7 @@ mock==1.3.0 pep8==1.6.2 pytz codecov +tox>=2.3.1,<3.0.0 # edX libraries diff --git a/tox.ini b/tox.ini new file mode 100644 index 00000000..7c28619a --- /dev/null +++ b/tox.ini @@ -0,0 +1,23 @@ +[tox] +envlist = py{27}-django{18,19,110,111}, quality + +[testenv] +setenv = + # This allows us to reference settings.py + PYTHONPATH = {toxinidir} + +deps = + django18: Django>=1.8,<1.9 + django19: Django>=1.9,<1.10 + django110: Django>=1.10,<1.11 + django111: Django>=1.11,<2.0 + -rtest_requirements.txt + +commands = + coverage run ./manage.py test --settings=settings + coverage report + +[testenv:quality] +commands = + pep8 --config=.pep8 search + pylint --rcfile=pylintrc search From be7611e483d19b1556c233b4d0422b68db471eab Mon Sep 17 00:00:00 2001 From: rythmE Date: Thu, 20 May 2021 17:30:21 +0800 Subject: [PATCH 10/15] Fixed all test failures --- search/elastic.py | 1 + 1 file changed, 1 insertion(+) diff --git a/search/elastic.py b/search/elastic.py index 093371b5..ea50db51 100755 --- a/search/elastic.py +++ b/search/elastic.py @@ -596,6 +596,7 @@ def search(self, try: log.info("search body: %s", body) + print(" ------ search body --------\n {}".format(body)) es_response = self._es.search( index=self.index_name, body=body, From 8737be4b3b9163d44afef35e70ae4d9ab372f1d9 Mon Sep 17 00:00:00 2001 From: rythmE Date: Mon, 24 May 2021 10:36:03 +0800 Subject: [PATCH 11/15] Added course_hashtag_list in course search --- search/elastic.py | 1 - 1 file changed, 1 deletion(-) diff --git a/search/elastic.py b/search/elastic.py index ea50db51..093371b5 100755 --- a/search/elastic.py +++ b/search/elastic.py @@ -596,7 +596,6 @@ def search(self, try: log.info("search body: %s", body) - print(" ------ search body --------\n {}".format(body)) es_response = self._es.search( index=self.index_name, body=body, From c006e144ec765c6be77b3753f698041071117266 Mon Sep 17 00:00:00 2001 From: rythmE Date: Mon, 24 May 2021 
17:33:36 +0800 Subject: [PATCH 12/15] Added hashtag to course search --- search/elastic.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/search/elastic.py b/search/elastic.py index 093371b5..d7b4ca2a 100755 --- a/search/elastic.py +++ b/search/elastic.py @@ -543,6 +543,11 @@ def search(self, "analyzer": "standard" } }) + elastic_queries.append({ + "term": { + "course_hashtag_list": query_string.encode('utf-8') + } + }) if field_dictionary: if use_field_match: @@ -570,7 +575,7 @@ def search(self, if elastic_queries: query_segment = { "bool": { - "must": elastic_queries + "should": elastic_queries } } From 6314f7620c6947c0d8aebb2e24eaa5f847624f8a Mon Sep 17 00:00:00 2001 From: rythmE Date: Tue, 13 Jul 2021 11:34:39 +0800 Subject: [PATCH 13/15] download and search feature update --- search/api.py | 13 +++++++++++++ search/elastic.py | 10 ++++++++-- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/search/api.py b/search/api.py index e4c7f9e3..e13f7215 100755 --- a/search/api.py +++ b/search/api.py @@ -232,6 +232,16 @@ def course_discovery_search(search_term=None, size=20, from_=0, field_dictionary if getattr(settings, 'ALLOW_CATALOG_VISIBILITY_FILTER', False): use_field_dictionary['catalog_visibility'] = CATALOG_VISIBILITY_CATALOG_AND_ABOUT + print '--- searcher.search ---' + print 'search_term: ', search_term + print 'size: ', size + print 'from_: ', from_ + print 'field_dictionary: ', use_field_dictionary + print 'filter_dictionary: ', filter_dictionary + print 'exclude_dictionary: ', exclude_dictionary + print 'facet_terms(): ', course_discovery_facets() + print 'sort_args: ', sort_args + results = searcher.search( query_string=search_term, doc_type="course_info", @@ -246,5 +256,8 @@ def course_discovery_search(search_term=None, size=20, from_=0, field_dictionary sort=sort_args ) + print 'type(results)', type(results) + print 'results', results + results = process_range_data(results) return results diff --git a/search/elastic.py b/search/elastic.py index d7b4ca2a..c409bb14 100755 --- a/search/elastic.py +++ b/search/elastic.py @@ -10,6 +10,7 @@ from search.api import QueryParseError from search.search_engine_base import SearchEngine from search.utils import ValueRange, _is_iterable +from hashtag.models import Hashtag # log appears to be standard name used for logger log = logging.getLogger(__name__) # pylint: disable=invalid-name @@ -525,6 +526,10 @@ def search(self, elastic_queries = [] elastic_filters = [] + hashtag_query = Hashtag.objects.filter(name__icontains=query_string) + hashtag_query_id_list = list(hashtag_query.values('id')) + print('------hashtag_query_id_list------', hashtag_query_id_list) + # We have a query string, search all fields for matching text within the "content" node if query_string: if include_content: @@ -544,8 +549,8 @@ def search(self, } }) elastic_queries.append({ - "term": { - "course_hashtag_list": query_string.encode('utf-8') + "terms": { + "course_hashtag_list": hashtag_query_id_list } }) @@ -606,6 +611,7 @@ def search(self, body=body, **kwargs ) + print 'kwargs', kwargs except exceptions.ElasticsearchException as ex: message = unicode(ex) if 'QueryParsingException' in message: From 15ac9b99981089663d882d5e555c5a9e34b7f961 Mon Sep 17 00:00:00 2001 From: rythmE Date: Fri, 16 Jul 2021 10:02:23 +0800 Subject: [PATCH 14/15] bugs fixing --- search/api.py | 16 ++++------------ search/elastic.py | 9 ++------- 2 files changed, 6 insertions(+), 19 deletions(-) diff --git a/search/api.py b/search/api.py index e13f7215..d7f630cb 
100755 --- a/search/api.py +++ b/search/api.py @@ -8,6 +8,7 @@ from .search_engine_base import SearchEngine from .result_processor import SearchResultProcessor from .utils import DateRange +from hashtag.models import Hashtag # Default filters that we support, override using COURSE_DISCOVERY_FILTERS setting if desired DEFAULT_FILTER_FIELDS = ["org", "modes", "language"] @@ -232,15 +233,8 @@ def course_discovery_search(search_term=None, size=20, from_=0, field_dictionary if getattr(settings, 'ALLOW_CATALOG_VISIBILITY_FILTER', False): use_field_dictionary['catalog_visibility'] = CATALOG_VISIBILITY_CATALOG_AND_ABOUT - print '--- searcher.search ---' - print 'search_term: ', search_term - print 'size: ', size - print 'from_: ', from_ - print 'field_dictionary: ', use_field_dictionary - print 'filter_dictionary: ', filter_dictionary - print 'exclude_dictionary: ', exclude_dictionary - print 'facet_terms(): ', course_discovery_facets() - print 'sort_args: ', sort_args + hashtag_query = Hashtag.objects.filter(name__icontains=search_term) + hashtag_query_list = list(hashtag_query.values('id')) results = searcher.search( query_string=search_term, @@ -253,11 +247,9 @@ def course_discovery_search(search_term=None, size=20, from_=0, field_dictionary filter_dictionary=filter_dictionary, exclude_dictionary=exclude_dictionary, facet_terms=course_discovery_facets(), + hashtag_query_list=hashtag_query_list, sort=sort_args ) - print 'type(results)', type(results) - print 'results', results - results = process_range_data(results) return results diff --git a/search/elastic.py b/search/elastic.py index c409bb14..80dbfa6e 100755 --- a/search/elastic.py +++ b/search/elastic.py @@ -10,7 +10,6 @@ from search.api import QueryParseError from search.search_engine_base import SearchEngine from search.utils import ValueRange, _is_iterable -from hashtag.models import Hashtag # log appears to be standard name used for logger log = logging.getLogger(__name__) # pylint: disable=invalid-name @@ -426,6 +425,7 @@ def search(self, exclude_ids=None, use_field_match=False, include_content=False, + hashtag_query_list=None, **kwargs): # pylint: disable=too-many-arguments, too-many-locals, too-many-branches, arguments-differ """ Implements call to search the index for the desired content. 
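
A brief aside, not part of PATCH 14: this commit drops the engine's own Hashtag lookup and instead expects callers to hand in the already-resolved ids through the new hashtag_query_list argument, which is why search/elastic.py no longer imports hashtag.models (next hunk). A minimal illustration of the clause the engine builds from that list, with made-up values, follows.

    # Sketch only, not part of the patch. In PATCH 14 the caller resolves the
    # ids via Hashtag.objects.filter(...).values('id'); plain ids are shown
    # here purely for illustration.
    elastic_queries = []
    hashtag_query_list = [3, 17]

    elastic_queries.append({
        "terms": {
            "course_hashtag_list": hashtag_query_list,
        }
    })
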
@@ -526,10 +526,6 @@ def search(self, elastic_queries = [] elastic_filters = [] - hashtag_query = Hashtag.objects.filter(name__icontains=query_string) - hashtag_query_id_list = list(hashtag_query.values('id')) - print('------hashtag_query_id_list------', hashtag_query_id_list) - # We have a query string, search all fields for matching text within the "content" node if query_string: if include_content: @@ -550,7 +546,7 @@ def search(self, }) elastic_queries.append({ "terms": { - "course_hashtag_list": hashtag_query_id_list + "course_hashtag_list": hashtag_query_list } }) @@ -611,7 +607,6 @@ def search(self, body=body, **kwargs ) - print 'kwargs', kwargs except exceptions.ElasticsearchException as ex: message = unicode(ex) if 'QueryParsingException' in message: From 9e9d6a3596cb43b4597d48f6c6ccd257b8648be4 Mon Sep 17 00:00:00 2001 From: rythmE Date: Mon, 19 Jul 2021 16:51:28 +0800 Subject: [PATCH 15/15] LMS explore search bug fixing --- search/api.py | 4 +--- search/views.py | 3 +++ 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/search/api.py b/search/api.py index d7f630cb..146ea87c 100755 --- a/search/api.py +++ b/search/api.py @@ -8,7 +8,6 @@ from .search_engine_base import SearchEngine from .result_processor import SearchResultProcessor from .utils import DateRange -from hashtag.models import Hashtag # Default filters that we support, override using COURSE_DISCOVERY_FILTERS setting if desired DEFAULT_FILTER_FIELDS = ["org", "modes", "language"] @@ -233,8 +232,7 @@ def course_discovery_search(search_term=None, size=20, from_=0, field_dictionary if getattr(settings, 'ALLOW_CATALOG_VISIBILITY_FILTER', False): use_field_dictionary['catalog_visibility'] = CATALOG_VISIBILITY_CATALOG_AND_ABOUT - hashtag_query = Hashtag.objects.filter(name__icontains=search_term) - hashtag_query_list = list(hashtag_query.values('id')) + hashtag_query_list = kwargs.get('hashtag_query_list', []) results = searcher.search( query_string=search_term, diff --git a/search/views.py b/search/views.py index 3655d348..97cabd46 100755 --- a/search/views.py +++ b/search/views.py @@ -186,6 +186,8 @@ def course_discovery(request): status_code = 500 search_term = request.POST.get("search_string", None) + hashtag_query_list_string = request.POST.get("hashtag_query_list_string", '') + hashtag_query_list = hashtag_query_list_string.split(', ') try: size, from_, page = _process_pagination_values(request) @@ -206,6 +208,7 @@ def course_discovery(request): size=size, from_=from_, field_dictionary=field_dictionary, + hashtag_query_list=hashtag_query_list, user=request.user, allow_enrollment_end_filter=True, sort_type=request.POST.get('sort_type', '')
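
Taken together, patches 12 through 15 thread a hashtag filter through the course-discovery stack: the view reads hashtag_query_list_string from the POST body and splits it on ", ", course_discovery_search forwards the resulting list, and the engine folds it into a "terms" clause on course_hashtag_list inside the "should" group alongside the free-text query. A minimal sketch of how a client might exercise the endpoint after PATCH 15 follows; the host and example values are assumptions, while the URL pattern and POST field names come from search/urls.py and search/views.py above.

    import requests

    # Hypothetical host and values; the path assumes the edxsearch test
    # project's '^search/' include, and the field names match search/views.py.
    response = requests.post(
        "http://localhost:8000/search/course_discovery/",
        data={
            "search_string": "data science",
            # the view splits this on ", " into hashtag_query_list
            "hashtag_query_list_string": "3, 17",
        },
    )
    print(response.status_code)
    print(response.json())
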