diff --git a/.gitignore b/.gitignore index 9fc011c..a304e31 100644 --- a/.gitignore +++ b/.gitignore @@ -22,3 +22,8 @@ reports/ \#*\# *.egg-info .idea/ + + +# Visual Studio Code +.vscode/*.log +*.code-workspace diff --git a/.prospector.yaml b/.prospector.yaml new file mode 100755 index 0000000..8c73e9f --- /dev/null +++ b/.prospector.yaml @@ -0,0 +1,62 @@ +mccabe: + disable: + - MC0001 + +pep8: + disable: + - E305 + - E306 + - E115 + - E116 + - E501 + - E722 + - E741 + +pycodestyle: + disable: + - E115 + - E116 + - E305 + - E306 + - E501 + - E722 + - E741 + +pyflakes: + disable: + - F401 + - F821 + - F841 + +pylint: + disable: + - arguments-renamed + - bare-except + - consider-using-f-string + - consider-using-with + - deprecated-module + - django-not-configured + - import-error + - import-outside-toplevel + - inconsistent-return-statements + - line-too-long + - logging-format-interpolation + - logging-not-lazy + - method-hidden + - multiple-imports + - no-else-raise + - no-else-return + - pointless-statement + - super-with-arguments + - too-many-arguments + - too-many-branches + - too-many-locals + - too-many-statements + - undefined-variable + - unidiomatic-typecheck + - unused-argument + - unused-import + - unused-variable + - unspecified-encoding + - useless-object-inheritance + - useless-suppression diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100755 index 0000000..5c0578a --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,91 @@ +{ + "editor.rulers": [79], + + "files.exclude": { + "**/*.egg-info": true, + "**/.git": true, + "**/.mypy_cache": true, + "**/*.pyc": {"when": "$(basename).py"}, + "**/__pycache__": true, + "**/.ropeproject": true + }, + + "python.analysis.diagnosticSeverityOverrides": { + "reportMissingImports" : "none", + "reportMissingModuleSource" : "none", + "reportUndefinedVariable" : "none" + }, + + "python.linting.enabled": true, + + "python.linting.pylintEnabled": true, + "python.linting.pylintArgs": [ + "--disable", "arguments-renamed", + "--disable", "bare-except", + "--disable", "broad-except", + "--disable", "c-extension-no-member", + "--disable", "consider-using-f-string", + "--disable", "consider-using-with", + "--disable", "deprecated-module", + "--disable", "fixme", + "--disable", "import-error", + "--disable", "import-outside-toplevel", + "--disable", "inconsistent-return-statements", + "--disable", "invalid-name", + "--disable", "line-too-long", + "--disable", "logging-format-interpolation", + "--disable", "logging-not-lazy", + "--disable", "method-hidden", + "--disable", "missing-class-docstring", + "--disable", "missing-function-docstring", + "--disable", "missing-module-docstring", + "--disable", "multiple-imports", + "--disable", "no-else-raise", + "--disable", "no-else-return", + "--disable", "no-self-use", + "--disable", "pointless-statement", + "--disable", "super-with-arguments", + "--disable", "too-few-public-methods", + "--disable", "too-many-arguments", + "--disable", "too-many-branches", + "--disable", "too-many-instance-attributes", + "--disable", "too-many-locals", + "--disable", "too-many-return-statements", + "--disable", "too-many-statements", + "--disable", "undefined-variable", + "--disable", "unidiomatic-typecheck", + "--disable", "unnecessary-pass", + "--disable", "unspecified-encoding", + "--disable", "unused-argument", + "--disable", "unused-import", + "--disable", "unused-variable", + "--disable", "useless-object-inheritance", + "--disable", "wrong-import-order" + ], + + "python.linting.flake8Enabled": 
true,
+    "python.linting.flake8Args": [
+        "--ignore=E115,E116,E123,E128,E226,E231,E261,E265,E266,E302,E303,E305,E306,E401,E501,E722,E741,F401,F821,F841,N806"
+    ],
+
+    "python.linting.mypyEnabled": false,
+
+    "python.linting.pydocstyleEnabled": false,
+
+    "python.linting.pycodestyleEnabled": true,
+    "python.linting.pycodestyleArgs": [
+        "--ignore=E115,E116,E123,E128,E226,E261,E265,E231,E266,E302,E303,E305,E306,E401,E501,E722,E741"
+    ],
+
+    "python.linting.prospectorEnabled": true,
+
+    "python.linting.pylamaEnabled": true,
+    "python.linting.pylamaArgs": [
+        "--ignore=C901,E115,E116,E123,E128,E226,E231,E261,E265,E266,E302,E303,E305,E306,E401,E501,E0602,E722,E741,W0611,W0612"
+    ],
+
+    "python.linting.banditEnabled": true,
+    "python.linting.banditArgs": [
+        "--skip=B103,B108,B110,B311"
+    ]
+}
diff --git a/README.md b/README.md
index 9a8b574..e6874db 100644
--- a/README.md
+++ b/README.md
@@ -3,58 +3,153 @@ xqueue_watcher
 This is an implementation of a polling [XQueue](https://github.com/edx/xqueue) client and grader.
+Overview
+========
-Running
-=======
-
-`python -m xqueue_watcher -d [path to settings directory]`
-
-
-JSON configuration file
-=======================
-    {
-        "test-123": {
-            "SERVER": "http://127.0.0.1:18040",
-            "CONNECTIONS": 1,
-            "AUTH": ["lms", "lms"],
-            "HANDLERS": [
-                {
-                    "HANDLER": "xqueue_watcher.grader.Grader",
-                    "KWARGS": {
-                        "grader_root": "/path/to/course/graders/",
-                    }
-                }
-            ]
-        }
-    }
+There are several components in a working XQueue Watcher service:
+- **XQueue Watcher**: polls an XQueue service continually for new submissions and grades them.
+- **Submissions Handler**: when the watcher finds a new submission, it passes it to the handler for grading. The handler is generic and can be configured to work with different submissions through individual submission graders.
+- **Individual Submission Grader**: each exercise or homework may specify its own "grader". This maps to a file on the server that usually defines test cases or additional processing for the student submission.
+
+Usually your server will look like this:
+```
+root/
+├── xqueue-watcher/
+│   ├── ...              # xqueue-watcher repo, unchanged
+│   └── ...
+├── config/
+│   ├── conf.d/
+│   │   └── my-course.json
+│   └── logging.json
+└── my-course/
+    ├── exercise1/
+    │   ├── grader.py    # per-exercise grader
+    │   └── answer.py    # if using JailedGrader
+    ├── ...
+    └── exercise2/
+        ├── grader.py
+        └── answer.py
+```
+Running XQueue Watcher
+======================
+
+Usually you can run XQueue Watcher without making any changes to it. Keep course-specific configuration and graders in separate folders, as shown above, so that you can update xqueue_watcher at any time.
+
+Install the requirements before running `xqueue_watcher`:
+```bash
+cd xqueue-watcher/
+make requirements
+```
+
+Now you're ready to run it.
+```bash
+python -m xqueue_watcher -d [path to the config directory, e.g. ../config]
+```
+
+The course configuration JSON file in `conf.d` should have the following structure:
+```json
+    {
+        "test-123": {
+            "SERVER": "http://127.0.0.1:18040",
+            "CONNECTIONS": 1,
+            "AUTH": ["lms", "lms"],
+            "HANDLERS": [
+                {
+                    "HANDLER": "xqueue_watcher.grader.Grader",
+                    "KWARGS": {
+                        "grader_root": "/path/to/course/graders/",
+                    }
+                }
+            ]
+        }
+    }
+```
 * `test-123`: the name of the queue
 * `SERVER`: XQueue server address
 * `AUTH`: list of username, password
 * `CONNECTIONS`: how many threads to spawn to watch the queue
 * `HANDLERS`: list of callables that will be called for each queue submission
-    * `HANDLER`: callable name
-    * `KWARGS`: optional keyword arguments to apply during instantiation
+    * `HANDLER`: callable name; see the Submissions Handler section below
+    * `KWARGS`: optional keyword arguments to apply during instantiation
+        * `grader_root`: path to the course directory, e.g. /path/to/my-course
+> TODO: document logging.json
-xqueue_watcher.grader.Grader
-========================
+Submissions Handler
+===================
+
+When xqueue_watcher detects a new submission, it passes it to the submissions handler for grading. A new handler is instantiated, based on the name configured above, with the submission information retrieved from XQueue. Two handlers ship with xqueue_watcher: `Grader` and `JailedGrader` (for Python, using CodeJail). If you don't use `JailedGrader`, you have to implement your own handler by subclassing `xqueue_watcher.grader.Grader`.
+
+The payload from XQueue is JSON that usually looks like the example below. Note that "grader" is a required field in "grader_payload" and must be configured accordingly in Studio for the exercise.
+```json
+{
+    "student_info": {
+        "random_seed": 1,
+        "submission_time": "20210109222647",
+        "anonymous_student_id": "6d07814a4ece5cdda54af1558a6dfec0"
+    },
+    "grader_payload": "\n {\"grader\": \"relative/path/to/grader.py\"}\n ",
+    "student_response": "print \"hello\"\r\n "
+}
+```
+
+## Custom Handler
 To implement a pull grader:
-Subclass xqueue_watcher.grader.Grader and override the `grade` method. Then add your grader to the config like `"handler": "my_module.MyGrader"`. The arguments for the `grade` method are:
- * `grader_path`: absolute path to the grader defined for the current problem
- * `grader_config`: other configuration particular to the problem
- * `student_response`: student-supplied code
+Subclass `xqueue_watcher.grader.Grader` and override the `grade` method. Then add your grader to the config like `"HANDLER": "my_module.MyGrader"`. The arguments for the `grade` method are:
+ * `grader_path`: absolute path to the grader defined for the current problem
+ * `grader_config`: other configuration particular to the problem
+ * `student_response`: student-supplied code
+Note that `grader_path` is constructed by appending the relative path from `grader_payload` to the `grader_root` in the configuration JSON. If the handler cannot find the grader file at that path, it will fail to grade the submission.
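For illustration, a minimal custom handler might look like the sketch below. The module name (`my_module`), the JSON answer file, and the exact-match check are invented for this example; the shape of the returned dictionary (`errors`, `tests`, `correct`, `score`) mirrors what `JailedGrader` returns in this repository.

```python
# my_module.py -- hypothetical custom handler, for illustration only
import json

from xqueue_watcher.grader import Grader


class MyGrader(Grader):
    def grade(self, grader_path, grader_config, student_response):
        # Same result shape as JailedGrader: errors/tests/correct/score.
        results = {'errors': [], 'tests': [], 'correct': False, 'score': 0}

        # Assume the per-problem grader file is a JSON document holding the
        # expected answer (purely illustrative).
        try:
            with open(grader_path) as f:
                expected = json.load(f)['answer']
        except (OSError, ValueError, KeyError):
            results['errors'].append('Could not load the grader for this problem.')
            return results

        correct = student_response.strip() == expected.strip()
        results['correct'] = correct
        results['score'] = 1 if correct else 0
        # (short description, long description, correct, expected output, actual output)
        results['tests'].append(('Exact-match check', '', correct, expected, student_response.strip()))
        return results
```

Point `"HANDLER"` at `my_module.MyGrader` in the queue configuration to use it.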
-Sandboxing
-==========
-To sandbox python, use [CodeJail](https://github.com/edx/codejail). In your handler configuration, add:
+## Grading Python submissions with JailedGrader
-    "CODEJAIL": {
-        "name": "python",
-        "python_bin": "/path/to/sandbox/python",
-        "user": "sandbox_username"
-    }
+`xqueue_watcher` provides a few utilities for grading Python submissions, including `JailedGrader`, which runs Python code in a sandboxed environment, plus grading support utilities.
+### JailedGrader
+To sandbox python, use [CodeJail](https://github.com/edx/codejail). In your handler configuration, add:
+```json
+    "HANDLER": "xqueue_watcher.jailedgrader.JailedGrader",
+    "CODEJAIL": {
+        "name": "python",
+        "python_bin": "/path/to/sandbox/python",
+        "user": "sandbox_username"
+    }
+```
 Then, `codejail_python` will automatically be added to the kwargs for your handler. You can then import codejail.jail_code and run `jail_code("python", code...)`. You can define multiple sandboxes and use them as in `jail_code("special-python", ...)`
+
+To use `JailedGrader`, you also need to provide an `answer.py` file in the same folder as the `grader.py` file. The grader runs both the student submission and `answer.py` and compares their outputs.
+
+### Grading Support utilities
+There are several grading support utilities that make writing `grader.py` for Python exercises easy. Check out
+`grader_support/gradelib.py` for the documentation.
+
+- `grader_support.gradelib.Grader`: a base class for creating a new submission grader. Not to be confused with `xqueue_watcher.grader.Grader`. You can add input checks, preprocessors and tests to a grader object.
+- `grader_support.gradelib.Test`: a base class for creating tests for a submission. Usually a submission can be graded with one or a few tests. There are also a few useful test functions and classes included, like `InvokeStudentFunctionTest`, `exec_wrapped_code`, etc.
+- Preprocessors: utilities to process the raw submission before grading it. `wrap_in_string` is useful for testing code that is not wrapped in a function.
+- Input checks: sanity checks run before a submission is executed, e.g. `required_string` or `prohibited_string`.
+
+Using the provided grader class, your `grader.py` would look something like this:
+```python
+from grader_support import gradelib
+grader = gradelib.Grader()
+
+# invoke the student function foo with arguments []
+grader.add_test(gradelib.InvokeStudentFunctionTest('foo', []))
+```
+
+Or with a preprocessor:
+```python
+from grader_support import gradelib
+
+grader = gradelib.Grader()
+
+# execute raw student code & capture stdout
+grader.add_preprocessor(gradelib.wrap_in_string)
+grader.add_test(gradelib.ExecWrappedStudentCodeTest({}, "basic test"))
+```
+
+You can also write your own test classes, preprocessors and input checks, as sketched below.
\ No newline at end of file
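For example, a custom input check is just a function from the submission text to an error string (or `None` if the submission is acceptable), as documented in `gradelib.py`. The check below is invented for illustration, and the `add_input_check` call is assumed by analogy with `add_test` and `add_preprocessor`.

```python
from grader_support import gradelib

grader = gradelib.Grader()

def no_forbidden_import(submission_text):
    """Input check: reject submissions that import the os module."""
    if 'import os' in submission_text:
        return 'Please solve this exercise without importing the os module.'
    return None

# Assumed to exist by analogy with add_test()/add_preprocessor().
grader.add_input_check(no_forbidden_import)

# invoke the student function foo with arguments []
grader.add_test(gradelib.InvokeStudentFunctionTest('foo', []))
```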
diff --git a/grader_support/gradelib.py b/grader_support/gradelib.py
index 6f16721..bc6dd5d 100644
--- a/grader_support/gradelib.py
+++ b/grader_support/gradelib.py
@@ -69,6 +69,9 @@ def __init__(self):
         # list of functions: submission_text -> error text or None
         self._input_checks = []
+
+        # Flag: Do not run, just check input
+        self._only_check_input = False
 
         # list of functions: submission_text -> processed_submission_text. Run
         # in the specified order. (foldl)
         self._preprocessors = [fix_line_endings]
@@ -88,6 +91,12 @@ def input_errors(self, submission_str):
         """
         return [_f for _f in [check(submission_str) for check in self._input_checks] if _f]
 
+    def only_check_input(self):
+        return self._only_check_input
+
+    def set_only_check_input(self, value):
+        self._only_check_input = value
+
     def preprocess(self, submission_str):
         """
         submission: string
@@ -546,6 +555,17 @@ def __init__(self, fn_name, args, environment=None, output_writer=None, short_de
             short_desc = "Test: %s(%s)" % (fn_name, ", ".join(repr(a) for a in args))
         Test.__init__(self, test_fn, short_desc, detailed_desc, compare)
 
+class ExecWrappedStudentCodeTest(Test):
+    """
+    A Test that execs the wrapped student code and captures its stdout.
+    The code must be preprocessed with `wrap_in_string`.
+    """
+    def __init__(self, environment=None, short_desc=None, detailed_desc=None, compare=None):
+        test_fn = exec_wrapped_code(environment)
+        if short_desc is None:
+            short_desc = "Test: execute wrapped student code"
+        Test.__init__(self, test_fn, short_desc, detailed_desc, compare)
+
 def round_float_writer(n):
     """
     Returns an output_writer function that rounds its argument to `n` places.
diff --git a/requirements/production.txt b/requirements/production.txt
index eee07bc..9ce674a 100644
--- a/requirements/production.txt
+++ b/requirements/production.txt
@@ -4,7 +4,7 @@
 #
 #    make upgrade
 #
--e git+https://github.com/edx/codejail.git@4127fc4bd5775cc72aee8d7f0a70e31405e22439#egg=codejail  # via -r requirements/base.txt
+EdX-CodeJail >= 3.2.0
 backports.os==0.1.1       # via -r requirements/base.txt, path.py
 certifi==2020.6.20        # via -r requirements/base.txt, requests
 chardet==3.0.4            # via -r requirements/base.txt, requests
diff --git a/setup.py b/setup.py
index a4c14f6..6de6c1e 100644
--- a/setup.py
+++ b/setup.py
@@ -5,7 +5,8 @@
     version='0.2',
     description='XQueue Pull Grader',
     packages=[
+        'grader_support',
         'xqueue_watcher',
     ],
-    install_requires=open('requirements/production.txt', 'rb').readlines()
+    install_requires=open('requirements/production.txt', 'r').readlines()
 )
diff --git a/xqueue_watcher/grader.py b/xqueue_watcher/grader.py
index d3b50d6..7b1a542 100644
--- a/xqueue_watcher/grader.py
+++ b/xqueue_watcher/grader.py
@@ -116,9 +116,13 @@ def process_item(self, content, queue=None):
             files = content['xqueue_files']
 
         # Delivery from the lms
+        print("____ DEBUG ____")
+        print(body)
         body = json.loads(body)
         student_response = body['student_response']
         payload = body['grader_payload']
+        print(student_response)
+        print(payload)
         try:
             grader_config = json.loads(payload)
         except ValueError as err:
@@ -130,8 +134,12 @@ def process_item(self, content, queue=None):
             raise
 
         self.log.debug("Processing submission, grader payload: {0}".format(payload))
+        #relative_grader_path = 'lesson1_hw2/grader.py' # TODO actually have a grader in the config
         relative_grader_path = grader_config['grader']
         grader_path = (self.grader_root / relative_grader_path).abspath()
+        print("___ DEBUG ___")
+        print("Grader path", grader_path)
+        print("Relative path", relative_grader_path)
 
         start = time.time()
         results = self.grade(grader_path, grader_config, student_response)
diff --git a/xqueue_watcher/jailedgrader.py b/xqueue_watcher/jailedgrader.py
index 8bab17f..be45b8a 100644
--- a/xqueue_watcher/jailedgrader.py
+++ b/xqueue_watcher/jailedgrader.py
@@ -10,6 +10,7 @@
 import gettext
 from path import Path
 import six
+import traceback
 
 import codejail
 
@@ -66,7 +67,7 @@ class JailedGrader(Grader):
     and optionally codejail_python="python
name" (the name that you used to configure codejail) """ def __init__(self, *args, **kwargs): - self.codejail_python = kwargs.pop("codejail_python", "python") + self.codejail_python = kwargs.pop("codejail_python", "python3") super(JailedGrader, self).__init__(*args, **kwargs) self.locale_dir = self.grader_root / "conf" / "locale" self.fork_per_item = False # it's probably safe not to fork @@ -78,14 +79,24 @@ def _enable_i18n(self, language): trans.install(names=None) def _run(self, grader_path, thecode, seed): + print("--- RUN CODE ---", grader_path, thecode) files = SUPPORT_FILES + [grader_path] if self.locale_dir.exists(): files.append(self.locale_dir) extra_files = [('submission.py', thecode.encode('utf-8'))] argv = ["-m", "grader_support.run", Path(grader_path).basename(), 'submission.py', seed] + print("argv -- ", argv) + print("files", files) + print("extra_files", extra_files) r = codejail.jail_code.jail_code(self.codejail_python, files=files, extra_files=extra_files, argv=argv) + print("result", r.status, r.stdout, r.stderr) return r + def read_answer_file(self, answer_path): + print("answer path", answer_path) + with open(answer_path, 'rb') as f: + return f.read().decode('utf-8') + def grade(self, grader_path, grader_config, submission): if type(submission) != six.text_type: self.log.warning("Submission is NOT unicode") @@ -116,9 +127,24 @@ def grade(self, grader_path, grader_config, submission): self._enable_i18n(grader_config.get("lang", LANGUAGE)) - answer_path = Path(grader_path).dirname() / 'answer.py' - with open(answer_path, 'rb') as f: - answer = f.read().decode('utf-8') + print("__DEBUG__") + print("grader path", grader_path) + + answers = [] + try: + answer_dir_files = os.listdir(os.path.dirname(grader_path)) + answer_files = list(filter(lambda f: f.lower().startswith('answer') and f.endswith('.py'), answer_dir_files)) + answers = [Path(grader_path).dirname() + '/' + a for a in answer_files] + print("Multiple answer files: " + str(answers)) + except Exception: + print(traceback.format_exc()) + answers = [Path(grader_path).dirname() + '/answer.py'] + + read_answer_files = [self.read_answer_file(f) for f in answers] + #answer_path = Path(grader_path).dirname() / 'answer.py' + #print("answer path", answer_path) + #with open(answer_path, 'rb') as f: + #answer = f.read().decode('utf-8') # Import the grader, straight from the original file. (It probably isn't in # sys.path, and we may be in a long running gunicorn process, so we don't @@ -133,23 +159,41 @@ def grade(self, grader_path, grader_config, submission): # Don't run tests if there were errors return results + if grader.only_check_input(): + results['correct'] = True + results['score'] = 1 + self.log.debug('Only checking inputs, returning correct.') + return results + # Add a unicode encoding declaration. - processed_answer = prepend_coding(grader.preprocess(answer)) + #processed_answer = prepend_coding(grader.preprocess(answer)) + processed_answers = [prepend_coding(grader.preprocess(a)) for a in read_answer_files] processed_submission = prepend_coding(grader.preprocess(submission)) + #print("processed answer", processed_answer) + for a in processed_answers: + print("processed answer", a) + print("processed_submission", processed_submission) + # Same seed for both runs seed = str(random.randint(0, 20000)) # Run the official answer, to get the expected output. 
expected_ok = False expected_exc = None + expected_solutions = [] try: # If we want a factor of two speedup for now: trust the staff solution to # avoid hitting the sandbox. (change run to run_trusted) expected_outputs = None # in case run_trusted raises an exception. - expected_outputs = self._run(grader_path, processed_answer, seed).stdout + #expected_outputs = self._run(grader_path, processed_answer, seed).stdout + expected_outputs = [self._run(grader_path, a, seed).stdout for a in processed_answers] + print("expected_outputs", expected_outputs) if expected_outputs: - expected = json.loads(expected_outputs.decode('utf-8')) + for o in expected_outputs: + if o: + expected = json.loads(o.decode('utf-8')) + expected_solutions.append(expected) expected_ok = True except Exception: expected_exc = sys.exc_info() @@ -205,42 +249,63 @@ def grade(self, grader_path, grader_config, submission): # Compare actual and expected through the grader tests, but only if we haven't # already found a problem. corrects = [] + tests_dont_match_up = [] if not results['errors']: - expected_results = expected['results'] - actual_results = actual['results'] - if len(expected_results) != len(actual_results): + is_num_results_diff = True + for expected in expected_solutions: + expected_results = expected['results'] + actual_results = actual['results'] + if len(expected_results) == len(actual_results): + is_num_results_diff = False + break + if is_num_results_diff: results['errors'].append(_('Something went wrong: different numbers of ' 'tests ran for your code and for our reference code.')) return results - for test, exp, act in zip(grader.tests(), expected_results, actual_results): - exp_short_desc, exp_long_desc, exp_output = exp - act_short_desc, act_long_desc, act_output = act - if exp_short_desc != act_short_desc: - results['errors'].append(_("Something went wrong: tests don't match up.")) - # TODO: don't give up so easily? - return results - # Truncate here--we don't want to send long output back, and also don't want to - # confuse students by comparing the full output but sending back truncated output. - act_output = truncate(act_output) - try: - correct = test.compare_results(exp_output, act_output) - except EndTest as e: - # Allows a grader's compare_results function to raise an EndTest exception - # (defined in gradelib.py). This enables the checker to print out an error - # message to the student, which will be appended to the end of stdout. - if e is not None: - act_output += '\n' - error_msg = _("ERROR") - act_output += "*** {error_msg}: {error_detail} ***".format( - error_msg=error_msg, - error_detail=e - ) - correct = False - corrects.append(correct) - if not grader_config.get("hide_output", False): - results['tests'].append((exp_short_desc, exp_long_desc, - correct, exp_output, act_output)) + final_results = results + for expected in expected_solutions: + corrects = [] + results = final_results + expected_results = expected['results'] + actual_results = actual['results'] + for test, exp, act in zip(grader.tests(), expected_results, actual_results): + exp_short_desc, exp_long_desc, exp_output = exp + act_short_desc, act_long_desc, act_output = act + tests_dont_match_up.append(exp_short_desc != act_short_desc) + if exp_short_desc != act_short_desc: + #results['errors'].append(_("Something went wrong: tests don't match up.")) + # TODO: don't give up so easily? 
+ #return results + # Jump to next solution, no need to compare results here + next + # Truncate here--we don't want to send long output back, and also don't want to + # confuse students by comparing the full output but sending back truncated output. + act_output = truncate(act_output) + try: + correct = test.compare_results(exp_output, act_output) + except EndTest as e: + # Allows a grader's compare_results function to raise an EndTest exception + # (defined in gradelib.py). This enables the checker to print out an error + # message to the student, which will be appended to the end of stdout. + if e is not None: + act_output += '\n' + error_msg = _("ERROR") + act_output += "*** {error_msg}: {error_detail} ***".format( + error_msg=error_msg, + error_detail=e + ) + correct = False + corrects.append(correct) + if not grader_config.get("hide_output", False): + results['tests'].append((exp_short_desc, exp_long_desc, + correct, exp_output, act_output)) + if len(corrects) > 0 and all(corrects): + break # This solution works, short-circuit here + # All solutions ran into "tests don't match up" problem + if all(tests_dont_match_up): + results['errors'].append(_("Something went wrong: tests don't match up.")) + return results # If there were no tests run, then there was probably an error, so it's incorrect n = len(corrects) @@ -272,15 +337,16 @@ def main(args): # pragma: no cover if len(args) != 2: return - configure("python", sys.executable, user=getpass.getuser()) + configure('python3', sys.executable, user=getpass.getuser()) (grader_path, submission_path) = args with open(submission_path) as f: - submission = f.read().decode('utf-8') + submission = f.read() # .decode('utf-8') grader_config = {"lang": "eo"} - grader_path = path(grader_path).abspath() - g = JailedGrader(grader_root=grader_path.dirname().parent.parent) + grader_path = Path(grader_path).abspath() + g = JailedGrader(grader_root=grader_path.dirname().parent.parent, + codejail_python='python3') pprint(g.grade(grader_path, grader_config, submission)) diff --git a/xqueue_watcher/jailedgrader.py.2022-03-20-fully-functional-single-answer b/xqueue_watcher/jailedgrader.py.2022-03-20-fully-functional-single-answer new file mode 100644 index 0000000..0799a84 --- /dev/null +++ b/xqueue_watcher/jailedgrader.py.2022-03-20-fully-functional-single-answer @@ -0,0 +1,307 @@ +""" +An implementation of a grader that uses codejail to sandbox submission execution. +""" +import codecs +import os +import sys +import imp +import json +import random +import gettext +from path import Path +import six + +import codejail + +from grader_support.gradelib import EndTest +from grader_support.graderutil import LANGUAGE +import grader_support + +from .grader import Grader +from six.moves import zip + +TIMEOUT = 1 + +def path_to_six(): + """ + Return the full path to six.py + """ + if any(six.__file__.endswith(suffix) for suffix in ('.pyc', '.pyo')): + # __file__ points to the compiled bytecode in python 2 + return Path(six.__file__[:-1]) + else: + # __file__ points to the .py file in python 3 + return Path(six.__file__) + + +SUPPORT_FILES = [ + Path(grader_support.__file__).dirname(), + path_to_six(), +] + + +def truncate(out): + """ + Truncate test output that's too long. This is per-test. + """ + TOO_LONG = 5000 # 5K bytes seems like enough for a single test. 
+ if len(out) > TOO_LONG: + out = out[:TOO_LONG] + "...OUTPUT TRUNCATED" + + return out + + +def prepend_coding(code): + """ + Add a coding line--makes submissions with inline unicode not + explode (as long as they're utf8, I guess) + """ + return '# coding: utf8\n' + code + + +class JailedGrader(Grader): + """ + A grader implementation that uses codejail. + Instantiate it with grader_root="path/to/graders" + and optionally codejail_python="python name" (the name that you used to configure codejail) + """ + def __init__(self, *args, **kwargs): + self.codejail_python = kwargs.pop("codejail_python", "python") + super(JailedGrader, self).__init__(*args, **kwargs) + self.locale_dir = self.grader_root / "conf" / "locale" + self.fork_per_item = False # it's probably safe not to fork + # EDUCATOR-3368: OpenBLAS library is allowed to allocate 1 thread + os.environ["OPENBLAS_NUM_THREADS"] = "1" + + def _enable_i18n(self, language): + trans = gettext.translation('graders', localedir=self.locale_dir, fallback=True, languages=[language]) + trans.install(names=None) + + def _run(self, grader_path, thecode, seed): + print("--- RUN CODE ---", grader_path, thecode) + files = SUPPORT_FILES + [grader_path] + if self.locale_dir.exists(): + files.append(self.locale_dir) + extra_files = [('submission.py', thecode.encode('utf-8'))] + argv = ["-m", "grader_support.run", Path(grader_path).basename(), 'submission.py', seed] + print("argv -- ", argv) + print("files", files) + print("extra_files", extra_files) + r = codejail.jail_code.jail_code(self.codejail_python, files=files, extra_files=extra_files, argv=argv) + print("result", r.status, r.stdout, r.stderr) + return r + + def grade(self, grader_path, grader_config, submission): + if type(submission) != six.text_type: + self.log.warning("Submission is NOT unicode") + + results = { + 'errors': [], + 'tests': [], + 'correct': False, + 'score': 0, + } + + # There are some cases where the course team would like to accept a + # student submission but not process the student code. Some examples are + # cases where the problem would require dependencies that are difficult + # or impractical to install in a sandbox or if the complexity of the + # solution would cause the runtime of the student code to exceed what is + # possible in the sandbox. + + # skip_grader is a flag in the grader config which is a boolean. If it + # is set to true on a problem then it will always show that the + # submission is correct and give the student a full score for the + # problem. + if grader_config.get('skip_grader', False): + results['correct'] = True + results['score'] = 1 + self.log.debug('Skipping the grader.') + return results + + self._enable_i18n(grader_config.get("lang", LANGUAGE)) + + print("__DEBUG__") + print("grader path", grader_path) + + answer_path = Path(grader_path).dirname() / 'answer.py' + print("answer path", answer_path) + with open(answer_path, 'rb') as f: + answer = f.read().decode('utf-8') + + # Import the grader, straight from the original file. (It probably isn't in + # sys.path, and we may be in a long running gunicorn process, so we don't + # want to add stuff to sys.path either.) 
+ grader_module = imp.load_source("grader_module", six.text_type(grader_path)) + grader = grader_module.grader + + # Preprocess for grader-specified errors + errors = grader.input_errors(submission) + if errors != []: + results['errors'].extend(errors) + # Don't run tests if there were errors + return results + + if grader.only_check_input(): + results['correct'] = True + results['score'] = 1 + self.log.debug('Only checking inputs, returning correct.') + return results + + # Add a unicode encoding declaration. + processed_answer = prepend_coding(grader.preprocess(answer)) + processed_submission = prepend_coding(grader.preprocess(submission)) + + print("processed answer", processed_answer) + print("processed_submission", processed_submission) + + # Same seed for both runs + seed = str(random.randint(0, 20000)) + + # Run the official answer, to get the expected output. + expected_ok = False + expected_exc = None + try: + # If we want a factor of two speedup for now: trust the staff solution to + # avoid hitting the sandbox. (change run to run_trusted) + expected_outputs = None # in case run_trusted raises an exception. + expected_outputs = self._run(grader_path, processed_answer, seed).stdout + print("expected_outputs", expected_outputs) + if expected_outputs: + expected = json.loads(expected_outputs.decode('utf-8')) + expected_ok = True + except Exception: + expected_exc = sys.exc_info() + else: + # We just ran the official answer, nothing should have gone wrong, so check + # everything, and note it as bad if anything is wrong. + if expected_ok: + if expected['exceptions'] \ + or expected['grader']['status'] != 'ok' \ + or expected['submission']['status'] != 'ok': + expected_ok = False + + if not expected_ok: + # We couldn't run the official answer properly, bail out, but don't show + # details to the student, since none of it is their code. + results['errors'].append(_('There was a problem running the staff solution (Staff debug: L364)')) + self.log.error("Couldn't run staff solution. grader = %s, output: %r", + grader_path, expected_outputs, exc_info=expected_exc) + return results + + # The expected code ran fine, go ahead and run the student submission. + actual_ok = False + actual_exc = None + try: + # Do NOT trust the student solution (in production). + actual_outputs = None # in case run raises an exception. + actual_outputs = self._run(grader_path, processed_submission, seed).stdout + if actual_outputs: + actual = json.loads(actual_outputs.decode('utf-8')) + actual_ok = True + else: + results['errors'].append(_("There was a problem running your solution (Staff debug: L379).")) + except Exception: + actual_exc = sys.exc_info() + else: + if actual_ok and actual['grader']['status'] == 'ok': + if actual['submission']['status'] != 'ok': + # The grader ran OK, but the student code didn't, so show the student + # details of what went wrong. There is probably an exception to show. + shown_error = actual['submission']['exception'] or _('There was an error thrown while running your solution.') + results['errors'].append(shown_error) + else: + # The grader didn't run well, we are going to bail. + actual_ok = False + + # If something went wrong, then don't continue + if not actual_ok: + results['errors'].append(_("We couldn't run your solution (Staff debug: L397).")) + self.log.error("Couldn't run student solution. 
grader = %s, output: %r", + grader_path, actual_outputs, exc_info=actual_exc) + return results + + # Compare actual and expected through the grader tests, but only if we haven't + # already found a problem. + corrects = [] + if not results['errors']: + expected_results = expected['results'] + actual_results = actual['results'] + if len(expected_results) != len(actual_results): + results['errors'].append(_('Something went wrong: different numbers of ' + 'tests ran for your code and for our reference code.')) + return results + + for test, exp, act in zip(grader.tests(), expected_results, actual_results): + exp_short_desc, exp_long_desc, exp_output = exp + act_short_desc, act_long_desc, act_output = act + if exp_short_desc != act_short_desc: + results['errors'].append(_("Something went wrong: tests don't match up.")) + # TODO: don't give up so easily? + return results + # Truncate here--we don't want to send long output back, and also don't want to + # confuse students by comparing the full output but sending back truncated output. + act_output = truncate(act_output) + try: + correct = test.compare_results(exp_output, act_output) + except EndTest as e: + # Allows a grader's compare_results function to raise an EndTest exception + # (defined in gradelib.py). This enables the checker to print out an error + # message to the student, which will be appended to the end of stdout. + if e is not None: + act_output += '\n' + error_msg = _("ERROR") + act_output += "*** {error_msg}: {error_detail} ***".format( + error_msg=error_msg, + error_detail=e + ) + correct = False + corrects.append(correct) + if not grader_config.get("hide_output", False): + results['tests'].append((exp_short_desc, exp_long_desc, + correct, exp_output, act_output)) + + # If there were no tests run, then there was probably an error, so it's incorrect + n = len(corrects) + results['correct'] = all(corrects) and n > 0 + results['score'] = float(sum(corrects))/n if n > 0 else 0 + + if n == 0 and len(results['errors']) == 0: + results['errors'] = [ + _("There was a problem while running your code (Staff debug: L450). " + "Please contact the course staff for assistance.") + ] + + return results + + +def main(args): # pragma: no cover + """ + Prints a json list: + [ ("Test description", "value") ] + + TODO: what about multi-file submission? + """ + import logging + from pprint import pprint + from codejail.jail_code import configure + import getpass + + logging.basicConfig(level=logging.DEBUG) + if len(args) != 2: + return + + configure("python", sys.executable, user=getpass.getuser()) + (grader_path, submission_path) = args + + with open(submission_path) as f: + submission = f.read().decode('utf-8') + + grader_config = {"lang": "eo"} + grader_path = path(grader_path).abspath() + g = JailedGrader(grader_root=grader_path.dirname().parent.parent) + pprint(g.grade(grader_path, grader_config, submission)) + + +if __name__ == '__main__': # pragma: no cover + main(sys.argv[1:]) diff --git a/xqueue_watcher/jailedgrader.py.bak.2021-04-09-original b/xqueue_watcher/jailedgrader.py.bak.2021-04-09-original new file mode 100644 index 0000000..c804280 --- /dev/null +++ b/xqueue_watcher/jailedgrader.py.bak.2021-04-09-original @@ -0,0 +1,301 @@ +""" +An implementation of a grader that uses codejail to sandbox submission execution. 
+""" +import codecs +import os +import sys +import imp +import json +import random +import gettext +from path import Path +import six + +import codejail + +from grader_support.gradelib import EndTest +from grader_support.graderutil import LANGUAGE +import grader_support + +from .grader import Grader +from six.moves import zip + +TIMEOUT = 1 + +def path_to_six(): + """ + Return the full path to six.py + """ + if any(six.__file__.endswith(suffix) for suffix in ('.pyc', '.pyo')): + # __file__ points to the compiled bytecode in python 2 + return Path(six.__file__[:-1]) + else: + # __file__ points to the .py file in python 3 + return Path(six.__file__) + + +SUPPORT_FILES = [ + Path(grader_support.__file__).dirname(), + path_to_six(), +] + + +def truncate(out): + """ + Truncate test output that's too long. This is per-test. + """ + TOO_LONG = 5000 # 5K bytes seems like enough for a single test. + if len(out) > TOO_LONG: + out = out[:TOO_LONG] + "...OUTPUT TRUNCATED" + + return out + + +def prepend_coding(code): + """ + Add a coding line--makes submissions with inline unicode not + explode (as long as they're utf8, I guess) + """ + return '# coding: utf8\n' + code + + +class JailedGrader(Grader): + """ + A grader implementation that uses codejail. + Instantiate it with grader_root="path/to/graders" + and optionally codejail_python="python name" (the name that you used to configure codejail) + """ + def __init__(self, *args, **kwargs): + self.codejail_python = kwargs.pop("codejail_python", "python") + super(JailedGrader, self).__init__(*args, **kwargs) + self.locale_dir = self.grader_root / "conf" / "locale" + self.fork_per_item = False # it's probably safe not to fork + # EDUCATOR-3368: OpenBLAS library is allowed to allocate 1 thread + os.environ["OPENBLAS_NUM_THREADS"] = "1" + + def _enable_i18n(self, language): + trans = gettext.translation('graders', localedir=self.locale_dir, fallback=True, languages=[language]) + trans.install(names=None) + + def _run(self, grader_path, thecode, seed): + print("--- RUN CODE ---", grader_path, thecode) + files = SUPPORT_FILES + [grader_path] + if self.locale_dir.exists(): + files.append(self.locale_dir) + extra_files = [('submission.py', thecode.encode('utf-8'))] + argv = ["-m", "grader_support.run", Path(grader_path).basename(), 'submission.py', seed] + print("argv -- ", argv) + print("files", files) + print("extra_files", extra_files) + r = codejail.jail_code.jail_code(self.codejail_python, files=files, extra_files=extra_files, argv=argv) + print("result", r.status, r.stdout, r.stderr) + return r + + def grade(self, grader_path, grader_config, submission): + if type(submission) != six.text_type: + self.log.warning("Submission is NOT unicode") + + results = { + 'errors': [], + 'tests': [], + 'correct': False, + 'score': 0, + } + + # There are some cases where the course team would like to accept a + # student submission but not process the student code. Some examples are + # cases where the problem would require dependencies that are difficult + # or impractical to install in a sandbox or if the complexity of the + # solution would cause the runtime of the student code to exceed what is + # possible in the sandbox. + + # skip_grader is a flag in the grader config which is a boolean. If it + # is set to true on a problem then it will always show that the + # submission is correct and give the student a full score for the + # problem. 
+ if grader_config.get('skip_grader', False): + results['correct'] = True + results['score'] = 1 + self.log.debug('Skipping the grader.') + return results + + self._enable_i18n(grader_config.get("lang", LANGUAGE)) + + print("__DEBUG__") + print("grader path", grader_path) + + answer_path = Path(grader_path).dirname() / 'answer.py' + print("answer path", answer_path) + with open(answer_path, 'rb') as f: + answer = f.read().decode('utf-8') + + # Import the grader, straight from the original file. (It probably isn't in + # sys.path, and we may be in a long running gunicorn process, so we don't + # want to add stuff to sys.path either.) + grader_module = imp.load_source("grader_module", six.text_type(grader_path)) + grader = grader_module.grader + + # Preprocess for grader-specified errors + errors = grader.input_errors(submission) + if errors != []: + results['errors'].extend(errors) + # Don't run tests if there were errors + return results + + # Add a unicode encoding declaration. + processed_answer = prepend_coding(grader.preprocess(answer)) + processed_submission = prepend_coding(grader.preprocess(submission)) + + print("processed answer", processed_answer) + print("processed_submission", processed_submission) + + # Same seed for both runs + seed = str(random.randint(0, 20000)) + + # Run the official answer, to get the expected output. + expected_ok = False + expected_exc = None + try: + # If we want a factor of two speedup for now: trust the staff solution to + # avoid hitting the sandbox. (change run to run_trusted) + expected_outputs = None # in case run_trusted raises an exception. + expected_outputs = self._run(grader_path, processed_answer, seed).stdout + print("expected_outputs", expected_outputs) + if expected_outputs: + expected = json.loads(expected_outputs.decode('utf-8')) + expected_ok = True + except Exception: + expected_exc = sys.exc_info() + else: + # We just ran the official answer, nothing should have gone wrong, so check + # everything, and note it as bad if anything is wrong. + if expected_ok: + if expected['exceptions'] \ + or expected['grader']['status'] != 'ok' \ + or expected['submission']['status'] != 'ok': + expected_ok = False + + if not expected_ok: + # We couldn't run the official answer properly, bail out, but don't show + # details to the student, since none of it is their code. + results['errors'].append(_('There was a problem running the staff solution (Staff debug: L364)')) + self.log.error("Couldn't run staff solution. grader = %s, output: %r", + grader_path, expected_outputs, exc_info=expected_exc) + return results + + # The expected code ran fine, go ahead and run the student submission. + actual_ok = False + actual_exc = None + try: + # Do NOT trust the student solution (in production). + actual_outputs = None # in case run raises an exception. + actual_outputs = self._run(grader_path, processed_submission, seed).stdout + if actual_outputs: + actual = json.loads(actual_outputs.decode('utf-8')) + actual_ok = True + else: + results['errors'].append(_("There was a problem running your solution (Staff debug: L379).")) + except Exception: + actual_exc = sys.exc_info() + else: + if actual_ok and actual['grader']['status'] == 'ok': + if actual['submission']['status'] != 'ok': + # The grader ran OK, but the student code didn't, so show the student + # details of what went wrong. There is probably an exception to show. 
+ shown_error = actual['submission']['exception'] or _('There was an error thrown while running your solution.') + results['errors'].append(shown_error) + else: + # The grader didn't run well, we are going to bail. + actual_ok = False + + # If something went wrong, then don't continue + if not actual_ok: + results['errors'].append(_("We couldn't run your solution (Staff debug: L397).")) + self.log.error("Couldn't run student solution. grader = %s, output: %r", + grader_path, actual_outputs, exc_info=actual_exc) + return results + + # Compare actual and expected through the grader tests, but only if we haven't + # already found a problem. + corrects = [] + if not results['errors']: + expected_results = expected['results'] + actual_results = actual['results'] + if len(expected_results) != len(actual_results): + results['errors'].append(_('Something went wrong: different numbers of ' + 'tests ran for your code and for our reference code.')) + return results + + for test, exp, act in zip(grader.tests(), expected_results, actual_results): + exp_short_desc, exp_long_desc, exp_output = exp + act_short_desc, act_long_desc, act_output = act + if exp_short_desc != act_short_desc: + results['errors'].append(_("Something went wrong: tests don't match up.")) + # TODO: don't give up so easily? + return results + # Truncate here--we don't want to send long output back, and also don't want to + # confuse students by comparing the full output but sending back truncated output. + act_output = truncate(act_output) + try: + correct = test.compare_results(exp_output, act_output) + except EndTest as e: + # Allows a grader's compare_results function to raise an EndTest exception + # (defined in gradelib.py). This enables the checker to print out an error + # message to the student, which will be appended to the end of stdout. + if e is not None: + act_output += '\n' + error_msg = _("ERROR") + act_output += "*** {error_msg}: {error_detail} ***".format( + error_msg=error_msg, + error_detail=e + ) + correct = False + corrects.append(correct) + if not grader_config.get("hide_output", False): + results['tests'].append((exp_short_desc, exp_long_desc, + correct, exp_output, act_output)) + + # If there were no tests run, then there was probably an error, so it's incorrect + n = len(corrects) + results['correct'] = all(corrects) and n > 0 + results['score'] = float(sum(corrects))/n if n > 0 else 0 + + if n == 0 and len(results['errors']) == 0: + results['errors'] = [ + _("There was a problem while running your code (Staff debug: L450). " + "Please contact the course staff for assistance.") + ] + + return results + + +def main(args): # pragma: no cover + """ + Prints a json list: + [ ("Test description", "value") ] + + TODO: what about multi-file submission? 
+ """ + import logging + from pprint import pprint + from codejail.jail_code import configure + import getpass + + logging.basicConfig(level=logging.DEBUG) + if len(args) != 2: + return + + configure("python", sys.executable, user=getpass.getuser()) + (grader_path, submission_path) = args + + with open(submission_path) as f: + submission = f.read().decode('utf-8') + + grader_config = {"lang": "eo"} + grader_path = path(grader_path).abspath() + g = JailedGrader(grader_root=grader_path.dirname().parent.parent) + pprint(g.grade(grader_path, grader_config, submission)) + + +if __name__ == '__main__': # pragma: no cover + main(sys.argv[1:]) diff --git a/xqueue_watcher/manager.py b/xqueue_watcher/manager.py index db64152..63b2dd1 100644 --- a/xqueue_watcher/manager.py +++ b/xqueue_watcher/manager.py @@ -70,7 +70,9 @@ def configure(self, configuration): """ Configure XQueue clients. """ + print(configuration) for queue_name, config in configuration.items(): + print(config) for i in range(config.get('CONNECTIONS', 1)): watcher = self.client_from_config(queue_name, config) self.clients.append(watcher)