diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
new file mode 100644
index 00000000000..0abb69532c8
--- /dev/null
+++ b/.github/workflows/build.yaml
@@ -0,0 +1,28 @@
+name: Build Docker Image
+
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+    branches:
+      - main
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Build docker image
+        run: docker build -f Dockerfile.minimal -t acr .
+
+      - name: Start docker image (background)
+        run: docker run --name acr-pytest -t -d acr
+
+      # Due to difficulties with `conda activate` in docker, we use `conda run` with an explicit environment.
+      # The two --cov-report flags print the coverage report to the terminal and write an XML file inside the container.
+      - name: Run PyTest with Coverage (inside docker)
+        run: docker exec acr-pytest conda run --no-capture-output -n auto-code-rover pytest --cov=app test/ --cov-report=term --cov-report=xml
diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml
new file mode 100644
index 00000000000..9e65b8b326d
--- /dev/null
+++ b/.github/workflows/pytest.yml
@@ -0,0 +1,56 @@
+name: Run PyTest with Coverage
+
+on:
+  push:
+    branches:
+      - main
+      - pytest-ci
+  pull_request:
+    branches:
+      - main
+
+jobs:
+  pytest:
+    runs-on: ubuntu-latest
+    defaults:
+      run:
+        shell: bash -l {0}
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Setup Miniconda
+        uses: conda-incubator/setup-miniconda@v2
+        with:
+          environment-file: environment.yml
+          activate-environment: auto-code-rover
+          python-version: "3.12"
+          auto-update-conda: false
+          auto-activate-base: false
+      - run: |
+          conda info
+          conda list
+
+      - name: Set PYTHONPATH
+        # Mimic the Dockerfile's ENV setting
+        run: echo "PYTHONPATH=$(pwd)" >> $GITHUB_ENV
+
+      - name: Install tox
+        # Install tox inside the conda environment
+        run: conda install -y tox
+
+      - name: Run tox tests
+        run: tox -e py
+
+      - name: Check Coverage Report Exists
+        run: |
+          if [ ! -f coverage.xml ]; then
+            echo "coverage.xml not found! Aborting SonarQube scan."
+            exit 1
+          fi
+
+      - name: SonarQube Scan
+        uses: SonarSource/sonarqube-scan-action@v4
+        env:
+          SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }}
diff --git a/TESTING.md b/TESTING.md
new file mode 100644
index 00000000000..81cc92f2d0f
--- /dev/null
+++ b/TESTING.md
@@ -0,0 +1,26 @@
+# Testing
+
+This project is configured with CI workflows that execute the testing suite on every PR and push to the `main` branch, as well as on pushes to the `pytest-ci` branch. The testing suite can also be run locally with `tox`.
+
+## Setup
+
+The instructions below assume the `auto-code-rover` environment has already been set up. Refer to the [README.md](README.md) for instructions on how to set up the environment.
+
+The testing suite uses the following libraries and tools:
+- Tox, to configure the tests
+- Pytest, to execute the tests
+- Coverage (the Coverage.py tool), to measure code coverage
+
+In the `auto-code-rover` environment, install the required libraries by running:
+
+```bash
+conda install -y tox
+```
+
+and execute the tox commands (configured in `tox.ini`) to run the tests:
+
+```bash
+tox -e py
+```
+
+The test results and the coverage report are displayed in the terminal, and a `coverage.xml` file in Cobertura format is generated in the project's root directory.
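For reference, the Docker-based workflow above reduces to the following commands, which can also be run locally (assuming `Dockerfile.minimal` builds successfully and keeping the same `acr`/`acr-pytest` names):

```bash
# Build the image and start a container in the background.
docker build -f Dockerfile.minimal -t acr .
docker run --name acr-pytest -t -d acr

# Run the suite inside the container. `conda run` sidesteps the
# `conda activate` issues noted in the workflow; the two --cov-report
# flags print coverage to the terminal and write coverage.xml inside
# the container.
docker exec acr-pytest conda run --no-capture-output -n auto-code-rover \
    pytest --cov=app test/ --cov-report=term --cov-report=xml
```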
diff --git a/app/search/search_utils.py b/app/search/search_utils.py
index aa38406329e..14c7bfcf6e5 100644
--- a/app/search/search_utils.py
+++ b/app/search/search_utils.py
@@ -14,6 +14,7 @@ def is_test_file(file_path: str) -> bool:
         "test" in Path(file_path).parts
         or "tests" in Path(file_path).parts
         or file_path.endswith("_test.py")
+        or Path(file_path).name.startswith("test_")
     )
 
 
diff --git a/demo_vis/main.py b/demo_vis/main.py
index aec4f742ce5..86a2a12055d 100644
--- a/demo_vis/main.py
+++ b/demo_vis/main.py
@@ -10,7 +10,7 @@ from flask_cors import cross_origin
 
 sys.path.append("/opt/auto-code-rover/")
 
-from test_data import RawGithubTask_for_debug, test_generate_data
+from demo_vis.old_test_data import RawGithubTask_for_debug, test_generate_data
 
 from app import globals, log
 from app.main import get_args, run_raw_task
diff --git a/demo_vis/test_data.py b/demo_vis/old_test_data.py
similarity index 100%
rename from demo_vis/test_data.py
rename to demo_vis/old_test_data.py
diff --git a/requirements.txt b/requirements.txt
index fb4f899da20..3d459f2d810 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -86,6 +86,8 @@ pylint==3.2.3
 pyro-api==0.1.2
 pyro-ppl==1.9.0
 PySocks
+pytest==8.3.4
+pytest-cov==6.0.0
 python-dotenv==1.0.0
 PyYAML==6.0.1
 referencing==0.32.1
diff --git a/sonar-project.properties b/sonar-project.properties
new file mode 100644
index 00000000000..d4e32077514
--- /dev/null
+++ b/sonar-project.properties
@@ -0,0 +1,7 @@
+sonar.exclusions=conf/**, demo_vis/**, results/**, scripts/**
+sonar.organization=autocoderoversg
+sonar.projectKey=AutoCodeRoverSG_auto-code-rover
+sonar.python.coverage.reportPaths=coverage.xml
+sonar.sources=app/
+sonar.tests=test/
+sonar.verbose=true
diff --git a/test/app/agents/test_agent_common.py b/test/app/agents/test_agent_common.py
new file mode 100644
index 00000000000..d2a12ce310f
--- /dev/null
+++ b/test/app/agents/test_agent_common.py
@@ -0,0 +1,32 @@
+from app.data_structures import MessageThread
+from app.agents.agent_common import replace_system_prompt
+
+def test_replace_system_prompt():
+    # Setup: create a MessageThread with a system message and another message
+    original_prompt = "Original System Prompt"
+    new_prompt = "New System Prompt"
+    messages = [
+        {"role": "system", "content": original_prompt},
+        {"role": "user", "content": "Hello"}
+    ]
+    msg_thread = MessageThread(messages=messages)
+
+    # Execute: replace the system prompt
+    updated_thread = replace_system_prompt(msg_thread, new_prompt)
+
+    # Verify: the first message should now have the new prompt
+    assert updated_thread.messages[0]["content"] == new_prompt, "System prompt was not replaced correctly."
+    # Verify: the rest of the messages remain unchanged
+    assert updated_thread.messages[1]["content"] == "Hello", "User message was unexpectedly modified."
+
+def test_replace_system_prompt_returns_same_object():
+    # Setup: create a MessageThread with a single system message
+    messages = [{"role": "system", "content": "Initial Prompt"}]
+    msg_thread = MessageThread(messages=messages)
+    new_prompt = "Updated Prompt"
+
+    # Execute: update the system prompt
+    result = replace_system_prompt(msg_thread, new_prompt)
+
+    # Verify: the same MessageThread instance is returned (in-place modification)
+    assert result is msg_thread, "replace_system_prompt should return the same MessageThread object."
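The two tests above pin down the contract of `replace_system_prompt`: the system prompt is replaced in place and the same `MessageThread` instance is returned. A minimal sketch of that contract, assuming `MessageThread` stores its messages as a list of role/content dicts with the system message first (the actual implementation in `app/agents/agent_common.py` may differ):

```python
from app.data_structures import MessageThread

def replace_system_prompt(msg_thread: MessageThread, new_prompt: str) -> MessageThread:
    # Assumes messages[0] is the system message, as in the test fixtures above.
    msg_thread.messages[0]["content"] = new_prompt
    # Return the same instance; test_replace_system_prompt_returns_same_object
    # relies on this in-place behavior.
    return msg_thread
```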
diff --git a/test/app/agents/test_agent_reviewer.py b/test/app/agents/test_agent_reviewer.py
new file mode 100644
index 00000000000..af95b045eda
--- /dev/null
+++ b/test/app/agents/test_agent_reviewer.py
@@ -0,0 +1,100 @@
+import json
+import pytest
+from enum import Enum
+
+# --- Dummy Definitions for Testing ---
+
+class ReviewDecision(Enum):
+    YES = "yes"
+    NO = "no"
+
+class Review:
+    def __init__(self, patch_decision, patch_analysis, patch_advice, test_decision, test_analysis, test_advice):
+        self.patch_decision = patch_decision
+        self.patch_analysis = patch_analysis
+        self.patch_advice = patch_advice
+        self.test_decision = test_decision
+        self.test_analysis = test_analysis
+        self.test_advice = test_advice
+
+    def __eq__(self, other):
+        if not isinstance(other, Review):
+            return NotImplemented
+        return (
+            self.patch_decision == other.patch_decision and
+            self.patch_analysis == other.patch_analysis and
+            self.patch_advice == other.patch_advice and
+            self.test_decision == other.test_decision and
+            self.test_analysis == other.test_analysis and
+            self.test_advice == other.test_advice
+        )
+
+# --- Function Under Test ---
+# A local copy of the refactored extract_review_result; the dummy Review and
+# ReviewDecision classes above stand in for the app's data structures.
+def extract_review_result(content: str) -> Review | None:
+    try:
+        data = json.loads(content)
+
+        def get_decision(key: str) -> ReviewDecision:
+            return ReviewDecision(data[key].lower())
+
+        review = Review(
+            patch_decision=get_decision("patch-correct"),
+            patch_analysis=data["patch-analysis"],
+            patch_advice=data["patch-advice"],
+            test_decision=get_decision("test-correct"),
+            test_analysis=data["test-analysis"],
+            test_advice=data["test-advice"],
+        )
+
+        if (review.patch_decision == ReviewDecision.NO and not review.patch_advice and
+            review.test_decision == ReviewDecision.NO and not review.test_advice):
+            return None
+
+        return review
+
+    except Exception:
+        return None
+
+# --- Combined Pytest Unit Tests Using Parameterization ---
+@pytest.mark.parametrize("content,expected", [
+    (
+        json.dumps({
+            "patch-correct": "Yes",
+            "patch-analysis": "Patch analysis text",
+            "patch-advice": "Patch advice text",
+            "test-correct": "No",
+            "test-analysis": "Test analysis text",
+            "test-advice": "Some test advice"
+        }),
+        Review(
+            patch_decision=ReviewDecision.YES,
+            patch_analysis="Patch analysis text",
+            patch_advice="Patch advice text",
+            test_decision=ReviewDecision.NO,
+            test_analysis="Test analysis text",
+            test_advice="Some test advice"
+        )
+    ),
+    (
+        json.dumps({
+            "patch-correct": "No",
+            "patch-analysis": "Patch analysis text",
+            "patch-advice": "",
+            "test-correct": "No",
+            "test-analysis": "Test analysis text",
+            "test-advice": ""
+        }),
+        None
+    ),
+])
+def test_extract_review_valid_and_invalid(content, expected):
+    review = extract_review_result(content)
+    assert review == expected
+
+def test_extract_invalid_json():
+    """Test that invalid JSON input returns None."""
+    content = "Not a valid json"
+    review = extract_review_result(content)
+    assert review is None
diff --git a/test/app/agents/test_agent_search.py b/test/app/agents/test_agent_search.py
new file mode 100644
index 00000000000..6a7a8387a32
--- /dev/null
+++ b/test/app/agents/test_agent_search.py
@@ -0,0 +1,66 @@
+from unittest.mock import patch, MagicMock
+
+from app.agents.agent_search import prepare_issue_prompt, generator
+
+def test_prepare_issue_prompt():
+    input_str = (
+        "  This is a sample problem statement.  \n"
+        "\n"
+        "\n"
+        "It spans multiple lines.\n"
+        "   And has extra spaces.   \n"
+        "\n"
+        "\n"
+        "Final line."
+    )
+
+    expected_output = (
+        "This is a sample problem statement.\n"
+        "It spans multiple lines.\n"
+        "And has extra spaces.\n"
+        "Final line.\n"
+    )
+
+    assert prepare_issue_prompt(input_str) == expected_output
+
+@patch("app.agents.agent_search.common.SELECTED_MODEL", new_callable=MagicMock, create=True)
+@patch("app.agents.agent_search.print_acr")
+@patch("app.agents.agent_search.print_retrieval")
+@patch("app.agents.agent_search.config")
+def test_generator_retry(mock_config, mock_print_retrieval, mock_print_acr, mock_selected_model):
+    """
+    Test the generator branch where re_search is True.
+    In this branch the generator will:
+    1. Yield its first API selection response.
+    2. Process a search result with re_search True (simulating a failed consumption),
+       which adds the search result as a user message and restarts the loop.
+    3. Yield a new API selection response.
+    """
+    # Set configuration flags.
+    mock_config.enable_sbfl = False
+    mock_config.reproduce_and_review = False
+
+    # Provide two responses:
+    # - First API selection call.
+    # - Next iteration API selection call after the retry.
+    mock_selected_model.call.side_effect = [
+        ("API selection response",),
+        ("API selection response after retry",)
+    ]
+
+    issue_stmt = "Sample issue"
+    sbfl_result = ""
+    reproducer_result = ""
+
+    gen = generator(issue_stmt, sbfl_result, reproducer_result)
+
+    res_text, _ = next(gen)
+    assert res_text == "API selection response"
+
+    search_result = "Retry search result"
+    res_text_retry, msg_thread_retry = gen.send((search_result, True))
+    # After the retry, we expect a new API selection response.
+    assert res_text_retry == "API selection response after retry"
+    # Verify that the search result was added to the message thread as a user message.
+    user_msgs = [m for m in msg_thread_retry.messages if m.get("role") == "user"]
+    assert any(search_result in m.get("content", "") for m in user_msgs)
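`test_generator_retry` drives the agent through Python's generator send protocol: the generator yields a model response, and the caller resumes it with a `(search_result, re_search)` pair. Below is a stripped-down, hypothetical sketch of that loop; the real `generator` in `app/agents/agent_search.py` also builds prompts, prints output, and yields a `MessageThread` rather than a plain list:

```python
from typing import Callable

def generator_sketch(issue_stmt: str, model_call: Callable[[], str]):
    # Hypothetical reduction of app.agents.agent_search.generator.
    messages = [{"role": "user", "content": issue_stmt}]
    while True:
        res_text = model_call()
        messages.append({"role": "assistant", "content": res_text})
        # Yield the response; the caller sends back (search_result, re_search).
        search_result, re_search = yield res_text, messages
        # The search result is recorded as a user message either way.
        messages.append({"role": "user", "content": search_result})
        if not re_search:
            # A successful consumption would move on to the analysis stage;
            # this sketch only models the retry path the test exercises.
            break

# Mirroring the test: one response, then a retry after a failed consumption.
responses = iter(["API selection response", "API selection response after retry"])
gen = generator_sketch("Sample issue", lambda: next(responses))
text, _ = next(gen)
assert text == "API selection response"
text, messages = gen.send(("Retry search result", True))
assert text == "API selection response after retry"
```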
diff --git a/test/app/search/test_search_utils.py b/test/app/search/test_search_utils.py
new file mode 100644
index 00000000000..9c97a6b1f24
--- /dev/null
+++ b/test/app/search/test_search_utils.py
@@ -0,0 +1,108 @@
+import ast
+import os
+
+from app.search.search_utils import is_test_file, find_python_files, parse_class_def_args
+
+def test_is_test_file():
+    # Setup: create a list of test file names
+    test_files = [
+        "test_utils.py",
+        "test_search_utils.py",
+        "test_search.py",
+        "utils_test.py",
+        "search_utils_test.py",
+        "search_test.py",
+        "test/test_utils.py",
+    ]
+    # Setup: create a list of non-test file names
+    non_test_files = [
+        "utils.py",
+        "search_utils.py",
+        "search.py",
+        "config/framework.py",
+        "config/routing.py",
+        "greatest_common_divisor.py",  # contains "test" as a substring, but must not be recognized as a test file
+    ]
+
+    # Execute and verify: test files should return True, non-test files should return False
+    for test_file in test_files:
+        assert is_test_file(test_file), f"{test_file} should be recognized as a test file."
+    for non_test_file in non_test_files:
+        assert not is_test_file(non_test_file), f"{non_test_file} should not be recognized as a test file."
+
+
+def test_find_python_files(tmp_path):
+    # Setup: create a list of file names (Python and non-Python files)
+    files = [
+        "main.py",
+        "utils.py",
+        "test/test_something.py",
+        "Controller/MonitorJobController.php",
+        "templates/details.html.twig",
+        "page.tsx",
+        "dfs.cpp",
+    ]
+
+    # The expected list excludes test files (those inside a "test/" directory)
+    expected_python_files = [
+        "main.py",
+        "utils.py",
+    ]
+
+    # Create a temporary base directory that avoids pytest discovery conflicts.
+    base_dir = tmp_path / "files"
+    base_dir.mkdir()
+
+    # Create each file (ensuring that subdirectories are created)
+    for file in files:
+        file_path = base_dir / file
+        file_path.parent.mkdir(parents=True, exist_ok=True)
+        file_path.write_text("")
+
+    # Execute and verify: only Python files inside base_dir should be returned.
+    python_files = find_python_files(str(base_dir))
+    # Convert absolute paths to relative paths for comparison.
+    python_files_rel = [os.path.relpath(pf, str(base_dir)) for pf in python_files]
+    python_files_rel.sort()
+    expected_python_files.sort()
+
+    # Compare lengths
+    assert len(python_files_rel) == len(expected_python_files), (
+        f"Expected {len(expected_python_files)} python files, but got {len(python_files_rel)}."
+    )
+
+    # Compare each element
+    for expected, actual in zip(expected_python_files, python_files_rel):
+        assert actual == expected, f"Expected {expected}, but got {actual}."
+
+def test_parse_class_def_args_simple():
+    source = "class Foo(B, object):\n    pass"
+    tree = ast.parse(source)
+    node = tree.body[0]  # The ClassDef node for Foo
+    result = parse_class_def_args(source, node)
+    # 'B' is returned; 'object' is skipped.
+    assert result == ["B"]
+
+def test_parse_class_def_args_type_call():
+    source = "class Bar(type('D', (), {})):\n    pass"
+    tree = ast.parse(source)
+    node = tree.body[0]
+    result = parse_class_def_args(source, node)
+    # The source segment for the first argument of the type() call is "'D'"
+    assert result == ["'D'"]
+
+def test_parse_class_def_args_mixed():
+    source = "class Baz(C, type('E', (), {}), object):\n    pass"
+    tree = ast.parse(source)
+    node = tree.body[0]
+    result = parse_class_def_args(source, node)
+    # The expected bases are "C" from the ast.Name and "'E'" from the type() call.
+    assert result == ["C", "'E'"]
+
+def test_parse_class_def_args_only_object():
+    source = "class Quux(object):\n    pass"
+    tree = ast.parse(source)
+    node = tree.body[0]
+    result = parse_class_def_args(source, node)
+    # Since only object is used, the result should be an empty list.
+    assert result == []
diff --git a/tox.ini b/tox.ini
new file mode 100644
index 00000000000..08b543070c0
--- /dev/null
+++ b/tox.ini
@@ -0,0 +1,16 @@
+[tox]
+envlist = py312
+skipsdist = True
+
+[testenv]
+passenv = PYTHONPATH
+skip_install = True
+commands =
+    coverage run -m pytest
+    coverage xml
+    coverage report
+
+[coverage:run]
+relative_files = True
+source = app/
+branch = True
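For reference, `tox -e py` executes the three commands configured above in order; the same artifacts can be produced by hand inside the `auto-code-rover` environment:

```bash
coverage run -m pytest    # run the suite, tracking branch coverage over app/
coverage xml              # write coverage.xml (Cobertura) for the SonarQube scan
coverage report           # print the per-file coverage summary to the terminal
```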