Skip to content

Commit cd9cc91

Browse files
committed
local test command
1 parent 66542cf commit cd9cc91

File tree

3 files changed

+310
-0
lines changed

3 files changed

+310
-0
lines changed

eval_protocol/cli.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -427,6 +427,27 @@ def parse_args(args=None):
427427
rft_parser.add_argument("--dry-run", action="store_true", help="Print planned REST calls without sending")
428428
rft_parser.add_argument("--force", action="store_true", help="Overwrite existing evaluator with the same ID")
429429

430+
# Local test command
431+
local_test_parser = subparsers.add_parser(
432+
"local-test",
433+
help="Select an evaluation test and run it locally. If a Dockerfile exists, build and run via Docker; otherwise run on host.",
434+
)
435+
local_test_parser.add_argument(
436+
"--entry",
437+
help="Entrypoint to run (path::function or path). If not provided, a selector will be shown (unless --yes).",
438+
)
439+
local_test_parser.add_argument(
440+
"--ignore-docker",
441+
action="store_true",
442+
help="Ignore Dockerfile even if present; run pytest on host",
443+
)
444+
local_test_parser.add_argument(
445+
"--yes",
446+
"-y",
447+
action="store_true",
448+
help="Non-interactive: if multiple tests exist and no --entry, fails with guidance",
449+
)
450+
430451
# Run command (for Hydra-based evaluations)
431452
# This subparser intentionally defines no arguments itself.
432453
# All arguments after 'run' will be passed to Hydra by parse_known_args.
@@ -559,6 +580,10 @@ def _extract_flag_value(argv_list, flag_name):
559580
return create_rft_command(args)
560581
print("Error: missing subcommand for 'create'. Try: eval-protocol create rft")
561582
return 1
583+
elif args.command == "local-test":
584+
from .cli_commands.local_test import local_test_command
585+
586+
return local_test_command(args)
562587
elif args.command == "run":
563588
# For the 'run' command, Hydra takes over argument parsing.
564589

Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
import argparse
2+
import os
3+
import subprocess
4+
import sys
5+
from typing import List
6+
7+
from .upload import _discover_tests, _prompt_select
8+
9+
10+
def _find_dockerfiles(root: str) -> List[str]:
    """Collect the paths of every file named exactly ``Dockerfile`` under ``root``.

    The walk does not descend into dependency/build directories (virtualenvs,
    ``node_modules``, ``dist``, ``build``, ``__pycache__``, ``.git``,
    ``vendor``) or into any directory whose name starts with a dot.

    Args:
        root: Directory where the recursive search starts.

    Returns:
        Paths (``dirpath`` joined with ``"Dockerfile"``) in ``os.walk`` order.
    """
    excluded = frozenset(
        (".venv", "venv", "node_modules", "dist", "build", "__pycache__", ".git", "vendor")
    )

    def _should_descend(dirname: str) -> bool:
        return not (dirname in excluded or dirname.startswith("."))

    found: List[str] = []
    for current_dir, subdirs, files in os.walk(root):
        # Prune in place so os.walk never enters the excluded directories.
        subdirs[:] = filter(_should_descend, subdirs)
        # File names are unique within a directory, so at most one match exists.
        if "Dockerfile" in files:
            found.append(os.path.join(current_dir, "Dockerfile"))
    return found
19+
20+
21+
def _run_pytest_host(pytest_target: str) -> int:
    """Run pytest for ``pytest_target`` on the host and return its exit code.

    pytest is launched as ``<current interpreter> -m pytest <target> -vs`` so
    it runs under the same Python that is executing this CLI.

    Args:
        pytest_target: File path or pytest node id handed to pytest.

    Returns:
        The exit status of the pytest subprocess.
    """
    command = [sys.executable, "-m", "pytest", pytest_target, "-vs"]
    print(f"Running locally: pytest {pytest_target} -vs")
    return subprocess.run(command).returncode
25+
26+
27+
def _build_docker_image(dockerfile_path: str, image_tag: str) -> bool:
    """Build a Docker image from ``dockerfile_path`` and tag it ``image_tag``.

    The directory containing the Dockerfile is used as the build context.
    Build output (stdout and stderr merged) is captured and printed once the
    build process has exited.

    Args:
        dockerfile_path: Path to the Dockerfile to build.
        image_tag: Tag passed to ``docker build -t``.

    Returns:
        True when ``docker build`` exits with status 0; False when it fails or
        when the ``docker`` executable cannot be found.
    """
    # dirname() yields "" for a bare "Dockerfile"; an empty string is not a
    # usable build-context argument, so fall back to the current directory.
    context_dir = os.path.dirname(dockerfile_path) or "."
    print(f"Building Docker image '{image_tag}' from {dockerfile_path} ...")
    try:
        proc = subprocess.run(
            ["docker", "build", "-t", image_tag, "-f", dockerfile_path, context_dir],
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
        )
    except FileNotFoundError:
        print("Error: docker not found in PATH. Install Docker or use --ignore-docker.")
        return False
    print(proc.stdout)
    return proc.returncode == 0
42+
43+
44+
def _run_pytest_in_docker(project_root: str, image_tag: str, pytest_target: str) -> int:
    """Run ``pytest <pytest_target> -vs`` inside a throwaway container.

    ``project_root`` is bind-mounted at ``/workspace``, which is also the
    container's working directory, and the container is removed on exit.

    Args:
        project_root: Host directory mounted into the container.
        image_tag: Image to run.
        pytest_target: File path or node id passed to pytest in the container.

    Returns:
        The exit status of ``docker run``, or 1 when the ``docker`` executable
        cannot be found.
    """
    container_dir = "/workspace"
    # A read-only mount would be safer, but tests may write artifacts into the
    # project tree, so the mount is left read-write.
    mount_spec = f"{project_root}:{container_dir}"
    docker_cmd = ["docker", "run", "--rm", "-v", mount_spec, "-w", container_dir, image_tag]
    docker_cmd += ["pytest", pytest_target, "-vs"]
    print("Running in Docker:", " ".join(docker_cmd))
    try:
        return subprocess.run(docker_cmd).returncode
    except FileNotFoundError:
        print("Error: docker not found in PATH. Install Docker or use --ignore-docker.")
        return 1
67+
68+
69+
def _relativize_to_root(path: str, project_root: str) -> str:
    """Return ``path`` relative to ``project_root``, or absolute if that is impossible."""
    absolute = path if os.path.isabs(path) else os.path.abspath(os.path.join(project_root, path))
    try:
        return os.path.relpath(absolute, project_root)
    except ValueError:
        # relpath raises ValueError when no relative path exists
        # (e.g. the two paths are on different drives on Windows).
        return absolute


def local_test_command(args: argparse.Namespace) -> int:
    """Resolve one evaluation test and run it with pytest, in Docker or on the host.

    Target resolution:
      * ``args.entry`` given as ``path`` or ``path::function``: the file part is
        rewritten relative to the current working directory (the project root),
        so the same target is valid on the host and inside the container, where
        the project root is mounted as the working directory.
      * no ``args.entry``: tests are discovered under the project root and one
        is chosen through the selector (``args.yes`` makes it non-interactive);
        the file containing the chosen test becomes the target.

    Execution:
      * ``args.ignore_docker`` set, or no Dockerfile found: run on the host.
      * exactly one Dockerfile found: build it as ``ep-evaluator:local`` and run
        pytest inside that image.
      * more than one Dockerfile found: report them and fail.

    Args:
        args: Parsed CLI namespace; ``entry``, ``ignore_docker`` and ``yes`` are
            read with ``getattr`` and may be absent.

    Returns:
        The pytest/docker exit code, or 1 when no target could be resolved, the
        selection was not exactly one test, multiple Dockerfiles exist, or the
        Docker build failed.
    """
    project_root = os.getcwd()

    # Selection and pytest target resolution
    pytest_target: str = ""
    entry = getattr(args, "entry", None)
    if entry:
        if "::" in entry:
            file_part, node_id = entry.split("::", 1)
            if file_part:
                # Relativize only the file part; the node id suffix is kept verbatim.
                pytest_target = f"{_relativize_to_root(file_part, project_root)}::{node_id}"
            else:
                # Nothing to relativize; pass the entry through untouched.
                pytest_target = entry
        else:
            pytest_target = _relativize_to_root(entry, project_root)
    else:
        tests = _discover_tests(project_root)
        if not tests:
            print("No evaluation tests found.\nHint: Ensure @evaluation_test is applied.")
            return 1
        non_interactive = bool(getattr(args, "yes", False))
        selected = _prompt_select(tests, non_interactive=non_interactive)
        if not selected:
            print("No tests selected.")
            return 1
        if len(selected) != 1:
            print("Error: Please select exactly one evaluation test for 'local-test'.")
            return 1
        pytest_target = _relativize_to_root(selected[0].file_path, project_root)

    # Checked once, before any Docker work, instead of once per execution path.
    if not pytest_target:
        print("Error: Failed to resolve a pytest target to run.")
        return 1

    if bool(getattr(args, "ignore_docker", False)):
        return _run_pytest_host(pytest_target)

    dockerfiles = _find_dockerfiles(project_root)
    if len(dockerfiles) > 1:
        print("Error: Multiple Dockerfiles found. Only one Dockerfile is allowed for local-test.")
        for df in dockerfiles:
            print(f" - {df}")
        print("Hint: use --ignore-docker to bypass Docker.")
        return 1
    if len(dockerfiles) == 1:
        image_tag = "ep-evaluator:local"
        if not _build_docker_image(dockerfiles[0], image_tag):
            print("Docker build failed. See logs above.")
            return 1
        return _run_pytest_in_docker(project_root, image_tag, pytest_target)

    # No Dockerfile: run on host
    return _run_pytest_host(pytest_target)

tests/test_cli_local_test.py

Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,145 @@
1+
import os
2+
from types import SimpleNamespace
3+
4+
import pytest
5+
6+
7+
def test_local_test_runs_host_pytest_with_entry(tmp_path, monkeypatch):
    """An explicit entry file is run on the host with a project-relative target."""
    root = tmp_path / "proj"
    root.mkdir()
    monkeypatch.chdir(root)

    # Minimal test module that the entry argument points at.
    dummy_test = root / "metric" / "test_one.py"
    dummy_test.parent.mkdir(parents=True, exist_ok=True)
    dummy_test.write_text("def test_dummy():\n assert True\n", encoding="utf-8")

    from eval_protocol.cli_commands import local_test as lt

    host_target = ""

    def _record_host_run(target: str) -> int:
        nonlocal host_target
        host_target = target
        return 0

    # Report no Dockerfiles so the command takes the host path.
    monkeypatch.setattr(lt, "_find_dockerfiles", lambda _root: [])
    monkeypatch.setattr(lt, "_run_pytest_host", _record_host_run)

    cli_args = SimpleNamespace(entry=str(dummy_test), ignore_docker=False, yes=True)
    exit_code = lt.local_test_command(cli_args)  # pyright: ignore[reportArgumentType]

    assert exit_code == 0
    # The absolute entry must have been rewritten relative to the project root.
    assert host_target == os.path.relpath(str(dummy_test), str(root))
36+
37+
38+
def test_local_test_ignores_docker_when_flag_set(tmp_path, monkeypatch):
    """With ignore_docker=True the host runner is used even if a Dockerfile is reported."""
    root = tmp_path / "proj"
    root.mkdir()
    monkeypatch.chdir(root)

    dummy_test = root / "metric" / "test_two.py"
    dummy_test.parent.mkdir(parents=True, exist_ok=True)
    dummy_test.write_text("def test_dummy():\n assert True\n", encoding="utf-8")

    from eval_protocol.cli_commands import local_test as lt

    host_was_called = False

    def _mark_host_run(target: str) -> int:
        nonlocal host_was_called
        host_was_called = True
        return 0

    # A Dockerfile is reported, but the flag should keep it from being used.
    monkeypatch.setattr(lt, "_find_dockerfiles", lambda _root: [str(root / "Dockerfile")])
    monkeypatch.setattr(lt, "_run_pytest_host", _mark_host_run)

    cli_args = SimpleNamespace(entry=str(dummy_test), ignore_docker=True, yes=True)
    exit_code = lt.local_test_command(cli_args)  # pyright: ignore[reportArgumentType]

    assert exit_code == 0
    assert host_was_called is True
64+
65+
66+
def test_local_test_errors_on_multiple_dockerfiles(tmp_path, monkeypatch):
    """Two reported Dockerfiles make the command fail with exit code 1."""
    root = tmp_path / "proj"
    root.mkdir()
    monkeypatch.chdir(root)

    dummy_test = root / "metric" / "test_three.py"
    dummy_test.parent.mkdir(parents=True, exist_ok=True)
    dummy_test.write_text("def test_dummy():\n assert True\n", encoding="utf-8")

    from eval_protocol.cli_commands import local_test as lt

    reported_dockerfiles = [str(root / "Dockerfile"), str(root / "another" / "Dockerfile")]
    monkeypatch.setattr(lt, "_find_dockerfiles", lambda _root: reported_dockerfiles)

    cli_args = SimpleNamespace(entry=str(dummy_test), ignore_docker=False, yes=True)
    exit_code = lt.local_test_command(cli_args)  # pyright: ignore[reportArgumentType]

    assert exit_code == 1
84+
85+
86+
def test_local_test_builds_and_runs_in_docker(tmp_path, monkeypatch):
    """A single Dockerfile leads to a build followed by a run inside that image."""
    root = tmp_path / "proj"
    root.mkdir()
    monkeypatch.chdir(root)

    dummy_test = root / "metric" / "test_four.py"
    dummy_test.parent.mkdir(parents=True, exist_ok=True)
    dummy_test.write_text("def test_dummy():\n assert True\n", encoding="utf-8")

    from eval_protocol.cli_commands import local_test as lt

    docker_target = ""
    docker_image = ""

    def _record_docker_run(project_root: str, image_tag: str, pytest_target: str) -> int:
        nonlocal docker_target, docker_image
        docker_target = pytest_target
        docker_image = image_tag
        return 0

    # One Dockerfile is reported and the build is stubbed to succeed.
    monkeypatch.setattr(lt, "_find_dockerfiles", lambda _root: [str(root / "Dockerfile")])
    monkeypatch.setattr(lt, "_build_docker_image", lambda _dockerfile, _tag: True)
    monkeypatch.setattr(lt, "_run_pytest_in_docker", _record_docker_run)

    cli_args = SimpleNamespace(entry=str(dummy_test), ignore_docker=False, yes=True)
    exit_code = lt.local_test_command(cli_args)  # pyright: ignore[reportArgumentType]

    assert exit_code == 0
    assert docker_image == "ep-evaluator:local"
    assert docker_target == os.path.relpath(str(dummy_test), str(root))
114+
115+
116+
def test_local_test_selector_single_test(tmp_path, monkeypatch):
    """Without an entry, the discovered test chosen by the selector runs on the host."""
    project = tmp_path / "proj"
    project.mkdir()
    monkeypatch.chdir(project)

    test_file = project / "metric" / "test_sel.py"
    test_file.parent.mkdir(parents=True, exist_ok=True)
    test_file.write_text("def test_dummy():\n assert True\n", encoding="utf-8")

    from eval_protocol.cli_commands import local_test as lt

    # No entry; force discover + selector
    disc = SimpleNamespace(qualname="metric.test_sel", file_path=str(test_file))
    monkeypatch.setattr(lt, "_discover_tests", lambda root: [disc])
    # local_test binds _prompt_select at import time (from .upload import ...),
    # so the stub has to replace the name on ``lt``; patching the upload module
    # would leave the real selector in use.
    monkeypatch.setattr(lt, "_prompt_select", lambda tests, non_interactive=False: tests[:1])
    monkeypatch.setattr(lt, "_find_dockerfiles", lambda root: [])

    called = {"host": False}

    def _fake_host(target: str) -> int:
        called["host"] = True
        return 0

    monkeypatch.setattr(lt, "_run_pytest_host", _fake_host)

    args = SimpleNamespace(entry=None, ignore_docker=False, yes=True)
    rc = lt.local_test_command(args)  # pyright: ignore[reportArgumentType]
    assert rc == 0
    assert called["host"] is True

0 commit comments

Comments
 (0)