From e45bc959ffafd3a40e9e515ad70955338704b1bc Mon Sep 17 00:00:00 2001
From: Vansh Lohia
Date: Mon, 15 Dec 2025 22:29:33 +0530
Subject: [PATCH 1/3] test(suite): add smoke test for docs/templates/suite_template.py

Mock a simple agent and verify the FAQAdapter template pattern records a response and passes a metric
---
 tests/test_suite_template_smoke.py | 63 ++++++++++++++++++++++++++++++
 1 file changed, 63 insertions(+)
 create mode 100644 tests/test_suite_template_smoke.py

diff --git a/tests/test_suite_template_smoke.py b/tests/test_suite_template_smoke.py
new file mode 100644
index 0000000..3e410a4
--- /dev/null
+++ b/tests/test_suite_template_smoke.py
@@ -0,0 +1,63 @@
+import pytest
+from agentunit import Scenario, DatasetCase, Runner
+from agentunit.metrics.builtin import AnswerCorrectnessMetric
+
+class MockAgent:
+    def connect(self):
+        return self
+
+    def answer(self, query: str) -> str:
+        return "This is a canned FAQ answer."
+
+from agentunit.adapters.base import AdapterOutcome, BaseAdapter
+from agentunit.datasets.base import DatasetSource
+
+
+class MockAdapter(BaseAdapter):
+    def __init__(self, agent):
+        self.agent = agent
+
+    def prepare(self) -> None:  # pragma: no cover - trivial
+        return None
+
+    def execute(self, case, trace) -> AdapterOutcome:
+        conn = self.agent.connect()
+        resp_text = conn.answer(case.query if hasattr(case, "query") else case.input)
+        trace.record("agent_response", content=resp_text)
+        trace.record("tool_call", name="knowledge_base", status="success")
+        success = case.expected_output is None or resp_text.strip() == case.expected_output.strip()
+        return AdapterOutcome(success=success, output=resp_text)
+
+    def cleanup(self) -> None:  # pragma: no cover - trivial
+        return None
+
+def test_suite_template_faqadapter_smoke():
+    cases = [
+        DatasetCase(
+            id="faq_1",
+            query="How do I reset my password?",
+            expected_output="This is a canned FAQ answer."
+        )
+    ]
+
+    agent = MockAgent()
+    adapter = MockAdapter(agent)
+
+    dataset = DatasetSource.from_list(cases, name="test-faq")
+    scenario = Scenario(name="FAQAdapter smoke test", adapter=adapter, dataset=dataset)
+
+    runner = Runner([scenario])
+    results = runner.run()
+
+    assert results is not None
+
+    # Extract recorded runs from the suite result
+    assert len(results.scenarios) == 1
+    runs = results.scenarios[0].runs
+    matching = [r for r in runs if r.case_id == "faq_1"]
+    assert len(matching) == 1, "Expected exactly one recorded response for faq_1"
+
+    recorded_run = matching[0]
+    assert recorded_run.success is True
+    # Answer correctness metric should be 1.0 for an exact match
+    assert recorded_run.metrics.get("answer_correctness") == 1.0

From f4c399059122e9e211f03ec1a492a1f47db9f17c Mon Sep 17 00:00:00 2001
From: Vansh Lohia
Date: Wed, 17 Dec 2025 16:38:52 +0530
Subject: [PATCH 2/3] style(tests): fix import ordering and remove unused imports in smoke test

Address ruff lint failures: remove unused imports and place module imports at top of file
---
 tests/test_suite_template_smoke.py | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/tests/test_suite_template_smoke.py b/tests/test_suite_template_smoke.py
index 3e410a4..f550db0 100644
--- a/tests/test_suite_template_smoke.py
+++ b/tests/test_suite_template_smoke.py
@@ -1,6 +1,6 @@
-import pytest
 from agentunit import Scenario, DatasetCase, Runner
-from agentunit.metrics.builtin import AnswerCorrectnessMetric
+from agentunit.adapters.base import AdapterOutcome, BaseAdapter
+from agentunit.datasets.base import DatasetSource
 
 class MockAgent:
     def connect(self):
@@ -9,9 +9,6 @@ def connect(self):
     def answer(self, query: str) -> str:
         return "This is a canned FAQ answer."
 
-from agentunit.adapters.base import AdapterOutcome, BaseAdapter
-from agentunit.datasets.base import DatasetSource
-
 
 class MockAdapter(BaseAdapter):
     def __init__(self, agent):

From b45fb1ed1e976b33461efb7593d449b1815464f3 Mon Sep 17 00:00:00 2001
From: Vansh Lohia
Date: Wed, 17 Dec 2025 17:01:54 +0530
Subject: [PATCH 3/3] style(tests): sort imports in smoke test to satisfy ruff

Sort the names inside `from agentunit import ...` alphabetically and keep
that statement ahead of the agentunit.* submodule imports: isort-style
ordering sorts from-imports by module name, so the parent package comes
before its submodules.
---
Review note: this hunk was adjusted by hand so the import is rewritten in
place instead of being moved below the submodule imports. Regenerate the
series with `git format-patch` to refresh the blob ids on the index line.

 tests/test_suite_template_smoke.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_suite_template_smoke.py b/tests/test_suite_template_smoke.py
index f550db0..996ff4e 100644
--- a/tests/test_suite_template_smoke.py
+++ b/tests/test_suite_template_smoke.py
@@ -1,6 +1,6 @@
-from agentunit import Scenario, DatasetCase, Runner
+from agentunit import DatasetCase, Runner, Scenario
 from agentunit.adapters.base import AdapterOutcome, BaseAdapter
 from agentunit.datasets.base import DatasetSource
 
 class MockAgent:
     def connect(self):