Add a --json flag to `dvc stage list`.

With --json, CmdStageList.run() writes a JSON object keyed by
stage.addressing through ui.write_json(); each value holds cmd, deps, outs,
metrics, plots, params and desc. Without the flag the table output is
produced by the same prepare_stages_data() call as before.

Review notes:
  * Newlines, indentation and blank context lines were restored from a
    whitespace-collapsed copy of this patch. The hunk line counts match,
    but confirm exact whitespace against the original commit before applying.
  * In the --json branch, --name-only is not consulted.
  * A params dependency is reported only when its hash_info holds a
    non-empty dict; otherwise it is left out of "params".

diff --git a/dvc/commands/stage.py b/dvc/commands/stage.py
index 6055554e7f..626a364434 100644
--- a/dvc/commands/stage.py
+++ b/dvc/commands/stage.py
@@ -65,6 +65,43 @@ def prepare_stages_data(
     }
 
 
+def _serialize_stage_for_json(stage: "Stage") -> dict:
+    from typing import Any
+
+    from dvc.stage.utils import split_params_deps
+
+    param_deps, other_deps = split_params_deps(stage)
+    deps = [dep.def_path for dep in other_deps]
+    params: dict[str, Any] = {}
+    for param_dep in param_deps:
+        param_values: Any = param_dep.hash_info.value if param_dep.hash_info else {}
+        if isinstance(param_values, dict) and param_values:
+            params[param_dep.def_path] = param_values
+
+    outs, metrics, plots = [], [], []
+    for out in stage.outs:
+        if out.metric:
+            metrics.append(out.def_path)
+        elif out.plot:
+            plots.append(out.def_path)
+        else:
+            outs.append(out.def_path)
+
+    return {
+        "cmd": stage.cmd,
+        "deps": deps,
+        "outs": outs,
+        "metrics": metrics,
+        "plots": plots,
+        "params": params,
+        "desc": stage.desc,
+    }
+
+
+def prepare_stages_data_json(stages: Iterable["Stage"]) -> dict[str, dict]:
+    return {stage.addressing: _serialize_stage_for_json(stage) for stage in stages}
+
+
 class CmdStageList(CmdBase):
     def _get_stages(self) -> Iterable["Stage"]:
         if self.args.all:
@@ -91,8 +128,17 @@ def log_error(relpath: str, exc: Exception):
         self.repo.stage_collection_error_handler = log_error
 
         stages = self._get_stages()
-        data = prepare_stages_data(stages, description=not self.args.name_only)
-        ui.table(list(data.items()))
+
+        if self.args.json:
+            ui.write_json(prepare_stages_data_json(stages))
+        else:
+            ui.table(
+                list(
+                    prepare_stages_data(
+                        stages, description=not self.args.name_only
+                    ).items()
+                )
+            )
 
         return 0
 
@@ -351,4 +397,10 @@ def add_parser(subparsers, parent_parser):
         default=False,
         help="List only stage names.",
     )
+    stage_list_parser.add_argument(
+        "--json",
+        action="store_true",
+        default=False,
+        help="Show output in JSON format.",
+    )
     stage_list_parser.set_defaults(func=CmdStageList)
diff --git a/tests/func/test_stage_list_json.py b/tests/func/test_stage_list_json.py
new file mode 100644
index 0000000000..cc93acbb3c
--- /dev/null
+++ b/tests/func/test_stage_list_json.py
@@ -0,0 +1,143 @@
+import json
+
+import pytest
+
+from dvc.cli import main
+
+
+@pytest.fixture
+def simple_stage(tmp_dir, dvc):
+    tmp_dir.gen("train.py", "print('training')")
+    tmp_dir.gen("data.csv", "a,b,c")
+    (tmp_dir / "dvc.yaml").dump(
+        {
+            "stages": {
+                "train": {
+                    "cmd": "python train.py",
+                    "deps": ["data.csv"],
+                    "outs": ["model.pkl"],
+                    "metrics": [{"metrics.json": {"cache": False}}],
+                    "desc": "Train the model",
+                }
+            }
+        }
+    )
+    return dvc
+
+
+def test_stage_list_json_simple(simple_stage, tmp_dir, capsys):
+    assert main(["stage", "list", "--json"]) == 0
+
+    out, _ = capsys.readouterr()
+    result = json.loads(out)
+
+    assert "train" in result
+    assert result["train"]["cmd"] == "python train.py"
+    assert "data.csv" in result["train"]["deps"]
+    assert "model.pkl" in result["train"]["outs"]
+    assert "metrics.json" in result["train"]["metrics"]
+    assert result["train"]["desc"] == "Train the model"
+
+
+@pytest.fixture
+def interpolated_stage(tmp_dir, dvc):
+    tmp_dir.gen("train.py", "print('training')")
+    (tmp_dir / "params.yaml").dump({"train": {"lr": 0.001, "epochs": 100}})
+    (tmp_dir / "dvc.yaml").dump(
+        {
+            "stages": {
+                "train": {
+                    "cmd": "python train.py --lr ${train.lr} --epochs ${train.epochs}",
+                    "params": ["train.lr", "train.epochs"],
+                }
+            }
+        }
+    )
+    return dvc
+
+
+def test_stage_list_json_interpolated_params(interpolated_stage, tmp_dir, capsys):
+    assert main(["stage", "list", "--json"]) == 0
+
+    out, _ = capsys.readouterr()
+    result = json.loads(out)
+
+    assert "train" in result
+    assert result["train"]["cmd"] == "python train.py --lr 0.001 --epochs 100"
+
+
+@pytest.fixture
+def matrix_stage(tmp_dir, dvc):
+    tmp_dir.gen("train.py", "print('training')")
+    (tmp_dir / "dvc.yaml").dump(
+        {
+            "stages": {
+                "train": {
+                    "matrix": {"lr": [0.001, 0.01], "epochs": [10, 100]},
+                    "cmd": "python train.py --lr ${item.lr} --epochs ${item.epochs}",
+                }
+            }
+        }
+    )
+    return dvc
+
+
+def test_stage_list_json_matrix_stage(matrix_stage, tmp_dir, capsys):
+    assert main(["stage", "list", "--json"]) == 0
+
+    out, _ = capsys.readouterr()
+    result = json.loads(out)
+
+    assert len(result) == 4
+    for stage_name, stage_data in result.items():
+        assert stage_name.startswith("train@")
+        assert "python train.py" in stage_data["cmd"]
+        assert "--lr" in stage_data["cmd"]
+        assert "--epochs" in stage_data["cmd"]
+
+
+@pytest.fixture
+def foreach_stage(tmp_dir, dvc):
+    tmp_dir.gen("process.py", "print('processing')")
+    (tmp_dir / "dvc.yaml").dump(
+        {
+            "stages": {
+                "process": {
+                    "foreach": ["a", "b", "c"],
+                    "do": {"cmd": "python process.py --file ${item}"},
+                }
+            }
+        }
+    )
+    return dvc
+
+
+def test_stage_list_json_foreach_stage(foreach_stage, tmp_dir, capsys):
+    assert main(["stage", "list", "--json"]) == 0
+
+    out, _ = capsys.readouterr()
+    result = json.loads(out)
+
+    expected_stages = ["process@a", "process@b", "process@c"]
+    for stage_name in expected_stages:
+        assert stage_name in result
+        assert "python process.py" in result[stage_name]["cmd"]
+
+
+def test_stage_list_json_with_target(simple_stage, tmp_dir, capsys):
+    assert main(["stage", "list", "--json", "train"]) == 0
+
+    out, _ = capsys.readouterr()
+    result = json.loads(out)
+
+    assert "train" in result
+    assert len(result) == 1
+
+
+def test_stage_list_json_all_flag(simple_stage, tmp_dir, capsys):
+    assert main(["stage", "list", "--json", "--all"]) == 0
+
+    out, _ = capsys.readouterr()
+    result = json.loads(out)
+
+    assert "train" in result
diff --git a/tests/unit/command/test_stage_list_json.py b/tests/unit/command/test_stage_list_json.py
new file mode 100644
index 0000000000..5c8f4d21c6
--- /dev/null
+++ b/tests/unit/command/test_stage_list_json.py
@@ -0,0 +1,187 @@
+import json
+
+import pytest
+
+from dvc.cli import parse_args
+from dvc.commands.stage import CmdStageList
+from dvc.dependency import ParamsDependency
+from dvc.output import Output
+from dvc.stage import PipelineStage
+from dvc_data.hashfile.hash_info import HashInfo
+
+
+def _add_deps(stage, deps):
+    from dvc.dependency import Dependency
+
+    for dep_path in deps:
+        stage.deps.append(Dependency(stage, dep_path))
+
+
+def _add_params(stage, params):
+    for param_file, param_values in params.items():
+        param_dep = ParamsDependency(
+            stage, param_file, params=list(param_values.keys())
+        )
+        param_dep.hash_info = HashInfo("params", param_values)
+        stage.deps.append(param_dep)
+
+
+def _add_outs(stage, outs, metric=False, plot=False):
+    for out_path in outs:
+        stage.outs.append(Output(stage, out_path, metric=metric, plot=plot))
+
+
+def _create_mock_stage(
+    dvc,
+    name,
+    cmd,
+    deps=None,
+    outs=None,
+    metrics=None,
+    plots=None,
+    params=None,
+    desc=None,
+):
+    stage = PipelineStage(dvc, "dvc.yaml", cmd=cmd, name=name)
+    stage.desc = desc
+    _add_deps(stage, deps or [])
+    _add_params(stage, params or {})
+    _add_outs(stage, outs or [])
+    _add_outs(stage, metrics or [], metric=True)
+    _add_outs(stage, plots or [], plot=True)
+    return stage
+
+
+@pytest.mark.parametrize(
+    "stages_data, expected_json",
+    [
+        pytest.param(
+            [
+                {
+                    "name": "train",
+                    "cmd": "python train.py --lr 0.001",
+                    "deps": ["data/train.csv", "src/train.py"],
+                    "outs": ["model.pkl"],
+                    "metrics": ["metrics.json"],
+                    "desc": "Train the model",
+                }
+            ],
+            {
+                "train": {
+                    "cmd": "python train.py --lr 0.001",
+                    "deps": ["data/train.csv", "src/train.py"],
+                    "outs": ["model.pkl"],
+                    "metrics": ["metrics.json"],
+                    "plots": [],
+                    "params": {},
+                    "desc": "Train the model",
+                }
+            },
+            id="simple_stage",
+        ),
+        pytest.param(
+            [
+                {
+                    "name": "preprocess",
+                    "cmd": "python preprocess.py",
+                    "deps": ["raw_data.csv"],
+                    "outs": ["processed_data.csv"],
+                    "params": {"params.yaml": {"preprocess.threshold": 0.5}},
+                }
+            ],
+            {
+                "preprocess": {
+                    "cmd": "python preprocess.py",
+                    "deps": ["raw_data.csv"],
+                    "outs": ["processed_data.csv"],
+                    "metrics": [],
+                    "plots": [],
+                    "params": {"params.yaml": {"preprocess.threshold": 0.5}},
+                    "desc": None,
+                }
+            },
+            id="stage_with_params",
+        ),
+        pytest.param(
+            [
+                {
+                    "name": "evaluate",
+                    "cmd": "python evaluate.py",
+                    "plots": ["plots/confusion.png", "plots/roc.png"],
+                }
+            ],
+            {
+                "evaluate": {
+                    "cmd": "python evaluate.py",
+                    "deps": [],
+                    "outs": [],
+                    "metrics": [],
+                    "plots": ["plots/confusion.png", "plots/roc.png"],
+                    "params": {},
+                    "desc": None,
+                }
+            },
+            id="stage_with_plots",
+        ),
+    ],
+)
+def test_stage_list_json(dvc, mocker, capsys, stages_data, expected_json):
+    cli_args = parse_args(["stage", "list", "--json"])
+    assert cli_args.func == CmdStageList
+
+    cmd = cli_args.func(cli_args)
+
+    mock_stages = [_create_mock_stage(dvc, **data) for data in stages_data]
+    mocker.patch.object(cmd, "_get_stages", return_value=mock_stages)
+
+    assert cmd.run() == 0
+
+    out, _ = capsys.readouterr()
+    result = json.loads(out)
+    assert result == expected_json
+
+
+def test_stage_list_json_multiple_stages(dvc, mocker, capsys):
+    cli_args = parse_args(["stage", "list", "--json"])
+    cmd = cli_args.func(cli_args)
+
+    mock_stages = [
+        _create_mock_stage(dvc, "prepare", "python prepare.py", deps=["raw.csv"]),
+        _create_mock_stage(dvc, "train", "python train.py", outs=["model.pkl"]),
+    ]
+    mocker.patch.object(cmd, "_get_stages", return_value=mock_stages)
+
+    assert cmd.run() == 0
+
+    out, _ = capsys.readouterr()
+    result = json.loads(out)
+
+    assert "prepare" in result
+    assert "train" in result
+    assert result["prepare"]["cmd"] == "python prepare.py"
+    assert result["train"]["cmd"] == "python train.py"
+
+
+def test_stage_list_json_empty(dvc, mocker, capsys):
+    cli_args = parse_args(["stage", "list", "--json"])
+    cmd = cli_args.func(cli_args)
+
+    mocker.patch.object(cmd, "_get_stages", return_value=[])
+
+    assert cmd.run() == 0
+
+    out, _ = capsys.readouterr()
+    result = json.loads(out)
+    assert result == {}
+
+
+def test_stage_list_json_flag_parsing(dvc):
+    cli_args = parse_args(["stage", "list", "--json"])
+    assert cli_args.func == CmdStageList
+    assert cli_args.json is True
+
+
+def test_stage_list_without_json_flag(dvc):
+    cli_args = parse_args(["stage", "list"])
+    assert cli_args.func == CmdStageList
+    assert cli_args.json is False