From 57b86b15ef98bcc1ff5ce0b3b7826bde11372234 Mon Sep 17 00:00:00 2001 From: Rory Byrne Date: Sat, 18 Jan 2025 21:19:42 +0000 Subject: [PATCH 01/23] feat: Add ExperimentState to commit messages --- mthd/service/git.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/mthd/service/git.py b/mthd/service/git.py index 4d67100..c50bf50 100644 --- a/mthd/service/git.py +++ b/mthd/service/git.py @@ -1,13 +1,16 @@ import git -from mthd.domain.commit import CommitMessage, StageStrategy +from mthd.domain.experiment import ExperimentState +from mthd.domain.git import Commit, StageStrategy class GitService: def __init__(self, repo: git.Repo): self._repo = repo - def stage_and_commit(self, message: CommitMessage): + def get_all_commits(self) -> list[Commit]: ... + + def stage_and_commit(self, state: ExperimentState): """Stage all changes and create a commit with the given message. Args: @@ -17,7 +20,7 @@ def stage_and_commit(self, message: CommitMessage): self._repo.git.add(A=True) # Create commit with formatted message - self._repo.index.commit(message.format()) + self._repo.index.commit(state.as_commit_message()) def should_commit(self, strategy: StageStrategy) -> bool: """Determine if the repo state can be staged and committed From 4e0bb766202d35bf004eaebf415ee6db3ff419b7 Mon Sep 17 00:00:00 2001 From: "Rory Byrne (aider)" Date: Sat, 18 Jan 2025 21:19:44 +0000 Subject: [PATCH 02/23] feat: Implement get_all_commits in GitService --- mthd/service/git.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/mthd/service/git.py b/mthd/service/git.py index c50bf50..1f033bc 100644 --- a/mthd/service/git.py +++ b/mthd/service/git.py @@ -8,7 +8,22 @@ class GitService: def __init__(self, repo: git.Repo): self._repo = repo - def get_all_commits(self) -> list[Commit]: ... + def get_all_commits(self) -> list[Commit]: + """Get all commits in the repository. + + Returns: + List of Commit objects representing the git history + """ + commits = [] + for commit in self._repo.iter_commits(): + commits.append( + Commit( + sha=commit.hexsha, + message=commit.message, + date=commit.committed_datetime + ) + ) + return commits def stage_and_commit(self, state: ExperimentState): """Stage all changes and create a commit with the given message. From f7b245ee9661d4340dacd28c2130d2a1caaa73b9 Mon Sep 17 00:00:00 2001 From: Rory Byrne Date: Sun, 19 Jan 2025 10:22:01 +0000 Subject: [PATCH 03/23] feat: Add unit tests for query service --- tests/unit/service/query_test.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 tests/unit/service/query_test.py diff --git a/tests/unit/service/query_test.py b/tests/unit/service/query_test.py new file mode 100644 index 0000000..e69de29 From 2d4d2fa8ac5506b403247bafc3a4223046dd77bd Mon Sep 17 00:00:00 2001 From: "Rory Byrne (aider)" Date: Sun, 19 Jan 2025 10:22:02 +0000 Subject: [PATCH 04/23] feat: Add query tests mocking ExperimentState.parse --- tests/unit/service/query_test.py | 81 ++++++++++++++++++++++++++++++++ 1 file changed, 81 insertions(+) diff --git a/tests/unit/service/query_test.py b/tests/unit/service/query_test.py index e69de29..230bd8b 100644 --- a/tests/unit/service/query_test.py +++ b/tests/unit/service/query_test.py @@ -0,0 +1,81 @@ +from datetime import datetime +from unittest.mock import Mock, patch + +import pytest + +from mthd.domain.git import Commit +from mthd.domain.query import Query +from mthd.service.git import GitService +from mthd.service.query import QueryService + + +@pytest.fixture +def git_service(): + service = Mock(spec=GitService) + commits = [ + Commit( + sha="abc123", + message="Test 1", + date=datetime(2024, 1, 1), + ), + Commit( + sha="def456", + message="Test 2", + date=datetime(2024, 1, 2), + ), + ] + service.get_all_commits.return_value = commits + return service + + +@pytest.fixture +def query_service(git_service): + return QueryService(git_service) + + +@patch('mthd.domain.experiment.ExperimentState.parse') +def test_execute_query(mock_parse, query_service): + # Setup mock parsed experiments + mock_parse.side_effect = [ + {"accuracy": 0.8, "loss": 0.2}, + {"accuracy": 0.9, "loss": 0.1}, + ] + + # Test query for high accuracy + query = Query.where("accuracy", ">", 0.85) + result = query_service.execute(query) + + assert len(result.commits) == 1 + assert result.num_searched == 2 + assert result.query == query + + +@patch('mthd.domain.experiment.ExperimentState.parse') +def test_execute_simple_query(mock_parse, query_service): + # Setup mock parsed experiments + mock_parse.side_effect = [ + {"metrics": {"loss": 0.2}}, + {"metrics": {"loss": 0.1}}, + ] + + # Test simple query for low loss + result = query_service.execute_simple("loss", "<", 0.15) + + assert len(result.commits) == 1 + assert result.num_searched == 2 + + +@patch('mthd.domain.experiment.ExperimentState.parse') +def test_execute_query_with_limit(mock_parse, query_service): + # Setup mock parsed experiments + mock_parse.side_effect = [ + {"accuracy": 0.9}, + {"accuracy": 0.95}, + ] + + # Test query with limit + query = Query.where("accuracy", ">=", 0.9) + result = query_service.execute(query, limit=1) + + assert len(result.commits) == 1 + assert result.num_searched == 2 From b32c4939f36ba60966fe1d65c50b45ee4d628c0d Mon Sep 17 00:00:00 2001 From: Rory Byrne Date: Sun, 19 Jan 2025 11:02:55 +0000 Subject: [PATCH 05/23] feat: add CLI interface and experiment query functionality The commit adds a new CLI interface and query functionality for experiments: - Implement CLI commands using Click framework - Add query service to filter experiment commits by metrics - Add JMESPath-based query engine for flexible experiment filtering - Add experiment state parsing and commit message formatting - Restructure git-related domain models --- mthd/cli/__init__.py | 0 mthd/cli/app.py | 18 ++++++++++ mthd/cli/commands/__init__.py | 0 mthd/cli/commands/query.py | 18 ++++++++++ mthd/config.py | 1 + mthd/decorator.py | 24 ++++++------- mthd/domain/commit.py | 18 ---------- mthd/domain/experiment.py | 27 ++++++++++++++ mthd/domain/git.py | 55 ++++++++++++++++++++++++++++ mthd/domain/query.py | 60 +++++++++++++++++++++++++++++++ mthd/service/git.py | 17 +++------ mthd/service/query.py | 61 ++++++++++++++++++++++++++++++++ mthd/util/di.py | 3 ++ pyproject.toml | 6 ++++ tests/unit/domain/commit_test.py | 6 ++-- tests/unit/service/query_test.py | 28 +++++++-------- uv.lock | 27 +++++++++++++- 17 files changed, 307 insertions(+), 62 deletions(-) create mode 100644 mthd/cli/__init__.py create mode 100644 mthd/cli/app.py create mode 100644 mthd/cli/commands/__init__.py create mode 100644 mthd/cli/commands/query.py create mode 100644 mthd/config.py delete mode 100644 mthd/domain/commit.py create mode 100644 mthd/domain/git.py create mode 100644 mthd/domain/query.py create mode 100644 mthd/service/query.py diff --git a/mthd/cli/__init__.py b/mthd/cli/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/mthd/cli/app.py b/mthd/cli/app.py new file mode 100644 index 0000000..f28d0f1 --- /dev/null +++ b/mthd/cli/app.py @@ -0,0 +1,18 @@ +import click + +from dishka.integrations.click import setup_dishka + +from mthd.cli.commands.query import query +from mthd.util.di import DI + + +def start(): + @click.group() + @click.pass_context + def main(context: click.Context): + di = DI() + setup_dishka(container=di.container, context=context, auto_inject=True) + + main.command("query")(query) + + main() diff --git a/mthd/cli/commands/__init__.py b/mthd/cli/commands/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/mthd/cli/commands/query.py b/mthd/cli/commands/query.py new file mode 100644 index 0000000..94230cf --- /dev/null +++ b/mthd/cli/commands/query.py @@ -0,0 +1,18 @@ +import click + +from dishka import FromDishka + +from mthd.service.query import QueryService + + +@click.argument("query", type=str) +@click.option("limit", "--limit", type=int, default=-1) +def query(query: str, limit: int, query_service: FromDishka[QueryService]): + query_parts = query.split(" ") + print(query_parts) + if len(query_parts) != 3: + raise ValueError(f"Invalid query: {query}") + + result = query_service.execute_simple(*query_parts, limit=limit) + + print(result) diff --git a/mthd/config.py b/mthd/config.py new file mode 100644 index 0000000..2b364a9 --- /dev/null +++ b/mthd/config.py @@ -0,0 +1 @@ +METADATA_SEPARATOR = "---" diff --git a/mthd/decorator.py b/mthd/decorator.py index a67c9ad..15201a4 100644 --- a/mthd/decorator.py +++ b/mthd/decorator.py @@ -5,8 +5,11 @@ from rich.console import Console from rich.padding import Padding -from mthd.domain.commit import CommitMessage, StageStrategy +from mthd.domain.experiment import ExperimentState +from mthd.domain.git import StageStrategy from mthd.error import MthdError +from mthd.service.git import GitService +from mthd.util.di import DI def commit( @@ -22,18 +25,15 @@ def commit( def decorator(func: Callable) -> Callable: @wraps(func) def wrapper(*args, **kwargs): - # di = DI() + di = DI() hyperparameters = cast(BaseModel, kwargs.get(hypers, None)) if not hyperparameters: - raise MthdError( - "Hyperparameters must be provided in the function call." - ) - # git_service = di[GitService] + raise MthdError("Hyperparameters must be provided in the function call.") + git_service = di[GitService] # codebase_service = di[CodebaseService] # Generate commit message - commit_msg = CommitMessage( - summary="exp: foo bar baz", + exp_state = ExperimentState( hyperparameters=hyperparameters.model_dump(), # annotations=codebase_service.get_all_annotations(), ) @@ -46,11 +46,9 @@ def wrapper(*args, **kwargs): # Commit changes console = Console() console.print("Generating commit with message:\n") - console.print( - Padding(commit_msg.format(), pad=(0, 0, 0, 4)) - ) # Indent by 4 spaces. - # if git_service.should_commit(strategy): - # git_service.stage_and_commit(commit_msg) + console.print(Padding(exp_state.as_commit_message(), pad=(0, 0, 0, 4))) # Indent by 4 spaces. + if git_service.should_commit(strategy): + git_service.stage_and_commit(exp_state.as_commit_message()) return result diff --git a/mthd/domain/commit.py b/mthd/domain/commit.py deleted file mode 100644 index 568cdd8..0000000 --- a/mthd/domain/commit.py +++ /dev/null @@ -1,18 +0,0 @@ -from enum import Enum, auto - -from mthd.util.model import Model - - -class CommitMessage(Model): - summary: str - hyperparameters: dict - # annotations: set[Annotation] # @todo: fix anot - - def format(self) -> str: - return ( - f"{self.summary}\n\n{self.model_dump_json(indent=2, exclude={'summary'})}" - ) - - -class StageStrategy(Enum): - ALL = auto() diff --git a/mthd/domain/experiment.py b/mthd/domain/experiment.py index e69de29..59d9859 100644 --- a/mthd/domain/experiment.py +++ b/mthd/domain/experiment.py @@ -0,0 +1,27 @@ +import json + +from mthd.config import METADATA_SEPARATOR +from mthd.domain.git import CommitKind +from mthd.util.model import Model + + +class ExperimentState(Model): + hyperparameters: dict + # annotations: set[Annotation] # @todo: fix anot + + def as_commit_message(self) -> str: + return f"{self.summary}\n\n{self.body}\n\n{METADATA_SEPARATOR}\n\n{self.model_dump_json(indent=2, exclude={'summary'})}" + + @property + def summary(self) -> str: + return f"{CommitKind.EXP.value}: TODO" + + @property + def body(self) -> str: + return "TODO" + + @staticmethod + def parse(message: str) -> "ExperimentState": + # @todo: test this + metadata = message.split(METADATA_SEPARATOR)[1].strip() + return ExperimentState(hyperparameters=json.loads(metadata)) diff --git a/mthd/domain/git.py b/mthd/domain/git.py new file mode 100644 index 0000000..5c6dc01 --- /dev/null +++ b/mthd/domain/git.py @@ -0,0 +1,55 @@ +from datetime import datetime +from enum import Enum, StrEnum, auto +from typing import Optional + +import git + +from mthd.util.model import Model + + +class CommitKind(StrEnum): + EXP = "exp" + FIX = "fix" + FEAT = "feat" + CHORE = "chore" + TOOLING = "tooling" + REFACTOR = "refactor" + + +class Commit(Model): + sha: str + message: str + date: datetime + kind: Optional[CommitKind] + metadata: dict + + @staticmethod + def from_git(commit: git.Commit) -> "Commit": + message = commit.message if isinstance(commit.message, str) else commit.message.decode() + return Commit( + sha=commit.hexsha, + message=message, + date=commit.committed_datetime, + kind=Commit._parse_kind(message), + metadata={}, + ) + + @staticmethod + def _parse_kind(message: str) -> Optional[CommitKind]: + if message.startswith("exp:"): + return CommitKind.EXP + if message.startswith("fix:"): + return CommitKind.FIX + if message.startswith("feat:"): + return CommitKind.FEAT + if message.startswith("chore:"): + return CommitKind.CHORE + if message.startswith("tooling:"): + return CommitKind.TOOLING + if message.startswith("refactor:"): + return CommitKind.REFACTOR + return None + + +class StageStrategy(Enum): + ALL = auto() diff --git a/mthd/domain/query.py b/mthd/domain/query.py new file mode 100644 index 0000000..073713a --- /dev/null +++ b/mthd/domain/query.py @@ -0,0 +1,60 @@ +from typing import List, Literal + +import jmespath + +from jmespath.parser import ParsedResult +from pydantic import BaseModel + +from mthd.domain.git import Commit + +# SimpleQueryOp = Literal[">", "<", ">=", "<=", "=="] +# SimpleQueryValue = str | int | float +SimpleQueryOp = str +SimpleQueryValue = str | int | float + + +class Query(BaseModel): + """A query to filter experiment commits.""" + + expression: str + + def compile(self) -> ParsedResult: + """Compile the JMESPath expression.""" + return jmespath.compile(self.expression) + + @staticmethod + def from_expression(expr: str) -> "Query": + """Create a query from a JMESPath expression.""" + return Query(expression=expr) + + @staticmethod + def where(field: str, op: SimpleQueryOp, value: SimpleQueryValue) -> "Query": + """Create a query that filters on a field value. + + Example: + Query.where("accuracy", ">", 0.9) + """ + # Convert comparison operators to JMESPath + match op: + case ">": + expr = f"[?{field} > `{value}`]" + case "<": + expr = f"[?{field} < `{value}`]" + case ">=": + expr = f"[?{field} >= `{value}`]" + case "<=": + expr = f"[?{field} <= `{value}`]" + case "==": + expr = f"[?{field} == `{value}`]" + case _: + raise ValueError(f"Invalid operator: {op}") + + return Query(expression=expr) + + +class QueryResult(BaseModel): + """Result of executing a query.""" + + commits: List[Commit] + query: Query + num_searched: int diff --git a/mthd/service/git.py b/mthd/service/git.py index 1f033bc..66497c9 100644 --- a/mthd/service/git.py +++ b/mthd/service/git.py @@ -1,6 +1,5 @@ import git -from mthd.domain.experiment import ExperimentState from mthd.domain.git import Commit, StageStrategy @@ -10,32 +9,24 @@ def __init__(self, repo: git.Repo): def get_all_commits(self) -> list[Commit]: """Get all commits in the repository. - + Returns: List of Commit objects representing the git history """ commits = [] for commit in self._repo.iter_commits(): - commits.append( - Commit( - sha=commit.hexsha, - message=commit.message, - date=commit.committed_datetime - ) - ) + commits.append(Commit.from_git(commit)) return commits - def stage_and_commit(self, state: ExperimentState): + def stage_and_commit(self, message: str): """Stage all changes and create a commit with the given message. Args: message: CommitMessage object containing commit metadata """ - # Stage all changes self._repo.git.add(A=True) - # Create commit with formatted message - self._repo.index.commit(state.as_commit_message()) + self._repo.index.commit(message) def should_commit(self, strategy: StageStrategy) -> bool: """Determine if the repo state can be staged and committed diff --git a/mthd/service/query.py b/mthd/service/query.py new file mode 100644 index 0000000..b21860a --- /dev/null +++ b/mthd/service/query.py @@ -0,0 +1,61 @@ +from typing import Optional + +from mthd.domain.experiment import ExperimentState +from mthd.domain.query import Query, QueryResult, SimpleQueryOp, SimpleQueryValue +from mthd.service.git import GitService + + +class QueryService: + """Service for querying experiment commits.""" + + def __init__(self, git_service: GitService): + self.git_service = git_service + + def execute(self, query: Query, limit: Optional[int] = None) -> QueryResult: + """Execute a query against the experiment commit history. + + Args: + query: The query to execute + limit: Optional maximum number of results to return + + Returns: + QueryResult containing matching commits + """ + # Get all commits with experimental metadata + commits = self.git_service.get_all_commits() + total = len(commits) + + exp_commits = [ExperimentState.parse(commit) for commit in commits] + + # Compile and execute the JMESPath query + jmespath_query = query.compile() + results = jmespath_query.search(exp_commits) + + # Handle limit + # @todo: sort by commit date? + if limit: + results = results[:limit] + + return QueryResult(commits=results or [], query=query, num_searched=total) + + def execute_simple( + self, + metric: str, + op: SimpleQueryOp, + value: SimpleQueryValue, + limit: Optional[int] = None, + ) -> QueryResult: + """Convenience method to find experiments by metric value. + + Args: + metric: Name of the metric to filter on + op: Comparison operator (">", "<", ">=", "<=", "==") + value: Value to compare against + limit: Optional maximum number of results + + Returns: + QueryResult containing matching commits + """ + query = Query.where(f"message.metrics.{metric}", op, value) + return self.execute(query, limit=limit) + diff --git a/mthd/util/di.py b/mthd/util/di.py index b1c82f3..86a2149 100644 --- a/mthd/util/di.py +++ b/mthd/util/di.py @@ -6,6 +6,7 @@ from mthd.service.codebase import CodebaseService from mthd.service.experiment import ExperimentService from mthd.service.git import GitService +from mthd.service.query import QueryService T = TypeVar("T") @@ -16,6 +17,7 @@ def provide_repo(self) -> Repo: try: return Repo() except Exception as e: + print(e) raise RuntimeError(f"Failed to initialize Git repository: {e}") @@ -40,6 +42,7 @@ def services(self) -> Provider: provider.provide(GitService) provider.provide(ExperimentService) provider.provide(CodebaseService) + provider.provide(QueryService) return provider diff --git a/pyproject.toml b/pyproject.toml index c3a2f32..e71767e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,8 +7,10 @@ authors = [ requires-python = ">=3.10" dependencies = [ "anot>=0.0.6", + "click>=8.1.8", "dishka>=1.4.2", "gitpython>=3.1.44", + "jmespath>=1.0.1", "pydantic>=2.10.5", "rich>=13.9.4", ] @@ -37,6 +39,9 @@ Repository = "https://github.com/flywhl/mthd" Documentation = "https://github.com/flywhl/mthd#readme" "Bug Tracker" = "https://github.com/flywhl/mthd/issues" +[project.scripts] +mthd = "mthd.cli.app:start" + [build-system] requires = ["hatchling", "hatch-vcs"] build-backend = "hatchling.build" @@ -56,6 +61,7 @@ dev-dependencies = [ [tool.ruff] src = ["mthd"] +line-length = 120 [tool.ruff.lint] extend-select = ["I"] diff --git a/tests/unit/domain/commit_test.py b/tests/unit/domain/commit_test.py index 5e5b59f..69c5831 100644 --- a/tests/unit/domain/commit_test.py +++ b/tests/unit/domain/commit_test.py @@ -1,6 +1,6 @@ from pydantic import BaseModel -from mthd.domain.commit import CommitMessage +from mthd.domain.git import ExperimentState def test_commitmessage_format_success(): @@ -9,9 +9,9 @@ class Hypers(BaseModel): b: float c: str - msg = CommitMessage( + msg = ExperimentState( summary="test", hyperparameters=Hypers(a=1, b=2.0, c="3").model_dump(), ) - print(msg.format()) + print(msg.as_commit_message()) diff --git a/tests/unit/service/query_test.py b/tests/unit/service/query_test.py index 230bd8b..c6ace5b 100644 --- a/tests/unit/service/query_test.py +++ b/tests/unit/service/query_test.py @@ -20,7 +20,7 @@ def git_service(): ), Commit( sha="def456", - message="Test 2", + message="Test 2", date=datetime(2024, 1, 2), ), ] @@ -29,53 +29,53 @@ def git_service(): @pytest.fixture -def query_service(git_service): +def query_service(git_service: GitService): return QueryService(git_service) -@patch('mthd.domain.experiment.ExperimentState.parse') -def test_execute_query(mock_parse, query_service): +@patch("mthd.domain.experiment.ExperimentState.parse") +def test_execute_query(mock_parse, query_service: QueryService): # Setup mock parsed experiments mock_parse.side_effect = [ {"accuracy": 0.8, "loss": 0.2}, {"accuracy": 0.9, "loss": 0.1}, ] - + # Test query for high accuracy query = Query.where("accuracy", ">", 0.85) result = query_service.execute(query) - + assert len(result.commits) == 1 assert result.num_searched == 2 assert result.query == query -@patch('mthd.domain.experiment.ExperimentState.parse') -def test_execute_simple_query(mock_parse, query_service): +@patch("mthd.domain.experiment.ExperimentState.parse") +def test_execute_simple_query(mock_parse, query_service: QueryService): # Setup mock parsed experiments mock_parse.side_effect = [ {"metrics": {"loss": 0.2}}, {"metrics": {"loss": 0.1}}, ] - + # Test simple query for low loss result = query_service.execute_simple("loss", "<", 0.15) - + assert len(result.commits) == 1 assert result.num_searched == 2 -@patch('mthd.domain.experiment.ExperimentState.parse') -def test_execute_query_with_limit(mock_parse, query_service): +@patch("mthd.domain.experiment.ExperimentState.parse") +def test_execute_query_with_limit(mock_parse, query_service: QueryService): # Setup mock parsed experiments mock_parse.side_effect = [ {"accuracy": 0.9}, {"accuracy": 0.95}, ] - + # Test query with limit query = Query.where("accuracy", ">=", 0.9) result = query_service.execute(query, limit=1) - + assert len(result.commits) == 1 assert result.num_searched == 2 diff --git a/uv.lock b/uv.lock index cde7f66..d1b0aee 100644 --- a/uv.lock +++ b/uv.lock @@ -100,6 +100,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c5/55/51844dd50c4fc7a33b653bfaba4c2456f06955289ca770a5dbd5fd267374/cfgv-3.4.0-py2.py3-none-any.whl", hash = "sha256:b7265b1f29fd3316bfcd2b330d63d024f2bfd8bcb8b0272f8e19a504856c48f9", size = 7249 }, ] +[[package]] +name = "click" +version = "8.1.8" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b9/2e/0090cbf739cee7d23781ad4b89a9894a41538e4fcf4c31dcdd705b78eb8b/click-8.1.8.tar.gz", hash = "sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a", size = 226593 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7e/d4/7ebdbd03970677812aac39c869717059dbb71a4cfc033ca6e5221787892c/click-8.1.8-py3-none-any.whl", hash = "sha256:63c132bbbed01578a06712a2d1f497bb62d9c1c0d329b7903a866228027263b2", size = 98188 }, +] + [[package]] name = "colorama" version = "0.4.6" @@ -249,6 +261,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ef/a6/62565a6e1cf69e10f5727360368e451d4b7f58beeac6173dc9db836a5b46/iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374", size = 5892 }, ] +[[package]] +name = "jmespath" +version = "1.0.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/00/2a/e867e8531cf3e36b41201936b7fa7ba7b5702dbef42922193f05c8976cd6/jmespath-1.0.1.tar.gz", hash = "sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe", size = 25843 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/31/b4/b9b800c45527aadd64d5b442f9b932b00648617eb5d63d2c7a6587b7cafc/jmespath-1.0.1-py3-none-any.whl", hash = "sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980", size = 20256 }, +] + [[package]] name = "markdown-it-py" version = "3.0.0" @@ -295,12 +316,14 @@ wheels = [ [[package]] name = "mthd" -version = "0.0.3.dev4+g0bdb96d.d20250118" +version = "0.0.3.dev1+g673bac1.d20250118" source = { editable = "." } dependencies = [ { name = "anot" }, + { name = "click" }, { name = "dishka" }, { name = "gitpython" }, + { name = "jmespath" }, { name = "pydantic" }, { name = "rich" }, ] @@ -318,8 +341,10 @@ dev = [ [package.metadata] requires-dist = [ { name = "anot", specifier = ">=0.0.6" }, + { name = "click", specifier = ">=8.1.8" }, { name = "dishka", specifier = ">=1.4.2" }, { name = "gitpython", specifier = ">=3.1.44" }, + { name = "jmespath", specifier = ">=1.0.1" }, { name = "pydantic", specifier = ">=2.10.5" }, { name = "rich", specifier = ">=13.9.4" }, ] From 1f131b6b682d0f69845ccfe0d3512ebcde58bab0 Mon Sep 17 00:00:00 2001 From: Rory Byrne Date: Sun, 19 Jan 2025 11:48:12 +0000 Subject: [PATCH 06/23] feat: Refactor experiment and git domain models with improved semantics --- mthd/domain/experiment.py | 44 +++++++++++++++--------- mthd/domain/git.py | 72 +++++++++++++++++++++++++++++---------- 2 files changed, 81 insertions(+), 35 deletions(-) diff --git a/mthd/domain/experiment.py b/mthd/domain/experiment.py index 59d9859..4f59f1b 100644 --- a/mthd/domain/experiment.py +++ b/mthd/domain/experiment.py @@ -1,27 +1,37 @@ -import json +from datetime import datetime +from typing import Optional from mthd.config import METADATA_SEPARATOR -from mthd.domain.git import CommitKind from mthd.util.model import Model -class ExperimentState(Model): - hyperparameters: dict - # annotations: set[Annotation] # @todo: fix anot +class ExperimentResult(Model): + """Represents the outcome of an experiment run""" + metrics: dict + artifacts: Optional[dict] = None # Any generated files/data - def as_commit_message(self) -> str: - return f"{self.summary}\n\n{self.body}\n\n{METADATA_SEPARATOR}\n\n{self.model_dump_json(indent=2, exclude={'summary'})}" - @property - def summary(self) -> str: - return f"{CommitKind.EXP.value}: TODO" +class ExperimentRun(Model): + """Represents a single run of an experiment""" + hyperparameters: dict + metrics: Optional[dict] = None # Results/metrics from the run + annotations: Optional[dict] = None # Code annotations/metadata + timestamp: datetime = datetime.now() + + def record_results(self, metrics: dict, artifacts: Optional[dict] = None) -> None: + """Record the results of this experiment run""" + self.metrics = metrics + if artifacts: + self.artifacts = artifacts - @property - def body(self) -> str: - return "TODO" + def as_commit_message(self) -> str: + """Formats the experiment run as a semantic commit message""" + from mthd.domain.git import CommitMessage + return CommitMessage.from_experiment(self).format() @staticmethod - def parse(message: str) -> "ExperimentState": - # @todo: test this - metadata = message.split(METADATA_SEPARATOR)[1].strip() - return ExperimentState(hyperparameters=json.loads(metadata)) + def parse(message: str) -> "ExperimentRun": + """Parse an experiment run from a commit message""" + from mthd.domain.git import CommitMessage + commit_msg = CommitMessage.parse(message) + return ExperimentRun(**commit_msg.metadata) diff --git a/mthd/domain/git.py b/mthd/domain/git.py index 5c6dc01..5179011 100644 --- a/mthd/domain/git.py +++ b/mthd/domain/git.py @@ -1,13 +1,16 @@ +import json from datetime import datetime from enum import Enum, StrEnum, auto from typing import Optional import git +from mthd.config import METADATA_SEPARATOR from mthd.util.model import Model class CommitKind(StrEnum): + """Types of semantic commits""" EXP = "exp" FIX = "fix" FEAT = "feat" @@ -16,7 +19,54 @@ class CommitKind(StrEnum): REFACTOR = "refactor" +class CommitMessage(Model): + """Formats and parses semantic commit messages""" + kind: CommitKind + summary: str + body: Optional[str] = None + metadata: dict + + def format(self) -> str: + """Format as a git commit message""" + msg = f"{self.kind.value}: {self.summary}" + if self.body: + msg += f"\n\n{self.body}" + msg += f"\n\n{METADATA_SEPARATOR}\n\n{json.dumps(self.metadata, indent=2)}" + return msg + + @classmethod + def parse(cls, message: str) -> "CommitMessage": + """Parse a git commit message into its components""" + parts = message.split(METADATA_SEPARATOR) + header = parts[0].strip() + metadata = json.loads(parts[1].strip()) if len(parts) > 1 else {} + + # Parse header + lines = header.split("\n") + first_line = lines[0] + kind_str, summary = first_line.split(":", 1) + body = "\n".join(lines[1:]).strip() if len(lines) > 1 else None + + return cls( + kind=CommitKind(kind_str.strip()), + summary=summary.strip(), + body=body, + metadata=metadata + ) + + @classmethod + def from_experiment(cls, experiment: "ExperimentRun") -> "CommitMessage": + """Create a commit message from an experiment run""" + return cls( + kind=CommitKind.EXP, + summary="Experiment run", # TODO: Generate better summary + body="TODO: Generate experiment description", + metadata=experiment.model_dump(exclude={"summary"}) + ) + + class Commit(Model): + """Represents a git commit""" sha: str message: str date: datetime @@ -26,30 +76,16 @@ class Commit(Model): @staticmethod def from_git(commit: git.Commit) -> "Commit": message = commit.message if isinstance(commit.message, str) else commit.message.decode() + commit_msg = CommitMessage.parse(message) return Commit( sha=commit.hexsha, message=message, date=commit.committed_datetime, - kind=Commit._parse_kind(message), - metadata={}, + kind=commit_msg.kind, + metadata=commit_msg.metadata, ) - @staticmethod - def _parse_kind(message: str) -> Optional[CommitKind]: - if message.startswith("exp:"): - return CommitKind.EXP - if message.startswith("fix:"): - return CommitKind.FIX - if message.startswith("feat:"): - return CommitKind.FEAT - if message.startswith("chore:"): - return CommitKind.CHORE - if message.startswith("tooling:"): - return CommitKind.TOOLING - if message.startswith("refactor:"): - return CommitKind.REFACTOR - return None - class StageStrategy(Enum): + """Strategy for staging files in git""" ALL = auto() From 89d4bb1d4f334f8ffe6d23330dad0d4e2b1a2aa7 Mon Sep 17 00:00:00 2001 From: Rory Byrne Date: Sun, 19 Jan 2025 15:48:17 +0000 Subject: [PATCH 07/23] refactor: Restructure experiment module with new CommitMessage and CommitKind classes --- mthd/domain/experiment.py | 87 +++++++++++++++++++++++++++++++-------- 1 file changed, 70 insertions(+), 17 deletions(-) diff --git a/mthd/domain/experiment.py b/mthd/domain/experiment.py index 4f59f1b..895788f 100644 --- a/mthd/domain/experiment.py +++ b/mthd/domain/experiment.py @@ -1,37 +1,90 @@ +import json + from datetime import datetime +from enum import StrEnum from typing import Optional from mthd.config import METADATA_SEPARATOR from mthd.util.model import Model -class ExperimentResult(Model): - """Represents the outcome of an experiment run""" - metrics: dict - artifacts: Optional[dict] = None # Any generated files/data - - class ExperimentRun(Model): """Represents a single run of an experiment""" + hyperparameters: dict - metrics: Optional[dict] = None # Results/metrics from the run - annotations: Optional[dict] = None # Code annotations/metadata + metrics: dict + artifacts: Optional[dict] = None # Any generated files/data + annotations: Optional[dict] = None timestamp: datetime = datetime.now() - - def record_results(self, metrics: dict, artifacts: Optional[dict] = None) -> None: - """Record the results of this experiment run""" - self.metrics = metrics - if artifacts: - self.artifacts = artifacts def as_commit_message(self) -> str: """Formats the experiment run as a semantic commit message""" - from mthd.domain.git import CommitMessage - return CommitMessage.from_experiment(self).format() + return str(CommitMessage.from_experiment(self)) @staticmethod def parse(message: str) -> "ExperimentRun": """Parse an experiment run from a commit message""" - from mthd.domain.git import CommitMessage commit_msg = CommitMessage.parse(message) return ExperimentRun(**commit_msg.metadata) + + +class CommitKind(StrEnum): + """Types of semantic commits""" + + EXP = "exp" + FIX = "fix" + FEAT = "feat" + CHORE = "chore" + TOOLING = "tooling" + REFACTOR = "refactor" + + @staticmethod + def from_header(header: str) -> Optional["CommitKind"]: + """Parse a commit kind from a header string""" + try: + kind = header.split(":")[0].strip() + return CommitKind(kind) + except Exception: + return None + + +class CommitMessage(Model): + """Formats and parses semantic commit messages""" + + kind: CommitKind + summary: str + body: Optional[str] = None + metadata: dict + + def __str__(self) -> str: + """Format as a git commit message""" + msg = f"{self.kind.value}: {self.summary}" + if self.body: + msg += f"\n\n{self.body}" + msg += f"\n\n{METADATA_SEPARATOR}\n\n{json.dumps(self.metadata, indent=2)}" + return msg + + @classmethod + def parse(cls, message: str) -> "CommitMessage": + """Parse a git commit message into its components""" + parts = message.split(METADATA_SEPARATOR) + header = parts[0].strip() + metadata = json.loads(parts[1].strip()) if len(parts) > 1 else {} + + # Parse header + lines = header.split("\n") + first_line = lines[0] + kind_str, summary = first_line.split(":", 1) + body = "\n".join(lines[1:]).strip() if len(lines) > 1 else None + + return cls(kind=CommitKind(kind_str.strip()), summary=summary.strip(), body=body, metadata=metadata) + + @classmethod + def from_experiment(cls, experiment: ExperimentRun) -> "CommitMessage": + """Create a commit message from an experiment run""" + return cls( + kind=CommitKind.EXP, + summary="experiment run", # TODO: Generate better summary + body="TODO: Generate experiment description", + metadata=experiment.model_dump(exclude={"summary"}), + ) From 36e6660e9e2504fcab1af080893201e2d1be07a2 Mon Sep 17 00:00:00 2001 From: Rory Byrne Date: Sun, 19 Jan 2025 15:48:19 +0000 Subject: [PATCH 08/23] refactor: Separate semantic commit types and improve experiment commit handling --- mthd/decorator.py | 14 ++++-- mthd/domain/experiment.py | 96 +++++++++++++++------------------------ 2 files changed, 46 insertions(+), 64 deletions(-) diff --git a/mthd/decorator.py b/mthd/decorator.py index 15201a4..4c2d74d 100644 --- a/mthd/decorator.py +++ b/mthd/decorator.py @@ -5,7 +5,8 @@ from rich.console import Console from rich.padding import Padding -from mthd.domain.experiment import ExperimentState +from mthd.domain.experiment import ExperimentRun, ExperimentCommit +from mthd.domain.change_type import ChangeType from mthd.domain.git import StageStrategy from mthd.error import MthdError from mthd.service.git import GitService @@ -33,10 +34,15 @@ def wrapper(*args, **kwargs): # codebase_service = di[CodebaseService] # Generate commit message - exp_state = ExperimentState( + experiment = ExperimentRun( hyperparameters=hyperparameters.model_dump(), # annotations=codebase_service.get_all_annotations(), ) + commit = ExperimentCommit( + type=ChangeType.EXPERIMENT, + summary="experiment run", + experiment=experiment + ) # print(hyperparameters.model_dump_json(indent=2)) # print(commit_msg.format()) @@ -46,9 +52,9 @@ def wrapper(*args, **kwargs): # Commit changes console = Console() console.print("Generating commit with message:\n") - console.print(Padding(exp_state.as_commit_message(), pad=(0, 0, 0, 4))) # Indent by 4 spaces. + console.print(Padding(commit.format_message(), pad=(0, 0, 0, 4))) # Indent by 4 spaces. if git_service.should_commit(strategy): - git_service.stage_and_commit(exp_state.as_commit_message()) + git_service.stage_and_commit(commit.format_message()) return result diff --git a/mthd/domain/experiment.py b/mthd/domain/experiment.py index 895788f..fe6eb39 100644 --- a/mthd/domain/experiment.py +++ b/mthd/domain/experiment.py @@ -1,90 +1,66 @@ import json - from datetime import datetime -from enum import StrEnum -from typing import Optional +from typing import Literal, Optional from mthd.config import METADATA_SEPARATOR +from mthd.domain.change_type import ChangeType from mthd.util.model import Model class ExperimentRun(Model): - """Represents a single run of an experiment""" - + """The core data from an experiment run""" hyperparameters: dict - metrics: dict + metrics: Optional[dict] = None artifacts: Optional[dict] = None # Any generated files/data annotations: Optional[dict] = None timestamp: datetime = datetime.now() - def as_commit_message(self) -> str: - """Formats the experiment run as a semantic commit message""" - return str(CommitMessage.from_experiment(self)) - - @staticmethod - def parse(message: str) -> "ExperimentRun": - """Parse an experiment run from a commit message""" - commit_msg = CommitMessage.parse(message) - return ExperimentRun(**commit_msg.metadata) - - -class CommitKind(StrEnum): - """Types of semantic commits""" - - EXP = "exp" - FIX = "fix" - FEAT = "feat" - CHORE = "chore" - TOOLING = "tooling" - REFACTOR = "refactor" - - @staticmethod - def from_header(header: str) -> Optional["CommitKind"]: - """Parse a commit kind from a header string""" - try: - kind = header.split(":")[0].strip() - return CommitKind(kind) - except Exception: - return None - -class CommitMessage(Model): - """Formats and parses semantic commit messages""" - - kind: CommitKind +class SemanticCommit(Model): + """Base class for our semantic commit formats""" + type: ChangeType summary: str body: Optional[str] = None - metadata: dict - def __str__(self) -> str: - """Format as a git commit message""" - msg = f"{self.kind.value}: {self.summary}" + def format_message(self) -> str: + msg = f"{self.type.value}: {self.summary}" if self.body: msg += f"\n\n{self.body}" - msg += f"\n\n{METADATA_SEPARATOR}\n\n{json.dumps(self.metadata, indent=2)}" + return msg + + +class ExperimentCommit(SemanticCommit): + """A commit specifically representing an experiment run""" + type: Literal[ChangeType.EXPERIMENT] # Must be EXPERIMENT + experiment: ExperimentRun + + def format_message(self) -> str: + msg = super().format_message() + msg += f"\n\n{METADATA_SEPARATOR}\n\n{json.dumps(self.experiment.model_dump(), indent=2)}" return msg @classmethod - def parse(cls, message: str) -> "CommitMessage": - """Parse a git commit message into its components""" + def parse(cls, message: str) -> "ExperimentCommit": + """Parse a git commit message into an ExperimentCommit""" parts = message.split(METADATA_SEPARATOR) + if len(parts) != 2: + raise ValueError("Invalid experiment commit - missing metadata section") + header = parts[0].strip() - metadata = json.loads(parts[1].strip()) if len(parts) > 1 else {} - - # Parse header lines = header.split("\n") first_line = lines[0] - kind_str, summary = first_line.split(":", 1) + type_str, summary = first_line.split(":", 1) + + if ChangeType(type_str.strip()) != ChangeType.EXPERIMENT: + raise ValueError("Invalid experiment commit - wrong type") + body = "\n".join(lines[1:]).strip() if len(lines) > 1 else None + metadata = json.loads(parts[1].strip()) + experiment = ExperimentRun(**metadata) - return cls(kind=CommitKind(kind_str.strip()), summary=summary.strip(), body=body, metadata=metadata) - - @classmethod - def from_experiment(cls, experiment: ExperimentRun) -> "CommitMessage": - """Create a commit message from an experiment run""" return cls( - kind=CommitKind.EXP, - summary="experiment run", # TODO: Generate better summary - body="TODO: Generate experiment description", - metadata=experiment.model_dump(exclude={"summary"}), + type=ChangeType.EXPERIMENT, + summary=summary.strip(), + body=body, + experiment=experiment ) From b359e17e03d3fe117da0451de920b2a7b2a9f05c Mon Sep 17 00:00:00 2001 From: Rory Byrne Date: Sat, 8 Feb 2025 16:11:26 +0000 Subject: [PATCH 09/23] refactor: restructure experiment commit handling and message formatting The changes improve the way experiment commits are handled by: - Separating commit message formatting from experiment data - Adding UUID and timestamp tracking for experiments - Introducing clearer separation between commit body and metadata - Adding template support for commit messages --- mthd/config.py | 3 +- mthd/decorator.py | 34 +++++----- mthd/domain/experiment.py | 129 ++++++++++++++++++++++++++------------ mthd/domain/git.py | 71 ++------------------- mthd/domain/query.py | 6 +- mthd/service/git.py | 8 ++- mthd/service/query.py | 5 +- uv.lock | 1 - 8 files changed, 129 insertions(+), 128 deletions(-) diff --git a/mthd/config.py b/mthd/config.py index 2b364a9..90e99a3 100644 --- a/mthd/config.py +++ b/mthd/config.py @@ -1 +1,2 @@ -METADATA_SEPARATOR = "---" +BODY_METADATA_SEPARATOR = "---" +SUMMARY_BODY_SEPARATOR = "\n\n" diff --git a/mthd/decorator.py b/mthd/decorator.py index 4c2d74d..bd531ba 100644 --- a/mthd/decorator.py +++ b/mthd/decorator.py @@ -5,8 +5,7 @@ from rich.console import Console from rich.padding import Padding -from mthd.domain.experiment import ExperimentRun, ExperimentCommit -from mthd.domain.change_type import ChangeType +from mthd.domain.experiment import ExperimentRun from mthd.domain.git import StageStrategy from mthd.error import MthdError from mthd.service.git import GitService @@ -15,7 +14,9 @@ def commit( fn: Optional[Callable] = None, + *, hypers: str = "hypers", + template: str = "run {experiment}", strategy: StageStrategy = StageStrategy.ALL, ) -> Callable: """Decorator to auto-commit experimental code with scientific metadata. @@ -27,6 +28,7 @@ def decorator(func: Callable) -> Callable: @wraps(func) def wrapper(*args, **kwargs): di = DI() + # @todo: handle this better (eg. positional args) hyperparameters = cast(BaseModel, kwargs.get(hypers, None)) if not hyperparameters: raise MthdError("Hyperparameters must be provided in the function call.") @@ -35,26 +37,22 @@ def wrapper(*args, **kwargs): # Generate commit message experiment = ExperimentRun( + experiment=func.__name__, hyperparameters=hyperparameters.model_dump(), + metrics={}, # annotations=codebase_service.get_all_annotations(), ) - commit = ExperimentCommit( - type=ChangeType.EXPERIMENT, - summary="experiment run", - experiment=experiment - ) - # print(hyperparameters.model_dump_json(indent=2)) - # print(commit_msg.format()) + message = experiment.as_commit_message(template=template) # Run experiment result = func(*args, **kwargs) # Commit changes - console = Console() - console.print("Generating commit with message:\n") - console.print(Padding(commit.format_message(), pad=(0, 0, 0, 4))) # Indent by 4 spaces. if git_service.should_commit(strategy): - git_service.stage_and_commit(commit.format_message()) + console = Console() + console.print("Generating commit with message:\n") + console.print(Padding(message.render(with_metadata=True), pad=(0, 0, 0, 4))) # Indent by 4 spaces. + # git_service.stage_and_commit(message.render()) return result @@ -72,8 +70,14 @@ class Hyperparameters(BaseModel): b: float c: str - @commit(hypers="hypers") + class Metrics(BaseModel): + a: int + b: float + c: str + + @commit(hypers="hypers", template="run {experiment} at {timestamp}") def test(hypers: Hyperparameters): - print("\n") + print("\n\n") + return Metrics(a=1, b=2.0, c="3") test(hypers=Hyperparameters(a=1, b=2.0, c="3")) diff --git a/mthd/domain/experiment.py b/mthd/domain/experiment.py index fe6eb39..9966b5e 100644 --- a/mthd/domain/experiment.py +++ b/mthd/domain/experiment.py @@ -1,66 +1,113 @@ import json +import logging + from datetime import datetime -from typing import Literal, Optional +from enum import StrEnum +from typing import Any, Optional +from uuid import uuid4 + +from pydantic import UUID4, Field -from mthd.config import METADATA_SEPARATOR -from mthd.domain.change_type import ChangeType +from mthd.config import BODY_METADATA_SEPARATOR, SUMMARY_BODY_SEPARATOR +from mthd.domain.git import Commit from mthd.util.model import Model +logger = logging.getLogger(__name__) + class ExperimentRun(Model): """The core data from an experiment run""" + + experiment: str hyperparameters: dict - metrics: Optional[dict] = None + metrics: dict + uuid: UUID4 = Field(default_factory=uuid4) artifacts: Optional[dict] = None # Any generated files/data annotations: Optional[dict] = None timestamp: datetime = datetime.now() + def as_commit_message(self, template: str) -> "SemanticMessage": + """Convert this experiment run into a commit""" + return SemanticMessage( + kind=CommitKind.EXP, + summary=template.format(**self.model_dump(include={"experiment", "timestamp"})), + metadata=self.model_dump(mode="json"), + ) + + @staticmethod + def from_commit(commit: Commit) -> Optional["ExperimentRun"]: + message = SemanticMessage.from_commit(commit) + # if not message: + # # @todo: is :20s the right syntax to truncate? + # logger.debug(f"Could not parse semantic message: '{commit.message:.20s}'") + # return None + + return ExperimentRun.model_validate(message.metadata) + + +class CommitKind(StrEnum): + """Types of semantic commits""" + + EXP = "exp" + FIX = "fix" + FEAT = "feat" + CHORE = "chore" + TOOLING = "tooling" + REFACTOR = "refactor" -class SemanticCommit(Model): + @property + def has_metadata(self) -> bool: + return self is CommitKind.EXP + + @staticmethod + def from_header(header: str) -> Optional["CommitKind"]: + """Parse a commit kind from a header string""" + try: + kind = header.split(":")[0].strip() + return CommitKind(kind) + except Exception: + return None + + +class SemanticMessage(Model): """Base class for our semantic commit formats""" - type: ChangeType + + kind: CommitKind summary: str body: Optional[str] = None + metadata: Optional[dict[str, Any]] = None - def format_message(self) -> str: - msg = f"{self.type.value}: {self.summary}" + def render(self, with_metadata: bool = False) -> str: + msg = f"{self.kind.value}: {self.summary}" if self.body: msg += f"\n\n{self.body}" + if with_metadata and self.metadata: + msg += f"\n\n{BODY_METADATA_SEPARATOR}\n\n{json.dumps(self.metadata, indent=2)}" return msg + @classmethod + def from_commit(cls, commit: Commit) -> "SemanticMessage": + kind, summary, body, metadata = cls._parse_semantic_parts(commit) -class ExperimentCommit(SemanticCommit): - """A commit specifically representing an experiment run""" - type: Literal[ChangeType.EXPERIMENT] # Must be EXPERIMENT - experiment: ExperimentRun + return cls(kind=kind, summary=summary.strip(), body=body, metadata=metadata) - def format_message(self) -> str: - msg = super().format_message() - msg += f"\n\n{METADATA_SEPARATOR}\n\n{json.dumps(self.experiment.model_dump(), indent=2)}" - return msg + @staticmethod + def _parse_semantic_parts(commit: Commit) -> tuple[CommitKind, str, Optional[str], Optional[dict]]: + # we only want to split on the first separator + parts = commit.message.split(SUMMARY_BODY_SEPARATOR, maxsplit=1) + header = parts[0] + kind_str, summary = header.split(":", 1) + kind = CommitKind(kind_str) - @classmethod - def parse(cls, message: str) -> "ExperimentCommit": - """Parse a git commit message into an ExperimentCommit""" - parts = message.split(METADATA_SEPARATOR) - if len(parts) != 2: - raise ValueError("Invalid experiment commit - missing metadata section") - - header = parts[0].strip() - lines = header.split("\n") - first_line = lines[0] - type_str, summary = first_line.split(":", 1) - - if ChangeType(type_str.strip()) != ChangeType.EXPERIMENT: - raise ValueError("Invalid experiment commit - wrong type") - - body = "\n".join(lines[1:]).strip() if len(lines) > 1 else None - metadata = json.loads(parts[1].strip()) - experiment = ExperimentRun(**metadata) - - return cls( - type=ChangeType.EXPERIMENT, - summary=summary.strip(), - body=body, - experiment=experiment - ) + if len(parts) == 2: + body = parts[1] + if kind.has_metadata: + body, raw_metadata = body.split(BODY_METADATA_SEPARATOR) + metadata = json.loads(raw_metadata) + assert isinstance(metadata, dict) + else: + metadata = None + else: + body = metadata = None + + return kind, summary, body, metadata diff --git a/mthd/domain/git.py b/mthd/domain/git.py index 5179011..696d4ab 100644 --- a/mthd/domain/git.py +++ b/mthd/domain/git.py @@ -1,91 +1,32 @@ -import json from datetime import datetime -from enum import Enum, StrEnum, auto -from typing import Optional +from enum import Enum, auto import git -from mthd.config import METADATA_SEPARATOR from mthd.util.model import Model -class CommitKind(StrEnum): - """Types of semantic commits""" - EXP = "exp" - FIX = "fix" - FEAT = "feat" - CHORE = "chore" - TOOLING = "tooling" - REFACTOR = "refactor" - - -class CommitMessage(Model): - """Formats and parses semantic commit messages""" - kind: CommitKind - summary: str - body: Optional[str] = None - metadata: dict - - def format(self) -> str: - """Format as a git commit message""" - msg = f"{self.kind.value}: {self.summary}" - if self.body: - msg += f"\n\n{self.body}" - msg += f"\n\n{METADATA_SEPARATOR}\n\n{json.dumps(self.metadata, indent=2)}" - return msg - - @classmethod - def parse(cls, message: str) -> "CommitMessage": - """Parse a git commit message into its components""" - parts = message.split(METADATA_SEPARATOR) - header = parts[0].strip() - metadata = json.loads(parts[1].strip()) if len(parts) > 1 else {} - - # Parse header - lines = header.split("\n") - first_line = lines[0] - kind_str, summary = first_line.split(":", 1) - body = "\n".join(lines[1:]).strip() if len(lines) > 1 else None - - return cls( - kind=CommitKind(kind_str.strip()), - summary=summary.strip(), - body=body, - metadata=metadata - ) - - @classmethod - def from_experiment(cls, experiment: "ExperimentRun") -> "CommitMessage": - """Create a commit message from an experiment run""" - return cls( - kind=CommitKind.EXP, - summary="Experiment run", # TODO: Generate better summary - body="TODO: Generate experiment description", - metadata=experiment.model_dump(exclude={"summary"}) - ) - - class Commit(Model): """Represents a git commit""" + sha: str message: str date: datetime - kind: Optional[CommitKind] - metadata: dict @staticmethod def from_git(commit: git.Commit) -> "Commit": message = commit.message if isinstance(commit.message, str) else commit.message.decode() - commit_msg = CommitMessage.parse(message) return Commit( sha=commit.hexsha, message=message, date=commit.committed_datetime, - kind=commit_msg.kind, - metadata=commit_msg.metadata, ) + def startswith(self, value: str) -> bool: + return self.message.startswith(value) + class StageStrategy(Enum): """Strategy for staging files in git""" + ALL = auto() diff --git a/mthd/domain/query.py b/mthd/domain/query.py index 073713a..6328aee 100644 --- a/mthd/domain/query.py +++ b/mthd/domain/query.py @@ -1,4 +1,4 @@ -from typing import List, Literal +from typing import List import jmespath @@ -31,6 +31,10 @@ def from_expression(expr: str) -> "Query": def where(field: str, op: SimpleQueryOp, value: SimpleQueryValue) -> "Query": """Create a query that filters on a field value. + $ mthd viz "accuracy from ['lr', 'batch_size']" + + $ mthd query "accuracy > 0.9" + Example: Query.where("accuracy", ">", 0.9) """ diff --git a/mthd/service/git.py b/mthd/service/git.py index 66497c9..2b2a1ba 100644 --- a/mthd/service/git.py +++ b/mthd/service/git.py @@ -1,5 +1,8 @@ +from typing import Optional + import git +from mthd.domain.experiment import CommitKind from mthd.domain.git import Commit, StageStrategy @@ -7,7 +10,7 @@ class GitService: def __init__(self, repo: git.Repo): self._repo = repo - def get_all_commits(self) -> list[Commit]: + def get_all_commits(self, kind: Optional[CommitKind] = None) -> list[Commit]: """Get all commits in the repository. Returns: @@ -16,6 +19,9 @@ def get_all_commits(self) -> list[Commit]: commits = [] for commit in self._repo.iter_commits(): commits.append(Commit.from_git(commit)) + + if kind: + pass # @todo: filter by kind return commits def stage_and_commit(self, message: str): diff --git a/mthd/service/query.py b/mthd/service/query.py index b21860a..6aea6e2 100644 --- a/mthd/service/query.py +++ b/mthd/service/query.py @@ -1,6 +1,6 @@ from typing import Optional -from mthd.domain.experiment import ExperimentState +from mthd.domain.experiment import ExperimentRun from mthd.domain.query import Query, QueryResult, SimpleQueryOp, SimpleQueryValue from mthd.service.git import GitService @@ -25,7 +25,7 @@ def execute(self, query: Query, limit: Optional[int] = None) -> QueryResult: commits = self.git_service.get_all_commits() total = len(commits) - exp_commits = [ExperimentState.parse(commit) for commit in commits] + exp_commits = [ExperimentRun.parse(commit) for commit in commits] # Compile and execute the JMESPath query jmespath_query = query.compile() @@ -58,4 +58,3 @@ def execute_simple( """ query = Query.where(f"message.metrics.{metric}", op, value) return self.execute(query, limit=limit) - diff --git a/uv.lock b/uv.lock index d1b0aee..6c2af94 100644 --- a/uv.lock +++ b/uv.lock @@ -316,7 +316,6 @@ wheels = [ [[package]] name = "mthd" -version = "0.0.3.dev1+g673bac1.d20250118" source = { editable = "." } dependencies = [ { name = "anot" }, From 9f5576852f91386db0db8ff22e8419d50e1f4689 Mon Sep 17 00:00:00 2001 From: Rory Byrne Date: Sat, 8 Feb 2025 16:11:32 +0000 Subject: [PATCH 10/23] docs: add explanation of @commit decorator functionality in README --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index b1c5ee4..d5d9f15 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,4 @@ # mthd Automatically commit code experiments with hypothesis-driven git messages + +If the user adds a `@commit` decorator to their experiment's function, then we will automatically generate a commit with semantic metadata whenever the experiment runs. From de4a216a1a5fc7c1c87124e1b32f385855830270 Mon Sep 17 00:00:00 2001 From: Rory Byrne Date: Sat, 8 Feb 2025 16:30:08 +0000 Subject: [PATCH 11/23] refactor: Update query_test.py to use ExperimentRun and improve test mocking --- tests/unit/service/query_test.py | 71 +++++++++++++++++++++----------- 1 file changed, 48 insertions(+), 23 deletions(-) diff --git a/tests/unit/service/query_test.py b/tests/unit/service/query_test.py index c6ace5b..1f364b3 100644 --- a/tests/unit/service/query_test.py +++ b/tests/unit/service/query_test.py @@ -3,6 +3,7 @@ import pytest +from mthd.domain.experiment import ExperimentRun from mthd.domain.git import Commit from mthd.domain.query import Query from mthd.service.git import GitService @@ -15,12 +16,12 @@ def git_service(): commits = [ Commit( sha="abc123", - message="Test 1", + message="exp: Test 1\n\n---\n\n{\"experiment\": \"test1\", \"metrics\": {\"accuracy\": 0.8, \"loss\": 0.2}}", date=datetime(2024, 1, 1), ), Commit( - sha="def456", - message="Test 2", + sha="def456", + message="exp: Test 2\n\n---\n\n{\"experiment\": \"test2\", \"metrics\": {\"accuracy\": 0.9, \"loss\": 0.1}}", date=datetime(2024, 1, 2), ), ] @@ -33,16 +34,24 @@ def query_service(git_service: GitService): return QueryService(git_service) -@patch("mthd.domain.experiment.ExperimentState.parse") -def test_execute_query(mock_parse, query_service: QueryService): - # Setup mock parsed experiments - mock_parse.side_effect = [ - {"accuracy": 0.8, "loss": 0.2}, - {"accuracy": 0.9, "loss": 0.1}, +@patch("mthd.domain.experiment.ExperimentRun.from_commit") +def test_execute_query(mock_from_commit, query_service: QueryService): + # Setup mock experiments + mock_from_commit.side_effect = [ + ExperimentRun( + experiment="test1", + hyperparameters={}, + metrics={"accuracy": 0.8, "loss": 0.2} + ), + ExperimentRun( + experiment="test2", + hyperparameters={}, + metrics={"accuracy": 0.9, "loss": 0.1} + ), ] # Test query for high accuracy - query = Query.where("accuracy", ">", 0.85) + query = Query.where("metrics.accuracy", ">", 0.85) result = query_service.execute(query) assert len(result.commits) == 1 @@ -50,12 +59,20 @@ def test_execute_query(mock_parse, query_service: QueryService): assert result.query == query -@patch("mthd.domain.experiment.ExperimentState.parse") -def test_execute_simple_query(mock_parse, query_service: QueryService): - # Setup mock parsed experiments - mock_parse.side_effect = [ - {"metrics": {"loss": 0.2}}, - {"metrics": {"loss": 0.1}}, +@patch("mthd.domain.experiment.ExperimentRun.from_commit") +def test_execute_simple_query(mock_from_commit, query_service: QueryService): + # Setup mock experiments + mock_from_commit.side_effect = [ + ExperimentRun( + experiment="test1", + hyperparameters={}, + metrics={"loss": 0.2} + ), + ExperimentRun( + experiment="test2", + hyperparameters={}, + metrics={"loss": 0.1} + ), ] # Test simple query for low loss @@ -65,16 +82,24 @@ def test_execute_simple_query(mock_parse, query_service: QueryService): assert result.num_searched == 2 -@patch("mthd.domain.experiment.ExperimentState.parse") -def test_execute_query_with_limit(mock_parse, query_service: QueryService): - # Setup mock parsed experiments - mock_parse.side_effect = [ - {"accuracy": 0.9}, - {"accuracy": 0.95}, +@patch("mthd.domain.experiment.ExperimentRun.from_commit") +def test_execute_query_with_limit(mock_from_commit, query_service: QueryService): + # Setup mock experiments + mock_from_commit.side_effect = [ + ExperimentRun( + experiment="test1", + hyperparameters={}, + metrics={"accuracy": 0.9} + ), + ExperimentRun( + experiment="test2", + hyperparameters={}, + metrics={"accuracy": 0.95} + ), ] # Test query with limit - query = Query.where("accuracy", ">=", 0.9) + query = Query.where("metrics.accuracy", ">=", 0.9) result = query_service.execute(query, limit=1) assert len(result.commits) == 1 From e935d65579537d9c3d7693c27851c9e99f5a9016 Mon Sep 17 00:00:00 2001 From: Rory Byrne Date: Sat, 8 Feb 2025 16:42:13 +0000 Subject: [PATCH 12/23] test: Update query test cases and remove commented mock patches --- tests/unit/service/query_test.py | 51 +++++++++----------------------- 1 file changed, 14 insertions(+), 37 deletions(-) diff --git a/tests/unit/service/query_test.py b/tests/unit/service/query_test.py index 1f364b3..f6e23b8 100644 --- a/tests/unit/service/query_test.py +++ b/tests/unit/service/query_test.py @@ -16,12 +16,12 @@ def git_service(): commits = [ Commit( sha="abc123", - message="exp: Test 1\n\n---\n\n{\"experiment\": \"test1\", \"metrics\": {\"accuracy\": 0.8, \"loss\": 0.2}}", + message='exp: Test 1\n\n---\n\n{"experiment": "test1", "metrics": {"accuracy": 0.8, "loss": 0.2}}', date=datetime(2024, 1, 1), ), Commit( - sha="def456", - message="exp: Test 2\n\n---\n\n{\"experiment\": \"test2\", \"metrics\": {\"accuracy\": 0.9, \"loss\": 0.1}}", + sha="def456", + message='exp: Test 2\n\n---\n\n{"experiment": "test2", "metrics": {"accuracy": 0.9, "loss": 0.1}}', date=datetime(2024, 1, 2), ), ] @@ -34,25 +34,18 @@ def query_service(git_service: GitService): return QueryService(git_service) -@patch("mthd.domain.experiment.ExperimentRun.from_commit") -def test_execute_query(mock_from_commit, query_service: QueryService): +# @patch("mthd.domain.experiment.ExperimentRun.from_commit") +def test_execute_query(query_service: QueryService): # Setup mock experiments - mock_from_commit.side_effect = [ - ExperimentRun( - experiment="test1", - hyperparameters={}, - metrics={"accuracy": 0.8, "loss": 0.2} - ), - ExperimentRun( - experiment="test2", - hyperparameters={}, - metrics={"accuracy": 0.9, "loss": 0.1} - ), - ] + # mock_from_commit.side_effect = [ + # ExperimentRun(experiment="test1", hyperparameters={}, metrics={"accuracy": 0.8, "loss": 0.2}), + # ExperimentRun(experiment="test2", hyperparameters={}, metrics={"accuracy": 0.9, "loss": 0.1}), + # ] # Test query for high accuracy query = Query.where("metrics.accuracy", ">", 0.85) result = query_service.execute(query) + print(result) assert len(result.commits) == 1 assert result.num_searched == 2 @@ -63,16 +56,8 @@ def test_execute_query(mock_from_commit, query_service: QueryService): def test_execute_simple_query(mock_from_commit, query_service: QueryService): # Setup mock experiments mock_from_commit.side_effect = [ - ExperimentRun( - experiment="test1", - hyperparameters={}, - metrics={"loss": 0.2} - ), - ExperimentRun( - experiment="test2", - hyperparameters={}, - metrics={"loss": 0.1} - ), + ExperimentRun(experiment="test1", hyperparameters={}, metrics={"loss": 0.2}), + ExperimentRun(experiment="test2", hyperparameters={}, metrics={"loss": 0.1}), ] # Test simple query for low loss @@ -86,16 +71,8 @@ def test_execute_simple_query(mock_from_commit, query_service: QueryService): def test_execute_query_with_limit(mock_from_commit, query_service: QueryService): # Setup mock experiments mock_from_commit.side_effect = [ - ExperimentRun( - experiment="test1", - hyperparameters={}, - metrics={"accuracy": 0.9} - ), - ExperimentRun( - experiment="test2", - hyperparameters={}, - metrics={"accuracy": 0.95} - ), + ExperimentRun(experiment="test1", hyperparameters={}, metrics={"accuracy": 0.9}), + ExperimentRun(experiment="test2", hyperparameters={}, metrics={"accuracy": 0.95}), ] # Test query with limit From d156fb99d30c62919e1724a4889b4178ff5dcf7b Mon Sep 17 00:00:00 2001 From: Rory Byrne Date: Sat, 8 Feb 2025 16:42:15 +0000 Subject: [PATCH 13/23] fix: Add missing hyperparameters field in test commit metadata --- tests/unit/service/query_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/unit/service/query_test.py b/tests/unit/service/query_test.py index f6e23b8..763fced 100644 --- a/tests/unit/service/query_test.py +++ b/tests/unit/service/query_test.py @@ -16,12 +16,12 @@ def git_service(): commits = [ Commit( sha="abc123", - message='exp: Test 1\n\n---\n\n{"experiment": "test1", "metrics": {"accuracy": 0.8, "loss": 0.2}}', + message='exp: Test 1\n\n---\n\n{"experiment": "test1", "hyperparameters": {}, "metrics": {"accuracy": 0.8, "loss": 0.2}}', date=datetime(2024, 1, 1), ), Commit( sha="def456", - message='exp: Test 2\n\n---\n\n{"experiment": "test2", "metrics": {"accuracy": 0.9, "loss": 0.1}}', + message='exp: Test 2\n\n---\n\n{"experiment": "test2", "hyperparameters": {}, "metrics": {"accuracy": 0.9, "loss": 0.1}}', date=datetime(2024, 1, 2), ), ] From 316472444bdfd7cb9105a1c618074e6fba91d12d Mon Sep 17 00:00:00 2001 From: Rory Byrne Date: Sat, 8 Feb 2025 20:00:26 +0000 Subject: [PATCH 14/23] refactor: Update query service to use commits by SHA and improve query handling --- mthd/service/query.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/mthd/service/query.py b/mthd/service/query.py index 6aea6e2..1467b46 100644 --- a/mthd/service/query.py +++ b/mthd/service/query.py @@ -1,6 +1,7 @@ from typing import Optional from mthd.domain.experiment import ExperimentRun +from mthd.domain.git import Commit from mthd.domain.query import Query, QueryResult, SimpleQueryOp, SimpleQueryValue from mthd.service.git import GitService @@ -23,13 +24,16 @@ def execute(self, query: Query, limit: Optional[int] = None) -> QueryResult: """ # Get all commits with experimental metadata commits = self.git_service.get_all_commits() + commits_by_sha = {c.sha: c for c in commits} total = len(commits) - exp_commits = [ExperimentRun.parse(commit) for commit in commits] + exp_commits = [ExperimentRun.from_commit(commit) for commit in commits] + # print(exp_commits) # Compile and execute the JMESPath query jmespath_query = query.compile() - results = jmespath_query.search(exp_commits) + print(query) + results: list[Commit] = jmespath_query.search(commits) # Handle limit # @todo: sort by commit date? From 084df5fabe56ef6995eec41406f262414957d1ec Mon Sep 17 00:00:00 2001 From: Rory Byrne Date: Sat, 8 Feb 2025 20:00:27 +0000 Subject: [PATCH 15/23] refactor: Implement ExperimentCommit with improved query handling --- mthd/domain/git.py | 19 +++++++++++++++++++ mthd/service/query.py | 24 ++++++++++++------------ 2 files changed, 31 insertions(+), 12 deletions(-) diff --git a/mthd/domain/git.py b/mthd/domain/git.py index 696d4ab..a5a04db 100644 --- a/mthd/domain/git.py +++ b/mthd/domain/git.py @@ -1,8 +1,10 @@ from datetime import datetime from enum import Enum, auto +from typing import Optional import git +from mthd.domain.experiment import ExperimentRun from mthd.util.model import Model @@ -26,6 +28,23 @@ def startswith(self, value: str) -> bool: return self.message.startswith(value) +class ExperimentCommit(Commit): + """A commit that contains experiment data""" + experiment_run: ExperimentRun + + @classmethod + def from_commit(cls, commit: Commit) -> Optional["ExperimentCommit"]: + exp = ExperimentRun.from_commit(commit) + if exp: + return cls( + sha=commit.sha, + message=commit.message, + date=commit.date, + experiment_run=exp + ) + return None + + class StageStrategy(Enum): """Strategy for staging files in git""" diff --git a/mthd/service/query.py b/mthd/service/query.py index 1467b46..3ec7112 100644 --- a/mthd/service/query.py +++ b/mthd/service/query.py @@ -22,25 +22,25 @@ def execute(self, query: Query, limit: Optional[int] = None) -> QueryResult: Returns: QueryResult containing matching commits """ - # Get all commits with experimental metadata commits = self.git_service.get_all_commits() - commits_by_sha = {c.sha: c for c in commits} total = len(commits) - exp_commits = [ExperimentRun.from_commit(commit) for commit in commits] - # print(exp_commits) + # Convert regular commits to experiment commits where possible + exp_commits = [ + exp_commit for commit in commits + if (exp_commit := ExperimentCommit.from_commit(commit)) + ] - # Compile and execute the JMESPath query - jmespath_query = query.compile() - print(query) - results: list[Commit] = jmespath_query.search(commits) + # Query on the experiment_run field + query_str = query.expression.replace("metrics.", "experiment_run.metrics.") + modified_query = Query(expression=query_str) + matching = modified_query.compile().search(exp_commits) - # Handle limit - # @todo: sort by commit date? - if limit: + results = matching or [] + if limit and limit > 0: results = results[:limit] - return QueryResult(commits=results or [], query=query, num_searched=total) + return QueryResult(commits=results, query=query, num_searched=total) def execute_simple( self, From 303fbfb12365b7e755dc331893e1edf22492f89d Mon Sep 17 00:00:00 2001 From: Rory Byrne Date: Mon, 10 Feb 2025 18:27:43 +0000 Subject: [PATCH 16/23] test: Remove unnecessary print statement in query test --- tests/unit/service/query_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/service/query_test.py b/tests/unit/service/query_test.py index 763fced..3c7e8d7 100644 --- a/tests/unit/service/query_test.py +++ b/tests/unit/service/query_test.py @@ -1,6 +1,7 @@ from datetime import datetime from unittest.mock import Mock, patch +from pydantic import BaseModel import pytest from mthd.domain.experiment import ExperimentRun @@ -45,7 +46,6 @@ def test_execute_query(query_service: QueryService): # Test query for high accuracy query = Query.where("metrics.accuracy", ">", 0.85) result = query_service.execute(query) - print(result) assert len(result.commits) == 1 assert result.num_searched == 2 From 2ba0bc688d28cceb2245b2c7c9937e1a29f4f2a0 Mon Sep 17 00:00:00 2001 From: Rory Byrne Date: Mon, 10 Feb 2025 18:27:45 +0000 Subject: [PATCH 17/23] fix: Update query test to mock correct ExperimentCommit method --- tests/unit/service/query_test.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/tests/unit/service/query_test.py b/tests/unit/service/query_test.py index 3c7e8d7..4cdfe14 100644 --- a/tests/unit/service/query_test.py +++ b/tests/unit/service/query_test.py @@ -52,13 +52,22 @@ def test_execute_query(query_service: QueryService): assert result.query == query -@patch("mthd.domain.experiment.ExperimentRun.from_commit") +@patch("mthd.domain.git.ExperimentCommit.from_commit") def test_execute_simple_query(mock_from_commit, query_service: QueryService): - # Setup mock experiments - mock_from_commit.side_effect = [ - ExperimentRun(experiment="test1", hyperparameters={}, metrics={"loss": 0.2}), - ExperimentRun(experiment="test2", hyperparameters={}, metrics={"loss": 0.1}), + # Setup mock experiments with ExperimentCommits + mock_commits = [ + Mock( + experiment_run=ExperimentRun( + experiment="test1", hyperparameters={}, metrics={"loss": 0.2} + ) + ), + Mock( + experiment_run=ExperimentRun( + experiment="test2", hyperparameters={}, metrics={"loss": 0.1} + ) + ), ] + mock_from_commit.side_effect = mock_commits # Test simple query for low loss result = query_service.execute_simple("loss", "<", 0.15) From ea86e5a073d4c5e2a55c871f8b1c5e8398dd57bf Mon Sep 17 00:00:00 2001 From: Rory Byrne Date: Mon, 10 Feb 2025 18:28:29 +0000 Subject: [PATCH 18/23] refactor: Update query service to handle experiment commits and metrics querying --- mthd/service/query.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/mthd/service/query.py b/mthd/service/query.py index 3ec7112..ea1980e 100644 --- a/mthd/service/query.py +++ b/mthd/service/query.py @@ -1,7 +1,6 @@ from typing import Optional -from mthd.domain.experiment import ExperimentRun -from mthd.domain.git import Commit +from mthd.domain.git import ExperimentCommit from mthd.domain.query import Query, QueryResult, SimpleQueryOp, SimpleQueryValue from mthd.service.git import GitService @@ -25,16 +24,19 @@ def execute(self, query: Query, limit: Optional[int] = None) -> QueryResult: commits = self.git_service.get_all_commits() total = len(commits) - # Convert regular commits to experiment commits where possible + # Convert regular commits to experiment commits exp_commits = [ - exp_commit for commit in commits + {"commit": exp_commit, "run": exp_commit.experiment_run.model_dump()} + for commit in commits if (exp_commit := ExperimentCommit.from_commit(commit)) ] - # Query on the experiment_run field - query_str = query.expression.replace("metrics.", "experiment_run.metrics.") + query_str = query.expression.replace("metrics.", "run.metrics.").replace( + "hyperparameters.", "run.hyperparameters." + ) modified_query = Query(expression=query_str) - matching = modified_query.compile().search(exp_commits) + search = modified_query.compile().search(exp_commits) + matching: list[ExperimentCommit] = [match["commit"] for match in search] results = matching or [] if limit and limit > 0: @@ -60,5 +62,5 @@ def execute_simple( Returns: QueryResult containing matching commits """ - query = Query.where(f"message.metrics.{metric}", op, value) + query = Query.where(metric, op, value) return self.execute(query, limit=limit) From cb97324a40384bdd7aed577208feabcc29c275ea Mon Sep 17 00:00:00 2001 From: Rory Byrne Date: Mon, 10 Feb 2025 18:28:31 +0000 Subject: [PATCH 19/23] fix: Correct query service metric filtering and test mock setup --- mthd/service/query.py | 2 +- tests/unit/service/query_test.py | 26 +++++++++++++------------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/mthd/service/query.py b/mthd/service/query.py index ea1980e..a1a9aad 100644 --- a/mthd/service/query.py +++ b/mthd/service/query.py @@ -62,5 +62,5 @@ def execute_simple( Returns: QueryResult containing matching commits """ - query = Query.where(metric, op, value) + query = Query.where(f"metrics.{metric}", op, value) return self.execute(query, limit=limit) diff --git a/tests/unit/service/query_test.py b/tests/unit/service/query_test.py index 4cdfe14..38581b7 100644 --- a/tests/unit/service/query_test.py +++ b/tests/unit/service/query_test.py @@ -55,19 +55,19 @@ def test_execute_query(query_service: QueryService): @patch("mthd.domain.git.ExperimentCommit.from_commit") def test_execute_simple_query(mock_from_commit, query_service: QueryService): # Setup mock experiments with ExperimentCommits - mock_commits = [ - Mock( - experiment_run=ExperimentRun( - experiment="test1", hyperparameters={}, metrics={"loss": 0.2} - ) - ), - Mock( - experiment_run=ExperimentRun( - experiment="test2", hyperparameters={}, metrics={"loss": 0.1} - ) - ), - ] - mock_from_commit.side_effect = mock_commits + # Create experiment runs + exp_run1 = ExperimentRun( + experiment="test1", hyperparameters={}, metrics={"loss": 0.2} + ) + exp_run2 = ExperimentRun( + experiment="test2", hyperparameters={}, metrics={"loss": 0.1} + ) + + # Create mock experiment commits that will be returned by from_commit + mock_exp_commit1 = Mock(experiment_run=exp_run1, model_dump=lambda: {"run": exp_run1.model_dump()}) + mock_exp_commit2 = Mock(experiment_run=exp_run2, model_dump=lambda: {"run": exp_run2.model_dump()}) + + mock_from_commit.side_effect = [mock_exp_commit1, mock_exp_commit2] # Test simple query for low loss result = query_service.execute_simple("loss", "<", 0.15) From 9d1cd696cde428c59a2d4f8be6bfe6ded1f61600 Mon Sep 17 00:00:00 2001 From: Rory Byrne Date: Mon, 10 Feb 2025 18:32:54 +0000 Subject: [PATCH 20/23] refactor: improve type hints and circular import handling The commit message focuses on the main changes which include: - Using TYPE_CHECKING for circular imports - Replacing List with Sequence for better type hints - Removing unused test file - Restructuring and simplifying test cases --- mthd/domain/experiment.py | 12 ++++++----- mthd/domain/git.py | 10 +++------ mthd/domain/query.py | 4 ++-- mthd/service/query.py | 2 +- tests/unit/domain/commit_test.py | 17 --------------- tests/unit/service/query_test.py | 36 ++++++++++++-------------------- 6 files changed, 26 insertions(+), 55 deletions(-) diff --git a/mthd/domain/experiment.py b/mthd/domain/experiment.py index 9966b5e..91915fa 100644 --- a/mthd/domain/experiment.py +++ b/mthd/domain/experiment.py @@ -3,15 +3,17 @@ from datetime import datetime from enum import StrEnum -from typing import Any, Optional +from typing import TYPE_CHECKING, Any, Optional from uuid import uuid4 from pydantic import UUID4, Field from mthd.config import BODY_METADATA_SEPARATOR, SUMMARY_BODY_SEPARATOR -from mthd.domain.git import Commit from mthd.util.model import Model +if TYPE_CHECKING: + from mthd.domain.git import Commit + logger = logging.getLogger(__name__) @@ -35,7 +37,7 @@ def as_commit_message(self, template: str) -> "SemanticMessage": ) @staticmethod - def from_commit(commit: Commit) -> Optional["ExperimentRun"]: + def from_commit(commit: "Commit") -> Optional["ExperimentRun"]: message = SemanticMessage.from_commit(commit) # if not message: # # @todo: is :20s the right syntax to truncate? @@ -86,13 +88,13 @@ def render(self, with_metadata: bool = False) -> str: return msg @classmethod - def from_commit(cls, commit: Commit) -> "SemanticMessage": + def from_commit(cls, commit: "Commit") -> "SemanticMessage": kind, summary, body, metadata = cls._parse_semantic_parts(commit) return cls(kind=kind, summary=summary.strip(), body=body, metadata=metadata) @staticmethod - def _parse_semantic_parts(commit: Commit) -> tuple[CommitKind, str, Optional[str], Optional[dict]]: + def _parse_semantic_parts(commit: "Commit") -> tuple[CommitKind, str, Optional[str], Optional[dict]]: # we only want to split on the first separator parts = commit.message.split(SUMMARY_BODY_SEPARATOR, maxsplit=1) header = parts[0] diff --git a/mthd/domain/git.py b/mthd/domain/git.py index a5a04db..4fb862b 100644 --- a/mthd/domain/git.py +++ b/mthd/domain/git.py @@ -30,18 +30,14 @@ def startswith(self, value: str) -> bool: class ExperimentCommit(Commit): """A commit that contains experiment data""" + experiment_run: ExperimentRun - @classmethod + @classmethod def from_commit(cls, commit: Commit) -> Optional["ExperimentCommit"]: exp = ExperimentRun.from_commit(commit) if exp: - return cls( - sha=commit.sha, - message=commit.message, - date=commit.date, - experiment_run=exp - ) + return cls(sha=commit.sha, message=commit.message, date=commit.date, experiment_run=exp) return None diff --git a/mthd/domain/query.py b/mthd/domain/query.py index 6328aee..7fb5eea 100644 --- a/mthd/domain/query.py +++ b/mthd/domain/query.py @@ -1,4 +1,4 @@ -from typing import List +from typing import Sequence import jmespath @@ -59,6 +59,6 @@ def where(field: str, op: SimpleQueryOp, value: SimpleQueryValue) -> "Query": class QueryResult(BaseModel): """Result of executing a query.""" - commits: List[Commit] + commits: Sequence[Commit] query: Query num_searched: int diff --git a/mthd/service/query.py b/mthd/service/query.py index a1a9aad..ea1980e 100644 --- a/mthd/service/query.py +++ b/mthd/service/query.py @@ -62,5 +62,5 @@ def execute_simple( Returns: QueryResult containing matching commits """ - query = Query.where(f"metrics.{metric}", op, value) + query = Query.where(metric, op, value) return self.execute(query, limit=limit) diff --git a/tests/unit/domain/commit_test.py b/tests/unit/domain/commit_test.py index 69c5831..e69de29 100644 --- a/tests/unit/domain/commit_test.py +++ b/tests/unit/domain/commit_test.py @@ -1,17 +0,0 @@ -from pydantic import BaseModel - -from mthd.domain.git import ExperimentState - - -def test_commitmessage_format_success(): - class Hypers(BaseModel): - a: int - b: float - c: str - - msg = ExperimentState( - summary="test", - hyperparameters=Hypers(a=1, b=2.0, c="3").model_dump(), - ) - - print(msg.as_commit_message()) diff --git a/tests/unit/service/query_test.py b/tests/unit/service/query_test.py index 38581b7..6d5d598 100644 --- a/tests/unit/service/query_test.py +++ b/tests/unit/service/query_test.py @@ -1,7 +1,6 @@ from datetime import datetime from unittest.mock import Mock, patch -from pydantic import BaseModel import pytest from mthd.domain.experiment import ExperimentRun @@ -52,32 +51,24 @@ def test_execute_query(query_service: QueryService): assert result.query == query -@patch("mthd.domain.git.ExperimentCommit.from_commit") -def test_execute_simple_query(mock_from_commit, query_service: QueryService): - # Setup mock experiments with ExperimentCommits - # Create experiment runs - exp_run1 = ExperimentRun( - experiment="test1", hyperparameters={}, metrics={"loss": 0.2} - ) - exp_run2 = ExperimentRun( - experiment="test2", hyperparameters={}, metrics={"loss": 0.1} - ) - - # Create mock experiment commits that will be returned by from_commit - mock_exp_commit1 = Mock(experiment_run=exp_run1, model_dump=lambda: {"run": exp_run1.model_dump()}) - mock_exp_commit2 = Mock(experiment_run=exp_run2, model_dump=lambda: {"run": exp_run2.model_dump()}) - - mock_from_commit.side_effect = [mock_exp_commit1, mock_exp_commit2] - - # Test simple query for low loss - result = query_service.execute_simple("loss", "<", 0.15) +@patch("mthd.domain.experiment.ExperimentRun.from_commit") +def test_execute_query_with_limit(mock_from_commit, query_service: QueryService): + # Setup mock experiments + mock_from_commit.side_effect = [ + ExperimentRun(experiment="test1", hyperparameters={}, metrics={"accuracy": 0.9}), + ExperimentRun(experiment="test2", hyperparameters={}, metrics={"accuracy": 0.95}), + ] + + # Test query with limit + query = Query.where("metrics.accuracy", ">=", 0.9) + result = query_service.execute(query, limit=1) assert len(result.commits) == 1 assert result.num_searched == 2 @patch("mthd.domain.experiment.ExperimentRun.from_commit") -def test_execute_query_with_limit(mock_from_commit, query_service: QueryService): +def test_execute_simple(mock_from_commit, query_service: QueryService): # Setup mock experiments mock_from_commit.side_effect = [ ExperimentRun(experiment="test1", hyperparameters={}, metrics={"accuracy": 0.9}), @@ -85,8 +76,7 @@ def test_execute_query_with_limit(mock_from_commit, query_service: QueryService) ] # Test query with limit - query = Query.where("metrics.accuracy", ">=", 0.9) - result = query_service.execute(query, limit=1) + result = query_service.execute_simple("metrics.accuracy", "<", 0.95, limit=1) assert len(result.commits) == 1 assert result.num_searched == 2 From 54adf4b983afbb859f7a53b7556284d511b316d5 Mon Sep 17 00:00:00 2001 From: Rory Byrne Date: Mon, 10 Feb 2025 18:40:07 +0000 Subject: [PATCH 21/23] feat(decorator): add dry run mode and improve experiment metrics handling --- mthd/decorator.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/mthd/decorator.py b/mthd/decorator.py index bd531ba..ea73c7d 100644 --- a/mthd/decorator.py +++ b/mthd/decorator.py @@ -1,4 +1,5 @@ from functools import wraps +import os from typing import Callable, Optional, cast from pydantic import BaseModel @@ -36,25 +37,29 @@ def wrapper(*args, **kwargs): # codebase_service = di[CodebaseService] # Generate commit message + + # Run experiment + metrics = func(*args, **kwargs) + experiment = ExperimentRun( experiment=func.__name__, hyperparameters=hyperparameters.model_dump(), - metrics={}, + metrics=metrics.model_dump(), # annotations=codebase_service.get_all_annotations(), ) message = experiment.as_commit_message(template=template) - # Run experiment - result = func(*args, **kwargs) - # Commit changes if git_service.should_commit(strategy): console = Console() console.print("Generating commit with message:\n") console.print(Padding(message.render(with_metadata=True), pad=(0, 0, 0, 4))) # Indent by 4 spaces. - # git_service.stage_and_commit(message.render()) + if os.getenv("MTHD_DRY_RUN") == "1": + console.print("\nDry run enabled. Not committing changes.") + else: + git_service.stage_and_commit(message.render()) - return result + return metrics return wrapper @@ -64,6 +69,7 @@ def wrapper(*args, **kwargs): if __name__ == "__main__": + os.environ["MTHD_DRY_RUN"] = "1" class Hyperparameters(BaseModel): a: int From 5c51b7f97a59a2c14a8b1a9dd6fc25db9d0f8502 Mon Sep 17 00:00:00 2001 From: Rory Byrne Date: Mon, 10 Feb 2025 19:03:45 +0000 Subject: [PATCH 22/23] docs: add comprehensive README with installation and usage instructions --- README.md | 100 ++++++++++++++++++++++++++++++++++++++++++++-- mthd/decorator.py | 3 +- 2 files changed, 99 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index d5d9f15..ecd343f 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,98 @@ -# mthd -Automatically commit code experiments with hypothesis-driven git messages +
+ + ![logo](https://github.com/user-attachments/assets/cb3ddb4f-5f40-4231-9efe-29e045705dda) -If the user adds a `@commit` decorator to their experiment's function, then we will automatically generate a commit with semantic metadata whenever the experiment runs. + [Discord](https://discord.gg/kTkF2e69fH) • [Website](https://flywhl.dev) • [Installation](#installation) +
+
+
+ +## Features + +Mthd turns your commit history into a scientific log by auto-committing your code with metadata in the commit message - every time you run an experiment. + + +## Installation + +* `uv add mthd` + +## Usage + +* Put the `@commit` decorator on your experiment function. +* `mthd` will store hyperparameters and metrics as metadata in the commit message. +* Query your scientific log, e.g. `mthd query metrics.accuracy < 0.8`. + +```python +from mthd import commit +from pydantic import BaseModel + +class Hypers(BaseModel): + + lr: float + epochs: int + + +class Metrics(BaseModel): + + accuracy: float + + +@commit(hypers="hypers") +def my_experiment(hypers: Hypers) -> Metrics: + ... + # experiment + ... + + metrics = Metrics(...) + + return metrics +``` + +Then run your experiment: + +``` +$ python experiment.py + +Generating commit with message: + + exp: run test at 2025-02-10 18:39:18.759230 + + --- + + { + "experiment": "test", + "hyperparameters": { + "lr": 0.001, + "epochs": 100, + }, + "metrics": { + "accuracy": 0.9, + }, + "uuid": "94871de1-4d6c-4e70-9c9d-60ec11df1159", + "artifacts": null, + "annotations": null, + "timestamp": "2025-02-10T18:39:18.759230" + } +``` + +Finally, query for relevant commits: + +``` +$ mthd query metrics.accuracy > 0.8 + +Found 1 commit(s): + + af6cd7 +``` + + +## Development + +* `git clone https://github.com/flywhl/mthd.git` +* `cd mthd` +* `uv sync` +* `just test` + +## Flywheel + +Science needs better software tools. [Flywheel](https://flywhl.dev/) is an open source collective building simple tools to preserve scientific momentum, inspired by devtools and devops culture. Join our Discord [here](discord.gg/fd37MFZ7RS). diff --git a/mthd/decorator.py b/mthd/decorator.py index ea73c7d..2d06685 100644 --- a/mthd/decorator.py +++ b/mthd/decorator.py @@ -1,5 +1,6 @@ -from functools import wraps import os + +from functools import wraps from typing import Callable, Optional, cast from pydantic import BaseModel From 17d47eacc0acf655a1cf9ccda3c6f359e43371b2 Mon Sep 17 00:00:00 2001 From: Rory Byrne Date: Mon, 10 Feb 2025 19:06:01 +0000 Subject: [PATCH 23/23] docs: improve clarity of README --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index ecd343f..b20f41a 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,9 @@ ## Features -Mthd turns your commit history into a scientific log by auto-committing your code with metadata in the commit message - every time you run an experiment. +`mthd` turns your commit history into a searchable scientific log. + +Every time you run an experiment, your code will be auto-committed with metadata in the commit message. ## Installation