Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
57b86b1
feat: Add ExperimentState to commit messages
rorybyrne Jan 18, 2025
4e0bb76
feat: Implement get_all_commits in GitService
rorybyrne Jan 18, 2025
f7b245e
feat: Add unit tests for query service
rorybyrne Jan 19, 2025
2d4d2fa
feat: Add query tests mocking ExperimentState.parse
rorybyrne Jan 19, 2025
b32c493
feat: add CLI interface and experiment query functionality
rorybyrne Jan 19, 2025
1f131b6
feat: Refactor experiment and git domain models with improved semantics
rorybyrne Jan 19, 2025
89d4bb1
refactor: Restructure experiment module with new CommitMessage and Co…
rorybyrne Jan 19, 2025
36e6660
refactor: Separate semantic commit types and improve experiment commi…
rorybyrne Jan 19, 2025
b359e17
refactor: restructure experiment commit handling and message formatting
rorybyrne Feb 8, 2025
9f55768
docs: add explanation of @commit decorator functionality in README
rorybyrne Feb 8, 2025
de4a216
refactor: Update query_test.py to use ExperimentRun and improve test …
rorybyrne Feb 8, 2025
e935d65
test: Update query test cases and remove commented mock patches
rorybyrne Feb 8, 2025
d156fb9
fix: Add missing hyperparameters field in test commit metadata
rorybyrne Feb 8, 2025
3164724
refactor: Update query service to use commits by SHA and improve quer…
rorybyrne Feb 8, 2025
084df5f
refactor: Implement ExperimentCommit with improved query handling
rorybyrne Feb 8, 2025
303fbfb
test: Remove unnecessary print statement in query test
rorybyrne Feb 10, 2025
2ba0bc6
fix: Update query test to mock correct ExperimentCommit method
rorybyrne Feb 10, 2025
ea86e5a
refactor: Update query service to handle experiment commits and metri…
rorybyrne Feb 10, 2025
cb97324
fix: Correct query service metric filtering and test mock setup
rorybyrne Feb 10, 2025
9d1cd69
refactor: improve type hints and circular import handling
rorybyrne Feb 10, 2025
54adf4b
feat(decorator): add dry run mode and improve experiment metrics hand…
rorybyrne Feb 10, 2025
5c51b7f
docs: add comprehensive README with installation and usage instructions
rorybyrne Feb 10, 2025
17d47ea
docs: improve clarity of README
rorybyrne Feb 10, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
102 changes: 100 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,100 @@
# mthd
Automatically commit code experiments with hypothesis-driven git messages
<div align="center">

![logo](https://github.com/user-attachments/assets/cb3ddb4f-5f40-4231-9efe-29e045705dda)

[Discord](https://discord.gg/kTkF2e69fH) • [Website](https://flywhl.dev) • [Installation](#installation)
<br/>
<br/>
</div>

## Features

`mthd` turns your commit history into a searchable scientific log.

Every time you run an experiment, your code will be auto-committed with metadata in the commit message.


## Installation

* `uv add mthd`

## Usage

* Put the `@commit` decorator on your experiment function.
* `mthd` will store hyperparameters and metrics as metadata in the commit message.
* Query your scientific log, e.g. `mthd query "metrics.accuracy < 0.8"` (quote the query so the shell does not treat `<` or `>` as a redirection).

```python
from mthd import commit
from pydantic import BaseModel

class Hypers(BaseModel):

lr: float
epochs: int


class Metrics(BaseModel):

accuracy: float


@commit(hypers="hypers")
def my_experiment(hypers: Hypers) -> Metrics:
...
# experiment
...

metrics = Metrics(...)

return metrics
```

Then run your experiment:

```
$ python experiment.py

Generating commit with message:

exp: run test at 2025-02-10 18:39:18.759230

---

{
"experiment": "test",
"hyperparameters": {
"lr": 0.001,
"epochs": 100
},
"metrics": {
"accuracy": 0.9
},
"uuid": "94871de1-4d6c-4e70-9c9d-60ec11df1159",
"artifacts": null,
"annotations": null,
"timestamp": "2025-02-10T18:39:18.759230"
}
```

Finally, query for relevant commits:

```
$ mthd query "metrics.accuracy > 0.8"

Found 1 commit(s):

af6cd7
```


## Development

* `git clone https://github.com/flywhl/mthd.git`
* `cd mthd`
* `uv sync`
* `just test`

## Flywheel

Science needs better software tools. [Flywheel](https://flywhl.dev/) is an open source collective building simple tools to preserve scientific momentum, inspired by devtools and devops culture. Join our Discord [here](https://discord.gg/fd37MFZ7RS).
Empty file added mthd/cli/__init__.py
Empty file.
18 changes: 18 additions & 0 deletions mthd/cli/app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import click

from dishka.integrations.click import setup_dishka

from mthd.cli.commands.query import query
from mthd.util.di import DI


def start():
    """Build the ``mthd`` click command group and run it.

    The group callback creates a ``DI`` instance and hands its container to
    dishka's click integration with auto-injection enabled. The ``query``
    command is registered on the group under the name ``"query"`` before the
    group is invoked.
    """

    @click.group()
    @click.pass_context
    def main(context: click.Context):
        injector = DI()
        setup_dishka(
            container=injector.container,
            context=context,
            auto_inject=True,
        )

    # Same as ``main.command("query")(query)``, spelled out in two steps.
    register_query = main.command("query")
    register_query(query)

    main()
Empty file added mthd/cli/commands/__init__.py
Empty file.
18 changes: 18 additions & 0 deletions mthd/cli/commands/query.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import click

from dishka import FromDishka

from mthd.service.query import QueryService


@click.argument("query", type=str)
@click.option("limit", "--limit", type=int, default=-1)
def query(query: str, limit: int, query_service: FromDishka[QueryService]):
    """Run a simple three-token query and print the result.

    Args:
        query: One string made of exactly three whitespace-separated tokens
            (e.g. ``"metrics.accuracy > 0.8"``). The tokens are passed
            positionally to ``QueryService.execute_simple``.
        limit: Forwarded to ``execute_simple`` as the ``limit`` keyword;
            defaults to ``-1``.
        query_service: The query service, injected by dishka.

    Raises:
        click.UsageError: If ``query`` does not contain exactly three tokens.
    """
    # split() with no argument collapses runs of whitespace, so an accidental
    # double space does not produce empty tokens and a spurious rejection.
    query_parts = query.split()
    if len(query_parts) != 3:
        # A click exception is reported as a usage error instead of a traceback.
        raise click.UsageError(f"Invalid query: {query}")

    result = query_service.execute_simple(*query_parts, limit=limit)

    print(result)
2 changes: 2 additions & 0 deletions mthd/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Marker written between a commit message's body and its JSON metadata block.
BODY_METADATA_SEPARATOR = "---"
# Blank-line separator between a commit message's header line and everything after it.
SUMMARY_BODY_SEPARATOR = "\n\n"
61 changes: 38 additions & 23 deletions mthd/decorator.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,24 @@
import os

from functools import wraps
from typing import Callable, Optional, cast

from pydantic import BaseModel
from rich.console import Console
from rich.padding import Padding

from mthd.domain.commit import CommitMessage, StageStrategy
from mthd.domain.experiment import ExperimentRun
from mthd.domain.git import StageStrategy
from mthd.error import MthdError
from mthd.service.git import GitService
from mthd.util.di import DI


def commit(
fn: Optional[Callable] = None,
*,
hypers: str = "hypers",
template: str = "run {experiment}",
strategy: StageStrategy = StageStrategy.ALL,
) -> Callable:
"""Decorator to auto-commit experimental code with scientific metadata.
Expand All @@ -22,37 +29,38 @@ def commit(
def decorator(func: Callable) -> Callable:
@wraps(func)
def wrapper(*args, **kwargs):
# di = DI()
di = DI()
# @todo: handle this better (eg. positional args)
hyperparameters = cast(BaseModel, kwargs.get(hypers, None))
if not hyperparameters:
raise MthdError(
"Hyperparameters must be provided in the function call."
)
# git_service = di[GitService]
raise MthdError("Hyperparameters must be provided in the function call.")
git_service = di[GitService]
# codebase_service = di[CodebaseService]

# Generate commit message
commit_msg = CommitMessage(
summary="exp: foo bar baz",

# Run experiment
metrics = func(*args, **kwargs)

experiment = ExperimentRun(
experiment=func.__name__,
hyperparameters=hyperparameters.model_dump(),
metrics=metrics.model_dump(),
# annotations=codebase_service.get_all_annotations(),
)
# print(hyperparameters.model_dump_json(indent=2))
# print(commit_msg.format())

# Run experiment
result = func(*args, **kwargs)
message = experiment.as_commit_message(template=template)

# Commit changes
console = Console()
console.print("Generating commit with message:\n")
console.print(
Padding(commit_msg.format(), pad=(0, 0, 0, 4))
) # Indent by 4 spaces.
# if git_service.should_commit(strategy):
# git_service.stage_and_commit(commit_msg)
if git_service.should_commit(strategy):
console = Console()
console.print("Generating commit with message:\n")
console.print(Padding(message.render(with_metadata=True), pad=(0, 0, 0, 4))) # Indent by 4 spaces.
if os.getenv("MTHD_DRY_RUN") == "1":
console.print("\nDry run enabled. Not committing changes.")
else:
git_service.stage_and_commit(message.render())

return result
return metrics

return wrapper

Expand All @@ -62,14 +70,21 @@ def wrapper(*args, **kwargs):


if __name__ == "__main__":
os.environ["MTHD_DRY_RUN"] = "1"

class Hyperparameters(BaseModel):
a: int
b: float
c: str

@commit(hypers="hypers")
class Metrics(BaseModel):
a: int
b: float
c: str

@commit(hypers="hypers", template="run {experiment} at {timestamp}")
def test(hypers: Hyperparameters):
print("<Experiment goes here>\n")
print("\n<Experiment goes here>\n")
return Metrics(a=1, b=2.0, c="3")

test(hypers=Hyperparameters(a=1, b=2.0, c="3"))
18 changes: 0 additions & 18 deletions mthd/domain/commit.py

This file was deleted.

115 changes: 115 additions & 0 deletions mthd/domain/experiment.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
import json
import logging

from datetime import datetime
from enum import StrEnum
from typing import TYPE_CHECKING, Any, Optional
from uuid import uuid4

from pydantic import UUID4, Field

from mthd.config import BODY_METADATA_SEPARATOR, SUMMARY_BODY_SEPARATOR
from mthd.util.model import Model

if TYPE_CHECKING:
from mthd.domain.git import Commit

logger = logging.getLogger(__name__)


class ExperimentRun(Model):
    """The core data from an experiment run."""

    experiment: str
    hyperparameters: dict
    metrics: dict
    uuid: UUID4 = Field(default_factory=uuid4)
    artifacts: Optional[dict] = None  # Any generated files/data
    annotations: Optional[dict] = None
    # A factory is required here: a plain ``datetime.now()`` default is
    # evaluated once when the class is defined, so every run created in the
    # same process would share that single import-time timestamp.
    timestamp: datetime = Field(default_factory=datetime.now)

    def as_commit_message(self, template: str) -> "SemanticMessage":
        """Convert this experiment run into an ``exp`` semantic commit message.

        Args:
            template: A ``str.format`` template for the summary line. The
                fields ``{experiment}`` and ``{timestamp}`` are available.

        Returns:
            A ``SemanticMessage`` of kind ``CommitKind.EXP`` whose metadata is
            this run dumped in JSON mode.
        """
        return SemanticMessage(
            kind=CommitKind.EXP,
            summary=template.format(**self.model_dump(include={"experiment", "timestamp"})),
            metadata=self.model_dump(mode="json"),
        )

    @staticmethod
    def from_commit(commit: "Commit") -> Optional["ExperimentRun"]:
        """Rebuild an experiment run from the metadata in a commit message.

        Args:
            commit: The commit whose message should be parsed.

        Returns:
            The validated ``ExperimentRun``, or ``None`` when the parsed
            message carries no metadata.

        Raises:
            Whatever ``SemanticMessage.from_commit`` raises for a message it
            cannot parse, and a validation error when metadata is present but
            does not match this model.
        """
        message = SemanticMessage.from_commit(commit)
        if message.metadata is None:
            # Nothing to validate; this is the "no run" case the Optional
            # return type already advertises.
            return None

        return ExperimentRun.model_validate(message.metadata)


class CommitKind(StrEnum):
"""Types of semantic commits"""

EXP = "exp"
FIX = "fix"
FEAT = "feat"
CHORE = "chore"
TOOLING = "tooling"
REFACTOR = "refactor"

@property
def has_metadata(self) -> bool:
return self is CommitKind.EXP

@staticmethod
def from_header(header: str) -> Optional["CommitKind"]:
"""Parse a commit kind from a header string"""
try:
kind = header.split(":")[0].strip()
return CommitKind(kind)
except Exception:
return None


class SemanticMessage(Model):
    """A commit message of the form ``<kind>: <summary>`` with optional extras.

    The full rendered layout is the header line, then an optional body, then
    an optional metadata block (a separator line followed by JSON), with a
    blank line between consecutive parts.
    """

    kind: CommitKind
    summary: str
    body: Optional[str] = None
    metadata: Optional[dict[str, Any]] = None

    def render(self, with_metadata: bool = False) -> str:
        """Render this message as commit-message text.

        Args:
            with_metadata: When true and metadata is non-empty, append the
                metadata separator and the metadata as indented JSON.

        Returns:
            The header line, followed by the body and metadata block when
            present.
        """
        msg = f"{self.kind.value}: {self.summary}"
        if self.body:
            msg += f"{SUMMARY_BODY_SEPARATOR}{self.body}"
        if with_metadata and self.metadata:
            msg += f"\n\n{BODY_METADATA_SEPARATOR}\n\n{json.dumps(self.metadata, indent=2)}"
        return msg

    @classmethod
    def from_commit(cls, commit: "Commit") -> "SemanticMessage":
        """Parse ``commit.message`` into a ``SemanticMessage``.

        Raises:
            ValueError: If the header has no ``<kind>:`` prefix, the kind is
                not a known ``CommitKind``, or the metadata block is not a
                JSON object.
        """
        kind, summary, body, metadata = cls._parse_semantic_parts(commit)

        return cls(kind=kind, summary=summary.strip(), body=body, metadata=metadata)

    @staticmethod
    def _parse_semantic_parts(commit: "Commit") -> tuple[CommitKind, str, Optional[str], Optional[dict]]:
        """Split a commit message into kind, summary, body and metadata.

        Returns:
            ``(kind, summary, body, metadata)``. ``body`` and ``metadata`` are
            ``None`` when the message has only a header line. ``metadata`` is
            also ``None`` when the kind carries no metadata or the body has no
            metadata separator.

        Raises:
            ValueError: On a header without ``:``, an unknown kind, invalid
                JSON, or JSON that is not an object.
        """
        # Only the first separator divides the header from the rest.
        header, has_rest, rest = commit.message.partition(SUMMARY_BODY_SEPARATOR)

        kind_str, colon, summary = header.partition(":")
        if not colon:
            raise ValueError(f"Commit header has no '<kind>:' prefix: {header!r}")
        # Strip so that "exp : ..." parses the same way CommitKind.from_header does.
        kind = CommitKind(kind_str.strip())

        if not has_rest:
            return kind, summary, None, None

        body: Optional[str] = rest
        metadata: Optional[dict] = None
        if kind.has_metadata:
            # Split on the first separator only: the JSON that follows may
            # itself contain "---" inside a string value.
            text, found, raw_metadata = rest.partition(BODY_METADATA_SEPARATOR)
            if found:
                body = text
                metadata = json.loads(raw_metadata)
                # Explicit check instead of assert, which is stripped under -O.
                if not isinstance(metadata, dict):
                    raise ValueError("Commit metadata must be a JSON object")

        return kind, summary, body, metadata
Loading