From 751ad12c801eaaa638027ad66de0d4720ad5f36c Mon Sep 17 00:00:00 2001 From: Peter Koval Date: Wed, 3 Sep 2025 18:38:55 +0200 Subject: [PATCH 1/8] + --- pyproject.toml | 5 +++++ src/binary_classification_ratios/ratios.py | 10 ++++++---- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 16685ea..38bbfd5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,3 +34,8 @@ binary-classification-ratios = "binary_classification_ratios.cli.main:main" [tool.uv] package = true + +[build-system] +requires = ["hatchling >= 1.26"] +build-backend = "hatchling.build" + diff --git a/src/binary_classification_ratios/ratios.py b/src/binary_classification_ratios/ratios.py index 03eb336..93d6135 100644 --- a/src/binary_classification_ratios/ratios.py +++ b/src/binary_classification_ratios/ratios.py @@ -21,6 +21,8 @@ def __init__(self, *, tp: int = 0, tn: int = 0, fp: int = 0, fn: int = 0) -> Non self.tn = tn self.fp = fp self.fn = fn + self.accuracy_fmt = '.5f' + self.fmt = '.3f' def get_summary(self) -> str: """Return a summary of the classification metrics, including accuracy, @@ -32,10 +34,10 @@ def get_summary(self) -> str: cc = self return ( f'Confusion matrix: TP {cc.tp} TN {cc.tn} FP {cc.fp} FN {cc.fn}\n' - f' accuracy {cc.get_accuracy():.3f}\n' - f' precision {cc.get_precision():.3f}\n' - f' recall {cc.get_recall():.3f}\n' - f' f1-score {cc.get_f1_score():.3f}\n' + f' accuracy {cc.get_accuracy():{self.accuracy_fmt}}\n' + f' precision {cc.get_precision():{self.fmt}}\n' + f' recall {cc.get_recall():{self.fmt}}\n' + f' f1-score {cc.get_f1_score():{self.fmt}}\n' ) def get_precision(self) -> float: From 1da1e098fd9a2aa4c908b6b0cc4b895ccd85840d Mon Sep 17 00:00:00 2001 From: Peter Koval Date: Wed, 3 Sep 2025 19:26:48 +0200 Subject: [PATCH 2/8] + --- CHANGELOG.md | 5 +++ .../cli/cmd_line.py | 5 +++ src/binary_classification_ratios/cli/main.py | 2 + src/binary_classification_ratios/ratios.py | 30 +++++++++----- 
src/binary_classification_ratios/summary.py | 41 +++++++++++++++++++ test/cli/test_cmd_line.py | 7 +++- test/cli/test_main.py | 9 +++- ...lassification_ratios.py => test_ratios.py} | 20 +++++++-- test/test_summary.py | 34 +++++++++++++++ 9 files changed, 136 insertions(+), 17 deletions(-) create mode 100644 src/binary_classification_ratios/summary.py rename test/{test_binary_classification_ratios.py => test_ratios.py} (71%) create mode 100644 test/test_summary.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 16345c7..8e7a224 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,8 @@ +# 0.2.0 + + - Adjustable format for the accuracy and other metrics. + + # 0.1.0 - Command-line utility `binary-classification-ratios` for quick try. diff --git a/src/binary_classification_ratios/cli/cmd_line.py b/src/binary_classification_ratios/cli/cmd_line.py index 760e4fc..0957b5f 100644 --- a/src/binary_classification_ratios/cli/cmd_line.py +++ b/src/binary_classification_ratios/cli/cmd_line.py @@ -5,6 +5,7 @@ PROG = 'binary-classification-ratios' +HLP_FMT = 'Format for the recall, precision and F1-score.' 
class CmdLine: @@ -15,6 +16,8 @@ def __init__(self) -> None: self.tn: int = 0 self.fp: int = 0 self.fn: int = 0 + self.fmt: str = '.3f' + self.accuracy_fmt: str = '.5f' def get_cmd_line(args: Union[Sequence[str], None] = None) -> CmdLine: @@ -24,6 +27,8 @@ def get_cmd_line(args: Union[Sequence[str], None] = None) -> CmdLine: parser.add_argument('-tn', type=int, default=0, help='Number of true negatives.') parser.add_argument('-fp', type=int, default=0, help='Number of false positives.') parser.add_argument('-fn', type=int, default=0, help='Number of false negatives.') + parser.add_argument('--fmt', help=HLP_FMT) + parser.add_argument('--accuracy-fmt', help='Format for the accuracy.') namespace = CmdLine() parser.parse_args(args, namespace=namespace) return namespace diff --git a/src/binary_classification_ratios/cli/main.py b/src/binary_classification_ratios/cli/main.py index 7f2c490..5c142ba 100644 --- a/src/binary_classification_ratios/cli/main.py +++ b/src/binary_classification_ratios/cli/main.py @@ -11,6 +11,8 @@ def run(args: Union[Sequence[str], None] = None) -> float: """.""" cli = get_cmd_line(args) bcr = BinaryClassificationRatios(tp=cli.tp, tn=cli.tn, fp=cli.fp, fn=cli.fn) + bcr.summary.fmt = cli.fmt + bcr.summary.accuracy_fmt = cli.accuracy_fmt print(bcr.get_summary()) return bcr.get_f1_score() diff --git a/src/binary_classification_ratios/ratios.py b/src/binary_classification_ratios/ratios.py index 93d6135..e5561fc 100644 --- a/src/binary_classification_ratios/ratios.py +++ b/src/binary_classification_ratios/ratios.py @@ -3,6 +3,10 @@ and summarize classification metrics such as accuracy, precision, recall, and F1-score. 
""" +from typing import Dict, Union + +from binary_classification_ratios.summary import BinaryClassificationSummary + class BinaryClassificationRatios(object): """ @@ -21,8 +25,7 @@ def __init__(self, *, tp: int = 0, tn: int = 0, fp: int = 0, fn: int = 0) -> Non self.tn = tn self.fp = fp self.fn = fn - self.accuracy_fmt = '.5f' - self.fmt = '.3f' + self.summary = BinaryClassificationSummary() def get_summary(self) -> str: """Return a summary of the classification metrics, including accuracy, @@ -31,14 +34,21 @@ def get_summary(self) -> str: Returns: str: A formatted string summarizing the classification metrics. """ - cc = self - return ( - f'Confusion matrix: TP {cc.tp} TN {cc.tn} FP {cc.fp} FN {cc.fn}\n' - f' accuracy {cc.get_accuracy():{self.accuracy_fmt}}\n' - f' precision {cc.get_precision():{self.fmt}}\n' - f' recall {cc.get_recall():{self.fmt}}\n' - f' f1-score {cc.get_f1_score():{self.fmt}}\n' - ) + dct = self.get_summary_dct() + return self.summary.get_summary(dct) + + def get_summary_dct(self) -> Dict[str, Union[int, float]]: + """.""" + return { + 'tp': self.tp, + 'tn': self.tn, + 'fp': self.fp, + 'fn': self.fn, + 'accuracy': self.get_accuracy(), + 'precision': self.get_precision(), + 'recall': self.get_recall(), + 'f1_score': self.get_f1_score(), + } def get_precision(self) -> float: """Calculate the Precision. diff --git a/src/binary_classification_ratios/summary.py b/src/binary_classification_ratios/summary.py new file mode 100644 index 0000000..ce1ae2f --- /dev/null +++ b/src/binary_classification_ratios/summary.py @@ -0,0 +1,41 @@ +""".""" + +from typing import Dict, Union + + +class BinaryClassificationSummary: + """.""" + + def __init__(self) -> None: + """.""" + self.accuracy_fmt = '.5f' + self.fmt = '.3f' + self.confusion_matrix_prefix = 'Confusion matrix' + + def get_summary(self, dct: Dict[str, Union[int, float]]) -> str: + """Return a human-readable summary of the quality metrics. 
+ + Returns: + str: A formatted string summarizing the classification metrics. + """ + tp = dct.get('tp', '?') + tn = dct.get('tn', '?') + fp = dct.get('fp', '?') + fn = dct.get('fn', '?') + lines = [f'{self.confusion_matrix_prefix} TP {tp} TN {tn} FP {fp} FN {fn}'] + accuracy = dct.get('accuracy', None) + recall = dct.get('recall', None) + precision = dct.get('precision', None) + f1_score = dct.get('f1_score', None) + + if accuracy is not None: + lines.append(f' accuracy {accuracy:{self.accuracy_fmt}}') + if precision is not None: + lines.append(f' precision {precision:{self.fmt}}') + if recall is not None: + lines.append(f' recall {recall:{self.fmt}}') + if f1_score is not None: + lines.append(f' f1-score {f1_score:{self.fmt}}') + + summary = '\n'.join(lines) + return summary diff --git a/test/cli/test_cmd_line.py b/test/cli/test_cmd_line.py index d9b8991..3218239 100644 --- a/test/cli/test_cmd_line.py +++ b/test/cli/test_cmd_line.py @@ -5,12 +5,15 @@ def test_cmd_line_short_args() -> None: """.""" - cli = get_cmd_line(['-tp', '1', '-tn', '2', '-fp', '3', '-fn', '4']) + cli = get_cmd_line(['-tp', '1', '-tn', '2', '-fp', '3', '-fn', '4', + '--fmt', '.4f', '--accuracy-fmt', '.6f']) assert isinstance(cli, CmdLine) assert cli.tp == 1 assert cli.tn == 2 assert cli.fp == 3 assert cli.fn == 4 + assert cli.fmt == '.4f' + assert cli.accuracy_fmt == '.6f' def test_cmd_line_no_args() -> None: @@ -20,3 +23,5 @@ def test_cmd_line_no_args() -> None: assert cli.tn == 0 assert cli.fn == 0 assert cli.fp == 0 + assert cli.accuracy_fmt == '.5f' + assert cli.fmt == '.3f' diff --git a/test/cli/test_main.py b/test/cli/test_main.py index 4acf828..d0ea754 100644 --- a/test/cli/test_main.py +++ b/test/cli/test_main.py @@ -5,7 +5,12 @@ from binary_classification_ratios.cli.main import run -def test_run() -> None: +def test_run(capsys: pytest.CaptureFixture) -> None: """.""" - f1 = run(['-tp', '1', '-tn', '2', '-fp', '3', '-fn', '4']) + f1 = run(['-tp', '1', '-tn', '2', '-fp', '3', '-fn', 
'4', + '--fmt', '.4f', '--accuracy-fmt', '.6f']) assert f1 == pytest.approx(0.222222222222222222) + stdout = capsys.readouterr().out + assert 'Confusion matrix TP 1 TN 2 FP 3 FN 4' in stdout + assert 'accuracy 0.300000' in stdout + assert 'f1-score 0.2222' in stdout diff --git a/test/test_binary_classification_ratios.py b/test/test_ratios.py similarity index 71% rename from test/test_binary_classification_ratios.py rename to test/test_ratios.py index 15cfbb8..9d412b6 100644 --- a/test/test_binary_classification_ratios.py +++ b/test/test_ratios.py @@ -32,15 +32,27 @@ def test_get_summary(bcr: BinaryClassificationRatios) -> None: """.""" assert ( bcr.get_summary() - == """Confusion matrix: TP 10 TN 9 FP 8 FN 7 - accuracy 0.559 + == """Confusion matrix TP 10 TN 9 FP 8 FN 7 + accuracy 0.55882 precision 0.556 recall 0.588 - f1-score 0.571 -""" + f1-score 0.571""" ) +def test_get_summary_dct(bcr: BinaryClassificationRatios) -> None: + """.""" + dct = bcr.get_summary_dct() + assert dct['tp'] == 10 + assert dct['tn'] == 9 + assert dct['fp'] == 8 + assert dct['fn'] == 7 + assert dct['accuracy'] == pytest.approx(0.5588235294117647) + assert dct['precision'] == pytest.approx(0.5555555555555556) + assert dct['recall'] == pytest.approx(0.5882352941176471) + assert dct['f1_score'] == pytest.approx(0.5714285714285715) + + def test_assert_min(bcr: BinaryClassificationRatios) -> None: """.""" bcr.assert_min(0.558, 0.555, 0.587) diff --git a/test/test_summary.py b/test/test_summary.py new file mode 100644 index 0000000..1ebc1de --- /dev/null +++ b/test/test_summary.py @@ -0,0 +1,34 @@ +""".""" + +import pytest + +from binary_classification_ratios.summary import BinaryClassificationSummary + + +@pytest.fixture +def bcs() -> BinaryClassificationSummary: + """.""" + summary = BinaryClassificationSummary() + return summary + + +def test_get_summary(bcs: BinaryClassificationSummary) -> None: + """.""" + dct = { + 'tp': 10, + 'tn': 9, + 'fp': 8, + 'fn': 7, + 'accuracy': 
0.5588256789012345, + 'precision': 0.5561234, + 'recall': 0.5881234, + 'f1_score': 0.57101234, + } + ref = """Confusion matrix TP 10 TN 9 FP 8 FN 7 + accuracy 0.5588 + precision 0.56 + recall 0.59 + f1-score 0.57""" + bcs.accuracy_fmt = '.4f' + bcs.fmt = '.2f' + assert bcs.get_summary(dct) == ref From 6ffa97150383309229867b546673db9d31994775 Mon Sep 17 00:00:00 2001 From: Peter Koval Date: Wed, 3 Sep 2025 19:27:10 +0200 Subject: [PATCH 3/8] format --- test/cli/test_cmd_line.py | 5 +++-- test/cli/test_main.py | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/test/cli/test_cmd_line.py b/test/cli/test_cmd_line.py index 3218239..64dfce9 100644 --- a/test/cli/test_cmd_line.py +++ b/test/cli/test_cmd_line.py @@ -5,8 +5,9 @@ def test_cmd_line_short_args() -> None: """.""" - cli = get_cmd_line(['-tp', '1', '-tn', '2', '-fp', '3', '-fn', '4', - '--fmt', '.4f', '--accuracy-fmt', '.6f']) + cli = get_cmd_line( + ['-tp', '1', '-tn', '2', '-fp', '3', '-fn', '4', '--fmt', '.4f', '--accuracy-fmt', '.6f'] + ) assert isinstance(cli, CmdLine) assert cli.tp == 1 assert cli.tn == 2 diff --git a/test/cli/test_main.py b/test/cli/test_main.py index d0ea754..2ecffc4 100644 --- a/test/cli/test_main.py +++ b/test/cli/test_main.py @@ -7,8 +7,9 @@ def test_run(capsys: pytest.CaptureFixture) -> None: """.""" - f1 = run(['-tp', '1', '-tn', '2', '-fp', '3', '-fn', '4', - '--fmt', '.4f', '--accuracy-fmt', '.6f']) + f1 = run( + ['-tp', '1', '-tn', '2', '-fp', '3', '-fn', '4', '--fmt', '.4f', '--accuracy-fmt', '.6f'] + ) assert f1 == pytest.approx(0.222222222222222222) stdout = capsys.readouterr().out assert 'Confusion matrix TP 1 TN 2 FP 3 FN 4' in stdout From 5845cdb5cd9e38020c2c61fedaa6c73f5b33bbde Mon Sep 17 00:00:00 2001 From: Peter Koval Date: Wed, 3 Sep 2025 19:27:26 +0200 Subject: [PATCH 4/8] =?UTF-8?q?Bump=20version:=200.1.5=20=E2=86=92=200.2.0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .bumpversion.toml | 2 +- 
pyproject.toml | 2 +- src/binary_classification_ratios/__init__.py | 2 +- uv.lock | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.bumpversion.toml b/.bumpversion.toml index 2cb59e7..cd4d8b5 100644 --- a/.bumpversion.toml +++ b/.bumpversion.toml @@ -1,5 +1,5 @@ [tool.bumpversion] -current_version = "0.1.5" +current_version = "0.2.0" parse = "(?P\\d+)\\.(?P\\d+)\\.(?P\\d+)" serialize = ["{major}.{minor}.{patch}"] search = "{current_version}" diff --git a/pyproject.toml b/pyproject.toml index 38bbfd5..52f2c34 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "binary-classification-ratios" -version = "0.1.5" +version = "0.2.0" description = "Binary classification ratios gathered in one package." readme = "README.md" requires-python = ">=3.8,<4.0" diff --git a/src/binary_classification_ratios/__init__.py b/src/binary_classification_ratios/__init__.py index 108f00c..779a500 100644 --- a/src/binary_classification_ratios/__init__.py +++ b/src/binary_classification_ratios/__init__.py @@ -1,6 +1,6 @@ """.""" -__version__ = '0.1.5' +__version__ = '0.2.0' from .ratios import BinaryClassificationRatios diff --git a/uv.lock b/uv.lock index a852e57..91964f7 100644 --- a/uv.lock +++ b/uv.lock @@ -33,7 +33,7 @@ wheels = [ [[package]] name = "binary-classification-ratios" -version = "0.1.5" +version = "0.2.0" source = { editable = "." } [package.dev-dependencies] From 57f152213a6853b5a0cd89de1cdbf6a001f9c15b Mon Sep 17 00:00:00 2001 From: Peter Koval Date: Wed, 3 Sep 2025 19:31:06 +0200 Subject: [PATCH 5/8] Update readme --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index c9fc13d..c55a445 100644 --- a/README.md +++ b/README.md @@ -32,8 +32,8 @@ F1-score and prints them to terminal. 
```shell binary-classification-ratios -tp 10 -tn 20 -fp 30 -fn 40 -Confusion matrix: TP 10 TN 20 FP 30 FN 40 - accuracy 0.300 +Confusion matrix TP 10 TN 20 FP 30 FN 40 + accuracy 0.30000 precision 0.250 recall 0.200 f1-score 0.222 From fa36996aa953764cf271dc8d46a78aeb5c7914ba Mon Sep 17 00:00:00 2001 From: Peter Koval Date: Wed, 3 Sep 2025 19:32:47 +0200 Subject: [PATCH 6/8] + chlog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8e7a224..31ab6b5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ # 0.2.0 - Adjustable format for the accuracy and other metrics. + - Using the hatchling build system to avoid complaints about the `project.license` format. # 0.1.0 From d01a0a8d6e13af64d9b713668747240a9ce45661 Mon Sep 17 00:00:00 2001 From: Peter Koval Date: Wed, 3 Sep 2025 19:42:46 +0200 Subject: [PATCH 7/8] edits --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index c55a445..4c3aca7 100644 --- a/README.md +++ b/README.md @@ -9,14 +9,14 @@ generated by an algorithm or procedure. The ratios are easier to interpret than the confusion matrix. -For example, the $\mathrm{Accuracy}$ is given by the ratio of all accurate responses to the +For example, the `Accuracy` is given by the ratio of all accurate responses to the total number of responses $$ \mathrm{Accuracy} = \frac{\mathrm{TP} + \mathrm{TN}}{\mathrm{TP} + \mathrm{TN} + \mathrm{FP} + \mathrm{FN}}. $$ -Another popular ratio is $\mathrm{Recall}$ +Another popular ratio is `Recall` $$ \mathrm{Recall} = \frac{\mathrm{TP}}{\mathrm{TP} + \mathrm{FN}}. 
From 7e88eeef0809add1868e6985abf99591e491090f Mon Sep 17 00:00:00 2001 From: Peter Koval Date: Wed, 3 Sep 2025 19:42:57 +0200 Subject: [PATCH 8/8] =?UTF-8?q?Bump=20version:=200.2.0=20=E2=86=92=200.2.1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .bumpversion.toml | 2 +- pyproject.toml | 2 +- src/binary_classification_ratios/__init__.py | 2 +- uv.lock | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.bumpversion.toml b/.bumpversion.toml index cd4d8b5..dcdd040 100644 --- a/.bumpversion.toml +++ b/.bumpversion.toml @@ -1,5 +1,5 @@ [tool.bumpversion] -current_version = "0.2.0" +current_version = "0.2.1" parse = "(?P\\d+)\\.(?P\\d+)\\.(?P\\d+)" serialize = ["{major}.{minor}.{patch}"] search = "{current_version}" diff --git a/pyproject.toml b/pyproject.toml index 52f2c34..ab62a2c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "binary-classification-ratios" -version = "0.2.0" +version = "0.2.1" description = "Binary classification ratios gathered in one package." readme = "README.md" requires-python = ">=3.8,<4.0" diff --git a/src/binary_classification_ratios/__init__.py b/src/binary_classification_ratios/__init__.py index 779a500..419aa5d 100644 --- a/src/binary_classification_ratios/__init__.py +++ b/src/binary_classification_ratios/__init__.py @@ -1,6 +1,6 @@ """.""" -__version__ = '0.2.0' +__version__ = '0.2.1' from .ratios import BinaryClassificationRatios diff --git a/uv.lock b/uv.lock index 91964f7..c28908b 100644 --- a/uv.lock +++ b/uv.lock @@ -33,7 +33,7 @@ wheels = [ [[package]] name = "binary-classification-ratios" -version = "0.2.0" +version = "0.2.1" source = { editable = "." } [package.dev-dependencies]