From 3384c7d44e4ab4abb69f754060666f2b2cadaca3 Mon Sep 17 00:00:00 2001 From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com> Date: Wed, 11 Dec 2024 09:55:06 +0100 Subject: [PATCH 1/7] docs: add repr & explanation to `C` --- lydata/accessor.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/lydata/accessor.py b/lydata/accessor.py index 4efbf84..db69d84 100644 --- a/lydata/accessor.py +++ b/lydata/accessor.py @@ -284,6 +284,10 @@ def execute(self, df: pd.DataFrame) -> pd.Series: class C: """Wraps a column name and produces a :py:class:`Q` object upon comparison. + This is basically a shorthand for creating a :py:class:`Q` object that avoids + writing the operator and value in quotes. Thus, it may be more readable and allows + IDEs to provide better autocompletion. + .. caution:: Just like for the :py:class:`Q` object, it is not checked upon instantiation @@ -301,6 +305,16 @@ def __init__(self, *column: str) -> None: """ self.column = column[0] if len(column) == 1 else column + def __repr__(self) -> str: + """Return a string representation of the column object. + + >>> repr(C('foo')) + "C('foo')" + >>> repr(C('foo', 'bar')) + "C(('foo', 'bar'))" + """ + return f"C({self.column!r})" + def __eq__(self, value: Any) -> Q: """Create a query object for comparing equality. From ccc52dc48155dbee5a1bd65a7270a1017a501a82 Mon Sep 17 00:00:00 2001 From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com> Date: Wed, 11 Dec 2024 10:00:52 +0100 Subject: [PATCH 2/7] change: add parentheses to `AndQ` and `OrQ`. This is to make the string representation of long, nested query objects correctly parseable (both to Python and a human reader). --- lydata/accessor.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/lydata/accessor.py b/lydata/accessor.py index db69d84..24e7bd0 100644 --- a/lydata/accessor.py +++ b/lydata/accessor.py @@ -175,7 +175,7 @@ class AndQ(CombineQMixin): >>> q2 = Q('col2', 'contains', 'ba') >>> and_q = q1 & q2 >>> print(and_q) - Q('col1', '!=', 3) & Q('col2', 'contains', 'ba') + (Q('col1', '!=', 3) & Q('col2', 'contains', 'ba')) >>> isinstance(and_q, AndQ) True >>> and_q.execute(df) @@ -194,7 +194,7 @@ def __init__(self, q1: QTypes, q2: QTypes) -> None: def __repr__(self) -> str: """Return a string representation of the query.""" - return f"{self.q1!r} & {self.q2!r}" + return f"({self.q1!r} & {self.q2!r})" def execute(self, df: pd.DataFrame) -> pd.Series: """Return a boolean mask where both queries are satisfied.""" @@ -209,7 +209,7 @@ class OrQ(CombineQMixin): >>> q2 = Q('col1', '==', 3) >>> or_q = q1 | q2 >>> print(or_q) - Q('col1', '==', 1) | Q('col1', '==', 3) + (Q('col1', '==', 1) | Q('col1', '==', 3)) >>> isinstance(or_q, OrQ) True >>> or_q.execute(df) @@ -228,7 +228,7 @@ def __init__(self, q1: QTypes, q2: QTypes) -> None: def __repr__(self) -> str: """Return a string representation of the query.""" - return f"{self.q1!r} | {self.q2!r}" + return f"({self.q1!r} | {self.q2!r})" def execute(self, df: pd.DataFrame) -> pd.Series: """Return a boolean mask where either query is satisfied.""" @@ -250,6 +250,8 @@ class NotQ(CombineQMixin): 1 False 2 True Name: col1, dtype: bool + >>> print(~(Q('col1', '==', 2) & Q('col1', '!=', 3))) + ~(Q('col1', '==', 2) & Q('col1', '!=', 3)) """ def __init__(self, q: QTypes) -> None: @@ -298,7 +300,7 @@ def __init__(self, *column: str) -> None: """Create a column object for comparison. For querying multi-level columns, both the syntax ``C('col1', 'col2')`` and - ``C(('col1', 'col2'))`` are valid. + ``C(('col1', 'col2'))`` is valid. >>> (C('col1', 'col2') == 1) == (C(('col1', 'col2')) == 1) True From 2b0a4eb10076b3ee9bcf0ae1c22c894fda29cac4 Mon Sep 17 00:00:00 2001 From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com> Date: Wed, 11 Dec 2024 10:16:36 +0100 Subject: [PATCH 3/7] docs: mention private attr `_column_map` --- lydata/accessor.py | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/lydata/accessor.py b/lydata/accessor.py index 24e7bd0..a87b717 100644 --- a/lydata/accessor.py +++ b/lydata/accessor.py @@ -107,9 +107,33 @@ class Q(CombineQMixin): .. caution:: The column names are not checked upon instantiation. This is only done when the - query is executed. In fact, the ``Q`` object does not even know about the - :py:class:`~pandas.DataFrame` it will be applied to in the beginning. On the + query is executed. In fact, the :py:class:`Q` object does not even know about + the :py:class:`~pandas.DataFrame` it will be applied to in the beginning. On the flip side, this means a query may be reused for different DataFrames. + + The ``operator`` argument may be one of the following: + + - ``'=='``: Checks if ``column`` values are equal to the ``value``. + - ``'<'``: Checks if ``column`` values are less than the ``value``. + - ``'<='``: Checks if ``column`` values are less than or equal to ``value``. + - ``'>'``: Checks if ``column`` values are greater than the ``value``. + - ``'>='``: Checks if ``column`` values are greater than or equal to ``value``. + - ``'!='``: Checks if ``column`` values are not equal to the ``value``. This is + equivalent to ``~Q(column, '==', value)``. + - ``'in'``: Checks if ``column`` values are in the list of ``value``. For this, + pandas' :py:meth:`~pandas.Series.isin` method is used. + - ``'contains'``: Checks if ``column`` values contain the string ``value``. + Here, pandas' :py:meth:`~pandas.Series.str.contains` method is used. + + .. note:: + + During initialization, a private attribute ``_column_map`` is set to the + default column map returned by :py:func:`~lydata.utils.get_default_column_map`. + This is used to convert short column names to long ones. If one feels + adventurous, they may set this attribute to a custom column map containing + additional or other column short names. This could also be achieved by + subclassing the :py:class:`Q`. However, the attribute may change in the future, + and without notice. """ _OPERATOR_MAP: dict[str, Callable[[pd.Series, Any], pd.Series]] = { From 1038ead41b9cf2b1f490e1f585e67f941d7b1f41 Mon Sep 17 00:00:00 2001 From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com> Date: Wed, 11 Dec 2024 10:19:35 +0100 Subject: [PATCH 4/7] docs: mention `execute` method of `Q` objects --- lydata/accessor.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/lydata/accessor.py b/lydata/accessor.py index a87b717..e7a08d3 100644 --- a/lydata/accessor.py +++ b/lydata/accessor.py @@ -11,20 +11,24 @@ as easy as typing ``df.ly.age``. Beyond that, the module implements a convenient wat to query the -:py:class:`pd.DataFrame`: The :py:class:`Q` object, that was inspired by Django's +:py:class:`~pandas.DataFrame`: The :py:class:`Q` object, that was inspired by Django's ``Q`` object. It allows for more readable and modular queries, which can be combined with logical operators and reused across different DataFrames. The :py:class:`Q` objects can be passed to the :py:meth:`LyDataAccessor.query` and :py:meth:`LyDataAccessor.portion` methods to filter the DataFrame or compute the -:py:class:`QueryPortion` of rows that satisfy the query. +:py:class:`QueryPortion` of rows that satisfy the query. Alternatively, any of these +:py:class:`Q` objects have a method called :py:meth:`~Q.execute` that can be called with +a :py:class:`~pandas.DataFrame` to get a boolean mask of the rows satisfying the query. -Further, we implement methods like :py:meth:`LyDataAccessor.combine`, -:py:meth:`LyDataAccessor.infer_sublevels`, and -:py:meth:`LyDataAccessor.infer_superlevels` to compute additional columns from the +Further, we implement methods like :py:meth:`~LyDataAccessor.combine`, +:py:meth:`~LyDataAccessor.infer_sublevels`, and +:py:meth:`~LyDataAccessor.infer_superlevels` to compute additional columns from the lyDATA tables. This is sometimes necessary, because not all data contains all the possibly necessary columns. E.g., in some cohorts we do have detailed sublevel information (i.e., IIa and IIb), while in others only the superlevel (II) is reported. +In such a case, one can now simply call ``df.ly.infer_sublevels()`` to get the +additional columns. """ from __future__ import annotations From f79091cf3581897aa9f8ae8e5064c30ee944cddf Mon Sep 17 00:00:00 2001 From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com> Date: Wed, 15 Jan 2025 14:09:16 +0100 Subject: [PATCH 5/7] docs: fix unfinished sentence in utils --- lydata/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lydata/utils.py b/lydata/utils.py index fcb4528..ed56629 100644 --- a/lydata/utils.py +++ b/lydata/utils.py @@ -13,7 +13,7 @@ def update_and_expand( right: pd.DataFrame, **update_kwargs: Any, ) -> pd.DataFrame: - """Update ``left`` with values from ``right``, also adding . + """Update ``left`` with values from ``right``, also adding columns from ``right``. The added feature of this function over pandas' :py:meth:`~pandas.DataFrame.update` is that it also adds columns that are present in ``right`` but not in ``left``. From 2a7010329f26c4c6d3406f81f17baf2b34d767f0 Mon Sep 17 00:00:00 2001 From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com> Date: Wed, 15 Jan 2025 14:13:20 +0100 Subject: [PATCH 6/7] change: switch to loguru for logging --- lydata/__init__.py | 6 ++---- lydata/loader.py | 3 +-- lydata/validator.py | 4 +--- pyproject.toml | 1 + 4 files changed, 5 insertions(+), 9 deletions(-) diff --git a/lydata/__init__.py b/lydata/__init__.py index 28c18d8..5c837d9 100644 --- a/lydata/__init__.py +++ b/lydata/__init__.py @@ -1,6 +1,6 @@ """Library for handling lymphatic involvement data.""" -import logging +from loguru import logger import lydata._version as _version from lydata.accessor import C, Q @@ -26,6 +26,4 @@ "infer_and_combine_levels", ] -logger = logging.getLogger(__name__) -logger.addHandler(logging.NullHandler()) -logger.setLevel(logging.WARNING) +logger.disable("lydata") diff --git a/lydata/loader.py b/lydata/loader.py index 781015b..a29f849 100644 --- a/lydata/loader.py +++ b/lydata/loader.py @@ -21,7 +21,6 @@ """ import fnmatch -import logging import os import warnings from collections.abc import Generator @@ -32,9 +31,9 @@ import pandas as pd from github import Auth, Github, Repository from github.ContentFile import ContentFile +from loguru import logger from pydantic import BaseModel, Field, PrivateAttr, constr -logger = logging.getLogger(__name__) _default_repo_name = "rmnldwg/lydata" low_min1_str = constr(to_lower=True, min_length=1) diff --git a/lydata/validator.py b/lydata/validator.py index b5307d0..403b3ee 100644 --- a/lydata/validator.py +++ b/lydata/validator.py @@ -16,17 +16,15 @@ .. _LyProX: https://lyprox.org """ -import logging from typing import Any import pandas as pd +from loguru import logger from pandera import Check, Column, DataFrameSchema from pandera.errors import SchemaError from lydata.loader import available_datasets -logger = logging.getLogger(__name__) - _NULLABLE_OPTIONAL = {"required": False, "nullable": True} _NULLABLE_OPTIONAL_BOOLEAN_COLUMN = Column( dtype="boolean", diff --git a/pyproject.toml b/pyproject.toml index 033e851..226d040 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,6 +27,7 @@ dependencies = [ "mistletoe", "pandera", "pydantic", + "loguru", ] [project.urls] From cb43fab71b3f582e8d8f42cfca41b685ca574926 Mon Sep 17 00:00:00 2001 From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com> Date: Wed, 15 Jan 2025 14:21:19 +0100 Subject: [PATCH 7/7] chore: update changelog --- CHANGELOG.md | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index f19fe51..6e998d2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,20 @@ All notable changes to this project will be documented in this file. +## [0.2.4] - 2025-01-15 + +### 📚 Documentation + +- Add `__repr__` & explanation to `C` +- Mention private attribute `_column_map` +- Mention `execute` method of `Q` objects +- Fix unfinished sentence in utils + +### Change + +- In `__repr__`, add parentheses around combination of `AndQ` and `OrQ`. +- Switch to [`loguru`](https://loguru.readthedocs.io/en/stable/index.html) for logging + ## [0.2.3] - 2024-12-05 ### 🚀 Features @@ -220,6 +234,7 @@ Initial implementation of the lyDATA library. +[0.2.4]: https://github.com/rmnldwg/lydata/compare/0.2.3..0.2.4 [0.2.3]: https://github.com/rmnldwg/lydata/compare/0.2.2..0.2.3 [0.2.2]: https://github.com/rmnldwg/lydata/compare/0.2.1..0.2.2 [0.2.1]: https://github.com/rmnldwg/lydata/compare/0.2.0..0.2.1