diff --git a/CHANGELOG.md b/CHANGELOG.md
index 71febfa..3b3947a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,17 @@
 
 All notable changes to this project will be documented in this file.
 
+## [0.3.3] - 2025-07-22
+
+### 🚀 Features
+
+- Add simple utility to detect if dataset uses old 2nd level headers.
+
+### 💼 Other
+
+- Make `use_github=True` default everywhere
+- Simplify `Q`'s query execution a bit
+
 ## [0.3.2] - 2025-07-22
 
 ### 🚀 Features
diff --git a/src/lydata/accessor.py b/src/lydata/accessor.py
index 8138d49..24fc94e 100644
--- a/src/lydata/accessor.py
+++ b/src/lydata/accessor.py
@@ -162,7 +162,6 @@ def __init__(
         self.colname = column
         self.operator = operator
         self.value = value
-        self._column_map = get_default_column_map_old()
 
     def __repr__(self) -> str:
         """Return a string representation of the query."""
@@ -183,12 +182,7 @@ def execute(self, df: pd.DataFrame) -> pd.Series:
         2     True
         Name: col2, dtype: bool
         """
-        try:
-            colname = self._column_map.from_short[self.colname].long
-        except KeyError:
-            colname = self.colname
-
-        column = df[colname]
+        column = df.ly[self.colname]
 
         if callable(self.value):
             return self.value(column)
diff --git a/src/lydata/loader.py b/src/lydata/loader.py
index 741f2a0..7b08d2a 100644
--- a/src/lydata/loader.py
+++ b/src/lydata/loader.py
@@ -143,7 +143,7 @@ def get_content_file(
 
     def get_dataframe(
         self,
-        use_github: bool = False,
+        use_github: bool = True,
         token: str | None = None,
         user: str | None = None,
         password: str | None = None,
diff --git a/src/lydata/utils.py b/src/lydata/utils.py
index deaf046..487af71 100644
--- a/src/lydata/utils.py
+++ b/src/lydata/utils.py
@@ -164,7 +164,7 @@ def get_default_column_map_old() -> _ColumnMap:
 
 
 def _new_from_old(long_name: tuple[str, str, str]) -> tuple[str, str, str]:
-    """Convert an old long name to a new long name.
+    """Convert an old long key name to a new long key name.
 
     >>> _new_from_old(("patient", "#", "neck_dissection"))
     ('patient', 'info', 'neck_dissection')
@@ -179,6 +179,12 @@ def _new_from_old(long_name: tuple[str, str, str]) -> tuple[str, str, str]:
     return (start, middle, end)
 
 
+def is_old(dataset: pd.DataFrame) -> bool:
+    """Check if the dataset uses the old column names."""
+    second_lvl_headers = dataset.columns.get_level_values(1)
+    return "#" in second_lvl_headers or "1" in second_lvl_headers
+
+
 def get_default_column_map_new() -> _ColumnMap:
     """Get the old default column map.
 
diff --git a/src/lydata/validator.py b/src/lydata/validator.py
index 9be9f74..ed46552 100644
--- a/src/lydata/validator.py
+++ b/src/lydata/validator.py
@@ -116,7 +116,7 @@ def validate_datasets(
     year: int | str = "*",
     institution: str = "*",
     subsite: str = "*",
-    use_github: bool = False,
+    use_github: bool = True,
     repo: str = "lycosystem/lydata",
     ref: str = "main",
     **kwargs,