From 2a023e495a6a6c46a96b1109cfb08224fe1c0cab Mon Sep 17 00:00:00 2001 From: Andrew Tiu <63322884+winter-again@users.noreply.github.com> Date: Thu, 30 Jan 2025 18:19:26 -0500 Subject: [PATCH 01/47] Update polars version. First draft of most basic funcs. --- .gitignore | 1 + src/csv_helper/__init__.py | 1 + src/csv_helper/impute.py | 309 +++++++++++++++++++++++++++++++++++++ src/csv_helper/py.typed | 0 uv.lock | 17 +- 5 files changed, 320 insertions(+), 8 deletions(-) create mode 100644 src/csv_helper/impute.py create mode 100644 src/csv_helper/py.typed diff --git a/.gitignore b/.gitignore index 3152050..368f684 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,4 @@ __pycache__/ dist/ sdist/ test.py +scratch.py diff --git a/src/csv_helper/__init__.py b/src/csv_helper/__init__.py index e69de29..4fb493f 100644 --- a/src/csv_helper/__init__.py +++ b/src/csv_helper/__init__.py @@ -0,0 +1 @@ +from . import impute diff --git a/src/csv_helper/impute.py b/src/csv_helper/impute.py new file mode 100644 index 0000000..405736c --- /dev/null +++ b/src/csv_helper/impute.py @@ -0,0 +1,309 @@ +from typing import NamedTuple, TypeAlias + +import numpy as np +import polars as pl + +PolarsNumericType: TypeAlias = ( + pl.Decimal + | pl.Float32 + | pl.Float64 + | pl.Int8 + | pl.Int16 + | pl.Int32 + | pl.Int64 + | pl.Int128 + | pl.UInt8 + | pl.UInt16 + | pl.UInt32 + | pl.UInt64 +) + + +def check(df: pl.DataFrame, fill_cols: list[str], fill_flag: str) -> pl.DataFrame: + """ + Return dataframe with counts and proportion of instances of fill_flag in each of + the given fill_cols + """ + for col in fill_cols: + if col not in df.columns: + raise ValueError(f"Column {col} doesn't exist") + + if not fill_flag_exists(df, col, fill_flag): + raise ValueError( + f"Column {col} doesn't contain any instances of '{fill_flag}'" + ) + + if len(fill_cols) > 1: + return ( + df.select(fill_cols) + .unpivot(variable_name="column", value_name="value") + .group_by("column") + .agg( + count=pl.col("value").filter(pl.col("value") == fill_flag).count(), + prop=pl.col("value").filter(pl.col("value") == fill_flag).count() + / pl.count(), + ) + .sort("column") + ) + else: + fill_col = fill_cols[0] + return ( + df.select(fill_col) + .unpivot(variable_name="column", value_name="value") + .group_by("column") + .agg( + count=pl.col("value").filter(pl.col("value") == fill_flag).count(), + prop=pl.col("value").filter(pl.col("value") == fill_flag).count() + / pl.count(), + ) + ) + + +def impute_columns( + df: pl.DataFrame, + fill_cols: list[str], + fill_flag: str, + fill_range: tuple[int, int], + col_type: PolarsNumericType | None = None, + seed: int | None = None, +) -> pl.DataFrame: + """ + Fill instances of the fill flag (a string) in the given column + with random integers in the given range (inclusive). + + If col_type is specified, will attempt to cast the final result + of fill_cols to that type. Currently, the only options are + Polars numeric types. + """ + for col in fill_cols: + if col not in df.columns: + raise ValueError(f"Column {col} doesn't exist") + + if not fill_flag_exists(df, col, fill_flag): + raise ValueError( + f"Column {col} doesn't contain any instances of '{fill_flag}'" + ) + + fill_range_int = parse_fill_range(fill_range) + + if len(fill_cols) > 1: + rng = np.random.default_rng(seed) + n = (len(fill_cols), df.height) + # must gen all nums up front + fill_nums = rng.integers( + fill_range_int.lb, + fill_range_int.ub, + size=n, + endpoint=True, + ) + + for col, num in zip(fill_cols, fill_nums): + df = df.with_columns( + pl.when(pl.col(col) == fill_flag) + .then(pl.lit(num)) + .otherwise(pl.col(col)) + .alias(col) + ) + + if col_type is not None: + df = df.with_columns(pl.col(col).cast(col_type)) + else: + fill_col = fill_cols[0] + # NOTE: this implementation and numpy implementation for filling values are roughly the same speed + # with this native impl barely faster + df = df.with_columns( + pl.when(pl.col(fill_col) == fill_flag) + .then( + pl.int_range(fill_range_int.lb, fill_range_int.ub + 1).sample( + pl.len(), + with_replacement=True, + seed=seed, + ) + ) + .otherwise(pl.col(fill_col)) + .alias(fill_col), + ) + + if col_type is not None: + df = df.with_columns(pl.col(fill_col).cast(col_type)) + + return df + + +def impute_columns_lazy( + lf: pl.DataFrame | pl.LazyFrame, + fill_column: str, + fill_flag: str, + fill_range: tuple[int, int], + seed: int | None = None, +) -> pl.DataFrame: + lf = lf.lazy() + + if fill_column not in lf.collect_schema().names(): + raise ValueError(f"Column {fill_column} doesn't exist") + + if not fill_flag_exists(lf, fill_column, fill_flag): + raise ValueError( + f"Column {fill_column} doesn't contain any instances of '{fill_flag}'" + ) + + fill_range_int = parse_fill_range(fill_range) + + df = lf.with_columns( + pl.when(pl.col(fill_column) == fill_flag) + .then( + pl.int_range(fill_range_int.lb, fill_range_int.ub + 1).sample( + pl.len(), with_replacement=True + ) + ) + .otherwise(pl.col(fill_column)) + .alias(fill_column) + ).collect() + + return df + + +def fill_flag_exists( + df: pl.DataFrame | pl.LazyFrame, fill_col: str, fill_flag: str +) -> bool: + # TODO: could just do lf = df.lazy() then don't need isinstance() + if isinstance(df, pl.DataFrame): + return df.select((pl.col(fill_col) == fill_flag).any()).item() + else: + # TODO: is there another way to check that doesn't materialize lf? + return df.select((pl.col(fill_col) == fill_flag).any()).collect().item() + + +class FillRange(NamedTuple): + lb: int + ub: int + + +def parse_fill_range(fill_range: tuple[int, int]) -> FillRange: + if len(fill_range) != 2: + raise ValueError("Must only pass 2 values") + + fill_range_int = FillRange(*fill_range) + if fill_range_int.lb > fill_range_int.ub: + raise ValueError("Lower bound can't be greater than the upper bound") + + return fill_range_int + + +# TODO: difficult to benchmark without reasonably sized data +def complete_rows(df: pl.DataFrame, columns: list[str]) -> pl.DataFrame: + """ + Generate implicit missing rows based on the unique combinations + of the given columns' values. The missing values will be nulls. + """ + df_expand = df.select(pl.col(columns).unique().implode()) + for col in columns: + df_expand = df_expand.explode(col) + + df = df_expand.join(df, on=columns, how="left", coalesce=True) + return df + + +def complete_rows_lazy( + lf: pl.DataFrame | pl.LazyFrame, columns: list[str] +) -> pl.DataFrame: + lf = lf.lazy() + lf_expand = lf.select(pl.col(columns).unique().implode()) + for col in columns: + lf_expand = lf_expand.explode(col) + + df = lf_expand.join(lf, on=columns, how="left", coalesce=True).collect() + + return df + + +def impute_column_pair( + df: pl.DataFrame, + numerator: str, + denominator: str, + fill_flag: str, + fill_range: tuple[int, int], + seed: int | None = None, +) -> pl.DataFrame: + return df + + +if __name__ == "__main__": + import timeit + + print("Benchmarking implementations...") + + repeat = 2 + number = 100_000 + + # NOTE: impute_flag somehow slower? For gen_rows, if instead of passing in a LazyFrame I + # take a DataFrame and convert it to LazyFrame inside the func, the times are more comparable + # Maybe I'm doing benchmark wrong? + + setup = """ +import polars as pl +from __main__ import impute_columns +df = pl.read_csv('../../tests/data/test_impute_data.csv') + """ + t_eager = timeit.repeat( + "impute_columns(df, ['cases'], '<=5', (1, 5))", + setup=setup, + repeat=repeat, + number=number, + ) + + setup = """ +import polars as pl +from __main__ import impute_columns_lazy +df = pl.scan_csv('../../tests/data/test_impute_data.csv') + """ + t_lazy = timeit.repeat( + "impute_columns_lazy(df, ['cases'], '<=5', (1, 5))", + setup=setup, + repeat=repeat, + number=number, + ) + + print(f"Min. time of impute.impute_columns(): {min(t_eager)}") + print(f"Min. time of impute.impute_columns_lazy(): {min(t_lazy)}") + +# setup = """ +# import polars as pl +# from __main__ import complete_rows +# df = pl.DataFrame( +# { +# "orig": ["France", "France", "UK", "UK", "Spain"], +# "dest": ["Japan", "Vietnam", "Japan", "China", "China"], +# "year": [2020, 2021, 2019, 2020, 2022], +# "value": [1, 2, 3, 4, 5], +# } +# ) +# """ +# t_eager = timeit.repeat( +# "complete_rows(df, ['orig', 'dest', 'year'])", +# setup=setup, +# repeat=repeat, +# number=number, +# ) +# +# setup = """ +# import polars as pl +# from __main__ import complete_rows_lazy +# df = pl.LazyFrame( +# { +# "orig": ["France", "France", "UK", "UK", "Spain"], +# "dest": ["Japan", "Vietnam", "Japan", "China", "China"], +# "year": [2020, 2021, 2019, 2020, 2022], +# "value": [1, 2, 3, 4, 5], +# } +# ) +# """ +# t_lazy = timeit.repeat( +# "complete_rows_lazy(df, ['orig', 'dest', 'year'])", +# setup=setup, +# repeat=repeat, +# number=number, +# ) +# +# print(f"Min. time of impute.complete_rows(): {min(t_eager)}") +# print(f"Min. time of impute.complete_rows_lazy(): {min(t_lazy)}") diff --git a/src/csv_helper/py.typed b/src/csv_helper/py.typed new file mode 100644 index 0000000..e69de29 diff --git a/uv.lock b/uv.lock index 79e8a46..2c59e96 100644 --- a/uv.lock +++ b/uv.lock @@ -24,7 +24,7 @@ name = "click" version = "8.1.7" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "colorama", marker = "platform_system == 'Windows'" }, + { name = "colorama", marker = "sys_platform == 'win32'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/96/d3/f04c7bfcf5c1862a2a5b845c6b2b360488cf47af55dfa79c98f6a6bf98b5/click-8.1.7.tar.gz", hash = "sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de", size = 336121 } wheels = [ @@ -196,15 +196,16 @@ wheels = [ [[package]] name = "polars" -version = "1.16.0" +version = "1.21.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/43/80/d845897273be97a3e73b59be711deda375b638330d591a7ef8132c20f52f/polars-1.16.0.tar.gz", hash = "sha256:dd99808b833872babe02434a809fd45c1cffe66a3d57123cdc5e447c7753d328", size = 4192568 } +sdist = { url = "https://files.pythonhosted.org/packages/98/49/3733f0a34fd2504264579bad2c66021e175ab548b21767340721e10a1dcf/polars-1.21.0.tar.gz", hash = "sha256:7692d0fe0fb4faac18ef9423de55789e289f4d3f26d42519bd23ef8afb672d62", size = 4323012 } wheels = [ - { url = "https://files.pythonhosted.org/packages/8e/b0/51c944ecd58b3ebc81eb03b50448127ff85fd9448063094524e0c6693c75/polars-1.16.0-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:072f5ff3b5fe05797c59890de0e464b34ede75a9735e7d7221622fa3a0616d8e", size = 34735038 }, - { url = "https://files.pythonhosted.org/packages/61/2f/d0b45007f2ae4b4926070b420c8525840b9757013cd96077bcde40807ecb/polars-1.16.0-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:ebaf7a1ea114b042fa9f1cd17d49436279eb30545dd74361a2f5e3febeb867cd", size = 30577461 }, - { url = "https://files.pythonhosted.org/packages/31/9e/21e05959323883abcee799837d8cac08adf10a48c233432993757e41791a/polars-1.16.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e626d21dcd2566e1442dac414fe177bc70ebfc2f16620d59d778b1b774361018", size = 36006233 }, - { url = "https://files.pythonhosted.org/packages/25/80/da5c3cd248c7642d1feb896f0a27a0860c607f8cdde3e75457182e4c76c6/polars-1.16.0-cp39-abi3-manylinux_2_24_aarch64.whl", hash = "sha256:53debcce55f68731ee2c7d6c787afdee26860ed6576f1ffa0cb9111b57f82857", size = 32348398 }, - { url = "https://files.pythonhosted.org/packages/08/0b/677c905f9dd5bc37708694e8f7367659c5382bd011f5dc1d564474032d0b/polars-1.16.0-cp39-abi3-win_amd64.whl", hash = "sha256:17efcb550c42d51034ff79702612b9184d8eac0d500de1dd7fb98490459276d3", size = 35743314 }, + { url = "https://files.pythonhosted.org/packages/d4/c3/976f0251e96c957143905530b236f1e278b28a8eb5850eab94595bf5d220/polars-1.21.0-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:063f8807f633f8fd15458a43971d930f6ee568b8e95936d7736c9054fc4f6f52", size = 31015281 }, + { url = "https://files.pythonhosted.org/packages/94/33/c55c19dde172e34dd7a5074a1dcac6472074236131698269db236550283e/polars-1.21.0-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:519863e0990e3323e7a32fc66bac3ad9da51938a1ffce6c09a92e0b1adb026a5", size = 28033973 }, + { url = "https://files.pythonhosted.org/packages/da/72/b108cd7e063f03f5b029edbd73ca514291dd3e3d88617965d09df64d71ba/polars-1.21.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bbecddca35c57efde99070517db5d2c63d4c6d0e3c992123ba3be93e86e7bfac", size = 31641844 }, + { url = "https://files.pythonhosted.org/packages/ac/0a/1df51a9e09fb9974a511eb098e13afed916e8643556799799884f22c7869/polars-1.21.0-cp39-abi3-manylinux_2_24_aarch64.whl", hash = "sha256:d9ce8e6f0d8140e67b0f7c276d22bb5f3345ce7412558643c8b5c270db254b64", size = 29005158 }, + { url = "https://files.pythonhosted.org/packages/90/4b/f75f0eb9527c943440c6ed90be7e97146a00699fee69f9d5aff577f15659/polars-1.21.0-cp39-abi3-win_amd64.whl", hash = "sha256:c4517abb008af890e4ca8fb6bb0372868381017af0ecadf9d062e2f91f50b276", size = 31729901 }, + { url = "https://files.pythonhosted.org/packages/e6/a0/d48548f4c9e139b02eacfc074bfd02d98d9bb5f9bf9c03ec5649a481d8ff/polars-1.21.0-cp39-abi3-win_arm64.whl", hash = "sha256:6bb0ba805defb05b76fdca392e48d84d1f16403de5be25d4dd8cdc7fccfd4251", size = 28179572 }, ] [[package]] From 1a8885059cb248fc53bc3c140ba7bf9099b0e13e Mon Sep 17 00:00:00 2001 From: Andrew Tiu <63322884+winter-again@users.noreply.github.com> Date: Sat, 8 Mar 2025 14:27:55 -0500 Subject: [PATCH 02/47] Draft of lib funcs --- .gitignore | 1 + src/csv_helper/impute.py | 236 ++++++++++++++++++--------------------- 2 files changed, 107 insertions(+), 130 deletions(-) diff --git a/.gitignore b/.gitignore index 368f684..4fbdf0d 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,4 @@ dist/ sdist/ test.py scratch.py +co-est2023-alldata.csv diff --git a/src/csv_helper/impute.py b/src/csv_helper/impute.py index 405736c..a0c06be 100644 --- a/src/csv_helper/impute.py +++ b/src/csv_helper/impute.py @@ -1,28 +1,14 @@ -from typing import NamedTuple, TypeAlias +from typing import NamedTuple import numpy as np import polars as pl - -PolarsNumericType: TypeAlias = ( - pl.Decimal - | pl.Float32 - | pl.Float64 - | pl.Int8 - | pl.Int16 - | pl.Int32 - | pl.Int64 - | pl.Int128 - | pl.UInt8 - | pl.UInt16 - | pl.UInt32 - | pl.UInt64 -) +from polars._typing import PolarsDataType def check(df: pl.DataFrame, fill_cols: list[str], fill_flag: str) -> pl.DataFrame: """ - Return dataframe with counts and proportion of instances of fill_flag in each of - the given fill_cols + Return dataframe with counts and proportion of instances of `fill_flag` in each of + the given `fill_cols` """ for col in fill_cols: if col not in df.columns: @@ -45,18 +31,18 @@ def check(df: pl.DataFrame, fill_cols: list[str], fill_flag: str) -> pl.DataFram ) .sort("column") ) - else: - fill_col = fill_cols[0] - return ( - df.select(fill_col) - .unpivot(variable_name="column", value_name="value") - .group_by("column") - .agg( - count=pl.col("value").filter(pl.col("value") == fill_flag).count(), - prop=pl.col("value").filter(pl.col("value") == fill_flag).count() - / pl.count(), - ) + + fill_col = fill_cols[0] + return ( + df.select(fill_col) + .unpivot(variable_name="column", value_name="value") + .group_by("column") + .agg( + count=pl.col("value").filter(pl.col("value") == fill_flag).count(), + prop=pl.col("value").filter(pl.col("value") == fill_flag).count() + / pl.count(), ) + ) def impute_columns( @@ -64,16 +50,15 @@ def impute_columns( fill_cols: list[str], fill_flag: str, fill_range: tuple[int, int], - col_type: PolarsNumericType | None = None, + col_type: PolarsDataType = pl.Int64, seed: int | None = None, ) -> pl.DataFrame: """ - Fill instances of the fill flag (a string) in the given column + Fill instances of `fill_flag` (a string) in the given column with random integers in the given range (inclusive). - If col_type is specified, will attempt to cast the final result - of fill_cols to that type. Currently, the only options are - Polars numeric types. + If `col_type` is specified, will attempt to cast the final result + of `fill_cols` to that Polars type. """ for col in fill_cols: if col not in df.columns: @@ -89,7 +74,7 @@ def impute_columns( if len(fill_cols) > 1: rng = np.random.default_rng(seed) n = (len(fill_cols), df.height) - # must gen all nums up front + # must gen enough numbers for whole column up-front, otherwise reused fill_nums = rng.integers( fill_range_int.lb, fill_range_int.ub, @@ -103,46 +88,43 @@ def impute_columns( .then(pl.lit(num)) .otherwise(pl.col(col)) .alias(col) + .cast(col_type) ) - - if col_type is not None: - df = df.with_columns(pl.col(col).cast(col_type)) else: fill_col = fill_cols[0] # NOTE: this implementation and numpy implementation for filling values are roughly the same speed - # with this native impl barely faster + # with this Polars-only impl barely faster df = df.with_columns( pl.when(pl.col(fill_col) == fill_flag) .then( pl.int_range(fill_range_int.lb, fill_range_int.ub + 1).sample( - pl.len(), - with_replacement=True, - seed=seed, + pl.len(), with_replacement=True, seed=seed ) ) .otherwise(pl.col(fill_col)) - .alias(fill_col), + .alias(fill_col) + .cast(col_type) ) - if col_type is not None: - df = df.with_columns(pl.col(fill_col).cast(col_type)) - return df -def impute_columns_lazy( +def _impute_columns_lazy( lf: pl.DataFrame | pl.LazyFrame, fill_column: str, fill_flag: str, fill_range: tuple[int, int], seed: int | None = None, ) -> pl.DataFrame: + """ + Experimental with Lazy + """ lf = lf.lazy() if fill_column not in lf.collect_schema().names(): raise ValueError(f"Column {fill_column} doesn't exist") - if not fill_flag_exists(lf, fill_column, fill_flag): + if not _fill_flag_exists_lazy(lf, fill_column, fill_flag): raise ValueError( f"Column {fill_column} doesn't contain any instances of '{fill_flag}'" ) @@ -153,7 +135,7 @@ def impute_columns_lazy( pl.when(pl.col(fill_column) == fill_flag) .then( pl.int_range(fill_range_int.lb, fill_range_int.ub + 1).sample( - pl.len(), with_replacement=True + pl.len(), with_replacement=True, seed=seed ) ) .otherwise(pl.col(fill_column)) @@ -163,15 +145,15 @@ def impute_columns_lazy( return df -def fill_flag_exists( +def fill_flag_exists(df: pl.DataFrame, fill_col: str, fill_flag: str) -> bool: + return df.select((pl.col(fill_col) == fill_flag).any()).item() + + +def _fill_flag_exists_lazy( df: pl.DataFrame | pl.LazyFrame, fill_col: str, fill_flag: str ) -> bool: - # TODO: could just do lf = df.lazy() then don't need isinstance() - if isinstance(df, pl.DataFrame): - return df.select((pl.col(fill_col) == fill_flag).any()).item() - else: - # TODO: is there another way to check that doesn't materialize lf? - return df.select((pl.col(fill_col) == fill_flag).any()).collect().item() + lf = df.lazy() + return lf.select((pl.col(fill_col) == fill_flag).any()).collect().item() class FillRange(NamedTuple): @@ -190,7 +172,8 @@ def parse_fill_range(fill_range: tuple[int, int]) -> FillRange: return fill_range_int -# TODO: difficult to benchmark without reasonably sized data +# TODO: instead of relying on .unique(), can allow user-defined mapping from col name +# to list of possible values? def complete_rows(df: pl.DataFrame, columns: list[str]) -> pl.DataFrame: """ Generate implicit missing rows based on the unique combinations @@ -201,10 +184,11 @@ def complete_rows(df: pl.DataFrame, columns: list[str]) -> pl.DataFrame: df_expand = df_expand.explode(col) df = df_expand.join(df, on=columns, how="left", coalesce=True) + return df -def complete_rows_lazy( +def _complete_rows_lazy( lf: pl.DataFrame | pl.LazyFrame, columns: list[str] ) -> pl.DataFrame: lf = lf.lazy() @@ -223,87 +207,79 @@ def impute_column_pair( denominator: str, fill_flag: str, fill_range: tuple[int, int], + col_type: PolarsDataType = pl.Int64, seed: int | None = None, ) -> pl.DataFrame: - return df + """ + Fill instances of the fill_flag in both the numerator column + and the denominator column such that numerator <= denominator. + If col_type is specified, will attempt to cast the final result + of fill_cols to that type. Currently, the only options are + Polars numeric types. + """ + # TODO: this should also handle denom being in another file like the CLI + # command? -if __name__ == "__main__": - import timeit + if numerator not in df.columns: + raise ValueError(f"Column {numerator} doesn't exist") - print("Benchmarking implementations...") + if denominator not in df.columns: + raise ValueError(f"Column {numerator} doesn't exist") - repeat = 2 - number = 100_000 + if not fill_flag_exists(df, numerator, fill_flag): + raise ValueError( + f"Column {numerator} doesn't contain any instances of '{fill_flag}'" + ) - # NOTE: impute_flag somehow slower? For gen_rows, if instead of passing in a LazyFrame I - # take a DataFrame and convert it to LazyFrame inside the func, the times are more comparable - # Maybe I'm doing benchmark wrong? + if not fill_flag_exists(df, denominator, fill_flag): + raise ValueError( + f"Column {denominator} doesn't contain any instances of '{fill_flag}'" + ) - setup = """ -import polars as pl -from __main__ import impute_columns -df = pl.read_csv('../../tests/data/test_impute_data.csv') - """ - t_eager = timeit.repeat( - "impute_columns(df, ['cases'], '<=5', (1, 5))", - setup=setup, - repeat=repeat, - number=number, + fill_range_int = parse_fill_range(fill_range) + + # TODO: I think repeated use of the same seed is undesirable + df = df.with_columns( + pl.when(pl.col(denominator) == fill_flag) + .then( + pl.int_range(fill_range_int.lb, fill_range_int.ub + 1).sample( + pl.len(), + with_replacement=True, + seed=seed, + ) + ) + .otherwise(pl.col(denominator)) + .alias(denominator) + .cast(col_type) ) - setup = """ -import polars as pl -from __main__ import impute_columns_lazy -df = pl.scan_csv('../../tests/data/test_impute_data.csv') - """ - t_lazy = timeit.repeat( - "impute_columns_lazy(df, ['cases'], '<=5', (1, 5))", - setup=setup, - repeat=repeat, - number=number, + df = df.with_columns( + # TODO: use list b/c no arr.sample() what about struct perf? + pl.when( + (pl.col(numerator) == fill_flag) + & (pl.col(denominator) <= fill_range_int.ub) + ) + .then( + pl.int_ranges(fill_range_int.lb, pl.col(denominator) + 1) + # TODO: use of seed? + .list.sample(1) + .explode() + ) + .when( + (pl.col(numerator) == fill_flag) & (pl.col(denominator) > fill_range_int.ub) + ) + .then( + pl.int_range(fill_range_int.lb, fill_range_int.ub + 1).sample( + # TODO: use of seed? + pl.len(), + with_replacement=True, + seed=seed, + ) + ) + .otherwise(pl.col(numerator)) + .alias(numerator) + .cast(pl.Int64) ) - print(f"Min. time of impute.impute_columns(): {min(t_eager)}") - print(f"Min. time of impute.impute_columns_lazy(): {min(t_lazy)}") - -# setup = """ -# import polars as pl -# from __main__ import complete_rows -# df = pl.DataFrame( -# { -# "orig": ["France", "France", "UK", "UK", "Spain"], -# "dest": ["Japan", "Vietnam", "Japan", "China", "China"], -# "year": [2020, 2021, 2019, 2020, 2022], -# "value": [1, 2, 3, 4, 5], -# } -# ) -# """ -# t_eager = timeit.repeat( -# "complete_rows(df, ['orig', 'dest', 'year'])", -# setup=setup, -# repeat=repeat, -# number=number, -# ) -# -# setup = """ -# import polars as pl -# from __main__ import complete_rows_lazy -# df = pl.LazyFrame( -# { -# "orig": ["France", "France", "UK", "UK", "Spain"], -# "dest": ["Japan", "Vietnam", "Japan", "China", "China"], -# "year": [2020, 2021, 2019, 2020, 2022], -# "value": [1, 2, 3, 4, 5], -# } -# ) -# """ -# t_lazy = timeit.repeat( -# "complete_rows_lazy(df, ['orig', 'dest', 'year'])", -# setup=setup, -# repeat=repeat, -# number=number, -# ) -# -# print(f"Min. time of impute.complete_rows(): {min(t_eager)}") -# print(f"Min. time of impute.complete_rows_lazy(): {min(t_lazy)}") + return df From aa4b1b9c44c92cd850097f4c2a13353f4dcb6625 Mon Sep 17 00:00:00 2001 From: Andrew Tiu <63322884+winter-again@users.noreply.github.com> Date: Tue, 11 Mar 2025 17:32:45 -0400 Subject: [PATCH 03/47] Add complete_total_rows() and complete_present_rows() functions. --- README.md | 2 +- src/csv_helper/impute.py | 29 +++++++++++++++++++++++++---- 2 files changed, 26 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 994ea61..5323329 100644 --- a/README.md +++ b/README.md @@ -13,6 +13,6 @@ pip install git+https://git@github.com/winter-again/csv-helper # or via SSH pip install git+ssh://git@github.com/winter-again/csv-helper -# designate specific version +# designate specific version tag or branch pip install git+ssh://git@github.com/winter-again/csv-helper.git@v0.1.0 ``` diff --git a/src/csv_helper/impute.py b/src/csv_helper/impute.py index a0c06be..20e3bdd 100644 --- a/src/csv_helper/impute.py +++ b/src/csv_helper/impute.py @@ -172,11 +172,32 @@ def parse_fill_range(fill_range: tuple[int, int]) -> FillRange: return fill_range_int -# TODO: instead of relying on .unique(), can allow user-defined mapping from col name -# to list of possible values? -def complete_rows(df: pl.DataFrame, columns: list[str]) -> pl.DataFrame: +def complete_total_rows(df: pl.DataFrame, columns: list[pl.Series]) -> pl.DataFrame: """ - Generate implicit missing rows based on the unique combinations + Generate missing rows based on unique combinations of the + given list of series. The missing values will be nulls. + """ + lfs = [pl.LazyFrame(col.unique()) for col in columns] + combos = lfs[0] + for lf in lfs[1:]: + combos = combos.join(lf, how="cross") + + df_combos = combos.collect() + + col_names = [col.name for col in columns] + df = df_combos.join( + df, + on=col_names, + how="left", + validate="1:1", + ) + + return df + + +def complete_present_rows(df: pl.DataFrame, columns: list[str]) -> pl.DataFrame: + """ + Generate missing rows based on the unique combinations of the given columns' values. The missing values will be nulls. """ df_expand = df.select(pl.col(columns).unique().implode()) From b044153b0b25c884aeaa862242c13dbe99de82a7 Mon Sep 17 00:00:00 2001 From: Andrew Tiu <63322884+winter-again@users.noreply.github.com> Date: Tue, 8 Apr 2025 19:48:24 -0400 Subject: [PATCH 04/47] Experiment with requesting helper data --- pyproject.toml | 1 + src/csv_helper/census.py | 107 +++++++++++++++++++++++++++++++++++++++ src/csv_helper/impute.py | 2 + uv.lock | 92 +++++++++++++++++++++++++++++++++ 4 files changed, 202 insertions(+) create mode 100644 src/csv_helper/census.py diff --git a/pyproject.toml b/pyproject.toml index 3af0b9b..4e4ea3e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,6 +12,7 @@ dependencies = [ "typer>=0.12.3", "polars>=1.4.1", "numpy>=2.0.1", + "requests>=2.32.3", ] [project.scripts] diff --git a/src/csv_helper/census.py b/src/csv_helper/census.py new file mode 100644 index 0000000..24d68ea --- /dev/null +++ b/src/csv_helper/census.py @@ -0,0 +1,107 @@ +from io import StringIO +from pathlib import Path + +import polars as pl +import requests + +OUT = Path(__file__).parents[2] + + +def main() -> int: + df = get_census_popn("state", ".") + with pl.Config(tbl_cols=-1): + print(df) + + return 0 + + +# TODO: should consider the county FIPS changes over time, boundary changes, etc. +# TODO: irregular data availability and naming schemes mean it's impossible to generalize this? +# TODO: sep function or just state? +def get_census_popn(geo: str, out: str | Path | None = None) -> pl.DataFrame: + """ + Request and return state or county population data from Census Bureau's + FTP site. Restricted to 2020-2023 dataset, which has data from + 2020 to 2023. Also takes optional path to a directory in which + to save the raw data. + """ + # TODO: this county file actually has state data too so could just it for both? + # but then we're throwing away most of it so inefficient + arg_mapper = { + "state": ("2020-2023", "state", "NST-EST2023-ALLDATA"), + "county": ("2020-2023", "counties", "co-est2023-alldata"), + } + + time, geo, file_name = arg_mapper[geo] + url = f"https://www2.census.gov/programs-surveys/popest/datasets/{time}/{geo}/totals/{file_name}.csv" + + try: + req = requests.get(url) + req.raise_for_status() + except requests.exceptions.HTTPError: + print("HTTP error while requesting") + raise + except requests.exceptions.RequestException: + print("Some fatal request error") + raise + + if out is not None: + out = Path(out) + if not out.is_dir(): + raise ValueError(f"Directory at {out} doesn't exist") + + with open(out / f"{file_name}.csv", "w") as f: + f.write(req.text) + + with StringIO(req.text) as f: + lf = pl.scan_csv(f, schema_overrides={"STATE": pl.String, "COUNTY": pl.String}) + + invalid_states = ["60", "66", "69", "72", "74", "78"] + df = ( + lf.select( + "STATE", + "COUNTY", + "STNAME", + "CTYNAME", + "POPESTIMATE2020", + "POPESTIMATE2021", + "POPESTIMATE2022", + "POPESTIMATE2023", + ) + .filter( + pl.col("COUNTY") != "000", + ~pl.col("STATE").is_in(invalid_states), + ) + .with_columns(county_fips=pl.col("STATE") + pl.col("COUNTY")) + .drop("STATE", "COUNTY") + .rename( + { + "STNAME": "state_name", + "CTYNAME": "county_name", + "POPESTIMATE2020": "popn_2020", + "POPESTIMATE2021": "popn_2021", + "POPESTIMATE2022": "popn_2022", + "POPESTIMATE2023": "popn_2023", + } + ) + .select( + "state_name", + "county_name", + "county_fips", + "popn_2020", + "popn_2021", + "popn_2022", + "popn_2023", + ) + .collect() + ) + + assert df.select(pl.col("county_fips").n_unique()).item() == df.height, ( + "Expected to have one row per county FIPS" + ) + + return df + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/src/csv_helper/impute.py b/src/csv_helper/impute.py index 20e3bdd..7e1e868 100644 --- a/src/csv_helper/impute.py +++ b/src/csv_helper/impute.py @@ -45,6 +45,8 @@ def check(df: pl.DataFrame, fill_cols: list[str], fill_flag: str) -> pl.DataFram ) +# TODO: instead of separate lazy func, let this take df or lf +# or have bool arg that determines whether .lazy() conversion happens? def impute_columns( df: pl.DataFrame, fill_cols: list[str], diff --git a/uv.lock b/uv.lock index 2c59e96..d98828c 100644 --- a/uv.lock +++ b/uv.lock @@ -10,6 +10,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a4/07/14f8ad37f2d12a5ce41206c21820d8cb6561b728e51fad4530dff0552a67/cachetools-5.5.0-py3-none-any.whl", hash = "sha256:02134e8439cdc2ffb62023ce1debca2944c3f289d66bb17ead3ab3dede74b292", size = 9524 }, ] +[[package]] +name = "certifi" +version = "2025.1.31" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/1c/ab/c9f1e32b7b1bf505bf26f0ef697775960db7932abeb7b516de930ba2705f/certifi-2025.1.31.tar.gz", hash = "sha256:3d5da6925056f6f18f119200434a4780a94263f10d1c21d032a6f6b2baa20651", size = 167577 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/38/fc/bce832fd4fd99766c04d1ee0eead6b0ec6486fb100ae5e74c1d91292b982/certifi-2025.1.31-py3-none-any.whl", hash = "sha256:ca78db4565a652026a4db2bcdf68f2fb589ea80d0be70e03929ed730746b84fe", size = 166393 }, +] + [[package]] name = "chardet" version = "5.2.0" @@ -19,6 +28,54 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/38/6f/f5fbc992a329ee4e0f288c1fe0e2ad9485ed064cac731ed2fe47dcc38cbf/chardet-5.2.0-py3-none-any.whl", hash = "sha256:e1cf59446890a00105fe7b7912492ea04b6e6f06d4b742b2c788469e34c82970", size = 199385 }, ] +[[package]] +name = "charset-normalizer" +version = "3.4.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/16/b0/572805e227f01586461c80e0fd25d65a2115599cc9dad142fee4b747c357/charset_normalizer-3.4.1.tar.gz", hash = "sha256:44251f18cd68a75b56585dd00dae26183e102cd5e0f9f1466e6df5da2ed64ea3", size = 123188 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/72/80/41ef5d5a7935d2d3a773e3eaebf0a9350542f2cab4eac59a7a4741fbbbbe/charset_normalizer-3.4.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:8bfa33f4f2672964266e940dd22a195989ba31669bd84629f05fab3ef4e2d125", size = 194995 }, + { url = "https://files.pythonhosted.org/packages/7a/28/0b9fefa7b8b080ec492110af6d88aa3dea91c464b17d53474b6e9ba5d2c5/charset_normalizer-3.4.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:28bf57629c75e810b6ae989f03c0828d64d6b26a5e205535585f96093e405ed1", size = 139471 }, + { url = "https://files.pythonhosted.org/packages/71/64/d24ab1a997efb06402e3fc07317e94da358e2585165930d9d59ad45fcae2/charset_normalizer-3.4.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f08ff5e948271dc7e18a35641d2f11a4cd8dfd5634f55228b691e62b37125eb3", size = 149831 }, + { url = "https://files.pythonhosted.org/packages/37/ed/be39e5258e198655240db5e19e0b11379163ad7070962d6b0c87ed2c4d39/charset_normalizer-3.4.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:234ac59ea147c59ee4da87a0c0f098e9c8d169f4dc2a159ef720f1a61bbe27cd", size = 142335 }, + { url = "https://files.pythonhosted.org/packages/88/83/489e9504711fa05d8dde1574996408026bdbdbd938f23be67deebb5eca92/charset_normalizer-3.4.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fd4ec41f914fa74ad1b8304bbc634b3de73d2a0889bd32076342a573e0779e00", size = 143862 }, + { url = "https://files.pythonhosted.org/packages/c6/c7/32da20821cf387b759ad24627a9aca289d2822de929b8a41b6241767b461/charset_normalizer-3.4.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:eea6ee1db730b3483adf394ea72f808b6e18cf3cb6454b4d86e04fa8c4327a12", size = 145673 }, + { url = "https://files.pythonhosted.org/packages/68/85/f4288e96039abdd5aeb5c546fa20a37b50da71b5cf01e75e87f16cd43304/charset_normalizer-3.4.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:c96836c97b1238e9c9e3fe90844c947d5afbf4f4c92762679acfe19927d81d77", size = 140211 }, + { url = "https://files.pythonhosted.org/packages/28/a3/a42e70d03cbdabc18997baf4f0227c73591a08041c149e710045c281f97b/charset_normalizer-3.4.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:4d86f7aff21ee58f26dcf5ae81a9addbd914115cdebcbb2217e4f0ed8982e146", size = 148039 }, + { url = "https://files.pythonhosted.org/packages/85/e4/65699e8ab3014ecbe6f5c71d1a55d810fb716bbfd74f6283d5c2aa87febf/charset_normalizer-3.4.1-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:09b5e6733cbd160dcc09589227187e242a30a49ca5cefa5a7edd3f9d19ed53fd", size = 151939 }, + { url = "https://files.pythonhosted.org/packages/b1/82/8e9fe624cc5374193de6860aba3ea8070f584c8565ee77c168ec13274bd2/charset_normalizer-3.4.1-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:5777ee0881f9499ed0f71cc82cf873d9a0ca8af166dfa0af8ec4e675b7df48e6", size = 149075 }, + { url = "https://files.pythonhosted.org/packages/3d/7b/82865ba54c765560c8433f65e8acb9217cb839a9e32b42af4aa8e945870f/charset_normalizer-3.4.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:237bdbe6159cff53b4f24f397d43c6336c6b0b42affbe857970cefbb620911c8", size = 144340 }, + { url = "https://files.pythonhosted.org/packages/b5/b6/9674a4b7d4d99a0d2df9b215da766ee682718f88055751e1e5e753c82db0/charset_normalizer-3.4.1-cp311-cp311-win32.whl", hash = "sha256:8417cb1f36cc0bc7eaba8ccb0e04d55f0ee52df06df3ad55259b9a323555fc8b", size = 95205 }, + { url = "https://files.pythonhosted.org/packages/1e/ab/45b180e175de4402dcf7547e4fb617283bae54ce35c27930a6f35b6bef15/charset_normalizer-3.4.1-cp311-cp311-win_amd64.whl", hash = "sha256:d7f50a1f8c450f3925cb367d011448c39239bb3eb4117c36a6d354794de4ce76", size = 102441 }, + { url = "https://files.pythonhosted.org/packages/0a/9a/dd1e1cdceb841925b7798369a09279bd1cf183cef0f9ddf15a3a6502ee45/charset_normalizer-3.4.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:73d94b58ec7fecbc7366247d3b0b10a21681004153238750bb67bd9012414545", size = 196105 }, + { url = "https://files.pythonhosted.org/packages/d3/8c/90bfabf8c4809ecb648f39794cf2a84ff2e7d2a6cf159fe68d9a26160467/charset_normalizer-3.4.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dad3e487649f498dd991eeb901125411559b22e8d7ab25d3aeb1af367df5efd7", size = 140404 }, + { url = "https://files.pythonhosted.org/packages/ad/8f/e410d57c721945ea3b4f1a04b74f70ce8fa800d393d72899f0a40526401f/charset_normalizer-3.4.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c30197aa96e8eed02200a83fba2657b4c3acd0f0aa4bdc9f6c1af8e8962e0757", size = 150423 }, + { url = "https://files.pythonhosted.org/packages/f0/b8/e6825e25deb691ff98cf5c9072ee0605dc2acfca98af70c2d1b1bc75190d/charset_normalizer-3.4.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2369eea1ee4a7610a860d88f268eb39b95cb588acd7235e02fd5a5601773d4fa", size = 143184 }, + { url = "https://files.pythonhosted.org/packages/3e/a2/513f6cbe752421f16d969e32f3583762bfd583848b763913ddab8d9bfd4f/charset_normalizer-3.4.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc2722592d8998c870fa4e290c2eec2c1569b87fe58618e67d38b4665dfa680d", size = 145268 }, + { url = "https://files.pythonhosted.org/packages/74/94/8a5277664f27c3c438546f3eb53b33f5b19568eb7424736bdc440a88a31f/charset_normalizer-3.4.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ffc9202a29ab3920fa812879e95a9e78b2465fd10be7fcbd042899695d75e616", size = 147601 }, + { url = "https://files.pythonhosted.org/packages/7c/5f/6d352c51ee763623a98e31194823518e09bfa48be2a7e8383cf691bbb3d0/charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:804a4d582ba6e5b747c625bf1255e6b1507465494a40a2130978bda7b932c90b", size = 141098 }, + { url = "https://files.pythonhosted.org/packages/78/d4/f5704cb629ba5ab16d1d3d741396aec6dc3ca2b67757c45b0599bb010478/charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:0f55e69f030f7163dffe9fd0752b32f070566451afe180f99dbeeb81f511ad8d", size = 149520 }, + { url = "https://files.pythonhosted.org/packages/c5/96/64120b1d02b81785f222b976c0fb79a35875457fa9bb40827678e54d1bc8/charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:c4c3e6da02df6fa1410a7680bd3f63d4f710232d3139089536310d027950696a", size = 152852 }, + { url = "https://files.pythonhosted.org/packages/84/c9/98e3732278a99f47d487fd3468bc60b882920cef29d1fa6ca460a1fdf4e6/charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:5df196eb874dae23dcfb968c83d4f8fdccb333330fe1fc278ac5ceeb101003a9", size = 150488 }, + { url = "https://files.pythonhosted.org/packages/13/0e/9c8d4cb99c98c1007cc11eda969ebfe837bbbd0acdb4736d228ccaabcd22/charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e358e64305fe12299a08e08978f51fc21fac060dcfcddd95453eabe5b93ed0e1", size = 146192 }, + { url = "https://files.pythonhosted.org/packages/b2/21/2b6b5b860781a0b49427309cb8670785aa543fb2178de875b87b9cc97746/charset_normalizer-3.4.1-cp312-cp312-win32.whl", hash = "sha256:9b23ca7ef998bc739bf6ffc077c2116917eabcc901f88da1b9856b210ef63f35", size = 95550 }, + { url = "https://files.pythonhosted.org/packages/21/5b/1b390b03b1d16c7e382b561c5329f83cc06623916aab983e8ab9239c7d5c/charset_normalizer-3.4.1-cp312-cp312-win_amd64.whl", hash = "sha256:6ff8a4a60c227ad87030d76e99cd1698345d4491638dfa6673027c48b3cd395f", size = 102785 }, + { url = "https://files.pythonhosted.org/packages/38/94/ce8e6f63d18049672c76d07d119304e1e2d7c6098f0841b51c666e9f44a0/charset_normalizer-3.4.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:aabfa34badd18f1da5ec1bc2715cadc8dca465868a4e73a0173466b688f29dda", size = 195698 }, + { url = "https://files.pythonhosted.org/packages/24/2e/dfdd9770664aae179a96561cc6952ff08f9a8cd09a908f259a9dfa063568/charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:22e14b5d70560b8dd51ec22863f370d1e595ac3d024cb8ad7d308b4cd95f8313", size = 140162 }, + { url = "https://files.pythonhosted.org/packages/24/4e/f646b9093cff8fc86f2d60af2de4dc17c759de9d554f130b140ea4738ca6/charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8436c508b408b82d87dc5f62496973a1805cd46727c34440b0d29d8a2f50a6c9", size = 150263 }, + { url = "https://files.pythonhosted.org/packages/5e/67/2937f8d548c3ef6e2f9aab0f6e21001056f692d43282b165e7c56023e6dd/charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2d074908e1aecee37a7635990b2c6d504cd4766c7bc9fc86d63f9c09af3fa11b", size = 142966 }, + { url = "https://files.pythonhosted.org/packages/52/ed/b7f4f07de100bdb95c1756d3a4d17b90c1a3c53715c1a476f8738058e0fa/charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:955f8851919303c92343d2f66165294848d57e9bba6cf6e3625485a70a038d11", size = 144992 }, + { url = "https://files.pythonhosted.org/packages/96/2c/d49710a6dbcd3776265f4c923bb73ebe83933dfbaa841c5da850fe0fd20b/charset_normalizer-3.4.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:44ecbf16649486d4aebafeaa7ec4c9fed8b88101f4dd612dcaf65d5e815f837f", size = 147162 }, + { url = "https://files.pythonhosted.org/packages/b4/41/35ff1f9a6bd380303dea55e44c4933b4cc3c4850988927d4082ada230273/charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:0924e81d3d5e70f8126529951dac65c1010cdf117bb75eb02dd12339b57749dd", size = 140972 }, + { url = "https://files.pythonhosted.org/packages/fb/43/c6a0b685fe6910d08ba971f62cd9c3e862a85770395ba5d9cad4fede33ab/charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:2967f74ad52c3b98de4c3b32e1a44e32975e008a9cd2a8cc8966d6a5218c5cb2", size = 149095 }, + { url = "https://files.pythonhosted.org/packages/4c/ff/a9a504662452e2d2878512115638966e75633519ec11f25fca3d2049a94a/charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:c75cb2a3e389853835e84a2d8fb2b81a10645b503eca9bcb98df6b5a43eb8886", size = 152668 }, + { url = "https://files.pythonhosted.org/packages/6c/71/189996b6d9a4b932564701628af5cee6716733e9165af1d5e1b285c530ed/charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:09b26ae6b1abf0d27570633b2b078a2a20419c99d66fb2823173d73f188ce601", size = 150073 }, + { url = "https://files.pythonhosted.org/packages/e4/93/946a86ce20790e11312c87c75ba68d5f6ad2208cfb52b2d6a2c32840d922/charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:fa88b843d6e211393a37219e6a1c1df99d35e8fd90446f1118f4216e307e48cd", size = 145732 }, + { url = "https://files.pythonhosted.org/packages/cd/e5/131d2fb1b0dddafc37be4f3a2fa79aa4c037368be9423061dccadfd90091/charset_normalizer-3.4.1-cp313-cp313-win32.whl", hash = "sha256:eb8178fe3dba6450a3e024e95ac49ed3400e506fd4e9e5c32d30adda88cbd407", size = 95391 }, + { url = "https://files.pythonhosted.org/packages/27/f2/4f9a69cc7712b9b5ad8fdb87039fd89abba997ad5cbe690d1835d40405b0/charset_normalizer-3.4.1-cp313-cp313-win_amd64.whl", hash = "sha256:b1ac5992a838106edb89654e0aebfc24f5848ae2547d22c2c3f66454daa11971", size = 102702 }, + { url = "https://files.pythonhosted.org/packages/0e/f6/65ecc6878a89bb1c23a086ea335ad4bf21a588990c3f535a227b9eea9108/charset_normalizer-3.4.1-py3-none-any.whl", hash = "sha256:d98b1668f06378c6dbefec3b92299716b931cd4e6061f3c875a71ced1780ab85", size = 49767 }, +] + [[package]] name = "click" version = "8.1.7" @@ -47,6 +104,7 @@ source = { editable = "." } dependencies = [ { name = "numpy" }, { name = "polars" }, + { name = "requests" }, { name = "typer" }, ] @@ -61,6 +119,7 @@ dev = [ requires-dist = [ { name = "numpy", specifier = ">=2.0.1" }, { name = "polars", specifier = ">=1.4.1" }, + { name = "requests", specifier = ">=2.32.3" }, { name = "typer", specifier = ">=0.12.3" }, ] @@ -89,6 +148,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b9/f8/feced7779d755758a52d1f6635d990b8d98dc0a29fa568bbe0625f18fdf3/filelock-3.16.1-py3-none-any.whl", hash = "sha256:2082e5703d51fbf98ea75855d9d5527e33d8ff23099bec374a134febee6946b0", size = 16163 }, ] +[[package]] +name = "idna" +version = "3.10" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f1/70/7703c29685631f5a7590aa73f1f1d3fa9a380e654b86af429e0934a32f7d/idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9", size = 190490 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/76/c6/c88e154df9c4e1a2a66ccf0005a88dfb2650c1dffb6f5ce603dfbd452ce3/idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3", size = 70442 }, +] + [[package]] name = "iniconfig" version = "2.0.0" @@ -244,6 +312,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/6b/77/7440a06a8ead44c7757a64362dd22df5760f9b12dc5f11b6188cd2fc27a0/pytest-8.3.3-py3-none-any.whl", hash = "sha256:a6853c7375b2663155079443d2e45de913a911a11d669df02a50814944db57b2", size = 342341 }, ] +[[package]] +name = "requests" +version = "2.32.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "charset-normalizer" }, + { name = "idna" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/63/70/2bf7780ad2d390a8d301ad0b550f1581eadbd9a20f896afe06353c2a2913/requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760", size = 131218 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f9/9b/335f9764261e915ed497fcdeb11df5dfd6f7bf257d4a6a2a686d80da4d54/requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6", size = 64928 }, +] + [[package]] name = "rich" version = "13.9.4" @@ -324,6 +407,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/26/9f/ad63fc0248c5379346306f8668cda6e2e2e9c95e01216d2b8ffd9ff037d0/typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d", size = 37438 }, ] +[[package]] +name = "urllib3" +version = "2.3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/aa/63/e53da845320b757bf29ef6a9062f5c669fe997973f966045cb019c3f4b66/urllib3-2.3.0.tar.gz", hash = "sha256:f8c5449b3cf0861679ce7e0503c7b44b5ec981bec0d1d3795a07f1ba96f0204d", size = 307268 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c8/19/4ec628951a74043532ca2cf5d97b7b14863931476d117c471e8e2b1eb39f/urllib3-2.3.0-py3-none-any.whl", hash = "sha256:1cee9ad369867bfdbbb48b7dd50374c0967a0bb7710050facf0dd6911440e3df", size = 128369 }, +] + [[package]] name = "uv" version = "0.5.5" From 1c5d5f09d99d430d2bdf13ad52b7013e2fe90157 Mon Sep 17 00:00:00 2001 From: Andrew Tiu <63322884+winter-again@users.noreply.github.com> Date: Tue, 20 May 2025 16:42:33 -0400 Subject: [PATCH 05/47] Overhaul complete() function. Should be generic over DataFrame and LazyFrame. Added some simple tests. --- src/csv_helper/impute.py | 78 ++++++++++++++-------------------- tests/complete_test.py | 92 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 125 insertions(+), 45 deletions(-) create mode 100644 tests/complete_test.py diff --git a/src/csv_helper/impute.py b/src/csv_helper/impute.py index 7e1e868..1833553 100644 --- a/src/csv_helper/impute.py +++ b/src/csv_helper/impute.py @@ -1,10 +1,12 @@ -from typing import NamedTuple +from typing import Any, NamedTuple, TypeVar import numpy as np import polars as pl from polars._typing import PolarsDataType +# TODO: make this a check func that returns bool instead? +# and separate this into another func like summarize()? def check(df: pl.DataFrame, fill_cols: list[str], fill_flag: str) -> pl.DataFrame: """ Return dataframe with counts and proportion of instances of `fill_flag` in each of @@ -47,6 +49,7 @@ def check(df: pl.DataFrame, fill_cols: list[str], fill_flag: str) -> pl.DataFram # TODO: instead of separate lazy func, let this take df or lf # or have bool arg that determines whether .lazy() conversion happens? +# TODO: add asserts for shape? def impute_columns( df: pl.DataFrame, fill_cols: list[str], @@ -174,54 +177,38 @@ def parse_fill_range(fill_range: tuple[int, int]) -> FillRange: return fill_range_int -def complete_total_rows(df: pl.DataFrame, columns: list[pl.Series]) -> pl.DataFrame: - """ - Generate missing rows based on unique combinations of the - given list of series. The missing values will be nulls. - """ - lfs = [pl.LazyFrame(col.unique()) for col in columns] - combos = lfs[0] - for lf in lfs[1:]: - combos = combos.join(lf, how="cross") - - df_combos = combos.collect() - - col_names = [col.name for col in columns] - df = df_combos.join( - df, - on=col_names, - how="left", - validate="1:1", - ) - - return df +TFrame = TypeVar("TFrame", pl.DataFrame, pl.LazyFrame) -def complete_present_rows(df: pl.DataFrame, columns: list[str]) -> pl.DataFrame: +def complete(df: TFrame, *columns: str | pl.Series) -> TFrame: """ - Generate missing rows based on the unique combinations - of the given columns' values. The missing values will be nulls. - """ - df_expand = df.select(pl.col(columns).unique().implode()) - for col in columns: - df_expand = df_expand.explode(col) - - df = df_expand.join(df, on=columns, how="left", coalesce=True) - - return df + Generate rows for implicit missing values based on column combinations, + thus making them explicit missing values. Generated values marked as null. - -def _complete_rows_lazy( - lf: pl.DataFrame | pl.LazyFrame, columns: list[str] -) -> pl.DataFrame: - lf = lf.lazy() - lf_expand = lf.select(pl.col(columns).unique().implode()) + If columns are referenced with strings, then only existing values in those + columns are used for completion. If Series are specified instead, then + those Series can specify the full set of possible values, provided that + the Series is named after an existing column. + """ + cols = [] for col in columns: - lf_expand = lf_expand.explode(col) + if isinstance(col, str): + cols.append(pl.col(col).unique().implode()) + elif isinstance(col, pl.Series): + cols.append(col.unique().implode()) + else: + raise TypeError( + f"The columns argument(s) must be either string or polars Series. Got {type(col)} instead." + ) - df = lf_expand.join(lf, on=columns, how="left", coalesce=True).collect() + unique_combos = df.select(cols) + col_names = unique_combos.collect_schema().names() + for col in col_names: + unique_combos = unique_combos.explode(col) - return df + return unique_combos.join( + df, on=col_names, how="left", coalesce=True, validate="1:1" + ) def impute_column_pair( @@ -241,8 +228,8 @@ def impute_column_pair( of fill_cols to that type. Currently, the only options are Polars numeric types. """ - # TODO: this should also handle denom being in another file like the CLI - # command? + # TODO: should this also handle denom being in another file or dataframe (like the CLI + # command?) if numerator not in df.columns: raise ValueError(f"Column {numerator} doesn't exist") @@ -284,8 +271,9 @@ def impute_column_pair( & (pl.col(denominator) <= fill_range_int.ub) ) .then( - pl.int_ranges(fill_range_int.lb, pl.col(denominator) + 1) + # TODO: look into high mem consumption for this pl.when() # TODO: use of seed? + pl.int_ranges(fill_range_int.lb, pl.col(denominator) + 1) .list.sample(1) .explode() ) diff --git a/tests/complete_test.py b/tests/complete_test.py new file mode 100644 index 0000000..400adf2 --- /dev/null +++ b/tests/complete_test.py @@ -0,0 +1,92 @@ +import polars as pl +from polars.testing import assert_frame_equal + +from csv_helper import impute + + +def test_complete_exists(): + df = pl.DataFrame( + { + "country": ["France", "France", "UK", "UK", "Spain"], + "year": [2020, 2021, 2019, 2020, 2022], + "value": [1, 2, 3, 4, 5], + } + ) + df = df.pipe(impute.complete, "country", "year").sort("country", "year") + result = pl.DataFrame( + { + "country": [ + country for country in ["France", "UK", "Spain"] for _ in range(4) + ], + "year": [y for _ in range(3) for y in range(2019, 2023)], + "value": [None, 1, 2, None, 3, 4, None, None, None, None, None, 5], + } + ).sort("country", "year") + + assert_frame_equal(df, result) + + lf = pl.LazyFrame( + { + "country": ["France", "France", "UK", "UK", "Spain"], + "year": [2020, 2021, 2019, 2020, 2022], + "value": [1, 2, 3, 4, 5], + } + ) + lf = lf.pipe(impute.complete, "country", "year").sort("country", "year") + result = pl.LazyFrame( + { + "country": [ + country for country in ["France", "UK", "Spain"] for _ in range(4) + ], + "year": [y for _ in range(3) for y in range(2019, 2023)], + "value": [None, 1, 2, None, 3, 4, None, None, None, None, None, 5], + } + ).sort("country", "year") + + assert_frame_equal(lf, result) + + +def test_complete_not_exists(): + # TODO: add lazy test + df = pl.DataFrame( + { + "country": ["France", "France", "UK", "UK", "Spain"], + "year": [2020, 2021, 2019, 2020, 2022], + "value": [1, 2, 3, 4, 5], + } + ) + df = df.pipe( + impute.complete, + pl.Series("country", ["France", "UK", "Spain", "China"]), + "year", + ).sort("country", "year") + result = pl.DataFrame( + { + "country": [ + country + for country in ["China", "France", "UK", "Spain"] + for _ in range(4) + ], + "year": [y for _ in range(4) for y in range(2019, 2023)], + "value": [ + None, + None, + None, + None, + None, + 1, + 2, + None, + 3, + 4, + None, + None, + None, + None, + None, + 5, + ], + } + ).sort("country", "year") + + assert_frame_equal(df, result) From 96da5dd5902448e1ad36d0a64a766c72f22e625f Mon Sep 17 00:00:00 2001 From: Andrew Tiu <63322884+winter-again@users.noreply.github.com> Date: Tue, 20 May 2025 22:36:13 -0400 Subject: [PATCH 06/47] Upgrade deps and python version to 3.13. --- .python-version | 2 +- pyproject.toml | 29 ++--- uv.lock | 310 ++++++++++++++++-------------------------------- 3 files changed, 108 insertions(+), 233 deletions(-) diff --git a/.python-version b/.python-version index 2c07333..24ee5b1 100644 --- a/.python-version +++ b/.python-version @@ -1 +1 @@ -3.11 +3.13 diff --git a/pyproject.toml b/pyproject.toml index 4e4ea3e..3c598e3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,19 +1,12 @@ [project] name = "csv-helper" -version = "0.2.2" -description = "A simple CLI for imputing masked counts in CSV data" +version = "0.2.3" +description = "A simple CLI and library for imputing masked counts in CSV data" readme = "README.md" -authors = [ - {name = "Andrew Tiu", email = "andrew.tiu88@gmail.com"}, -] -license = {text = "MIT"} -requires-python = ">=3.11" -dependencies = [ - "typer>=0.12.3", - "polars>=1.4.1", - "numpy>=2.0.1", - "requests>=2.32.3", -] +authors = [{ name = "Andrew Tiu", email = "andrew.tiu88@gmail.com" }] +license = { text = "MIT" } +requires-python = ">=3.13" +dependencies = ["typer>=0.12.3", "polars>=1.4.1", "numpy>=2.0.1"] [project.scripts] csv-helper = "csv_helper.main:app" @@ -23,17 +16,11 @@ requires = ["pdm-backend"] build-backend = "pdm.backend" [dependency-groups] -dev = [ - "pytest>=8.3.2", - "tox>=4.23.2", - "tox-uv>=1.16.0", -] +dev = ["pytest>=8.3.2", "tox>=4.23.2", "tox-uv>=1.16.0"] [tool.pyright] include = ["src"] -exclude = [ - "**/__pycache__" -] +exclude = ["**/__pycache__"] [tool.pytest.ini_options] pythonpath = ["src"] diff --git a/uv.lock b/uv.lock index d98828c..ee77f91 100644 --- a/uv.lock +++ b/uv.lock @@ -1,22 +1,13 @@ version = 1 -requires-python = ">=3.11" +requires-python = ">=3.13" [[package]] name = "cachetools" -version = "5.5.0" +version = "5.5.2" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/c3/38/a0f315319737ecf45b4319a8cd1f3a908e29d9277b46942263292115eee7/cachetools-5.5.0.tar.gz", hash = "sha256:2cc24fb4cbe39633fb7badd9db9ca6295d766d9c2995f245725a46715d050f2a", size = 27661 } +sdist = { url = "https://files.pythonhosted.org/packages/6c/81/3747dad6b14fa2cf53fcf10548cf5aea6913e96fab41a3c198676f8948a5/cachetools-5.5.2.tar.gz", hash = "sha256:1a661caa9175d26759571b2e19580f9d6393969e5dfca11fdb1f947a23e640d4", size = 28380 } wheels = [ - { url = "https://files.pythonhosted.org/packages/a4/07/14f8ad37f2d12a5ce41206c21820d8cb6561b728e51fad4530dff0552a67/cachetools-5.5.0-py3-none-any.whl", hash = "sha256:02134e8439cdc2ffb62023ce1debca2944c3f289d66bb17ead3ab3dede74b292", size = 9524 }, -] - -[[package]] -name = "certifi" -version = "2025.1.31" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/1c/ab/c9f1e32b7b1bf505bf26f0ef697775960db7932abeb7b516de930ba2705f/certifi-2025.1.31.tar.gz", hash = "sha256:3d5da6925056f6f18f119200434a4780a94263f10d1c21d032a6f6b2baa20651", size = 167577 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/38/fc/bce832fd4fd99766c04d1ee0eead6b0ec6486fb100ae5e74c1d91292b982/certifi-2025.1.31-py3-none-any.whl", hash = "sha256:ca78db4565a652026a4db2bcdf68f2fb589ea80d0be70e03929ed730746b84fe", size = 166393 }, + { url = "https://files.pythonhosted.org/packages/72/76/20fa66124dbe6be5cafeb312ece67de6b61dd91a0247d1ea13db4ebb33c2/cachetools-5.5.2-py3-none-any.whl", hash = "sha256:d26a22bcc62eb95c3beabd9f1ee5e820d3d2704fe2967cbe350e20c8ffcd3f0a", size = 10080 }, ] [[package]] @@ -28,64 +19,16 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/38/6f/f5fbc992a329ee4e0f288c1fe0e2ad9485ed064cac731ed2fe47dcc38cbf/chardet-5.2.0-py3-none-any.whl", hash = "sha256:e1cf59446890a00105fe7b7912492ea04b6e6f06d4b742b2c788469e34c82970", size = 199385 }, ] -[[package]] -name = "charset-normalizer" -version = "3.4.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/16/b0/572805e227f01586461c80e0fd25d65a2115599cc9dad142fee4b747c357/charset_normalizer-3.4.1.tar.gz", hash = "sha256:44251f18cd68a75b56585dd00dae26183e102cd5e0f9f1466e6df5da2ed64ea3", size = 123188 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/72/80/41ef5d5a7935d2d3a773e3eaebf0a9350542f2cab4eac59a7a4741fbbbbe/charset_normalizer-3.4.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:8bfa33f4f2672964266e940dd22a195989ba31669bd84629f05fab3ef4e2d125", size = 194995 }, - { url = "https://files.pythonhosted.org/packages/7a/28/0b9fefa7b8b080ec492110af6d88aa3dea91c464b17d53474b6e9ba5d2c5/charset_normalizer-3.4.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:28bf57629c75e810b6ae989f03c0828d64d6b26a5e205535585f96093e405ed1", size = 139471 }, - { url = "https://files.pythonhosted.org/packages/71/64/d24ab1a997efb06402e3fc07317e94da358e2585165930d9d59ad45fcae2/charset_normalizer-3.4.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f08ff5e948271dc7e18a35641d2f11a4cd8dfd5634f55228b691e62b37125eb3", size = 149831 }, - { url = "https://files.pythonhosted.org/packages/37/ed/be39e5258e198655240db5e19e0b11379163ad7070962d6b0c87ed2c4d39/charset_normalizer-3.4.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:234ac59ea147c59ee4da87a0c0f098e9c8d169f4dc2a159ef720f1a61bbe27cd", size = 142335 }, - { url = "https://files.pythonhosted.org/packages/88/83/489e9504711fa05d8dde1574996408026bdbdbd938f23be67deebb5eca92/charset_normalizer-3.4.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fd4ec41f914fa74ad1b8304bbc634b3de73d2a0889bd32076342a573e0779e00", size = 143862 }, - { url = "https://files.pythonhosted.org/packages/c6/c7/32da20821cf387b759ad24627a9aca289d2822de929b8a41b6241767b461/charset_normalizer-3.4.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:eea6ee1db730b3483adf394ea72f808b6e18cf3cb6454b4d86e04fa8c4327a12", size = 145673 }, - { url = "https://files.pythonhosted.org/packages/68/85/f4288e96039abdd5aeb5c546fa20a37b50da71b5cf01e75e87f16cd43304/charset_normalizer-3.4.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:c96836c97b1238e9c9e3fe90844c947d5afbf4f4c92762679acfe19927d81d77", size = 140211 }, - { url = "https://files.pythonhosted.org/packages/28/a3/a42e70d03cbdabc18997baf4f0227c73591a08041c149e710045c281f97b/charset_normalizer-3.4.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:4d86f7aff21ee58f26dcf5ae81a9addbd914115cdebcbb2217e4f0ed8982e146", size = 148039 }, - { url = "https://files.pythonhosted.org/packages/85/e4/65699e8ab3014ecbe6f5c71d1a55d810fb716bbfd74f6283d5c2aa87febf/charset_normalizer-3.4.1-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:09b5e6733cbd160dcc09589227187e242a30a49ca5cefa5a7edd3f9d19ed53fd", size = 151939 }, - { url = "https://files.pythonhosted.org/packages/b1/82/8e9fe624cc5374193de6860aba3ea8070f584c8565ee77c168ec13274bd2/charset_normalizer-3.4.1-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:5777ee0881f9499ed0f71cc82cf873d9a0ca8af166dfa0af8ec4e675b7df48e6", size = 149075 }, - { url = "https://files.pythonhosted.org/packages/3d/7b/82865ba54c765560c8433f65e8acb9217cb839a9e32b42af4aa8e945870f/charset_normalizer-3.4.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:237bdbe6159cff53b4f24f397d43c6336c6b0b42affbe857970cefbb620911c8", size = 144340 }, - { url = "https://files.pythonhosted.org/packages/b5/b6/9674a4b7d4d99a0d2df9b215da766ee682718f88055751e1e5e753c82db0/charset_normalizer-3.4.1-cp311-cp311-win32.whl", hash = "sha256:8417cb1f36cc0bc7eaba8ccb0e04d55f0ee52df06df3ad55259b9a323555fc8b", size = 95205 }, - { url = "https://files.pythonhosted.org/packages/1e/ab/45b180e175de4402dcf7547e4fb617283bae54ce35c27930a6f35b6bef15/charset_normalizer-3.4.1-cp311-cp311-win_amd64.whl", hash = "sha256:d7f50a1f8c450f3925cb367d011448c39239bb3eb4117c36a6d354794de4ce76", size = 102441 }, - { url = "https://files.pythonhosted.org/packages/0a/9a/dd1e1cdceb841925b7798369a09279bd1cf183cef0f9ddf15a3a6502ee45/charset_normalizer-3.4.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:73d94b58ec7fecbc7366247d3b0b10a21681004153238750bb67bd9012414545", size = 196105 }, - { url = "https://files.pythonhosted.org/packages/d3/8c/90bfabf8c4809ecb648f39794cf2a84ff2e7d2a6cf159fe68d9a26160467/charset_normalizer-3.4.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dad3e487649f498dd991eeb901125411559b22e8d7ab25d3aeb1af367df5efd7", size = 140404 }, - { url = "https://files.pythonhosted.org/packages/ad/8f/e410d57c721945ea3b4f1a04b74f70ce8fa800d393d72899f0a40526401f/charset_normalizer-3.4.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c30197aa96e8eed02200a83fba2657b4c3acd0f0aa4bdc9f6c1af8e8962e0757", size = 150423 }, - { url = "https://files.pythonhosted.org/packages/f0/b8/e6825e25deb691ff98cf5c9072ee0605dc2acfca98af70c2d1b1bc75190d/charset_normalizer-3.4.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2369eea1ee4a7610a860d88f268eb39b95cb588acd7235e02fd5a5601773d4fa", size = 143184 }, - { url = "https://files.pythonhosted.org/packages/3e/a2/513f6cbe752421f16d969e32f3583762bfd583848b763913ddab8d9bfd4f/charset_normalizer-3.4.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc2722592d8998c870fa4e290c2eec2c1569b87fe58618e67d38b4665dfa680d", size = 145268 }, - { url = "https://files.pythonhosted.org/packages/74/94/8a5277664f27c3c438546f3eb53b33f5b19568eb7424736bdc440a88a31f/charset_normalizer-3.4.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ffc9202a29ab3920fa812879e95a9e78b2465fd10be7fcbd042899695d75e616", size = 147601 }, - { url = "https://files.pythonhosted.org/packages/7c/5f/6d352c51ee763623a98e31194823518e09bfa48be2a7e8383cf691bbb3d0/charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:804a4d582ba6e5b747c625bf1255e6b1507465494a40a2130978bda7b932c90b", size = 141098 }, - { url = "https://files.pythonhosted.org/packages/78/d4/f5704cb629ba5ab16d1d3d741396aec6dc3ca2b67757c45b0599bb010478/charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:0f55e69f030f7163dffe9fd0752b32f070566451afe180f99dbeeb81f511ad8d", size = 149520 }, - { url = "https://files.pythonhosted.org/packages/c5/96/64120b1d02b81785f222b976c0fb79a35875457fa9bb40827678e54d1bc8/charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:c4c3e6da02df6fa1410a7680bd3f63d4f710232d3139089536310d027950696a", size = 152852 }, - { url = "https://files.pythonhosted.org/packages/84/c9/98e3732278a99f47d487fd3468bc60b882920cef29d1fa6ca460a1fdf4e6/charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:5df196eb874dae23dcfb968c83d4f8fdccb333330fe1fc278ac5ceeb101003a9", size = 150488 }, - { url = "https://files.pythonhosted.org/packages/13/0e/9c8d4cb99c98c1007cc11eda969ebfe837bbbd0acdb4736d228ccaabcd22/charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e358e64305fe12299a08e08978f51fc21fac060dcfcddd95453eabe5b93ed0e1", size = 146192 }, - { url = "https://files.pythonhosted.org/packages/b2/21/2b6b5b860781a0b49427309cb8670785aa543fb2178de875b87b9cc97746/charset_normalizer-3.4.1-cp312-cp312-win32.whl", hash = "sha256:9b23ca7ef998bc739bf6ffc077c2116917eabcc901f88da1b9856b210ef63f35", size = 95550 }, - { url = "https://files.pythonhosted.org/packages/21/5b/1b390b03b1d16c7e382b561c5329f83cc06623916aab983e8ab9239c7d5c/charset_normalizer-3.4.1-cp312-cp312-win_amd64.whl", hash = "sha256:6ff8a4a60c227ad87030d76e99cd1698345d4491638dfa6673027c48b3cd395f", size = 102785 }, - { url = "https://files.pythonhosted.org/packages/38/94/ce8e6f63d18049672c76d07d119304e1e2d7c6098f0841b51c666e9f44a0/charset_normalizer-3.4.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:aabfa34badd18f1da5ec1bc2715cadc8dca465868a4e73a0173466b688f29dda", size = 195698 }, - { url = "https://files.pythonhosted.org/packages/24/2e/dfdd9770664aae179a96561cc6952ff08f9a8cd09a908f259a9dfa063568/charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:22e14b5d70560b8dd51ec22863f370d1e595ac3d024cb8ad7d308b4cd95f8313", size = 140162 }, - { url = "https://files.pythonhosted.org/packages/24/4e/f646b9093cff8fc86f2d60af2de4dc17c759de9d554f130b140ea4738ca6/charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8436c508b408b82d87dc5f62496973a1805cd46727c34440b0d29d8a2f50a6c9", size = 150263 }, - { url = "https://files.pythonhosted.org/packages/5e/67/2937f8d548c3ef6e2f9aab0f6e21001056f692d43282b165e7c56023e6dd/charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2d074908e1aecee37a7635990b2c6d504cd4766c7bc9fc86d63f9c09af3fa11b", size = 142966 }, - { url = "https://files.pythonhosted.org/packages/52/ed/b7f4f07de100bdb95c1756d3a4d17b90c1a3c53715c1a476f8738058e0fa/charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:955f8851919303c92343d2f66165294848d57e9bba6cf6e3625485a70a038d11", size = 144992 }, - { url = "https://files.pythonhosted.org/packages/96/2c/d49710a6dbcd3776265f4c923bb73ebe83933dfbaa841c5da850fe0fd20b/charset_normalizer-3.4.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:44ecbf16649486d4aebafeaa7ec4c9fed8b88101f4dd612dcaf65d5e815f837f", size = 147162 }, - { url = "https://files.pythonhosted.org/packages/b4/41/35ff1f9a6bd380303dea55e44c4933b4cc3c4850988927d4082ada230273/charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:0924e81d3d5e70f8126529951dac65c1010cdf117bb75eb02dd12339b57749dd", size = 140972 }, - { url = "https://files.pythonhosted.org/packages/fb/43/c6a0b685fe6910d08ba971f62cd9c3e862a85770395ba5d9cad4fede33ab/charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:2967f74ad52c3b98de4c3b32e1a44e32975e008a9cd2a8cc8966d6a5218c5cb2", size = 149095 }, - { url = "https://files.pythonhosted.org/packages/4c/ff/a9a504662452e2d2878512115638966e75633519ec11f25fca3d2049a94a/charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:c75cb2a3e389853835e84a2d8fb2b81a10645b503eca9bcb98df6b5a43eb8886", size = 152668 }, - { url = "https://files.pythonhosted.org/packages/6c/71/189996b6d9a4b932564701628af5cee6716733e9165af1d5e1b285c530ed/charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:09b26ae6b1abf0d27570633b2b078a2a20419c99d66fb2823173d73f188ce601", size = 150073 }, - { url = "https://files.pythonhosted.org/packages/e4/93/946a86ce20790e11312c87c75ba68d5f6ad2208cfb52b2d6a2c32840d922/charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:fa88b843d6e211393a37219e6a1c1df99d35e8fd90446f1118f4216e307e48cd", size = 145732 }, - { url = "https://files.pythonhosted.org/packages/cd/e5/131d2fb1b0dddafc37be4f3a2fa79aa4c037368be9423061dccadfd90091/charset_normalizer-3.4.1-cp313-cp313-win32.whl", hash = "sha256:eb8178fe3dba6450a3e024e95ac49ed3400e506fd4e9e5c32d30adda88cbd407", size = 95391 }, - { url = "https://files.pythonhosted.org/packages/27/f2/4f9a69cc7712b9b5ad8fdb87039fd89abba997ad5cbe690d1835d40405b0/charset_normalizer-3.4.1-cp313-cp313-win_amd64.whl", hash = "sha256:b1ac5992a838106edb89654e0aebfc24f5848ae2547d22c2c3f66454daa11971", size = 102702 }, - { url = "https://files.pythonhosted.org/packages/0e/f6/65ecc6878a89bb1c23a086ea335ad4bf21a588990c3f535a227b9eea9108/charset_normalizer-3.4.1-py3-none-any.whl", hash = "sha256:d98b1668f06378c6dbefec3b92299716b931cd4e6061f3c875a71ced1780ab85", size = 49767 }, -] - [[package]] name = "click" -version = "8.1.7" +version = "8.1.8" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "colorama", marker = "platform_system == 'Windows'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/96/d3/f04c7bfcf5c1862a2a5b845c6b2b360488cf47af55dfa79c98f6a6bf98b5/click-8.1.7.tar.gz", hash = "sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de", size = 336121 } +sdist = { url = "https://files.pythonhosted.org/packages/b9/2e/0090cbf739cee7d23781ad4b89a9894a41538e4fcf4c31dcdd705b78eb8b/click-8.1.8.tar.gz", hash = "sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a", size = 226593 } wheels = [ - { url = "https://files.pythonhosted.org/packages/00/2e/d53fa4befbf2cfa713304affc7ca780ce4fc1fd8710527771b58311a3229/click-8.1.7-py3-none-any.whl", hash = "sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28", size = 97941 }, + { url = "https://files.pythonhosted.org/packages/7e/d4/7ebdbd03970677812aac39c869717059dbb71a4cfc033ca6e5221787892c/click-8.1.8-py3-none-any.whl", hash = "sha256:63c132bbbed01578a06712a2d1f497bb62d9c1c0d329b7903a866228027263b2", size = 98188 }, ] [[package]] @@ -104,7 +47,6 @@ source = { editable = "." } dependencies = [ { name = "numpy" }, { name = "polars" }, - { name = "requests" }, { name = "typer" }, ] @@ -119,7 +61,6 @@ dev = [ requires-dist = [ { name = "numpy", specifier = ">=2.0.1" }, { name = "polars", specifier = ">=1.4.1" }, - { name = "requests", specifier = ">=2.32.3" }, { name = "typer", specifier = ">=0.12.3" }, ] @@ -141,29 +82,20 @@ wheels = [ [[package]] name = "filelock" -version = "3.16.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/9d/db/3ef5bb276dae18d6ec2124224403d1d67bccdbefc17af4cc8f553e341ab1/filelock-3.16.1.tar.gz", hash = "sha256:c249fbfcd5db47e5e2d6d62198e565475ee65e4831e2561c8e313fa7eb961435", size = 18037 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/b9/f8/feced7779d755758a52d1f6635d990b8d98dc0a29fa568bbe0625f18fdf3/filelock-3.16.1-py3-none-any.whl", hash = "sha256:2082e5703d51fbf98ea75855d9d5527e33d8ff23099bec374a134febee6946b0", size = 16163 }, -] - -[[package]] -name = "idna" -version = "3.10" +version = "3.18.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/f1/70/7703c29685631f5a7590aa73f1f1d3fa9a380e654b86af429e0934a32f7d/idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9", size = 190490 } +sdist = { url = "https://files.pythonhosted.org/packages/0a/10/c23352565a6544bdc5353e0b15fc1c563352101f30e24bf500207a54df9a/filelock-3.18.0.tar.gz", hash = "sha256:adbc88eabb99d2fec8c9c1b229b171f18afa655400173ddc653d5d01501fb9f2", size = 18075 } wheels = [ - { url = "https://files.pythonhosted.org/packages/76/c6/c88e154df9c4e1a2a66ccf0005a88dfb2650c1dffb6f5ce603dfbd452ce3/idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3", size = 70442 }, + { url = "https://files.pythonhosted.org/packages/4d/36/2a115987e2d8c300a974597416d9de88f2444426de9571f4b59b2cca3acc/filelock-3.18.0-py3-none-any.whl", hash = "sha256:c401f4f8377c4464e6db25fff06205fd89bdd83b65eb0488ed1b160f780e21de", size = 16215 }, ] [[package]] name = "iniconfig" -version = "2.0.0" +version = "2.1.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/d7/4b/cbd8e699e64a6f16ca3a8220661b5f83792b3017d0f79807cb8708d33913/iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3", size = 4646 } +sdist = { url = "https://files.pythonhosted.org/packages/f2/97/ebf4da567aa6827c909642694d71c9fcf53e5b504f2d96afea02718862f3/iniconfig-2.1.0.tar.gz", hash = "sha256:3abbd2e30b36733fee78f9c7f7308f2d0050e88f0087fd25c2645f63c773e1c7", size = 4793 } wheels = [ - { url = "https://files.pythonhosted.org/packages/ef/a6/62565a6e1cf69e10f5727360368e451d4b7f58beeac6173dc9db836a5b46/iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374", size = 5892 }, + { url = "https://files.pythonhosted.org/packages/2c/e1/e6716421ea10d38022b952c159d5161ca1193197fb744506875fbb87ea7b/iniconfig-2.1.0-py3-none-any.whl", hash = "sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760", size = 6050 }, ] [[package]] @@ -189,117 +121,97 @@ wheels = [ [[package]] name = "numpy" -version = "2.1.3" +version = "2.2.6" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/25/ca/1166b75c21abd1da445b97bf1fa2f14f423c6cfb4fc7c4ef31dccf9f6a94/numpy-2.1.3.tar.gz", hash = "sha256:aa08e04e08aaf974d4458def539dece0d28146d866a39da5639596f4921fd761", size = 20166090 } +sdist = { url = "https://files.pythonhosted.org/packages/76/21/7d2a95e4bba9dc13d043ee156a356c0a8f0c6309dff6b21b4d71a073b8a8/numpy-2.2.6.tar.gz", hash = "sha256:e29554e2bef54a90aa5cc07da6ce955accb83f21ab5de01a62c8478897b264fd", size = 20276440 } wheels = [ - { url = "https://files.pythonhosted.org/packages/ad/81/c8167192eba5247593cd9d305ac236847c2912ff39e11402e72ae28a4985/numpy-2.1.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4d1167c53b93f1f5d8a139a742b3c6f4d429b54e74e6b57d0eff40045187b15d", size = 21156252 }, - { url = "https://files.pythonhosted.org/packages/da/74/5a60003fc3d8a718d830b08b654d0eea2d2db0806bab8f3c2aca7e18e010/numpy-2.1.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c80e4a09b3d95b4e1cac08643f1152fa71a0a821a2d4277334c88d54b2219a41", size = 13784119 }, - { url = "https://files.pythonhosted.org/packages/47/7c/864cb966b96fce5e63fcf25e1e4d957fe5725a635e5f11fe03f39dd9d6b5/numpy-2.1.3-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:576a1c1d25e9e02ed7fa5477f30a127fe56debd53b8d2c89d5578f9857d03ca9", size = 5352978 }, - { url = "https://files.pythonhosted.org/packages/09/ac/61d07930a4993dd9691a6432de16d93bbe6aa4b1c12a5e573d468eefc1ca/numpy-2.1.3-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:973faafebaae4c0aaa1a1ca1ce02434554d67e628b8d805e61f874b84e136b09", size = 6892570 }, - { url = "https://files.pythonhosted.org/packages/27/2f/21b94664f23af2bb52030653697c685022119e0dc93d6097c3cb45bce5f9/numpy-2.1.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:762479be47a4863e261a840e8e01608d124ee1361e48b96916f38b119cfda04a", size = 13896715 }, - { url = "https://files.pythonhosted.org/packages/7a/f0/80811e836484262b236c684a75dfc4ba0424bc670e765afaa911468d9f39/numpy-2.1.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc6f24b3d1ecc1eebfbf5d6051faa49af40b03be1aaa781ebdadcbc090b4539b", size = 16339644 }, - { url = "https://files.pythonhosted.org/packages/fa/81/ce213159a1ed8eb7d88a2a6ef4fbdb9e4ffd0c76b866c350eb4e3c37e640/numpy-2.1.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:17ee83a1f4fef3c94d16dc1802b998668b5419362c8a4f4e8a491de1b41cc3ee", size = 16712217 }, - { url = "https://files.pythonhosted.org/packages/7d/84/4de0b87d5a72f45556b2a8ee9fc8801e8518ec867fc68260c1f5dcb3903f/numpy-2.1.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:15cb89f39fa6d0bdfb600ea24b250e5f1a3df23f901f51c8debaa6a5d122b2f0", size = 14399053 }, - { url = "https://files.pythonhosted.org/packages/7e/1c/e5fabb9ad849f9d798b44458fd12a318d27592d4bc1448e269dec070ff04/numpy-2.1.3-cp311-cp311-win32.whl", hash = "sha256:d9beb777a78c331580705326d2367488d5bc473b49a9bc3036c154832520aca9", size = 6534741 }, - { url = "https://files.pythonhosted.org/packages/1e/48/a9a4b538e28f854bfb62e1dea3c8fea12e90216a276c7777ae5345ff29a7/numpy-2.1.3-cp311-cp311-win_amd64.whl", hash = "sha256:d89dd2b6da69c4fff5e39c28a382199ddedc3a5be5390115608345dec660b9e2", size = 12869487 }, - { url = "https://files.pythonhosted.org/packages/8a/f0/385eb9970309643cbca4fc6eebc8bb16e560de129c91258dfaa18498da8b/numpy-2.1.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:f55ba01150f52b1027829b50d70ef1dafd9821ea82905b63936668403c3b471e", size = 20849658 }, - { url = "https://files.pythonhosted.org/packages/54/4a/765b4607f0fecbb239638d610d04ec0a0ded9b4951c56dc68cef79026abf/numpy-2.1.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:13138eadd4f4da03074851a698ffa7e405f41a0845a6b1ad135b81596e4e9958", size = 13492258 }, - { url = "https://files.pythonhosted.org/packages/bd/a7/2332679479c70b68dccbf4a8eb9c9b5ee383164b161bee9284ac141fbd33/numpy-2.1.3-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:a6b46587b14b888e95e4a24d7b13ae91fa22386c199ee7b418f449032b2fa3b8", size = 5090249 }, - { url = "https://files.pythonhosted.org/packages/c1/67/4aa00316b3b981a822c7a239d3a8135be2a6945d1fd11d0efb25d361711a/numpy-2.1.3-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:0fa14563cc46422e99daef53d725d0c326e99e468a9320a240affffe87852564", size = 6621704 }, - { url = "https://files.pythonhosted.org/packages/5e/da/1a429ae58b3b6c364eeec93bf044c532f2ff7b48a52e41050896cf15d5b1/numpy-2.1.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8637dcd2caa676e475503d1f8fdb327bc495554e10838019651b76d17b98e512", size = 13606089 }, - { url = "https://files.pythonhosted.org/packages/9e/3e/3757f304c704f2f0294a6b8340fcf2be244038be07da4cccf390fa678a9f/numpy-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2312b2aa89e1f43ecea6da6ea9a810d06aae08321609d8dc0d0eda6d946a541b", size = 16043185 }, - { url = "https://files.pythonhosted.org/packages/43/97/75329c28fea3113d00c8d2daf9bc5828d58d78ed661d8e05e234f86f0f6d/numpy-2.1.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:a38c19106902bb19351b83802531fea19dee18e5b37b36454f27f11ff956f7fc", size = 16410751 }, - { url = "https://files.pythonhosted.org/packages/ad/7a/442965e98b34e0ae9da319f075b387bcb9a1e0658276cc63adb8c9686f7b/numpy-2.1.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:02135ade8b8a84011cbb67dc44e07c58f28575cf9ecf8ab304e51c05528c19f0", size = 14082705 }, - { url = "https://files.pythonhosted.org/packages/ac/b6/26108cf2cfa5c7e03fb969b595c93131eab4a399762b51ce9ebec2332e80/numpy-2.1.3-cp312-cp312-win32.whl", hash = "sha256:e6988e90fcf617da2b5c78902fe8e668361b43b4fe26dbf2d7b0f8034d4cafb9", size = 6239077 }, - { url = "https://files.pythonhosted.org/packages/a6/84/fa11dad3404b7634aaab50733581ce11e5350383311ea7a7010f464c0170/numpy-2.1.3-cp312-cp312-win_amd64.whl", hash = "sha256:0d30c543f02e84e92c4b1f415b7c6b5326cbe45ee7882b6b77db7195fb971e3a", size = 12566858 }, - { url = "https://files.pythonhosted.org/packages/4d/0b/620591441457e25f3404c8057eb924d04f161244cb8a3680d529419aa86e/numpy-2.1.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:96fe52fcdb9345b7cd82ecd34547fca4321f7656d500eca497eb7ea5a926692f", size = 20836263 }, - { url = "https://files.pythonhosted.org/packages/45/e1/210b2d8b31ce9119145433e6ea78046e30771de3fe353f313b2778142f34/numpy-2.1.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f653490b33e9c3a4c1c01d41bc2aef08f9475af51146e4a7710c450cf9761598", size = 13507771 }, - { url = "https://files.pythonhosted.org/packages/55/44/aa9ee3caee02fa5a45f2c3b95cafe59c44e4b278fbbf895a93e88b308555/numpy-2.1.3-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:dc258a761a16daa791081d026f0ed4399b582712e6fc887a95af09df10c5ca57", size = 5075805 }, - { url = "https://files.pythonhosted.org/packages/78/d6/61de6e7e31915ba4d87bbe1ae859e83e6582ea14c6add07c8f7eefd8488f/numpy-2.1.3-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:016d0f6f5e77b0f0d45d77387ffa4bb89816b57c835580c3ce8e099ef830befe", size = 6608380 }, - { url = "https://files.pythonhosted.org/packages/3e/46/48bdf9b7241e317e6cf94276fe11ba673c06d1fdf115d8b4ebf616affd1a/numpy-2.1.3-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c181ba05ce8299c7aa3125c27b9c2167bca4a4445b7ce73d5febc411ca692e43", size = 13602451 }, - { url = "https://files.pythonhosted.org/packages/70/50/73f9a5aa0810cdccda9c1d20be3cbe4a4d6ea6bfd6931464a44c95eef731/numpy-2.1.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5641516794ca9e5f8a4d17bb45446998c6554704d888f86df9b200e66bdcce56", size = 16039822 }, - { url = "https://files.pythonhosted.org/packages/ad/cd/098bc1d5a5bc5307cfc65ee9369d0ca658ed88fbd7307b0d49fab6ca5fa5/numpy-2.1.3-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:ea4dedd6e394a9c180b33c2c872b92f7ce0f8e7ad93e9585312b0c5a04777a4a", size = 16411822 }, - { url = "https://files.pythonhosted.org/packages/83/a2/7d4467a2a6d984549053b37945620209e702cf96a8bc658bc04bba13c9e2/numpy-2.1.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:b0df3635b9c8ef48bd3be5f862cf71b0a4716fa0e702155c45067c6b711ddcef", size = 14079598 }, - { url = "https://files.pythonhosted.org/packages/e9/6a/d64514dcecb2ee70bfdfad10c42b76cab657e7ee31944ff7a600f141d9e9/numpy-2.1.3-cp313-cp313-win32.whl", hash = "sha256:50ca6aba6e163363f132b5c101ba078b8cbd3fa92c7865fd7d4d62d9779ac29f", size = 6236021 }, - { url = "https://files.pythonhosted.org/packages/bb/f9/12297ed8d8301a401e7d8eb6b418d32547f1d700ed3c038d325a605421a4/numpy-2.1.3-cp313-cp313-win_amd64.whl", hash = "sha256:747641635d3d44bcb380d950679462fae44f54b131be347d5ec2bce47d3df9ed", size = 12560405 }, - { url = "https://files.pythonhosted.org/packages/a7/45/7f9244cd792e163b334e3a7f02dff1239d2890b6f37ebf9e82cbe17debc0/numpy-2.1.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:996bb9399059c5b82f76b53ff8bb686069c05acc94656bb259b1d63d04a9506f", size = 20859062 }, - { url = "https://files.pythonhosted.org/packages/b1/b4/a084218e7e92b506d634105b13e27a3a6645312b93e1c699cc9025adb0e1/numpy-2.1.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:45966d859916ad02b779706bb43b954281db43e185015df6eb3323120188f9e4", size = 13515839 }, - { url = "https://files.pythonhosted.org/packages/27/45/58ed3f88028dcf80e6ea580311dc3edefdd94248f5770deb980500ef85dd/numpy-2.1.3-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:baed7e8d7481bfe0874b566850cb0b85243e982388b7b23348c6db2ee2b2ae8e", size = 5116031 }, - { url = "https://files.pythonhosted.org/packages/37/a8/eb689432eb977d83229094b58b0f53249d2209742f7de529c49d61a124a0/numpy-2.1.3-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:a9f7f672a3388133335589cfca93ed468509cb7b93ba3105fce780d04a6576a0", size = 6629977 }, - { url = "https://files.pythonhosted.org/packages/42/a3/5355ad51ac73c23334c7caaed01adadfda49544f646fcbfbb4331deb267b/numpy-2.1.3-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7aac50327da5d208db2eec22eb11e491e3fe13d22653dce51b0f4109101b408", size = 13575951 }, - { url = "https://files.pythonhosted.org/packages/c4/70/ea9646d203104e647988cb7d7279f135257a6b7e3354ea6c56f8bafdb095/numpy-2.1.3-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4394bc0dbd074b7f9b52024832d16e019decebf86caf909d94f6b3f77a8ee3b6", size = 16022655 }, - { url = "https://files.pythonhosted.org/packages/14/ce/7fc0612903e91ff9d0b3f2eda4e18ef9904814afcae5b0f08edb7f637883/numpy-2.1.3-cp313-cp313t-musllinux_1_1_x86_64.whl", hash = "sha256:50d18c4358a0a8a53f12a8ba9d772ab2d460321e6a93d6064fc22443d189853f", size = 16399902 }, - { url = "https://files.pythonhosted.org/packages/ef/62/1d3204313357591c913c32132a28f09a26357e33ea3c4e2fe81269e0dca1/numpy-2.1.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:14e253bd43fc6b37af4921b10f6add6925878a42a0c5fe83daee390bca80bc17", size = 14067180 }, - { url = "https://files.pythonhosted.org/packages/24/d7/78a40ed1d80e23a774cb8a34ae8a9493ba1b4271dde96e56ccdbab1620ef/numpy-2.1.3-cp313-cp313t-win32.whl", hash = "sha256:08788d27a5fd867a663f6fc753fd7c3ad7e92747efc73c53bca2f19f8bc06f48", size = 6291907 }, - { url = "https://files.pythonhosted.org/packages/86/09/a5ab407bd7f5f5599e6a9261f964ace03a73e7c6928de906981c31c38082/numpy-2.1.3-cp313-cp313t-win_amd64.whl", hash = "sha256:2564fbdf2b99b3f815f2107c1bbc93e2de8ee655a69c261363a1172a79a257d4", size = 12644098 }, + { url = "https://files.pythonhosted.org/packages/f9/5c/6657823f4f594f72b5471f1db1ab12e26e890bb2e41897522d134d2a3e81/numpy-2.2.6-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:0811bb762109d9708cca4d0b13c4f67146e3c3b7cf8d34018c722adb2d957c84", size = 20867828 }, + { url = "https://files.pythonhosted.org/packages/dc/9e/14520dc3dadf3c803473bd07e9b2bd1b69bc583cb2497b47000fed2fa92f/numpy-2.2.6-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:287cc3162b6f01463ccd86be154f284d0893d2b3ed7292439ea97eafa8170e0b", size = 14143006 }, + { url = "https://files.pythonhosted.org/packages/4f/06/7e96c57d90bebdce9918412087fc22ca9851cceaf5567a45c1f404480e9e/numpy-2.2.6-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:f1372f041402e37e5e633e586f62aa53de2eac8d98cbfb822806ce4bbefcb74d", size = 5076765 }, + { url = "https://files.pythonhosted.org/packages/73/ed/63d920c23b4289fdac96ddbdd6132e9427790977d5457cd132f18e76eae0/numpy-2.2.6-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:55a4d33fa519660d69614a9fad433be87e5252f4b03850642f88993f7b2ca566", size = 6617736 }, + { url = "https://files.pythonhosted.org/packages/85/c5/e19c8f99d83fd377ec8c7e0cf627a8049746da54afc24ef0a0cb73d5dfb5/numpy-2.2.6-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f92729c95468a2f4f15e9bb94c432a9229d0d50de67304399627a943201baa2f", size = 14010719 }, + { url = "https://files.pythonhosted.org/packages/19/49/4df9123aafa7b539317bf6d342cb6d227e49f7a35b99c287a6109b13dd93/numpy-2.2.6-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1bc23a79bfabc5d056d106f9befb8d50c31ced2fbc70eedb8155aec74a45798f", size = 16526072 }, + { url = "https://files.pythonhosted.org/packages/b2/6c/04b5f47f4f32f7c2b0e7260442a8cbcf8168b0e1a41ff1495da42f42a14f/numpy-2.2.6-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:e3143e4451880bed956e706a3220b4e5cf6172ef05fcc397f6f36a550b1dd868", size = 15503213 }, + { url = "https://files.pythonhosted.org/packages/17/0a/5cd92e352c1307640d5b6fec1b2ffb06cd0dabe7d7b8227f97933d378422/numpy-2.2.6-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:b4f13750ce79751586ae2eb824ba7e1e8dba64784086c98cdbbcc6a42112ce0d", size = 18316632 }, + { url = "https://files.pythonhosted.org/packages/f0/3b/5cba2b1d88760ef86596ad0f3d484b1cbff7c115ae2429678465057c5155/numpy-2.2.6-cp313-cp313-win32.whl", hash = "sha256:5beb72339d9d4fa36522fc63802f469b13cdbe4fdab4a288f0c441b74272ebfd", size = 6244532 }, + { url = "https://files.pythonhosted.org/packages/cb/3b/d58c12eafcb298d4e6d0d40216866ab15f59e55d148a5658bb3132311fcf/numpy-2.2.6-cp313-cp313-win_amd64.whl", hash = "sha256:b0544343a702fa80c95ad5d3d608ea3599dd54d4632df855e4c8d24eb6ecfa1c", size = 12610885 }, + { url = "https://files.pythonhosted.org/packages/6b/9e/4bf918b818e516322db999ac25d00c75788ddfd2d2ade4fa66f1f38097e1/numpy-2.2.6-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:0bca768cd85ae743b2affdc762d617eddf3bcf8724435498a1e80132d04879e6", size = 20963467 }, + { url = "https://files.pythonhosted.org/packages/61/66/d2de6b291507517ff2e438e13ff7b1e2cdbdb7cb40b3ed475377aece69f9/numpy-2.2.6-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:fc0c5673685c508a142ca65209b4e79ed6740a4ed6b2267dbba90f34b0b3cfda", size = 14225144 }, + { url = "https://files.pythonhosted.org/packages/e4/25/480387655407ead912e28ba3a820bc69af9adf13bcbe40b299d454ec011f/numpy-2.2.6-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:5bd4fc3ac8926b3819797a7c0e2631eb889b4118a9898c84f585a54d475b7e40", size = 5200217 }, + { url = "https://files.pythonhosted.org/packages/aa/4a/6e313b5108f53dcbf3aca0c0f3e9c92f4c10ce57a0a721851f9785872895/numpy-2.2.6-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:fee4236c876c4e8369388054d02d0e9bb84821feb1a64dd59e137e6511a551f8", size = 6712014 }, + { url = "https://files.pythonhosted.org/packages/b7/30/172c2d5c4be71fdf476e9de553443cf8e25feddbe185e0bd88b096915bcc/numpy-2.2.6-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e1dda9c7e08dc141e0247a5b8f49cf05984955246a327d4c48bda16821947b2f", size = 14077935 }, + { url = "https://files.pythonhosted.org/packages/12/fb/9e743f8d4e4d3c710902cf87af3512082ae3d43b945d5d16563f26ec251d/numpy-2.2.6-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f447e6acb680fd307f40d3da4852208af94afdfab89cf850986c3ca00562f4fa", size = 16600122 }, + { url = "https://files.pythonhosted.org/packages/12/75/ee20da0e58d3a66f204f38916757e01e33a9737d0b22373b3eb5a27358f9/numpy-2.2.6-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:389d771b1623ec92636b0786bc4ae56abafad4a4c513d36a55dce14bd9ce8571", size = 15586143 }, + { url = "https://files.pythonhosted.org/packages/76/95/bef5b37f29fc5e739947e9ce5179ad402875633308504a52d188302319c8/numpy-2.2.6-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:8e9ace4a37db23421249ed236fdcdd457d671e25146786dfc96835cd951aa7c1", size = 18385260 }, + { url = "https://files.pythonhosted.org/packages/09/04/f2f83279d287407cf36a7a8053a5abe7be3622a4363337338f2585e4afda/numpy-2.2.6-cp313-cp313t-win32.whl", hash = "sha256:038613e9fb8c72b0a41f025a7e4c3f0b7a1b5d768ece4796b674c8f3fe13efff", size = 6377225 }, + { url = "https://files.pythonhosted.org/packages/67/0e/35082d13c09c02c011cf21570543d202ad929d961c02a147493cb0c2bdf5/numpy-2.2.6-cp313-cp313t-win_amd64.whl", hash = "sha256:6031dd6dfecc0cf9f668681a37648373bddd6421fff6c66ec1624eed0180ee06", size = 12771374 }, ] [[package]] name = "packaging" -version = "24.2" +version = "25.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/d0/63/68dbb6eb2de9cb10ee4c9c14a0148804425e13c4fb20d61cce69f53106da/packaging-24.2.tar.gz", hash = "sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f", size = 163950 } +sdist = { url = "https://files.pythonhosted.org/packages/a1/d4/1fc4078c65507b51b96ca8f8c3ba19e6a61c8253c72794544580a7b6c24d/packaging-25.0.tar.gz", hash = "sha256:d443872c98d677bf60f6a1f2f8c1cb748e8fe762d2bf9d3148b5599295b0fc4f", size = 165727 } wheels = [ - { url = "https://files.pythonhosted.org/packages/88/ef/eb23f262cca3c0c4eb7ab1933c3b1f03d021f2c48f54763065b6f0e321be/packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759", size = 65451 }, + { url = "https://files.pythonhosted.org/packages/20/12/38679034af332785aac8774540895e234f4d07f7545804097de4b666afd8/packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484", size = 66469 }, ] [[package]] name = "platformdirs" -version = "4.3.6" +version = "4.3.8" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/13/fc/128cc9cb8f03208bdbf93d3aa862e16d376844a14f9a0ce5cf4507372de4/platformdirs-4.3.6.tar.gz", hash = "sha256:357fb2acbc885b0419afd3ce3ed34564c13c9b95c89360cd9563f73aa5e2b907", size = 21302 } +sdist = { url = "https://files.pythonhosted.org/packages/fe/8b/3c73abc9c759ecd3f1f7ceff6685840859e8070c4d947c93fae71f6a0bf2/platformdirs-4.3.8.tar.gz", hash = "sha256:3d512d96e16bcb959a814c9f348431070822a6496326a4be0911c40b5a74c2bc", size = 21362 } wheels = [ - { url = "https://files.pythonhosted.org/packages/3c/a6/bc1012356d8ece4d66dd75c4b9fc6c1f6650ddd5991e421177d9f8f671be/platformdirs-4.3.6-py3-none-any.whl", hash = "sha256:73e575e1408ab8103900836b97580d5307456908a03e92031bab39e4554cc3fb", size = 18439 }, + { url = "https://files.pythonhosted.org/packages/fe/39/979e8e21520d4e47a0bbe349e2713c0aac6f3d853d0e5b34d76206c439aa/platformdirs-4.3.8-py3-none-any.whl", hash = "sha256:ff7059bb7eb1179e2685604f4aaf157cfd9535242bd23742eadc3c13542139b4", size = 18567 }, ] [[package]] name = "pluggy" -version = "1.5.0" +version = "1.6.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/96/2d/02d4312c973c6050a18b314a5ad0b3210edb65a906f868e31c111dede4a6/pluggy-1.5.0.tar.gz", hash = "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1", size = 67955 } +sdist = { url = "https://files.pythonhosted.org/packages/f9/e2/3e91f31a7d2b083fe6ef3fa267035b518369d9511ffab804f839851d2779/pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3", size = 69412 } wheels = [ - { url = "https://files.pythonhosted.org/packages/88/5f/e351af9a41f866ac3f1fac4ca0613908d9a41741cfcf2228f4ad853b697d/pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669", size = 20556 }, + { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538 }, ] [[package]] name = "polars" -version = "1.21.0" +version = "1.29.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/98/49/3733f0a34fd2504264579bad2c66021e175ab548b21767340721e10a1dcf/polars-1.21.0.tar.gz", hash = "sha256:7692d0fe0fb4faac18ef9423de55789e289f4d3f26d42519bd23ef8afb672d62", size = 4323012 } +sdist = { url = "https://files.pythonhosted.org/packages/0b/92/8d0e80fef779a392b1a736b554ffba62403026bad7df8a9de8b61dce018f/polars-1.29.0.tar.gz", hash = "sha256:d2acb71fce1ff0ea76db5f648abd91a7a6c460fafabce9a2e8175184efa00d02", size = 4582973 } wheels = [ - { url = "https://files.pythonhosted.org/packages/d4/c3/976f0251e96c957143905530b236f1e278b28a8eb5850eab94595bf5d220/polars-1.21.0-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:063f8807f633f8fd15458a43971d930f6ee568b8e95936d7736c9054fc4f6f52", size = 31015281 }, - { url = "https://files.pythonhosted.org/packages/94/33/c55c19dde172e34dd7a5074a1dcac6472074236131698269db236550283e/polars-1.21.0-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:519863e0990e3323e7a32fc66bac3ad9da51938a1ffce6c09a92e0b1adb026a5", size = 28033973 }, - { url = "https://files.pythonhosted.org/packages/da/72/b108cd7e063f03f5b029edbd73ca514291dd3e3d88617965d09df64d71ba/polars-1.21.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bbecddca35c57efde99070517db5d2c63d4c6d0e3c992123ba3be93e86e7bfac", size = 31641844 }, - { url = "https://files.pythonhosted.org/packages/ac/0a/1df51a9e09fb9974a511eb098e13afed916e8643556799799884f22c7869/polars-1.21.0-cp39-abi3-manylinux_2_24_aarch64.whl", hash = "sha256:d9ce8e6f0d8140e67b0f7c276d22bb5f3345ce7412558643c8b5c270db254b64", size = 29005158 }, - { url = "https://files.pythonhosted.org/packages/90/4b/f75f0eb9527c943440c6ed90be7e97146a00699fee69f9d5aff577f15659/polars-1.21.0-cp39-abi3-win_amd64.whl", hash = "sha256:c4517abb008af890e4ca8fb6bb0372868381017af0ecadf9d062e2f91f50b276", size = 31729901 }, - { url = "https://files.pythonhosted.org/packages/e6/a0/d48548f4c9e139b02eacfc074bfd02d98d9bb5f9bf9c03ec5649a481d8ff/polars-1.21.0-cp39-abi3-win_arm64.whl", hash = "sha256:6bb0ba805defb05b76fdca392e48d84d1f16403de5be25d4dd8cdc7fccfd4251", size = 28179572 }, + { url = "https://files.pythonhosted.org/packages/e7/5f/b277179cfce1258fecf4ad73cf627f670be41fdf088727090f68ca9c96ff/polars-1.29.0-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:d053ee3217df31468caf2f5ddb9fd0f3a94fd42afdf7d9abe23d9d424adca02b", size = 34206809 }, + { url = "https://files.pythonhosted.org/packages/34/e7/634e5cb55ce8bef23ac8ad8e3834c9045f4b3cbdff1fb9e7826d864436e6/polars-1.29.0-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:14131078e365eae5ccda3e67383cd43c0c0598d7f760bdf1cb4082566c5494ce", size = 31100055 }, + { url = "https://files.pythonhosted.org/packages/50/15/0e9072e410731980ebc567c60a0a5f02bc2183310e48704ef83682cdd54c/polars-1.29.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:54f6902da333f99208b8d27765d580ba0299b412787c0564275912122c228e40", size = 34828438 }, + { url = "https://files.pythonhosted.org/packages/69/c0/90fcaac5c95aa225b3899698289c0424d429ef72248b593f15294f95a35e/polars-1.29.0-cp39-abi3-manylinux_2_24_aarch64.whl", hash = "sha256:7a0ac6a11088279af4d715f4b58068835f551fa5368504a53401743006115e78", size = 32073830 }, + { url = "https://files.pythonhosted.org/packages/17/ed/e5e570e22a03549a3c5397035a006b2c6343856a9fd15cccb5db39bdfa0a/polars-1.29.0-cp39-abi3-win_amd64.whl", hash = "sha256:f5aac4656e58b1e12f9481950981ef68b5b0e53dd4903bd72472efd2d09a74c8", size = 34971841 }, + { url = "https://files.pythonhosted.org/packages/45/fd/9039f609d76b3ebb13777f289502a00b52709aea5c35aed01d1090ac142f/polars-1.29.0-cp39-abi3-win_arm64.whl", hash = "sha256:0c105b07b980b77fe88c3200b015bf4695e53185385f0f244c13e2d1027c7bbf", size = 31298689 }, ] [[package]] name = "pygments" -version = "2.18.0" +version = "2.19.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/8e/62/8336eff65bcbc8e4cb5d05b55faf041285951b6e80f33e2bff2024788f31/pygments-2.18.0.tar.gz", hash = "sha256:786ff802f32e91311bff3889f6e9a86e81505fe99f2735bb6d60ae0c5004f199", size = 4891905 } +sdist = { url = "https://files.pythonhosted.org/packages/7c/2d/c3338d48ea6cc0feb8446d8e6937e1408088a72a39937982cc6111d17f84/pygments-2.19.1.tar.gz", hash = "sha256:61c16d2a8576dc0649d9f39e089b5f02bcd27fba10d8fb4dcc28173f7a45151f", size = 4968581 } wheels = [ - { url = "https://files.pythonhosted.org/packages/f7/3f/01c8b82017c199075f8f788d0d906b9ffbbc5a47dc9918a945e13d5a2bda/pygments-2.18.0-py3-none-any.whl", hash = "sha256:b8e6aca0523f3ab76fee51799c488e38782ac06eafcf95e7ba832985c8e7b13a", size = 1205513 }, + { url = "https://files.pythonhosted.org/packages/8a/0b/9fcc47d19c48b59121088dd6da2488a49d5f72dacf8262e2790a1d2c7d15/pygments-2.19.1-py3-none-any.whl", hash = "sha256:9ea1544ad55cecf4b8242fab6dd35a93bbce657034b0611ee383099054ab6d8c", size = 1225293 }, ] [[package]] name = "pyproject-api" -version = "1.8.0" +version = "1.9.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "packaging" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/bb/19/441e0624a8afedd15bbcce96df1b80479dd0ff0d965f5ce8fde4f2f6ffad/pyproject_api-1.8.0.tar.gz", hash = "sha256:77b8049f2feb5d33eefcc21b57f1e279636277a8ac8ad6b5871037b243778496", size = 22340 } +sdist = { url = "https://files.pythonhosted.org/packages/19/fd/437901c891f58a7b9096511750247535e891d2d5a5a6eefbc9386a2b41d5/pyproject_api-1.9.1.tar.gz", hash = "sha256:43c9918f49daab37e302038fc1aed54a8c7a91a9fa935d00b9a485f37e0f5335", size = 22710 } wheels = [ - { url = "https://files.pythonhosted.org/packages/ba/f4/3c4ddfcc0c19c217c6de513842d286de8021af2f2ab79bbb86c00342d778/pyproject_api-1.8.0-py3-none-any.whl", hash = "sha256:3d7d347a047afe796fd5d1885b1e391ba29be7169bd2f102fcd378f04273d228", size = 13100 }, + { url = "https://files.pythonhosted.org/packages/ef/e6/c293c06695d4a3ab0260ef124a74ebadba5f4c511ce3a4259e976902c00b/pyproject_api-1.9.1-py3-none-any.whl", hash = "sha256:7d6238d92f8962773dd75b5f0c4a6a27cce092a14b623b811dba656f3b628948", size = 13158 }, ] [[package]] name = "pytest" -version = "8.3.3" +version = "8.3.5" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "colorama", marker = "sys_platform == 'win32'" }, @@ -307,37 +219,22 @@ dependencies = [ { name = "packaging" }, { name = "pluggy" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/8b/6c/62bbd536103af674e227c41a8f3dcd022d591f6eed5facb5a0f31ee33bbc/pytest-8.3.3.tar.gz", hash = "sha256:70b98107bd648308a7952b06e6ca9a50bc660be218d53c257cc1fc94fda10181", size = 1442487 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/6b/77/7440a06a8ead44c7757a64362dd22df5760f9b12dc5f11b6188cd2fc27a0/pytest-8.3.3-py3-none-any.whl", hash = "sha256:a6853c7375b2663155079443d2e45de913a911a11d669df02a50814944db57b2", size = 342341 }, -] - -[[package]] -name = "requests" -version = "2.32.3" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "certifi" }, - { name = "charset-normalizer" }, - { name = "idna" }, - { name = "urllib3" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/63/70/2bf7780ad2d390a8d301ad0b550f1581eadbd9a20f896afe06353c2a2913/requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760", size = 131218 } +sdist = { url = "https://files.pythonhosted.org/packages/ae/3c/c9d525a414d506893f0cd8a8d0de7706446213181570cdbd766691164e40/pytest-8.3.5.tar.gz", hash = "sha256:f4efe70cc14e511565ac476b57c279e12a855b11f48f212af1080ef2263d3845", size = 1450891 } wheels = [ - { url = "https://files.pythonhosted.org/packages/f9/9b/335f9764261e915ed497fcdeb11df5dfd6f7bf257d4a6a2a686d80da4d54/requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6", size = 64928 }, + { url = "https://files.pythonhosted.org/packages/30/3d/64ad57c803f1fa1e963a7946b6e0fea4a70df53c1a7fed304586539c2bac/pytest-8.3.5-py3-none-any.whl", hash = "sha256:c69214aa47deac29fad6c2a4f590b9c4a9fdb16a403176fe154b79c0b4d4d820", size = 343634 }, ] [[package]] name = "rich" -version = "13.9.4" +version = "14.0.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "markdown-it-py" }, { name = "pygments" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/ab/3a/0316b28d0761c6734d6bc14e770d85506c986c85ffb239e688eeaab2c2bc/rich-13.9.4.tar.gz", hash = "sha256:439594978a49a09530cff7ebc4b5c7103ef57baf48d5ea3184f21d9a2befa098", size = 223149 } +sdist = { url = "https://files.pythonhosted.org/packages/a1/53/830aa4c3066a8ab0ae9a9955976fb770fe9c6102117c8ec4ab3ea62d89e8/rich-14.0.0.tar.gz", hash = "sha256:82f1bc23a6a21ebca4ae0c45af9bdbc492ed20231dcb63f297d6d1021a9d5725", size = 224078 } wheels = [ - { url = "https://files.pythonhosted.org/packages/19/71/39c7c0d87f8d4e6c020a393182060eaefeeae6c01dab6a84ec346f2567df/rich-13.9.4-py3-none-any.whl", hash = "sha256:6049d5e6ec054bf2779ab3358186963bac2ea89175919d699e378b99738c2a90", size = 242424 }, + { url = "https://files.pythonhosted.org/packages/0d/9b/63f4c7ebc259242c89b3acafdb37b41d1185c07ff0011164674e9076b491/rich-14.0.0-py3-none-any.whl", hash = "sha256:1c9491e1951aac09caffd42f448ee3d04e58923ffe14993f6e83068dc395d7e0", size = 243229 }, ] [[package]] @@ -351,7 +248,7 @@ wheels = [ [[package]] name = "tox" -version = "4.23.2" +version = "4.26.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "cachetools" }, @@ -364,28 +261,28 @@ dependencies = [ { name = "pyproject-api" }, { name = "virtualenv" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/1f/86/32b10f91b4b975a37ac402b0f9fa016775088e0565c93602ba0b3c729ce8/tox-4.23.2.tar.gz", hash = "sha256:86075e00e555df6e82e74cfc333917f91ecb47ffbc868dcafbd2672e332f4a2c", size = 189998 } +sdist = { url = "https://files.pythonhosted.org/packages/fd/3c/dcec0c00321a107f7f697fd00754c5112572ea6dcacb40b16d8c3eea7c37/tox-4.26.0.tar.gz", hash = "sha256:a83b3b67b0159fa58e44e646505079e35a43317a62d2ae94725e0586266faeca", size = 197260 } wheels = [ - { url = "https://files.pythonhosted.org/packages/af/c0/124b73d01c120e917383bc6c53ebc34efdf7243faa9fca64d105c94cf2ab/tox-4.23.2-py3-none-any.whl", hash = "sha256:452bc32bb031f2282881a2118923176445bac783ab97c874b8770ab4c3b76c38", size = 166758 }, + { url = "https://files.pythonhosted.org/packages/de/14/f58b4087cf248b18c795b5c838c7a8d1428dfb07cb468dad3ec7f54041ab/tox-4.26.0-py3-none-any.whl", hash = "sha256:75f17aaf09face9b97bd41645028d9f722301e912be8b4c65a3f938024560224", size = 172761 }, ] [[package]] name = "tox-uv" -version = "1.16.0" +version = "1.25.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "packaging" }, { name = "tox" }, { name = "uv" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/ed/5e/c3d2a45ab5465dddbbc267a589c9cfce23b91750d49af10738a08c98534e/tox_uv-1.16.0.tar.gz", hash = "sha256:71b2e2fa6c35c1360b91a302df1d65b3e5a1f656b321c5ebf7b84545804c9f01", size = 16337 } +sdist = { url = "https://files.pythonhosted.org/packages/5d/3a/3e445f25978a716ba6674f33f687d9336d0312086a277a778a5e9e9220d7/tox_uv-1.25.0.tar.gz", hash = "sha256:59ee5e694c41fef7bbcf058f22a5f9b6a8509698def2ea60c08554f4e36b9fcc", size = 21114 } wheels = [ - { url = "https://files.pythonhosted.org/packages/ad/8d/1baa9f725ddd4824708759cf7b74bc43379f5f7feb079fde0629d7b32b3e/tox_uv-1.16.0-py3-none-any.whl", hash = "sha256:e6f0b525a687e745ab878d07cbf5c7e85d582028d4a7c8935f95e84350651432", size = 13661 }, + { url = "https://files.pythonhosted.org/packages/3c/a7/f5c29e0e6faaccefcab607f672b176927144e9412c8183d21301ea2a6f6c/tox_uv-1.25.0-py3-none-any.whl", hash = "sha256:50cfe7795dcd49b2160d7d65b5ece8717f38cfedc242c852a40ec0a71e159bf7", size = 16431 }, ] [[package]] name = "typer" -version = "0.14.0" +version = "0.15.4" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "click" }, @@ -393,64 +290,55 @@ dependencies = [ { name = "shellingham" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/0d/7e/24af5b9aaa0872f9f6dc5dcf789dc3e57ceb23b4c570b852cd4db0d98f14/typer-0.14.0.tar.gz", hash = "sha256:af58f737f8d0c0c37b9f955a6d39000b9ff97813afcbeef56af5e37cf743b45a", size = 98836 } +sdist = { url = "https://files.pythonhosted.org/packages/6c/89/c527e6c848739be8ceb5c44eb8208c52ea3515c6cf6406aa61932887bf58/typer-0.15.4.tar.gz", hash = "sha256:89507b104f9b6a0730354f27c39fae5b63ccd0c95b1ce1f1a6ba0cfd329997c3", size = 101559 } wheels = [ - { url = "https://files.pythonhosted.org/packages/bb/d8/a3ab71d5587b42b832a7ef2e65b3e51a18f8da32b6ce169637d4d21995ed/typer-0.14.0-py3-none-any.whl", hash = "sha256:f476233a25770ab3e7b2eebf7c68f3bc702031681a008b20167573a4b7018f09", size = 44707 }, + { url = "https://files.pythonhosted.org/packages/c9/62/d4ba7afe2096d5659ec3db8b15d8665bdcb92a3c6ff0b95e99895b335a9c/typer-0.15.4-py3-none-any.whl", hash = "sha256:eb0651654dcdea706780c466cf06d8f174405a659ffff8f163cfbfee98c0e173", size = 45258 }, ] [[package]] name = "typing-extensions" -version = "4.12.2" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/df/db/f35a00659bc03fec321ba8bce9420de607a1d37f8342eee1863174c69557/typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8", size = 85321 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/26/9f/ad63fc0248c5379346306f8668cda6e2e2e9c95e01216d2b8ffd9ff037d0/typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d", size = 37438 }, -] - -[[package]] -name = "urllib3" -version = "2.3.0" +version = "4.13.2" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/aa/63/e53da845320b757bf29ef6a9062f5c669fe997973f966045cb019c3f4b66/urllib3-2.3.0.tar.gz", hash = "sha256:f8c5449b3cf0861679ce7e0503c7b44b5ec981bec0d1d3795a07f1ba96f0204d", size = 307268 } +sdist = { url = "https://files.pythonhosted.org/packages/f6/37/23083fcd6e35492953e8d2aaaa68b860eb422b34627b13f2ce3eb6106061/typing_extensions-4.13.2.tar.gz", hash = "sha256:e6c81219bd689f51865d9e372991c540bda33a0379d5573cddb9a3a23f7caaef", size = 106967 } wheels = [ - { url = "https://files.pythonhosted.org/packages/c8/19/4ec628951a74043532ca2cf5d97b7b14863931476d117c471e8e2b1eb39f/urllib3-2.3.0-py3-none-any.whl", hash = "sha256:1cee9ad369867bfdbbb48b7dd50374c0967a0bb7710050facf0dd6911440e3df", size = 128369 }, + { url = "https://files.pythonhosted.org/packages/8b/54/b1ae86c0973cc6f0210b53d508ca3641fb6d0c56823f288d108bc7ab3cc8/typing_extensions-4.13.2-py3-none-any.whl", hash = "sha256:a439e7c04b49fec3e5d3e2beaa21755cadbbdc391694e28ccdd36ca4a1408f8c", size = 45806 }, ] [[package]] name = "uv" -version = "0.5.5" +version = "0.7.6" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/46/95/ba000f161640015c9f2ecc2681f38eb161b8edac600a0451b36e4ad15aa4/uv-0.5.5.tar.gz", hash = "sha256:7f8db4bdf7eaef6be271457c4b2a167f41ad115434944a09f5034018a29b4093", size = 2324705 } +sdist = { url = "https://files.pythonhosted.org/packages/f5/1f/5f2579f4efccb7044e3c1f0b445fa6ac04e5c40c95818ee8c94e3733fe85/uv-0.7.6.tar.gz", hash = "sha256:bd188ac9d9902f1652130837ede39768d7c8f72b0a68fd484ba884d88e963b66", size = 3251391 } wheels = [ - { url = "https://files.pythonhosted.org/packages/6d/91/ac61feea1b11e4dc342a4fd174f311b628a04ec3b4614a24676c4d214bd1/uv-0.5.5-py3-none-linux_armv6l.whl", hash = "sha256:d091e88a9c2c830169c3ccf95fd972759e0ab629dacc2d5eff525e5ba3583904", size = 13887911 }, - { url = "https://files.pythonhosted.org/packages/84/18/461af22fd1f80f86548013639ab345810dbf35aa44dff1732c6faf311a48/uv-0.5.5-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:f0bfc7ced2fe0c85b3070dfa219072a1406133e18aab2f2fe10b6455ede0f8b2", size = 13900075 }, - { url = "https://files.pythonhosted.org/packages/68/5f/7a236ad48f81c580691f9e5f28dd47289a9819f18410f12ee3c621791efd/uv-0.5.5-py3-none-macosx_11_0_arm64.whl", hash = "sha256:553901e95cb5a4da1da19e288c29c5f886793f981750400e5cef48e3031b970b", size = 12869530 }, - { url = "https://files.pythonhosted.org/packages/94/d6/d6f441fd041fb4883332b999481896e28502b51681bcb786a001fb1e4a50/uv-0.5.5-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.musllinux_1_1_aarch64.whl", hash = "sha256:a4f0c7647187044056dc6f6f5d31b01f445d8695eb7d2f442b29fd5c9216a56f", size = 13158009 }, - { url = "https://files.pythonhosted.org/packages/9b/3d/cc1e44e14266bbdb71eb020d14454f4f24f72fcc2eb84a52bf809e030918/uv-0.5.5-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:34e894c922ba29a59bbe812a458a7095a575f76b87dfc362e0c3f4f650d6f631", size = 13676515 }, - { url = "https://files.pythonhosted.org/packages/10/5f/b81ed7ab715687a790b9b06a6f4e2781d7f7222840eead0392c3ef6f80ea/uv-0.5.5-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f5569798fc8eaad58fbb4fb70ced8f09ebe607fbbfb95fa42c559f57bbe0cabd", size = 14245499 }, - { url = "https://files.pythonhosted.org/packages/f7/e1/ce5a88bc5ed61ee310a8e0acc3fa3032280bb2d49514fd48b801bb36f96f/uv-0.5.5-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:59d53cce11718ce5d5367afc8c93ebcfc5e1cddfa4a44aedbf08d08d9b738381", size = 14920230 }, - { url = "https://files.pythonhosted.org/packages/03/84/6fa2deb5bed9dab815cb7f57e14885ecf8de22249b992f9c4ea5dd042c28/uv-0.5.5-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3dee9517ebba13d07d8f139c439c5ff63e438d31ebda4d7eb0af8d0f0cc6a181", size = 14701845 }, - { url = "https://files.pythonhosted.org/packages/59/b2/7078d52a73c1e13d984c22fae1888993e5cd3f40cea029ab2666d79d92a7/uv-0.5.5-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:29286cd6b9f8e040d02894a67c6b6304811ea393ca9dfade109e93cf4b3b842c", size = 18963098 }, - { url = "https://files.pythonhosted.org/packages/b9/89/a91d927574ce2103d22b7beab319468d915a1d04757cf82b6eaf79b56a10/uv-0.5.5-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0f7f04ae5a5430873d8610d8ea0a5d35df92e60bf701f80b3cf24857e0ac5e72", size = 14442788 }, - { url = "https://files.pythonhosted.org/packages/46/3d/2c5a9d362771aeef13c2ba1c9b1267b469e11331b874486f03f6c6fc5eb1/uv-0.5.5-py3-none-manylinux_2_28_aarch64.whl", hash = "sha256:5a47345ccafc0105b2f0cc22fcb0bb05be4d0e60df67f5beea28069b0bb372c8", size = 13394955 }, - { url = "https://files.pythonhosted.org/packages/cf/92/8d646385401472e7358cdf0fc86015edc9bf9132c9b79d7e2a11c300dd84/uv-0.5.5-py3-none-musllinux_1_1_armv7l.whl", hash = "sha256:b55d507bfe2bd2330c41680e4b0077972381f40975a59b53007254196abc4477", size = 13639522 }, - { url = "https://files.pythonhosted.org/packages/da/23/5e00b71f9c8b9c16f0947bf84b71265bafdf24947d2e271a657da00a0c6a/uv-0.5.5-py3-none-musllinux_1_1_i686.whl", hash = "sha256:365715e7247c2cd8ef661e8f96927b181248f689c07e48b076c9dbc78a4a0877", size = 13957451 }, - { url = "https://files.pythonhosted.org/packages/02/38/0c624a8d89416fa6c849c6670066c74f420aa898eafdf7cefd3adf77d686/uv-0.5.5-py3-none-musllinux_1_1_ppc64le.whl", hash = "sha256:0314a4b9a25bf00afe4e5472c338c8c6bd34688c23d63ce1ad35462cf087b492", size = 15840287 }, - { url = "https://files.pythonhosted.org/packages/a8/bd/1a81700d2555be504f0b1ff82f86d666384805e354c8eec2fdf7558d0cf9/uv-0.5.5-py3-none-musllinux_1_1_x86_64.whl", hash = "sha256:f4e9ddcffc29f009f692cda699912b02f6a12089d741b71d2fcd0b181eb71c5d", size = 14560282 }, - { url = "https://files.pythonhosted.org/packages/cd/71/c0b137c62a5a8ab5ecc0a8f505b77e5a05d5b94fd9da7560a247dac000f3/uv-0.5.5-py3-none-win32.whl", hash = "sha256:9af7018430da1f0960eee1592c820c343e2619f2d71f66c3be62da330826c537", size = 13810875 }, - { url = "https://files.pythonhosted.org/packages/e3/d8/56dbbe07aba3cf561fff52b8ed75ce7b694a0710ab676fae4cd7fbf1bdae/uv-0.5.5-py3-none-win_amd64.whl", hash = "sha256:69e15f24493d86c3a2da3764891e35a033ceda09404c1f9b386671d509db95f3", size = 15604848 }, + { url = "https://files.pythonhosted.org/packages/f6/d6/b32f27b599f09f08086c80ba88431f06ad93e1efcd9eb3f9ab18fc6dbaf5/uv-0.7.6-py3-none-linux_armv6l.whl", hash = "sha256:434f1820a8fbf54494c53d8ebb2b6509d98a2792876a2d990f90ac70afc9a11a", size = 16659078 }, + { url = "https://files.pythonhosted.org/packages/82/53/df3043448390d2ec604cb4ca418a811204c41b14030d3f9188ba5d179d0d/uv-0.7.6-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:0bad870f797971423d7f654423cf3ccd3bbd3688f88aee3f84e79af008c6abae", size = 16839320 }, + { url = "https://files.pythonhosted.org/packages/a2/19/c0f00312762396c68c1c45e744fc1933bb777af8ae9874f20462b0cd0042/uv-0.7.6-py3-none-macosx_11_0_arm64.whl", hash = "sha256:8a86cfefd0b9cd3b8a8577e79a0e61d52ade23a7876ed5b5312cc1f05baa140b", size = 15592857 }, + { url = "https://files.pythonhosted.org/packages/07/62/ffe522bcd4c2f74ced5aeeaa576c3750bfbb7a14aaee609984703fdc6b5d/uv-0.7.6-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.musllinux_1_1_aarch64.whl", hash = "sha256:4cd32743d2c0c0b40ffbde48163ae2835353d319472aadabd71e9dcf98152e8b", size = 16065179 }, + { url = "https://files.pythonhosted.org/packages/1c/7a/301e7abb34bb6f0b83ef4c92899ce3dd98f5d0d74bc3ff8d6fb8c1aaf0de/uv-0.7.6-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:32aecfd27bd724d8ca8bafa811a69d436fcd403d589b025fbbd2e967eb154b46", size = 16394680 }, + { url = "https://files.pythonhosted.org/packages/1a/0c/b35bcf37d2b9c547fa92800566dd7cb4eb85168fe8da57872ce717d135d1/uv-0.7.6-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e15ac957e0a319dba40c897b9408c93e603d2317807384ec8f7d47a9e17c0d85", size = 17201299 }, + { url = "https://files.pythonhosted.org/packages/cd/67/397adca676c233dcf0ac84c05c2aedc07aaf5bf6f9622b04069e9f3aa81a/uv-0.7.6-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:832d7741117c41455ff43569b88892ec0a81938750a8bc4307e1160b70c91f3c", size = 18063672 }, + { url = "https://files.pythonhosted.org/packages/2f/66/b5a27f3027903a5b735b6ec45e18fae19dc29973620bd4431d0ff5cfa0c4/uv-0.7.6-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:17c79eec35c65bbd25180203be7266dd7d43381e02e28a8f2cb6ee809d008837", size = 17792312 }, + { url = "https://files.pythonhosted.org/packages/9d/4b/902cdb6ad576c4a9899aaad879b2853775e64aed5921f236f5589e6cf098/uv-0.7.6-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c18b2437e254906b1f48710e1fc1b313052e2ee7261ff104d58b25ef2d347d98", size = 22166764 }, + { url = "https://files.pythonhosted.org/packages/89/b5/9c34b2aefb5ec8c2be9175d7ee5bfd5f925e2d354b9dd80b137a1e2d6727/uv-0.7.6-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1f46cfd2de04dd261cc75158c293de64f99cc907ab0d395f3a0f97c94e7f076a", size = 17447866 }, + { url = "https://files.pythonhosted.org/packages/7a/e1/86ba96a12114af4f83013b4afac4058b63e2caa319c2fa1c07652632b922/uv-0.7.6-py3-none-manylinux_2_28_aarch64.whl", hash = "sha256:c44311ed1a32e397d81e346e7b868e4ae22f2df2e5ba601e055683fa4cc68323", size = 16337444 }, + { url = "https://files.pythonhosted.org/packages/7d/cc/362751b0477d604a1ce54939bcc5a67f262fa76bbdd679d2aec355cdc3b5/uv-0.7.6-py3-none-musllinux_1_1_armv7l.whl", hash = "sha256:5e283166816f129f29023a4bfdf49fdb33e1e2bcb4e555e9d6996122867a44af", size = 16370656 }, + { url = "https://files.pythonhosted.org/packages/8e/93/38557f828bcab2b203c3aab85037981f4c548bbcae01c46b1207a95fc068/uv-0.7.6-py3-none-musllinux_1_1_i686.whl", hash = "sha256:72e9337db681a16a7203abe112fedc249f01fe4cadd6d65d23c85031183dcf23", size = 16753839 }, + { url = "https://files.pythonhosted.org/packages/ae/01/290f42244b2373988082ca1fa7d3f5b989dd3b31038cd9d3f443457e72ed/uv-0.7.6-py3-none-musllinux_1_1_x86_64.whl", hash = "sha256:310e488493d03a843b838e9301af1731b02bc93b14bcaa38c62d448cebbdca3c", size = 17596256 }, + { url = "https://files.pythonhosted.org/packages/8a/fd/9ddc1b5f45ff0c4884d896e77b1b2211d552875cbed6966c9ee242b361f4/uv-0.7.6-py3-none-win32.whl", hash = "sha256:e3fb41bd4bf88ab21df773b642465fffc469e173645eb986d000db38d7bb8e3c", size = 16972489 }, + { url = "https://files.pythonhosted.org/packages/2f/5d/b3889f63bbb997f12a797dd08c10d896f4cf9b7c639b06ee517dffb9ed33/uv-0.7.6-py3-none-win_amd64.whl", hash = "sha256:4026513441dc01326f8bc04517956385442523ed1d40400e14723d8fb3d9c321", size = 18457656 }, + { url = "https://files.pythonhosted.org/packages/48/25/2695c5cb6b000fc17980bf9ecf86862e0897af8a515fae0c766a245b7a29/uv-0.7.6-py3-none-win_arm64.whl", hash = "sha256:ad79d71d2bb4cc1cb22d09771a23f70190e3b5fa41668da208e694b50b900178", size = 17113987 }, ] [[package]] name = "virtualenv" -version = "20.28.0" +version = "20.31.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "distlib" }, { name = "filelock" }, { name = "platformdirs" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/bf/75/53316a5a8050069228a2f6d11f32046cfa94fbb6cc3f08703f59b873de2e/virtualenv-20.28.0.tar.gz", hash = "sha256:2c9c3262bb8e7b87ea801d715fae4495e6032450c71d2309be9550e7364049aa", size = 7650368 } +sdist = { url = "https://files.pythonhosted.org/packages/56/2c/444f465fb2c65f40c3a104fd0c495184c4f2336d65baf398e3c75d72ea94/virtualenv-20.31.2.tar.gz", hash = "sha256:e10c0a9d02835e592521be48b332b6caee6887f332c111aa79a09b9e79efc2af", size = 6076316 } wheels = [ - { url = "https://files.pythonhosted.org/packages/10/f9/0919cf6f1432a8c4baa62511f8f8da8225432d22e83e3476f5be1a1edc6e/virtualenv-20.28.0-py3-none-any.whl", hash = "sha256:23eae1b4516ecd610481eda647f3a7c09aea295055337331bb4e6892ecce47b0", size = 4276702 }, + { url = "https://files.pythonhosted.org/packages/f3/40/b1c265d4b2b62b58576588510fc4d1fe60a86319c8de99fd8e9fec617d2c/virtualenv-20.31.2-py3-none-any.whl", hash = "sha256:36efd0d9650ee985f0cad72065001e66d49a6f24eb44d98980f630686243cf11", size = 6057982 }, ] From 1747a3ee6f6317866ea628d8c541dca1d16f57a7 Mon Sep 17 00:00:00 2001 From: Andrew Tiu <63322884+winter-again@users.noreply.github.com> Date: Tue, 20 May 2025 22:39:21 -0400 Subject: [PATCH 07/47] Use new generic notation --- src/csv_helper/impute.py | 8 +- uv.lock | 185 ++++++++++++++++++++------------------- 2 files changed, 96 insertions(+), 97 deletions(-) diff --git a/src/csv_helper/impute.py b/src/csv_helper/impute.py index 1833553..5120aca 100644 --- a/src/csv_helper/impute.py +++ b/src/csv_helper/impute.py @@ -1,4 +1,4 @@ -from typing import Any, NamedTuple, TypeVar +from typing import NamedTuple import numpy as np import polars as pl @@ -50,6 +50,7 @@ def check(df: pl.DataFrame, fill_cols: list[str], fill_flag: str) -> pl.DataFram # TODO: instead of separate lazy func, let this take df or lf # or have bool arg that determines whether .lazy() conversion happens? # TODO: add asserts for shape? +# TODO: use TFrame here? def impute_columns( df: pl.DataFrame, fill_cols: list[str], @@ -177,10 +178,7 @@ def parse_fill_range(fill_range: tuple[int, int]) -> FillRange: return fill_range_int -TFrame = TypeVar("TFrame", pl.DataFrame, pl.LazyFrame) - - -def complete(df: TFrame, *columns: str | pl.Series) -> TFrame: +def complete[T: (pl.DataFrame, pl.LazyFrame)](df: T, *columns: str | pl.Series) -> T: """ Generate rows for implicit missing values based on column combinations, thus making them explicit missing values. Generated values marked as null. diff --git a/uv.lock b/uv.lock index ee77f91..ff5ed4e 100644 --- a/uv.lock +++ b/uv.lock @@ -1,22 +1,23 @@ version = 1 +revision = 2 requires-python = ">=3.13" [[package]] name = "cachetools" version = "5.5.2" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/6c/81/3747dad6b14fa2cf53fcf10548cf5aea6913e96fab41a3c198676f8948a5/cachetools-5.5.2.tar.gz", hash = "sha256:1a661caa9175d26759571b2e19580f9d6393969e5dfca11fdb1f947a23e640d4", size = 28380 } +sdist = { url = "https://files.pythonhosted.org/packages/6c/81/3747dad6b14fa2cf53fcf10548cf5aea6913e96fab41a3c198676f8948a5/cachetools-5.5.2.tar.gz", hash = "sha256:1a661caa9175d26759571b2e19580f9d6393969e5dfca11fdb1f947a23e640d4", size = 28380, upload-time = "2025-02-20T21:01:19.524Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/72/76/20fa66124dbe6be5cafeb312ece67de6b61dd91a0247d1ea13db4ebb33c2/cachetools-5.5.2-py3-none-any.whl", hash = "sha256:d26a22bcc62eb95c3beabd9f1ee5e820d3d2704fe2967cbe350e20c8ffcd3f0a", size = 10080 }, + { url = "https://files.pythonhosted.org/packages/72/76/20fa66124dbe6be5cafeb312ece67de6b61dd91a0247d1ea13db4ebb33c2/cachetools-5.5.2-py3-none-any.whl", hash = "sha256:d26a22bcc62eb95c3beabd9f1ee5e820d3d2704fe2967cbe350e20c8ffcd3f0a", size = 10080, upload-time = "2025-02-20T21:01:16.647Z" }, ] [[package]] name = "chardet" version = "5.2.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/f3/0d/f7b6ab21ec75897ed80c17d79b15951a719226b9fababf1e40ea74d69079/chardet-5.2.0.tar.gz", hash = "sha256:1b3b6ff479a8c414bc3fa2c0852995695c4a026dcd6d0633b2dd092ca39c1cf7", size = 2069618 } +sdist = { url = "https://files.pythonhosted.org/packages/f3/0d/f7b6ab21ec75897ed80c17d79b15951a719226b9fababf1e40ea74d69079/chardet-5.2.0.tar.gz", hash = "sha256:1b3b6ff479a8c414bc3fa2c0852995695c4a026dcd6d0633b2dd092ca39c1cf7", size = 2069618, upload-time = "2023-08-01T19:23:02.662Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/38/6f/f5fbc992a329ee4e0f288c1fe0e2ad9485ed064cac731ed2fe47dcc38cbf/chardet-5.2.0-py3-none-any.whl", hash = "sha256:e1cf59446890a00105fe7b7912492ea04b6e6f06d4b742b2c788469e34c82970", size = 199385 }, + { url = "https://files.pythonhosted.org/packages/38/6f/f5fbc992a329ee4e0f288c1fe0e2ad9485ed064cac731ed2fe47dcc38cbf/chardet-5.2.0-py3-none-any.whl", hash = "sha256:e1cf59446890a00105fe7b7912492ea04b6e6f06d4b742b2c788469e34c82970", size = 199385, upload-time = "2023-08-01T19:23:00.661Z" }, ] [[package]] @@ -24,25 +25,25 @@ name = "click" version = "8.1.8" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "colorama", marker = "platform_system == 'Windows'" }, + { name = "colorama", marker = "sys_platform == 'win32'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/b9/2e/0090cbf739cee7d23781ad4b89a9894a41538e4fcf4c31dcdd705b78eb8b/click-8.1.8.tar.gz", hash = "sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a", size = 226593 } +sdist = { url = "https://files.pythonhosted.org/packages/b9/2e/0090cbf739cee7d23781ad4b89a9894a41538e4fcf4c31dcdd705b78eb8b/click-8.1.8.tar.gz", hash = "sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a", size = 226593, upload-time = "2024-12-21T18:38:44.339Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/7e/d4/7ebdbd03970677812aac39c869717059dbb71a4cfc033ca6e5221787892c/click-8.1.8-py3-none-any.whl", hash = "sha256:63c132bbbed01578a06712a2d1f497bb62d9c1c0d329b7903a866228027263b2", size = 98188 }, + { url = "https://files.pythonhosted.org/packages/7e/d4/7ebdbd03970677812aac39c869717059dbb71a4cfc033ca6e5221787892c/click-8.1.8-py3-none-any.whl", hash = "sha256:63c132bbbed01578a06712a2d1f497bb62d9c1c0d329b7903a866228027263b2", size = 98188, upload-time = "2024-12-21T18:38:41.666Z" }, ] [[package]] name = "colorama" version = "0.4.6" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697 } +sdist = { url = "https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697, upload-time = "2022-10-25T02:36:22.414Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335 }, + { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" }, ] [[package]] name = "csv-helper" -version = "0.2.2" +version = "0.2.3" source = { editable = "." } dependencies = [ { name = "numpy" }, @@ -75,27 +76,27 @@ dev = [ name = "distlib" version = "0.3.9" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/0d/dd/1bec4c5ddb504ca60fc29472f3d27e8d4da1257a854e1d96742f15c1d02d/distlib-0.3.9.tar.gz", hash = "sha256:a60f20dea646b8a33f3e7772f74dc0b2d0772d2837ee1342a00645c81edf9403", size = 613923 } +sdist = { url = "https://files.pythonhosted.org/packages/0d/dd/1bec4c5ddb504ca60fc29472f3d27e8d4da1257a854e1d96742f15c1d02d/distlib-0.3.9.tar.gz", hash = "sha256:a60f20dea646b8a33f3e7772f74dc0b2d0772d2837ee1342a00645c81edf9403", size = 613923, upload-time = "2024-10-09T18:35:47.551Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/91/a1/cf2472db20f7ce4a6be1253a81cfdf85ad9c7885ffbed7047fb72c24cf87/distlib-0.3.9-py2.py3-none-any.whl", hash = "sha256:47f8c22fd27c27e25a65601af709b38e4f0a45ea4fc2e710f65755fa8caaaf87", size = 468973 }, + { url = "https://files.pythonhosted.org/packages/91/a1/cf2472db20f7ce4a6be1253a81cfdf85ad9c7885ffbed7047fb72c24cf87/distlib-0.3.9-py2.py3-none-any.whl", hash = "sha256:47f8c22fd27c27e25a65601af709b38e4f0a45ea4fc2e710f65755fa8caaaf87", size = 468973, upload-time = "2024-10-09T18:35:44.272Z" }, ] [[package]] name = "filelock" version = "3.18.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/0a/10/c23352565a6544bdc5353e0b15fc1c563352101f30e24bf500207a54df9a/filelock-3.18.0.tar.gz", hash = "sha256:adbc88eabb99d2fec8c9c1b229b171f18afa655400173ddc653d5d01501fb9f2", size = 18075 } +sdist = { url = "https://files.pythonhosted.org/packages/0a/10/c23352565a6544bdc5353e0b15fc1c563352101f30e24bf500207a54df9a/filelock-3.18.0.tar.gz", hash = "sha256:adbc88eabb99d2fec8c9c1b229b171f18afa655400173ddc653d5d01501fb9f2", size = 18075, upload-time = "2025-03-14T07:11:40.47Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/4d/36/2a115987e2d8c300a974597416d9de88f2444426de9571f4b59b2cca3acc/filelock-3.18.0-py3-none-any.whl", hash = "sha256:c401f4f8377c4464e6db25fff06205fd89bdd83b65eb0488ed1b160f780e21de", size = 16215 }, + { url = "https://files.pythonhosted.org/packages/4d/36/2a115987e2d8c300a974597416d9de88f2444426de9571f4b59b2cca3acc/filelock-3.18.0-py3-none-any.whl", hash = "sha256:c401f4f8377c4464e6db25fff06205fd89bdd83b65eb0488ed1b160f780e21de", size = 16215, upload-time = "2025-03-14T07:11:39.145Z" }, ] [[package]] name = "iniconfig" version = "2.1.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/f2/97/ebf4da567aa6827c909642694d71c9fcf53e5b504f2d96afea02718862f3/iniconfig-2.1.0.tar.gz", hash = "sha256:3abbd2e30b36733fee78f9c7f7308f2d0050e88f0087fd25c2645f63c773e1c7", size = 4793 } +sdist = { url = "https://files.pythonhosted.org/packages/f2/97/ebf4da567aa6827c909642694d71c9fcf53e5b504f2d96afea02718862f3/iniconfig-2.1.0.tar.gz", hash = "sha256:3abbd2e30b36733fee78f9c7f7308f2d0050e88f0087fd25c2645f63c773e1c7", size = 4793, upload-time = "2025-03-19T20:09:59.721Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/2c/e1/e6716421ea10d38022b952c159d5161ca1193197fb744506875fbb87ea7b/iniconfig-2.1.0-py3-none-any.whl", hash = "sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760", size = 6050 }, + { url = "https://files.pythonhosted.org/packages/2c/e1/e6716421ea10d38022b952c159d5161ca1193197fb744506875fbb87ea7b/iniconfig-2.1.0-py3-none-any.whl", hash = "sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760", size = 6050, upload-time = "2025-03-19T20:10:01.071Z" }, ] [[package]] @@ -105,96 +106,96 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "mdurl" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/38/71/3b932df36c1a044d397a1f92d1cf91ee0a503d91e470cbd670aa66b07ed0/markdown-it-py-3.0.0.tar.gz", hash = "sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb", size = 74596 } +sdist = { url = "https://files.pythonhosted.org/packages/38/71/3b932df36c1a044d397a1f92d1cf91ee0a503d91e470cbd670aa66b07ed0/markdown-it-py-3.0.0.tar.gz", hash = "sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb", size = 74596, upload-time = "2023-06-03T06:41:14.443Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/42/d7/1ec15b46af6af88f19b8e5ffea08fa375d433c998b8a7639e76935c14f1f/markdown_it_py-3.0.0-py3-none-any.whl", hash = "sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1", size = 87528 }, + { url = "https://files.pythonhosted.org/packages/42/d7/1ec15b46af6af88f19b8e5ffea08fa375d433c998b8a7639e76935c14f1f/markdown_it_py-3.0.0-py3-none-any.whl", hash = "sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1", size = 87528, upload-time = "2023-06-03T06:41:11.019Z" }, ] [[package]] name = "mdurl" version = "0.1.2" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/d6/54/cfe61301667036ec958cb99bd3efefba235e65cdeb9c84d24a8293ba1d90/mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba", size = 8729 } +sdist = { url = "https://files.pythonhosted.org/packages/d6/54/cfe61301667036ec958cb99bd3efefba235e65cdeb9c84d24a8293ba1d90/mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba", size = 8729, upload-time = "2022-08-14T12:40:10.846Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979 }, + { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979, upload-time = "2022-08-14T12:40:09.779Z" }, ] [[package]] name = "numpy" version = "2.2.6" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/76/21/7d2a95e4bba9dc13d043ee156a356c0a8f0c6309dff6b21b4d71a073b8a8/numpy-2.2.6.tar.gz", hash = "sha256:e29554e2bef54a90aa5cc07da6ce955accb83f21ab5de01a62c8478897b264fd", size = 20276440 } +sdist = { url = "https://files.pythonhosted.org/packages/76/21/7d2a95e4bba9dc13d043ee156a356c0a8f0c6309dff6b21b4d71a073b8a8/numpy-2.2.6.tar.gz", hash = "sha256:e29554e2bef54a90aa5cc07da6ce955accb83f21ab5de01a62c8478897b264fd", size = 20276440, upload-time = "2025-05-17T22:38:04.611Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/f9/5c/6657823f4f594f72b5471f1db1ab12e26e890bb2e41897522d134d2a3e81/numpy-2.2.6-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:0811bb762109d9708cca4d0b13c4f67146e3c3b7cf8d34018c722adb2d957c84", size = 20867828 }, - { url = "https://files.pythonhosted.org/packages/dc/9e/14520dc3dadf3c803473bd07e9b2bd1b69bc583cb2497b47000fed2fa92f/numpy-2.2.6-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:287cc3162b6f01463ccd86be154f284d0893d2b3ed7292439ea97eafa8170e0b", size = 14143006 }, - { url = "https://files.pythonhosted.org/packages/4f/06/7e96c57d90bebdce9918412087fc22ca9851cceaf5567a45c1f404480e9e/numpy-2.2.6-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:f1372f041402e37e5e633e586f62aa53de2eac8d98cbfb822806ce4bbefcb74d", size = 5076765 }, - { url = "https://files.pythonhosted.org/packages/73/ed/63d920c23b4289fdac96ddbdd6132e9427790977d5457cd132f18e76eae0/numpy-2.2.6-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:55a4d33fa519660d69614a9fad433be87e5252f4b03850642f88993f7b2ca566", size = 6617736 }, - { url = "https://files.pythonhosted.org/packages/85/c5/e19c8f99d83fd377ec8c7e0cf627a8049746da54afc24ef0a0cb73d5dfb5/numpy-2.2.6-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f92729c95468a2f4f15e9bb94c432a9229d0d50de67304399627a943201baa2f", size = 14010719 }, - { url = "https://files.pythonhosted.org/packages/19/49/4df9123aafa7b539317bf6d342cb6d227e49f7a35b99c287a6109b13dd93/numpy-2.2.6-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1bc23a79bfabc5d056d106f9befb8d50c31ced2fbc70eedb8155aec74a45798f", size = 16526072 }, - { url = "https://files.pythonhosted.org/packages/b2/6c/04b5f47f4f32f7c2b0e7260442a8cbcf8168b0e1a41ff1495da42f42a14f/numpy-2.2.6-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:e3143e4451880bed956e706a3220b4e5cf6172ef05fcc397f6f36a550b1dd868", size = 15503213 }, - { url = "https://files.pythonhosted.org/packages/17/0a/5cd92e352c1307640d5b6fec1b2ffb06cd0dabe7d7b8227f97933d378422/numpy-2.2.6-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:b4f13750ce79751586ae2eb824ba7e1e8dba64784086c98cdbbcc6a42112ce0d", size = 18316632 }, - { url = "https://files.pythonhosted.org/packages/f0/3b/5cba2b1d88760ef86596ad0f3d484b1cbff7c115ae2429678465057c5155/numpy-2.2.6-cp313-cp313-win32.whl", hash = "sha256:5beb72339d9d4fa36522fc63802f469b13cdbe4fdab4a288f0c441b74272ebfd", size = 6244532 }, - { url = "https://files.pythonhosted.org/packages/cb/3b/d58c12eafcb298d4e6d0d40216866ab15f59e55d148a5658bb3132311fcf/numpy-2.2.6-cp313-cp313-win_amd64.whl", hash = "sha256:b0544343a702fa80c95ad5d3d608ea3599dd54d4632df855e4c8d24eb6ecfa1c", size = 12610885 }, - { url = "https://files.pythonhosted.org/packages/6b/9e/4bf918b818e516322db999ac25d00c75788ddfd2d2ade4fa66f1f38097e1/numpy-2.2.6-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:0bca768cd85ae743b2affdc762d617eddf3bcf8724435498a1e80132d04879e6", size = 20963467 }, - { url = "https://files.pythonhosted.org/packages/61/66/d2de6b291507517ff2e438e13ff7b1e2cdbdb7cb40b3ed475377aece69f9/numpy-2.2.6-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:fc0c5673685c508a142ca65209b4e79ed6740a4ed6b2267dbba90f34b0b3cfda", size = 14225144 }, - { url = "https://files.pythonhosted.org/packages/e4/25/480387655407ead912e28ba3a820bc69af9adf13bcbe40b299d454ec011f/numpy-2.2.6-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:5bd4fc3ac8926b3819797a7c0e2631eb889b4118a9898c84f585a54d475b7e40", size = 5200217 }, - { url = "https://files.pythonhosted.org/packages/aa/4a/6e313b5108f53dcbf3aca0c0f3e9c92f4c10ce57a0a721851f9785872895/numpy-2.2.6-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:fee4236c876c4e8369388054d02d0e9bb84821feb1a64dd59e137e6511a551f8", size = 6712014 }, - { url = "https://files.pythonhosted.org/packages/b7/30/172c2d5c4be71fdf476e9de553443cf8e25feddbe185e0bd88b096915bcc/numpy-2.2.6-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e1dda9c7e08dc141e0247a5b8f49cf05984955246a327d4c48bda16821947b2f", size = 14077935 }, - { url = "https://files.pythonhosted.org/packages/12/fb/9e743f8d4e4d3c710902cf87af3512082ae3d43b945d5d16563f26ec251d/numpy-2.2.6-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f447e6acb680fd307f40d3da4852208af94afdfab89cf850986c3ca00562f4fa", size = 16600122 }, - { url = "https://files.pythonhosted.org/packages/12/75/ee20da0e58d3a66f204f38916757e01e33a9737d0b22373b3eb5a27358f9/numpy-2.2.6-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:389d771b1623ec92636b0786bc4ae56abafad4a4c513d36a55dce14bd9ce8571", size = 15586143 }, - { url = "https://files.pythonhosted.org/packages/76/95/bef5b37f29fc5e739947e9ce5179ad402875633308504a52d188302319c8/numpy-2.2.6-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:8e9ace4a37db23421249ed236fdcdd457d671e25146786dfc96835cd951aa7c1", size = 18385260 }, - { url = "https://files.pythonhosted.org/packages/09/04/f2f83279d287407cf36a7a8053a5abe7be3622a4363337338f2585e4afda/numpy-2.2.6-cp313-cp313t-win32.whl", hash = "sha256:038613e9fb8c72b0a41f025a7e4c3f0b7a1b5d768ece4796b674c8f3fe13efff", size = 6377225 }, - { url = "https://files.pythonhosted.org/packages/67/0e/35082d13c09c02c011cf21570543d202ad929d961c02a147493cb0c2bdf5/numpy-2.2.6-cp313-cp313t-win_amd64.whl", hash = "sha256:6031dd6dfecc0cf9f668681a37648373bddd6421fff6c66ec1624eed0180ee06", size = 12771374 }, + { url = "https://files.pythonhosted.org/packages/f9/5c/6657823f4f594f72b5471f1db1ab12e26e890bb2e41897522d134d2a3e81/numpy-2.2.6-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:0811bb762109d9708cca4d0b13c4f67146e3c3b7cf8d34018c722adb2d957c84", size = 20867828, upload-time = "2025-05-17T21:37:56.699Z" }, + { url = "https://files.pythonhosted.org/packages/dc/9e/14520dc3dadf3c803473bd07e9b2bd1b69bc583cb2497b47000fed2fa92f/numpy-2.2.6-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:287cc3162b6f01463ccd86be154f284d0893d2b3ed7292439ea97eafa8170e0b", size = 14143006, upload-time = "2025-05-17T21:38:18.291Z" }, + { url = "https://files.pythonhosted.org/packages/4f/06/7e96c57d90bebdce9918412087fc22ca9851cceaf5567a45c1f404480e9e/numpy-2.2.6-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:f1372f041402e37e5e633e586f62aa53de2eac8d98cbfb822806ce4bbefcb74d", size = 5076765, upload-time = "2025-05-17T21:38:27.319Z" }, + { url = "https://files.pythonhosted.org/packages/73/ed/63d920c23b4289fdac96ddbdd6132e9427790977d5457cd132f18e76eae0/numpy-2.2.6-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:55a4d33fa519660d69614a9fad433be87e5252f4b03850642f88993f7b2ca566", size = 6617736, upload-time = "2025-05-17T21:38:38.141Z" }, + { url = "https://files.pythonhosted.org/packages/85/c5/e19c8f99d83fd377ec8c7e0cf627a8049746da54afc24ef0a0cb73d5dfb5/numpy-2.2.6-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f92729c95468a2f4f15e9bb94c432a9229d0d50de67304399627a943201baa2f", size = 14010719, upload-time = "2025-05-17T21:38:58.433Z" }, + { url = "https://files.pythonhosted.org/packages/19/49/4df9123aafa7b539317bf6d342cb6d227e49f7a35b99c287a6109b13dd93/numpy-2.2.6-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1bc23a79bfabc5d056d106f9befb8d50c31ced2fbc70eedb8155aec74a45798f", size = 16526072, upload-time = "2025-05-17T21:39:22.638Z" }, + { url = "https://files.pythonhosted.org/packages/b2/6c/04b5f47f4f32f7c2b0e7260442a8cbcf8168b0e1a41ff1495da42f42a14f/numpy-2.2.6-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:e3143e4451880bed956e706a3220b4e5cf6172ef05fcc397f6f36a550b1dd868", size = 15503213, upload-time = "2025-05-17T21:39:45.865Z" }, + { url = "https://files.pythonhosted.org/packages/17/0a/5cd92e352c1307640d5b6fec1b2ffb06cd0dabe7d7b8227f97933d378422/numpy-2.2.6-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:b4f13750ce79751586ae2eb824ba7e1e8dba64784086c98cdbbcc6a42112ce0d", size = 18316632, upload-time = "2025-05-17T21:40:13.331Z" }, + { url = "https://files.pythonhosted.org/packages/f0/3b/5cba2b1d88760ef86596ad0f3d484b1cbff7c115ae2429678465057c5155/numpy-2.2.6-cp313-cp313-win32.whl", hash = "sha256:5beb72339d9d4fa36522fc63802f469b13cdbe4fdab4a288f0c441b74272ebfd", size = 6244532, upload-time = "2025-05-17T21:43:46.099Z" }, + { url = "https://files.pythonhosted.org/packages/cb/3b/d58c12eafcb298d4e6d0d40216866ab15f59e55d148a5658bb3132311fcf/numpy-2.2.6-cp313-cp313-win_amd64.whl", hash = "sha256:b0544343a702fa80c95ad5d3d608ea3599dd54d4632df855e4c8d24eb6ecfa1c", size = 12610885, upload-time = "2025-05-17T21:44:05.145Z" }, + { url = "https://files.pythonhosted.org/packages/6b/9e/4bf918b818e516322db999ac25d00c75788ddfd2d2ade4fa66f1f38097e1/numpy-2.2.6-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:0bca768cd85ae743b2affdc762d617eddf3bcf8724435498a1e80132d04879e6", size = 20963467, upload-time = "2025-05-17T21:40:44Z" }, + { url = "https://files.pythonhosted.org/packages/61/66/d2de6b291507517ff2e438e13ff7b1e2cdbdb7cb40b3ed475377aece69f9/numpy-2.2.6-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:fc0c5673685c508a142ca65209b4e79ed6740a4ed6b2267dbba90f34b0b3cfda", size = 14225144, upload-time = "2025-05-17T21:41:05.695Z" }, + { url = "https://files.pythonhosted.org/packages/e4/25/480387655407ead912e28ba3a820bc69af9adf13bcbe40b299d454ec011f/numpy-2.2.6-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:5bd4fc3ac8926b3819797a7c0e2631eb889b4118a9898c84f585a54d475b7e40", size = 5200217, upload-time = "2025-05-17T21:41:15.903Z" }, + { url = "https://files.pythonhosted.org/packages/aa/4a/6e313b5108f53dcbf3aca0c0f3e9c92f4c10ce57a0a721851f9785872895/numpy-2.2.6-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:fee4236c876c4e8369388054d02d0e9bb84821feb1a64dd59e137e6511a551f8", size = 6712014, upload-time = "2025-05-17T21:41:27.321Z" }, + { url = "https://files.pythonhosted.org/packages/b7/30/172c2d5c4be71fdf476e9de553443cf8e25feddbe185e0bd88b096915bcc/numpy-2.2.6-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e1dda9c7e08dc141e0247a5b8f49cf05984955246a327d4c48bda16821947b2f", size = 14077935, upload-time = "2025-05-17T21:41:49.738Z" }, + { url = "https://files.pythonhosted.org/packages/12/fb/9e743f8d4e4d3c710902cf87af3512082ae3d43b945d5d16563f26ec251d/numpy-2.2.6-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f447e6acb680fd307f40d3da4852208af94afdfab89cf850986c3ca00562f4fa", size = 16600122, upload-time = "2025-05-17T21:42:14.046Z" }, + { url = "https://files.pythonhosted.org/packages/12/75/ee20da0e58d3a66f204f38916757e01e33a9737d0b22373b3eb5a27358f9/numpy-2.2.6-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:389d771b1623ec92636b0786bc4ae56abafad4a4c513d36a55dce14bd9ce8571", size = 15586143, upload-time = "2025-05-17T21:42:37.464Z" }, + { url = "https://files.pythonhosted.org/packages/76/95/bef5b37f29fc5e739947e9ce5179ad402875633308504a52d188302319c8/numpy-2.2.6-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:8e9ace4a37db23421249ed236fdcdd457d671e25146786dfc96835cd951aa7c1", size = 18385260, upload-time = "2025-05-17T21:43:05.189Z" }, + { url = "https://files.pythonhosted.org/packages/09/04/f2f83279d287407cf36a7a8053a5abe7be3622a4363337338f2585e4afda/numpy-2.2.6-cp313-cp313t-win32.whl", hash = "sha256:038613e9fb8c72b0a41f025a7e4c3f0b7a1b5d768ece4796b674c8f3fe13efff", size = 6377225, upload-time = "2025-05-17T21:43:16.254Z" }, + { url = "https://files.pythonhosted.org/packages/67/0e/35082d13c09c02c011cf21570543d202ad929d961c02a147493cb0c2bdf5/numpy-2.2.6-cp313-cp313t-win_amd64.whl", hash = "sha256:6031dd6dfecc0cf9f668681a37648373bddd6421fff6c66ec1624eed0180ee06", size = 12771374, upload-time = "2025-05-17T21:43:35.479Z" }, ] [[package]] name = "packaging" version = "25.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/a1/d4/1fc4078c65507b51b96ca8f8c3ba19e6a61c8253c72794544580a7b6c24d/packaging-25.0.tar.gz", hash = "sha256:d443872c98d677bf60f6a1f2f8c1cb748e8fe762d2bf9d3148b5599295b0fc4f", size = 165727 } +sdist = { url = "https://files.pythonhosted.org/packages/a1/d4/1fc4078c65507b51b96ca8f8c3ba19e6a61c8253c72794544580a7b6c24d/packaging-25.0.tar.gz", hash = "sha256:d443872c98d677bf60f6a1f2f8c1cb748e8fe762d2bf9d3148b5599295b0fc4f", size = 165727, upload-time = "2025-04-19T11:48:59.673Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/20/12/38679034af332785aac8774540895e234f4d07f7545804097de4b666afd8/packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484", size = 66469 }, + { url = "https://files.pythonhosted.org/packages/20/12/38679034af332785aac8774540895e234f4d07f7545804097de4b666afd8/packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484", size = 66469, upload-time = "2025-04-19T11:48:57.875Z" }, ] [[package]] name = "platformdirs" version = "4.3.8" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/fe/8b/3c73abc9c759ecd3f1f7ceff6685840859e8070c4d947c93fae71f6a0bf2/platformdirs-4.3.8.tar.gz", hash = "sha256:3d512d96e16bcb959a814c9f348431070822a6496326a4be0911c40b5a74c2bc", size = 21362 } +sdist = { url = "https://files.pythonhosted.org/packages/fe/8b/3c73abc9c759ecd3f1f7ceff6685840859e8070c4d947c93fae71f6a0bf2/platformdirs-4.3.8.tar.gz", hash = "sha256:3d512d96e16bcb959a814c9f348431070822a6496326a4be0911c40b5a74c2bc", size = 21362, upload-time = "2025-05-07T22:47:42.121Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/fe/39/979e8e21520d4e47a0bbe349e2713c0aac6f3d853d0e5b34d76206c439aa/platformdirs-4.3.8-py3-none-any.whl", hash = "sha256:ff7059bb7eb1179e2685604f4aaf157cfd9535242bd23742eadc3c13542139b4", size = 18567 }, + { url = "https://files.pythonhosted.org/packages/fe/39/979e8e21520d4e47a0bbe349e2713c0aac6f3d853d0e5b34d76206c439aa/platformdirs-4.3.8-py3-none-any.whl", hash = "sha256:ff7059bb7eb1179e2685604f4aaf157cfd9535242bd23742eadc3c13542139b4", size = 18567, upload-time = "2025-05-07T22:47:40.376Z" }, ] [[package]] name = "pluggy" version = "1.6.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/f9/e2/3e91f31a7d2b083fe6ef3fa267035b518369d9511ffab804f839851d2779/pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3", size = 69412 } +sdist = { url = "https://files.pythonhosted.org/packages/f9/e2/3e91f31a7d2b083fe6ef3fa267035b518369d9511ffab804f839851d2779/pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3", size = 69412, upload-time = "2025-05-15T12:30:07.975Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538 }, + { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" }, ] [[package]] name = "polars" version = "1.29.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/0b/92/8d0e80fef779a392b1a736b554ffba62403026bad7df8a9de8b61dce018f/polars-1.29.0.tar.gz", hash = "sha256:d2acb71fce1ff0ea76db5f648abd91a7a6c460fafabce9a2e8175184efa00d02", size = 4582973 } +sdist = { url = "https://files.pythonhosted.org/packages/0b/92/8d0e80fef779a392b1a736b554ffba62403026bad7df8a9de8b61dce018f/polars-1.29.0.tar.gz", hash = "sha256:d2acb71fce1ff0ea76db5f648abd91a7a6c460fafabce9a2e8175184efa00d02", size = 4582973, upload-time = "2025-04-30T20:57:22.46Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/e7/5f/b277179cfce1258fecf4ad73cf627f670be41fdf088727090f68ca9c96ff/polars-1.29.0-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:d053ee3217df31468caf2f5ddb9fd0f3a94fd42afdf7d9abe23d9d424adca02b", size = 34206809 }, - { url = "https://files.pythonhosted.org/packages/34/e7/634e5cb55ce8bef23ac8ad8e3834c9045f4b3cbdff1fb9e7826d864436e6/polars-1.29.0-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:14131078e365eae5ccda3e67383cd43c0c0598d7f760bdf1cb4082566c5494ce", size = 31100055 }, - { url = "https://files.pythonhosted.org/packages/50/15/0e9072e410731980ebc567c60a0a5f02bc2183310e48704ef83682cdd54c/polars-1.29.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:54f6902da333f99208b8d27765d580ba0299b412787c0564275912122c228e40", size = 34828438 }, - { url = "https://files.pythonhosted.org/packages/69/c0/90fcaac5c95aa225b3899698289c0424d429ef72248b593f15294f95a35e/polars-1.29.0-cp39-abi3-manylinux_2_24_aarch64.whl", hash = "sha256:7a0ac6a11088279af4d715f4b58068835f551fa5368504a53401743006115e78", size = 32073830 }, - { url = "https://files.pythonhosted.org/packages/17/ed/e5e570e22a03549a3c5397035a006b2c6343856a9fd15cccb5db39bdfa0a/polars-1.29.0-cp39-abi3-win_amd64.whl", hash = "sha256:f5aac4656e58b1e12f9481950981ef68b5b0e53dd4903bd72472efd2d09a74c8", size = 34971841 }, - { url = "https://files.pythonhosted.org/packages/45/fd/9039f609d76b3ebb13777f289502a00b52709aea5c35aed01d1090ac142f/polars-1.29.0-cp39-abi3-win_arm64.whl", hash = "sha256:0c105b07b980b77fe88c3200b015bf4695e53185385f0f244c13e2d1027c7bbf", size = 31298689 }, + { url = "https://files.pythonhosted.org/packages/e7/5f/b277179cfce1258fecf4ad73cf627f670be41fdf088727090f68ca9c96ff/polars-1.29.0-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:d053ee3217df31468caf2f5ddb9fd0f3a94fd42afdf7d9abe23d9d424adca02b", size = 34206809, upload-time = "2025-04-30T20:56:14.744Z" }, + { url = "https://files.pythonhosted.org/packages/34/e7/634e5cb55ce8bef23ac8ad8e3834c9045f4b3cbdff1fb9e7826d864436e6/polars-1.29.0-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:14131078e365eae5ccda3e67383cd43c0c0598d7f760bdf1cb4082566c5494ce", size = 31100055, upload-time = "2025-04-30T20:56:19.43Z" }, + { url = "https://files.pythonhosted.org/packages/50/15/0e9072e410731980ebc567c60a0a5f02bc2183310e48704ef83682cdd54c/polars-1.29.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:54f6902da333f99208b8d27765d580ba0299b412787c0564275912122c228e40", size = 34828438, upload-time = "2025-04-30T20:56:22.839Z" }, + { url = "https://files.pythonhosted.org/packages/69/c0/90fcaac5c95aa225b3899698289c0424d429ef72248b593f15294f95a35e/polars-1.29.0-cp39-abi3-manylinux_2_24_aarch64.whl", hash = "sha256:7a0ac6a11088279af4d715f4b58068835f551fa5368504a53401743006115e78", size = 32073830, upload-time = "2025-04-30T20:56:26.742Z" }, + { url = "https://files.pythonhosted.org/packages/17/ed/e5e570e22a03549a3c5397035a006b2c6343856a9fd15cccb5db39bdfa0a/polars-1.29.0-cp39-abi3-win_amd64.whl", hash = "sha256:f5aac4656e58b1e12f9481950981ef68b5b0e53dd4903bd72472efd2d09a74c8", size = 34971841, upload-time = "2025-04-30T20:56:29.953Z" }, + { url = "https://files.pythonhosted.org/packages/45/fd/9039f609d76b3ebb13777f289502a00b52709aea5c35aed01d1090ac142f/polars-1.29.0-cp39-abi3-win_arm64.whl", hash = "sha256:0c105b07b980b77fe88c3200b015bf4695e53185385f0f244c13e2d1027c7bbf", size = 31298689, upload-time = "2025-04-30T20:56:33.449Z" }, ] [[package]] name = "pygments" version = "2.19.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/7c/2d/c3338d48ea6cc0feb8446d8e6937e1408088a72a39937982cc6111d17f84/pygments-2.19.1.tar.gz", hash = "sha256:61c16d2a8576dc0649d9f39e089b5f02bcd27fba10d8fb4dcc28173f7a45151f", size = 4968581 } +sdist = { url = "https://files.pythonhosted.org/packages/7c/2d/c3338d48ea6cc0feb8446d8e6937e1408088a72a39937982cc6111d17f84/pygments-2.19.1.tar.gz", hash = "sha256:61c16d2a8576dc0649d9f39e089b5f02bcd27fba10d8fb4dcc28173f7a45151f", size = 4968581, upload-time = "2025-01-06T17:26:30.443Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/8a/0b/9fcc47d19c48b59121088dd6da2488a49d5f72dacf8262e2790a1d2c7d15/pygments-2.19.1-py3-none-any.whl", hash = "sha256:9ea1544ad55cecf4b8242fab6dd35a93bbce657034b0611ee383099054ab6d8c", size = 1225293 }, + { url = "https://files.pythonhosted.org/packages/8a/0b/9fcc47d19c48b59121088dd6da2488a49d5f72dacf8262e2790a1d2c7d15/pygments-2.19.1-py3-none-any.whl", hash = "sha256:9ea1544ad55cecf4b8242fab6dd35a93bbce657034b0611ee383099054ab6d8c", size = 1225293, upload-time = "2025-01-06T17:26:25.553Z" }, ] [[package]] @@ -204,9 +205,9 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "packaging" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/19/fd/437901c891f58a7b9096511750247535e891d2d5a5a6eefbc9386a2b41d5/pyproject_api-1.9.1.tar.gz", hash = "sha256:43c9918f49daab37e302038fc1aed54a8c7a91a9fa935d00b9a485f37e0f5335", size = 22710 } +sdist = { url = "https://files.pythonhosted.org/packages/19/fd/437901c891f58a7b9096511750247535e891d2d5a5a6eefbc9386a2b41d5/pyproject_api-1.9.1.tar.gz", hash = "sha256:43c9918f49daab37e302038fc1aed54a8c7a91a9fa935d00b9a485f37e0f5335", size = 22710, upload-time = "2025-05-12T14:41:58.025Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/ef/e6/c293c06695d4a3ab0260ef124a74ebadba5f4c511ce3a4259e976902c00b/pyproject_api-1.9.1-py3-none-any.whl", hash = "sha256:7d6238d92f8962773dd75b5f0c4a6a27cce092a14b623b811dba656f3b628948", size = 13158 }, + { url = "https://files.pythonhosted.org/packages/ef/e6/c293c06695d4a3ab0260ef124a74ebadba5f4c511ce3a4259e976902c00b/pyproject_api-1.9.1-py3-none-any.whl", hash = "sha256:7d6238d92f8962773dd75b5f0c4a6a27cce092a14b623b811dba656f3b628948", size = 13158, upload-time = "2025-05-12T14:41:56.217Z" }, ] [[package]] @@ -219,9 +220,9 @@ dependencies = [ { name = "packaging" }, { name = "pluggy" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/ae/3c/c9d525a414d506893f0cd8a8d0de7706446213181570cdbd766691164e40/pytest-8.3.5.tar.gz", hash = "sha256:f4efe70cc14e511565ac476b57c279e12a855b11f48f212af1080ef2263d3845", size = 1450891 } +sdist = { url = "https://files.pythonhosted.org/packages/ae/3c/c9d525a414d506893f0cd8a8d0de7706446213181570cdbd766691164e40/pytest-8.3.5.tar.gz", hash = "sha256:f4efe70cc14e511565ac476b57c279e12a855b11f48f212af1080ef2263d3845", size = 1450891, upload-time = "2025-03-02T12:54:54.503Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/30/3d/64ad57c803f1fa1e963a7946b6e0fea4a70df53c1a7fed304586539c2bac/pytest-8.3.5-py3-none-any.whl", hash = "sha256:c69214aa47deac29fad6c2a4f590b9c4a9fdb16a403176fe154b79c0b4d4d820", size = 343634 }, + { url = "https://files.pythonhosted.org/packages/30/3d/64ad57c803f1fa1e963a7946b6e0fea4a70df53c1a7fed304586539c2bac/pytest-8.3.5-py3-none-any.whl", hash = "sha256:c69214aa47deac29fad6c2a4f590b9c4a9fdb16a403176fe154b79c0b4d4d820", size = 343634, upload-time = "2025-03-02T12:54:52.069Z" }, ] [[package]] @@ -232,18 +233,18 @@ dependencies = [ { name = "markdown-it-py" }, { name = "pygments" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/a1/53/830aa4c3066a8ab0ae9a9955976fb770fe9c6102117c8ec4ab3ea62d89e8/rich-14.0.0.tar.gz", hash = "sha256:82f1bc23a6a21ebca4ae0c45af9bdbc492ed20231dcb63f297d6d1021a9d5725", size = 224078 } +sdist = { url = "https://files.pythonhosted.org/packages/a1/53/830aa4c3066a8ab0ae9a9955976fb770fe9c6102117c8ec4ab3ea62d89e8/rich-14.0.0.tar.gz", hash = "sha256:82f1bc23a6a21ebca4ae0c45af9bdbc492ed20231dcb63f297d6d1021a9d5725", size = 224078, upload-time = "2025-03-30T14:15:14.23Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/0d/9b/63f4c7ebc259242c89b3acafdb37b41d1185c07ff0011164674e9076b491/rich-14.0.0-py3-none-any.whl", hash = "sha256:1c9491e1951aac09caffd42f448ee3d04e58923ffe14993f6e83068dc395d7e0", size = 243229 }, + { url = "https://files.pythonhosted.org/packages/0d/9b/63f4c7ebc259242c89b3acafdb37b41d1185c07ff0011164674e9076b491/rich-14.0.0-py3-none-any.whl", hash = "sha256:1c9491e1951aac09caffd42f448ee3d04e58923ffe14993f6e83068dc395d7e0", size = 243229, upload-time = "2025-03-30T14:15:12.283Z" }, ] [[package]] name = "shellingham" version = "1.5.4" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/58/15/8b3609fd3830ef7b27b655beb4b4e9c62313a4e8da8c676e142cc210d58e/shellingham-1.5.4.tar.gz", hash = "sha256:8dbca0739d487e5bd35ab3ca4b36e11c4078f3a234bfce294b0a0291363404de", size = 10310 } +sdist = { url = "https://files.pythonhosted.org/packages/58/15/8b3609fd3830ef7b27b655beb4b4e9c62313a4e8da8c676e142cc210d58e/shellingham-1.5.4.tar.gz", hash = "sha256:8dbca0739d487e5bd35ab3ca4b36e11c4078f3a234bfce294b0a0291363404de", size = 10310, upload-time = "2023-10-24T04:13:40.426Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/e0/f9/0595336914c5619e5f28a1fb793285925a8cd4b432c9da0a987836c7f822/shellingham-1.5.4-py2.py3-none-any.whl", hash = "sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686", size = 9755 }, + { url = "https://files.pythonhosted.org/packages/e0/f9/0595336914c5619e5f28a1fb793285925a8cd4b432c9da0a987836c7f822/shellingham-1.5.4-py2.py3-none-any.whl", hash = "sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686", size = 9755, upload-time = "2023-10-24T04:13:38.866Z" }, ] [[package]] @@ -261,9 +262,9 @@ dependencies = [ { name = "pyproject-api" }, { name = "virtualenv" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/fd/3c/dcec0c00321a107f7f697fd00754c5112572ea6dcacb40b16d8c3eea7c37/tox-4.26.0.tar.gz", hash = "sha256:a83b3b67b0159fa58e44e646505079e35a43317a62d2ae94725e0586266faeca", size = 197260 } +sdist = { url = "https://files.pythonhosted.org/packages/fd/3c/dcec0c00321a107f7f697fd00754c5112572ea6dcacb40b16d8c3eea7c37/tox-4.26.0.tar.gz", hash = "sha256:a83b3b67b0159fa58e44e646505079e35a43317a62d2ae94725e0586266faeca", size = 197260, upload-time = "2025-05-13T15:04:28.481Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/de/14/f58b4087cf248b18c795b5c838c7a8d1428dfb07cb468dad3ec7f54041ab/tox-4.26.0-py3-none-any.whl", hash = "sha256:75f17aaf09face9b97bd41645028d9f722301e912be8b4c65a3f938024560224", size = 172761 }, + { url = "https://files.pythonhosted.org/packages/de/14/f58b4087cf248b18c795b5c838c7a8d1428dfb07cb468dad3ec7f54041ab/tox-4.26.0-py3-none-any.whl", hash = "sha256:75f17aaf09face9b97bd41645028d9f722301e912be8b4c65a3f938024560224", size = 172761, upload-time = "2025-05-13T15:04:26.207Z" }, ] [[package]] @@ -275,9 +276,9 @@ dependencies = [ { name = "tox" }, { name = "uv" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/5d/3a/3e445f25978a716ba6674f33f687d9336d0312086a277a778a5e9e9220d7/tox_uv-1.25.0.tar.gz", hash = "sha256:59ee5e694c41fef7bbcf058f22a5f9b6a8509698def2ea60c08554f4e36b9fcc", size = 21114 } +sdist = { url = "https://files.pythonhosted.org/packages/5d/3a/3e445f25978a716ba6674f33f687d9336d0312086a277a778a5e9e9220d7/tox_uv-1.25.0.tar.gz", hash = "sha256:59ee5e694c41fef7bbcf058f22a5f9b6a8509698def2ea60c08554f4e36b9fcc", size = 21114, upload-time = "2025-02-21T16:37:51.796Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/3c/a7/f5c29e0e6faaccefcab607f672b176927144e9412c8183d21301ea2a6f6c/tox_uv-1.25.0-py3-none-any.whl", hash = "sha256:50cfe7795dcd49b2160d7d65b5ece8717f38cfedc242c852a40ec0a71e159bf7", size = 16431 }, + { url = "https://files.pythonhosted.org/packages/3c/a7/f5c29e0e6faaccefcab607f672b176927144e9412c8183d21301ea2a6f6c/tox_uv-1.25.0-py3-none-any.whl", hash = "sha256:50cfe7795dcd49b2160d7d65b5ece8717f38cfedc242c852a40ec0a71e159bf7", size = 16431, upload-time = "2025-02-21T16:37:49.657Z" }, ] [[package]] @@ -290,43 +291,43 @@ dependencies = [ { name = "shellingham" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/6c/89/c527e6c848739be8ceb5c44eb8208c52ea3515c6cf6406aa61932887bf58/typer-0.15.4.tar.gz", hash = "sha256:89507b104f9b6a0730354f27c39fae5b63ccd0c95b1ce1f1a6ba0cfd329997c3", size = 101559 } +sdist = { url = "https://files.pythonhosted.org/packages/6c/89/c527e6c848739be8ceb5c44eb8208c52ea3515c6cf6406aa61932887bf58/typer-0.15.4.tar.gz", hash = "sha256:89507b104f9b6a0730354f27c39fae5b63ccd0c95b1ce1f1a6ba0cfd329997c3", size = 101559, upload-time = "2025-05-14T16:34:57.704Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/c9/62/d4ba7afe2096d5659ec3db8b15d8665bdcb92a3c6ff0b95e99895b335a9c/typer-0.15.4-py3-none-any.whl", hash = "sha256:eb0651654dcdea706780c466cf06d8f174405a659ffff8f163cfbfee98c0e173", size = 45258 }, + { url = "https://files.pythonhosted.org/packages/c9/62/d4ba7afe2096d5659ec3db8b15d8665bdcb92a3c6ff0b95e99895b335a9c/typer-0.15.4-py3-none-any.whl", hash = "sha256:eb0651654dcdea706780c466cf06d8f174405a659ffff8f163cfbfee98c0e173", size = 45258, upload-time = "2025-05-14T16:34:55.583Z" }, ] [[package]] name = "typing-extensions" version = "4.13.2" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/f6/37/23083fcd6e35492953e8d2aaaa68b860eb422b34627b13f2ce3eb6106061/typing_extensions-4.13.2.tar.gz", hash = "sha256:e6c81219bd689f51865d9e372991c540bda33a0379d5573cddb9a3a23f7caaef", size = 106967 } +sdist = { url = "https://files.pythonhosted.org/packages/f6/37/23083fcd6e35492953e8d2aaaa68b860eb422b34627b13f2ce3eb6106061/typing_extensions-4.13.2.tar.gz", hash = "sha256:e6c81219bd689f51865d9e372991c540bda33a0379d5573cddb9a3a23f7caaef", size = 106967, upload-time = "2025-04-10T14:19:05.416Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/8b/54/b1ae86c0973cc6f0210b53d508ca3641fb6d0c56823f288d108bc7ab3cc8/typing_extensions-4.13.2-py3-none-any.whl", hash = "sha256:a439e7c04b49fec3e5d3e2beaa21755cadbbdc391694e28ccdd36ca4a1408f8c", size = 45806 }, + { url = "https://files.pythonhosted.org/packages/8b/54/b1ae86c0973cc6f0210b53d508ca3641fb6d0c56823f288d108bc7ab3cc8/typing_extensions-4.13.2-py3-none-any.whl", hash = "sha256:a439e7c04b49fec3e5d3e2beaa21755cadbbdc391694e28ccdd36ca4a1408f8c", size = 45806, upload-time = "2025-04-10T14:19:03.967Z" }, ] [[package]] name = "uv" version = "0.7.6" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/f5/1f/5f2579f4efccb7044e3c1f0b445fa6ac04e5c40c95818ee8c94e3733fe85/uv-0.7.6.tar.gz", hash = "sha256:bd188ac9d9902f1652130837ede39768d7c8f72b0a68fd484ba884d88e963b66", size = 3251391 } +sdist = { url = "https://files.pythonhosted.org/packages/f5/1f/5f2579f4efccb7044e3c1f0b445fa6ac04e5c40c95818ee8c94e3733fe85/uv-0.7.6.tar.gz", hash = "sha256:bd188ac9d9902f1652130837ede39768d7c8f72b0a68fd484ba884d88e963b66", size = 3251391, upload-time = "2025-05-20T00:31:46.993Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/f6/d6/b32f27b599f09f08086c80ba88431f06ad93e1efcd9eb3f9ab18fc6dbaf5/uv-0.7.6-py3-none-linux_armv6l.whl", hash = "sha256:434f1820a8fbf54494c53d8ebb2b6509d98a2792876a2d990f90ac70afc9a11a", size = 16659078 }, - { url = "https://files.pythonhosted.org/packages/82/53/df3043448390d2ec604cb4ca418a811204c41b14030d3f9188ba5d179d0d/uv-0.7.6-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:0bad870f797971423d7f654423cf3ccd3bbd3688f88aee3f84e79af008c6abae", size = 16839320 }, - { url = "https://files.pythonhosted.org/packages/a2/19/c0f00312762396c68c1c45e744fc1933bb777af8ae9874f20462b0cd0042/uv-0.7.6-py3-none-macosx_11_0_arm64.whl", hash = "sha256:8a86cfefd0b9cd3b8a8577e79a0e61d52ade23a7876ed5b5312cc1f05baa140b", size = 15592857 }, - { url = "https://files.pythonhosted.org/packages/07/62/ffe522bcd4c2f74ced5aeeaa576c3750bfbb7a14aaee609984703fdc6b5d/uv-0.7.6-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.musllinux_1_1_aarch64.whl", hash = "sha256:4cd32743d2c0c0b40ffbde48163ae2835353d319472aadabd71e9dcf98152e8b", size = 16065179 }, - { url = "https://files.pythonhosted.org/packages/1c/7a/301e7abb34bb6f0b83ef4c92899ce3dd98f5d0d74bc3ff8d6fb8c1aaf0de/uv-0.7.6-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:32aecfd27bd724d8ca8bafa811a69d436fcd403d589b025fbbd2e967eb154b46", size = 16394680 }, - { url = "https://files.pythonhosted.org/packages/1a/0c/b35bcf37d2b9c547fa92800566dd7cb4eb85168fe8da57872ce717d135d1/uv-0.7.6-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e15ac957e0a319dba40c897b9408c93e603d2317807384ec8f7d47a9e17c0d85", size = 17201299 }, - { url = "https://files.pythonhosted.org/packages/cd/67/397adca676c233dcf0ac84c05c2aedc07aaf5bf6f9622b04069e9f3aa81a/uv-0.7.6-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:832d7741117c41455ff43569b88892ec0a81938750a8bc4307e1160b70c91f3c", size = 18063672 }, - { url = "https://files.pythonhosted.org/packages/2f/66/b5a27f3027903a5b735b6ec45e18fae19dc29973620bd4431d0ff5cfa0c4/uv-0.7.6-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:17c79eec35c65bbd25180203be7266dd7d43381e02e28a8f2cb6ee809d008837", size = 17792312 }, - { url = "https://files.pythonhosted.org/packages/9d/4b/902cdb6ad576c4a9899aaad879b2853775e64aed5921f236f5589e6cf098/uv-0.7.6-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c18b2437e254906b1f48710e1fc1b313052e2ee7261ff104d58b25ef2d347d98", size = 22166764 }, - { url = "https://files.pythonhosted.org/packages/89/b5/9c34b2aefb5ec8c2be9175d7ee5bfd5f925e2d354b9dd80b137a1e2d6727/uv-0.7.6-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1f46cfd2de04dd261cc75158c293de64f99cc907ab0d395f3a0f97c94e7f076a", size = 17447866 }, - { url = "https://files.pythonhosted.org/packages/7a/e1/86ba96a12114af4f83013b4afac4058b63e2caa319c2fa1c07652632b922/uv-0.7.6-py3-none-manylinux_2_28_aarch64.whl", hash = "sha256:c44311ed1a32e397d81e346e7b868e4ae22f2df2e5ba601e055683fa4cc68323", size = 16337444 }, - { url = "https://files.pythonhosted.org/packages/7d/cc/362751b0477d604a1ce54939bcc5a67f262fa76bbdd679d2aec355cdc3b5/uv-0.7.6-py3-none-musllinux_1_1_armv7l.whl", hash = "sha256:5e283166816f129f29023a4bfdf49fdb33e1e2bcb4e555e9d6996122867a44af", size = 16370656 }, - { url = "https://files.pythonhosted.org/packages/8e/93/38557f828bcab2b203c3aab85037981f4c548bbcae01c46b1207a95fc068/uv-0.7.6-py3-none-musllinux_1_1_i686.whl", hash = "sha256:72e9337db681a16a7203abe112fedc249f01fe4cadd6d65d23c85031183dcf23", size = 16753839 }, - { url = "https://files.pythonhosted.org/packages/ae/01/290f42244b2373988082ca1fa7d3f5b989dd3b31038cd9d3f443457e72ed/uv-0.7.6-py3-none-musllinux_1_1_x86_64.whl", hash = "sha256:310e488493d03a843b838e9301af1731b02bc93b14bcaa38c62d448cebbdca3c", size = 17596256 }, - { url = "https://files.pythonhosted.org/packages/8a/fd/9ddc1b5f45ff0c4884d896e77b1b2211d552875cbed6966c9ee242b361f4/uv-0.7.6-py3-none-win32.whl", hash = "sha256:e3fb41bd4bf88ab21df773b642465fffc469e173645eb986d000db38d7bb8e3c", size = 16972489 }, - { url = "https://files.pythonhosted.org/packages/2f/5d/b3889f63bbb997f12a797dd08c10d896f4cf9b7c639b06ee517dffb9ed33/uv-0.7.6-py3-none-win_amd64.whl", hash = "sha256:4026513441dc01326f8bc04517956385442523ed1d40400e14723d8fb3d9c321", size = 18457656 }, - { url = "https://files.pythonhosted.org/packages/48/25/2695c5cb6b000fc17980bf9ecf86862e0897af8a515fae0c766a245b7a29/uv-0.7.6-py3-none-win_arm64.whl", hash = "sha256:ad79d71d2bb4cc1cb22d09771a23f70190e3b5fa41668da208e694b50b900178", size = 17113987 }, + { url = "https://files.pythonhosted.org/packages/f6/d6/b32f27b599f09f08086c80ba88431f06ad93e1efcd9eb3f9ab18fc6dbaf5/uv-0.7.6-py3-none-linux_armv6l.whl", hash = "sha256:434f1820a8fbf54494c53d8ebb2b6509d98a2792876a2d990f90ac70afc9a11a", size = 16659078, upload-time = "2025-05-20T00:31:02.827Z" }, + { url = "https://files.pythonhosted.org/packages/82/53/df3043448390d2ec604cb4ca418a811204c41b14030d3f9188ba5d179d0d/uv-0.7.6-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:0bad870f797971423d7f654423cf3ccd3bbd3688f88aee3f84e79af008c6abae", size = 16839320, upload-time = "2025-05-20T00:31:06.654Z" }, + { url = "https://files.pythonhosted.org/packages/a2/19/c0f00312762396c68c1c45e744fc1933bb777af8ae9874f20462b0cd0042/uv-0.7.6-py3-none-macosx_11_0_arm64.whl", hash = "sha256:8a86cfefd0b9cd3b8a8577e79a0e61d52ade23a7876ed5b5312cc1f05baa140b", size = 15592857, upload-time = "2025-05-20T00:31:09.495Z" }, + { url = "https://files.pythonhosted.org/packages/07/62/ffe522bcd4c2f74ced5aeeaa576c3750bfbb7a14aaee609984703fdc6b5d/uv-0.7.6-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.musllinux_1_1_aarch64.whl", hash = "sha256:4cd32743d2c0c0b40ffbde48163ae2835353d319472aadabd71e9dcf98152e8b", size = 16065179, upload-time = "2025-05-20T00:31:12.226Z" }, + { url = "https://files.pythonhosted.org/packages/1c/7a/301e7abb34bb6f0b83ef4c92899ce3dd98f5d0d74bc3ff8d6fb8c1aaf0de/uv-0.7.6-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:32aecfd27bd724d8ca8bafa811a69d436fcd403d589b025fbbd2e967eb154b46", size = 16394680, upload-time = "2025-05-20T00:31:14.943Z" }, + { url = "https://files.pythonhosted.org/packages/1a/0c/b35bcf37d2b9c547fa92800566dd7cb4eb85168fe8da57872ce717d135d1/uv-0.7.6-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e15ac957e0a319dba40c897b9408c93e603d2317807384ec8f7d47a9e17c0d85", size = 17201299, upload-time = "2025-05-20T00:31:17.633Z" }, + { url = "https://files.pythonhosted.org/packages/cd/67/397adca676c233dcf0ac84c05c2aedc07aaf5bf6f9622b04069e9f3aa81a/uv-0.7.6-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:832d7741117c41455ff43569b88892ec0a81938750a8bc4307e1160b70c91f3c", size = 18063672, upload-time = "2025-05-20T00:31:20.128Z" }, + { url = "https://files.pythonhosted.org/packages/2f/66/b5a27f3027903a5b735b6ec45e18fae19dc29973620bd4431d0ff5cfa0c4/uv-0.7.6-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:17c79eec35c65bbd25180203be7266dd7d43381e02e28a8f2cb6ee809d008837", size = 17792312, upload-time = "2025-05-20T00:31:22.574Z" }, + { url = "https://files.pythonhosted.org/packages/9d/4b/902cdb6ad576c4a9899aaad879b2853775e64aed5921f236f5589e6cf098/uv-0.7.6-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c18b2437e254906b1f48710e1fc1b313052e2ee7261ff104d58b25ef2d347d98", size = 22166764, upload-time = "2025-05-20T00:31:25.414Z" }, + { url = "https://files.pythonhosted.org/packages/89/b5/9c34b2aefb5ec8c2be9175d7ee5bfd5f925e2d354b9dd80b137a1e2d6727/uv-0.7.6-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1f46cfd2de04dd261cc75158c293de64f99cc907ab0d395f3a0f97c94e7f076a", size = 17447866, upload-time = "2025-05-20T00:31:27.909Z" }, + { url = "https://files.pythonhosted.org/packages/7a/e1/86ba96a12114af4f83013b4afac4058b63e2caa319c2fa1c07652632b922/uv-0.7.6-py3-none-manylinux_2_28_aarch64.whl", hash = "sha256:c44311ed1a32e397d81e346e7b868e4ae22f2df2e5ba601e055683fa4cc68323", size = 16337444, upload-time = "2025-05-20T00:31:30.122Z" }, + { url = "https://files.pythonhosted.org/packages/7d/cc/362751b0477d604a1ce54939bcc5a67f262fa76bbdd679d2aec355cdc3b5/uv-0.7.6-py3-none-musllinux_1_1_armv7l.whl", hash = "sha256:5e283166816f129f29023a4bfdf49fdb33e1e2bcb4e555e9d6996122867a44af", size = 16370656, upload-time = "2025-05-20T00:31:32.349Z" }, + { url = "https://files.pythonhosted.org/packages/8e/93/38557f828bcab2b203c3aab85037981f4c548bbcae01c46b1207a95fc068/uv-0.7.6-py3-none-musllinux_1_1_i686.whl", hash = "sha256:72e9337db681a16a7203abe112fedc249f01fe4cadd6d65d23c85031183dcf23", size = 16753839, upload-time = "2025-05-20T00:31:34.995Z" }, + { url = "https://files.pythonhosted.org/packages/ae/01/290f42244b2373988082ca1fa7d3f5b989dd3b31038cd9d3f443457e72ed/uv-0.7.6-py3-none-musllinux_1_1_x86_64.whl", hash = "sha256:310e488493d03a843b838e9301af1731b02bc93b14bcaa38c62d448cebbdca3c", size = 17596256, upload-time = "2025-05-20T00:31:37.696Z" }, + { url = "https://files.pythonhosted.org/packages/8a/fd/9ddc1b5f45ff0c4884d896e77b1b2211d552875cbed6966c9ee242b361f4/uv-0.7.6-py3-none-win32.whl", hash = "sha256:e3fb41bd4bf88ab21df773b642465fffc469e173645eb986d000db38d7bb8e3c", size = 16972489, upload-time = "2025-05-20T00:31:39.961Z" }, + { url = "https://files.pythonhosted.org/packages/2f/5d/b3889f63bbb997f12a797dd08c10d896f4cf9b7c639b06ee517dffb9ed33/uv-0.7.6-py3-none-win_amd64.whl", hash = "sha256:4026513441dc01326f8bc04517956385442523ed1d40400e14723d8fb3d9c321", size = 18457656, upload-time = "2025-05-20T00:31:42.476Z" }, + { url = "https://files.pythonhosted.org/packages/48/25/2695c5cb6b000fc17980bf9ecf86862e0897af8a515fae0c766a245b7a29/uv-0.7.6-py3-none-win_arm64.whl", hash = "sha256:ad79d71d2bb4cc1cb22d09771a23f70190e3b5fa41668da208e694b50b900178", size = 17113987, upload-time = "2025-05-20T00:31:44.932Z" }, ] [[package]] @@ -338,7 +339,7 @@ dependencies = [ { name = "filelock" }, { name = "platformdirs" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/56/2c/444f465fb2c65f40c3a104fd0c495184c4f2336d65baf398e3c75d72ea94/virtualenv-20.31.2.tar.gz", hash = "sha256:e10c0a9d02835e592521be48b332b6caee6887f332c111aa79a09b9e79efc2af", size = 6076316 } +sdist = { url = "https://files.pythonhosted.org/packages/56/2c/444f465fb2c65f40c3a104fd0c495184c4f2336d65baf398e3c75d72ea94/virtualenv-20.31.2.tar.gz", hash = "sha256:e10c0a9d02835e592521be48b332b6caee6887f332c111aa79a09b9e79efc2af", size = 6076316, upload-time = "2025-05-08T17:58:23.811Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/f3/40/b1c265d4b2b62b58576588510fc4d1fe60a86319c8de99fd8e9fec617d2c/virtualenv-20.31.2-py3-none-any.whl", hash = "sha256:36efd0d9650ee985f0cad72065001e66d49a6f24eb44d98980f630686243cf11", size = 6057982 }, + { url = "https://files.pythonhosted.org/packages/f3/40/b1c265d4b2b62b58576588510fc4d1fe60a86319c8de99fd8e9fec617d2c/virtualenv-20.31.2-py3-none-any.whl", hash = "sha256:36efd0d9650ee985f0cad72065001e66d49a6f24eb44d98980f630686243cf11", size = 6057982, upload-time = "2025-05-08T17:58:21.15Z" }, ] From 649792e6647fd0c8cc6f186791464de8de2bcbf3 Mon Sep 17 00:00:00 2001 From: Andrew Tiu <63322884+winter-again@users.noreply.github.com> Date: Wed, 21 May 2025 09:50:20 -0400 Subject: [PATCH 08/47] Update impute columns func to cover DF and LF. --- src/csv_helper/impute.py | 160 +++++++++++++++------------------------ tests/complete_test.py | 4 +- tests/impute_test.py | 54 +++++++++++++ tests/main_test.py | 3 +- 4 files changed, 117 insertions(+), 104 deletions(-) create mode 100644 tests/impute_test.py diff --git a/src/csv_helper/impute.py b/src/csv_helper/impute.py index 5120aca..9adcde0 100644 --- a/src/csv_helper/impute.py +++ b/src/csv_helper/impute.py @@ -47,26 +47,23 @@ def check(df: pl.DataFrame, fill_cols: list[str], fill_flag: str) -> pl.DataFram ) -# TODO: instead of separate lazy func, let this take df or lf -# or have bool arg that determines whether .lazy() conversion happens? -# TODO: add asserts for shape? -# TODO: use TFrame here? -def impute_columns( - df: pl.DataFrame, - fill_cols: list[str], +def columns[T: (pl.DataFrame, pl.LazyFrame)]( + df: T, + columns: list[str], fill_flag: str, fill_range: tuple[int, int], - col_type: PolarsDataType = pl.Int64, + dtype: PolarsDataType = pl.Int64, seed: int | None = None, -) -> pl.DataFrame: +) -> T: """ - Fill instances of `fill_flag` (a string) in the given column - with random integers in the given range (inclusive). + Independently fill instances of `fill_flag` (a string) + in the given columns with random integers in the given range + (bounds inclusive). - If `col_type` is specified, will attempt to cast the final result - of `fill_cols` to that Polars type. + If `dtype` is specified, will attempt to cast the filled columns + to that Polars type. Otherwise, assumes pl.Int64. """ - for col in fill_cols: + for col in columns: if col not in df.columns: raise ValueError(f"Column {col} doesn't exist") @@ -77,89 +74,50 @@ def impute_columns( fill_range_int = parse_fill_range(fill_range) - if len(fill_cols) > 1: + n_cols = len(columns) + if n_cols > 1: rng = np.random.default_rng(seed) - n = (len(fill_cols), df.height) - # must gen enough numbers for whole column up-front, otherwise reused + n_rows = df.lazy().select(pl.len()).collect().item() + # must gen enough numbers for all columns up-front, otherwise they get reused + shape = (n_cols, n_rows) fill_nums = rng.integers( fill_range_int.lb, fill_range_int.ub, - size=n, - endpoint=True, + size=shape, + endpoint=True, # include ub in sample ) - for col, num in zip(fill_cols, fill_nums): + for col, num in zip(columns, fill_nums): df = df.with_columns( pl.when(pl.col(col) == fill_flag) .then(pl.lit(num)) .otherwise(pl.col(col)) .alias(col) - .cast(col_type) + .cast(dtype) ) else: - fill_col = fill_cols[0] + column = columns[0] # NOTE: this implementation and numpy implementation for filling values are roughly the same speed # with this Polars-only impl barely faster df = df.with_columns( - pl.when(pl.col(fill_col) == fill_flag) + pl.when(pl.col(column) == fill_flag) .then( pl.int_range(fill_range_int.lb, fill_range_int.ub + 1).sample( pl.len(), with_replacement=True, seed=seed ) ) - .otherwise(pl.col(fill_col)) - .alias(fill_col) - .cast(col_type) - ) - - return df - - -def _impute_columns_lazy( - lf: pl.DataFrame | pl.LazyFrame, - fill_column: str, - fill_flag: str, - fill_range: tuple[int, int], - seed: int | None = None, -) -> pl.DataFrame: - """ - Experimental with Lazy - """ - lf = lf.lazy() - - if fill_column not in lf.collect_schema().names(): - raise ValueError(f"Column {fill_column} doesn't exist") - - if not _fill_flag_exists_lazy(lf, fill_column, fill_flag): - raise ValueError( - f"Column {fill_column} doesn't contain any instances of '{fill_flag}'" - ) - - fill_range_int = parse_fill_range(fill_range) - - df = lf.with_columns( - pl.when(pl.col(fill_column) == fill_flag) - .then( - pl.int_range(fill_range_int.lb, fill_range_int.ub + 1).sample( - pl.len(), with_replacement=True, seed=seed - ) + .otherwise(pl.col(column)) + .alias(column) + .cast(dtype) ) - .otherwise(pl.col(fill_column)) - .alias(fill_column) - ).collect() return df -def fill_flag_exists(df: pl.DataFrame, fill_col: str, fill_flag: str) -> bool: - return df.select((pl.col(fill_col) == fill_flag).any()).item() - - -def _fill_flag_exists_lazy( - df: pl.DataFrame | pl.LazyFrame, fill_col: str, fill_flag: str +def fill_flag_exists[T: (pl.DataFrame, pl.LazyFrame)]( + df: T, column: str, fill_flag: str ) -> bool: - lf = df.lazy() - return lf.select((pl.col(fill_col) == fill_flag).any()).collect().item() + return df.lazy().select((pl.col(column) == fill_flag).any()).collect().item() class FillRange(NamedTuple): @@ -178,37 +136,6 @@ def parse_fill_range(fill_range: tuple[int, int]) -> FillRange: return fill_range_int -def complete[T: (pl.DataFrame, pl.LazyFrame)](df: T, *columns: str | pl.Series) -> T: - """ - Generate rows for implicit missing values based on column combinations, - thus making them explicit missing values. Generated values marked as null. - - If columns are referenced with strings, then only existing values in those - columns are used for completion. If Series are specified instead, then - those Series can specify the full set of possible values, provided that - the Series is named after an existing column. - """ - cols = [] - for col in columns: - if isinstance(col, str): - cols.append(pl.col(col).unique().implode()) - elif isinstance(col, pl.Series): - cols.append(col.unique().implode()) - else: - raise TypeError( - f"The columns argument(s) must be either string or polars Series. Got {type(col)} instead." - ) - - unique_combos = df.select(cols) - col_names = unique_combos.collect_schema().names() - for col in col_names: - unique_combos = unique_combos.explode(col) - - return unique_combos.join( - df, on=col_names, how="left", coalesce=True, validate="1:1" - ) - - def impute_column_pair( df: pl.DataFrame, numerator: str, @@ -292,3 +219,34 @@ def impute_column_pair( ) return df + + +def complete[T: (pl.DataFrame, pl.LazyFrame)](df: T, *columns: str | pl.Series) -> T: + """ + Generate rows for implicit missing values based on column combinations, + thus making them explicit missing values. Generated values marked as null. + + If columns are referenced with strings, then only existing values in those + columns are used for completion. If Series are specified instead, then + those Series can specify the full set of possible values, provided that + the Series is named after an existing column. + """ + cols = [] + for col in columns: + if isinstance(col, str): + cols.append(pl.col(col).unique().implode()) + elif isinstance(col, pl.Series): + cols.append(col.unique().implode()) + else: + raise TypeError( + f"The columns argument(s) must be either string or polars Series. Got {type(col)} instead." + ) + + unique_combos = df.select(cols) + col_names = unique_combos.collect_schema().names() + for col in col_names: + unique_combos = unique_combos.explode(col) + + return unique_combos.join( + df, on=col_names, how="left", coalesce=True, validate="1:1" + ) diff --git a/tests/complete_test.py b/tests/complete_test.py index 400adf2..cad9223 100644 --- a/tests/complete_test.py +++ b/tests/complete_test.py @@ -4,7 +4,7 @@ from csv_helper import impute -def test_complete_exists(): +def test_complete_exists() -> None: df = pl.DataFrame( { "country": ["France", "France", "UK", "UK", "Spain"], @@ -46,7 +46,7 @@ def test_complete_exists(): assert_frame_equal(lf, result) -def test_complete_not_exists(): +def test_complete_not_exists() -> None: # TODO: add lazy test df = pl.DataFrame( { diff --git a/tests/impute_test.py b/tests/impute_test.py new file mode 100644 index 0000000..b3ba96f --- /dev/null +++ b/tests/impute_test.py @@ -0,0 +1,54 @@ +import polars as pl + +from csv_helper import impute + +df_inp = pl.DataFrame( + { + "id": ["A", "A", "A", "B", "B", "C", "C", "A", "A", "A", "D", "D"], + "count": [ + "10", + "15", + "<=5", + "<=5", + "12", + "50", + "<=5", + "10", + "15", + "<=5", + "<=5", + "<=5", + ], + "count_2": [ + "15", + "10", + "<=5", + "12", + "<=5", + "<=5", + "10", + "50", + "<=5", + "<=5", + "15", + "<=5", + ], + } +) + +# TODO: test values are <= 5 + + +def test_impute_columns() -> None: + df = df_inp.pipe(impute.columns, ["count"], "<=5", (1, 5), pl.String) + + assert df.select((pl.col("count") == "<=5").any()).item() is False + + +def test_impute_columns_multi() -> None: + df = df_inp.pipe(impute.columns, ["count", "count_2"], "<=5", (1, 5), pl.String) + + assert ( + df.select((pl.col("count") == "<=5").any()).item() is False + and df.select((pl.col("count_2") == "<=5").any()).item() is False + ) diff --git a/tests/main_test.py b/tests/main_test.py index 74f7c88..fe740fb 100644 --- a/tests/main_test.py +++ b/tests/main_test.py @@ -6,9 +6,10 @@ import polars as pl import pytest -from csv_helper.main import app from typer.testing import CliRunner +from csv_helper.main import app + runner = CliRunner() From dde9123188285572389811f51ed87c6509818cb2 Mon Sep 17 00:00:00 2001 From: Andrew Tiu <63322884+winter-again@users.noreply.github.com> Date: Wed, 21 May 2025 09:51:52 -0400 Subject: [PATCH 09/47] Remove file --- src/csv_helper/census.py | 107 --------------------------------------- 1 file changed, 107 deletions(-) delete mode 100644 src/csv_helper/census.py diff --git a/src/csv_helper/census.py b/src/csv_helper/census.py deleted file mode 100644 index 24d68ea..0000000 --- a/src/csv_helper/census.py +++ /dev/null @@ -1,107 +0,0 @@ -from io import StringIO -from pathlib import Path - -import polars as pl -import requests - -OUT = Path(__file__).parents[2] - - -def main() -> int: - df = get_census_popn("state", ".") - with pl.Config(tbl_cols=-1): - print(df) - - return 0 - - -# TODO: should consider the county FIPS changes over time, boundary changes, etc. -# TODO: irregular data availability and naming schemes mean it's impossible to generalize this? -# TODO: sep function or just state? -def get_census_popn(geo: str, out: str | Path | None = None) -> pl.DataFrame: - """ - Request and return state or county population data from Census Bureau's - FTP site. Restricted to 2020-2023 dataset, which has data from - 2020 to 2023. Also takes optional path to a directory in which - to save the raw data. - """ - # TODO: this county file actually has state data too so could just it for both? - # but then we're throwing away most of it so inefficient - arg_mapper = { - "state": ("2020-2023", "state", "NST-EST2023-ALLDATA"), - "county": ("2020-2023", "counties", "co-est2023-alldata"), - } - - time, geo, file_name = arg_mapper[geo] - url = f"https://www2.census.gov/programs-surveys/popest/datasets/{time}/{geo}/totals/{file_name}.csv" - - try: - req = requests.get(url) - req.raise_for_status() - except requests.exceptions.HTTPError: - print("HTTP error while requesting") - raise - except requests.exceptions.RequestException: - print("Some fatal request error") - raise - - if out is not None: - out = Path(out) - if not out.is_dir(): - raise ValueError(f"Directory at {out} doesn't exist") - - with open(out / f"{file_name}.csv", "w") as f: - f.write(req.text) - - with StringIO(req.text) as f: - lf = pl.scan_csv(f, schema_overrides={"STATE": pl.String, "COUNTY": pl.String}) - - invalid_states = ["60", "66", "69", "72", "74", "78"] - df = ( - lf.select( - "STATE", - "COUNTY", - "STNAME", - "CTYNAME", - "POPESTIMATE2020", - "POPESTIMATE2021", - "POPESTIMATE2022", - "POPESTIMATE2023", - ) - .filter( - pl.col("COUNTY") != "000", - ~pl.col("STATE").is_in(invalid_states), - ) - .with_columns(county_fips=pl.col("STATE") + pl.col("COUNTY")) - .drop("STATE", "COUNTY") - .rename( - { - "STNAME": "state_name", - "CTYNAME": "county_name", - "POPESTIMATE2020": "popn_2020", - "POPESTIMATE2021": "popn_2021", - "POPESTIMATE2022": "popn_2022", - "POPESTIMATE2023": "popn_2023", - } - ) - .select( - "state_name", - "county_name", - "county_fips", - "popn_2020", - "popn_2021", - "popn_2022", - "popn_2023", - ) - .collect() - ) - - assert df.select(pl.col("county_fips").n_unique()).item() == df.height, ( - "Expected to have one row per county FIPS" - ) - - return df - - -if __name__ == "__main__": - raise SystemExit(main()) From 7bfc292c3677f76d86840f8e50b6763052fd605e Mon Sep 17 00:00:00 2001 From: Andrew Tiu <63322884+winter-again@users.noreply.github.com> Date: Wed, 21 May 2025 12:12:42 -0400 Subject: [PATCH 10/47] Potentially better type hint for dtype. --- src/csv_helper/impute.py | 53 ++++++++++++++++++++++------------------ tests/impute_test.py | 3 ++- 2 files changed, 31 insertions(+), 25 deletions(-) diff --git a/src/csv_helper/impute.py b/src/csv_helper/impute.py index 9adcde0..9db6fb9 100644 --- a/src/csv_helper/impute.py +++ b/src/csv_helper/impute.py @@ -2,7 +2,15 @@ import numpy as np import polars as pl -from polars._typing import PolarsDataType +from polars._typing import PolarsIntegerType + + +def foo( + df: pl.DataFrame, + column: str, + dtype: PolarsIntegerType | type[pl.Float64] | type[pl.Float32] = pl.Int64, +) -> pl.DataFrame: + return df.with_columns(pl.col(column).cast(dtype)) # TODO: make this a check func that returns bool instead? @@ -52,7 +60,7 @@ def columns[T: (pl.DataFrame, pl.LazyFrame)]( columns: list[str], fill_flag: str, fill_range: tuple[int, int], - dtype: PolarsDataType = pl.Int64, + dtype: PolarsIntegerType | type[pl.Float64] | type[pl.Float32] = pl.Int64, seed: int | None = None, ) -> T: """ @@ -61,7 +69,7 @@ def columns[T: (pl.DataFrame, pl.LazyFrame)]( (bounds inclusive). If `dtype` is specified, will attempt to cast the filled columns - to that Polars type. Otherwise, assumes pl.Int64. + to that Polars type. Only supports Polars integer and float types. """ for col in columns: if col not in df.columns: @@ -136,26 +144,28 @@ def parse_fill_range(fill_range: tuple[int, int]) -> FillRange: return fill_range_int -def impute_column_pair( - df: pl.DataFrame, +def column_pair[T: (pl.DataFrame, pl.LazyFrame)]( + df: T, numerator: str, denominator: str, fill_flag: str, fill_range: tuple[int, int], - col_type: PolarsDataType = pl.Int64, + dtype: PolarsIntegerType | type[pl.Float64] | type[pl.Float32] = pl.Int64, seed: int | None = None, -) -> pl.DataFrame: +) -> T: """ - Fill instances of the fill_flag in both the numerator column - and the denominator column such that numerator <= denominator. + Fill instances of the `fill_flag` in both the `numerator` column + and the `denominator` column such that numerator <= denominator. - If col_type is specified, will attempt to cast the final result - of fill_cols to that type. Currently, the only options are - Polars numeric types. - """ - # TODO: should this also handle denom being in another file or dataframe (like the CLI - # command?) + If `dtype` is specified, will attempt to cast the final result + to that Polars type. Only supports Polars integer and float types. + Note: `seed` is only used for (1) imputing the denominator and (2) the + numerator case where the denominator is greater than the `fill_range` + upper bound. This is because we cannot guarantee desired reproducible + behavior in the numerator when denominator is less than or equal to the + `fill_range` upper bound since such imputation happens per-row. + """ if numerator not in df.columns: raise ValueError(f"Column {numerator} doesn't exist") @@ -174,7 +184,6 @@ def impute_column_pair( fill_range_int = parse_fill_range(fill_range) - # TODO: I think repeated use of the same seed is undesirable df = df.with_columns( pl.when(pl.col(denominator) == fill_flag) .then( @@ -186,18 +195,15 @@ def impute_column_pair( ) .otherwise(pl.col(denominator)) .alias(denominator) - .cast(col_type) - ) - - df = df.with_columns( + .cast(dtype) + ).with_columns( # TODO: use list b/c no arr.sample() what about struct perf? + # TODO: look into high mem consumption b/c of pl.int_ranges() pl.when( (pl.col(numerator) == fill_flag) & (pl.col(denominator) <= fill_range_int.ub) ) .then( - # TODO: look into high mem consumption for this pl.when() - # TODO: use of seed? pl.int_ranges(fill_range_int.lb, pl.col(denominator) + 1) .list.sample(1) .explode() @@ -207,7 +213,6 @@ def impute_column_pair( ) .then( pl.int_range(fill_range_int.lb, fill_range_int.ub + 1).sample( - # TODO: use of seed? pl.len(), with_replacement=True, seed=seed, @@ -215,7 +220,7 @@ def impute_column_pair( ) .otherwise(pl.col(numerator)) .alias(numerator) - .cast(pl.Int64) + .cast(dtype) ) return df diff --git a/tests/impute_test.py b/tests/impute_test.py index b3ba96f..737bb33 100644 --- a/tests/impute_test.py +++ b/tests/impute_test.py @@ -37,9 +37,10 @@ ) # TODO: test values are <= 5 +# TODO: test with seed? -def test_impute_columns() -> None: +def test_impute_columns_single() -> None: df = df_inp.pipe(impute.columns, ["count"], "<=5", (1, 5), pl.String) assert df.select((pl.col("count") == "<=5").any()).item() is False From 0a2fbbe820973e8c3224a286689757d0d0915b40 Mon Sep 17 00:00:00 2001 From: Andrew Tiu <63322884+winter-again@users.noreply.github.com> Date: Thu, 22 May 2025 07:46:04 -0400 Subject: [PATCH 11/47] Rename CLI test file. Specify what should be more correct types for dtype param. --- src/csv_helper/impute.py | 8 ++++---- tests/{main_test.py => cli_test.py} | 0 2 files changed, 4 insertions(+), 4 deletions(-) rename tests/{main_test.py => cli_test.py} (100%) diff --git a/src/csv_helper/impute.py b/src/csv_helper/impute.py index 9db6fb9..c4af1d5 100644 --- a/src/csv_helper/impute.py +++ b/src/csv_helper/impute.py @@ -2,13 +2,13 @@ import numpy as np import polars as pl -from polars._typing import PolarsIntegerType +from polars.datatypes.classes import FloatType, IntegerType def foo( df: pl.DataFrame, column: str, - dtype: PolarsIntegerType | type[pl.Float64] | type[pl.Float32] = pl.Int64, + dtype: type[IntegerType] | type[FloatType] = pl.Int64, ) -> pl.DataFrame: return df.with_columns(pl.col(column).cast(dtype)) @@ -60,7 +60,7 @@ def columns[T: (pl.DataFrame, pl.LazyFrame)]( columns: list[str], fill_flag: str, fill_range: tuple[int, int], - dtype: PolarsIntegerType | type[pl.Float64] | type[pl.Float32] = pl.Int64, + dtype: type[IntegerType] | type[FloatType] = pl.Int64, seed: int | None = None, ) -> T: """ @@ -150,7 +150,7 @@ def column_pair[T: (pl.DataFrame, pl.LazyFrame)]( denominator: str, fill_flag: str, fill_range: tuple[int, int], - dtype: PolarsIntegerType | type[pl.Float64] | type[pl.Float32] = pl.Int64, + dtype: type[IntegerType] | type[FloatType] = pl.Int64, seed: int | None = None, ) -> T: """ diff --git a/tests/main_test.py b/tests/cli_test.py similarity index 100% rename from tests/main_test.py rename to tests/cli_test.py From 6f7340f277f9611124ab3ec0d8c3d2368fa01ea2 Mon Sep 17 00:00:00 2001 From: Andrew Tiu <63322884+winter-again@users.noreply.github.com> Date: Thu, 22 May 2025 09:57:17 -0400 Subject: [PATCH 12/47] Reorganize modules. Clean up remaining funcs. --- src/csv_helper/__init__.py | 1 - src/csv_helper/complete.py | 34 ++++++++++++++++ src/csv_helper/impute.py | 80 ++++++++++---------------------------- tests/complete_test.py | 8 ++-- tests/impute_test.py | 11 +++--- 5 files changed, 65 insertions(+), 69 deletions(-) create mode 100644 src/csv_helper/complete.py diff --git a/src/csv_helper/__init__.py b/src/csv_helper/__init__.py index 4fb493f..e69de29 100644 --- a/src/csv_helper/__init__.py +++ b/src/csv_helper/__init__.py @@ -1 +0,0 @@ -from . import impute diff --git a/src/csv_helper/complete.py b/src/csv_helper/complete.py new file mode 100644 index 0000000..7959d3a --- /dev/null +++ b/src/csv_helper/complete.py @@ -0,0 +1,34 @@ +import polars as pl + + +def complete[T: (pl.DataFrame, pl.LazyFrame)](df: T, *columns: str | pl.Series) -> T: + """ + Generate rows for implicit missing values based on column combinations, + thus making them explicit missing values. Generated values marked as null. + + If columns are referenced with strings, then only existing values in those + columns are used for completion. If Series are specified instead, then + those Series can specify the full set of possible values, provided that + the Series is named after an existing column. + """ + cols = [] + for col in columns: + if isinstance(col, str): + cols.append(pl.col(col).unique().implode()) + elif isinstance(col, pl.Series): + cols.append(col.unique().implode()) + else: + raise TypeError( + f"The columns argument(s) must be either string or polars Series. Got {type(col)} instead." + ) + + unique_combos = df.select(cols) + col_names = unique_combos.collect_schema().names() + for col in col_names: + unique_combos = unique_combos.explode(col) + + res = unique_combos.join( + df, on=col_names, how="left", coalesce=True, validate="1:1" + ) + + return res diff --git a/src/csv_helper/impute.py b/src/csv_helper/impute.py index c4af1d5..8b92783 100644 --- a/src/csv_helper/impute.py +++ b/src/csv_helper/impute.py @@ -5,33 +5,25 @@ from polars.datatypes.classes import FloatType, IntegerType -def foo( - df: pl.DataFrame, - column: str, - dtype: type[IntegerType] | type[FloatType] = pl.Int64, -) -> pl.DataFrame: - return df.with_columns(pl.col(column).cast(dtype)) - - -# TODO: make this a check func that returns bool instead? -# and separate this into another func like summarize()? -def check(df: pl.DataFrame, fill_cols: list[str], fill_flag: str) -> pl.DataFrame: +def check[T: (pl.DataFrame, pl.LazyFrame)]( + df: T, columns: list[str], fill_flag: str +) -> T: """ - Return dataframe with counts and proportion of instances of `fill_flag` in each of - the given `fill_cols` + Summarize counts and proportion of instances of `fill_flag` in each of + the given columns. """ - for col in fill_cols: + for col in columns: if col not in df.columns: raise ValueError(f"Column {col} doesn't exist") - if not fill_flag_exists(df, col, fill_flag): + if not _fill_flag_exists(df, col, fill_flag): raise ValueError( f"Column {col} doesn't contain any instances of '{fill_flag}'" ) - if len(fill_cols) > 1: + if len(columns) > 1: return ( - df.select(fill_cols) + df.select(columns) .unpivot(variable_name="column", value_name="value") .group_by("column") .agg( @@ -42,7 +34,8 @@ def check(df: pl.DataFrame, fill_cols: list[str], fill_flag: str) -> pl.DataFram .sort("column") ) - fill_col = fill_cols[0] + fill_col = columns[0] + return ( df.select(fill_col) .unpivot(variable_name="column", value_name="value") @@ -75,12 +68,12 @@ def columns[T: (pl.DataFrame, pl.LazyFrame)]( if col not in df.columns: raise ValueError(f"Column {col} doesn't exist") - if not fill_flag_exists(df, col, fill_flag): + if not _fill_flag_exists(df, col, fill_flag): raise ValueError( f"Column {col} doesn't contain any instances of '{fill_flag}'" ) - fill_range_int = parse_fill_range(fill_range) + fill_range_int = _parse_fill_range(fill_range) n_cols = len(columns) if n_cols > 1: @@ -122,22 +115,22 @@ def columns[T: (pl.DataFrame, pl.LazyFrame)]( return df -def fill_flag_exists[T: (pl.DataFrame, pl.LazyFrame)]( +def _fill_flag_exists[T: (pl.DataFrame, pl.LazyFrame)]( df: T, column: str, fill_flag: str ) -> bool: return df.lazy().select((pl.col(column) == fill_flag).any()).collect().item() -class FillRange(NamedTuple): +class _FillRange(NamedTuple): lb: int ub: int -def parse_fill_range(fill_range: tuple[int, int]) -> FillRange: +def _parse_fill_range(fill_range: tuple[int, int]) -> _FillRange: if len(fill_range) != 2: raise ValueError("Must only pass 2 values") - fill_range_int = FillRange(*fill_range) + fill_range_int = _FillRange(*fill_range) if fill_range_int.lb > fill_range_int.ub: raise ValueError("Lower bound can't be greater than the upper bound") @@ -172,17 +165,17 @@ def column_pair[T: (pl.DataFrame, pl.LazyFrame)]( if denominator not in df.columns: raise ValueError(f"Column {numerator} doesn't exist") - if not fill_flag_exists(df, numerator, fill_flag): + if not _fill_flag_exists(df, numerator, fill_flag): raise ValueError( f"Column {numerator} doesn't contain any instances of '{fill_flag}'" ) - if not fill_flag_exists(df, denominator, fill_flag): + if not _fill_flag_exists(df, denominator, fill_flag): raise ValueError( f"Column {denominator} doesn't contain any instances of '{fill_flag}'" ) - fill_range_int = parse_fill_range(fill_range) + fill_range_int = _parse_fill_range(fill_range) df = df.with_columns( pl.when(pl.col(denominator) == fill_flag) @@ -198,7 +191,7 @@ def column_pair[T: (pl.DataFrame, pl.LazyFrame)]( .cast(dtype) ).with_columns( # TODO: use list b/c no arr.sample() what about struct perf? - # TODO: look into high mem consumption b/c of pl.int_ranges() + # NOTE: high mem consumption b/c of pl.int_ranges(), but not sure how to improve pl.when( (pl.col(numerator) == fill_flag) & (pl.col(denominator) <= fill_range_int.ub) @@ -224,34 +217,3 @@ def column_pair[T: (pl.DataFrame, pl.LazyFrame)]( ) return df - - -def complete[T: (pl.DataFrame, pl.LazyFrame)](df: T, *columns: str | pl.Series) -> T: - """ - Generate rows for implicit missing values based on column combinations, - thus making them explicit missing values. Generated values marked as null. - - If columns are referenced with strings, then only existing values in those - columns are used for completion. If Series are specified instead, then - those Series can specify the full set of possible values, provided that - the Series is named after an existing column. - """ - cols = [] - for col in columns: - if isinstance(col, str): - cols.append(pl.col(col).unique().implode()) - elif isinstance(col, pl.Series): - cols.append(col.unique().implode()) - else: - raise TypeError( - f"The columns argument(s) must be either string or polars Series. Got {type(col)} instead." - ) - - unique_combos = df.select(cols) - col_names = unique_combos.collect_schema().names() - for col in col_names: - unique_combos = unique_combos.explode(col) - - return unique_combos.join( - df, on=col_names, how="left", coalesce=True, validate="1:1" - ) diff --git a/tests/complete_test.py b/tests/complete_test.py index cad9223..5519429 100644 --- a/tests/complete_test.py +++ b/tests/complete_test.py @@ -1,7 +1,7 @@ import polars as pl from polars.testing import assert_frame_equal -from csv_helper import impute +from csv_helper import complete def test_complete_exists() -> None: @@ -12,7 +12,7 @@ def test_complete_exists() -> None: "value": [1, 2, 3, 4, 5], } ) - df = df.pipe(impute.complete, "country", "year").sort("country", "year") + df = df.pipe(complete.complete, "country", "year").sort("country", "year") result = pl.DataFrame( { "country": [ @@ -32,7 +32,7 @@ def test_complete_exists() -> None: "value": [1, 2, 3, 4, 5], } ) - lf = lf.pipe(impute.complete, "country", "year").sort("country", "year") + lf = lf.pipe(complete.complete, "country", "year").sort("country", "year") result = pl.LazyFrame( { "country": [ @@ -56,7 +56,7 @@ def test_complete_not_exists() -> None: } ) df = df.pipe( - impute.complete, + complete.complete, pl.Series("country", ["France", "UK", "Spain", "China"]), "year", ).sort("country", "year") diff --git a/tests/impute_test.py b/tests/impute_test.py index 737bb33..e01be52 100644 --- a/tests/impute_test.py +++ b/tests/impute_test.py @@ -41,15 +41,16 @@ def test_impute_columns_single() -> None: - df = df_inp.pipe(impute.columns, ["count"], "<=5", (1, 5), pl.String) + df = df_inp.pipe(impute.columns, ["count"], "<=5", (1, 5)) - assert df.select((pl.col("count") == "<=5").any()).item() is False + assert df.select((pl.col("count").cast(pl.String) == "<=5").any()).item() is False def test_impute_columns_multi() -> None: - df = df_inp.pipe(impute.columns, ["count", "count_2"], "<=5", (1, 5), pl.String) + df = df_inp.pipe(impute.columns, ["count", "count_2"], "<=5", (1, 5)) assert ( - df.select((pl.col("count") == "<=5").any()).item() is False - and df.select((pl.col("count_2") == "<=5").any()).item() is False + df.select((pl.col("count").cast(pl.String) == "<=5").any()).item() is False + and df.select((pl.col("count_2").cast(pl.String) == "<=5").any()).item() + is False ) From ceda0c5faaae5c30d7174cf78a23f963cd68c7e6 Mon Sep 17 00:00:00 2001 From: Andrew Tiu <63322884+winter-again@users.noreply.github.com> Date: Thu, 22 May 2025 10:43:49 -0400 Subject: [PATCH 13/47] Small changes --- src/csv_helper/complete.py | 10 +++++++--- src/csv_helper/impute.py | 6 +++--- tests/impute_test.py | 4 ++-- 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/src/csv_helper/complete.py b/src/csv_helper/complete.py index 7959d3a..e735418 100644 --- a/src/csv_helper/complete.py +++ b/src/csv_helper/complete.py @@ -27,8 +27,12 @@ def complete[T: (pl.DataFrame, pl.LazyFrame)](df: T, *columns: str | pl.Series) for col in col_names: unique_combos = unique_combos.explode(col) - res = unique_combos.join( - df, on=col_names, how="left", coalesce=True, validate="1:1" + df = unique_combos.join( + df, + on=col_names, + how="left", + coalesce=True, + validate="1:1", ) - return res + return df diff --git a/src/csv_helper/impute.py b/src/csv_helper/impute.py index 8b92783..08339ea 100644 --- a/src/csv_helper/impute.py +++ b/src/csv_helper/impute.py @@ -156,7 +156,7 @@ def column_pair[T: (pl.DataFrame, pl.LazyFrame)]( Note: `seed` is only used for (1) imputing the denominator and (2) the numerator case where the denominator is greater than the `fill_range` upper bound. This is because we cannot guarantee desired reproducible - behavior in the numerator when denominator is less than or equal to the + behavior for the numerator when denominator is less than or equal to the `fill_range` upper bound since such imputation happens per-row. """ if numerator not in df.columns: @@ -190,8 +190,8 @@ def column_pair[T: (pl.DataFrame, pl.LazyFrame)]( .alias(denominator) .cast(dtype) ).with_columns( - # TODO: use list b/c no arr.sample() what about struct perf? - # NOTE: high mem consumption b/c of pl.int_ranges(), but not sure how to improve + # NOTE: sometimes oddly high mem consumption b/c of pl.int_ranges(), + # but not sure how to improve pl.when( (pl.col(numerator) == fill_flag) & (pl.col(denominator) <= fill_range_int.ub) diff --git a/tests/impute_test.py b/tests/impute_test.py index e01be52..7fb8ac0 100644 --- a/tests/impute_test.py +++ b/tests/impute_test.py @@ -46,8 +46,8 @@ def test_impute_columns_single() -> None: assert df.select((pl.col("count").cast(pl.String) == "<=5").any()).item() is False -def test_impute_columns_multi() -> None: - df = df_inp.pipe(impute.columns, ["count", "count_2"], "<=5", (1, 5)) +def test_impute_pair() -> None: + df = df_inp.pipe(impute.column_pair, "count", "count_2", "<=5", (1, 5)) assert ( df.select((pl.col("count").cast(pl.String) == "<=5").any()).item() is False From 7ad132708b25e007f7210a2b5dbe25860b84886d Mon Sep 17 00:00:00 2001 From: Andrew Tiu <63322884+winter-again@users.noreply.github.com> Date: Thu, 22 May 2025 11:26:53 -0400 Subject: [PATCH 14/47] Remove pyright section from pyproject.toml. Shouldn't be necessary. --- pyproject.toml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 3c598e3..ef332aa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,9 +18,5 @@ build-backend = "pdm.backend" [dependency-groups] dev = ["pytest>=8.3.2", "tox>=4.23.2", "tox-uv>=1.16.0"] -[tool.pyright] -include = ["src"] -exclude = ["**/__pycache__"] - [tool.pytest.ini_options] pythonpath = ["src"] From 84ca6507306beba79688c143a712f831bf4a333f Mon Sep 17 00:00:00 2001 From: Andrew Tiu <63322884+winter-again@users.noreply.github.com> Date: Thu, 22 May 2025 16:40:59 -0400 Subject: [PATCH 15/47] Update env, pyproject.toml, remove unnecessary pytest config table --- pyproject.toml | 15 ++++++++------- src/csv_helper/{main.py => cli.py} | 0 tests/cli_test.py | 6 +++--- uv.lock | 22 +++++++++++----------- 4 files changed, 22 insertions(+), 21 deletions(-) rename src/csv_helper/{main.py => cli.py} (100%) diff --git a/pyproject.toml b/pyproject.toml index ef332aa..2c907c6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,15 +1,19 @@ [project] name = "csv-helper" version = "0.2.3" -description = "A simple CLI and library for imputing masked counts in CSV data" +description = "A simple library and CLI for working with CSV data" readme = "README.md" authors = [{ name = "Andrew Tiu", email = "andrew.tiu88@gmail.com" }] -license = { text = "MIT" } +license = "MIT" requires-python = ">=3.13" -dependencies = ["typer>=0.12.3", "polars>=1.4.1", "numpy>=2.0.1"] +dependencies = [ + "numpy>=2.2.6", + "polars>=1.30.0", + "typer>=0.15.4", +] [project.scripts] -csv-helper = "csv_helper.main:app" +csv-helper = "csv_helper.cli:app" [build-system] requires = ["pdm-backend"] @@ -17,6 +21,3 @@ build-backend = "pdm.backend" [dependency-groups] dev = ["pytest>=8.3.2", "tox>=4.23.2", "tox-uv>=1.16.0"] - -[tool.pytest.ini_options] -pythonpath = ["src"] diff --git a/src/csv_helper/main.py b/src/csv_helper/cli.py similarity index 100% rename from src/csv_helper/main.py rename to src/csv_helper/cli.py diff --git a/tests/cli_test.py b/tests/cli_test.py index fe740fb..19c4411 100644 --- a/tests/cli_test.py +++ b/tests/cli_test.py @@ -8,7 +8,7 @@ import pytest from typer.testing import CliRunner -from csv_helper.main import app +from csv_helper.cli import app runner = CliRunner() @@ -49,8 +49,8 @@ def test_data_sep(tmp_path) -> Path: return data_dir / "test_pair_sep" -# NOTE: can also access funcs in csv_helper.main directly: -# from csv_helper.main import preview +# NOTE: can also access funcs in csv_helper.cli directly: +# from csv_helper.cli import preview # preview("./tests/data/test_impute_data.csv", 10) diff --git a/uv.lock b/uv.lock index ff5ed4e..b120c29 100644 --- a/uv.lock +++ b/uv.lock @@ -60,9 +60,9 @@ dev = [ [package.metadata] requires-dist = [ - { name = "numpy", specifier = ">=2.0.1" }, - { name = "polars", specifier = ">=1.4.1" }, - { name = "typer", specifier = ">=0.12.3" }, + { name = "numpy", specifier = ">=2.2.6" }, + { name = "polars", specifier = ">=1.30.0" }, + { name = "typer", specifier = ">=0.15.4" }, ] [package.metadata.requires-dev] @@ -177,16 +177,16 @@ wheels = [ [[package]] name = "polars" -version = "1.29.0" +version = "1.30.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/0b/92/8d0e80fef779a392b1a736b554ffba62403026bad7df8a9de8b61dce018f/polars-1.29.0.tar.gz", hash = "sha256:d2acb71fce1ff0ea76db5f648abd91a7a6c460fafabce9a2e8175184efa00d02", size = 4582973, upload-time = "2025-04-30T20:57:22.46Z" } +sdist = { url = "https://files.pythonhosted.org/packages/82/b6/8dbdf626c0705a57f052708c9fc0860ffc2aa97955930d5faaf6a66fcfd3/polars-1.30.0.tar.gz", hash = "sha256:dfe94ae84a5efd9ba74e616e3e125b24ca155494a931890a8f17480737c4db45", size = 4668318, upload-time = "2025-05-21T13:33:24.175Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/e7/5f/b277179cfce1258fecf4ad73cf627f670be41fdf088727090f68ca9c96ff/polars-1.29.0-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:d053ee3217df31468caf2f5ddb9fd0f3a94fd42afdf7d9abe23d9d424adca02b", size = 34206809, upload-time = "2025-04-30T20:56:14.744Z" }, - { url = "https://files.pythonhosted.org/packages/34/e7/634e5cb55ce8bef23ac8ad8e3834c9045f4b3cbdff1fb9e7826d864436e6/polars-1.29.0-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:14131078e365eae5ccda3e67383cd43c0c0598d7f760bdf1cb4082566c5494ce", size = 31100055, upload-time = "2025-04-30T20:56:19.43Z" }, - { url = "https://files.pythonhosted.org/packages/50/15/0e9072e410731980ebc567c60a0a5f02bc2183310e48704ef83682cdd54c/polars-1.29.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:54f6902da333f99208b8d27765d580ba0299b412787c0564275912122c228e40", size = 34828438, upload-time = "2025-04-30T20:56:22.839Z" }, - { url = "https://files.pythonhosted.org/packages/69/c0/90fcaac5c95aa225b3899698289c0424d429ef72248b593f15294f95a35e/polars-1.29.0-cp39-abi3-manylinux_2_24_aarch64.whl", hash = "sha256:7a0ac6a11088279af4d715f4b58068835f551fa5368504a53401743006115e78", size = 32073830, upload-time = "2025-04-30T20:56:26.742Z" }, - { url = "https://files.pythonhosted.org/packages/17/ed/e5e570e22a03549a3c5397035a006b2c6343856a9fd15cccb5db39bdfa0a/polars-1.29.0-cp39-abi3-win_amd64.whl", hash = "sha256:f5aac4656e58b1e12f9481950981ef68b5b0e53dd4903bd72472efd2d09a74c8", size = 34971841, upload-time = "2025-04-30T20:56:29.953Z" }, - { url = "https://files.pythonhosted.org/packages/45/fd/9039f609d76b3ebb13777f289502a00b52709aea5c35aed01d1090ac142f/polars-1.29.0-cp39-abi3-win_arm64.whl", hash = "sha256:0c105b07b980b77fe88c3200b015bf4695e53185385f0f244c13e2d1027c7bbf", size = 31298689, upload-time = "2025-04-30T20:56:33.449Z" }, + { url = "https://files.pythonhosted.org/packages/40/48/e9b2cb379abcc9f7aff2e701098fcdb9fe6d85dc4ad4cec7b35d39c70951/polars-1.30.0-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:4c33bc97c29b7112f0e689a2f8a33143973a3ff466c70b25c7fd1880225de6dd", size = 35704342, upload-time = "2025-05-21T13:32:22.996Z" }, + { url = "https://files.pythonhosted.org/packages/36/ca/f545f61282f75eea4dfde4db2944963dcd59abd50c20e33a1c894da44dad/polars-1.30.0-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:e3d05914c364b8e39a5b10dcf97e84d76e516b3b1693880bf189a93aab3ca00d", size = 32459857, upload-time = "2025-05-21T13:32:27.728Z" }, + { url = "https://files.pythonhosted.org/packages/76/20/e018cd87d7cb6f8684355f31f4e193222455a6e8f7b942f4a2934f5969c7/polars-1.30.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1a52af3862082b868c1febeae650af8ae8a2105d2cb28f0449179a7b44f54ccf", size = 36267243, upload-time = "2025-05-21T13:32:31.796Z" }, + { url = "https://files.pythonhosted.org/packages/cb/e7/b88b973021be07b13d91b9301cc14392c994225ef5107a32a8ffd3fd6424/polars-1.30.0-cp39-abi3-manylinux_2_24_aarch64.whl", hash = "sha256:ffb3ef133454275d4254442257c5f71dd6e393ce365c97997dadeb6fa9d6d4b5", size = 33416871, upload-time = "2025-05-21T13:32:35.077Z" }, + { url = "https://files.pythonhosted.org/packages/dd/7c/d46d4381adeac537b8520b653dc30cb8b7edbf59883d71fbb989e9005de1/polars-1.30.0-cp39-abi3-win_amd64.whl", hash = "sha256:c26b633a9bd530c5fc09d317fca3bb3e16c772bd7df7549a9d8ec1934773cc5d", size = 36363630, upload-time = "2025-05-21T13:32:38.286Z" }, + { url = "https://files.pythonhosted.org/packages/fb/b5/5056d0c12aadb57390d0627492bef8b1abf3549474abb9ae0fd4e2bfa885/polars-1.30.0-cp39-abi3-win_arm64.whl", hash = "sha256:476f1bde65bc7b4d9f80af370645c2981b5798d67c151055e58534e89e96f2a8", size = 32643590, upload-time = "2025-05-21T13:32:42.107Z" }, ] [[package]] From affa793c8e7e2b182e2fffa38f388c0e38408306 Mon Sep 17 00:00:00 2001 From: Andrew Tiu <63322884+winter-again@users.noreply.github.com> Date: Thu, 22 May 2025 18:35:07 -0400 Subject: [PATCH 16/47] Testing nox over tox --- .github/workflows/tests.yml | 14 ++-- .python-version | 2 +- noxfile.py | 15 ++++ pyproject.toml | 7 +- uv.lock | 148 +++++++++++++++--------------------- 5 files changed, 90 insertions(+), 96 deletions(-) create mode 100644 noxfile.py diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 21f615b..8905ef5 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -21,25 +21,23 @@ jobs: matrix: os: [ubuntu-latest, macos-latest, windows-latest] python-version: - - "3.11" - "3.12" + - "3.13" steps: - name: Checkout uses: actions/checkout@v4 - name: Install uv - uses: astral-sh/setup-uv@v4 + uses: astral-sh/setup-uv@v5 with: - version: "0.5.5" + version: "0.7.7" - name: Set up Python ${{ matrix.python-version }} run: uv python install ${{ matrix.python-version }} - name: Install project - run: uv sync --all-extras --dev + run: uv sync --locked --all-extras --dev - - name: Run tests with tox - run: | - uv run -- tox --version - uv run -- tox -e py + - name: Run tests with nox on ${{ matrix.os }} + run: uv run -- nox -s "${{ matrix.python-version }}" diff --git a/.python-version b/.python-version index 24ee5b1..2c20ac9 100644 --- a/.python-version +++ b/.python-version @@ -1 +1 @@ -3.13 +3.13.3 diff --git a/noxfile.py b/noxfile.py new file mode 100644 index 0000000..4683324 --- /dev/null +++ b/noxfile.py @@ -0,0 +1,15 @@ +import nox + +nox.options.default_venv_backend = "uv" + + +@nox.session(python=["3.12", "3.13"]) +def tests(session): + """Run tests with pytest.""" + session.run( + "uv", + "sync", + f"--python={session.virtualenv.location}", + env={"UV_PROJECT_ENVIRONMENT": session.virtualenv.location}, + ) + session.run("pytest", "-vv", "tests") diff --git a/pyproject.toml b/pyproject.toml index 2c907c6..23a1fa1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,7 @@ description = "A simple library and CLI for working with CSV data" readme = "README.md" authors = [{ name = "Andrew Tiu", email = "andrew.tiu88@gmail.com" }] license = "MIT" -requires-python = ">=3.13" +requires-python = ">=3.12" dependencies = [ "numpy>=2.2.6", "polars>=1.30.0", @@ -20,4 +20,7 @@ requires = ["pdm-backend"] build-backend = "pdm.backend" [dependency-groups] -dev = ["pytest>=8.3.2", "tox>=4.23.2", "tox-uv>=1.16.0"] +dev = [ + "nox>=2025.5.1", + "pytest>=8.3.5", +] diff --git a/uv.lock b/uv.lock index b120c29..a49d157 100644 --- a/uv.lock +++ b/uv.lock @@ -1,23 +1,23 @@ version = 1 revision = 2 -requires-python = ">=3.13" +requires-python = ">=3.12" [[package]] -name = "cachetools" -version = "5.5.2" +name = "argcomplete" +version = "3.6.2" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/6c/81/3747dad6b14fa2cf53fcf10548cf5aea6913e96fab41a3c198676f8948a5/cachetools-5.5.2.tar.gz", hash = "sha256:1a661caa9175d26759571b2e19580f9d6393969e5dfca11fdb1f947a23e640d4", size = 28380, upload-time = "2025-02-20T21:01:19.524Z" } +sdist = { url = "https://files.pythonhosted.org/packages/16/0f/861e168fc813c56a78b35f3c30d91c6757d1fd185af1110f1aec784b35d0/argcomplete-3.6.2.tar.gz", hash = "sha256:d0519b1bc867f5f4f4713c41ad0aba73a4a5f007449716b16f385f2166dc6adf", size = 73403, upload-time = "2025-04-03T04:57:03.52Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/72/76/20fa66124dbe6be5cafeb312ece67de6b61dd91a0247d1ea13db4ebb33c2/cachetools-5.5.2-py3-none-any.whl", hash = "sha256:d26a22bcc62eb95c3beabd9f1ee5e820d3d2704fe2967cbe350e20c8ffcd3f0a", size = 10080, upload-time = "2025-02-20T21:01:16.647Z" }, + { url = "https://files.pythonhosted.org/packages/31/da/e42d7a9d8dd33fa775f467e4028a47936da2f01e4b0e561f9ba0d74cb0ca/argcomplete-3.6.2-py3-none-any.whl", hash = "sha256:65b3133a29ad53fb42c48cf5114752c7ab66c1c38544fdf6460f450c09b42591", size = 43708, upload-time = "2025-04-03T04:57:01.591Z" }, ] [[package]] -name = "chardet" -version = "5.2.0" +name = "attrs" +version = "25.3.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/f3/0d/f7b6ab21ec75897ed80c17d79b15951a719226b9fababf1e40ea74d69079/chardet-5.2.0.tar.gz", hash = "sha256:1b3b6ff479a8c414bc3fa2c0852995695c4a026dcd6d0633b2dd092ca39c1cf7", size = 2069618, upload-time = "2023-08-01T19:23:02.662Z" } +sdist = { url = "https://files.pythonhosted.org/packages/5a/b0/1367933a8532ee6ff8d63537de4f1177af4bff9f3e829baf7331f595bb24/attrs-25.3.0.tar.gz", hash = "sha256:75d7cefc7fb576747b2c81b4442d4d4a1ce0900973527c011d1030fd3bf4af1b", size = 812032, upload-time = "2025-03-13T11:10:22.779Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/38/6f/f5fbc992a329ee4e0f288c1fe0e2ad9485ed064cac731ed2fe47dcc38cbf/chardet-5.2.0-py3-none-any.whl", hash = "sha256:e1cf59446890a00105fe7b7912492ea04b6e6f06d4b742b2c788469e34c82970", size = 199385, upload-time = "2023-08-01T19:23:00.661Z" }, + { url = "https://files.pythonhosted.org/packages/77/06/bb80f5f86020c4551da315d78b3ab75e8228f89f0162f2c3a819e407941a/attrs-25.3.0-py3-none-any.whl", hash = "sha256:427318ce031701fea540783410126f03899a97ffc6f61596ad581ac2e40e3bc3", size = 63815, upload-time = "2025-03-13T11:10:21.14Z" }, ] [[package]] @@ -41,6 +41,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" }, ] +[[package]] +name = "colorlog" +version = "6.9.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/d3/7a/359f4d5df2353f26172b3cc39ea32daa39af8de522205f512f458923e677/colorlog-6.9.0.tar.gz", hash = "sha256:bfba54a1b93b94f54e1f4fe48395725a3d92fd2a4af702f6bd70946bdc0c6ac2", size = 16624, upload-time = "2024-10-29T18:34:51.011Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e3/51/9b208e85196941db2f0654ad0357ca6388ab3ed67efdbfc799f35d1f83aa/colorlog-6.9.0-py3-none-any.whl", hash = "sha256:5906e71acd67cb07a71e779c47c4bcb45fb8c2993eebe9e5adcd6a6f1b283eff", size = 11424, upload-time = "2024-10-29T18:34:49.815Z" }, +] + [[package]] name = "csv-helper" version = "0.2.3" @@ -53,9 +65,8 @@ dependencies = [ [package.dev-dependencies] dev = [ + { name = "nox" }, { name = "pytest" }, - { name = "tox" }, - { name = "tox-uv" }, ] [package.metadata] @@ -67,9 +78,20 @@ requires-dist = [ [package.metadata.requires-dev] dev = [ - { name = "pytest", specifier = ">=8.3.2" }, - { name = "tox", specifier = ">=4.23.2" }, - { name = "tox-uv", specifier = ">=1.16.0" }, + { name = "nox", specifier = ">=2025.5.1" }, + { name = "pytest", specifier = ">=8.3.5" }, +] + +[[package]] +name = "dependency-groups" +version = "1.3.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "packaging" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/62/55/f054de99871e7beb81935dea8a10b90cd5ce42122b1c3081d5282fdb3621/dependency_groups-1.3.1.tar.gz", hash = "sha256:78078301090517fd938c19f64a53ce98c32834dfe0dee6b88004a569a6adfefd", size = 10093, upload-time = "2025-05-02T00:34:29.452Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/99/c7/d1ec24fb280caa5a79b6b950db565dab30210a66259d17d5bb2b3a9f878d/dependency_groups-1.3.1-py3-none-any.whl", hash = "sha256:51aeaa0dfad72430fcfb7bcdbefbd75f3792e5919563077f30bc0d73f4493030", size = 8664, upload-time = "2025-05-02T00:34:27.085Z" }, ] [[package]] @@ -120,12 +142,39 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979, upload-time = "2022-08-14T12:40:09.779Z" }, ] +[[package]] +name = "nox" +version = "2025.5.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "argcomplete" }, + { name = "attrs" }, + { name = "colorlog" }, + { name = "dependency-groups" }, + { name = "packaging" }, + { name = "virtualenv" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b4/80/47712208c410defec169992e57c179f0f4d92f5dd17ba8daca50a8077e23/nox-2025.5.1.tar.gz", hash = "sha256:2a571dfa7a58acc726521ac3cd8184455ebcdcbf26401c7b737b5bc6701427b2", size = 4023334, upload-time = "2025-05-01T16:35:48.056Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a6/be/7b423b02b09eb856beffe76fe8c4121c99852db74dd12a422dcb72d1134e/nox-2025.5.1-py3-none-any.whl", hash = "sha256:56abd55cf37ff523c254fcec4d152ed51e5fe80e2ab8317221d8b828ac970a31", size = 71753, upload-time = "2025-05-01T16:35:46.037Z" }, +] + [[package]] name = "numpy" version = "2.2.6" source = { registry = "https://pypi.org/simple" } sdist = { url = "https://files.pythonhosted.org/packages/76/21/7d2a95e4bba9dc13d043ee156a356c0a8f0c6309dff6b21b4d71a073b8a8/numpy-2.2.6.tar.gz", hash = "sha256:e29554e2bef54a90aa5cc07da6ce955accb83f21ab5de01a62c8478897b264fd", size = 20276440, upload-time = "2025-05-17T22:38:04.611Z" } wheels = [ + { url = "https://files.pythonhosted.org/packages/82/5d/c00588b6cf18e1da539b45d3598d3557084990dcc4331960c15ee776ee41/numpy-2.2.6-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:41c5a21f4a04fa86436124d388f6ed60a9343a6f767fced1a8a71c3fbca038ff", size = 20875348, upload-time = "2025-05-17T21:34:39.648Z" }, + { url = "https://files.pythonhosted.org/packages/66/ee/560deadcdde6c2f90200450d5938f63a34b37e27ebff162810f716f6a230/numpy-2.2.6-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:de749064336d37e340f640b05f24e9e3dd678c57318c7289d222a8a2f543e90c", size = 14119362, upload-time = "2025-05-17T21:35:01.241Z" }, + { url = "https://files.pythonhosted.org/packages/3c/65/4baa99f1c53b30adf0acd9a5519078871ddde8d2339dc5a7fde80d9d87da/numpy-2.2.6-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:894b3a42502226a1cac872f840030665f33326fc3dac8e57c607905773cdcde3", size = 5084103, upload-time = "2025-05-17T21:35:10.622Z" }, + { url = "https://files.pythonhosted.org/packages/cc/89/e5a34c071a0570cc40c9a54eb472d113eea6d002e9ae12bb3a8407fb912e/numpy-2.2.6-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:71594f7c51a18e728451bb50cc60a3ce4e6538822731b2933209a1f3614e9282", size = 6625382, upload-time = "2025-05-17T21:35:21.414Z" }, + { url = "https://files.pythonhosted.org/packages/f8/35/8c80729f1ff76b3921d5c9487c7ac3de9b2a103b1cd05e905b3090513510/numpy-2.2.6-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f2618db89be1b4e05f7a1a847a9c1c0abd63e63a1607d892dd54668dd92faf87", size = 14018462, upload-time = "2025-05-17T21:35:42.174Z" }, + { url = "https://files.pythonhosted.org/packages/8c/3d/1e1db36cfd41f895d266b103df00ca5b3cbe965184df824dec5c08c6b803/numpy-2.2.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fd83c01228a688733f1ded5201c678f0c53ecc1006ffbc404db9f7a899ac6249", size = 16527618, upload-time = "2025-05-17T21:36:06.711Z" }, + { url = "https://files.pythonhosted.org/packages/61/c6/03ed30992602c85aa3cd95b9070a514f8b3c33e31124694438d88809ae36/numpy-2.2.6-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:37c0ca431f82cd5fa716eca9506aefcabc247fb27ba69c5062a6d3ade8cf8f49", size = 15505511, upload-time = "2025-05-17T21:36:29.965Z" }, + { url = "https://files.pythonhosted.org/packages/b7/25/5761d832a81df431e260719ec45de696414266613c9ee268394dd5ad8236/numpy-2.2.6-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:fe27749d33bb772c80dcd84ae7e8df2adc920ae8297400dabec45f0dedb3f6de", size = 18313783, upload-time = "2025-05-17T21:36:56.883Z" }, + { url = "https://files.pythonhosted.org/packages/57/0a/72d5a3527c5ebffcd47bde9162c39fae1f90138c961e5296491ce778e682/numpy-2.2.6-cp312-cp312-win32.whl", hash = "sha256:4eeaae00d789f66c7a25ac5f34b71a7035bb474e679f410e5e1a94deb24cf2d4", size = 6246506, upload-time = "2025-05-17T21:37:07.368Z" }, + { url = "https://files.pythonhosted.org/packages/36/fa/8c9210162ca1b88529ab76b41ba02d433fd54fecaf6feb70ef9f124683f1/numpy-2.2.6-cp312-cp312-win_amd64.whl", hash = "sha256:c1f9540be57940698ed329904db803cf7a402f3fc200bfe599334c9bd84a40b2", size = 12614190, upload-time = "2025-05-17T21:37:26.213Z" }, { url = "https://files.pythonhosted.org/packages/f9/5c/6657823f4f594f72b5471f1db1ab12e26e890bb2e41897522d134d2a3e81/numpy-2.2.6-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:0811bb762109d9708cca4d0b13c4f67146e3c3b7cf8d34018c722adb2d957c84", size = 20867828, upload-time = "2025-05-17T21:37:56.699Z" }, { url = "https://files.pythonhosted.org/packages/dc/9e/14520dc3dadf3c803473bd07e9b2bd1b69bc583cb2497b47000fed2fa92f/numpy-2.2.6-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:287cc3162b6f01463ccd86be154f284d0893d2b3ed7292439ea97eafa8170e0b", size = 14143006, upload-time = "2025-05-17T21:38:18.291Z" }, { url = "https://files.pythonhosted.org/packages/4f/06/7e96c57d90bebdce9918412087fc22ca9851cceaf5567a45c1f404480e9e/numpy-2.2.6-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:f1372f041402e37e5e633e586f62aa53de2eac8d98cbfb822806ce4bbefcb74d", size = 5076765, upload-time = "2025-05-17T21:38:27.319Z" }, @@ -198,18 +247,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/8a/0b/9fcc47d19c48b59121088dd6da2488a49d5f72dacf8262e2790a1d2c7d15/pygments-2.19.1-py3-none-any.whl", hash = "sha256:9ea1544ad55cecf4b8242fab6dd35a93bbce657034b0611ee383099054ab6d8c", size = 1225293, upload-time = "2025-01-06T17:26:25.553Z" }, ] -[[package]] -name = "pyproject-api" -version = "1.9.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "packaging" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/19/fd/437901c891f58a7b9096511750247535e891d2d5a5a6eefbc9386a2b41d5/pyproject_api-1.9.1.tar.gz", hash = "sha256:43c9918f49daab37e302038fc1aed54a8c7a91a9fa935d00b9a485f37e0f5335", size = 22710, upload-time = "2025-05-12T14:41:58.025Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/ef/e6/c293c06695d4a3ab0260ef124a74ebadba5f4c511ce3a4259e976902c00b/pyproject_api-1.9.1-py3-none-any.whl", hash = "sha256:7d6238d92f8962773dd75b5f0c4a6a27cce092a14b623b811dba656f3b628948", size = 13158, upload-time = "2025-05-12T14:41:56.217Z" }, -] - [[package]] name = "pytest" version = "8.3.5" @@ -247,40 +284,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e0/f9/0595336914c5619e5f28a1fb793285925a8cd4b432c9da0a987836c7f822/shellingham-1.5.4-py2.py3-none-any.whl", hash = "sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686", size = 9755, upload-time = "2023-10-24T04:13:38.866Z" }, ] -[[package]] -name = "tox" -version = "4.26.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "cachetools" }, - { name = "chardet" }, - { name = "colorama" }, - { name = "filelock" }, - { name = "packaging" }, - { name = "platformdirs" }, - { name = "pluggy" }, - { name = "pyproject-api" }, - { name = "virtualenv" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/fd/3c/dcec0c00321a107f7f697fd00754c5112572ea6dcacb40b16d8c3eea7c37/tox-4.26.0.tar.gz", hash = "sha256:a83b3b67b0159fa58e44e646505079e35a43317a62d2ae94725e0586266faeca", size = 197260, upload-time = "2025-05-13T15:04:28.481Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/de/14/f58b4087cf248b18c795b5c838c7a8d1428dfb07cb468dad3ec7f54041ab/tox-4.26.0-py3-none-any.whl", hash = "sha256:75f17aaf09face9b97bd41645028d9f722301e912be8b4c65a3f938024560224", size = 172761, upload-time = "2025-05-13T15:04:26.207Z" }, -] - -[[package]] -name = "tox-uv" -version = "1.25.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "packaging" }, - { name = "tox" }, - { name = "uv" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/5d/3a/3e445f25978a716ba6674f33f687d9336d0312086a277a778a5e9e9220d7/tox_uv-1.25.0.tar.gz", hash = "sha256:59ee5e694c41fef7bbcf058f22a5f9b6a8509698def2ea60c08554f4e36b9fcc", size = 21114, upload-time = "2025-02-21T16:37:51.796Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/3c/a7/f5c29e0e6faaccefcab607f672b176927144e9412c8183d21301ea2a6f6c/tox_uv-1.25.0-py3-none-any.whl", hash = "sha256:50cfe7795dcd49b2160d7d65b5ece8717f38cfedc242c852a40ec0a71e159bf7", size = 16431, upload-time = "2025-02-21T16:37:49.657Z" }, -] - [[package]] name = "typer" version = "0.15.4" @@ -305,31 +308,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/8b/54/b1ae86c0973cc6f0210b53d508ca3641fb6d0c56823f288d108bc7ab3cc8/typing_extensions-4.13.2-py3-none-any.whl", hash = "sha256:a439e7c04b49fec3e5d3e2beaa21755cadbbdc391694e28ccdd36ca4a1408f8c", size = 45806, upload-time = "2025-04-10T14:19:03.967Z" }, ] -[[package]] -name = "uv" -version = "0.7.6" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/f5/1f/5f2579f4efccb7044e3c1f0b445fa6ac04e5c40c95818ee8c94e3733fe85/uv-0.7.6.tar.gz", hash = "sha256:bd188ac9d9902f1652130837ede39768d7c8f72b0a68fd484ba884d88e963b66", size = 3251391, upload-time = "2025-05-20T00:31:46.993Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/f6/d6/b32f27b599f09f08086c80ba88431f06ad93e1efcd9eb3f9ab18fc6dbaf5/uv-0.7.6-py3-none-linux_armv6l.whl", hash = "sha256:434f1820a8fbf54494c53d8ebb2b6509d98a2792876a2d990f90ac70afc9a11a", size = 16659078, upload-time = "2025-05-20T00:31:02.827Z" }, - { url = "https://files.pythonhosted.org/packages/82/53/df3043448390d2ec604cb4ca418a811204c41b14030d3f9188ba5d179d0d/uv-0.7.6-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:0bad870f797971423d7f654423cf3ccd3bbd3688f88aee3f84e79af008c6abae", size = 16839320, upload-time = "2025-05-20T00:31:06.654Z" }, - { url = "https://files.pythonhosted.org/packages/a2/19/c0f00312762396c68c1c45e744fc1933bb777af8ae9874f20462b0cd0042/uv-0.7.6-py3-none-macosx_11_0_arm64.whl", hash = "sha256:8a86cfefd0b9cd3b8a8577e79a0e61d52ade23a7876ed5b5312cc1f05baa140b", size = 15592857, upload-time = "2025-05-20T00:31:09.495Z" }, - { url = "https://files.pythonhosted.org/packages/07/62/ffe522bcd4c2f74ced5aeeaa576c3750bfbb7a14aaee609984703fdc6b5d/uv-0.7.6-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.musllinux_1_1_aarch64.whl", hash = "sha256:4cd32743d2c0c0b40ffbde48163ae2835353d319472aadabd71e9dcf98152e8b", size = 16065179, upload-time = "2025-05-20T00:31:12.226Z" }, - { url = "https://files.pythonhosted.org/packages/1c/7a/301e7abb34bb6f0b83ef4c92899ce3dd98f5d0d74bc3ff8d6fb8c1aaf0de/uv-0.7.6-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:32aecfd27bd724d8ca8bafa811a69d436fcd403d589b025fbbd2e967eb154b46", size = 16394680, upload-time = "2025-05-20T00:31:14.943Z" }, - { url = "https://files.pythonhosted.org/packages/1a/0c/b35bcf37d2b9c547fa92800566dd7cb4eb85168fe8da57872ce717d135d1/uv-0.7.6-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e15ac957e0a319dba40c897b9408c93e603d2317807384ec8f7d47a9e17c0d85", size = 17201299, upload-time = "2025-05-20T00:31:17.633Z" }, - { url = "https://files.pythonhosted.org/packages/cd/67/397adca676c233dcf0ac84c05c2aedc07aaf5bf6f9622b04069e9f3aa81a/uv-0.7.6-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:832d7741117c41455ff43569b88892ec0a81938750a8bc4307e1160b70c91f3c", size = 18063672, upload-time = "2025-05-20T00:31:20.128Z" }, - { url = "https://files.pythonhosted.org/packages/2f/66/b5a27f3027903a5b735b6ec45e18fae19dc29973620bd4431d0ff5cfa0c4/uv-0.7.6-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:17c79eec35c65bbd25180203be7266dd7d43381e02e28a8f2cb6ee809d008837", size = 17792312, upload-time = "2025-05-20T00:31:22.574Z" }, - { url = "https://files.pythonhosted.org/packages/9d/4b/902cdb6ad576c4a9899aaad879b2853775e64aed5921f236f5589e6cf098/uv-0.7.6-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c18b2437e254906b1f48710e1fc1b313052e2ee7261ff104d58b25ef2d347d98", size = 22166764, upload-time = "2025-05-20T00:31:25.414Z" }, - { url = "https://files.pythonhosted.org/packages/89/b5/9c34b2aefb5ec8c2be9175d7ee5bfd5f925e2d354b9dd80b137a1e2d6727/uv-0.7.6-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1f46cfd2de04dd261cc75158c293de64f99cc907ab0d395f3a0f97c94e7f076a", size = 17447866, upload-time = "2025-05-20T00:31:27.909Z" }, - { url = "https://files.pythonhosted.org/packages/7a/e1/86ba96a12114af4f83013b4afac4058b63e2caa319c2fa1c07652632b922/uv-0.7.6-py3-none-manylinux_2_28_aarch64.whl", hash = "sha256:c44311ed1a32e397d81e346e7b868e4ae22f2df2e5ba601e055683fa4cc68323", size = 16337444, upload-time = "2025-05-20T00:31:30.122Z" }, - { url = "https://files.pythonhosted.org/packages/7d/cc/362751b0477d604a1ce54939bcc5a67f262fa76bbdd679d2aec355cdc3b5/uv-0.7.6-py3-none-musllinux_1_1_armv7l.whl", hash = "sha256:5e283166816f129f29023a4bfdf49fdb33e1e2bcb4e555e9d6996122867a44af", size = 16370656, upload-time = "2025-05-20T00:31:32.349Z" }, - { url = "https://files.pythonhosted.org/packages/8e/93/38557f828bcab2b203c3aab85037981f4c548bbcae01c46b1207a95fc068/uv-0.7.6-py3-none-musllinux_1_1_i686.whl", hash = "sha256:72e9337db681a16a7203abe112fedc249f01fe4cadd6d65d23c85031183dcf23", size = 16753839, upload-time = "2025-05-20T00:31:34.995Z" }, - { url = "https://files.pythonhosted.org/packages/ae/01/290f42244b2373988082ca1fa7d3f5b989dd3b31038cd9d3f443457e72ed/uv-0.7.6-py3-none-musllinux_1_1_x86_64.whl", hash = "sha256:310e488493d03a843b838e9301af1731b02bc93b14bcaa38c62d448cebbdca3c", size = 17596256, upload-time = "2025-05-20T00:31:37.696Z" }, - { url = "https://files.pythonhosted.org/packages/8a/fd/9ddc1b5f45ff0c4884d896e77b1b2211d552875cbed6966c9ee242b361f4/uv-0.7.6-py3-none-win32.whl", hash = "sha256:e3fb41bd4bf88ab21df773b642465fffc469e173645eb986d000db38d7bb8e3c", size = 16972489, upload-time = "2025-05-20T00:31:39.961Z" }, - { url = "https://files.pythonhosted.org/packages/2f/5d/b3889f63bbb997f12a797dd08c10d896f4cf9b7c639b06ee517dffb9ed33/uv-0.7.6-py3-none-win_amd64.whl", hash = "sha256:4026513441dc01326f8bc04517956385442523ed1d40400e14723d8fb3d9c321", size = 18457656, upload-time = "2025-05-20T00:31:42.476Z" }, - { url = "https://files.pythonhosted.org/packages/48/25/2695c5cb6b000fc17980bf9ecf86862e0897af8a515fae0c766a245b7a29/uv-0.7.6-py3-none-win_arm64.whl", hash = "sha256:ad79d71d2bb4cc1cb22d09771a23f70190e3b5fa41668da208e694b50b900178", size = 17113987, upload-time = "2025-05-20T00:31:44.932Z" }, -] - [[package]] name = "virtualenv" version = "20.31.2" From d7e5d62bff6b849fca1dd08b424adba94da9c2a5 Mon Sep 17 00:00:00 2001 From: Andrew Tiu <63322884+winter-again@users.noreply.github.com> Date: Thu, 22 May 2025 18:38:50 -0400 Subject: [PATCH 17/47] Add typing to noxfile and hopefully fix bad session specification. --- .github/workflows/tests.yml | 2 +- noxfile.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 8905ef5..1ef5fd6 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -40,4 +40,4 @@ jobs: run: uv sync --locked --all-extras --dev - name: Run tests with nox on ${{ matrix.os }} - run: uv run -- nox -s "${{ matrix.python-version }}" + run: uv run -- nox -s "tests-${{ matrix.python-version }}" diff --git a/noxfile.py b/noxfile.py index 4683324..db5d9b8 100644 --- a/noxfile.py +++ b/noxfile.py @@ -4,7 +4,7 @@ @nox.session(python=["3.12", "3.13"]) -def tests(session): +def tests(session: nox.Session) -> None: """Run tests with pytest.""" session.run( "uv", From 89874f4e36fee7efac027b9adb3e3e3e56f3bd63 Mon Sep 17 00:00:00 2001 From: Andrew Tiu <63322884+winter-again@users.noreply.github.com> Date: Thu, 22 May 2025 18:39:46 -0400 Subject: [PATCH 18/47] Delete tox config file> --- tox.ini | 11 ----------- 1 file changed, 11 deletions(-) delete mode 100644 tox.ini diff --git a/tox.ini b/tox.ini deleted file mode 100644 index 042c010..0000000 --- a/tox.ini +++ /dev/null @@ -1,11 +0,0 @@ -[tox] -env_list = py{311,312} - -[testenv] -runner = uv-venv-lock-runner -description = run tests -extras = - dev -; groups = test -; commands = test -commands = uv run -- pytest -v From ceab0e6fd8a67f459c78b04dc6c63d1d5c177289 Mon Sep 17 00:00:00 2001 From: Andrew Tiu <63322884+winter-again@users.noreply.github.com> Date: Thu, 22 May 2025 19:19:59 -0400 Subject: [PATCH 19/47] Update README --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 5323329..658f51e 100644 --- a/README.md +++ b/README.md @@ -2,13 +2,13 @@ ![Tests](https://github.com/winter-again/csv-helper/workflows/Tests/badge.svg) -A CLI for working with CSV data. Currently, primary focus is on workflows for imputing masked counts data. - -Note: `tests/data` has testing data +A simple library and CLI for working with CSV data. ## Installation -```bash +```sh +uv add "csv-helper @ git+ssh://git@github.com/winter-again/csv-helper" +# using pip pip install git+https://git@github.com/winter-again/csv-helper # or via SSH pip install git+ssh://git@github.com/winter-again/csv-helper From bde5be56a1f8f5b7a3bd338be10c57e9e50b2546 Mon Sep 17 00:00:00 2001 From: Andrew Tiu <63322884+winter-again@users.noreply.github.com> Date: Fri, 23 May 2025 08:09:42 -0400 Subject: [PATCH 20/47] Don't think uv sync inside of nox is needed. --- .github/workflows/tests.yml | 2 +- noxfile.py | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 1ef5fd6..ae95311 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -40,4 +40,4 @@ jobs: run: uv sync --locked --all-extras --dev - name: Run tests with nox on ${{ matrix.os }} - run: uv run -- nox -s "tests-${{ matrix.python-version }}" + run: uv run --locked -- nox -s "tests-${{ matrix.python-version }}" diff --git a/noxfile.py b/noxfile.py index db5d9b8..c9e68bf 100644 --- a/noxfile.py +++ b/noxfile.py @@ -6,10 +6,10 @@ @nox.session(python=["3.12", "3.13"]) def tests(session: nox.Session) -> None: """Run tests with pytest.""" - session.run( - "uv", - "sync", - f"--python={session.virtualenv.location}", - env={"UV_PROJECT_ENVIRONMENT": session.virtualenv.location}, - ) + # session.run( + # "uv", + # "sync", + # f"--python={session.virtualenv.location}", + # env={"UV_PROJECT_ENVIRONMENT": session.virtualenv.location}, + # ) session.run("pytest", "-vv", "tests") From 292d4daf356fb11947d11aeb37ea0ae1c6a1a4d0 Mon Sep 17 00:00:00 2001 From: Andrew Tiu <63322884+winter-again@users.noreply.github.com> Date: Fri, 23 May 2025 08:13:49 -0400 Subject: [PATCH 21/47] Add uv sync inside of nox session back --- noxfile.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/noxfile.py b/noxfile.py index c9e68bf..db5d9b8 100644 --- a/noxfile.py +++ b/noxfile.py @@ -6,10 +6,10 @@ @nox.session(python=["3.12", "3.13"]) def tests(session: nox.Session) -> None: """Run tests with pytest.""" - # session.run( - # "uv", - # "sync", - # f"--python={session.virtualenv.location}", - # env={"UV_PROJECT_ENVIRONMENT": session.virtualenv.location}, - # ) + session.run( + "uv", + "sync", + f"--python={session.virtualenv.location}", + env={"UV_PROJECT_ENVIRONMENT": session.virtualenv.location}, + ) session.run("pytest", "-vv", "tests") From 2e816c2430860232e84f701b4c5c4eb9a59f6927 Mon Sep 17 00:00:00 2001 From: Andrew Tiu <63322884+winter-again@users.noreply.github.com> Date: Fri, 23 May 2025 08:54:56 -0400 Subject: [PATCH 22/47] Need to run uv sync before nox? --- .github/workflows/tests.yml | 13 ++++++------- noxfile.py | 4 +++- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index ae95311..532e554 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -4,13 +4,9 @@ on: push: branches: - main - paths-ignore: - - "*.md" pull_request: branches: - main - paths-ignore: - - "*.md" jobs: test: @@ -34,10 +30,13 @@ jobs: version: "0.7.7" - name: Set up Python ${{ matrix.python-version }} - run: uv python install ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version-file: ${{ matrix.python-version }} + # run: uv python install ${{ matrix.python-version }} - - name: Install project - run: uv sync --locked --all-extras --dev + # - name: Install project + # run: uv sync --locked --all-extras - name: Run tests with nox on ${{ matrix.os }} run: uv run --locked -- nox -s "tests-${{ matrix.python-version }}" diff --git a/noxfile.py b/noxfile.py index db5d9b8..0ca55cc 100644 --- a/noxfile.py +++ b/noxfile.py @@ -6,9 +6,11 @@ @nox.session(python=["3.12", "3.13"]) def tests(session: nox.Session) -> None: """Run tests with pytest.""" - session.run( + # TODO: run_install vs run? needs install step... + session.run_install( "uv", "sync", + "--locked", f"--python={session.virtualenv.location}", env={"UV_PROJECT_ENVIRONMENT": session.virtualenv.location}, ) From b8ac30bada8e27a62ff8fca869969da6cb4a2c3d Mon Sep 17 00:00:00 2001 From: Andrew Tiu <63322884+winter-again@users.noreply.github.com> Date: Fri, 23 May 2025 08:56:32 -0400 Subject: [PATCH 23/47] Fix typo in specifying python from matrix --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 532e554..e8597b1 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -32,7 +32,7 @@ jobs: - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v5 with: - python-version-file: ${{ matrix.python-version }} + python-version: ${{ matrix.python-version }} # run: uv python install ${{ matrix.python-version }} # - name: Install project From f69f236cd228e2d7b69c4716169f8b91313b4034 Mon Sep 17 00:00:00 2001 From: Andrew Tiu <63322884+winter-again@users.noreply.github.com> Date: Fri, 23 May 2025 09:15:57 -0400 Subject: [PATCH 24/47] Trying to figure out how nox can run without explicit install --- .github/workflows/tests.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index e8597b1..ee2b0d4 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -37,6 +37,8 @@ jobs: # - name: Install project # run: uv sync --locked --all-extras + - name: Check nox + run: uv run nox --list - name: Run tests with nox on ${{ matrix.os }} run: uv run --locked -- nox -s "tests-${{ matrix.python-version }}" From 91cb28c57ffb9282d67fd6c53b3e1cd205b3ee88 Mon Sep 17 00:00:00 2001 From: Andrew Tiu <63322884+winter-again@users.noreply.github.com> Date: Fri, 23 May 2025 09:17:24 -0400 Subject: [PATCH 25/47] Is it because of uv cache? --- .github/workflows/tests.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index ee2b0d4..514d7a9 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -28,6 +28,7 @@ jobs: uses: astral-sh/setup-uv@v5 with: version: "0.7.7" + enable-cache: false - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v5 From d8f858d4a3fca9e81abe4b8848a1672684972fbb Mon Sep 17 00:00:00 2001 From: Andrew Tiu <63322884+winter-again@users.noreply.github.com> Date: Fri, 23 May 2025 09:22:49 -0400 Subject: [PATCH 26/47] Try with --no-sync --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 514d7a9..ac6ce17 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -39,7 +39,7 @@ jobs: # - name: Install project # run: uv sync --locked --all-extras - name: Check nox - run: uv run nox --list + run: uv run --no-sync -- nox --list - name: Run tests with nox on ${{ matrix.os }} run: uv run --locked -- nox -s "tests-${{ matrix.python-version }}" From 38bd6fad55326ddc62b3ce1e50ec0f69f5bc2f52 Mon Sep 17 00:00:00 2001 From: Andrew Tiu <63322884+winter-again@users.noreply.github.com> Date: Fri, 23 May 2025 09:37:14 -0400 Subject: [PATCH 27/47] Try again --- .github/workflows/tests.yml | 7 ++----- noxfile.py | 1 - 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index ac6ce17..7b97452 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -28,7 +28,6 @@ jobs: uses: astral-sh/setup-uv@v5 with: version: "0.7.7" - enable-cache: false - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v5 @@ -36,10 +35,8 @@ jobs: python-version: ${{ matrix.python-version }} # run: uv python install ${{ matrix.python-version }} - # - name: Install project - # run: uv sync --locked --all-extras - - name: Check nox - run: uv run --no-sync -- nox --list + - name: Install project + run: uv sync --locked "--python=${{ matrix.python-version }}" - name: Run tests with nox on ${{ matrix.os }} run: uv run --locked -- nox -s "tests-${{ matrix.python-version }}" diff --git a/noxfile.py b/noxfile.py index 0ca55cc..3da33c4 100644 --- a/noxfile.py +++ b/noxfile.py @@ -6,7 +6,6 @@ @nox.session(python=["3.12", "3.13"]) def tests(session: nox.Session) -> None: """Run tests with pytest.""" - # TODO: run_install vs run? needs install step... session.run_install( "uv", "sync", From 7777ab873c96aff67abb13b49fa1ddc04f6d119b Mon Sep 17 00:00:00 2001 From: Andrew Tiu <63322884+winter-again@users.noreply.github.com> Date: Fri, 23 May 2025 09:39:37 -0400 Subject: [PATCH 28/47] Remove --python flag from sync --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 7b97452..0c01076 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -36,7 +36,7 @@ jobs: # run: uv python install ${{ matrix.python-version }} - name: Install project - run: uv sync --locked "--python=${{ matrix.python-version }}" + run: uv sync --locked - name: Run tests with nox on ${{ matrix.os }} run: uv run --locked -- nox -s "tests-${{ matrix.python-version }}" From 1cb5d8e9a5c1628a551b10133893be0fec565dc1 Mon Sep 17 00:00:00 2001 From: Andrew Tiu <63322884+winter-again@users.noreply.github.com> Date: Fri, 23 May 2025 09:54:29 -0400 Subject: [PATCH 29/47] Check behavior of auto python install on uv sync --- .github/workflows/tests.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 0c01076..2d63717 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -25,9 +25,9 @@ jobs: uses: actions/checkout@v4 - name: Install uv - uses: astral-sh/setup-uv@v5 + uses: astral-sh/setup-uv@v6 with: - version: "0.7.7" + version: "latest" - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v5 @@ -36,7 +36,7 @@ jobs: # run: uv python install ${{ matrix.python-version }} - name: Install project - run: uv sync --locked + run: uv sync --locked --python="3.13" --no-python-downloads - name: Run tests with nox on ${{ matrix.os }} run: uv run --locked -- nox -s "tests-${{ matrix.python-version }}" From f77d7c44730ee9f55368e233e3e83705151556b2 Mon Sep 17 00:00:00 2001 From: Andrew Tiu <63322884+winter-again@users.noreply.github.com> Date: Fri, 23 May 2025 09:56:50 -0400 Subject: [PATCH 30/47] Potentially unnecessary but force uv sync to use matrix's python ver --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 2d63717..3f01c12 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -36,7 +36,7 @@ jobs: # run: uv python install ${{ matrix.python-version }} - name: Install project - run: uv sync --locked --python="3.13" --no-python-downloads + run: uv sync --locked --python="${{ matrix.python-version }}" - name: Run tests with nox on ${{ matrix.os }} run: uv run --locked -- nox -s "tests-${{ matrix.python-version }}" From 7c87a8be124d6e6efcfc018eb768eac0602e8d78 Mon Sep 17 00:00:00 2001 From: Andrew Tiu <63322884+winter-again@users.noreply.github.com> Date: Fri, 23 May 2025 10:01:32 -0400 Subject: [PATCH 31/47] Same for uv run command. Why should it install another version that isn't what's used in testing? --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 3f01c12..833fc5d 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -39,4 +39,4 @@ jobs: run: uv sync --locked --python="${{ matrix.python-version }}" - name: Run tests with nox on ${{ matrix.os }} - run: uv run --locked -- nox -s "tests-${{ matrix.python-version }}" + run: uv run --locked --python="${{ matrix.python-version }}" -- nox -s "tests-${{ matrix.python-version }}" From 55cba725463a24da3cdef02a776f2f6cc9a5c0d6 Mon Sep 17 00:00:00 2001 From: Andrew Tiu <63322884+winter-again@users.noreply.github.com> Date: Fri, 23 May 2025 10:08:37 -0400 Subject: [PATCH 32/47] Try without python versions in noxfile --- noxfile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/noxfile.py b/noxfile.py index 3da33c4..a81a954 100644 --- a/noxfile.py +++ b/noxfile.py @@ -3,7 +3,7 @@ nox.options.default_venv_backend = "uv" -@nox.session(python=["3.12", "3.13"]) +@nox.session def tests(session: nox.Session) -> None: """Run tests with pytest.""" session.run_install( From fa5b092de092d3b8887a67e4f092f16b6cc1b8ad Mon Sep 17 00:00:00 2001 From: Andrew Tiu <63322884+winter-again@users.noreply.github.com> Date: Fri, 23 May 2025 16:37:01 -0400 Subject: [PATCH 33/47] Try using uv tool run --- .github/workflows/tests.yml | 9 +++++---- noxfile.py | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 833fc5d..99a8979 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -30,13 +30,14 @@ jobs: version: "latest" - name: Set up Python ${{ matrix.python-version }} + # run: uv python install ${{ matrix.python-version }} uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - # run: uv python install ${{ matrix.python-version }} - - name: Install project - run: uv sync --locked --python="${{ matrix.python-version }}" + # - name: Install project + # run: uv sync --locked --python="${{ matrix.python-version }}" - name: Run tests with nox on ${{ matrix.os }} - run: uv run --locked --python="${{ matrix.python-version }}" -- nox -s "tests-${{ matrix.python-version }}" + # run: uv run --locked --python="${{ matrix.python-version }}" -- nox -s "tests-${{ matrix.python-version }}" + run: uv tool run nox -s "tests-${{ matrix.python-version }}" diff --git a/noxfile.py b/noxfile.py index a81a954..3da33c4 100644 --- a/noxfile.py +++ b/noxfile.py @@ -3,7 +3,7 @@ nox.options.default_venv_backend = "uv" -@nox.session +@nox.session(python=["3.12", "3.13"]) def tests(session: nox.Session) -> None: """Run tests with pytest.""" session.run_install( From ef5ceedd1731377f6e968845b609b6d7992c1e19 Mon Sep 17 00:00:00 2001 From: Andrew Tiu <63322884+winter-again@users.noreply.github.com> Date: Fri, 23 May 2025 16:42:03 -0400 Subject: [PATCH 34/47] Try uv pip install --system . --- .github/workflows/tests.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 99a8979..e84e0de 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -37,7 +37,9 @@ jobs: # - name: Install project # run: uv sync --locked --python="${{ matrix.python-version }}" + + - name: Install nox + run: uv pip install --system . - name: Run tests with nox on ${{ matrix.os }} - # run: uv run --locked --python="${{ matrix.python-version }}" -- nox -s "tests-${{ matrix.python-version }}" - run: uv tool run nox -s "tests-${{ matrix.python-version }}" + run: uv run --locked --python="${{ matrix.python-version }}" -- nox -s "tests-${{ matrix.python-version }}" From 80b9c1d3982db2475082112326f696b529b6a3bc Mon Sep 17 00:00:00 2001 From: Andrew Tiu <63322884+winter-again@users.noreply.github.com> Date: Fri, 23 May 2025 16:44:23 -0400 Subject: [PATCH 35/47] Try uv sync --locked --only-dev --- .github/workflows/tests.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index e84e0de..a6e97fb 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -35,11 +35,11 @@ jobs: with: python-version: ${{ matrix.python-version }} - # - name: Install project - # run: uv sync --locked --python="${{ matrix.python-version }}" - - name: Install nox - run: uv pip install --system . + run: uv sync --locked --only-dev + + # - name: Install nox + # run: uv pip install --system . - name: Run tests with nox on ${{ matrix.os }} run: uv run --locked --python="${{ matrix.python-version }}" -- nox -s "tests-${{ matrix.python-version }}" From af1c8e6c0fb881152ad80026cab90af64287442e Mon Sep 17 00:00:00 2001 From: Andrew Tiu <63322884+winter-again@users.noreply.github.com> Date: Fri, 23 May 2025 16:46:00 -0400 Subject: [PATCH 36/47] Get rid of --python flag --- .github/workflows/tests.yml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index a6e97fb..03e61e7 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -38,8 +38,5 @@ jobs: - name: Install nox run: uv sync --locked --only-dev - # - name: Install nox - # run: uv pip install --system . - - name: Run tests with nox on ${{ matrix.os }} - run: uv run --locked --python="${{ matrix.python-version }}" -- nox -s "tests-${{ matrix.python-version }}" + run: uv run --locked -- nox -s "tests-${{ matrix.python-version }}" From fefc706a9d1d800c1e7ceb65c731ecc495a7f656 Mon Sep 17 00:00:00 2001 From: Andrew Tiu <63322884+winter-again@users.noreply.github.com> Date: Fri, 23 May 2025 16:57:48 -0400 Subject: [PATCH 37/47] Maybe middle ground --- .github/workflows/tests.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 03e61e7..c896e37 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -35,8 +35,8 @@ jobs: with: python-version: ${{ matrix.python-version }} - - name: Install nox - run: uv sync --locked --only-dev + - name: Install dev dependencies + run: uv sync --locked --only-dev --python="${{ matrix.python-version }}" - name: Run tests with nox on ${{ matrix.os }} - run: uv run --locked -- nox -s "tests-${{ matrix.python-version }}" + run: uv run --locked --python="${{ matrix.python-version }}" -- nox -s "tests-${{ matrix.python-version }}" From c4992f559330b987bbb8259904a340fd2e026249 Mon Sep 17 00:00:00 2001 From: Andrew Tiu <63322884+winter-again@users.noreply.github.com> Date: Sun, 25 May 2025 10:18:02 -0400 Subject: [PATCH 38/47] Overhaul of impute file command structure. Reduce verbose output and simplify. Deciding against rich print and instead defining console for greater control of highlighting/markup. --- .github/workflows/tests.yml | 1 - .gitignore | 6 +- src/csv_helper/cli.py | 161 ++++++++++++++++-------------------- src/csv_helper/impute.py | 1 + tests/impute_test.py | 1 + uv.lock | 2 +- 6 files changed, 75 insertions(+), 97 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index c896e37..aec5c81 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -30,7 +30,6 @@ jobs: version: "latest" - name: Set up Python ${{ matrix.python-version }} - # run: uv python install ${{ matrix.python-version }} uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} diff --git a/.gitignore b/.gitignore index 4fbdf0d..31f2fe3 100644 --- a/.gitignore +++ b/.gitignore @@ -1,9 +1,5 @@ .venv/ -/data/ __pycache__/ -.pdm-python dist/ sdist/ -test.py -scratch.py -co-est2023-alldata.csv +/data/ diff --git a/src/csv_helper/cli.py b/src/csv_helper/cli.py index 71deeac..3b28444 100644 --- a/src/csv_helper/cli.py +++ b/src/csv_helper/cli.py @@ -2,24 +2,26 @@ from enum import Enum from importlib.metadata import version from pathlib import Path -from typing import NamedTuple, Optional +from typing import NamedTuple import click import numpy as np import polars as pl import typer from numpy.random import Generator -from rich import print from rich.console import Console from rich.progress import Progress, SpinnerColumn, TextColumn from rich.prompt import Confirm from rich.table import Table from typing_extensions import Annotated +from . import impute + app = typer.Typer(no_args_is_help=True, help="A CLI for working with CSV data") impute_app = typer.Typer(no_args_is_help=True, help="Impute CSV data") app.add_typer(impute_app, name="impute") +console = Console() err_console = Console(stderr=True) @@ -86,8 +88,13 @@ def check( help="The CSV file to check", ), ], - fill_col: Annotated[ - str, typer.Option("--col", "-c", help="Name of the column to check") + columns: Annotated[ + list[str], + typer.Option( + "--col", + "-c", + help="Name of a column to check. Specify this for each column you want checked.", + ), ], fill_flag: Annotated[ str, @@ -95,19 +102,12 @@ def check( ], ) -> None: """ - Check a column in a CSV file for occurrences of some string flag. + Summarize counts and proportion of instances of `fill_flag` in each of + the given columns. """ df = pl.read_csv(input, infer_schema_length=0) - if fill_col not in df.columns: - err_console.print(f"Column {fill_col} cannot be found in {input}") - raise typer.Abort() - - imp_size = df.filter(pl.col(fill_col) == fill_flag).height - print( - f"Found [blue]{imp_size:_}[/blue] occurrences of '{fill_flag}' in '{fill_col}' -> [blue]{(imp_size / df.height):0.2f}[/blue] of rows (n = {df.height:_})" - ) - print(df.filter(pl.col(fill_col) == fill_flag).head()) + print(impute.check(df, columns, fill_flag)) class FillRange(NamedTuple): @@ -117,11 +117,20 @@ class FillRange(NamedTuple): # NOTE: see https://github.com/fastapi/typer/issues/182#issuecomment-1708245110 # and https://github.com/fastapi/typer/issues/151#issuecomment-1975322806 -# for workaround for working with enums like this such that Typer understands the args properly -# without having to translate strings or ints to the values we really want +# for this workaround for working with enums such that Typer understands the args properly +# without having to map strings or ints to the values we really want class ColType(Enum): - INT64 = pl.Int64 + FLOAT32 = pl.Float32 FLOAT64 = pl.Float64 + INT8 = pl.Int8 + INT16 = pl.Int16 + INT32 = pl.Int32 + INT64 = pl.Int64 + INT128 = pl.Int128 + UINT8 = pl.UInt8 + UINT16 = pl.UInt16 + UINT32 = pl.UInt32 + UINT64 = pl.UInt64 def validate_inp_out(input: Path, output: Path, force: bool) -> None: @@ -193,27 +202,20 @@ def impute_file( help="Path to target CSV file", ), ], - output: Annotated[ - Path, - # NOTE: if exists = False, other checks still run if the Path happens to (file/dir) exist - typer.Argument( - exists=False, - file_okay=True, - dir_okay=False, - writable=True, - readable=False, - help="Path to save the output CSV file", + columns: Annotated[ + list[str], + typer.Option( + "--col", + "-c", + help="Name of a column to impute. Specify this for each colum you wanted imputed.", ), ], - fill_col: Annotated[ - str, typer.Option("--col", "-c", help="Name of the column to impute") - ], fill_flag: Annotated[ str, typer.Option( "--flag", "-f", - help="Flag (string) to look for and replace in the target column", + help="Flag/marker to find and replace in the target column(s)", ), ], fill_range: Annotated[ @@ -222,10 +224,24 @@ def impute_file( "--range", "-r", metavar="TEXT", - help="Closed, integer interval from which to sample random integer for imputation. Specify as comma-separated values. For example: '1,5' corresponds to the range [1, 5]", + help='Closed, integer interval from which to sample random integer for imputation. Specify as comma-separated values. For example: "1,5" corresponds to the range [1, 5]', parser=parse_fill_range, ), ], + output: Annotated[ + Path | None, + # NOTE: if exists = False, other checks still run if the Path happens to (file/dir) exist + typer.Option( + "--out", + "-o", + exists=False, + file_okay=True, + dir_okay=False, + writable=True, + readable=False, + help="Path to save the imputed CSV file. If not specified, defaults to printing result to stdout", + ), + ] = None, col_type: Annotated[ str, typer.Option( @@ -236,8 +252,8 @@ def impute_file( ), ] = ColType.INT64.name, seed: Annotated[ - int, typer.Option("--seed", "-s", help="Random seed for reproducibility") - ] = 123, + int | None, typer.Option("--seed", "-s", help="Random seed for reproducibility") + ] = None, verbose: Annotated[ bool, typer.Option( @@ -260,25 +276,16 @@ def impute_file( ] = False, ) -> None: """ - Impute a target column in a CSV file. Will look for the specified filler flag in the target column - and replace it with a random integer from the specified range. Save the result to a new CSV file. + Impute target column(s) in a CSV file. Will look for the specified flag and replace + it with a random integer from the specified range. Optionally, save the result to a new CSV file. """ - validate_inp_out(input, output, force) - create_dir = check_create_dir(output) + create_dir = False + if output is not None: + validate_inp_out(input, output, force) + create_dir = check_create_dir(output) df = pl.read_csv(input, infer_schema_length=0) - if fill_col not in df.columns: - err_console.print(f"Column {fill_col} cannot be found in {input}") - raise typer.Abort() - - if not fill_flag_exists(df, fill_col, fill_flag): - err_console.print(f"Cannot find any instances of '{fill_flag}' in {fill_col}") - raise typer.Abort() - - if verbose: - imp_size = df.filter(pl.col(fill_col) == fill_flag).height - with Progress( SpinnerColumn(), TextColumn("[progress.description]{task.description}"), @@ -286,49 +293,27 @@ def impute_file( ) as progress: progress.add_task(description="Imputing...", total=None) - # WARN: setting seed means that each use of this CLI cmd with same seed - # will generate same integers, but repeated calls inside of this func - # won't generate the same set of integers - rng = np.random.default_rng(seed) - cast_type = ColType[col_type] - t0 = time.perf_counter() - df = df.with_columns( - pl.when(pl.col(fill_col) == fill_flag) - .then( - pl.lit( - # NOTE: must specify size to be height of df despite not filling every row - # thus, we get "new" rand int per row - rng.integers( - fill_range.lb, fill_range.ub, size=df.height, endpoint=True - ) - ) - ) - .otherwise(pl.col(fill_col)) - .alias(fill_col) - .cast(cast_type.value) + df = impute.columns( + df, columns, fill_flag, fill_range, ColType[col_type].value, seed ) t1 = time.perf_counter() - if create_dir: - output.parent.mkdir(parents=True) + if output is not None: + if create_dir: + output.parent.mkdir(parents=True) - df.write_csv(output) + df.write_csv(output) - print("[green]Finished imputing[/green]...") + console.print("[green]Finished imputing[/green]...") if verbose: - table = Table(title="Imputation statistics", show_header=False) - table.add_row("[blue]Count of imputed values[/blue]", f"{imp_size:_}") - table.add_row( - "[blue]Proportion of imputed values[/blue]", - f"{(imp_size / df.height):0.2f} (n = {df.height:_})", + console.print(f"\n[bold]Time taken[/bold]: {(t1 - t0):0.3f}s", highlight=False) + console.print("[bold]Preview of result:[/bold]") + console.print( + df.filter(pl.col(col) <= fill_range.ub for col in columns).head(), + highlight=False, ) - table.add_row("[blue]Seed[/blue]", f"{seed}") - table.add_row("[blue]Time taken[/blue]", f"~{(t1 - t0):0.3f} s") - print(table) - - print(df.filter(pl.col(fill_col) <= fill_range.ub).head()) class FillCols(NamedTuple): @@ -440,7 +425,7 @@ def impute_pair( ), ] = False, sep_denom: Annotated[ - Optional[Path], + Path | None, typer.Option( "--sep-denom", exists=True, @@ -457,7 +442,7 @@ def impute_pair( ), ] = None, sep_cols: Annotated[ - Optional[list[str]], + list[str] | None, typer.Option( "--sep-cols", help="Comma-separated list of column names on which to join the numerator and denominator data", @@ -465,7 +450,7 @@ def impute_pair( ), ] = None, sep_out: Annotated[ - Optional[Path], + Path | None, typer.Option( "--sep-out", exists=False, @@ -872,7 +857,3 @@ def impute_dir( print(table) print(df.filter(pl.col(fill_col) <= fill_range.ub).head()) - - -if __name__ == "__main__": - app() diff --git a/src/csv_helper/impute.py b/src/csv_helper/impute.py index 08339ea..76a28e6 100644 --- a/src/csv_helper/impute.py +++ b/src/csv_helper/impute.py @@ -45,6 +45,7 @@ def check[T: (pl.DataFrame, pl.LazyFrame)]( prop=pl.col("value").filter(pl.col("value") == fill_flag).count() / pl.count(), ) + .sort("column") ) diff --git a/tests/impute_test.py b/tests/impute_test.py index 7fb8ac0..c756555 100644 --- a/tests/impute_test.py +++ b/tests/impute_test.py @@ -2,6 +2,7 @@ from csv_helper import impute +# TODO: make reusable df_inp = pl.DataFrame( { "id": ["A", "A", "A", "B", "B", "C", "C", "A", "A", "A", "D", "D"], diff --git a/uv.lock b/uv.lock index a49d157..61d4994 100644 --- a/uv.lock +++ b/uv.lock @@ -55,7 +55,7 @@ wheels = [ [[package]] name = "csv-helper" -version = "0.2.3" +version = "0.3.0" source = { editable = "." } dependencies = [ { name = "numpy" }, From abb8014da283c5916e72de9dcc76e40e551d7f73 Mon Sep 17 00:00:00 2001 From: Andrew Tiu <63322884+winter-again@users.noreply.github.com> Date: Wed, 28 May 2025 15:18:42 -0400 Subject: [PATCH 39/47] Improve impute tests --- tests/impute_test.py | 183 +++++++++++++++++++++++++++++++------------ 1 file changed, 132 insertions(+), 51 deletions(-) diff --git a/tests/impute_test.py b/tests/impute_test.py index c756555..abc85b5 100644 --- a/tests/impute_test.py +++ b/tests/impute_test.py @@ -1,57 +1,138 @@ +from io import StringIO + import polars as pl +import pytest +from polars.testing import assert_frame_equal, assert_frame_not_equal from csv_helper import impute -# TODO: make reusable -df_inp = pl.DataFrame( - { - "id": ["A", "A", "A", "B", "B", "C", "C", "A", "A", "A", "D", "D"], - "count": [ - "10", - "15", - "<=5", - "<=5", - "12", - "50", - "<=5", - "10", - "15", - "<=5", - "<=5", - "<=5", - ], - "count_2": [ - "15", - "10", - "<=5", - "12", - "<=5", - "<=5", - "10", - "50", - "<=5", - "<=5", - "15", - "<=5", - ], - } -) - -# TODO: test values are <= 5 -# TODO: test with seed? - - -def test_impute_columns_single() -> None: - df = df_inp.pipe(impute.columns, ["count"], "<=5", (1, 5)) - - assert df.select((pl.col("count").cast(pl.String) == "<=5").any()).item() is False - - -def test_impute_pair() -> None: - df = df_inp.pipe(impute.column_pair, "count", "count_2", "<=5", (1, 5)) - - assert ( - df.select((pl.col("count").cast(pl.String) == "<=5").any()).item() is False - and df.select((pl.col("count_2").cast(pl.String) == "<=5").any()).item() + +@pytest.fixture +def df_inp() -> pl.DataFrame: + data = """ + id,numerator,denominator,imp_num,imp_denom + A,10,15,false,false + A,<=5,<=5,true,true + A,12,23,false,false + B,<=5,<=5,true,true + A,22,24,false,false + B,<=5,13,true,false + B,<=5,<=5,true,true + A,10,15,false,false + C,<=5,<=5,false,true + C,<=5,<=5,true,true + A,<=5,<=5,true,true + A,22,15,false,false + B,<=5,13,true,false + A,<=5,<=5,false,true + C,100,128,false,false + C,<=5,<=5,true,true + D,<=5,<=5,true,true + A,22,23,false,false + B,<=5,18,true,false + H,8,17,false,false + A,10,16,false,false + A,<=5,<=5,true,true + H,<=5,<=5,true,true + A,22,88,false,false + B,<=5,23,true,false + C,<=5,<=5,true,true + A,<=5,<=5,false,true + C,100,1300,false,false + C,<=5,<=5,true,true + D,<=5,<=5,true,true + """ + df = pl.read_csv( + StringIO(data), + schema={ + "id": pl.String, + "numerator": pl.String, + "denominator": pl.String, + "imp_num": pl.Boolean, + "imp_denom": pl.Boolean, + }, + ) + + return df + + +def test_impute_columns_single(df_inp: pl.DataFrame) -> None: + df_out = df_inp.pipe(impute.columns, ["numerator"], "<=5", (1, 5)) + + assert df_inp.shape == df_out.shape + assert ( + df_out.select((pl.col("numerator").cast(pl.String) == "<=5").any()).item() + is False + ) + assert ( + df_out.filter(pl.col("imp_num")).select((pl.col("numerator") <= 5).all()).item() + is True + ) + + +def test_impute_columns_multi(df_inp: pl.DataFrame) -> None: + df_out = df_inp.pipe(impute.columns, ["numerator", "denominator"], "<=5", (1, 5)) + + assert df_inp.shape == df_out.shape + assert ( + df_out.select((pl.col("numerator").cast(pl.String) == "<=5").any()).item() is False ) + assert ( + df_out.select((pl.col("denominator").cast(pl.String) == "<=5").any()).item() + is False + ) + + assert ( + df_out.filter(pl.col("imp_num")).select((pl.col("numerator") <= 5).all()).item() + is True + ) + assert ( + df_out.filter(pl.col("imp_denom")) + .select((pl.col("denominator") <= 5).all()) + .item() + is True + ) + + +def test_impute_columns_seed(df_inp: pl.DataFrame) -> None: + df_1 = df_inp.pipe(impute.columns, ["numerator"], "<=5", (1, 5)) + df_2 = df_inp.pipe(impute.columns, ["numerator"], "<=5", (1, 5)) + + assert df_1.shape == df_2.shape + assert_frame_not_equal(df_1, df_2) + + df_1 = df_inp.pipe(impute.columns, ["numerator"], "<=5", (1, 5), seed=18) + df_2 = df_inp.pipe(impute.columns, ["numerator"], "<=5", (1, 5), seed=18) + + assert df_1.shape == df_2.shape + assert_frame_equal(df_1, df_2) + + +def test_impute_pair(df_inp: pl.DataFrame) -> None: + df_out = df_inp.pipe(impute.column_pair, "numerator", "denominator", "<=5", (1, 5)) + + assert df_inp.shape == df_out.shape + assert ( + df_out.select((pl.col("numerator").cast(pl.String) == "<=5").any()).item() + is False + and df_out.select((pl.col("denominator").cast(pl.String) == "<=5").any()).item() + is False + ) + assert ( + df_out.filter(pl.col("imp_num")).select((pl.col("numerator") <= 5).all()).item() + is True + ) + assert ( + df_out.filter(pl.col("imp_denom")) + .select((pl.col("denominator") <= 5).all()) + .item() + is True + ) + + assert ( + df_out.filter(pl.col("imp_denom")) + .select((pl.col("numerator") <= pl.col("denominator")).all()) + .item() + is True + ) From 2ef64cd733d1f8ba64ef95c51cd69394fdf30d04 Mon Sep 17 00:00:00 2001 From: Andrew Tiu <63322884+winter-again@users.noreply.github.com> Date: Wed, 28 May 2025 15:34:27 -0400 Subject: [PATCH 40/47] Use lazy() in some cases. Tests for LazyFrames. --- src/csv_helper/impute.py | 8 +- tests/impute_test.py | 153 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 157 insertions(+), 4 deletions(-) diff --git a/src/csv_helper/impute.py b/src/csv_helper/impute.py index 76a28e6..9edcaea 100644 --- a/src/csv_helper/impute.py +++ b/src/csv_helper/impute.py @@ -13,7 +13,7 @@ def check[T: (pl.DataFrame, pl.LazyFrame)]( the given columns. """ for col in columns: - if col not in df.columns: + if col not in df.lazy().collect_schema().names(): raise ValueError(f"Column {col} doesn't exist") if not _fill_flag_exists(df, col, fill_flag): @@ -66,7 +66,7 @@ def columns[T: (pl.DataFrame, pl.LazyFrame)]( to that Polars type. Only supports Polars integer and float types. """ for col in columns: - if col not in df.columns: + if col not in df.lazy().collect_schema().names(): raise ValueError(f"Column {col} doesn't exist") if not _fill_flag_exists(df, col, fill_flag): @@ -160,10 +160,10 @@ def column_pair[T: (pl.DataFrame, pl.LazyFrame)]( behavior for the numerator when denominator is less than or equal to the `fill_range` upper bound since such imputation happens per-row. """ - if numerator not in df.columns: + if numerator not in df.lazy().collect_schema().names(): raise ValueError(f"Column {numerator} doesn't exist") - if denominator not in df.columns: + if denominator not in df.lazy().collect_schema().names(): raise ValueError(f"Column {numerator} doesn't exist") if not _fill_flag_exists(df, numerator, fill_flag): diff --git a/tests/impute_test.py b/tests/impute_test.py index abc85b5..597e522 100644 --- a/tests/impute_test.py +++ b/tests/impute_test.py @@ -56,6 +56,55 @@ def df_inp() -> pl.DataFrame: return df +@pytest.fixture +def lf_inp() -> pl.LazyFrame: + data = """ + id,numerator,denominator,imp_num,imp_denom + A,10,15,false,false + A,<=5,<=5,true,true + A,12,23,false,false + B,<=5,<=5,true,true + A,22,24,false,false + B,<=5,13,true,false + B,<=5,<=5,true,true + A,10,15,false,false + C,<=5,<=5,false,true + C,<=5,<=5,true,true + A,<=5,<=5,true,true + A,22,15,false,false + B,<=5,13,true,false + A,<=5,<=5,false,true + C,100,128,false,false + C,<=5,<=5,true,true + D,<=5,<=5,true,true + A,22,23,false,false + B,<=5,18,true,false + H,8,17,false,false + A,10,16,false,false + A,<=5,<=5,true,true + H,<=5,<=5,true,true + A,22,88,false,false + B,<=5,23,true,false + C,<=5,<=5,true,true + A,<=5,<=5,false,true + C,100,1300,false,false + C,<=5,<=5,true,true + D,<=5,<=5,true,true + """ + lf = pl.scan_csv( + StringIO(data), + schema={ + "id": pl.String, + "numerator": pl.String, + "denominator": pl.String, + "imp_num": pl.Boolean, + "imp_denom": pl.Boolean, + }, + ) + + return lf + + def test_impute_columns_single(df_inp: pl.DataFrame) -> None: df_out = df_inp.pipe(impute.columns, ["numerator"], "<=5", (1, 5)) @@ -70,6 +119,25 @@ def test_impute_columns_single(df_inp: pl.DataFrame) -> None: ) +def test_impute_columns_single_lazy(lf_inp: pl.LazyFrame) -> None: + lf_out = lf_inp.pipe(impute.columns, ["numerator"], "<=5", (1, 5)) + + assert lf_inp.collect().shape == lf_out.collect().shape + assert ( + lf_out.select((pl.col("numerator").cast(pl.String) == "<=5").any()) + .collect() + .item() + is False + ) + assert ( + lf_out.filter(pl.col("imp_num")) + .select((pl.col("numerator") <= 5).all()) + .collect() + .item() + is True + ) + + def test_impute_columns_multi(df_inp: pl.DataFrame) -> None: df_out = df_inp.pipe(impute.columns, ["numerator", "denominator"], "<=5", (1, 5)) @@ -95,6 +163,39 @@ def test_impute_columns_multi(df_inp: pl.DataFrame) -> None: ) +def test_impute_columns_multi_lazy(lf_inp: pl.LazyFrame) -> None: + lf_out = lf_inp.pipe(impute.columns, ["numerator", "denominator"], "<=5", (1, 5)) + + assert lf_inp.collect().shape == lf_out.collect().shape + assert ( + lf_out.select((pl.col("numerator").cast(pl.String) == "<=5").any()) + .collect() + .item() + is False + ) + assert ( + lf_out.select((pl.col("denominator").cast(pl.String) == "<=5").any()) + .collect() + .item() + is False + ) + + assert ( + lf_out.filter(pl.col("imp_num")) + .select((pl.col("numerator") <= 5).all()) + .collect() + .item() + is True + ) + assert ( + lf_out.filter(pl.col("imp_denom")) + .select((pl.col("denominator") <= 5).all()) + .collect() + .item() + is True + ) + + def test_impute_columns_seed(df_inp: pl.DataFrame) -> None: df_1 = df_inp.pipe(impute.columns, ["numerator"], "<=5", (1, 5)) df_2 = df_inp.pipe(impute.columns, ["numerator"], "<=5", (1, 5)) @@ -109,6 +210,20 @@ def test_impute_columns_seed(df_inp: pl.DataFrame) -> None: assert_frame_equal(df_1, df_2) +def test_impute_columns_seed_lazy(lf_inp: pl.LazyFrame) -> None: + lf_1 = lf_inp.pipe(impute.columns, ["numerator"], "<=5", (1, 5)) + lf_2 = lf_inp.pipe(impute.columns, ["numerator"], "<=5", (1, 5)) + + assert lf_1.collect().shape == lf_2.collect().shape + assert_frame_not_equal(lf_1, lf_2) + + lf_1 = lf_inp.pipe(impute.columns, ["numerator"], "<=5", (1, 5), seed=18) + lf_2 = lf_inp.pipe(impute.columns, ["numerator"], "<=5", (1, 5), seed=18) + + assert lf_1.collect().shape == lf_2.collect().shape + assert_frame_equal(lf_1, lf_2) + + def test_impute_pair(df_inp: pl.DataFrame) -> None: df_out = df_inp.pipe(impute.column_pair, "numerator", "denominator", "<=5", (1, 5)) @@ -136,3 +251,41 @@ def test_impute_pair(df_inp: pl.DataFrame) -> None: .item() is True ) + + +def test_impute_pair_lazy(lf_inp: pl.LazyFrame) -> None: + lf_out = lf_inp.pipe(impute.column_pair, "numerator", "denominator", "<=5", (1, 5)) + + assert lf_inp.collect().shape == lf_out.collect().shape + assert ( + lf_out.select((pl.col("numerator").cast(pl.String) == "<=5").any()) + .collect() + .item() + is False + and lf_out.select((pl.col("denominator").cast(pl.String) == "<=5").any()) + .collect() + .item() + is False + ) + assert ( + lf_out.filter(pl.col("imp_num")) + .select((pl.col("numerator") <= 5).all()) + .collect() + .item() + is True + ) + assert ( + lf_out.filter(pl.col("imp_denom")) + .select((pl.col("denominator") <= 5).all()) + .collect() + .item() + is True + ) + + assert ( + lf_out.filter(pl.col("imp_denom")) + .select((pl.col("numerator") <= pl.col("denominator")).all()) + .collect() + .item() + is True + ) From 4558b0c2e0e745d4478c4b5a8b54ff7535685d97 Mon Sep 17 00:00:00 2001 From: Andrew Tiu <63322884+winter-again@users.noreply.github.com> Date: Wed, 28 May 2025 16:36:41 -0400 Subject: [PATCH 41/47] Improve complete module tests --- tests/complete_test.py | 240 ++++++++++++++++++++++++++++------------- tests/impute_test.py | 9 +- 2 files changed, 173 insertions(+), 76 deletions(-) diff --git a/tests/complete_test.py b/tests/complete_test.py index 5519429..737a6f5 100644 --- a/tests/complete_test.py +++ b/tests/complete_test.py @@ -1,92 +1,188 @@ +import textwrap +from io import StringIO + import polars as pl +import pytest from polars.testing import assert_frame_equal from csv_helper import complete -def test_complete_exists() -> None: - df = pl.DataFrame( - { - "country": ["France", "France", "UK", "UK", "Spain"], - "year": [2020, 2021, 2019, 2020, 2022], - "value": [1, 2, 3, 4, 5], - } +@pytest.fixture +def df_inp() -> pl.DataFrame: + data = """\ + country,year,value + France,2020,1 + France,2021,2 + UK,2019,3 + UK,2020,4 + Spain,2022,5 + """ + df = pl.read_csv( + StringIO(textwrap.dedent(data)), + schema={ + "country": pl.String, + "year": pl.Int64, + "value": pl.Int64, + }, ) - df = df.pipe(complete.complete, "country", "year").sort("country", "year") - result = pl.DataFrame( - { - "country": [ - country for country in ["France", "UK", "Spain"] for _ in range(4) - ], - "year": [y for _ in range(3) for y in range(2019, 2023)], - "value": [None, 1, 2, None, 3, 4, None, None, None, None, None, 5], - } - ).sort("country", "year") - assert_frame_equal(df, result) + return df + - lf = pl.LazyFrame( - { - "country": ["France", "France", "UK", "UK", "Spain"], - "year": [2020, 2021, 2019, 2020, 2022], - "value": [1, 2, 3, 4, 5], - } +@pytest.fixture +def lf_inp() -> pl.LazyFrame: + data = """\ + country,year,value + France,2020,1 + France,2021,2 + UK,2019,3 + UK,2020,4 + Spain,2022,5 + """ + lf = pl.scan_csv( + StringIO(textwrap.dedent(data)), + schema={ + "country": pl.String, + "year": pl.Int64, + "value": pl.Int64, + }, ) - lf = lf.pipe(complete.complete, "country", "year").sort("country", "year") - result = pl.LazyFrame( - { - "country": [ - country for country in ["France", "UK", "Spain"] for _ in range(4) - ], - "year": [y for _ in range(3) for y in range(2019, 2023)], - "value": [None, 1, 2, None, 3, 4, None, None, None, None, None, 5], - } + + return lf + + +def test_complete_existing(df_inp: pl.DataFrame) -> None: + df_out = df_inp.pipe(complete.complete, "country", "year").sort("country", "year") + + data_res = """\ + country,year,value + France,2019, + France,2020,1 + France,2021,2 + France,2022, + UK,2019,3 + UK,2020,4 + UK,2021, + UK,2022, + Spain,2019, + Spain,2020, + Spain,2021, + Spain,2022,5 + """ + result = pl.read_csv( + StringIO(textwrap.dedent(data_res)), + schema={ + "country": pl.String, + "year": pl.Int64, + "value": pl.Int64, + }, ).sort("country", "year") - assert_frame_equal(lf, result) + assert_frame_equal(df_out, result) -def test_complete_not_exists() -> None: - # TODO: add lazy test - df = pl.DataFrame( - { - "country": ["France", "France", "UK", "UK", "Spain"], - "year": [2020, 2021, 2019, 2020, 2022], - "value": [1, 2, 3, 4, 5], - } - ) - df = df.pipe( +def test_complete_existing_lazy(lf_inp: pl.LazyFrame) -> None: + lf_out = lf_inp.pipe(complete.complete, "country", "year").sort("country", "year") + + data_res = """\ + country,year,value + France,2019, + France,2020,1 + France,2021,2 + France,2022, + UK,2019,3 + UK,2020,4 + UK,2021, + UK,2022, + Spain,2019, + Spain,2020, + Spain,2021, + Spain,2022,5 + """ + result = pl.read_csv( + StringIO(textwrap.dedent(data_res)), + schema={ + "country": pl.String, + "year": pl.Int64, + "value": pl.Int64, + }, + ).sort("country", "year") + + assert_frame_equal(lf_out.collect(), result) + + +def test_complete_non_existing(df_inp: pl.DataFrame) -> None: + df_out = df_inp.pipe( complete.complete, pl.Series("country", ["France", "UK", "Spain", "China"]), "year", ).sort("country", "year") - result = pl.DataFrame( - { - "country": [ - country - for country in ["China", "France", "UK", "Spain"] - for _ in range(4) - ], - "year": [y for _ in range(4) for y in range(2019, 2023)], - "value": [ - None, - None, - None, - None, - None, - 1, - 2, - None, - 3, - 4, - None, - None, - None, - None, - None, - 5, - ], - } + + data_res = """\ + country,year,value + China,2019, + China,2020, + China,2021, + China,2022, + France,2019, + France,2020,1 + France,2021,2 + France,2022, + UK,2019,3 + UK,2020,4 + UK,2021, + UK,2022, + Spain,2019, + Spain,2020, + Spain,2021, + Spain,2022,5 + """ + result = pl.read_csv( + StringIO(textwrap.dedent(data_res)), + schema={ + "country": pl.String, + "year": pl.Int64, + "value": pl.Int64, + }, + ).sort("country", "year") + + assert_frame_equal(df_out, result) + + +def test_complete_non_existing_lazy(lf_inp: pl.LazyFrame) -> None: + lf_out = lf_inp.pipe( + complete.complete, + pl.Series("country", ["France", "UK", "Spain", "China"]), + "year", + ).sort("country", "year") + + data_res = """\ + country,year,value + China,2019, + China,2020, + China,2021, + China,2022, + France,2019, + France,2020,1 + France,2021,2 + France,2022, + UK,2019,3 + UK,2020,4 + UK,2021, + UK,2022, + Spain,2019, + Spain,2020, + Spain,2021, + Spain,2022,5 + """ + result = pl.read_csv( + StringIO(textwrap.dedent(data_res)), + schema={ + "country": pl.String, + "year": pl.Int64, + "value": pl.Int64, + }, ).sort("country", "year") - assert_frame_equal(df, result) + assert_frame_equal(lf_out.collect(), result) diff --git a/tests/impute_test.py b/tests/impute_test.py index 597e522..506781f 100644 --- a/tests/impute_test.py +++ b/tests/impute_test.py @@ -1,3 +1,4 @@ +import textwrap from io import StringIO import polars as pl @@ -9,7 +10,7 @@ @pytest.fixture def df_inp() -> pl.DataFrame: - data = """ + data = """\ id,numerator,denominator,imp_num,imp_denom A,10,15,false,false A,<=5,<=5,true,true @@ -43,7 +44,7 @@ def df_inp() -> pl.DataFrame: D,<=5,<=5,true,true """ df = pl.read_csv( - StringIO(data), + StringIO(textwrap.dedent(data)), schema={ "id": pl.String, "numerator": pl.String, @@ -58,7 +59,7 @@ def df_inp() -> pl.DataFrame: @pytest.fixture def lf_inp() -> pl.LazyFrame: - data = """ + data = """\ id,numerator,denominator,imp_num,imp_denom A,10,15,false,false A,<=5,<=5,true,true @@ -92,7 +93,7 @@ def lf_inp() -> pl.LazyFrame: D,<=5,<=5,true,true """ lf = pl.scan_csv( - StringIO(data), + StringIO(textwrap.dedent(data)), schema={ "id": pl.String, "numerator": pl.String, From 76cdb1e439b710c9b782d9095f232fd75dc32e34 Mon Sep 17 00:00:00 2001 From: Andrew Tiu <63322884+winter-again@users.noreply.github.com> Date: Sat, 21 Jun 2025 08:18:22 -0400 Subject: [PATCH 42/47] Clean up complete tests --- src/csv_helper/complete.py | 8 +- tests/complete_test.py | 157 +++++++++++++++---------------------- 2 files changed, 69 insertions(+), 96 deletions(-) diff --git a/src/csv_helper/complete.py b/src/csv_helper/complete.py index e735418..56873d0 100644 --- a/src/csv_helper/complete.py +++ b/src/csv_helper/complete.py @@ -6,10 +6,10 @@ def complete[T: (pl.DataFrame, pl.LazyFrame)](df: T, *columns: str | pl.Series) Generate rows for implicit missing values based on column combinations, thus making them explicit missing values. Generated values marked as null. - If columns are referenced with strings, then only existing values in those + If columns are referenced by name, then only existing values in those columns are used for completion. If Series are specified instead, then - those Series can specify the full set of possible values, provided that - the Series is named after an existing column. + those Series can specify the full set of possible values. The Series must be + named after an existing column. """ cols = [] for col in columns: @@ -19,7 +19,7 @@ def complete[T: (pl.DataFrame, pl.LazyFrame)](df: T, *columns: str | pl.Series) cols.append(col.unique().implode()) else: raise TypeError( - f"The columns argument(s) must be either string or polars Series. Got {type(col)} instead." + f"The columns must be either string or polars Series. Got {type(col)} instead." ) unique_combos = df.select(cols) diff --git a/tests/complete_test.py b/tests/complete_test.py index 737a6f5..08f31af 100644 --- a/tests/complete_test.py +++ b/tests/complete_test.py @@ -25,37 +25,19 @@ def df_inp() -> pl.DataFrame: "year": pl.Int64, "value": pl.Int64, }, - ) + ).sort("country", "year") return df @pytest.fixture -def lf_inp() -> pl.LazyFrame: - data = """\ - country,year,value - France,2020,1 - France,2021,2 - UK,2019,3 - UK,2020,4 - Spain,2022,5 - """ - lf = pl.scan_csv( - StringIO(textwrap.dedent(data)), - schema={ - "country": pl.String, - "year": pl.Int64, - "value": pl.Int64, - }, - ) +def lf_inp(df_inp: pl.DataFrame) -> pl.LazyFrame: + return df_inp.lazy() - return lf - -def test_complete_existing(df_inp: pl.DataFrame) -> None: - df_out = df_inp.pipe(complete.complete, "country", "year").sort("country", "year") - - data_res = """\ +@pytest.fixture +def df_out() -> pl.DataFrame: + data = """\ country,year,value France,2019, France,2020,1 @@ -70,8 +52,8 @@ def test_complete_existing(df_inp: pl.DataFrame) -> None: Spain,2021, Spain,2022,5 """ - result = pl.read_csv( - StringIO(textwrap.dedent(data_res)), + df = pl.read_csv( + StringIO(textwrap.dedent(data)), schema={ "country": pl.String, "year": pl.Int64, @@ -79,47 +61,52 @@ def test_complete_existing(df_inp: pl.DataFrame) -> None: }, ).sort("country", "year") - assert_frame_equal(df_out, result) + return df -def test_complete_existing_lazy(lf_inp: pl.LazyFrame) -> None: - lf_out = lf_inp.pipe(complete.complete, "country", "year").sort("country", "year") +def test_complete_existing(df_inp: pl.DataFrame, df_out: pl.DataFrame) -> None: + df = df_inp.pipe(complete.complete, "country", "year").sort("country", "year") - data_res = """\ - country,year,value - France,2019, - France,2020,1 - France,2021,2 - France,2022, - UK,2019,3 - UK,2020,4 - UK,2021, - UK,2022, - Spain,2019, - Spain,2020, - Spain,2021, - Spain,2022,5 - """ - result = pl.read_csv( - StringIO(textwrap.dedent(data_res)), - schema={ - "country": pl.String, - "year": pl.Int64, - "value": pl.Int64, - }, - ).sort("country", "year") + assert_frame_equal(df, df_out) - assert_frame_equal(lf_out.collect(), result) +def test_complete_existing_series(df_inp: pl.DataFrame, df_out: pl.DataFrame) -> None: + country = pl.Series("country", ["France", "UK", "Spain"]) + year = pl.Series("year", [year for year in range(2019, 2023)]) + df = df_inp.pipe(complete.complete, country, year).sort("country", "year") + + assert_frame_equal(df, df_out) + + +def test_complete_exception(df_inp: pl.DataFrame) -> None: + with pytest.raises(TypeError): + df_inp.pipe(complete.complete, 0, 1).sort("country", "year") # pyright: ignore[reportArgumentType] + + +def test_complete_existing_lazy(lf_inp: pl.LazyFrame, df_out: pl.DataFrame) -> None: + lf = lf_inp.pipe(complete.complete, "country", "year").sort("country", "year") + + assert_frame_equal(lf.collect(), df_out) + + +def test_complete_existing_lazy_series( + lf_inp: pl.LazyFrame, df_out: pl.DataFrame +) -> None: + country = pl.Series("country", ["France", "UK", "Spain"]) + year = pl.Series("year", [year for year in range(2019, 2023)]) + lf = lf_inp.pipe(complete.complete, country, year).sort("country", "year") + + assert_frame_equal(lf.collect(), df_out) + + +def test_complete_exception_lazy(lf_inp: pl.LazyFrame) -> None: + with pytest.raises(TypeError): + lf_inp.pipe(complete.complete, 0, 1).sort("country", "year") # pyright: ignore[reportArgumentType] -def test_complete_non_existing(df_inp: pl.DataFrame) -> None: - df_out = df_inp.pipe( - complete.complete, - pl.Series("country", ["France", "UK", "Spain", "China"]), - "year", - ).sort("country", "year") - data_res = """\ +@pytest.fixture +def df_out_non_exist() -> pl.DataFrame: + data = """\ country,year,value China,2019, China,2020, @@ -138,8 +125,8 @@ def test_complete_non_existing(df_inp: pl.DataFrame) -> None: Spain,2021, Spain,2022,5 """ - result = pl.read_csv( - StringIO(textwrap.dedent(data_res)), + df = pl.read_csv( + StringIO(textwrap.dedent(data)), schema={ "country": pl.String, "year": pl.Int64, @@ -147,42 +134,28 @@ def test_complete_non_existing(df_inp: pl.DataFrame) -> None: }, ).sort("country", "year") - assert_frame_equal(df_out, result) + return df -def test_complete_non_existing_lazy(lf_inp: pl.LazyFrame) -> None: - lf_out = lf_inp.pipe( +def test_complete_non_existing( + df_inp: pl.DataFrame, df_out_non_exist: pl.DataFrame +) -> None: + df = df_inp.pipe( complete.complete, pl.Series("country", ["France", "UK", "Spain", "China"]), "year", ).sort("country", "year") - data_res = """\ - country,year,value - China,2019, - China,2020, - China,2021, - China,2022, - France,2019, - France,2020,1 - France,2021,2 - France,2022, - UK,2019,3 - UK,2020,4 - UK,2021, - UK,2022, - Spain,2019, - Spain,2020, - Spain,2021, - Spain,2022,5 - """ - result = pl.read_csv( - StringIO(textwrap.dedent(data_res)), - schema={ - "country": pl.String, - "year": pl.Int64, - "value": pl.Int64, - }, + assert_frame_equal(df, df_out_non_exist) + + +def test_complete_non_existing_lazy( + lf_inp: pl.LazyFrame, df_out_non_exist: pl.DataFrame +) -> None: + lf = lf_inp.pipe( + complete.complete, + pl.Series("country", ["France", "UK", "Spain", "China"]), + "year", ).sort("country", "year") - assert_frame_equal(lf_out.collect(), result) + assert_frame_equal(lf.collect(), df_out_non_exist) From 38b4728f94bcebb03bbbd194df08a4e782aa1423 Mon Sep 17 00:00:00 2001 From: Andrew Tiu <63322884+winter-again@users.noreply.github.com> Date: Sat, 21 Jun 2025 08:19:54 -0400 Subject: [PATCH 43/47] Remove --locked? --- .github/workflows/tests.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index aec5c81..f45e2b0 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -35,7 +35,8 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install dev dependencies - run: uv sync --locked --only-dev --python="${{ matrix.python-version }}" + # run: uv sync --locked --only-dev --python="${{ matrix.python-version }}" + run: uv sync --only-dev --python="${{ matrix.python-version }}" - name: Run tests with nox on ${{ matrix.os }} run: uv run --locked --python="${{ matrix.python-version }}" -- nox -s "tests-${{ matrix.python-version }}" From 8c96e5998d024206ca4c42b11f4e908d4fbcb4ea Mon Sep 17 00:00:00 2001 From: Andrew Tiu <63322884+winter-again@users.noreply.github.com> Date: Sat, 21 Jun 2025 09:03:48 -0400 Subject: [PATCH 44/47] Improve --- .github/workflows/tests.yml | 1 + src/csv_helper/impute.py | 37 +++++----- tests/complete_test.py | 5 -- tests/impute_test.py | 130 ++++++++++++++++++++---------------- 4 files changed, 94 insertions(+), 79 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index f45e2b0..e898406 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -35,6 +35,7 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install dev dependencies + # TODO: maybe have to add explicit uv lock? # run: uv sync --locked --only-dev --python="${{ matrix.python-version }}" run: uv sync --only-dev --python="${{ matrix.python-version }}" diff --git a/src/csv_helper/impute.py b/src/csv_helper/impute.py index 9edcaea..9d66ed9 100644 --- a/src/csv_helper/impute.py +++ b/src/csv_helper/impute.py @@ -65,6 +65,10 @@ def columns[T: (pl.DataFrame, pl.LazyFrame)]( If `dtype` is specified, will attempt to cast the filled columns to that Polars type. Only supports Polars integer and float types. """ + n_cols = len(columns) + if n_cols == 0: + raise ValueError("Must specify at least one column to impute") + for col in columns: if col not in df.lazy().collect_schema().names(): raise ValueError(f"Column {col} doesn't exist") @@ -76,8 +80,22 @@ def columns[T: (pl.DataFrame, pl.LazyFrame)]( fill_range_int = _parse_fill_range(fill_range) - n_cols = len(columns) - if n_cols > 1: + if n_cols == 1: + column = columns[0] + # NOTE: this implementation and numpy implementation for filling values are roughly the same speed + # with this Polars-only impl barely faster + df = df.with_columns( + pl.when(pl.col(column) == fill_flag) + .then( + pl.int_range(fill_range_int.lb, fill_range_int.ub + 1).sample( + pl.len(), with_replacement=True, seed=seed + ) + ) + .otherwise(pl.col(column)) + .alias(column) + .cast(dtype) + ) + else: rng = np.random.default_rng(seed) n_rows = df.lazy().select(pl.len()).collect().item() # must gen enough numbers for all columns up-front, otherwise they get reused @@ -97,21 +115,6 @@ def columns[T: (pl.DataFrame, pl.LazyFrame)]( .alias(col) .cast(dtype) ) - else: - column = columns[0] - # NOTE: this implementation and numpy implementation for filling values are roughly the same speed - # with this Polars-only impl barely faster - df = df.with_columns( - pl.when(pl.col(column) == fill_flag) - .then( - pl.int_range(fill_range_int.lb, fill_range_int.ub + 1).sample( - pl.len(), with_replacement=True, seed=seed - ) - ) - .otherwise(pl.col(column)) - .alias(column) - .cast(dtype) - ) return df diff --git a/tests/complete_test.py b/tests/complete_test.py index 08f31af..cd4865b 100644 --- a/tests/complete_test.py +++ b/tests/complete_test.py @@ -99,11 +99,6 @@ def test_complete_existing_lazy_series( assert_frame_equal(lf.collect(), df_out) -def test_complete_exception_lazy(lf_inp: pl.LazyFrame) -> None: - with pytest.raises(TypeError): - lf_inp.pipe(complete.complete, 0, 1).sort("country", "year") # pyright: ignore[reportArgumentType] - - @pytest.fixture def df_out_non_exist() -> pl.DataFrame: data = """\ diff --git a/tests/impute_test.py b/tests/impute_test.py index 506781f..a11e3bc 100644 --- a/tests/impute_test.py +++ b/tests/impute_test.py @@ -10,6 +10,7 @@ @pytest.fixture def df_inp() -> pl.DataFrame: + # NOTE: imp_num and imp_denom independently denote whether col needs imputation data = """\ id,numerator,denominator,imp_num,imp_denom A,10,15,false,false @@ -58,52 +59,8 @@ def df_inp() -> pl.DataFrame: @pytest.fixture -def lf_inp() -> pl.LazyFrame: - data = """\ - id,numerator,denominator,imp_num,imp_denom - A,10,15,false,false - A,<=5,<=5,true,true - A,12,23,false,false - B,<=5,<=5,true,true - A,22,24,false,false - B,<=5,13,true,false - B,<=5,<=5,true,true - A,10,15,false,false - C,<=5,<=5,false,true - C,<=5,<=5,true,true - A,<=5,<=5,true,true - A,22,15,false,false - B,<=5,13,true,false - A,<=5,<=5,false,true - C,100,128,false,false - C,<=5,<=5,true,true - D,<=5,<=5,true,true - A,22,23,false,false - B,<=5,18,true,false - H,8,17,false,false - A,10,16,false,false - A,<=5,<=5,true,true - H,<=5,<=5,true,true - A,22,88,false,false - B,<=5,23,true,false - C,<=5,<=5,true,true - A,<=5,<=5,false,true - C,100,1300,false,false - C,<=5,<=5,true,true - D,<=5,<=5,true,true - """ - lf = pl.scan_csv( - StringIO(textwrap.dedent(data)), - schema={ - "id": pl.String, - "numerator": pl.String, - "denominator": pl.String, - "imp_num": pl.Boolean, - "imp_denom": pl.Boolean, - }, - ) - - return lf +def lf_inp(df_inp: pl.DataFrame) -> pl.LazyFrame: + return df_inp.lazy() def test_impute_columns_single(df_inp: pl.DataFrame) -> None: @@ -120,6 +77,11 @@ def test_impute_columns_single(df_inp: pl.DataFrame) -> None: ) +def test_impute_columns_no_cols_exception(df_inp: pl.DataFrame) -> None: + with pytest.raises(ValueError): + df_inp.pipe(impute.columns, [], "<=5", (1, 5)) + + def test_impute_columns_single_lazy(lf_inp: pl.LazyFrame) -> None: lf_out = lf_inp.pipe(impute.columns, ["numerator"], "<=5", (1, 5)) @@ -198,32 +160,32 @@ def test_impute_columns_multi_lazy(lf_inp: pl.LazyFrame) -> None: def test_impute_columns_seed(df_inp: pl.DataFrame) -> None: - df_1 = df_inp.pipe(impute.columns, ["numerator"], "<=5", (1, 5)) - df_2 = df_inp.pipe(impute.columns, ["numerator"], "<=5", (1, 5)) - - assert df_1.shape == df_2.shape - assert_frame_not_equal(df_1, df_2) - df_1 = df_inp.pipe(impute.columns, ["numerator"], "<=5", (1, 5), seed=18) df_2 = df_inp.pipe(impute.columns, ["numerator"], "<=5", (1, 5), seed=18) assert df_1.shape == df_2.shape assert_frame_equal(df_1, df_2) + df_1 = df_inp.pipe(impute.columns, ["numerator"], "<=5", (1, 5), seed=1) + df_2 = df_inp.pipe(impute.columns, ["numerator"], "<=5", (1, 5), seed=10) -def test_impute_columns_seed_lazy(lf_inp: pl.LazyFrame) -> None: - lf_1 = lf_inp.pipe(impute.columns, ["numerator"], "<=5", (1, 5)) - lf_2 = lf_inp.pipe(impute.columns, ["numerator"], "<=5", (1, 5)) + assert df_1.shape == df_2.shape + assert_frame_not_equal(df_1, df_2) - assert lf_1.collect().shape == lf_2.collect().shape - assert_frame_not_equal(lf_1, lf_2) +def test_impute_columns_seed_lazy(lf_inp: pl.LazyFrame) -> None: lf_1 = lf_inp.pipe(impute.columns, ["numerator"], "<=5", (1, 5), seed=18) lf_2 = lf_inp.pipe(impute.columns, ["numerator"], "<=5", (1, 5), seed=18) assert lf_1.collect().shape == lf_2.collect().shape assert_frame_equal(lf_1, lf_2) + lf_1 = lf_inp.pipe(impute.columns, ["numerator"], "<=5", (1, 5), seed=1) + lf_2 = lf_inp.pipe(impute.columns, ["numerator"], "<=5", (1, 5), seed=10) + + assert lf_1.collect().shape == lf_2.collect().shape + assert_frame_not_equal(lf_1, lf_2) + def test_impute_pair(df_inp: pl.DataFrame) -> None: df_out = df_inp.pipe(impute.column_pair, "numerator", "denominator", "<=5", (1, 5)) @@ -254,6 +216,60 @@ def test_impute_pair(df_inp: pl.DataFrame) -> None: ) +def test_impute_pair_seed(df_inp: pl.DataFrame) -> None: + df_1 = df_inp.pipe( + impute.column_pair, "numerator", "denominator", "<=5", (1, 5), seed=18 + ) + df_2 = df_inp.pipe( + impute.column_pair, "numerator", "denominator", "<=5", (1, 5), seed=18 + ) + + assert df_1.shape == df_2.shape + # can only guarantee seed reproducibility in these 2 cases + assert_frame_equal(df_1.select("denominator"), df_2.select("denominator")) + assert_frame_equal( + df_1.filter(pl.col("denominator") > 5).select("numerator"), + df_2.filter(pl.col("denominator") > 5).select("numerator"), + ) + + df_1 = df_inp.pipe( + impute.column_pair, "numerator", "denominator", "<=5", (1, 5), seed=1 + ) + df_2 = df_inp.pipe( + impute.column_pair, "numerator", "denominator", "<=5", (1, 5), seed=10 + ) + + assert df_1.shape == df_2.shape + assert_frame_not_equal(df_1, df_2) + + +def test_impute_pair_seed_lazy(lf_inp: pl.LazyFrame) -> None: + lf_1 = lf_inp.pipe( + impute.column_pair, "numerator", "denominator", "<=5", (1, 5), seed=18 + ) + lf_2 = lf_inp.pipe( + impute.column_pair, "numerator", "denominator", "<=5", (1, 5), seed=18 + ) + + assert lf_1.collect().shape == lf_2.collect().shape + # can only guarantee seed reproducibility in these 2 cases + assert_frame_equal(lf_1.select("denominator"), lf_2.select("denominator")) + assert_frame_equal( + lf_1.filter(pl.col("denominator") > 5).select("numerator"), + lf_2.filter(pl.col("denominator") > 5).select("numerator"), + ) + + lf_1 = lf_inp.pipe( + impute.column_pair, "numerator", "denominator", "<=5", (1, 5), seed=1 + ) + lf_2 = lf_inp.pipe( + impute.column_pair, "numerator", "denominator", "<=5", (1, 5), seed=10 + ) + + assert lf_1.collect().shape == lf_2.collect().shape + assert_frame_not_equal(lf_1, lf_2) + + def test_impute_pair_lazy(lf_inp: pl.LazyFrame) -> None: lf_out = lf_inp.pipe(impute.column_pair, "numerator", "denominator", "<=5", (1, 5)) From 4e01053d2493e561c7ef7aaf0b54d8aab2a52d1e Mon Sep 17 00:00:00 2001 From: Andrew Tiu <63322884+winter-again@users.noreply.github.com> Date: Sat, 21 Jun 2025 13:27:15 -0400 Subject: [PATCH 45/47] Working tests --- .gitignore | 2 +- pyproject.toml | 2 +- src/csv_helper/cli.py | 538 ++++++++++----------------------------- src/csv_helper/impute.py | 6 +- tests/cli_test.py | 379 ++++++--------------------- 5 files changed, 228 insertions(+), 699 deletions(-) diff --git a/.gitignore b/.gitignore index 31f2fe3..17b08d3 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,4 @@ __pycache__/ dist/ sdist/ -/data/ +# /data/ diff --git a/pyproject.toml b/pyproject.toml index 23a1fa1..76169b8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "csv-helper" -version = "0.2.3" +version = "0.3.0" description = "A simple library and CLI for working with CSV data" readme = "README.md" authors = [{ name = "Andrew Tiu", email = "andrew.tiu88@gmail.com" }] diff --git a/src/csv_helper/cli.py b/src/csv_helper/cli.py index 3b28444..f327bb4 100644 --- a/src/csv_helper/cli.py +++ b/src/csv_helper/cli.py @@ -5,14 +5,11 @@ from typing import NamedTuple import click -import numpy as np import polars as pl import typer -from numpy.random import Generator from rich.console import Console from rich.progress import Progress, SpinnerColumn, TextColumn from rich.prompt import Confirm -from rich.table import Table from typing_extensions import Annotated from . import impute @@ -25,13 +22,11 @@ err_console = Console(stderr=True) -def print_version(val: bool): +def version_callback(value: bool): """Print CLI version""" - if not val: - return - - print(f"csv-helper version {version('csv_helper')}") - raise typer.Exit() + if value: + print(f"csv-helper version {version(__package__)}") # pyright: ignore[reportArgumentType] + raise typer.Exit() @app.callback() @@ -42,14 +37,14 @@ def callback( "-v", is_eager=True, help="Print the version and exit.", - callback=print_version, + callback=version_callback, ), ) -> None: pass @app.command() -def preview( +def show( input: Annotated[ Path, typer.Argument( @@ -57,19 +52,18 @@ def preview( file_okay=True, dir_okay=False, readable=True, - help="The CSV file to preview", + help="Target CSV file", ), ], n_rows: Annotated[ - int, typer.Option("--nrows", "-n", min=1, help="Number of rows to preview") + int, typer.Option("--nrows", "-n", min=1, help="Number of rows to show") ] = 10, ) -> None: """ - Preview a given CSV file. + Show preview of a given CSV file. """ df = pl.read_csv(input, infer_schema_length=0) - print(f"File: {input}") if n_rows > df.height: print(df) else: @@ -107,7 +101,13 @@ def check( """ df = pl.read_csv(input, infer_schema_length=0) - print(impute.check(df, columns, fill_flag)) + try: + out = impute.check(df, columns, fill_flag) + except ValueError as e: + if f"doesn't contain any instances of '{fill_flag}'" in str(e): + print(e) + else: + print(out) class FillRange(NamedTuple): @@ -139,7 +139,7 @@ def validate_inp_out(input: Path, output: Path, force: bool) -> None: f"[blue bold]{output}[/blue bold] already exists. Do you want to overwrite it?" ) if not overwrite_file: - print("Won't overwrite") + err_console.print("Won't overwrite") raise typer.Abort() if input == output and not force: @@ -155,9 +155,10 @@ def check_create_dir(output: Path) -> bool: f"The specified output's parent directory [blue bold]{output.parent}[/blue bold] doesn't exist. Do you want to create it along with any missing parents?" ) if not create_dir: - print("Won't create directories") - raise typer.Abort() + return False + return True + return False @@ -165,12 +166,14 @@ def all_cols_exist(df: pl.DataFrame, fill_cols: list[str]) -> bool: for col in fill_cols: if col not in df.columns: return False + return True def fill_flag_exists(df: pl.DataFrame, fill_col: str, fill_flag: str) -> bool: if df.select((pl.col(fill_col) == fill_flag).any()).item(): return True + return False @@ -199,7 +202,7 @@ def impute_file( file_okay=True, dir_okay=False, readable=True, - help="Path to target CSV file", + help="Target CSV file", ), ], columns: Annotated[ @@ -207,7 +210,7 @@ def impute_file( typer.Option( "--col", "-c", - help="Name of a column to impute. Specify this for each colum you wanted imputed.", + help="Name of column to impute. Specify this for each colum you wanted imputed.", ), ], fill_flag: Annotated[ @@ -230,16 +233,17 @@ def impute_file( ], output: Annotated[ Path | None, - # NOTE: if exists = False, other checks still run if the Path happens to (file/dir) exist typer.Option( "--out", "-o", + # NOTE: if exists=False, file/directory doesn't need to exist; + # if doesn't exist, other checks skipped exists=False, file_okay=True, dir_okay=False, writable=True, readable=False, - help="Path to save the imputed CSV file. If not specified, defaults to printing result to stdout", + help="Path to save the imputed CSV file. If not specified, defaults to printing result to stdout.", ), ] = None, col_type: Annotated[ @@ -247,7 +251,7 @@ def impute_file( typer.Option( "--type", "-t", - help="Intended data type of the target column. Can be a Polars Int64 or Float64.", + help="Intended data type of the target column. Can be a Polars int or float type.", click_type=click.Choice(ColType._member_names_, case_sensitive=False), ), ] = ColType.INT64.name, @@ -284,6 +288,10 @@ def impute_file( validate_inp_out(input, output, force) create_dir = check_create_dir(output) + if not output.parent.is_dir() and not create_dir: + err_console.print("Won't create directories") + raise typer.Abort() + df = pl.read_csv(input, infer_schema_length=0) with Progress( @@ -303,17 +311,13 @@ def impute_file( if create_dir: output.parent.mkdir(parents=True) - df.write_csv(output) - - console.print("[green]Finished imputing[/green]...") + df.write_csv(output, separator=",") if verbose: - console.print(f"\n[bold]Time taken[/bold]: {(t1 - t0):0.3f}s", highlight=False) - console.print("[bold]Preview of result:[/bold]") - console.print( - df.filter(pl.col(col) <= fill_range.ub for col in columns).head(), - highlight=False, - ) + console.print(f"[bold]Time taken[/bold]: {(t1 - t0):0.3f}s", highlight=False) + + if output is None: + print(df.head(10)) class FillCols(NamedTuple): @@ -333,16 +337,6 @@ def parse_sep_cols(sep_cols: str) -> list[str]: return [col.strip() for col in sep_cols.split(",")] -def impute_capped(denom: int, fill_range: FillRange, rng: Generator) -> int: - """ - Return a random integer from a range that is capped - at the 'denominator' value - """ - # WARN: specifying size=1 instead of leaving size = None - # will return single-value list instead of just the value - return rng.integers(fill_range.lb, denom, endpoint=True) - - @impute_app.command("pair") def impute_pair( input: Annotated[ @@ -352,34 +346,20 @@ def impute_pair( file_okay=True, dir_okay=False, readable=True, - help="Path to target CSV file", + help="Target CSV file", ), ], - output: Annotated[ - Path, - typer.Argument( - exists=False, - file_okay=True, - dir_okay=False, - writable=True, - readable=False, - help="Path to save the output CSV file", - ), + numerator: Annotated[ + str, typer.Option("--numerator", "-n", help="Numerator in the pair imputation") ], - fill_cols: Annotated[ - FillCols, - typer.Option( - "--cols", - "-c", - metavar="TEXT", - help="Pair of columns (numerator and denominator) to be imputed. Specify as comma-separated values. For example, 'count_col,denom_col' specifies 'count_col' as the numerator and 'denom_col' as the denominator.", - parser=parse_fill_cols, - ), + denominator: Annotated[ + str, + typer.Option("--denominator", "-d", help="Denominator in the pair imputation"), ], fill_flag: Annotated[ str, typer.Option( - "--flag", "-f", help="Flag (string) to look for and replace in the columns" + "--flag", "-f", help="Flag/marker to find and replace in the target columns" ), ], fill_range: Annotated[ @@ -388,22 +368,35 @@ def impute_pair( "--range", "-r", metavar="TEXT", - help="Closed, integer interval from which to sample random integer for imputation. Specify as comma-separated values. For example: '1,5' corresponds to the range [1, 5]", + help='Closed, integer interval from which to sample random integer for imputation. Specify as comma-separated values. For example: "1,5" corresponds to the range [1, 5]', parser=parse_fill_range, ), ], + output: Annotated[ + Path | None, + typer.Option( + "--out", + "-o", + exists=False, + file_okay=True, + dir_okay=False, + writable=True, + readable=False, + help="Path to save the imputed CSV file. If not specified, defaults to printing result to stdout.", + ), + ] = None, col_type: Annotated[ str, typer.Option( "--type", "-t", - help="Intended data type of target columns. Can be a Polars Int64 or Float64.", + help="Intended data type of target columns. Can be a Polars int or float type.", click_type=click.Choice(ColType._member_names_, case_sensitive=False), ), ] = ColType.INT64.name, seed: Annotated[ - int, typer.Option("--seed", "-s", help="Random seed for reproducibility") - ] = 123, + int | None, typer.Option("--seed", "-s", help="Random seed for reproducibility") + ] = None, verbose: Annotated[ bool, typer.Option( @@ -418,9 +411,9 @@ def impute_pair( "--force", "-F", help=""" - Allow overwriting data even if (1) the specified output file already exists, - (2) the path to the input file is identical to the path of the output file, or - (3) --sep-out is specfied and that file already exists. All checks will be ignored. + Allow overwriting data even if (1) the specified output file already exists or + (2) the path to the input file is identical to the path of the output file. Both + checks will be ignored. """, ), ] = False, @@ -433,20 +426,16 @@ def impute_pair( dir_okay=False, readable=True, help=""" - Path to some separate CSV file in which to look for denominator data. - Currently only supports a separate file that has the exact same - structure as the input file except for the numerator column being - swapped for the denominator column (because this performs an inner - join on all those columns). + Path to a separate CSV file in which to look for denominator column. Will perform an + inner join between the input file and this file containing the denominator. """, ), ] = None, sep_cols: Annotated[ list[str] | None, typer.Option( - "--sep-cols", - help="Comma-separated list of column names on which to join the numerator and denominator data", - parser=parse_sep_cols, + "--sep-col", + help="Name of column on which to join the numerator and denominator data. Specify for each column to be used.", ), ] = None, sep_out: Annotated[ @@ -458,10 +447,11 @@ def impute_pair( dir_okay=False, writable=True, readable=False, - help="Path to save imputed denominator data from --sep-denom", + help="Path to save imputed version of the separate denominator file", ), ] = None, ): + # TODO: review """ Impute a pair of columns in a CSV file. Will look for the flag in both of the specified columns and substitute with a random @@ -479,50 +469,53 @@ def impute_pair( to specify where to save the imputed version of the denominator data from --sep-denom. """ - validate_inp_out(input, output, force) - create_dir = check_create_dir(output) + create_dir = False + if output is not None: + validate_inp_out(input, output, force) + create_dir = check_create_dir(output) + + create_sep_dir = False + if sep_out is not None: + create_sep_dir = check_create_dir(sep_out) - if (sep_cols is not None or sep_out is not None) and sep_denom is None: + if sep_denom is None and (sep_cols is not None or sep_out is not None): err_console.print("Must specify --sep-denom to use --sep-cols or --sep-out") raise typer.Abort() + if sep_denom is not None and sep_cols is None: + err_console.print("Must specify --sep-cols if using --sep-denom") + raise typer.Abort() + df = pl.read_csv(input, infer_schema_length=0) - if sep_denom is not None: - if sep_cols is None: - err_console.print("You must specify both --sep-denom and --sep-cols") + if sep_denom is None: + if numerator not in df.columns or denominator not in df.columns: + err_console.print("Invalid numerator or denominator column specified") raise typer.Abort() - + else: # NOTE: extract since it gives nested list; maybe some type coercion going on - sep_cols = sep_cols[0] + # sep_cols = sep_cols[0] df_denom = pl.read_csv(sep_denom, infer_schema_length=0) - if ( - fill_cols.numerator not in df.columns - or fill_cols.denominator not in df_denom.columns - ): - err_console.print("Invalid columns specified for --cols") - raise typer.Abort() - - if not all_cols_exist(df, sep_cols) or not all_cols_exist(df_denom, sep_cols): - err_console.print( - "Some of the --sep-cols are missing from the numerator or denominator data" - ) + if numerator not in df.columns or denominator not in df_denom.columns: + err_console.print("Invalid numerator or denominator column specified") raise typer.Abort() - # TODO: might need more sophisticated checks here to ensure the join goes ok or fails gracefully - if fill_cols.denominator not in df_denom.columns: + if sep_cols is not None and ( + not all_cols_exist(df, sep_cols) or not all_cols_exist(df_denom, sep_cols) + ): err_console.print( - "Separate denominator data doesn't contain the given denominator column" + "Some of the --sep-col columns are missing from the numerator or denominator data" ) raise typer.Abort() if sep_out is not None: - if not fill_flag_exists(df_denom, fill_cols.denominator, fill_flag): + # TODO: needed? + if not fill_flag_exists(df_denom, denominator, fill_flag): print( f""" The denominator file {sep_denom} doesn't contain any instancees of {fill_flag} - in {fill_cols.denominator}. Rerun the command without specifying --sep-out. + in {denominator}. Rerun the command without specifying --sep-out. """ ) raise typer.Abort() @@ -535,26 +528,6 @@ def impute_pair( print("Won't overwrite") raise typer.Abort() - imp_sizes = ( - len(df.filter(pl.col(fill_cols.numerator) == fill_flag)), - len(df_denom.filter(pl.col(fill_cols.denominator) == fill_flag)), - ) - else: - if not all_cols_exist(df, list(fill_cols)): - err_console.print("Invalid columns specified for --cols") - raise typer.Abort() - - imp_sizes = ( - len(df.filter(pl.col(fill_cols.numerator) == fill_flag)), - len(df.filter(pl.col(fill_cols.denominator) == fill_flag)), - ) - - if imp_sizes[0] == 0 and imp_sizes[1] == 0: - err_console.print( - f"Cannot find any instances of {fill_flag} in either {fill_cols.numerator} or {fill_cols.denominator}" - ) - raise typer.Abort() - with Progress( SpinnerColumn(), TextColumn("[progress.description]{task.description}"), @@ -562,298 +535,67 @@ def impute_pair( ) as progress: progress.add_task(description="Imputing...", total=None) - rng = np.random.default_rng(seed) - cast_type = ColType[col_type] - - t0 = time.perf_counter() - # NOTE: impute df_denom before attempting join - if sep_denom is not None: - df_denom = df_denom.with_columns( - pl.when(pl.col(fill_cols.denominator) == fill_flag) - .then( - pl.lit( - rng.integers( - fill_range.lb, - fill_range.ub, - size=df_denom.height, - endpoint=True, - ) - ) - ) - .otherwise(pl.col(fill_cols.denominator)) - .alias(fill_cols.denominator) - .cast(cast_type.value) + if sep_denom is None: + t0 = time.perf_counter() + df = impute.column_pair( + df, + numerator, + denominator, + fill_flag, + fill_range, + ColType[col_type].value, + seed, ) - - # NOTE: `validate` default is "m:m" -> forcing a 1:1 relationship of the join + t1 = time.perf_counter() + else: + t0 = time.perf_counter() try: df = df.join( df_denom, on=sep_cols, how="inner", coalesce=True, validate="1:1" ) except pl.exceptions.ComputeError: err_console.print( - "The join with --sep-denom failed because there is not a 1:1 relationship between the join keys." + "The join with --sep-denom failed because there is not a 1:1 relationship between the join columns specified via --sep-col." ) raise typer.Abort() - else: - df = df.with_columns( - pl.when(pl.col(fill_cols.denominator) == fill_flag) - .then( - pl.lit( - rng.integers( - fill_range.lb, fill_range.ub, size=df.height, endpoint=True - ) - ) - ) - .otherwise(pl.col(fill_cols.denominator)) - .alias(fill_cols.denominator) - .cast(cast_type.value) - ) - # NOTE: at this point, imputation of denom is done regardless of whether sep file or not - df = df.with_columns( - pl.when( - (pl.col(fill_cols.numerator) == fill_flag) - & (pl.col(fill_cols.denominator) <= fill_range.ub) - ) - # map_elements() will run Python so it's slow - .then( - pl.col(fill_cols.denominator).map_elements( - lambda denom: impute_capped( - denom, - fill_range, - rng, - ), - return_dtype=pl.Int64, - ) - ) - .when( - (pl.col(fill_cols.numerator) == fill_flag) - & (pl.col(fill_cols.denominator) > fill_range.ub) + df = impute.column_pair( + df, + numerator, + denominator, + fill_flag, + fill_range, + ColType[col_type].value, + seed, ) - .then( - pl.lit( - rng.integers( - fill_range.lb, fill_range.ub, size=df.height, endpoint=True - ) - ) - ) - .otherwise(pl.col(fill_cols.numerator)) - .alias(fill_cols.numerator) - .cast(cast_type.value) - ) - t1 = time.perf_counter() + t1 = time.perf_counter() - # TODO: consider create_dir also for sep_out? - if create_dir: - output.parent.mkdir(parents=True) + if output is not None: + if create_dir: + output.parent.mkdir(parents=True) - if sep_denom is not None: - if sep_out is not None: - df.select(pl.col("*").exclude(fill_cols.numerator)).write_csv(sep_out) + if sep_denom is not None: + df.select(pl.col("*").exclude(denominator)).write_csv( + output, separator="," + ) - df.select(pl.col("*").exclude(fill_cols.denominator)).write_csv(output) - else: - df.write_csv(output) + if sep_out is not None: + if create_sep_dir: + sep_out.parent.mkdir(parents=True) - print("[green]Finished imputing[/green]...") + df.select(pl.col("*").exclude(numerator)).write_csv( + sep_out, separator="," + ) + else: + df.write_csv(output, separator=",") if verbose: - table = Table(title="Imputation statistics", show_header=False) - table.add_row( - f"[blue]Count of imputed values in[/blue] '{fill_cols.numerator}'", - f"{imp_sizes[0]:_}", - ) - table.add_row( - f"[blue]Proportion of imputed values in[/blue] '{fill_cols.numerator}'", - f"{(imp_sizes[0] / df.height):0.2f} (n = {df.height:_})", - end_section=True, - ) - table.add_row( - f"[blue]Count of imputed values in[/blue] '{fill_cols.denominator}'", - f"{imp_sizes[1]:_}", - ) - table.add_row( - f"[blue]Proportion of imputed values in[/blue] '{fill_cols.denominator}'", - f"{(imp_sizes[1] / df.height):0.2f} (n = {df.height:_})", - end_section=True, - ) - table.add_row("[blue]Seed[/blue]", f"{seed}") - table.add_row("[blue]Time taken[/blue]", f"~{(t1 - t0):0.3f} s") - print(table) + console.print(f"[bold]Time taken[/bold]: {(t1 - t0):0.3f}s", highlight=False) + if output is None: print( df.filter( - (pl.col(fill_cols.numerator) <= fill_range.ub) - | (pl.col(fill_cols.denominator) <= fill_range.ub) - ).head() + (pl.col(numerator) <= fill_range.ub) + | (pl.col(denominator) <= fill_range.ub) + ).head(10) ) - - -@impute_app.command("dir") -def impute_dir( - input_dir: Annotated[ - Path, - typer.Argument( - exists=True, - file_okay=False, - dir_okay=True, - readable=True, - help="Directory of CSV files to impute", - ), - ], - output_dir: Annotated[ - Path, - typer.Argument( - exists=False, - file_okay=False, - dir_okay=True, - writable=True, - help="Directory to save output CSV files", - ), - ], - fill_col: Annotated[ - str, typer.Option("--col", "-c", help="Name of the column to impute") - ], - fill_flag: Annotated[ - str, - typer.Option( - "--flag", - "-f", - help="Flag (string) to look for and replace in the target column", - ), - ], - fill_range: Annotated[ - FillRange, - typer.Option( - "--range", - "-r", - metavar="TEXT", - help="Closed, integer interval from which to sample random integer for imputation. Specify as comma-separated values. For example: '1,5' corresponds to the range [1, 5]", - parser=parse_fill_range, - ), - ], - col_type: Annotated[ - str, - typer.Option( - "--type", - "-t", - help="Intended data type of the target column. Can be a Polars Int64 or Float64.", - click_type=click.Choice(ColType._member_names_, case_sensitive=False), - ), - ] = ColType.INT64.name, - seed: Annotated[ - int, typer.Option("--seed", "-s", help="Random seed for reproducibility") - ] = 123, - force: Annotated[ - bool, - typer.Option( - "--force", - "-F", - help="Force imputing the data if INPUT is identical to OUTPUT", - ), - ] = False, - suffix: Annotated[ - str, - typer.Option( - "--suffix", "-x", help="Optional suffix to append to each imputed CSV file" - ), - ] = "", - verbose: Annotated[ - bool, - typer.Option( - "--verbose", - "-v", - help="Whether to show additional imputation summary information", - ), - ] = False, -) -> None: - """ - Impute a target column for a directory of uniform CSV files. Will look for a specific filler flag in the target column - and replace with a random integer from the the specified range. Save the result in the given output directory. - """ - files = list(input_dir.glob("*.csv")) - if len(files) == 0: - err_console.print( - f"The specified input directory [blue bold]{input_dir}[/blue bold] is either empty or doesn't contain any CSV files." - ) - raise typer.Abort() - - create_dir = False - if not output_dir.is_dir(): - create_dir = Confirm.ask( - f"The specified output directory [blue bold]{output_dir}[/blue bold] doesn't exist. Do you want to create it along with any missing parents?" - ) - if not create_dir: - print("Won't create directories") - raise typer.Abort() - - for file in files: - if suffix != "": - output_file = output_dir / f"{file.stem}_{suffix}{file.suffix}" - else: - output_file = output_dir / file.name - - if output_file.is_file() and not force: - overwrite_file = Confirm.ask( - f"The intended output file [blue bold]{output_file}[/blue bold] already exists. Should it be overwritten?" - ) - if not overwrite_file: - print("Won't overwrite") - raise typer.Abort() - - df = pl.read_csv(file, infer_schema_length=0) - - if not all_cols_exist(df, [fill_col]): - err_console.print(f"Column {fill_col} cannot be found in {file}") - raise typer.Abort() - - if verbose: - imp_size = len(df.filter(pl.col(fill_col) == fill_flag)) - - if not fill_flag_exists(df, fill_col, fill_flag): - err_console.print( - f"Cannot find any instances of '{fill_flag}' in {fill_col}" - ) - raise typer.Abort() - - rng = np.random.default_rng(seed) - cast_type = ColType[col_type] - - t0 = time.perf_counter() - df = df.with_columns( - pl.when(pl.col(fill_col) == fill_flag) - .then( - pl.lit( - # NOTE: must specify size to be height of df despite not filling every row - # thus, we get "new" rand int per row - rng.integers( - fill_range.lb, fill_range.ub, size=df.height, endpoint=True - ) - ) - ) - .otherwise(pl.col(fill_col)) - .alias(fill_col) - .cast(cast_type.value) - ) - t1 = time.perf_counter() - - if create_dir: - output_dir.mkdir(parents=True, exist_ok=True) - - df.write_csv(output_file) - - print(f"\nFinished imputing [blue]{file}[/blue]...") - - if verbose: - table = Table(title="Imputation statistics", show_header=False) - table.add_row("[blue]Count of imputed values[/blue]", f"{imp_size:_}") - table.add_row( - "[blue]Proportion of imputed values[/blue]", - f"{(imp_size / df.height):0.2f} (n = {df.height:_})", - ) - table.add_row("[blue]Seed[/blue]", f"{seed}") - table.add_row("[blue]Time taken[/blue]", f"~{(t1 - t0):0.3f} s") - print(table) - - print(df.filter(pl.col(fill_col) <= fill_range.ub).head()) diff --git a/src/csv_helper/impute.py b/src/csv_helper/impute.py index 9d66ed9..810ad07 100644 --- a/src/csv_helper/impute.py +++ b/src/csv_helper/impute.py @@ -18,7 +18,7 @@ def check[T: (pl.DataFrame, pl.LazyFrame)]( if not _fill_flag_exists(df, col, fill_flag): raise ValueError( - f"Column {col} doesn't contain any instances of '{fill_flag}'" + f"Column '{col}' doesn't contain any instances of '{fill_flag}'" ) if len(columns) > 1: @@ -29,7 +29,7 @@ def check[T: (pl.DataFrame, pl.LazyFrame)]( .agg( count=pl.col("value").filter(pl.col("value") == fill_flag).count(), prop=pl.col("value").filter(pl.col("value") == fill_flag).count() - / pl.count(), + / pl.len(), ) .sort("column") ) @@ -43,7 +43,7 @@ def check[T: (pl.DataFrame, pl.LazyFrame)]( .agg( count=pl.col("value").filter(pl.col("value") == fill_flag).count(), prop=pl.col("value").filter(pl.col("value") == fill_flag).count() - / pl.count(), + / pl.len(), ) .sort("column") ) diff --git a/tests/cli_test.py b/tests/cli_test.py index 19c4411..e19cd0e 100644 --- a/tests/cli_test.py +++ b/tests/cli_test.py @@ -1,11 +1,11 @@ import shutil +import textwrap from importlib.metadata import version -from pathlib import Path, PureWindowsPath -from sys import platform -from textwrap import dedent +from pathlib import Path import polars as pl import pytest +from polars.testing import assert_frame_equal from typer.testing import CliRunner from csv_helper.cli import app @@ -14,7 +14,7 @@ @pytest.fixture -def test_data(tmp_path) -> Path: +def test_data(tmp_path: Path) -> Path: """ Fixture that moves test CSV data to new dir for testing and returns the file's path @@ -22,11 +22,12 @@ def test_data(tmp_path) -> Path: data_dir = tmp_path / "data" data_dir.mkdir() shutil.copy("./tests/data/test_impute_data.csv", data_dir) + return data_dir / "test_impute_data.csv" @pytest.fixture -def test_data_dir(tmp_path) -> Path: +def test_data_dir(tmp_path: Path) -> Path: """ Fixture that moves test dir of CSV data to new dir for testing and returns the dir's path @@ -34,11 +35,12 @@ def test_data_dir(tmp_path) -> Path: data_dir = tmp_path / "data" data_dir.mkdir() shutil.copytree("./tests/data/test_dir", data_dir / "test_dir") + return data_dir / "test_dir" @pytest.fixture -def test_data_sep(tmp_path) -> Path: +def test_data_sep(tmp_path: Path) -> Path: """ Fixture that moves test dir of pair CSV data to new dir for testing and returns the dir's path @@ -46,34 +48,22 @@ def test_data_sep(tmp_path) -> Path: data_dir = tmp_path / "data" data_dir.mkdir() shutil.copytree("./tests/data/test_pair_sep", data_dir / "test_pair_sep") - return data_dir / "test_pair_sep" - -# NOTE: can also access funcs in csv_helper.cli directly: -# from csv_helper.cli import preview -# preview("./tests/data/test_impute_data.csv", 10) + return data_dir / "test_pair_sep" -def test_show_version(): +def test_print_version(): result = runner.invoke(app, ["--version"]) ver = version("csv_helper") + assert result.stdout.replace("\n", "") == f"csv-helper version {ver}" -def test_preview(test_data): - result = runner.invoke(app, ["preview", str(test_data), "-n", "15"]) +def test_show(test_data): + result = runner.invoke(app, ["show", str(test_data), "-n", "15"]) assert result.exit_code == 0 - if platform == "linux" or platform == "darwin": - msg = f"File: {test_data}" - elif platform == "win32": - msg = f"File: {PureWindowsPath(test_data)}" - - # NOTE: stripping newlines and then slicing; for some reason on macos and windows - # the stdout has newlines inserted - assert result.stdout.replace("\n", "")[: len(msg)] == msg - - out = dedent( + out = textwrap.dedent( """\ shape: (15, 4) ┌────────┬───────────┬───────┬───────────┐ @@ -95,12 +85,13 @@ def test_preview(test_data): └────────┴───────────┴───────┴───────────┘ """ ) - assert out in result.stdout + assert result.stdout == out -def test_preview_not_file(tmp_path): + +def test_show_not_file(tmp_path): dir = tmp_path / "data" - result = runner.invoke(app, ["preview", str(dir), "-n", "15"]) + result = runner.invoke(app, ["show", str(dir), "-n", "15"]) assert result.exit_code == 2 @@ -108,23 +99,19 @@ def test_check(test_data): result = runner.invoke(app, ["check", str(test_data), "-c", "cases", "-f", "<=5"]) assert result.exit_code == 0 - out = dedent( + out = textwrap.dedent( """\ - Found 308 occurrences of '<=5' in 'cases' -> 0.62 of rows (n = 500) - shape: (5, 4) - ┌────────┬───────────┬───────┬───────────┐ - │ county ┆ year_week ┆ cases ┆ all_cause │ - │ --- ┆ --- ┆ --- ┆ --- │ - │ str ┆ str ┆ str ┆ str │ - ╞════════╪═══════════╪═══════╪═══════════╡ - │ 55107 ┆ 2020-05 ┆ <=5 ┆ 334 │ - │ 28101 ┆ 2021-20 ┆ <=5 ┆ <=5 │ - │ 35043 ┆ 2022-24 ┆ <=5 ┆ 5862 │ - │ 28043 ┆ 2023-09 ┆ <=5 ┆ 811 │ - │ 26093 ┆ 2020-42 ┆ <=5 ┆ 7606 │ - └────────┴───────────┴───────┴───────────┘ + shape: (1, 3) + ┌────────┬───────┬───────┐ + │ column ┆ count ┆ prop │ + │ --- ┆ --- ┆ --- │ + │ str ┆ u32 ┆ f64 │ + ╞════════╪═══════╪═══════╡ + │ cases ┆ 308 ┆ 0.616 │ + └────────┴───────┴───────┘ """ ) + assert result.stdout == out @@ -144,6 +131,7 @@ def test_impute_file(tmp_path, test_data): "impute", "file", str(test_data), + "-o", str(out_file), "-c", "cases", @@ -151,28 +139,19 @@ def test_impute_file(tmp_path, test_data): f"<={fill_range[1]}", "-r", f"{fill_range[0]},{fill_range[1]}", - "-s", - "8", ], ) assert result.exit_code == 0 assert out_file.is_file() is True - df_in = ( - pl.read_csv(test_data, infer_schema_length=0) - # .with_row_index(name="id") - # .select("id", "cases") - ) - df_out = ( - pl.read_csv(out_file, infer_schema_length=0) - # .with_row_index(name="id") - # .select("id", "cases") - ) + df_in = pl.read_csv(test_data, infer_schema_length=0) + df_out = pl.read_csv(out_file, infer_schema_length=0) assert df_in.shape == df_out.shape df = df_in.join( df_out, on=["county", "year_week"], how="inner", suffix="_imputed" ).filter(pl.col("cases") == f"<={fill_range[1]}") + assert ( df.select((pl.col("cases_imputed") == f"<={fill_range[1]}").any()).item() is False @@ -197,6 +176,7 @@ def test_impute_file_repro(tmp_path, test_data): "impute", "file", str(test_data), + "-o", str(out_file_1), "-c", "cases", @@ -205,7 +185,7 @@ def test_impute_file_repro(tmp_path, test_data): "-r", f"{fill_range[0]},{fill_range[1]}", "-s", - "123", + "88", ], ) assert result_1.exit_code == 0 @@ -217,6 +197,7 @@ def test_impute_file_repro(tmp_path, test_data): "impute", "file", str(test_data), + "-o", str(out_file_2), "-c", "cases", @@ -225,7 +206,7 @@ def test_impute_file_repro(tmp_path, test_data): "-r", f"{fill_range[0]},{fill_range[1]}", "-s", - "123", + "88", ], ) assert result_2.exit_code == 0 @@ -237,7 +218,7 @@ def test_impute_file_repro(tmp_path, test_data): df_2 = pl.read_csv(out_file_2, infer_schema_length=0) assert df_2.select((pl.col("cases") == f"<={fill_range[1]}").any()).item() is False - assert df_1.equals(df_2) is True + assert_frame_equal(df_1, df_2) def test_impute_file_output_exists(tmp_path, test_data): @@ -254,6 +235,7 @@ def test_impute_file_output_exists(tmp_path, test_data): "impute", "file", str(test_data), + "-o", str(out_file), "-c", "cases", @@ -261,8 +243,6 @@ def test_impute_file_output_exists(tmp_path, test_data): f"<={fill_range[1]}", "-r", f"{fill_range[0]},{fill_range[1]}", - "-s", - "8", ], ) assert result.exit_code == 1 @@ -282,6 +262,7 @@ def test_impute_file_overwrite(tmp_path, test_data): "impute", "file", str(test_data), + "-o", str(out_file), "-c", "cases", @@ -289,210 +270,12 @@ def test_impute_file_overwrite(tmp_path, test_data): f"<={fill_range[1]}", "-r", f"{fill_range[0]},{fill_range[1]}", - "-s", - "8", ], input="y\n", ) assert result.exit_code == 0 -def test_impute_dir(tmp_path, test_data_dir): - out_dir = Path(tmp_path) / "test_impute_dir_output" - out_dir.mkdir() - fill_range = (1, 5) - - result = runner.invoke( - app, - [ - "impute", - "dir", - str(test_data_dir), - str(out_dir), - "-c", - "cases", - "-f", - f"<={fill_range[1]}", - "-r", - f"{fill_range[0]},{fill_range[1]}", - "-s", - "8", - ], - ) - assert result.exit_code == 0 - assert out_dir.is_dir() - for i in range(5): - f = out_dir / f"test_impute_data_{i}.csv" - assert f.is_file() is True - - for input, output in zip(test_data_dir.iterdir(), out_dir.iterdir()): - df_in = ( - pl.read_csv(input, infer_schema_length=0) - # .with_row_index(name="id") - # .select("id", "cases", "all_cause") - ) - df_out = ( - pl.read_csv(output, infer_schema_length=0) - # .with_row_index(name="id") - # .select("id", "cases", "all_cause") - ) - assert df_in.shape == df_out.shape - - df = df_in.join( - df_out, on=["county", "year_week"], how="inner", suffix="_imputed" - ).filter( - (pl.col("cases") == f"<={fill_range[1]}") - | (pl.col("all_cause") == f"<={fill_range[1]}") - ) - assert ( - df.select((pl.col("cases_imputed") == f"<={fill_range[1]}").any()).item() - is False - ) - assert ( - df.select("cases_imputed") - .cast(pl.Int64) - .select( - pl.col("cases_imputed").is_between(fill_range[0], fill_range[1]).all() - ) - .item() - is True - ) - - -def test_impute_dir_force(tmp_path, test_data_dir): - out_dir = Path(tmp_path) / "test_impute_dir_output" - out_dir.mkdir() - fill_range = (1, 5) - - inp_files = test_data_dir.glob("*.csv") - for file in inp_files: - out_file = out_dir / file.name - out_file.touch() - - result = runner.invoke( - app, - [ - "impute", - "dir", - str(test_data_dir), - str(out_dir), - "-c", - "cases", - "-f", - f"<={fill_range[1]}", - "-r", - f"{fill_range[0]},{fill_range[1]}", - "-s", - "8", - "--force", - ], - ) - assert result.exit_code == 0 - assert out_dir.is_dir() - for i in range(5): - f = out_dir / f"test_impute_data_{i}.csv" - assert f.is_file() is True - - for input, output in zip(test_data_dir.iterdir(), out_dir.iterdir()): - df_in = ( - pl.read_csv(input, infer_schema_length=0) - # .with_row_index(name="id") - # .select("id", "cases", "all_cause") - ) - df_out = ( - pl.read_csv(output, infer_schema_length=0) - # .with_row_index(name="id") - # .select("id", "cases", "all_cause") - ) - assert df_in.shape == df_out.shape - - df = df_in.join( - df_out, on=["county", "year_week"], how="inner", suffix="_imputed" - ).filter( - (pl.col("cases") == f"<={fill_range[1]}") - | (pl.col("all_cause") == f"<={fill_range[1]}") - ) - assert ( - df.select((pl.col("cases_imputed") == f"<={fill_range[1]}").any()).item() - is False - ) - assert ( - df.select("cases_imputed") - .cast(pl.Int64) - .select( - pl.col("cases_imputed").is_between(fill_range[0], fill_range[1]).all() - ) - .item() - is True - ) - - -def test_impute_dir_suffix(tmp_path, test_data_dir): - out_dir = Path(tmp_path) / "test_impute_dir_output" - out_dir.mkdir() - fill_range = (1, 5) - suffix = "imputed" - - result = runner.invoke( - app, - [ - "impute", - "dir", - str(test_data_dir), - str(out_dir), - "-c", - "cases", - "-f", - f"<={fill_range[1]}", - "-r", - f"{fill_range[0]},{fill_range[1]}", - "-s", - "8", - "-x", - suffix, - ], - ) - assert result.exit_code == 0 - assert out_dir.is_dir() is True - - for i in range(5): - f = out_dir / f"test_impute_data_{i}_{suffix}.csv" - assert f.is_file() is True - - for input, output in zip(test_data_dir.iterdir(), out_dir.iterdir()): - df_in = ( - pl.read_csv(input, infer_schema_length=0) - # .with_row_index(name="id") - # .select("id", "cases", "all_cause") - ) - df_out = ( - pl.read_csv(output, infer_schema_length=0) - # .with_row_index(name="id") - # .select("id", "cases", "all_cause") - ) - assert df_in.shape == df_out.shape - - df = df_in.join( - df_out, on=["county", "year_week"], how="inner", suffix="_imputed" - ).filter( - (pl.col("cases") == f"<={fill_range[1]}") - | (pl.col("all_cause") == f"<={fill_range[1]}") - ) - assert ( - df.select((pl.col("cases_imputed") == f"<={fill_range[1]}").any()).item() - is False - ) - assert ( - df.select("cases_imputed") - .cast(pl.Int64) - .select( - pl.col("cases_imputed").is_between(fill_range[0], fill_range[1]).all() - ) - .item() - is True - ) - - def test_impute_pair(tmp_path, test_data): out_file = tmp_path / "test_impute_pair_output.csv" fill_range = (1, 5) @@ -503,34 +286,31 @@ def test_impute_pair(tmp_path, test_data): "impute", "pair", str(test_data), - str(out_file), - "-c", - "cases,all_cause", + "-n", + "cases", + "-d", + "all_cause", "-f", f"<={fill_range[1]}", "-r", f"{fill_range[0]},{fill_range[1]}", - "-s", - "8", + "-o", + str(out_file), ], ) assert result.exit_code == 0 assert out_file.is_file() is True - df_in = ( - pl.read_csv(test_data, infer_schema_length=0) - # .with_row_index(name="id") - # .select("id", "cases", "all_cause") - ) - df_out = ( - pl.read_csv(out_file, infer_schema_length=0) - # .with_row_index(name="id") - # .select("id", "cases", "all_cause") - ) + df_in = pl.read_csv(test_data, infer_schema_length=0) + df_out = pl.read_csv(out_file, infer_schema_length=0) assert df_in.shape == df_out.shape df = df_in.join( - df_out, on=["county", "year_week"], how="inner", suffix="_imputed" + df_out, + on=["county", "year_week"], + how="inner", + suffix="_imputed", + validate="1:1", ).filter( (pl.col("cases") == f"<={fill_range[1]}") | (pl.col("all_cause") == f"<={fill_range[1]}") @@ -570,7 +350,6 @@ def test_impute_pair_sep(tmp_path, test_data_sep): num_file = test_data_sep / "test_impute_numerator_only_data.csv" out_file = tmp_path / "numerator_output.csv" denom_file = test_data_sep / "test_impute_denom_only_data.csv" - sep_cols = "county,year_week" fill_range = (1, 5) result = runner.invoke( @@ -579,19 +358,22 @@ def test_impute_pair_sep(tmp_path, test_data_sep): "impute", "pair", str(num_file), - str(out_file), - "-c", - "cases,all_cause", + "-n", + "cases", + "-d", + "all_cause", "-f", f"<={fill_range[1]}", "-r", f"{fill_range[0]},{fill_range[1]}", - "-s", - "8", + "-o", + str(out_file), "--sep-denom", str(denom_file), - "--sep-cols", - sep_cols, + "--sep-col", + "county", + "--sep-col", + "year_week", ], ) assert result.exit_code == 0 @@ -599,6 +381,7 @@ def test_impute_pair_sep(tmp_path, test_data_sep): df_num = pl.read_csv(num_file, infer_schema_length=0) df_out = pl.read_csv(out_file, infer_schema_length=0) + assert df_num.shape == df_out.shape assert ( df_out.select((pl.col("cases") == f"<={fill_range[1]}").any()).item() is False @@ -626,7 +409,6 @@ def test_impute_pair_join_fails(tmp_path, test_data_sep): num_file = test_data_sep / "test_impute_numerator_only_data.csv" out_file = tmp_path / "numerator_output.csv" denom_file = test_data_sep / "test_impute_denom_only_join_fails.csv" - sep_cols = "county,year_week" fill_range = (1, 5) result = runner.invoke( @@ -635,19 +417,22 @@ def test_impute_pair_join_fails(tmp_path, test_data_sep): "impute", "pair", str(num_file), - str(out_file), - "-c", - "cases,all_cause", + "-n", + "cases", + "-d", + "all_cause", "-f", f"<={fill_range[1]}", "-r", f"{fill_range[0]},{fill_range[1]}", - "-s", - "8", + "-o", + str(out_file), "--sep-denom", str(denom_file), - "--sep-cols", - sep_cols, + "--sep-col", + "county", + "--sep-col", + "year_week", ], ) assert result.exit_code == 1 @@ -657,7 +442,6 @@ def test_impute_pair_sep_output(tmp_path, test_data_sep): num_file = test_data_sep / "test_impute_numerator_only_data.csv" out_file = tmp_path / "test_impute_sep_files_numerator_output.csv" denom_file = test_data_sep / "test_impute_denom_only_data.csv" - sep_cols = "county,year_week" sep_out = tmp_path / "test_impute_sep_files_denom_output.csv" fill_range = (1, 5) @@ -667,19 +451,22 @@ def test_impute_pair_sep_output(tmp_path, test_data_sep): "impute", "pair", str(num_file), - str(out_file), - "-c", - "cases,all_cause", + "-n", + "cases", + "-d", + "all_cause", "-f", f"<={fill_range[1]}", "-r", f"{fill_range[0]},{fill_range[1]}", - "-s", - "8", + "-o", + str(out_file), "--sep-denom", str(denom_file), - "--sep-cols", - sep_cols, + "--sep-col", + "county", + "--sep-col", + "year_week", "--sep-out", str(sep_out), ], From 31c6c51c19941412b8e2779210b304e9aabb5acf Mon Sep 17 00:00:00 2001 From: Andrew Tiu <63322884+winter-again@users.noreply.github.com> Date: Sat, 21 Jun 2025 13:27:46 -0400 Subject: [PATCH 46/47] Remove Windows --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index e898406..6eab520 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -15,7 +15,7 @@ jobs: strategy: fail-fast: false matrix: - os: [ubuntu-latest, macos-latest, windows-latest] + os: [ubuntu-latest, macos-latest] python-version: - "3.12" - "3.13" From 423da4eb41ea43598ede3fb3d1be276e52bb2f67 Mon Sep 17 00:00:00 2001 From: Andrew Tiu <63322884+winter-again@users.noreply.github.com> Date: Sat, 21 Jun 2025 13:28:05 -0400 Subject: [PATCH 47/47] Add Windows back --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 6eab520..e898406 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -15,7 +15,7 @@ jobs: strategy: fail-fast: false matrix: - os: [ubuntu-latest, macos-latest] + os: [ubuntu-latest, macos-latest, windows-latest] python-version: - "3.12" - "3.13"