diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 21f615b..e898406 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -4,13 +4,9 @@ on: push: branches: - main - paths-ignore: - - "*.md" pull_request: branches: - main - paths-ignore: - - "*.md" jobs: test: @@ -21,25 +17,27 @@ jobs: matrix: os: [ubuntu-latest, macos-latest, windows-latest] python-version: - - "3.11" - "3.12" + - "3.13" steps: - name: Checkout uses: actions/checkout@v4 - name: Install uv - uses: astral-sh/setup-uv@v4 + uses: astral-sh/setup-uv@v6 with: - version: "0.5.5" + version: "latest" - name: Set up Python ${{ matrix.python-version }} - run: uv python install ${{ matrix.python-version }} - - - name: Install project - run: uv sync --all-extras --dev + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} - - name: Run tests with tox - run: | - uv run -- tox --version - uv run -- tox -e py + - name: Install dev dependencies + # TODO: maybe have to add explicit uv lock? + # run: uv sync --locked --only-dev --python="${{ matrix.python-version }}" + run: uv sync --only-dev --python="${{ matrix.python-version }}" + + - name: Run tests with nox on ${{ matrix.os }} + run: uv run --locked --python="${{ matrix.python-version }}" -- nox -s "tests-${{ matrix.python-version }}" diff --git a/.gitignore b/.gitignore index 3152050..17b08d3 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,5 @@ .venv/ -/data/ __pycache__/ -.pdm-python dist/ sdist/ -test.py +# /data/ diff --git a/.python-version b/.python-version index 2c07333..2c20ac9 100644 --- a/.python-version +++ b/.python-version @@ -1 +1 @@ -3.11 +3.13.3 diff --git a/README.md b/README.md index 994ea61..658f51e 100644 --- a/README.md +++ b/README.md @@ -2,17 +2,17 @@ ![Tests](https://github.com/winter-again/csv-helper/workflows/Tests/badge.svg) -A CLI for working with CSV data. Currently, primary focus is on workflows for imputing masked counts data. 
- -Note: `tests/data` has testing data +A simple library and CLI for working with CSV data. ## Installation -```bash +```sh +uv add "csv-helper @ git+ssh://git@github.com/winter-again/csv-helper" +# using pip pip install git+https://git@github.com/winter-again/csv-helper # or via SSH pip install git+ssh://git@github.com/winter-again/csv-helper -# designate specific version +# designate specific version tag or branch pip install git+ssh://git@github.com/winter-again/csv-helper.git@v0.1.0 ``` diff --git a/noxfile.py b/noxfile.py new file mode 100644 index 0000000..3da33c4 --- /dev/null +++ b/noxfile.py @@ -0,0 +1,16 @@ +import nox + +nox.options.default_venv_backend = "uv" + + +@nox.session(python=["3.12", "3.13"]) +def tests(session: nox.Session) -> None: + """Run tests with pytest.""" + session.run_install( + "uv", + "sync", + "--locked", + f"--python={session.virtualenv.location}", + env={"UV_PROJECT_ENVIRONMENT": session.virtualenv.location}, + ) + session.run("pytest", "-vv", "tests") diff --git a/pyproject.toml b/pyproject.toml index 3af0b9b..76169b8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,21 +1,19 @@ [project] name = "csv-helper" -version = "0.2.2" -description = "A simple CLI for imputing masked counts in CSV data" +version = "0.3.0" +description = "A simple library and CLI for working with CSV data" readme = "README.md" -authors = [ - {name = "Andrew Tiu", email = "andrew.tiu88@gmail.com"}, -] -license = {text = "MIT"} -requires-python = ">=3.11" +authors = [{ name = "Andrew Tiu", email = "andrew.tiu88@gmail.com" }] +license = "MIT" +requires-python = ">=3.12" dependencies = [ - "typer>=0.12.3", - "polars>=1.4.1", - "numpy>=2.0.1", + "numpy>=2.2.6", + "polars>=1.30.0", + "typer>=0.15.4", ] [project.scripts] -csv-helper = "csv_helper.main:app" +csv-helper = "csv_helper.cli:app" [build-system] requires = ["pdm-backend"] @@ -23,16 +21,6 @@ build-backend = "pdm.backend" [dependency-groups] dev = [ - "pytest>=8.3.2", - "tox>=4.23.2", - 
"tox-uv>=1.16.0", -] - -[tool.pyright] -include = ["src"] -exclude = [ - "**/__pycache__" + "nox>=2025.5.1", + "pytest>=8.3.5", ] - -[tool.pytest.ini_options] -pythonpath = ["src"] diff --git a/src/csv_helper/cli.py b/src/csv_helper/cli.py new file mode 100644 index 0000000..f327bb4 --- /dev/null +++ b/src/csv_helper/cli.py @@ -0,0 +1,601 @@ +import time +from enum import Enum +from importlib.metadata import version +from pathlib import Path +from typing import NamedTuple + +import click +import polars as pl +import typer +from rich.console import Console +from rich.progress import Progress, SpinnerColumn, TextColumn +from rich.prompt import Confirm +from typing_extensions import Annotated + +from . import impute + +app = typer.Typer(no_args_is_help=True, help="A CLI for working with CSV data") +impute_app = typer.Typer(no_args_is_help=True, help="Impute CSV data") +app.add_typer(impute_app, name="impute") + +console = Console() +err_console = Console(stderr=True) + + +def version_callback(value: bool): + """Print CLI version""" + if value: + print(f"csv-helper version {version(__package__)}") # pyright: ignore[reportArgumentType] + raise typer.Exit() + + +@app.callback() +def callback( + version: bool = typer.Option( + False, + "--version", + "-v", + is_eager=True, + help="Print the version and exit.", + callback=version_callback, + ), +) -> None: + pass + + +@app.command() +def show( + input: Annotated[ + Path, + typer.Argument( + exists=True, + file_okay=True, + dir_okay=False, + readable=True, + help="Target CSV file", + ), + ], + n_rows: Annotated[ + int, typer.Option("--nrows", "-n", min=1, help="Number of rows to show") + ] = 10, +) -> None: + """ + Show preview of a given CSV file. 
+ """ + df = pl.read_csv(input, infer_schema_length=0) + + if n_rows > df.height: + print(df) + else: + print(df.head(n_rows)) + + +@app.command() +def check( + input: Annotated[ + Path, + typer.Argument( + exists=True, + file_okay=True, + dir_okay=False, + readable=True, + help="The CSV file to check", + ), + ], + columns: Annotated[ + list[str], + typer.Option( + "--col", + "-c", + help="Name of a column to check. Specify this for each column you want checked.", + ), + ], + fill_flag: Annotated[ + str, + typer.Option("--flag", "-f", help="Flag (string) to look for in COL"), + ], +) -> None: + """ + Summarize counts and proportion of instances of `fill_flag` in each of + the given columns. + """ + df = pl.read_csv(input, infer_schema_length=0) + + try: + out = impute.check(df, columns, fill_flag) + except ValueError as e: + if f"doesn't contain any instances of '{fill_flag}'" in str(e): + print(e) + else: + print(out) + + +class FillRange(NamedTuple): + lb: int + ub: int + + +# NOTE: see https://github.com/fastapi/typer/issues/182#issuecomment-1708245110 +# and https://github.com/fastapi/typer/issues/151#issuecomment-1975322806 +# for this workaround for working with enums such that Typer understands the args properly +# without having to map strings or ints to the values we really want +class ColType(Enum): + FLOAT32 = pl.Float32 + FLOAT64 = pl.Float64 + INT8 = pl.Int8 + INT16 = pl.Int16 + INT32 = pl.Int32 + INT64 = pl.Int64 + INT128 = pl.Int128 + UINT8 = pl.UInt8 + UINT16 = pl.UInt16 + UINT32 = pl.UInt32 + UINT64 = pl.UInt64 + + +def validate_inp_out(input: Path, output: Path, force: bool) -> None: + if output.is_file() and not force: + overwrite_file = Confirm.ask( + f"[blue bold]{output}[/blue bold] already exists. Do you want to overwrite it?" + ) + if not overwrite_file: + err_console.print("Won't overwrite") + raise typer.Abort() + + if input == output and not force: + err_console.print( + "Cannot specify output to be identical to input. 
Use the --force/-F option to force this behavior" + ) + raise typer.Abort() + + +def check_create_dir(output: Path) -> bool: + if not output.parent.is_dir(): + create_dir = Confirm.ask( + f"The specified output's parent directory [blue bold]{output.parent}[/blue bold] doesn't exist. Do you want to create it along with any missing parents?" + ) + if not create_dir: + return False + + return True + + return False + + +def all_cols_exist(df: pl.DataFrame, fill_cols: list[str]) -> bool: + for col in fill_cols: + if col not in df.columns: + return False + + return True + + +def fill_flag_exists(df: pl.DataFrame, fill_col: str, fill_flag: str) -> bool: + if df.select((pl.col(fill_col) == fill_flag).any()).item(): + return True + + return False + + +def parse_fill_range(fill_range: str) -> FillRange: + fill_range_parsed = tuple(x.strip() for x in fill_range.split(",")) + if len(fill_range_parsed) != 2: + raise typer.BadParameter(f"Invalid fill range: {fill_range}") + + if not fill_range_parsed[0].isdigit() or not fill_range_parsed[1].isdigit(): + raise typer.BadParameter(f"Invalid fill range: {fill_range}") + + fill_range_int = FillRange(int(fill_range_parsed[0]), int(fill_range_parsed[1])) + if fill_range_int.lb > fill_range_int.ub: + err_console.print(f"Invalid fill range given: {fill_range}") + raise typer.Abort() + + return fill_range_int + + +@impute_app.command("file") +def impute_file( + input: Annotated[ + Path, + typer.Argument( + exists=True, + file_okay=True, + dir_okay=False, + readable=True, + help="Target CSV file", + ), + ], + columns: Annotated[ + list[str], + typer.Option( + "--col", + "-c", + help="Name of column to impute. 
Specify this for each column you want imputed.", + ), + ], + fill_flag: Annotated[ + str, + typer.Option( + "--flag", + "-f", + help="Flag/marker to find and replace in the target column(s)", + ), + ], + fill_range: Annotated[ + FillRange, + typer.Option( + "--range", + "-r", + metavar="TEXT", + help='Closed, integer interval from which to sample random integer for imputation. Specify as comma-separated values. For example: "1,5" corresponds to the range [1, 5]', + parser=parse_fill_range, + ), + ], + output: Annotated[ + Path | None, + typer.Option( + "--out", + "-o", + # NOTE: if exists=False, file/directory doesn't need to exist; + # if doesn't exist, other checks skipped + exists=False, + file_okay=True, + dir_okay=False, + writable=True, + readable=False, + help="Path to save the imputed CSV file. If not specified, defaults to printing result to stdout.", + ), + ] = None, + col_type: Annotated[ + str, + typer.Option( + "--type", + "-t", + help="Intended data type of the target column. Can be a Polars int or float type.", + click_type=click.Choice(ColType._member_names_, case_sensitive=False), + ), + ] = ColType.INT64.name, + seed: Annotated[ + int | None, typer.Option("--seed", "-s", help="Random seed for reproducibility") + ] = None, + verbose: Annotated[ + bool, + typer.Option( + "--verbose", + "-v", + help="Whether to show additional imputation summary information", + ), + ] = False, + force: Annotated[ + bool, + typer.Option( + "--force", + "-F", + help=""" + Allow overwriting data even if (1) the specified output file already exists or + (2) the path to the input file is identical to the path of the output file. Both + checks will be ignored. + """, + ), + ] = False, +) -> None: + """ + Impute target column(s) in a CSV file. Will look for the specified flag and replace + it with a random integer from the specified range. Optionally, save the result to a new CSV file. 
+ """ + create_dir = False + if output is not None: + validate_inp_out(input, output, force) + create_dir = check_create_dir(output) + + if not output.parent.is_dir() and not create_dir: + err_console.print("Won't create directories") + raise typer.Abort() + + df = pl.read_csv(input, infer_schema_length=0) + + with Progress( + SpinnerColumn(), + TextColumn("[progress.description]{task.description}"), + transient=True, + ) as progress: + progress.add_task(description="Imputing...", total=None) + + t0 = time.perf_counter() + df = impute.columns( + df, columns, fill_flag, fill_range, ColType[col_type].value, seed + ) + t1 = time.perf_counter() + + if output is not None: + if create_dir: + output.parent.mkdir(parents=True) + + df.write_csv(output, separator=",") + + if verbose: + console.print(f"[bold]Time taken[/bold]: {(t1 - t0):0.3f}s", highlight=False) + + if output is None: + print(df.head(10)) + + +class FillCols(NamedTuple): + numerator: str + denominator: str + + +def parse_fill_cols(fill_cols: str) -> FillCols: + fill_cols_parsed = tuple(x.strip() for x in fill_cols.split(",")) + if len(fill_cols_parsed) != 2: + raise typer.BadParameter(f"Invalid fill cols: {fill_cols}") + + return FillCols(fill_cols_parsed[0], fill_cols_parsed[1]) + + +def parse_sep_cols(sep_cols: str) -> list[str]: + return [col.strip() for col in sep_cols.split(",")] + + +@impute_app.command("pair") +def impute_pair( + input: Annotated[ + Path, + typer.Argument( + exists=True, + file_okay=True, + dir_okay=False, + readable=True, + help="Target CSV file", + ), + ], + numerator: Annotated[ + str, typer.Option("--numerator", "-n", help="Numerator in the pair imputation") + ], + denominator: Annotated[ + str, + typer.Option("--denominator", "-d", help="Denominator in the pair imputation"), + ], + fill_flag: Annotated[ + str, + typer.Option( + "--flag", "-f", help="Flag/marker to find and replace in the target columns" + ), + ], + fill_range: Annotated[ + FillRange, + typer.Option( + "--range", 
+ "-r", + metavar="TEXT", + help='Closed, integer interval from which to sample random integer for imputation. Specify as comma-separated values. For example: "1,5" corresponds to the range [1, 5]', + parser=parse_fill_range, + ), + ], + output: Annotated[ + Path | None, + typer.Option( + "--out", + "-o", + exists=False, + file_okay=True, + dir_okay=False, + writable=True, + readable=False, + help="Path to save the imputed CSV file. If not specified, defaults to printing result to stdout.", + ), + ] = None, + col_type: Annotated[ + str, + typer.Option( + "--type", + "-t", + help="Intended data type of target columns. Can be a Polars int or float type.", + click_type=click.Choice(ColType._member_names_, case_sensitive=False), + ), + ] = ColType.INT64.name, + seed: Annotated[ + int | None, typer.Option("--seed", "-s", help="Random seed for reproducibility") + ] = None, + verbose: Annotated[ + bool, + typer.Option( + "--verbose", + "-v", + help="Whether to show additional imputation summary information", + ), + ] = False, + force: Annotated[ + bool, + typer.Option( + "--force", + "-F", + help=""" + Allow overwriting data even if (1) the specified output file already exists or + (2) the path to the input file is identical to the path of the output file. Both + checks will be ignored. + """, + ), + ] = False, + sep_denom: Annotated[ + Path | None, + typer.Option( + "--sep-denom", + exists=True, + file_okay=True, + dir_okay=False, + readable=True, + help=""" + Path to a separate CSV file in which to look for denominator column. Will perform an + inner join between the input file and this file containing the denominator. + """, + ), + ] = None, + sep_cols: Annotated[ + list[str] | None, + typer.Option( + "--sep-col", + help="Name of column on which to join the numerator and denominator data. 
Specify for each column to be used.", + ), + ] = None, + sep_out: Annotated[ + Path | None, + typer.Option( + "--sep-out", + exists=False, + file_okay=True, + dir_okay=False, + writable=True, + readable=False, + help="Path to save imputed version of the separate denominator file", + ), + ] = None, +): + # TODO: review + """ + Impute a pair of columns in a CSV file. Will look for the + flag in both of the specified columns and substitute with a random + integer in the closed range. Note, the pair of columns + is comprised of a numerator column and a denominator column such that + the imputed values of the numerator column must not exceed the imputed + values of the denominator column. + + Separate denominator data: + + If --sep-denom is provided, then that file will be used instead to source + the denominator column. You must then also use --sep-col to specify the + columns to use for (inner) joining the numerator and denominator data. Note + that a 1:1 relationship in the join is enforced. Optionally, use --sep-out + to specify where to save the imputed version of the denominator data from + --sep-denom. 
+ """ + create_dir = False + if output is not None: + validate_inp_out(input, output, force) + create_dir = check_create_dir(output) + + create_sep_dir = False + if sep_out is not None: + create_sep_dir = check_create_dir(sep_out) + + if sep_denom is None and (sep_cols is not None or sep_out is not None): + err_console.print("Must specify --sep-denom to use --sep-col or --sep-out") + raise typer.Abort() + + if sep_denom is not None and sep_cols is None: + err_console.print("Must specify --sep-col if using --sep-denom") + raise typer.Abort() + + df = pl.read_csv(input, infer_schema_length=0) + + if sep_denom is None: + if numerator not in df.columns or denominator not in df.columns: + err_console.print("Invalid numerator or denominator column specified") + raise typer.Abort() + else: + # NOTE: extract since it gives nested list; maybe some type coercion going on + # sep_cols = sep_cols[0] + df_denom = pl.read_csv(sep_denom, infer_schema_length=0) + + if numerator not in df.columns or denominator not in df_denom.columns: + err_console.print("Invalid numerator or denominator column specified") + raise typer.Abort() + + if sep_cols is not None and ( + not all_cols_exist(df, sep_cols) or not all_cols_exist(df_denom, sep_cols) + ): + err_console.print( + "Some of the --sep-col columns are missing from the numerator or denominator data" + ) + raise typer.Abort() + + if sep_out is not None: + # TODO: needed? + if not fill_flag_exists(df_denom, denominator, fill_flag): + print( + f""" + The denominator file {sep_denom} doesn't contain any instances of {fill_flag} + in {denominator}. Rerun the command without specifying --sep-out. + """ + ) + raise typer.Abort() + + if sep_out.is_file() and not force: + overwrite_out = Confirm.ask( + f"[blue bold]{sep_out}[/blue bold] already exists. Do you want to overwrite it?" 
+ ) + if not overwrite_out: + print("Won't overwrite") + raise typer.Abort() + + with Progress( + SpinnerColumn(), + TextColumn("[progress.description]{task.description}"), + transient=True, + ) as progress: + progress.add_task(description="Imputing...", total=None) + + if sep_denom is None: + t0 = time.perf_counter() + df = impute.column_pair( + df, + numerator, + denominator, + fill_flag, + fill_range, + ColType[col_type].value, + seed, + ) + t1 = time.perf_counter() + else: + t0 = time.perf_counter() + try: + df = df.join( + df_denom, on=sep_cols, how="inner", coalesce=True, validate="1:1" + ) + except pl.exceptions.ComputeError: + err_console.print( + "The join with --sep-denom failed because there is not a 1:1 relationship between the join columns specified via --sep-col." + ) + raise typer.Abort() + + df = impute.column_pair( + df, + numerator, + denominator, + fill_flag, + fill_range, + ColType[col_type].value, + seed, + ) + t1 = time.perf_counter() + + if output is not None: + if create_dir: + output.parent.mkdir(parents=True) + + if sep_denom is not None: + df.select(pl.col("*").exclude(denominator)).write_csv( + output, separator="," + ) + + if sep_out is not None: + if create_sep_dir: + sep_out.parent.mkdir(parents=True) + + df.select(pl.col("*").exclude(numerator)).write_csv( + sep_out, separator="," + ) + else: + df.write_csv(output, separator=",") + + if verbose: + console.print(f"[bold]Time taken[/bold]: {(t1 - t0):0.3f}s", highlight=False) + + if output is None: + print( + df.filter( + (pl.col(numerator) <= fill_range.ub) + | (pl.col(denominator) <= fill_range.ub) + ).head(10) + ) diff --git a/src/csv_helper/complete.py b/src/csv_helper/complete.py new file mode 100644 index 0000000..56873d0 --- /dev/null +++ b/src/csv_helper/complete.py @@ -0,0 +1,38 @@ +import polars as pl + + +def complete[T: (pl.DataFrame, pl.LazyFrame)](df: T, *columns: str | pl.Series) -> T: + """ + Generate rows for implicit missing values based on column combinations, + thus 
making them explicit missing values. Generated values marked as null. + + If columns are referenced by name, then only existing values in those + columns are used for completion. If Series are specified instead, then + those Series can specify the full set of possible values. The Series must be + named after an existing column. + """ + cols = [] + for col in columns: + if isinstance(col, str): + cols.append(pl.col(col).unique().implode()) + elif isinstance(col, pl.Series): + cols.append(col.unique().implode()) + else: + raise TypeError( + f"The columns must be either string or polars Series. Got {type(col)} instead." + ) + + unique_combos = df.select(cols) + col_names = unique_combos.collect_schema().names() + for col in col_names: + unique_combos = unique_combos.explode(col) + + df = unique_combos.join( + df, + on=col_names, + how="left", + coalesce=True, + validate="1:1", + ) + + return df diff --git a/src/csv_helper/impute.py b/src/csv_helper/impute.py new file mode 100644 index 0000000..810ad07 --- /dev/null +++ b/src/csv_helper/impute.py @@ -0,0 +1,223 @@ +from typing import NamedTuple + +import numpy as np +import polars as pl +from polars.datatypes.classes import FloatType, IntegerType + + +def check[T: (pl.DataFrame, pl.LazyFrame)]( + df: T, columns: list[str], fill_flag: str +) -> T: + """ + Summarize counts and proportion of instances of `fill_flag` in each of + the given columns. 
+ """ + for col in columns: + if col not in df.lazy().collect_schema().names(): + raise ValueError(f"Column {col} doesn't exist") + + if not _fill_flag_exists(df, col, fill_flag): + raise ValueError( + f"Column '{col}' doesn't contain any instances of '{fill_flag}'" + ) + + if len(columns) > 1: + return ( + df.select(columns) + .unpivot(variable_name="column", value_name="value") + .group_by("column") + .agg( + count=pl.col("value").filter(pl.col("value") == fill_flag).count(), + prop=pl.col("value").filter(pl.col("value") == fill_flag).count() + / pl.len(), + ) + .sort("column") + ) + + fill_col = columns[0] + + return ( + df.select(fill_col) + .unpivot(variable_name="column", value_name="value") + .group_by("column") + .agg( + count=pl.col("value").filter(pl.col("value") == fill_flag).count(), + prop=pl.col("value").filter(pl.col("value") == fill_flag).count() + / pl.len(), + ) + .sort("column") + ) + + +def columns[T: (pl.DataFrame, pl.LazyFrame)]( + df: T, + columns: list[str], + fill_flag: str, + fill_range: tuple[int, int], + dtype: type[IntegerType] | type[FloatType] = pl.Int64, + seed: int | None = None, +) -> T: + """ + Independently fill instances of `fill_flag` (a string) + in the given columns with random integers in the given range + (bounds inclusive). + + If `dtype` is specified, will attempt to cast the filled columns + to that Polars type. Only supports Polars integer and float types. 
+ """ + n_cols = len(columns) + if n_cols == 0: + raise ValueError("Must specify at least one column to impute") + + for col in columns: + if col not in df.lazy().collect_schema().names(): + raise ValueError(f"Column {col} doesn't exist") + + if not _fill_flag_exists(df, col, fill_flag): + raise ValueError( + f"Column {col} doesn't contain any instances of '{fill_flag}'" + ) + + fill_range_int = _parse_fill_range(fill_range) + + if n_cols == 1: + column = columns[0] + # NOTE: this implementation and numpy implementation for filling values are roughly the same speed + # with this Polars-only impl barely faster + df = df.with_columns( + pl.when(pl.col(column) == fill_flag) + .then( + pl.int_range(fill_range_int.lb, fill_range_int.ub + 1).sample( + pl.len(), with_replacement=True, seed=seed + ) + ) + .otherwise(pl.col(column)) + .alias(column) + .cast(dtype) + ) + else: + rng = np.random.default_rng(seed) + n_rows = df.lazy().select(pl.len()).collect().item() + # must gen enough numbers for all columns up-front, otherwise they get reused + shape = (n_cols, n_rows) + fill_nums = rng.integers( + fill_range_int.lb, + fill_range_int.ub, + size=shape, + endpoint=True, # include ub in sample + ) + + for col, num in zip(columns, fill_nums): + df = df.with_columns( + pl.when(pl.col(col) == fill_flag) + .then(pl.lit(num)) + .otherwise(pl.col(col)) + .alias(col) + .cast(dtype) + ) + + return df + + +def _fill_flag_exists[T: (pl.DataFrame, pl.LazyFrame)]( + df: T, column: str, fill_flag: str +) -> bool: + return df.lazy().select((pl.col(column) == fill_flag).any()).collect().item() + + +class _FillRange(NamedTuple): + lb: int + ub: int + + +def _parse_fill_range(fill_range: tuple[int, int]) -> _FillRange: + if len(fill_range) != 2: + raise ValueError("Must only pass 2 values") + + fill_range_int = _FillRange(*fill_range) + if fill_range_int.lb > fill_range_int.ub: + raise ValueError("Lower bound can't be greater than the upper bound") + + return fill_range_int + + +def 
column_pair[T: (pl.DataFrame, pl.LazyFrame)]( + df: T, + numerator: str, + denominator: str, + fill_flag: str, + fill_range: tuple[int, int], + dtype: type[IntegerType] | type[FloatType] = pl.Int64, + seed: int | None = None, +) -> T: + """ + Fill instances of the `fill_flag` in both the `numerator` column + and the `denominator` column such that numerator <= denominator. + + If `dtype` is specified, will attempt to cast the final result + to that Polars type. Only supports Polars integer and float types. + + Note: `seed` is only used for (1) imputing the denominator and (2) the + numerator case where the denominator is greater than the `fill_range` + upper bound. This is because we cannot guarantee desired reproducible + behavior for the numerator when denominator is less than or equal to the + `fill_range` upper bound since such imputation happens per-row. + """ + if numerator not in df.lazy().collect_schema().names(): + raise ValueError(f"Column {numerator} doesn't exist") + + if denominator not in df.lazy().collect_schema().names(): + raise ValueError(f"Column {denominator} doesn't exist") + + if not _fill_flag_exists(df, numerator, fill_flag): + raise ValueError( + f"Column {numerator} doesn't contain any instances of '{fill_flag}'" + ) + + if not _fill_flag_exists(df, denominator, fill_flag): + raise ValueError( + f"Column {denominator} doesn't contain any instances of '{fill_flag}'" + ) + + fill_range_int = _parse_fill_range(fill_range) + + df = df.with_columns( + pl.when(pl.col(denominator) == fill_flag) + .then( + pl.int_range(fill_range_int.lb, fill_range_int.ub + 1).sample( + pl.len(), + with_replacement=True, + seed=seed, + ) + ) + .otherwise(pl.col(denominator)) + .alias(denominator) + .cast(dtype) + ).with_columns( + # NOTE: sometimes oddly high mem consumption b/c of pl.int_ranges(), + # but not sure how to improve + pl.when( + (pl.col(numerator) == fill_flag) + & (pl.col(denominator) <= fill_range_int.ub) + ) + .then( + 
pl.int_ranges(fill_range_int.lb, pl.col(denominator) + 1) + .list.sample(1) + .explode() + ) + .when( + (pl.col(numerator) == fill_flag) & (pl.col(denominator) > fill_range_int.ub) + ) + .then( + pl.int_range(fill_range_int.lb, fill_range_int.ub + 1).sample( + pl.len(), + with_replacement=True, + seed=seed, + ) + ) + .otherwise(pl.col(numerator)) + .alias(numerator) + .cast(dtype) + ) + + return df diff --git a/src/csv_helper/main.py b/src/csv_helper/main.py deleted file mode 100644 index 71deeac..0000000 --- a/src/csv_helper/main.py +++ /dev/null @@ -1,878 +0,0 @@ -import time -from enum import Enum -from importlib.metadata import version -from pathlib import Path -from typing import NamedTuple, Optional - -import click -import numpy as np -import polars as pl -import typer -from numpy.random import Generator -from rich import print -from rich.console import Console -from rich.progress import Progress, SpinnerColumn, TextColumn -from rich.prompt import Confirm -from rich.table import Table -from typing_extensions import Annotated - -app = typer.Typer(no_args_is_help=True, help="A CLI for working with CSV data") -impute_app = typer.Typer(no_args_is_help=True, help="Impute CSV data") -app.add_typer(impute_app, name="impute") - -err_console = Console(stderr=True) - - -def print_version(val: bool): - """Print CLI version""" - if not val: - return - - print(f"csv-helper version {version('csv_helper')}") - raise typer.Exit() - - -@app.callback() -def callback( - version: bool = typer.Option( - False, - "--version", - "-v", - is_eager=True, - help="Print the version and exit.", - callback=print_version, - ), -) -> None: - pass - - -@app.command() -def preview( - input: Annotated[ - Path, - typer.Argument( - exists=True, - file_okay=True, - dir_okay=False, - readable=True, - help="The CSV file to preview", - ), - ], - n_rows: Annotated[ - int, typer.Option("--nrows", "-n", min=1, help="Number of rows to preview") - ] = 10, -) -> None: - """ - Preview a given CSV file. 
- """ - df = pl.read_csv(input, infer_schema_length=0) - - print(f"File: {input}") - if n_rows > df.height: - print(df) - else: - print(df.head(n_rows)) - - -@app.command() -def check( - input: Annotated[ - Path, - typer.Argument( - exists=True, - file_okay=True, - dir_okay=False, - readable=True, - help="The CSV file to check", - ), - ], - fill_col: Annotated[ - str, typer.Option("--col", "-c", help="Name of the column to check") - ], - fill_flag: Annotated[ - str, - typer.Option("--flag", "-f", help="Flag (string) to look for in COL"), - ], -) -> None: - """ - Check a column in a CSV file for occurrences of some string flag. - """ - df = pl.read_csv(input, infer_schema_length=0) - - if fill_col not in df.columns: - err_console.print(f"Column {fill_col} cannot be found in {input}") - raise typer.Abort() - - imp_size = df.filter(pl.col(fill_col) == fill_flag).height - print( - f"Found [blue]{imp_size:_}[/blue] occurrences of '{fill_flag}' in '{fill_col}' -> [blue]{(imp_size / df.height):0.2f}[/blue] of rows (n = {df.height:_})" - ) - print(df.filter(pl.col(fill_col) == fill_flag).head()) - - -class FillRange(NamedTuple): - lb: int - ub: int - - -# NOTE: see https://github.com/fastapi/typer/issues/182#issuecomment-1708245110 -# and https://github.com/fastapi/typer/issues/151#issuecomment-1975322806 -# for workaround for working with enums like this such that Typer understands the args properly -# without having to translate strings or ints to the values we really want -class ColType(Enum): - INT64 = pl.Int64 - FLOAT64 = pl.Float64 - - -def validate_inp_out(input: Path, output: Path, force: bool) -> None: - if output.is_file() and not force: - overwrite_file = Confirm.ask( - f"[blue bold]{output}[/blue bold] already exists. Do you want to overwrite it?" - ) - if not overwrite_file: - print("Won't overwrite") - raise typer.Abort() - - if input == output and not force: - err_console.print( - "Cannot specify output to be identical to input. 
Use the --force/-F option to force this behavior" - ) - raise typer.Abort() - - -def check_create_dir(output: Path) -> bool: - if not output.parent.is_dir(): - create_dir = Confirm.ask( - f"The specified output's parent directory [blue bold]{output.parent}[/blue bold] doesn't exist. Do you want to create it along with any missing parents?" - ) - if not create_dir: - print("Won't create directories") - raise typer.Abort() - return True - return False - - -def all_cols_exist(df: pl.DataFrame, fill_cols: list[str]) -> bool: - for col in fill_cols: - if col not in df.columns: - return False - return True - - -def fill_flag_exists(df: pl.DataFrame, fill_col: str, fill_flag: str) -> bool: - if df.select((pl.col(fill_col) == fill_flag).any()).item(): - return True - return False - - -def parse_fill_range(fill_range: str) -> FillRange: - fill_range_parsed = tuple(x.strip() for x in fill_range.split(",")) - if len(fill_range_parsed) != 2: - raise typer.BadParameter(f"Invalid fill range: {fill_range}") - - if not fill_range_parsed[0].isdigit() or not fill_range_parsed[1].isdigit(): - raise typer.BadParameter(f"Invalid fill range: {fill_range}") - - fill_range_int = FillRange(int(fill_range_parsed[0]), int(fill_range_parsed[1])) - if fill_range_int.lb > fill_range_int.ub: - err_console.print(f"Invalid fill range given: {fill_range}") - raise typer.Abort() - - return fill_range_int - - -@impute_app.command("file") -def impute_file( - input: Annotated[ - Path, - typer.Argument( - exists=True, - file_okay=True, - dir_okay=False, - readable=True, - help="Path to target CSV file", - ), - ], - output: Annotated[ - Path, - # NOTE: if exists = False, other checks still run if the Path happens to (file/dir) exist - typer.Argument( - exists=False, - file_okay=True, - dir_okay=False, - writable=True, - readable=False, - help="Path to save the output CSV file", - ), - ], - fill_col: Annotated[ - str, typer.Option("--col", "-c", help="Name of the column to impute") - ], - fill_flag: 
Annotated[ - str, - typer.Option( - "--flag", - "-f", - help="Flag (string) to look for and replace in the target column", - ), - ], - fill_range: Annotated[ - FillRange, - typer.Option( - "--range", - "-r", - metavar="TEXT", - help="Closed, integer interval from which to sample random integer for imputation. Specify as comma-separated values. For example: '1,5' corresponds to the range [1, 5]", - parser=parse_fill_range, - ), - ], - col_type: Annotated[ - str, - typer.Option( - "--type", - "-t", - help="Intended data type of the target column. Can be a Polars Int64 or Float64.", - click_type=click.Choice(ColType._member_names_, case_sensitive=False), - ), - ] = ColType.INT64.name, - seed: Annotated[ - int, typer.Option("--seed", "-s", help="Random seed for reproducibility") - ] = 123, - verbose: Annotated[ - bool, - typer.Option( - "--verbose", - "-v", - help="Whether to show additional imputation summary information", - ), - ] = False, - force: Annotated[ - bool, - typer.Option( - "--force", - "-F", - help=""" - Allow overwriting data even if (1) the specified output file already exists or - (2) the path to the input file is identical to the path of the output file. Both - checks will be ignored. - """, - ), - ] = False, -) -> None: - """ - Impute a target column in a CSV file. Will look for the specified filler flag in the target column - and replace it with a random integer from the specified range. Save the result to a new CSV file. 
- """ - validate_inp_out(input, output, force) - create_dir = check_create_dir(output) - - df = pl.read_csv(input, infer_schema_length=0) - - if fill_col not in df.columns: - err_console.print(f"Column {fill_col} cannot be found in {input}") - raise typer.Abort() - - if not fill_flag_exists(df, fill_col, fill_flag): - err_console.print(f"Cannot find any instances of '{fill_flag}' in {fill_col}") - raise typer.Abort() - - if verbose: - imp_size = df.filter(pl.col(fill_col) == fill_flag).height - - with Progress( - SpinnerColumn(), - TextColumn("[progress.description]{task.description}"), - transient=True, - ) as progress: - progress.add_task(description="Imputing...", total=None) - - # WARN: setting seed means that each use of this CLI cmd with same seed - # will generate same integers, but repeated calls inside of this func - # won't generate the same set of integers - rng = np.random.default_rng(seed) - cast_type = ColType[col_type] - - t0 = time.perf_counter() - df = df.with_columns( - pl.when(pl.col(fill_col) == fill_flag) - .then( - pl.lit( - # NOTE: must specify size to be height of df despite not filling every row - # thus, we get "new" rand int per row - rng.integers( - fill_range.lb, fill_range.ub, size=df.height, endpoint=True - ) - ) - ) - .otherwise(pl.col(fill_col)) - .alias(fill_col) - .cast(cast_type.value) - ) - t1 = time.perf_counter() - - if create_dir: - output.parent.mkdir(parents=True) - - df.write_csv(output) - - print("[green]Finished imputing[/green]...") - - if verbose: - table = Table(title="Imputation statistics", show_header=False) - table.add_row("[blue]Count of imputed values[/blue]", f"{imp_size:_}") - table.add_row( - "[blue]Proportion of imputed values[/blue]", - f"{(imp_size / df.height):0.2f} (n = {df.height:_})", - ) - table.add_row("[blue]Seed[/blue]", f"{seed}") - table.add_row("[blue]Time taken[/blue]", f"~{(t1 - t0):0.3f} s") - print(table) - - print(df.filter(pl.col(fill_col) <= fill_range.ub).head()) - - -class 
FillCols(NamedTuple): - numerator: str - denominator: str - - -def parse_fill_cols(fill_cols: str) -> FillCols: - fill_cols_parsed = tuple(x.strip() for x in fill_cols.split(",")) - if len(fill_cols_parsed) != 2: - raise typer.BadParameter(f"Invalid fill cols: {fill_cols}") - - return FillCols(fill_cols_parsed[0], fill_cols_parsed[1]) - - -def parse_sep_cols(sep_cols: str) -> list[str]: - return [col.strip() for col in sep_cols.split(",")] - - -def impute_capped(denom: int, fill_range: FillRange, rng: Generator) -> int: - """ - Return a random integer from a range that is capped - at the 'denominator' value - """ - # WARN: specifying size=1 instead of leaving size = None - # will return single-value list instead of just the value - return rng.integers(fill_range.lb, denom, endpoint=True) - - -@impute_app.command("pair") -def impute_pair( - input: Annotated[ - Path, - typer.Argument( - exists=True, - file_okay=True, - dir_okay=False, - readable=True, - help="Path to target CSV file", - ), - ], - output: Annotated[ - Path, - typer.Argument( - exists=False, - file_okay=True, - dir_okay=False, - writable=True, - readable=False, - help="Path to save the output CSV file", - ), - ], - fill_cols: Annotated[ - FillCols, - typer.Option( - "--cols", - "-c", - metavar="TEXT", - help="Pair of columns (numerator and denominator) to be imputed. Specify as comma-separated values. For example, 'count_col,denom_col' specifies 'count_col' as the numerator and 'denom_col' as the denominator.", - parser=parse_fill_cols, - ), - ], - fill_flag: Annotated[ - str, - typer.Option( - "--flag", "-f", help="Flag (string) to look for and replace in the columns" - ), - ], - fill_range: Annotated[ - FillRange, - typer.Option( - "--range", - "-r", - metavar="TEXT", - help="Closed, integer interval from which to sample random integer for imputation. Specify as comma-separated values. 
For example: '1,5' corresponds to the range [1, 5]", - parser=parse_fill_range, - ), - ], - col_type: Annotated[ - str, - typer.Option( - "--type", - "-t", - help="Intended data type of target columns. Can be a Polars Int64 or Float64.", - click_type=click.Choice(ColType._member_names_, case_sensitive=False), - ), - ] = ColType.INT64.name, - seed: Annotated[ - int, typer.Option("--seed", "-s", help="Random seed for reproducibility") - ] = 123, - verbose: Annotated[ - bool, - typer.Option( - "--verbose", - "-v", - help="Whether to show additional imputation summary information", - ), - ] = False, - force: Annotated[ - bool, - typer.Option( - "--force", - "-F", - help=""" - Allow overwriting data even if (1) the specified output file already exists, - (2) the path to the input file is identical to the path of the output file, or - (3) --sep-out is specfied and that file already exists. All checks will be ignored. - """, - ), - ] = False, - sep_denom: Annotated[ - Optional[Path], - typer.Option( - "--sep-denom", - exists=True, - file_okay=True, - dir_okay=False, - readable=True, - help=""" - Path to some separate CSV file in which to look for denominator data. - Currently only supports a separate file that has the exact same - structure as the input file except for the numerator column being - swapped for the denominator column (because this performs an inner - join on all those columns). - """, - ), - ] = None, - sep_cols: Annotated[ - Optional[list[str]], - typer.Option( - "--sep-cols", - help="Comma-separated list of column names on which to join the numerator and denominator data", - parser=parse_sep_cols, - ), - ] = None, - sep_out: Annotated[ - Optional[Path], - typer.Option( - "--sep-out", - exists=False, - file_okay=True, - dir_okay=False, - writable=True, - readable=False, - help="Path to save imputed denominator data from --sep-denom", - ), - ] = None, -): - """ - Impute a pair of columns in a CSV file. 
Will look for the - flag in both of the specified columns and substitute with a random - integer in the closed range. Note, the pair of columns - is comprised of a numerator column and a denominator column such that - the imputed values of the numerator column must not exceed the imputed - values of the denominator column. - - Separate denominator data: - - If --sep-denom is provided, then that file will be used instead to source - the denominator column. You must then also use --sep-cols to specify the - columns to use for (inner) joining the numerator and denominator data. Note - that a 1:1 relationship in the join is enforced. Optionally, use --sep-out - to specify where to save the imputed version of the denominator data from - --sep-denom. - """ - validate_inp_out(input, output, force) - create_dir = check_create_dir(output) - - if (sep_cols is not None or sep_out is not None) and sep_denom is None: - err_console.print("Must specify --sep-denom to use --sep-cols or --sep-out") - raise typer.Abort() - - df = pl.read_csv(input, infer_schema_length=0) - - if sep_denom is not None: - if sep_cols is None: - err_console.print("You must specify both --sep-denom and --sep-cols") - raise typer.Abort() - - # NOTE: extract since it gives nested list; maybe some type coercion going on - sep_cols = sep_cols[0] - df_denom = pl.read_csv(sep_denom, infer_schema_length=0) - - if ( - fill_cols.numerator not in df.columns - or fill_cols.denominator not in df_denom.columns - ): - err_console.print("Invalid columns specified for --cols") - raise typer.Abort() - - if not all_cols_exist(df, sep_cols) or not all_cols_exist(df_denom, sep_cols): - err_console.print( - "Some of the --sep-cols are missing from the numerator or denominator data" - ) - raise typer.Abort() - - # TODO: might need more sophisticated checks here to ensure the join goes ok or fails gracefully - if fill_cols.denominator not in df_denom.columns: - err_console.print( - "Separate denominator data doesn't contain 
the given denominator column" - ) - raise typer.Abort() - - if sep_out is not None: - if not fill_flag_exists(df_denom, fill_cols.denominator, fill_flag): - print( - f""" - The denominator file {sep_denom} doesn't contain any instancees of {fill_flag} - in {fill_cols.denominator}. Rerun the command without specifying --sep-out. - """ - ) - raise typer.Abort() - - if sep_out.is_file() and not force: - overwrite_out = Confirm.ask( - f"[blue bold]{sep_out}[/blue bold] already exists. Do you want to overwrite it?" - ) - if not overwrite_out: - print("Won't overwrite") - raise typer.Abort() - - imp_sizes = ( - len(df.filter(pl.col(fill_cols.numerator) == fill_flag)), - len(df_denom.filter(pl.col(fill_cols.denominator) == fill_flag)), - ) - else: - if not all_cols_exist(df, list(fill_cols)): - err_console.print("Invalid columns specified for --cols") - raise typer.Abort() - - imp_sizes = ( - len(df.filter(pl.col(fill_cols.numerator) == fill_flag)), - len(df.filter(pl.col(fill_cols.denominator) == fill_flag)), - ) - - if imp_sizes[0] == 0 and imp_sizes[1] == 0: - err_console.print( - f"Cannot find any instances of {fill_flag} in either {fill_cols.numerator} or {fill_cols.denominator}" - ) - raise typer.Abort() - - with Progress( - SpinnerColumn(), - TextColumn("[progress.description]{task.description}"), - transient=True, - ) as progress: - progress.add_task(description="Imputing...", total=None) - - rng = np.random.default_rng(seed) - cast_type = ColType[col_type] - - t0 = time.perf_counter() - # NOTE: impute df_denom before attempting join - if sep_denom is not None: - df_denom = df_denom.with_columns( - pl.when(pl.col(fill_cols.denominator) == fill_flag) - .then( - pl.lit( - rng.integers( - fill_range.lb, - fill_range.ub, - size=df_denom.height, - endpoint=True, - ) - ) - ) - .otherwise(pl.col(fill_cols.denominator)) - .alias(fill_cols.denominator) - .cast(cast_type.value) - ) - - # NOTE: `validate` default is "m:m" -> forcing a 1:1 relationship of the join - try: - df 
= df.join( - df_denom, on=sep_cols, how="inner", coalesce=True, validate="1:1" - ) - except pl.exceptions.ComputeError: - err_console.print( - "The join with --sep-denom failed because there is not a 1:1 relationship between the join keys." - ) - raise typer.Abort() - else: - df = df.with_columns( - pl.when(pl.col(fill_cols.denominator) == fill_flag) - .then( - pl.lit( - rng.integers( - fill_range.lb, fill_range.ub, size=df.height, endpoint=True - ) - ) - ) - .otherwise(pl.col(fill_cols.denominator)) - .alias(fill_cols.denominator) - .cast(cast_type.value) - ) - - # NOTE: at this point, imputation of denom is done regardless of whether sep file or not - df = df.with_columns( - pl.when( - (pl.col(fill_cols.numerator) == fill_flag) - & (pl.col(fill_cols.denominator) <= fill_range.ub) - ) - # map_elements() will run Python so it's slow - .then( - pl.col(fill_cols.denominator).map_elements( - lambda denom: impute_capped( - denom, - fill_range, - rng, - ), - return_dtype=pl.Int64, - ) - ) - .when( - (pl.col(fill_cols.numerator) == fill_flag) - & (pl.col(fill_cols.denominator) > fill_range.ub) - ) - .then( - pl.lit( - rng.integers( - fill_range.lb, fill_range.ub, size=df.height, endpoint=True - ) - ) - ) - .otherwise(pl.col(fill_cols.numerator)) - .alias(fill_cols.numerator) - .cast(cast_type.value) - ) - t1 = time.perf_counter() - - # TODO: consider create_dir also for sep_out? 
- if create_dir: - output.parent.mkdir(parents=True) - - if sep_denom is not None: - if sep_out is not None: - df.select(pl.col("*").exclude(fill_cols.numerator)).write_csv(sep_out) - - df.select(pl.col("*").exclude(fill_cols.denominator)).write_csv(output) - else: - df.write_csv(output) - - print("[green]Finished imputing[/green]...") - - if verbose: - table = Table(title="Imputation statistics", show_header=False) - table.add_row( - f"[blue]Count of imputed values in[/blue] '{fill_cols.numerator}'", - f"{imp_sizes[0]:_}", - ) - table.add_row( - f"[blue]Proportion of imputed values in[/blue] '{fill_cols.numerator}'", - f"{(imp_sizes[0] / df.height):0.2f} (n = {df.height:_})", - end_section=True, - ) - table.add_row( - f"[blue]Count of imputed values in[/blue] '{fill_cols.denominator}'", - f"{imp_sizes[1]:_}", - ) - table.add_row( - f"[blue]Proportion of imputed values in[/blue] '{fill_cols.denominator}'", - f"{(imp_sizes[1] / df.height):0.2f} (n = {df.height:_})", - end_section=True, - ) - table.add_row("[blue]Seed[/blue]", f"{seed}") - table.add_row("[blue]Time taken[/blue]", f"~{(t1 - t0):0.3f} s") - print(table) - - print( - df.filter( - (pl.col(fill_cols.numerator) <= fill_range.ub) - | (pl.col(fill_cols.denominator) <= fill_range.ub) - ).head() - ) - - -@impute_app.command("dir") -def impute_dir( - input_dir: Annotated[ - Path, - typer.Argument( - exists=True, - file_okay=False, - dir_okay=True, - readable=True, - help="Directory of CSV files to impute", - ), - ], - output_dir: Annotated[ - Path, - typer.Argument( - exists=False, - file_okay=False, - dir_okay=True, - writable=True, - help="Directory to save output CSV files", - ), - ], - fill_col: Annotated[ - str, typer.Option("--col", "-c", help="Name of the column to impute") - ], - fill_flag: Annotated[ - str, - typer.Option( - "--flag", - "-f", - help="Flag (string) to look for and replace in the target column", - ), - ], - fill_range: Annotated[ - FillRange, - typer.Option( - "--range", - "-r", - 
metavar="TEXT", - help="Closed, integer interval from which to sample random integer for imputation. Specify as comma-separated values. For example: '1,5' corresponds to the range [1, 5]", - parser=parse_fill_range, - ), - ], - col_type: Annotated[ - str, - typer.Option( - "--type", - "-t", - help="Intended data type of the target column. Can be a Polars Int64 or Float64.", - click_type=click.Choice(ColType._member_names_, case_sensitive=False), - ), - ] = ColType.INT64.name, - seed: Annotated[ - int, typer.Option("--seed", "-s", help="Random seed for reproducibility") - ] = 123, - force: Annotated[ - bool, - typer.Option( - "--force", - "-F", - help="Force imputing the data if INPUT is identical to OUTPUT", - ), - ] = False, - suffix: Annotated[ - str, - typer.Option( - "--suffix", "-x", help="Optional suffix to append to each imputed CSV file" - ), - ] = "", - verbose: Annotated[ - bool, - typer.Option( - "--verbose", - "-v", - help="Whether to show additional imputation summary information", - ), - ] = False, -) -> None: - """ - Impute a target column for a directory of uniform CSV files. Will look for a specific filler flag in the target column - and replace with a random integer from the the specified range. Save the result in the given output directory. - """ - files = list(input_dir.glob("*.csv")) - if len(files) == 0: - err_console.print( - f"The specified input directory [blue bold]{input_dir}[/blue bold] is either empty or doesn't contain any CSV files." - ) - raise typer.Abort() - - create_dir = False - if not output_dir.is_dir(): - create_dir = Confirm.ask( - f"The specified output directory [blue bold]{output_dir}[/blue bold] doesn't exist. Do you want to create it along with any missing parents?" 
- ) - if not create_dir: - print("Won't create directories") - raise typer.Abort() - - for file in files: - if suffix != "": - output_file = output_dir / f"{file.stem}_{suffix}{file.suffix}" - else: - output_file = output_dir / file.name - - if output_file.is_file() and not force: - overwrite_file = Confirm.ask( - f"The intended output file [blue bold]{output_file}[/blue bold] already exists. Should it be overwritten?" - ) - if not overwrite_file: - print("Won't overwrite") - raise typer.Abort() - - df = pl.read_csv(file, infer_schema_length=0) - - if not all_cols_exist(df, [fill_col]): - err_console.print(f"Column {fill_col} cannot be found in {file}") - raise typer.Abort() - - if verbose: - imp_size = len(df.filter(pl.col(fill_col) == fill_flag)) - - if not fill_flag_exists(df, fill_col, fill_flag): - err_console.print( - f"Cannot find any instances of '{fill_flag}' in {fill_col}" - ) - raise typer.Abort() - - rng = np.random.default_rng(seed) - cast_type = ColType[col_type] - - t0 = time.perf_counter() - df = df.with_columns( - pl.when(pl.col(fill_col) == fill_flag) - .then( - pl.lit( - # NOTE: must specify size to be height of df despite not filling every row - # thus, we get "new" rand int per row - rng.integers( - fill_range.lb, fill_range.ub, size=df.height, endpoint=True - ) - ) - ) - .otherwise(pl.col(fill_col)) - .alias(fill_col) - .cast(cast_type.value) - ) - t1 = time.perf_counter() - - if create_dir: - output_dir.mkdir(parents=True, exist_ok=True) - - df.write_csv(output_file) - - print(f"\nFinished imputing [blue]{file}[/blue]...") - - if verbose: - table = Table(title="Imputation statistics", show_header=False) - table.add_row("[blue]Count of imputed values[/blue]", f"{imp_size:_}") - table.add_row( - "[blue]Proportion of imputed values[/blue]", - f"{(imp_size / df.height):0.2f} (n = {df.height:_})", - ) - table.add_row("[blue]Seed[/blue]", f"{seed}") - table.add_row("[blue]Time taken[/blue]", f"~{(t1 - t0):0.3f} s") - print(table) - - 
print(df.filter(pl.col(fill_col) <= fill_range.ub).head()) - - -if __name__ == "__main__": - app() diff --git a/src/csv_helper/py.typed b/src/csv_helper/py.typed new file mode 100644 index 0000000..e69de29 diff --git a/tests/main_test.py b/tests/cli_test.py similarity index 59% rename from tests/main_test.py rename to tests/cli_test.py index 74f7c88..e19cd0e 100644 --- a/tests/main_test.py +++ b/tests/cli_test.py @@ -1,19 +1,20 @@ import shutil +import textwrap from importlib.metadata import version -from pathlib import Path, PureWindowsPath -from sys import platform -from textwrap import dedent +from pathlib import Path import polars as pl import pytest -from csv_helper.main import app +from polars.testing import assert_frame_equal from typer.testing import CliRunner +from csv_helper.cli import app + runner = CliRunner() @pytest.fixture -def test_data(tmp_path) -> Path: +def test_data(tmp_path: Path) -> Path: """ Fixture that moves test CSV data to new dir for testing and returns the file's path @@ -21,11 +22,12 @@ def test_data(tmp_path) -> Path: data_dir = tmp_path / "data" data_dir.mkdir() shutil.copy("./tests/data/test_impute_data.csv", data_dir) + return data_dir / "test_impute_data.csv" @pytest.fixture -def test_data_dir(tmp_path) -> Path: +def test_data_dir(tmp_path: Path) -> Path: """ Fixture that moves test dir of CSV data to new dir for testing and returns the dir's path @@ -33,11 +35,12 @@ def test_data_dir(tmp_path) -> Path: data_dir = tmp_path / "data" data_dir.mkdir() shutil.copytree("./tests/data/test_dir", data_dir / "test_dir") + return data_dir / "test_dir" @pytest.fixture -def test_data_sep(tmp_path) -> Path: +def test_data_sep(tmp_path: Path) -> Path: """ Fixture that moves test dir of pair CSV data to new dir for testing and returns the dir's path @@ -45,34 +48,22 @@ def test_data_sep(tmp_path) -> Path: data_dir = tmp_path / "data" data_dir.mkdir() shutil.copytree("./tests/data/test_pair_sep", data_dir / "test_pair_sep") - return data_dir / 
"test_pair_sep" - -# NOTE: can also access funcs in csv_helper.main directly: -# from csv_helper.main import preview -# preview("./tests/data/test_impute_data.csv", 10) + return data_dir / "test_pair_sep" -def test_show_version(): +def test_print_version(): result = runner.invoke(app, ["--version"]) ver = version("csv_helper") + assert result.stdout.replace("\n", "") == f"csv-helper version {ver}" -def test_preview(test_data): - result = runner.invoke(app, ["preview", str(test_data), "-n", "15"]) +def test_show(test_data): + result = runner.invoke(app, ["show", str(test_data), "-n", "15"]) assert result.exit_code == 0 - if platform == "linux" or platform == "darwin": - msg = f"File: {test_data}" - elif platform == "win32": - msg = f"File: {PureWindowsPath(test_data)}" - - # NOTE: stripping newlines and then slicing; for some reason on macos and windows - # the stdout has newlines inserted - assert result.stdout.replace("\n", "")[: len(msg)] == msg - - out = dedent( + out = textwrap.dedent( """\ shape: (15, 4) ┌────────┬───────────┬───────┬───────────┐ @@ -94,12 +85,13 @@ def test_preview(test_data): └────────┴───────────┴───────┴───────────┘ """ ) - assert out in result.stdout + + assert result.stdout == out -def test_preview_not_file(tmp_path): +def test_show_not_file(tmp_path): dir = tmp_path / "data" - result = runner.invoke(app, ["preview", str(dir), "-n", "15"]) + result = runner.invoke(app, ["show", str(dir), "-n", "15"]) assert result.exit_code == 2 @@ -107,23 +99,19 @@ def test_check(test_data): result = runner.invoke(app, ["check", str(test_data), "-c", "cases", "-f", "<=5"]) assert result.exit_code == 0 - out = dedent( + out = textwrap.dedent( """\ - Found 308 occurrences of '<=5' in 'cases' -> 0.62 of rows (n = 500) - shape: (5, 4) - ┌────────┬───────────┬───────┬───────────┐ - │ county ┆ year_week ┆ cases ┆ all_cause │ - │ --- ┆ --- ┆ --- ┆ --- │ - │ str ┆ str ┆ str ┆ str │ - ╞════════╪═══════════╪═══════╪═══════════╡ - │ 55107 ┆ 2020-05 ┆ <=5 ┆ 334 │ - 
│ 28101 ┆ 2021-20 ┆ <=5 ┆ <=5 │ - │ 35043 ┆ 2022-24 ┆ <=5 ┆ 5862 │ - │ 28043 ┆ 2023-09 ┆ <=5 ┆ 811 │ - │ 26093 ┆ 2020-42 ┆ <=5 ┆ 7606 │ - └────────┴───────────┴───────┴───────────┘ + shape: (1, 3) + ┌────────┬───────┬───────┐ + │ column ┆ count ┆ prop │ + │ --- ┆ --- ┆ --- │ + │ str ┆ u32 ┆ f64 │ + ╞════════╪═══════╪═══════╡ + │ cases ┆ 308 ┆ 0.616 │ + └────────┴───────┴───────┘ """ ) + assert result.stdout == out @@ -143,6 +131,7 @@ def test_impute_file(tmp_path, test_data): "impute", "file", str(test_data), + "-o", str(out_file), "-c", "cases", @@ -150,28 +139,19 @@ def test_impute_file(tmp_path, test_data): f"<={fill_range[1]}", "-r", f"{fill_range[0]},{fill_range[1]}", - "-s", - "8", ], ) assert result.exit_code == 0 assert out_file.is_file() is True - df_in = ( - pl.read_csv(test_data, infer_schema_length=0) - # .with_row_index(name="id") - # .select("id", "cases") - ) - df_out = ( - pl.read_csv(out_file, infer_schema_length=0) - # .with_row_index(name="id") - # .select("id", "cases") - ) + df_in = pl.read_csv(test_data, infer_schema_length=0) + df_out = pl.read_csv(out_file, infer_schema_length=0) assert df_in.shape == df_out.shape df = df_in.join( df_out, on=["county", "year_week"], how="inner", suffix="_imputed" ).filter(pl.col("cases") == f"<={fill_range[1]}") + assert ( df.select((pl.col("cases_imputed") == f"<={fill_range[1]}").any()).item() is False @@ -196,6 +176,7 @@ def test_impute_file_repro(tmp_path, test_data): "impute", "file", str(test_data), + "-o", str(out_file_1), "-c", "cases", @@ -204,7 +185,7 @@ def test_impute_file_repro(tmp_path, test_data): "-r", f"{fill_range[0]},{fill_range[1]}", "-s", - "123", + "88", ], ) assert result_1.exit_code == 0 @@ -216,6 +197,7 @@ def test_impute_file_repro(tmp_path, test_data): "impute", "file", str(test_data), + "-o", str(out_file_2), "-c", "cases", @@ -224,7 +206,7 @@ def test_impute_file_repro(tmp_path, test_data): "-r", f"{fill_range[0]},{fill_range[1]}", "-s", - "123", + "88", ], ) assert 
result_2.exit_code == 0 @@ -236,7 +218,7 @@ def test_impute_file_repro(tmp_path, test_data): df_2 = pl.read_csv(out_file_2, infer_schema_length=0) assert df_2.select((pl.col("cases") == f"<={fill_range[1]}").any()).item() is False - assert df_1.equals(df_2) is True + assert_frame_equal(df_1, df_2) def test_impute_file_output_exists(tmp_path, test_data): @@ -253,6 +235,7 @@ def test_impute_file_output_exists(tmp_path, test_data): "impute", "file", str(test_data), + "-o", str(out_file), "-c", "cases", @@ -260,8 +243,6 @@ def test_impute_file_output_exists(tmp_path, test_data): f"<={fill_range[1]}", "-r", f"{fill_range[0]},{fill_range[1]}", - "-s", - "8", ], ) assert result.exit_code == 1 @@ -281,6 +262,7 @@ def test_impute_file_overwrite(tmp_path, test_data): "impute", "file", str(test_data), + "-o", str(out_file), "-c", "cases", @@ -288,210 +270,12 @@ def test_impute_file_overwrite(tmp_path, test_data): f"<={fill_range[1]}", "-r", f"{fill_range[0]},{fill_range[1]}", - "-s", - "8", ], input="y\n", ) assert result.exit_code == 0 -def test_impute_dir(tmp_path, test_data_dir): - out_dir = Path(tmp_path) / "test_impute_dir_output" - out_dir.mkdir() - fill_range = (1, 5) - - result = runner.invoke( - app, - [ - "impute", - "dir", - str(test_data_dir), - str(out_dir), - "-c", - "cases", - "-f", - f"<={fill_range[1]}", - "-r", - f"{fill_range[0]},{fill_range[1]}", - "-s", - "8", - ], - ) - assert result.exit_code == 0 - assert out_dir.is_dir() - for i in range(5): - f = out_dir / f"test_impute_data_{i}.csv" - assert f.is_file() is True - - for input, output in zip(test_data_dir.iterdir(), out_dir.iterdir()): - df_in = ( - pl.read_csv(input, infer_schema_length=0) - # .with_row_index(name="id") - # .select("id", "cases", "all_cause") - ) - df_out = ( - pl.read_csv(output, infer_schema_length=0) - # .with_row_index(name="id") - # .select("id", "cases", "all_cause") - ) - assert df_in.shape == df_out.shape - - df = df_in.join( - df_out, on=["county", "year_week"], how="inner", 
suffix="_imputed" - ).filter( - (pl.col("cases") == f"<={fill_range[1]}") - | (pl.col("all_cause") == f"<={fill_range[1]}") - ) - assert ( - df.select((pl.col("cases_imputed") == f"<={fill_range[1]}").any()).item() - is False - ) - assert ( - df.select("cases_imputed") - .cast(pl.Int64) - .select( - pl.col("cases_imputed").is_between(fill_range[0], fill_range[1]).all() - ) - .item() - is True - ) - - -def test_impute_dir_force(tmp_path, test_data_dir): - out_dir = Path(tmp_path) / "test_impute_dir_output" - out_dir.mkdir() - fill_range = (1, 5) - - inp_files = test_data_dir.glob("*.csv") - for file in inp_files: - out_file = out_dir / file.name - out_file.touch() - - result = runner.invoke( - app, - [ - "impute", - "dir", - str(test_data_dir), - str(out_dir), - "-c", - "cases", - "-f", - f"<={fill_range[1]}", - "-r", - f"{fill_range[0]},{fill_range[1]}", - "-s", - "8", - "--force", - ], - ) - assert result.exit_code == 0 - assert out_dir.is_dir() - for i in range(5): - f = out_dir / f"test_impute_data_{i}.csv" - assert f.is_file() is True - - for input, output in zip(test_data_dir.iterdir(), out_dir.iterdir()): - df_in = ( - pl.read_csv(input, infer_schema_length=0) - # .with_row_index(name="id") - # .select("id", "cases", "all_cause") - ) - df_out = ( - pl.read_csv(output, infer_schema_length=0) - # .with_row_index(name="id") - # .select("id", "cases", "all_cause") - ) - assert df_in.shape == df_out.shape - - df = df_in.join( - df_out, on=["county", "year_week"], how="inner", suffix="_imputed" - ).filter( - (pl.col("cases") == f"<={fill_range[1]}") - | (pl.col("all_cause") == f"<={fill_range[1]}") - ) - assert ( - df.select((pl.col("cases_imputed") == f"<={fill_range[1]}").any()).item() - is False - ) - assert ( - df.select("cases_imputed") - .cast(pl.Int64) - .select( - pl.col("cases_imputed").is_between(fill_range[0], fill_range[1]).all() - ) - .item() - is True - ) - - -def test_impute_dir_suffix(tmp_path, test_data_dir): - out_dir = Path(tmp_path) / 
"test_impute_dir_output" - out_dir.mkdir() - fill_range = (1, 5) - suffix = "imputed" - - result = runner.invoke( - app, - [ - "impute", - "dir", - str(test_data_dir), - str(out_dir), - "-c", - "cases", - "-f", - f"<={fill_range[1]}", - "-r", - f"{fill_range[0]},{fill_range[1]}", - "-s", - "8", - "-x", - suffix, - ], - ) - assert result.exit_code == 0 - assert out_dir.is_dir() is True - - for i in range(5): - f = out_dir / f"test_impute_data_{i}_{suffix}.csv" - assert f.is_file() is True - - for input, output in zip(test_data_dir.iterdir(), out_dir.iterdir()): - df_in = ( - pl.read_csv(input, infer_schema_length=0) - # .with_row_index(name="id") - # .select("id", "cases", "all_cause") - ) - df_out = ( - pl.read_csv(output, infer_schema_length=0) - # .with_row_index(name="id") - # .select("id", "cases", "all_cause") - ) - assert df_in.shape == df_out.shape - - df = df_in.join( - df_out, on=["county", "year_week"], how="inner", suffix="_imputed" - ).filter( - (pl.col("cases") == f"<={fill_range[1]}") - | (pl.col("all_cause") == f"<={fill_range[1]}") - ) - assert ( - df.select((pl.col("cases_imputed") == f"<={fill_range[1]}").any()).item() - is False - ) - assert ( - df.select("cases_imputed") - .cast(pl.Int64) - .select( - pl.col("cases_imputed").is_between(fill_range[0], fill_range[1]).all() - ) - .item() - is True - ) - - def test_impute_pair(tmp_path, test_data): out_file = tmp_path / "test_impute_pair_output.csv" fill_range = (1, 5) @@ -502,34 +286,31 @@ def test_impute_pair(tmp_path, test_data): "impute", "pair", str(test_data), - str(out_file), - "-c", - "cases,all_cause", + "-n", + "cases", + "-d", + "all_cause", "-f", f"<={fill_range[1]}", "-r", f"{fill_range[0]},{fill_range[1]}", - "-s", - "8", + "-o", + str(out_file), ], ) assert result.exit_code == 0 assert out_file.is_file() is True - df_in = ( - pl.read_csv(test_data, infer_schema_length=0) - # .with_row_index(name="id") - # .select("id", "cases", "all_cause") - ) - df_out = ( - pl.read_csv(out_file, 
infer_schema_length=0) - # .with_row_index(name="id") - # .select("id", "cases", "all_cause") - ) + df_in = pl.read_csv(test_data, infer_schema_length=0) + df_out = pl.read_csv(out_file, infer_schema_length=0) assert df_in.shape == df_out.shape df = df_in.join( - df_out, on=["county", "year_week"], how="inner", suffix="_imputed" + df_out, + on=["county", "year_week"], + how="inner", + suffix="_imputed", + validate="1:1", ).filter( (pl.col("cases") == f"<={fill_range[1]}") | (pl.col("all_cause") == f"<={fill_range[1]}") @@ -569,7 +350,6 @@ def test_impute_pair_sep(tmp_path, test_data_sep): num_file = test_data_sep / "test_impute_numerator_only_data.csv" out_file = tmp_path / "numerator_output.csv" denom_file = test_data_sep / "test_impute_denom_only_data.csv" - sep_cols = "county,year_week" fill_range = (1, 5) result = runner.invoke( @@ -578,19 +358,22 @@ def test_impute_pair_sep(tmp_path, test_data_sep): "impute", "pair", str(num_file), - str(out_file), - "-c", - "cases,all_cause", + "-n", + "cases", + "-d", + "all_cause", "-f", f"<={fill_range[1]}", "-r", f"{fill_range[0]},{fill_range[1]}", - "-s", - "8", + "-o", + str(out_file), "--sep-denom", str(denom_file), - "--sep-cols", - sep_cols, + "--sep-col", + "county", + "--sep-col", + "year_week", ], ) assert result.exit_code == 0 @@ -598,6 +381,7 @@ def test_impute_pair_sep(tmp_path, test_data_sep): df_num = pl.read_csv(num_file, infer_schema_length=0) df_out = pl.read_csv(out_file, infer_schema_length=0) + assert df_num.shape == df_out.shape assert ( df_out.select((pl.col("cases") == f"<={fill_range[1]}").any()).item() is False @@ -625,7 +409,6 @@ def test_impute_pair_join_fails(tmp_path, test_data_sep): num_file = test_data_sep / "test_impute_numerator_only_data.csv" out_file = tmp_path / "numerator_output.csv" denom_file = test_data_sep / "test_impute_denom_only_join_fails.csv" - sep_cols = "county,year_week" fill_range = (1, 5) result = runner.invoke( @@ -634,19 +417,22 @@ def 
test_impute_pair_join_fails(tmp_path, test_data_sep): "impute", "pair", str(num_file), - str(out_file), - "-c", - "cases,all_cause", + "-n", + "cases", + "-d", + "all_cause", "-f", f"<={fill_range[1]}", "-r", f"{fill_range[0]},{fill_range[1]}", - "-s", - "8", + "-o", + str(out_file), "--sep-denom", str(denom_file), - "--sep-cols", - sep_cols, + "--sep-col", + "county", + "--sep-col", + "year_week", ], ) assert result.exit_code == 1 @@ -656,7 +442,6 @@ def test_impute_pair_sep_output(tmp_path, test_data_sep): num_file = test_data_sep / "test_impute_numerator_only_data.csv" out_file = tmp_path / "test_impute_sep_files_numerator_output.csv" denom_file = test_data_sep / "test_impute_denom_only_data.csv" - sep_cols = "county,year_week" sep_out = tmp_path / "test_impute_sep_files_denom_output.csv" fill_range = (1, 5) @@ -666,19 +451,22 @@ def test_impute_pair_sep_output(tmp_path, test_data_sep): "impute", "pair", str(num_file), - str(out_file), - "-c", - "cases,all_cause", + "-n", + "cases", + "-d", + "all_cause", "-f", f"<={fill_range[1]}", "-r", f"{fill_range[0]},{fill_range[1]}", - "-s", - "8", + "-o", + str(out_file), "--sep-denom", str(denom_file), - "--sep-cols", - sep_cols, + "--sep-col", + "county", + "--sep-col", + "year_week", "--sep-out", str(sep_out), ], diff --git a/tests/complete_test.py b/tests/complete_test.py new file mode 100644 index 0000000..cd4865b --- /dev/null +++ b/tests/complete_test.py @@ -0,0 +1,156 @@ +import textwrap +from io import StringIO + +import polars as pl +import pytest +from polars.testing import assert_frame_equal + +from csv_helper import complete + + +@pytest.fixture +def df_inp() -> pl.DataFrame: + data = """\ + country,year,value + France,2020,1 + France,2021,2 + UK,2019,3 + UK,2020,4 + Spain,2022,5 + """ + df = pl.read_csv( + StringIO(textwrap.dedent(data)), + schema={ + "country": pl.String, + "year": pl.Int64, + "value": pl.Int64, + }, + ).sort("country", "year") + + return df + + +@pytest.fixture +def lf_inp(df_inp: 
pl.DataFrame) -> pl.LazyFrame: + return df_inp.lazy() + + +@pytest.fixture +def df_out() -> pl.DataFrame: + data = """\ + country,year,value + France,2019, + France,2020,1 + France,2021,2 + France,2022, + UK,2019,3 + UK,2020,4 + UK,2021, + UK,2022, + Spain,2019, + Spain,2020, + Spain,2021, + Spain,2022,5 + """ + df = pl.read_csv( + StringIO(textwrap.dedent(data)), + schema={ + "country": pl.String, + "year": pl.Int64, + "value": pl.Int64, + }, + ).sort("country", "year") + + return df + + +def test_complete_existing(df_inp: pl.DataFrame, df_out: pl.DataFrame) -> None: + df = df_inp.pipe(complete.complete, "country", "year").sort("country", "year") + + assert_frame_equal(df, df_out) + + +def test_complete_existing_series(df_inp: pl.DataFrame, df_out: pl.DataFrame) -> None: + country = pl.Series("country", ["France", "UK", "Spain"]) + year = pl.Series("year", [year for year in range(2019, 2023)]) + df = df_inp.pipe(complete.complete, country, year).sort("country", "year") + + assert_frame_equal(df, df_out) + + +def test_complete_exception(df_inp: pl.DataFrame) -> None: + with pytest.raises(TypeError): + df_inp.pipe(complete.complete, 0, 1).sort("country", "year") # pyright: ignore[reportArgumentType] + + +def test_complete_existing_lazy(lf_inp: pl.LazyFrame, df_out: pl.DataFrame) -> None: + lf = lf_inp.pipe(complete.complete, "country", "year").sort("country", "year") + + assert_frame_equal(lf.collect(), df_out) + + +def test_complete_existing_lazy_series( + lf_inp: pl.LazyFrame, df_out: pl.DataFrame +) -> None: + country = pl.Series("country", ["France", "UK", "Spain"]) + year = pl.Series("year", [year for year in range(2019, 2023)]) + lf = lf_inp.pipe(complete.complete, country, year).sort("country", "year") + + assert_frame_equal(lf.collect(), df_out) + + +@pytest.fixture +def df_out_non_exist() -> pl.DataFrame: + data = """\ + country,year,value + China,2019, + China,2020, + China,2021, + China,2022, + France,2019, + France,2020,1 + France,2021,2 + France,2022, + 
UK,2019,3 + UK,2020,4 + UK,2021, + UK,2022, + Spain,2019, + Spain,2020, + Spain,2021, + Spain,2022,5 + """ + df = pl.read_csv( + StringIO(textwrap.dedent(data)), + schema={ + "country": pl.String, + "year": pl.Int64, + "value": pl.Int64, + }, + ).sort("country", "year") + + return df + + +def test_complete_non_existing( + df_inp: pl.DataFrame, df_out_non_exist: pl.DataFrame +) -> None: + df = df_inp.pipe( + complete.complete, + pl.Series("country", ["France", "UK", "Spain", "China"]), + "year", + ).sort("country", "year") + + assert_frame_equal(df, df_out_non_exist) + + +def test_complete_non_existing_lazy( + lf_inp: pl.LazyFrame, df_out_non_exist: pl.DataFrame +) -> None: + lf = lf_inp.pipe( + complete.complete, + pl.Series("country", ["France", "UK", "Spain", "China"]), + "year", + ).sort("country", "year") + + assert_frame_equal(lf.collect(), df_out_non_exist) diff --git a/tests/impute_test.py b/tests/impute_test.py new file mode 100644 index 0000000..a11e3bc --- /dev/null +++ b/tests/impute_test.py @@ -0,0 +1,308 @@ +import textwrap +from io import StringIO + +import polars as pl +import pytest +from polars.testing import assert_frame_equal, assert_frame_not_equal + +from csv_helper import impute + + +@pytest.fixture +def df_inp() -> pl.DataFrame: + # NOTE: imp_num and imp_denom independently denote whether col needs imputation + data = """\ + id,numerator,denominator,imp_num,imp_denom + A,10,15,false,false + A,<=5,<=5,true,true + A,12,23,false,false + B,<=5,<=5,true,true + A,22,24,false,false + B,<=5,13,true,false + B,<=5,<=5,true,true + A,10,15,false,false + C,<=5,<=5,false,true + C,<=5,<=5,true,true + A,<=5,<=5,true,true + A,22,15,false,false + B,<=5,13,true,false + A,<=5,<=5,false,true + C,100,128,false,false + C,<=5,<=5,true,true + D,<=5,<=5,true,true + A,22,23,false,false + B,<=5,18,true,false + H,8,17,false,false + A,10,16,false,false + A,<=5,<=5,true,true + H,<=5,<=5,true,true + A,22,88,false,false + B,<=5,23,true,false + C,<=5,<=5,true,true + 
A,<=5,<=5,false,true + C,100,1300,false,false + C,<=5,<=5,true,true + D,<=5,<=5,true,true + """ + df = pl.read_csv( + StringIO(textwrap.dedent(data)), + schema={ + "id": pl.String, + "numerator": pl.String, + "denominator": pl.String, + "imp_num": pl.Boolean, + "imp_denom": pl.Boolean, + }, + ) + + return df + + +@pytest.fixture +def lf_inp(df_inp: pl.DataFrame) -> pl.LazyFrame: + return df_inp.lazy() + + +def test_impute_columns_single(df_inp: pl.DataFrame) -> None: + df_out = df_inp.pipe(impute.columns, ["numerator"], "<=5", (1, 5)) + + assert df_inp.shape == df_out.shape + assert ( + df_out.select((pl.col("numerator").cast(pl.String) == "<=5").any()).item() + is False + ) + assert ( + df_out.filter(pl.col("imp_num")).select((pl.col("numerator") <= 5).all()).item() + is True + ) + + +def test_impute_columns_no_cols_exception(df_inp: pl.DataFrame) -> None: + with pytest.raises(ValueError): + df_inp.pipe(impute.columns, [], "<=5", (1, 5)) + + +def test_impute_columns_single_lazy(lf_inp: pl.LazyFrame) -> None: + lf_out = lf_inp.pipe(impute.columns, ["numerator"], "<=5", (1, 5)) + + assert lf_inp.collect().shape == lf_out.collect().shape + assert ( + lf_out.select((pl.col("numerator").cast(pl.String) == "<=5").any()) + .collect() + .item() + is False + ) + assert ( + lf_out.filter(pl.col("imp_num")) + .select((pl.col("numerator") <= 5).all()) + .collect() + .item() + is True + ) + + +def test_impute_columns_multi(df_inp: pl.DataFrame) -> None: + df_out = df_inp.pipe(impute.columns, ["numerator", "denominator"], "<=5", (1, 5)) + + assert df_inp.shape == df_out.shape + assert ( + df_out.select((pl.col("numerator").cast(pl.String) == "<=5").any()).item() + is False + ) + assert ( + df_out.select((pl.col("denominator").cast(pl.String) == "<=5").any()).item() + is False + ) + + assert ( + df_out.filter(pl.col("imp_num")).select((pl.col("numerator") <= 5).all()).item() + is True + ) + assert ( + df_out.filter(pl.col("imp_denom")) + .select((pl.col("denominator") <= 
5).all()) + .item() + is True + ) + + +def test_impute_columns_multi_lazy(lf_inp: pl.LazyFrame) -> None: + lf_out = lf_inp.pipe(impute.columns, ["numerator", "denominator"], "<=5", (1, 5)) + + assert lf_inp.collect().shape == lf_out.collect().shape + assert ( + lf_out.select((pl.col("numerator").cast(pl.String) == "<=5").any()) + .collect() + .item() + is False + ) + assert ( + lf_out.select((pl.col("denominator").cast(pl.String) == "<=5").any()) + .collect() + .item() + is False + ) + + assert ( + lf_out.filter(pl.col("imp_num")) + .select((pl.col("numerator") <= 5).all()) + .collect() + .item() + is True + ) + assert ( + lf_out.filter(pl.col("imp_denom")) + .select((pl.col("denominator") <= 5).all()) + .collect() + .item() + is True + ) + + +def test_impute_columns_seed(df_inp: pl.DataFrame) -> None: + df_1 = df_inp.pipe(impute.columns, ["numerator"], "<=5", (1, 5), seed=18) + df_2 = df_inp.pipe(impute.columns, ["numerator"], "<=5", (1, 5), seed=18) + + assert df_1.shape == df_2.shape + assert_frame_equal(df_1, df_2) + + df_1 = df_inp.pipe(impute.columns, ["numerator"], "<=5", (1, 5), seed=1) + df_2 = df_inp.pipe(impute.columns, ["numerator"], "<=5", (1, 5), seed=10) + + assert df_1.shape == df_2.shape + assert_frame_not_equal(df_1, df_2) + + +def test_impute_columns_seed_lazy(lf_inp: pl.LazyFrame) -> None: + lf_1 = lf_inp.pipe(impute.columns, ["numerator"], "<=5", (1, 5), seed=18) + lf_2 = lf_inp.pipe(impute.columns, ["numerator"], "<=5", (1, 5), seed=18) + + assert lf_1.collect().shape == lf_2.collect().shape + assert_frame_equal(lf_1, lf_2) + + lf_1 = lf_inp.pipe(impute.columns, ["numerator"], "<=5", (1, 5), seed=1) + lf_2 = lf_inp.pipe(impute.columns, ["numerator"], "<=5", (1, 5), seed=10) + + assert lf_1.collect().shape == lf_2.collect().shape + assert_frame_not_equal(lf_1, lf_2) + + +def test_impute_pair(df_inp: pl.DataFrame) -> None: + df_out = df_inp.pipe(impute.column_pair, "numerator", "denominator", "<=5", (1, 5)) + + assert df_inp.shape == 
df_out.shape + assert ( + df_out.select((pl.col("numerator").cast(pl.String) == "<=5").any()).item() + is False + and df_out.select((pl.col("denominator").cast(pl.String) == "<=5").any()).item() + is False + ) + assert ( + df_out.filter(pl.col("imp_num")).select((pl.col("numerator") <= 5).all()).item() + is True + ) + assert ( + df_out.filter(pl.col("imp_denom")) + .select((pl.col("denominator") <= 5).all()) + .item() + is True + ) + + assert ( + df_out.filter(pl.col("imp_denom")) + .select((pl.col("numerator") <= pl.col("denominator")).all()) + .item() + is True + ) + + +def test_impute_pair_seed(df_inp: pl.DataFrame) -> None: + df_1 = df_inp.pipe( + impute.column_pair, "numerator", "denominator", "<=5", (1, 5), seed=18 + ) + df_2 = df_inp.pipe( + impute.column_pair, "numerator", "denominator", "<=5", (1, 5), seed=18 + ) + + assert df_1.shape == df_2.shape + # can only guarantee seed reproducibility in these 2 cases + assert_frame_equal(df_1.select("denominator"), df_2.select("denominator")) + assert_frame_equal( + df_1.filter(pl.col("denominator") > 5).select("numerator"), + df_2.filter(pl.col("denominator") > 5).select("numerator"), + ) + + df_1 = df_inp.pipe( + impute.column_pair, "numerator", "denominator", "<=5", (1, 5), seed=1 + ) + df_2 = df_inp.pipe( + impute.column_pair, "numerator", "denominator", "<=5", (1, 5), seed=10 + ) + + assert df_1.shape == df_2.shape + assert_frame_not_equal(df_1, df_2) + + +def test_impute_pair_seed_lazy(lf_inp: pl.LazyFrame) -> None: + lf_1 = lf_inp.pipe( + impute.column_pair, "numerator", "denominator", "<=5", (1, 5), seed=18 + ) + lf_2 = lf_inp.pipe( + impute.column_pair, "numerator", "denominator", "<=5", (1, 5), seed=18 + ) + + assert lf_1.collect().shape == lf_2.collect().shape + # can only guarantee seed reproducibility in these 2 cases + assert_frame_equal(lf_1.select("denominator"), lf_2.select("denominator")) + assert_frame_equal( + lf_1.filter(pl.col("denominator") > 5).select("numerator"), + 
lf_2.filter(pl.col("denominator") > 5).select("numerator"), + ) + + lf_1 = lf_inp.pipe( + impute.column_pair, "numerator", "denominator", "<=5", (1, 5), seed=1 + ) + lf_2 = lf_inp.pipe( + impute.column_pair, "numerator", "denominator", "<=5", (1, 5), seed=10 + ) + + assert lf_1.collect().shape == lf_2.collect().shape + assert_frame_not_equal(lf_1, lf_2) + + +def test_impute_pair_lazy(lf_inp: pl.LazyFrame) -> None: + lf_out = lf_inp.pipe(impute.column_pair, "numerator", "denominator", "<=5", (1, 5)) + + assert lf_inp.collect().shape == lf_out.collect().shape + assert ( + lf_out.select((pl.col("numerator").cast(pl.String) == "<=5").any()) + .collect() + .item() + is False + and lf_out.select((pl.col("denominator").cast(pl.String) == "<=5").any()) + .collect() + .item() + is False + ) + assert ( + lf_out.filter(pl.col("imp_num")) + .select((pl.col("numerator") <= 5).all()) + .collect() + .item() + is True + ) + assert ( + lf_out.filter(pl.col("imp_denom")) + .select((pl.col("denominator") <= 5).all()) + .collect() + .item() + is True + ) + + assert ( + lf_out.filter(pl.col("imp_denom")) + .select((pl.col("numerator") <= pl.col("denominator")).all()) + .collect() + .item() + is True + ) diff --git a/tox.ini b/tox.ini deleted file mode 100644 index 042c010..0000000 --- a/tox.ini +++ /dev/null @@ -1,11 +0,0 @@ -[tox] -env_list = py{311,312} - -[testenv] -runner = uv-venv-lock-runner -description = run tests -extras = - dev -; groups = test -; commands = test -commands = uv run -- pytest -v diff --git a/uv.lock b/uv.lock index 79e8a46..61d4994 100644 --- a/uv.lock +++ b/uv.lock @@ -1,48 +1,61 @@ version = 1 -requires-python = ">=3.11" +revision = 2 +requires-python = ">=3.12" [[package]] -name = "cachetools" -version = "5.5.0" +name = "argcomplete" +version = "3.6.2" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/c3/38/a0f315319737ecf45b4319a8cd1f3a908e29d9277b46942263292115eee7/cachetools-5.5.0.tar.gz", hash = 
"sha256:2cc24fb4cbe39633fb7badd9db9ca6295d766d9c2995f245725a46715d050f2a", size = 27661 } +sdist = { url = "https://files.pythonhosted.org/packages/16/0f/861e168fc813c56a78b35f3c30d91c6757d1fd185af1110f1aec784b35d0/argcomplete-3.6.2.tar.gz", hash = "sha256:d0519b1bc867f5f4f4713c41ad0aba73a4a5f007449716b16f385f2166dc6adf", size = 73403, upload-time = "2025-04-03T04:57:03.52Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/a4/07/14f8ad37f2d12a5ce41206c21820d8cb6561b728e51fad4530dff0552a67/cachetools-5.5.0-py3-none-any.whl", hash = "sha256:02134e8439cdc2ffb62023ce1debca2944c3f289d66bb17ead3ab3dede74b292", size = 9524 }, + { url = "https://files.pythonhosted.org/packages/31/da/e42d7a9d8dd33fa775f467e4028a47936da2f01e4b0e561f9ba0d74cb0ca/argcomplete-3.6.2-py3-none-any.whl", hash = "sha256:65b3133a29ad53fb42c48cf5114752c7ab66c1c38544fdf6460f450c09b42591", size = 43708, upload-time = "2025-04-03T04:57:01.591Z" }, ] [[package]] -name = "chardet" -version = "5.2.0" +name = "attrs" +version = "25.3.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/f3/0d/f7b6ab21ec75897ed80c17d79b15951a719226b9fababf1e40ea74d69079/chardet-5.2.0.tar.gz", hash = "sha256:1b3b6ff479a8c414bc3fa2c0852995695c4a026dcd6d0633b2dd092ca39c1cf7", size = 2069618 } +sdist = { url = "https://files.pythonhosted.org/packages/5a/b0/1367933a8532ee6ff8d63537de4f1177af4bff9f3e829baf7331f595bb24/attrs-25.3.0.tar.gz", hash = "sha256:75d7cefc7fb576747b2c81b4442d4d4a1ce0900973527c011d1030fd3bf4af1b", size = 812032, upload-time = "2025-03-13T11:10:22.779Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/38/6f/f5fbc992a329ee4e0f288c1fe0e2ad9485ed064cac731ed2fe47dcc38cbf/chardet-5.2.0-py3-none-any.whl", hash = "sha256:e1cf59446890a00105fe7b7912492ea04b6e6f06d4b742b2c788469e34c82970", size = 199385 }, + { url = 
"https://files.pythonhosted.org/packages/77/06/bb80f5f86020c4551da315d78b3ab75e8228f89f0162f2c3a819e407941a/attrs-25.3.0-py3-none-any.whl", hash = "sha256:427318ce031701fea540783410126f03899a97ffc6f61596ad581ac2e40e3bc3", size = 63815, upload-time = "2025-03-13T11:10:21.14Z" }, ] [[package]] name = "click" -version = "8.1.7" +version = "8.1.8" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "colorama", marker = "platform_system == 'Windows'" }, + { name = "colorama", marker = "sys_platform == 'win32'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/96/d3/f04c7bfcf5c1862a2a5b845c6b2b360488cf47af55dfa79c98f6a6bf98b5/click-8.1.7.tar.gz", hash = "sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de", size = 336121 } +sdist = { url = "https://files.pythonhosted.org/packages/b9/2e/0090cbf739cee7d23781ad4b89a9894a41538e4fcf4c31dcdd705b78eb8b/click-8.1.8.tar.gz", hash = "sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a", size = 226593, upload-time = "2024-12-21T18:38:44.339Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/00/2e/d53fa4befbf2cfa713304affc7ca780ce4fc1fd8710527771b58311a3229/click-8.1.7-py3-none-any.whl", hash = "sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28", size = 97941 }, + { url = "https://files.pythonhosted.org/packages/7e/d4/7ebdbd03970677812aac39c869717059dbb71a4cfc033ca6e5221787892c/click-8.1.8-py3-none-any.whl", hash = "sha256:63c132bbbed01578a06712a2d1f497bb62d9c1c0d329b7903a866228027263b2", size = 98188, upload-time = "2024-12-21T18:38:41.666Z" }, ] [[package]] name = "colorama" version = "0.4.6" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697 } +sdist = { url = 
"https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697, upload-time = "2022-10-25T02:36:22.414Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335 }, + { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" }, +] + +[[package]] +name = "colorlog" +version = "6.9.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/d3/7a/359f4d5df2353f26172b3cc39ea32daa39af8de522205f512f458923e677/colorlog-6.9.0.tar.gz", hash = "sha256:bfba54a1b93b94f54e1f4fe48395725a3d92fd2a4af702f6bd70946bdc0c6ac2", size = 16624, upload-time = "2024-10-29T18:34:51.011Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e3/51/9b208e85196941db2f0654ad0357ca6388ab3ed67efdbfc799f35d1f83aa/colorlog-6.9.0-py3-none-any.whl", hash = "sha256:5906e71acd67cb07a71e779c47c4bcb45fb8c2993eebe9e5adcd6a6f1b283eff", size = 11424, upload-time = "2024-10-29T18:34:49.815Z" }, ] [[package]] name = "csv-helper" -version = "0.2.2" +version = "0.3.0" source = { editable = "." 
} dependencies = [ { name = "numpy" }, @@ -52,50 +65,60 @@ dependencies = [ [package.dev-dependencies] dev = [ + { name = "nox" }, { name = "pytest" }, - { name = "tox" }, - { name = "tox-uv" }, ] [package.metadata] requires-dist = [ - { name = "numpy", specifier = ">=2.0.1" }, - { name = "polars", specifier = ">=1.4.1" }, - { name = "typer", specifier = ">=0.12.3" }, + { name = "numpy", specifier = ">=2.2.6" }, + { name = "polars", specifier = ">=1.30.0" }, + { name = "typer", specifier = ">=0.15.4" }, ] [package.metadata.requires-dev] dev = [ - { name = "pytest", specifier = ">=8.3.2" }, - { name = "tox", specifier = ">=4.23.2" }, - { name = "tox-uv", specifier = ">=1.16.0" }, + { name = "nox", specifier = ">=2025.5.1" }, + { name = "pytest", specifier = ">=8.3.5" }, +] + +[[package]] +name = "dependency-groups" +version = "1.3.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "packaging" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/62/55/f054de99871e7beb81935dea8a10b90cd5ce42122b1c3081d5282fdb3621/dependency_groups-1.3.1.tar.gz", hash = "sha256:78078301090517fd938c19f64a53ce98c32834dfe0dee6b88004a569a6adfefd", size = 10093, upload-time = "2025-05-02T00:34:29.452Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/99/c7/d1ec24fb280caa5a79b6b950db565dab30210a66259d17d5bb2b3a9f878d/dependency_groups-1.3.1-py3-none-any.whl", hash = "sha256:51aeaa0dfad72430fcfb7bcdbefbd75f3792e5919563077f30bc0d73f4493030", size = 8664, upload-time = "2025-05-02T00:34:27.085Z" }, ] [[package]] name = "distlib" version = "0.3.9" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/0d/dd/1bec4c5ddb504ca60fc29472f3d27e8d4da1257a854e1d96742f15c1d02d/distlib-0.3.9.tar.gz", hash = "sha256:a60f20dea646b8a33f3e7772f74dc0b2d0772d2837ee1342a00645c81edf9403", size = 613923 } +sdist = { url = 
"https://files.pythonhosted.org/packages/0d/dd/1bec4c5ddb504ca60fc29472f3d27e8d4da1257a854e1d96742f15c1d02d/distlib-0.3.9.tar.gz", hash = "sha256:a60f20dea646b8a33f3e7772f74dc0b2d0772d2837ee1342a00645c81edf9403", size = 613923, upload-time = "2024-10-09T18:35:47.551Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/91/a1/cf2472db20f7ce4a6be1253a81cfdf85ad9c7885ffbed7047fb72c24cf87/distlib-0.3.9-py2.py3-none-any.whl", hash = "sha256:47f8c22fd27c27e25a65601af709b38e4f0a45ea4fc2e710f65755fa8caaaf87", size = 468973 }, + { url = "https://files.pythonhosted.org/packages/91/a1/cf2472db20f7ce4a6be1253a81cfdf85ad9c7885ffbed7047fb72c24cf87/distlib-0.3.9-py2.py3-none-any.whl", hash = "sha256:47f8c22fd27c27e25a65601af709b38e4f0a45ea4fc2e710f65755fa8caaaf87", size = 468973, upload-time = "2024-10-09T18:35:44.272Z" }, ] [[package]] name = "filelock" -version = "3.16.1" +version = "3.18.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/9d/db/3ef5bb276dae18d6ec2124224403d1d67bccdbefc17af4cc8f553e341ab1/filelock-3.16.1.tar.gz", hash = "sha256:c249fbfcd5db47e5e2d6d62198e565475ee65e4831e2561c8e313fa7eb961435", size = 18037 } +sdist = { url = "https://files.pythonhosted.org/packages/0a/10/c23352565a6544bdc5353e0b15fc1c563352101f30e24bf500207a54df9a/filelock-3.18.0.tar.gz", hash = "sha256:adbc88eabb99d2fec8c9c1b229b171f18afa655400173ddc653d5d01501fb9f2", size = 18075, upload-time = "2025-03-14T07:11:40.47Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/b9/f8/feced7779d755758a52d1f6635d990b8d98dc0a29fa568bbe0625f18fdf3/filelock-3.16.1-py3-none-any.whl", hash = "sha256:2082e5703d51fbf98ea75855d9d5527e33d8ff23099bec374a134febee6946b0", size = 16163 }, + { url = "https://files.pythonhosted.org/packages/4d/36/2a115987e2d8c300a974597416d9de88f2444426de9571f4b59b2cca3acc/filelock-3.18.0-py3-none-any.whl", hash = "sha256:c401f4f8377c4464e6db25fff06205fd89bdd83b65eb0488ed1b160f780e21de", size = 16215, 
upload-time = "2025-03-14T07:11:39.145Z" }, ] [[package]] name = "iniconfig" -version = "2.0.0" +version = "2.1.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/d7/4b/cbd8e699e64a6f16ca3a8220661b5f83792b3017d0f79807cb8708d33913/iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3", size = 4646 } +sdist = { url = "https://files.pythonhosted.org/packages/f2/97/ebf4da567aa6827c909642694d71c9fcf53e5b504f2d96afea02718862f3/iniconfig-2.1.0.tar.gz", hash = "sha256:3abbd2e30b36733fee78f9c7f7308f2d0050e88f0087fd25c2645f63c773e1c7", size = 4793, upload-time = "2025-03-19T20:09:59.721Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/ef/a6/62565a6e1cf69e10f5727360368e451d4b7f58beeac6173dc9db836a5b46/iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374", size = 5892 }, + { url = "https://files.pythonhosted.org/packages/2c/e1/e6716421ea10d38022b952c159d5161ca1193197fb744506875fbb87ea7b/iniconfig-2.1.0-py3-none-any.whl", hash = "sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760", size = 6050, upload-time = "2025-03-19T20:10:01.071Z" }, ] [[package]] @@ -105,132 +128,128 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "mdurl" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/38/71/3b932df36c1a044d397a1f92d1cf91ee0a503d91e470cbd670aa66b07ed0/markdown-it-py-3.0.0.tar.gz", hash = "sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb", size = 74596 } +sdist = { url = "https://files.pythonhosted.org/packages/38/71/3b932df36c1a044d397a1f92d1cf91ee0a503d91e470cbd670aa66b07ed0/markdown-it-py-3.0.0.tar.gz", hash = "sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb", size = 74596, upload-time = "2023-06-03T06:41:14.443Z" } wheels = [ - { url = 
"https://files.pythonhosted.org/packages/42/d7/1ec15b46af6af88f19b8e5ffea08fa375d433c998b8a7639e76935c14f1f/markdown_it_py-3.0.0-py3-none-any.whl", hash = "sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1", size = 87528 }, + { url = "https://files.pythonhosted.org/packages/42/d7/1ec15b46af6af88f19b8e5ffea08fa375d433c998b8a7639e76935c14f1f/markdown_it_py-3.0.0-py3-none-any.whl", hash = "sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1", size = 87528, upload-time = "2023-06-03T06:41:11.019Z" }, ] [[package]] name = "mdurl" version = "0.1.2" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/d6/54/cfe61301667036ec958cb99bd3efefba235e65cdeb9c84d24a8293ba1d90/mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba", size = 8729 } +sdist = { url = "https://files.pythonhosted.org/packages/d6/54/cfe61301667036ec958cb99bd3efefba235e65cdeb9c84d24a8293ba1d90/mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba", size = 8729, upload-time = "2022-08-14T12:40:10.846Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979 }, + { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979, upload-time = "2022-08-14T12:40:09.779Z" }, +] + +[[package]] +name = "nox" +version = "2025.5.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "argcomplete" }, + { name = "attrs" }, + { name = "colorlog" }, + { name = "dependency-groups" }, + { name = "packaging" }, + { name = "virtualenv" }, +] 
+sdist = { url = "https://files.pythonhosted.org/packages/b4/80/47712208c410defec169992e57c179f0f4d92f5dd17ba8daca50a8077e23/nox-2025.5.1.tar.gz", hash = "sha256:2a571dfa7a58acc726521ac3cd8184455ebcdcbf26401c7b737b5bc6701427b2", size = 4023334, upload-time = "2025-05-01T16:35:48.056Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a6/be/7b423b02b09eb856beffe76fe8c4121c99852db74dd12a422dcb72d1134e/nox-2025.5.1-py3-none-any.whl", hash = "sha256:56abd55cf37ff523c254fcec4d152ed51e5fe80e2ab8317221d8b828ac970a31", size = 71753, upload-time = "2025-05-01T16:35:46.037Z" }, ] [[package]] name = "numpy" -version = "2.1.3" +version = "2.2.6" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/25/ca/1166b75c21abd1da445b97bf1fa2f14f423c6cfb4fc7c4ef31dccf9f6a94/numpy-2.1.3.tar.gz", hash = "sha256:aa08e04e08aaf974d4458def539dece0d28146d866a39da5639596f4921fd761", size = 20166090 } +sdist = { url = "https://files.pythonhosted.org/packages/76/21/7d2a95e4bba9dc13d043ee156a356c0a8f0c6309dff6b21b4d71a073b8a8/numpy-2.2.6.tar.gz", hash = "sha256:e29554e2bef54a90aa5cc07da6ce955accb83f21ab5de01a62c8478897b264fd", size = 20276440, upload-time = "2025-05-17T22:38:04.611Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/ad/81/c8167192eba5247593cd9d305ac236847c2912ff39e11402e72ae28a4985/numpy-2.1.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4d1167c53b93f1f5d8a139a742b3c6f4d429b54e74e6b57d0eff40045187b15d", size = 21156252 }, - { url = "https://files.pythonhosted.org/packages/da/74/5a60003fc3d8a718d830b08b654d0eea2d2db0806bab8f3c2aca7e18e010/numpy-2.1.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c80e4a09b3d95b4e1cac08643f1152fa71a0a821a2d4277334c88d54b2219a41", size = 13784119 }, - { url = "https://files.pythonhosted.org/packages/47/7c/864cb966b96fce5e63fcf25e1e4d957fe5725a635e5f11fe03f39dd9d6b5/numpy-2.1.3-cp311-cp311-macosx_14_0_arm64.whl", hash = 
"sha256:576a1c1d25e9e02ed7fa5477f30a127fe56debd53b8d2c89d5578f9857d03ca9", size = 5352978 }, - { url = "https://files.pythonhosted.org/packages/09/ac/61d07930a4993dd9691a6432de16d93bbe6aa4b1c12a5e573d468eefc1ca/numpy-2.1.3-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:973faafebaae4c0aaa1a1ca1ce02434554d67e628b8d805e61f874b84e136b09", size = 6892570 }, - { url = "https://files.pythonhosted.org/packages/27/2f/21b94664f23af2bb52030653697c685022119e0dc93d6097c3cb45bce5f9/numpy-2.1.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:762479be47a4863e261a840e8e01608d124ee1361e48b96916f38b119cfda04a", size = 13896715 }, - { url = "https://files.pythonhosted.org/packages/7a/f0/80811e836484262b236c684a75dfc4ba0424bc670e765afaa911468d9f39/numpy-2.1.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc6f24b3d1ecc1eebfbf5d6051faa49af40b03be1aaa781ebdadcbc090b4539b", size = 16339644 }, - { url = "https://files.pythonhosted.org/packages/fa/81/ce213159a1ed8eb7d88a2a6ef4fbdb9e4ffd0c76b866c350eb4e3c37e640/numpy-2.1.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:17ee83a1f4fef3c94d16dc1802b998668b5419362c8a4f4e8a491de1b41cc3ee", size = 16712217 }, - { url = "https://files.pythonhosted.org/packages/7d/84/4de0b87d5a72f45556b2a8ee9fc8801e8518ec867fc68260c1f5dcb3903f/numpy-2.1.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:15cb89f39fa6d0bdfb600ea24b250e5f1a3df23f901f51c8debaa6a5d122b2f0", size = 14399053 }, - { url = "https://files.pythonhosted.org/packages/7e/1c/e5fabb9ad849f9d798b44458fd12a318d27592d4bc1448e269dec070ff04/numpy-2.1.3-cp311-cp311-win32.whl", hash = "sha256:d9beb777a78c331580705326d2367488d5bc473b49a9bc3036c154832520aca9", size = 6534741 }, - { url = "https://files.pythonhosted.org/packages/1e/48/a9a4b538e28f854bfb62e1dea3c8fea12e90216a276c7777ae5345ff29a7/numpy-2.1.3-cp311-cp311-win_amd64.whl", hash = "sha256:d89dd2b6da69c4fff5e39c28a382199ddedc3a5be5390115608345dec660b9e2", size = 12869487 }, 
- { url = "https://files.pythonhosted.org/packages/8a/f0/385eb9970309643cbca4fc6eebc8bb16e560de129c91258dfaa18498da8b/numpy-2.1.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:f55ba01150f52b1027829b50d70ef1dafd9821ea82905b63936668403c3b471e", size = 20849658 }, - { url = "https://files.pythonhosted.org/packages/54/4a/765b4607f0fecbb239638d610d04ec0a0ded9b4951c56dc68cef79026abf/numpy-2.1.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:13138eadd4f4da03074851a698ffa7e405f41a0845a6b1ad135b81596e4e9958", size = 13492258 }, - { url = "https://files.pythonhosted.org/packages/bd/a7/2332679479c70b68dccbf4a8eb9c9b5ee383164b161bee9284ac141fbd33/numpy-2.1.3-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:a6b46587b14b888e95e4a24d7b13ae91fa22386c199ee7b418f449032b2fa3b8", size = 5090249 }, - { url = "https://files.pythonhosted.org/packages/c1/67/4aa00316b3b981a822c7a239d3a8135be2a6945d1fd11d0efb25d361711a/numpy-2.1.3-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:0fa14563cc46422e99daef53d725d0c326e99e468a9320a240affffe87852564", size = 6621704 }, - { url = "https://files.pythonhosted.org/packages/5e/da/1a429ae58b3b6c364eeec93bf044c532f2ff7b48a52e41050896cf15d5b1/numpy-2.1.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8637dcd2caa676e475503d1f8fdb327bc495554e10838019651b76d17b98e512", size = 13606089 }, - { url = "https://files.pythonhosted.org/packages/9e/3e/3757f304c704f2f0294a6b8340fcf2be244038be07da4cccf390fa678a9f/numpy-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2312b2aa89e1f43ecea6da6ea9a810d06aae08321609d8dc0d0eda6d946a541b", size = 16043185 }, - { url = "https://files.pythonhosted.org/packages/43/97/75329c28fea3113d00c8d2daf9bc5828d58d78ed661d8e05e234f86f0f6d/numpy-2.1.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:a38c19106902bb19351b83802531fea19dee18e5b37b36454f27f11ff956f7fc", size = 16410751 }, - { url = 
"https://files.pythonhosted.org/packages/ad/7a/442965e98b34e0ae9da319f075b387bcb9a1e0658276cc63adb8c9686f7b/numpy-2.1.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:02135ade8b8a84011cbb67dc44e07c58f28575cf9ecf8ab304e51c05528c19f0", size = 14082705 }, - { url = "https://files.pythonhosted.org/packages/ac/b6/26108cf2cfa5c7e03fb969b595c93131eab4a399762b51ce9ebec2332e80/numpy-2.1.3-cp312-cp312-win32.whl", hash = "sha256:e6988e90fcf617da2b5c78902fe8e668361b43b4fe26dbf2d7b0f8034d4cafb9", size = 6239077 }, - { url = "https://files.pythonhosted.org/packages/a6/84/fa11dad3404b7634aaab50733581ce11e5350383311ea7a7010f464c0170/numpy-2.1.3-cp312-cp312-win_amd64.whl", hash = "sha256:0d30c543f02e84e92c4b1f415b7c6b5326cbe45ee7882b6b77db7195fb971e3a", size = 12566858 }, - { url = "https://files.pythonhosted.org/packages/4d/0b/620591441457e25f3404c8057eb924d04f161244cb8a3680d529419aa86e/numpy-2.1.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:96fe52fcdb9345b7cd82ecd34547fca4321f7656d500eca497eb7ea5a926692f", size = 20836263 }, - { url = "https://files.pythonhosted.org/packages/45/e1/210b2d8b31ce9119145433e6ea78046e30771de3fe353f313b2778142f34/numpy-2.1.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f653490b33e9c3a4c1c01d41bc2aef08f9475af51146e4a7710c450cf9761598", size = 13507771 }, - { url = "https://files.pythonhosted.org/packages/55/44/aa9ee3caee02fa5a45f2c3b95cafe59c44e4b278fbbf895a93e88b308555/numpy-2.1.3-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:dc258a761a16daa791081d026f0ed4399b582712e6fc887a95af09df10c5ca57", size = 5075805 }, - { url = "https://files.pythonhosted.org/packages/78/d6/61de6e7e31915ba4d87bbe1ae859e83e6582ea14c6add07c8f7eefd8488f/numpy-2.1.3-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:016d0f6f5e77b0f0d45d77387ffa4bb89816b57c835580c3ce8e099ef830befe", size = 6608380 }, - { url = 
"https://files.pythonhosted.org/packages/3e/46/48bdf9b7241e317e6cf94276fe11ba673c06d1fdf115d8b4ebf616affd1a/numpy-2.1.3-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c181ba05ce8299c7aa3125c27b9c2167bca4a4445b7ce73d5febc411ca692e43", size = 13602451 }, - { url = "https://files.pythonhosted.org/packages/70/50/73f9a5aa0810cdccda9c1d20be3cbe4a4d6ea6bfd6931464a44c95eef731/numpy-2.1.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5641516794ca9e5f8a4d17bb45446998c6554704d888f86df9b200e66bdcce56", size = 16039822 }, - { url = "https://files.pythonhosted.org/packages/ad/cd/098bc1d5a5bc5307cfc65ee9369d0ca658ed88fbd7307b0d49fab6ca5fa5/numpy-2.1.3-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:ea4dedd6e394a9c180b33c2c872b92f7ce0f8e7ad93e9585312b0c5a04777a4a", size = 16411822 }, - { url = "https://files.pythonhosted.org/packages/83/a2/7d4467a2a6d984549053b37945620209e702cf96a8bc658bc04bba13c9e2/numpy-2.1.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:b0df3635b9c8ef48bd3be5f862cf71b0a4716fa0e702155c45067c6b711ddcef", size = 14079598 }, - { url = "https://files.pythonhosted.org/packages/e9/6a/d64514dcecb2ee70bfdfad10c42b76cab657e7ee31944ff7a600f141d9e9/numpy-2.1.3-cp313-cp313-win32.whl", hash = "sha256:50ca6aba6e163363f132b5c101ba078b8cbd3fa92c7865fd7d4d62d9779ac29f", size = 6236021 }, - { url = "https://files.pythonhosted.org/packages/bb/f9/12297ed8d8301a401e7d8eb6b418d32547f1d700ed3c038d325a605421a4/numpy-2.1.3-cp313-cp313-win_amd64.whl", hash = "sha256:747641635d3d44bcb380d950679462fae44f54b131be347d5ec2bce47d3df9ed", size = 12560405 }, - { url = "https://files.pythonhosted.org/packages/a7/45/7f9244cd792e163b334e3a7f02dff1239d2890b6f37ebf9e82cbe17debc0/numpy-2.1.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:996bb9399059c5b82f76b53ff8bb686069c05acc94656bb259b1d63d04a9506f", size = 20859062 }, - { url = 
"https://files.pythonhosted.org/packages/b1/b4/a084218e7e92b506d634105b13e27a3a6645312b93e1c699cc9025adb0e1/numpy-2.1.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:45966d859916ad02b779706bb43b954281db43e185015df6eb3323120188f9e4", size = 13515839 }, - { url = "https://files.pythonhosted.org/packages/27/45/58ed3f88028dcf80e6ea580311dc3edefdd94248f5770deb980500ef85dd/numpy-2.1.3-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:baed7e8d7481bfe0874b566850cb0b85243e982388b7b23348c6db2ee2b2ae8e", size = 5116031 }, - { url = "https://files.pythonhosted.org/packages/37/a8/eb689432eb977d83229094b58b0f53249d2209742f7de529c49d61a124a0/numpy-2.1.3-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:a9f7f672a3388133335589cfca93ed468509cb7b93ba3105fce780d04a6576a0", size = 6629977 }, - { url = "https://files.pythonhosted.org/packages/42/a3/5355ad51ac73c23334c7caaed01adadfda49544f646fcbfbb4331deb267b/numpy-2.1.3-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7aac50327da5d208db2eec22eb11e491e3fe13d22653dce51b0f4109101b408", size = 13575951 }, - { url = "https://files.pythonhosted.org/packages/c4/70/ea9646d203104e647988cb7d7279f135257a6b7e3354ea6c56f8bafdb095/numpy-2.1.3-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4394bc0dbd074b7f9b52024832d16e019decebf86caf909d94f6b3f77a8ee3b6", size = 16022655 }, - { url = "https://files.pythonhosted.org/packages/14/ce/7fc0612903e91ff9d0b3f2eda4e18ef9904814afcae5b0f08edb7f637883/numpy-2.1.3-cp313-cp313t-musllinux_1_1_x86_64.whl", hash = "sha256:50d18c4358a0a8a53f12a8ba9d772ab2d460321e6a93d6064fc22443d189853f", size = 16399902 }, - { url = "https://files.pythonhosted.org/packages/ef/62/1d3204313357591c913c32132a28f09a26357e33ea3c4e2fe81269e0dca1/numpy-2.1.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:14e253bd43fc6b37af4921b10f6add6925878a42a0c5fe83daee390bca80bc17", size = 14067180 }, - { url = 
"https://files.pythonhosted.org/packages/24/d7/78a40ed1d80e23a774cb8a34ae8a9493ba1b4271dde96e56ccdbab1620ef/numpy-2.1.3-cp313-cp313t-win32.whl", hash = "sha256:08788d27a5fd867a663f6fc753fd7c3ad7e92747efc73c53bca2f19f8bc06f48", size = 6291907 }, - { url = "https://files.pythonhosted.org/packages/86/09/a5ab407bd7f5f5599e6a9261f964ace03a73e7c6928de906981c31c38082/numpy-2.1.3-cp313-cp313t-win_amd64.whl", hash = "sha256:2564fbdf2b99b3f815f2107c1bbc93e2de8ee655a69c261363a1172a79a257d4", size = 12644098 }, + { url = "https://files.pythonhosted.org/packages/82/5d/c00588b6cf18e1da539b45d3598d3557084990dcc4331960c15ee776ee41/numpy-2.2.6-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:41c5a21f4a04fa86436124d388f6ed60a9343a6f767fced1a8a71c3fbca038ff", size = 20875348, upload-time = "2025-05-17T21:34:39.648Z" }, + { url = "https://files.pythonhosted.org/packages/66/ee/560deadcdde6c2f90200450d5938f63a34b37e27ebff162810f716f6a230/numpy-2.2.6-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:de749064336d37e340f640b05f24e9e3dd678c57318c7289d222a8a2f543e90c", size = 14119362, upload-time = "2025-05-17T21:35:01.241Z" }, + { url = "https://files.pythonhosted.org/packages/3c/65/4baa99f1c53b30adf0acd9a5519078871ddde8d2339dc5a7fde80d9d87da/numpy-2.2.6-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:894b3a42502226a1cac872f840030665f33326fc3dac8e57c607905773cdcde3", size = 5084103, upload-time = "2025-05-17T21:35:10.622Z" }, + { url = "https://files.pythonhosted.org/packages/cc/89/e5a34c071a0570cc40c9a54eb472d113eea6d002e9ae12bb3a8407fb912e/numpy-2.2.6-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:71594f7c51a18e728451bb50cc60a3ce4e6538822731b2933209a1f3614e9282", size = 6625382, upload-time = "2025-05-17T21:35:21.414Z" }, + { url = "https://files.pythonhosted.org/packages/f8/35/8c80729f1ff76b3921d5c9487c7ac3de9b2a103b1cd05e905b3090513510/numpy-2.2.6-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:f2618db89be1b4e05f7a1a847a9c1c0abd63e63a1607d892dd54668dd92faf87", size = 14018462, upload-time = "2025-05-17T21:35:42.174Z" }, + { url = "https://files.pythonhosted.org/packages/8c/3d/1e1db36cfd41f895d266b103df00ca5b3cbe965184df824dec5c08c6b803/numpy-2.2.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fd83c01228a688733f1ded5201c678f0c53ecc1006ffbc404db9f7a899ac6249", size = 16527618, upload-time = "2025-05-17T21:36:06.711Z" }, + { url = "https://files.pythonhosted.org/packages/61/c6/03ed30992602c85aa3cd95b9070a514f8b3c33e31124694438d88809ae36/numpy-2.2.6-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:37c0ca431f82cd5fa716eca9506aefcabc247fb27ba69c5062a6d3ade8cf8f49", size = 15505511, upload-time = "2025-05-17T21:36:29.965Z" }, + { url = "https://files.pythonhosted.org/packages/b7/25/5761d832a81df431e260719ec45de696414266613c9ee268394dd5ad8236/numpy-2.2.6-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:fe27749d33bb772c80dcd84ae7e8df2adc920ae8297400dabec45f0dedb3f6de", size = 18313783, upload-time = "2025-05-17T21:36:56.883Z" }, + { url = "https://files.pythonhosted.org/packages/57/0a/72d5a3527c5ebffcd47bde9162c39fae1f90138c961e5296491ce778e682/numpy-2.2.6-cp312-cp312-win32.whl", hash = "sha256:4eeaae00d789f66c7a25ac5f34b71a7035bb474e679f410e5e1a94deb24cf2d4", size = 6246506, upload-time = "2025-05-17T21:37:07.368Z" }, + { url = "https://files.pythonhosted.org/packages/36/fa/8c9210162ca1b88529ab76b41ba02d433fd54fecaf6feb70ef9f124683f1/numpy-2.2.6-cp312-cp312-win_amd64.whl", hash = "sha256:c1f9540be57940698ed329904db803cf7a402f3fc200bfe599334c9bd84a40b2", size = 12614190, upload-time = "2025-05-17T21:37:26.213Z" }, + { url = "https://files.pythonhosted.org/packages/f9/5c/6657823f4f594f72b5471f1db1ab12e26e890bb2e41897522d134d2a3e81/numpy-2.2.6-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:0811bb762109d9708cca4d0b13c4f67146e3c3b7cf8d34018c722adb2d957c84", size = 20867828, upload-time = "2025-05-17T21:37:56.699Z" 
}, + { url = "https://files.pythonhosted.org/packages/dc/9e/14520dc3dadf3c803473bd07e9b2bd1b69bc583cb2497b47000fed2fa92f/numpy-2.2.6-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:287cc3162b6f01463ccd86be154f284d0893d2b3ed7292439ea97eafa8170e0b", size = 14143006, upload-time = "2025-05-17T21:38:18.291Z" }, + { url = "https://files.pythonhosted.org/packages/4f/06/7e96c57d90bebdce9918412087fc22ca9851cceaf5567a45c1f404480e9e/numpy-2.2.6-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:f1372f041402e37e5e633e586f62aa53de2eac8d98cbfb822806ce4bbefcb74d", size = 5076765, upload-time = "2025-05-17T21:38:27.319Z" }, + { url = "https://files.pythonhosted.org/packages/73/ed/63d920c23b4289fdac96ddbdd6132e9427790977d5457cd132f18e76eae0/numpy-2.2.6-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:55a4d33fa519660d69614a9fad433be87e5252f4b03850642f88993f7b2ca566", size = 6617736, upload-time = "2025-05-17T21:38:38.141Z" }, + { url = "https://files.pythonhosted.org/packages/85/c5/e19c8f99d83fd377ec8c7e0cf627a8049746da54afc24ef0a0cb73d5dfb5/numpy-2.2.6-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f92729c95468a2f4f15e9bb94c432a9229d0d50de67304399627a943201baa2f", size = 14010719, upload-time = "2025-05-17T21:38:58.433Z" }, + { url = "https://files.pythonhosted.org/packages/19/49/4df9123aafa7b539317bf6d342cb6d227e49f7a35b99c287a6109b13dd93/numpy-2.2.6-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1bc23a79bfabc5d056d106f9befb8d50c31ced2fbc70eedb8155aec74a45798f", size = 16526072, upload-time = "2025-05-17T21:39:22.638Z" }, + { url = "https://files.pythonhosted.org/packages/b2/6c/04b5f47f4f32f7c2b0e7260442a8cbcf8168b0e1a41ff1495da42f42a14f/numpy-2.2.6-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:e3143e4451880bed956e706a3220b4e5cf6172ef05fcc397f6f36a550b1dd868", size = 15503213, upload-time = "2025-05-17T21:39:45.865Z" }, + { url = 
"https://files.pythonhosted.org/packages/17/0a/5cd92e352c1307640d5b6fec1b2ffb06cd0dabe7d7b8227f97933d378422/numpy-2.2.6-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:b4f13750ce79751586ae2eb824ba7e1e8dba64784086c98cdbbcc6a42112ce0d", size = 18316632, upload-time = "2025-05-17T21:40:13.331Z" }, + { url = "https://files.pythonhosted.org/packages/f0/3b/5cba2b1d88760ef86596ad0f3d484b1cbff7c115ae2429678465057c5155/numpy-2.2.6-cp313-cp313-win32.whl", hash = "sha256:5beb72339d9d4fa36522fc63802f469b13cdbe4fdab4a288f0c441b74272ebfd", size = 6244532, upload-time = "2025-05-17T21:43:46.099Z" }, + { url = "https://files.pythonhosted.org/packages/cb/3b/d58c12eafcb298d4e6d0d40216866ab15f59e55d148a5658bb3132311fcf/numpy-2.2.6-cp313-cp313-win_amd64.whl", hash = "sha256:b0544343a702fa80c95ad5d3d608ea3599dd54d4632df855e4c8d24eb6ecfa1c", size = 12610885, upload-time = "2025-05-17T21:44:05.145Z" }, + { url = "https://files.pythonhosted.org/packages/6b/9e/4bf918b818e516322db999ac25d00c75788ddfd2d2ade4fa66f1f38097e1/numpy-2.2.6-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:0bca768cd85ae743b2affdc762d617eddf3bcf8724435498a1e80132d04879e6", size = 20963467, upload-time = "2025-05-17T21:40:44Z" }, + { url = "https://files.pythonhosted.org/packages/61/66/d2de6b291507517ff2e438e13ff7b1e2cdbdb7cb40b3ed475377aece69f9/numpy-2.2.6-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:fc0c5673685c508a142ca65209b4e79ed6740a4ed6b2267dbba90f34b0b3cfda", size = 14225144, upload-time = "2025-05-17T21:41:05.695Z" }, + { url = "https://files.pythonhosted.org/packages/e4/25/480387655407ead912e28ba3a820bc69af9adf13bcbe40b299d454ec011f/numpy-2.2.6-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:5bd4fc3ac8926b3819797a7c0e2631eb889b4118a9898c84f585a54d475b7e40", size = 5200217, upload-time = "2025-05-17T21:41:15.903Z" }, + { url = "https://files.pythonhosted.org/packages/aa/4a/6e313b5108f53dcbf3aca0c0f3e9c92f4c10ce57a0a721851f9785872895/numpy-2.2.6-cp313-cp313t-macosx_14_0_x86_64.whl", hash 
= "sha256:fee4236c876c4e8369388054d02d0e9bb84821feb1a64dd59e137e6511a551f8", size = 6712014, upload-time = "2025-05-17T21:41:27.321Z" }, + { url = "https://files.pythonhosted.org/packages/b7/30/172c2d5c4be71fdf476e9de553443cf8e25feddbe185e0bd88b096915bcc/numpy-2.2.6-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e1dda9c7e08dc141e0247a5b8f49cf05984955246a327d4c48bda16821947b2f", size = 14077935, upload-time = "2025-05-17T21:41:49.738Z" }, + { url = "https://files.pythonhosted.org/packages/12/fb/9e743f8d4e4d3c710902cf87af3512082ae3d43b945d5d16563f26ec251d/numpy-2.2.6-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f447e6acb680fd307f40d3da4852208af94afdfab89cf850986c3ca00562f4fa", size = 16600122, upload-time = "2025-05-17T21:42:14.046Z" }, + { url = "https://files.pythonhosted.org/packages/12/75/ee20da0e58d3a66f204f38916757e01e33a9737d0b22373b3eb5a27358f9/numpy-2.2.6-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:389d771b1623ec92636b0786bc4ae56abafad4a4c513d36a55dce14bd9ce8571", size = 15586143, upload-time = "2025-05-17T21:42:37.464Z" }, + { url = "https://files.pythonhosted.org/packages/76/95/bef5b37f29fc5e739947e9ce5179ad402875633308504a52d188302319c8/numpy-2.2.6-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:8e9ace4a37db23421249ed236fdcdd457d671e25146786dfc96835cd951aa7c1", size = 18385260, upload-time = "2025-05-17T21:43:05.189Z" }, + { url = "https://files.pythonhosted.org/packages/09/04/f2f83279d287407cf36a7a8053a5abe7be3622a4363337338f2585e4afda/numpy-2.2.6-cp313-cp313t-win32.whl", hash = "sha256:038613e9fb8c72b0a41f025a7e4c3f0b7a1b5d768ece4796b674c8f3fe13efff", size = 6377225, upload-time = "2025-05-17T21:43:16.254Z" }, + { url = "https://files.pythonhosted.org/packages/67/0e/35082d13c09c02c011cf21570543d202ad929d961c02a147493cb0c2bdf5/numpy-2.2.6-cp313-cp313t-win_amd64.whl", hash = "sha256:6031dd6dfecc0cf9f668681a37648373bddd6421fff6c66ec1624eed0180ee06", size = 12771374, 
upload-time = "2025-05-17T21:43:35.479Z" }, ] [[package]] name = "packaging" -version = "24.2" +version = "25.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/d0/63/68dbb6eb2de9cb10ee4c9c14a0148804425e13c4fb20d61cce69f53106da/packaging-24.2.tar.gz", hash = "sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f", size = 163950 } +sdist = { url = "https://files.pythonhosted.org/packages/a1/d4/1fc4078c65507b51b96ca8f8c3ba19e6a61c8253c72794544580a7b6c24d/packaging-25.0.tar.gz", hash = "sha256:d443872c98d677bf60f6a1f2f8c1cb748e8fe762d2bf9d3148b5599295b0fc4f", size = 165727, upload-time = "2025-04-19T11:48:59.673Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/88/ef/eb23f262cca3c0c4eb7ab1933c3b1f03d021f2c48f54763065b6f0e321be/packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759", size = 65451 }, + { url = "https://files.pythonhosted.org/packages/20/12/38679034af332785aac8774540895e234f4d07f7545804097de4b666afd8/packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484", size = 66469, upload-time = "2025-04-19T11:48:57.875Z" }, ] [[package]] name = "platformdirs" -version = "4.3.6" +version = "4.3.8" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/13/fc/128cc9cb8f03208bdbf93d3aa862e16d376844a14f9a0ce5cf4507372de4/platformdirs-4.3.6.tar.gz", hash = "sha256:357fb2acbc885b0419afd3ce3ed34564c13c9b95c89360cd9563f73aa5e2b907", size = 21302 } +sdist = { url = "https://files.pythonhosted.org/packages/fe/8b/3c73abc9c759ecd3f1f7ceff6685840859e8070c4d947c93fae71f6a0bf2/platformdirs-4.3.8.tar.gz", hash = "sha256:3d512d96e16bcb959a814c9f348431070822a6496326a4be0911c40b5a74c2bc", size = 21362, upload-time = "2025-05-07T22:47:42.121Z" } wheels = [ - { url = 
"https://files.pythonhosted.org/packages/3c/a6/bc1012356d8ece4d66dd75c4b9fc6c1f6650ddd5991e421177d9f8f671be/platformdirs-4.3.6-py3-none-any.whl", hash = "sha256:73e575e1408ab8103900836b97580d5307456908a03e92031bab39e4554cc3fb", size = 18439 }, + { url = "https://files.pythonhosted.org/packages/fe/39/979e8e21520d4e47a0bbe349e2713c0aac6f3d853d0e5b34d76206c439aa/platformdirs-4.3.8-py3-none-any.whl", hash = "sha256:ff7059bb7eb1179e2685604f4aaf157cfd9535242bd23742eadc3c13542139b4", size = 18567, upload-time = "2025-05-07T22:47:40.376Z" }, ] [[package]] name = "pluggy" -version = "1.5.0" +version = "1.6.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/96/2d/02d4312c973c6050a18b314a5ad0b3210edb65a906f868e31c111dede4a6/pluggy-1.5.0.tar.gz", hash = "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1", size = 67955 } +sdist = { url = "https://files.pythonhosted.org/packages/f9/e2/3e91f31a7d2b083fe6ef3fa267035b518369d9511ffab804f839851d2779/pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3", size = 69412, upload-time = "2025-05-15T12:30:07.975Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/88/5f/e351af9a41f866ac3f1fac4ca0613908d9a41741cfcf2228f4ad853b697d/pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669", size = 20556 }, + { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" }, ] [[package]] name = "polars" -version = "1.16.0" +version = "1.30.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = 
"https://files.pythonhosted.org/packages/43/80/d845897273be97a3e73b59be711deda375b638330d591a7ef8132c20f52f/polars-1.16.0.tar.gz", hash = "sha256:dd99808b833872babe02434a809fd45c1cffe66a3d57123cdc5e447c7753d328", size = 4192568 } +sdist = { url = "https://files.pythonhosted.org/packages/82/b6/8dbdf626c0705a57f052708c9fc0860ffc2aa97955930d5faaf6a66fcfd3/polars-1.30.0.tar.gz", hash = "sha256:dfe94ae84a5efd9ba74e616e3e125b24ca155494a931890a8f17480737c4db45", size = 4668318, upload-time = "2025-05-21T13:33:24.175Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/8e/b0/51c944ecd58b3ebc81eb03b50448127ff85fd9448063094524e0c6693c75/polars-1.16.0-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:072f5ff3b5fe05797c59890de0e464b34ede75a9735e7d7221622fa3a0616d8e", size = 34735038 }, - { url = "https://files.pythonhosted.org/packages/61/2f/d0b45007f2ae4b4926070b420c8525840b9757013cd96077bcde40807ecb/polars-1.16.0-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:ebaf7a1ea114b042fa9f1cd17d49436279eb30545dd74361a2f5e3febeb867cd", size = 30577461 }, - { url = "https://files.pythonhosted.org/packages/31/9e/21e05959323883abcee799837d8cac08adf10a48c233432993757e41791a/polars-1.16.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e626d21dcd2566e1442dac414fe177bc70ebfc2f16620d59d778b1b774361018", size = 36006233 }, - { url = "https://files.pythonhosted.org/packages/25/80/da5c3cd248c7642d1feb896f0a27a0860c607f8cdde3e75457182e4c76c6/polars-1.16.0-cp39-abi3-manylinux_2_24_aarch64.whl", hash = "sha256:53debcce55f68731ee2c7d6c787afdee26860ed6576f1ffa0cb9111b57f82857", size = 32348398 }, - { url = "https://files.pythonhosted.org/packages/08/0b/677c905f9dd5bc37708694e8f7367659c5382bd011f5dc1d564474032d0b/polars-1.16.0-cp39-abi3-win_amd64.whl", hash = "sha256:17efcb550c42d51034ff79702612b9184d8eac0d500de1dd7fb98490459276d3", size = 35743314 }, + { url = 
"https://files.pythonhosted.org/packages/40/48/e9b2cb379abcc9f7aff2e701098fcdb9fe6d85dc4ad4cec7b35d39c70951/polars-1.30.0-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:4c33bc97c29b7112f0e689a2f8a33143973a3ff466c70b25c7fd1880225de6dd", size = 35704342, upload-time = "2025-05-21T13:32:22.996Z" }, + { url = "https://files.pythonhosted.org/packages/36/ca/f545f61282f75eea4dfde4db2944963dcd59abd50c20e33a1c894da44dad/polars-1.30.0-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:e3d05914c364b8e39a5b10dcf97e84d76e516b3b1693880bf189a93aab3ca00d", size = 32459857, upload-time = "2025-05-21T13:32:27.728Z" }, + { url = "https://files.pythonhosted.org/packages/76/20/e018cd87d7cb6f8684355f31f4e193222455a6e8f7b942f4a2934f5969c7/polars-1.30.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1a52af3862082b868c1febeae650af8ae8a2105d2cb28f0449179a7b44f54ccf", size = 36267243, upload-time = "2025-05-21T13:32:31.796Z" }, + { url = "https://files.pythonhosted.org/packages/cb/e7/b88b973021be07b13d91b9301cc14392c994225ef5107a32a8ffd3fd6424/polars-1.30.0-cp39-abi3-manylinux_2_24_aarch64.whl", hash = "sha256:ffb3ef133454275d4254442257c5f71dd6e393ce365c97997dadeb6fa9d6d4b5", size = 33416871, upload-time = "2025-05-21T13:32:35.077Z" }, + { url = "https://files.pythonhosted.org/packages/dd/7c/d46d4381adeac537b8520b653dc30cb8b7edbf59883d71fbb989e9005de1/polars-1.30.0-cp39-abi3-win_amd64.whl", hash = "sha256:c26b633a9bd530c5fc09d317fca3bb3e16c772bd7df7549a9d8ec1934773cc5d", size = 36363630, upload-time = "2025-05-21T13:32:38.286Z" }, + { url = "https://files.pythonhosted.org/packages/fb/b5/5056d0c12aadb57390d0627492bef8b1abf3549474abb9ae0fd4e2bfa885/polars-1.30.0-cp39-abi3-win_arm64.whl", hash = "sha256:476f1bde65bc7b4d9f80af370645c2981b5798d67c151055e58534e89e96f2a8", size = 32643590, upload-time = "2025-05-21T13:32:42.107Z" }, ] [[package]] name = "pygments" -version = "2.18.0" +version = "2.19.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = 
"https://files.pythonhosted.org/packages/8e/62/8336eff65bcbc8e4cb5d05b55faf041285951b6e80f33e2bff2024788f31/pygments-2.18.0.tar.gz", hash = "sha256:786ff802f32e91311bff3889f6e9a86e81505fe99f2735bb6d60ae0c5004f199", size = 4891905 } +sdist = { url = "https://files.pythonhosted.org/packages/7c/2d/c3338d48ea6cc0feb8446d8e6937e1408088a72a39937982cc6111d17f84/pygments-2.19.1.tar.gz", hash = "sha256:61c16d2a8576dc0649d9f39e089b5f02bcd27fba10d8fb4dcc28173f7a45151f", size = 4968581, upload-time = "2025-01-06T17:26:30.443Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/f7/3f/01c8b82017c199075f8f788d0d906b9ffbbc5a47dc9918a945e13d5a2bda/pygments-2.18.0-py3-none-any.whl", hash = "sha256:b8e6aca0523f3ab76fee51799c488e38782ac06eafcf95e7ba832985c8e7b13a", size = 1205513 }, -] - -[[package]] -name = "pyproject-api" -version = "1.8.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "packaging" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/bb/19/441e0624a8afedd15bbcce96df1b80479dd0ff0d965f5ce8fde4f2f6ffad/pyproject_api-1.8.0.tar.gz", hash = "sha256:77b8049f2feb5d33eefcc21b57f1e279636277a8ac8ad6b5871037b243778496", size = 22340 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/ba/f4/3c4ddfcc0c19c217c6de513842d286de8021af2f2ab79bbb86c00342d778/pyproject_api-1.8.0-py3-none-any.whl", hash = "sha256:3d7d347a047afe796fd5d1885b1e391ba29be7169bd2f102fcd378f04273d228", size = 13100 }, + { url = "https://files.pythonhosted.org/packages/8a/0b/9fcc47d19c48b59121088dd6da2488a49d5f72dacf8262e2790a1d2c7d15/pygments-2.19.1-py3-none-any.whl", hash = "sha256:9ea1544ad55cecf4b8242fab6dd35a93bbce657034b0611ee383099054ab6d8c", size = 1225293, upload-time = "2025-01-06T17:26:25.553Z" }, ] [[package]] name = "pytest" -version = "8.3.3" +version = "8.3.5" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "colorama", marker = "sys_platform == 'win32'" }, @@ -238,70 +257,36 @@ dependencies = [ { name 
= "packaging" }, { name = "pluggy" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/8b/6c/62bbd536103af674e227c41a8f3dcd022d591f6eed5facb5a0f31ee33bbc/pytest-8.3.3.tar.gz", hash = "sha256:70b98107bd648308a7952b06e6ca9a50bc660be218d53c257cc1fc94fda10181", size = 1442487 } +sdist = { url = "https://files.pythonhosted.org/packages/ae/3c/c9d525a414d506893f0cd8a8d0de7706446213181570cdbd766691164e40/pytest-8.3.5.tar.gz", hash = "sha256:f4efe70cc14e511565ac476b57c279e12a855b11f48f212af1080ef2263d3845", size = 1450891, upload-time = "2025-03-02T12:54:54.503Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/6b/77/7440a06a8ead44c7757a64362dd22df5760f9b12dc5f11b6188cd2fc27a0/pytest-8.3.3-py3-none-any.whl", hash = "sha256:a6853c7375b2663155079443d2e45de913a911a11d669df02a50814944db57b2", size = 342341 }, + { url = "https://files.pythonhosted.org/packages/30/3d/64ad57c803f1fa1e963a7946b6e0fea4a70df53c1a7fed304586539c2bac/pytest-8.3.5-py3-none-any.whl", hash = "sha256:c69214aa47deac29fad6c2a4f590b9c4a9fdb16a403176fe154b79c0b4d4d820", size = 343634, upload-time = "2025-03-02T12:54:52.069Z" }, ] [[package]] name = "rich" -version = "13.9.4" +version = "14.0.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "markdown-it-py" }, { name = "pygments" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/ab/3a/0316b28d0761c6734d6bc14e770d85506c986c85ffb239e688eeaab2c2bc/rich-13.9.4.tar.gz", hash = "sha256:439594978a49a09530cff7ebc4b5c7103ef57baf48d5ea3184f21d9a2befa098", size = 223149 } +sdist = { url = "https://files.pythonhosted.org/packages/a1/53/830aa4c3066a8ab0ae9a9955976fb770fe9c6102117c8ec4ab3ea62d89e8/rich-14.0.0.tar.gz", hash = "sha256:82f1bc23a6a21ebca4ae0c45af9bdbc492ed20231dcb63f297d6d1021a9d5725", size = 224078, upload-time = "2025-03-30T14:15:14.23Z" } wheels = [ - { url = 
"https://files.pythonhosted.org/packages/19/71/39c7c0d87f8d4e6c020a393182060eaefeeae6c01dab6a84ec346f2567df/rich-13.9.4-py3-none-any.whl", hash = "sha256:6049d5e6ec054bf2779ab3358186963bac2ea89175919d699e378b99738c2a90", size = 242424 }, + { url = "https://files.pythonhosted.org/packages/0d/9b/63f4c7ebc259242c89b3acafdb37b41d1185c07ff0011164674e9076b491/rich-14.0.0-py3-none-any.whl", hash = "sha256:1c9491e1951aac09caffd42f448ee3d04e58923ffe14993f6e83068dc395d7e0", size = 243229, upload-time = "2025-03-30T14:15:12.283Z" }, ] [[package]] name = "shellingham" version = "1.5.4" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/58/15/8b3609fd3830ef7b27b655beb4b4e9c62313a4e8da8c676e142cc210d58e/shellingham-1.5.4.tar.gz", hash = "sha256:8dbca0739d487e5bd35ab3ca4b36e11c4078f3a234bfce294b0a0291363404de", size = 10310 } +sdist = { url = "https://files.pythonhosted.org/packages/58/15/8b3609fd3830ef7b27b655beb4b4e9c62313a4e8da8c676e142cc210d58e/shellingham-1.5.4.tar.gz", hash = "sha256:8dbca0739d487e5bd35ab3ca4b36e11c4078f3a234bfce294b0a0291363404de", size = 10310, upload-time = "2023-10-24T04:13:40.426Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/e0/f9/0595336914c5619e5f28a1fb793285925a8cd4b432c9da0a987836c7f822/shellingham-1.5.4-py2.py3-none-any.whl", hash = "sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686", size = 9755 }, -] - -[[package]] -name = "tox" -version = "4.23.2" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "cachetools" }, - { name = "chardet" }, - { name = "colorama" }, - { name = "filelock" }, - { name = "packaging" }, - { name = "platformdirs" }, - { name = "pluggy" }, - { name = "pyproject-api" }, - { name = "virtualenv" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/1f/86/32b10f91b4b975a37ac402b0f9fa016775088e0565c93602ba0b3c729ce8/tox-4.23.2.tar.gz", hash = 
"sha256:86075e00e555df6e82e74cfc333917f91ecb47ffbc868dcafbd2672e332f4a2c", size = 189998 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/af/c0/124b73d01c120e917383bc6c53ebc34efdf7243faa9fca64d105c94cf2ab/tox-4.23.2-py3-none-any.whl", hash = "sha256:452bc32bb031f2282881a2118923176445bac783ab97c874b8770ab4c3b76c38", size = 166758 }, -] - -[[package]] -name = "tox-uv" -version = "1.16.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "packaging" }, - { name = "tox" }, - { name = "uv" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/ed/5e/c3d2a45ab5465dddbbc267a589c9cfce23b91750d49af10738a08c98534e/tox_uv-1.16.0.tar.gz", hash = "sha256:71b2e2fa6c35c1360b91a302df1d65b3e5a1f656b321c5ebf7b84545804c9f01", size = 16337 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/ad/8d/1baa9f725ddd4824708759cf7b74bc43379f5f7feb079fde0629d7b32b3e/tox_uv-1.16.0-py3-none-any.whl", hash = "sha256:e6f0b525a687e745ab878d07cbf5c7e85d582028d4a7c8935f95e84350651432", size = 13661 }, + { url = "https://files.pythonhosted.org/packages/e0/f9/0595336914c5619e5f28a1fb793285925a8cd4b432c9da0a987836c7f822/shellingham-1.5.4-py2.py3-none-any.whl", hash = "sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686", size = 9755, upload-time = "2023-10-24T04:13:38.866Z" }, ] [[package]] name = "typer" -version = "0.14.0" +version = "0.15.4" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "click" }, @@ -309,55 +294,30 @@ dependencies = [ { name = "shellingham" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/0d/7e/24af5b9aaa0872f9f6dc5dcf789dc3e57ceb23b4c570b852cd4db0d98f14/typer-0.14.0.tar.gz", hash = "sha256:af58f737f8d0c0c37b9f955a6d39000b9ff97813afcbeef56af5e37cf743b45a", size = 98836 } +sdist = { url = "https://files.pythonhosted.org/packages/6c/89/c527e6c848739be8ceb5c44eb8208c52ea3515c6cf6406aa61932887bf58/typer-0.15.4.tar.gz", hash = 
"sha256:89507b104f9b6a0730354f27c39fae5b63ccd0c95b1ce1f1a6ba0cfd329997c3", size = 101559, upload-time = "2025-05-14T16:34:57.704Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/bb/d8/a3ab71d5587b42b832a7ef2e65b3e51a18f8da32b6ce169637d4d21995ed/typer-0.14.0-py3-none-any.whl", hash = "sha256:f476233a25770ab3e7b2eebf7c68f3bc702031681a008b20167573a4b7018f09", size = 44707 }, + { url = "https://files.pythonhosted.org/packages/c9/62/d4ba7afe2096d5659ec3db8b15d8665bdcb92a3c6ff0b95e99895b335a9c/typer-0.15.4-py3-none-any.whl", hash = "sha256:eb0651654dcdea706780c466cf06d8f174405a659ffff8f163cfbfee98c0e173", size = 45258, upload-time = "2025-05-14T16:34:55.583Z" }, ] [[package]] name = "typing-extensions" -version = "4.12.2" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/df/db/f35a00659bc03fec321ba8bce9420de607a1d37f8342eee1863174c69557/typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8", size = 85321 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/26/9f/ad63fc0248c5379346306f8668cda6e2e2e9c95e01216d2b8ffd9ff037d0/typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d", size = 37438 }, -] - -[[package]] -name = "uv" -version = "0.5.5" +version = "4.13.2" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/46/95/ba000f161640015c9f2ecc2681f38eb161b8edac600a0451b36e4ad15aa4/uv-0.5.5.tar.gz", hash = "sha256:7f8db4bdf7eaef6be271457c4b2a167f41ad115434944a09f5034018a29b4093", size = 2324705 } +sdist = { url = "https://files.pythonhosted.org/packages/f6/37/23083fcd6e35492953e8d2aaaa68b860eb422b34627b13f2ce3eb6106061/typing_extensions-4.13.2.tar.gz", hash = "sha256:e6c81219bd689f51865d9e372991c540bda33a0379d5573cddb9a3a23f7caaef", size = 106967, upload-time = "2025-04-10T14:19:05.416Z" } wheels = [ - { 
url = "https://files.pythonhosted.org/packages/6d/91/ac61feea1b11e4dc342a4fd174f311b628a04ec3b4614a24676c4d214bd1/uv-0.5.5-py3-none-linux_armv6l.whl", hash = "sha256:d091e88a9c2c830169c3ccf95fd972759e0ab629dacc2d5eff525e5ba3583904", size = 13887911 }, - { url = "https://files.pythonhosted.org/packages/84/18/461af22fd1f80f86548013639ab345810dbf35aa44dff1732c6faf311a48/uv-0.5.5-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:f0bfc7ced2fe0c85b3070dfa219072a1406133e18aab2f2fe10b6455ede0f8b2", size = 13900075 }, - { url = "https://files.pythonhosted.org/packages/68/5f/7a236ad48f81c580691f9e5f28dd47289a9819f18410f12ee3c621791efd/uv-0.5.5-py3-none-macosx_11_0_arm64.whl", hash = "sha256:553901e95cb5a4da1da19e288c29c5f886793f981750400e5cef48e3031b970b", size = 12869530 }, - { url = "https://files.pythonhosted.org/packages/94/d6/d6f441fd041fb4883332b999481896e28502b51681bcb786a001fb1e4a50/uv-0.5.5-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.musllinux_1_1_aarch64.whl", hash = "sha256:a4f0c7647187044056dc6f6f5d31b01f445d8695eb7d2f442b29fd5c9216a56f", size = 13158009 }, - { url = "https://files.pythonhosted.org/packages/9b/3d/cc1e44e14266bbdb71eb020d14454f4f24f72fcc2eb84a52bf809e030918/uv-0.5.5-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:34e894c922ba29a59bbe812a458a7095a575f76b87dfc362e0c3f4f650d6f631", size = 13676515 }, - { url = "https://files.pythonhosted.org/packages/10/5f/b81ed7ab715687a790b9b06a6f4e2781d7f7222840eead0392c3ef6f80ea/uv-0.5.5-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f5569798fc8eaad58fbb4fb70ced8f09ebe607fbbfb95fa42c559f57bbe0cabd", size = 14245499 }, - { url = "https://files.pythonhosted.org/packages/f7/e1/ce5a88bc5ed61ee310a8e0acc3fa3032280bb2d49514fd48b801bb36f96f/uv-0.5.5-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:59d53cce11718ce5d5367afc8c93ebcfc5e1cddfa4a44aedbf08d08d9b738381", size = 14920230 }, - { url = 
"https://files.pythonhosted.org/packages/03/84/6fa2deb5bed9dab815cb7f57e14885ecf8de22249b992f9c4ea5dd042c28/uv-0.5.5-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3dee9517ebba13d07d8f139c439c5ff63e438d31ebda4d7eb0af8d0f0cc6a181", size = 14701845 }, - { url = "https://files.pythonhosted.org/packages/59/b2/7078d52a73c1e13d984c22fae1888993e5cd3f40cea029ab2666d79d92a7/uv-0.5.5-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:29286cd6b9f8e040d02894a67c6b6304811ea393ca9dfade109e93cf4b3b842c", size = 18963098 }, - { url = "https://files.pythonhosted.org/packages/b9/89/a91d927574ce2103d22b7beab319468d915a1d04757cf82b6eaf79b56a10/uv-0.5.5-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0f7f04ae5a5430873d8610d8ea0a5d35df92e60bf701f80b3cf24857e0ac5e72", size = 14442788 }, - { url = "https://files.pythonhosted.org/packages/46/3d/2c5a9d362771aeef13c2ba1c9b1267b469e11331b874486f03f6c6fc5eb1/uv-0.5.5-py3-none-manylinux_2_28_aarch64.whl", hash = "sha256:5a47345ccafc0105b2f0cc22fcb0bb05be4d0e60df67f5beea28069b0bb372c8", size = 13394955 }, - { url = "https://files.pythonhosted.org/packages/cf/92/8d646385401472e7358cdf0fc86015edc9bf9132c9b79d7e2a11c300dd84/uv-0.5.5-py3-none-musllinux_1_1_armv7l.whl", hash = "sha256:b55d507bfe2bd2330c41680e4b0077972381f40975a59b53007254196abc4477", size = 13639522 }, - { url = "https://files.pythonhosted.org/packages/da/23/5e00b71f9c8b9c16f0947bf84b71265bafdf24947d2e271a657da00a0c6a/uv-0.5.5-py3-none-musllinux_1_1_i686.whl", hash = "sha256:365715e7247c2cd8ef661e8f96927b181248f689c07e48b076c9dbc78a4a0877", size = 13957451 }, - { url = "https://files.pythonhosted.org/packages/02/38/0c624a8d89416fa6c849c6670066c74f420aa898eafdf7cefd3adf77d686/uv-0.5.5-py3-none-musllinux_1_1_ppc64le.whl", hash = "sha256:0314a4b9a25bf00afe4e5472c338c8c6bd34688c23d63ce1ad35462cf087b492", size = 15840287 }, - { url = 
"https://files.pythonhosted.org/packages/a8/bd/1a81700d2555be504f0b1ff82f86d666384805e354c8eec2fdf7558d0cf9/uv-0.5.5-py3-none-musllinux_1_1_x86_64.whl", hash = "sha256:f4e9ddcffc29f009f692cda699912b02f6a12089d741b71d2fcd0b181eb71c5d", size = 14560282 }, - { url = "https://files.pythonhosted.org/packages/cd/71/c0b137c62a5a8ab5ecc0a8f505b77e5a05d5b94fd9da7560a247dac000f3/uv-0.5.5-py3-none-win32.whl", hash = "sha256:9af7018430da1f0960eee1592c820c343e2619f2d71f66c3be62da330826c537", size = 13810875 }, - { url = "https://files.pythonhosted.org/packages/e3/d8/56dbbe07aba3cf561fff52b8ed75ce7b694a0710ab676fae4cd7fbf1bdae/uv-0.5.5-py3-none-win_amd64.whl", hash = "sha256:69e15f24493d86c3a2da3764891e35a033ceda09404c1f9b386671d509db95f3", size = 15604848 }, + { url = "https://files.pythonhosted.org/packages/8b/54/b1ae86c0973cc6f0210b53d508ca3641fb6d0c56823f288d108bc7ab3cc8/typing_extensions-4.13.2-py3-none-any.whl", hash = "sha256:a439e7c04b49fec3e5d3e2beaa21755cadbbdc391694e28ccdd36ca4a1408f8c", size = 45806, upload-time = "2025-04-10T14:19:03.967Z" }, ] [[package]] name = "virtualenv" -version = "20.28.0" +version = "20.31.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "distlib" }, { name = "filelock" }, { name = "platformdirs" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/bf/75/53316a5a8050069228a2f6d11f32046cfa94fbb6cc3f08703f59b873de2e/virtualenv-20.28.0.tar.gz", hash = "sha256:2c9c3262bb8e7b87ea801d715fae4495e6032450c71d2309be9550e7364049aa", size = 7650368 } +sdist = { url = "https://files.pythonhosted.org/packages/56/2c/444f465fb2c65f40c3a104fd0c495184c4f2336d65baf398e3c75d72ea94/virtualenv-20.31.2.tar.gz", hash = "sha256:e10c0a9d02835e592521be48b332b6caee6887f332c111aa79a09b9e79efc2af", size = 6076316, upload-time = "2025-05-08T17:58:23.811Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/10/f9/0919cf6f1432a8c4baa62511f8f8da8225432d22e83e3476f5be1a1edc6e/virtualenv-20.28.0-py3-none-any.whl", hash = 
"sha256:23eae1b4516ecd610481eda647f3a7c09aea295055337331bb4e6892ecce47b0", size = 4276702 }, + { url = "https://files.pythonhosted.org/packages/f3/40/b1c265d4b2b62b58576588510fc4d1fe60a86319c8de99fd8e9fec617d2c/virtualenv-20.31.2-py3-none-any.whl", hash = "sha256:36efd0d9650ee985f0cad72065001e66d49a6f24eb44d98980f630686243cf11", size = 6057982, upload-time = "2025-05-08T17:58:21.15Z" }, ]