diff --git a/.github/workflows/pullrequest-push.yml b/.github/workflows/pullrequest-push.yml index cd747cb..797fcb9 100644 --- a/.github/workflows/pullrequest-push.yml +++ b/.github/workflows/pullrequest-push.yml @@ -31,8 +31,8 @@ jobs: tailscale_enabled: false run_command: | poetry install --no-interaction --no-ansi --no-root --only dev - poetry run black magicparse/ --check --diff - poetry run flake8 + ruff check --diff ./ + ruff format --check --diff ./ secrets: inherit unit-tests: diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 90ee6f8..6e3d70c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,14 +1,15 @@ +fail_fast: true repos: - - repo: https://github.com/psf/black - rev: 22.6.0 + - repo: local hooks: - - id: black - language_version: python3.9 - - id: black - alias: black-on-ci - args: ["--check --diff"] - stages: [manual] - - repo: https://github.com/pycqa/flake8 - rev: 5.0.3 - hooks: - - id: flake8 + - id: ruff-check + name: Ruff check + entry: poetry run ruff check + args: [--fix] + language: system + types: [file, python] + - id: ruff-format + name: Ruff format + entry: poetry run ruff format + language: system + types: [file, python] diff --git a/README.md b/README.md index 562f706..1b4272b 100644 --- a/README.md +++ b/README.md @@ -2,20 +2,65 @@ Declarative parser for structured data files. 
-## Installation +## Table of contents -```bash -poetry install magicparse +- [Getting started](#getting-started) + - [Dev requirements](#dev-requirements) +- [Usage](#usage) + - [Parse content](#parse-content) + - [Register a custom transform](#register-custom-transform) + - [Register a custom schema](#register-custom-schema) + - [Stream parsing](#stream-parsing) + - [Custom encoding](#custom-encoding) +- [API Reference](#api-reference) + - [File types](#file-types) + - [Types](#types) + - [Computed fields](#computed-fields) + - [Return types](#return-types) + - [Error handling](#error-handling) +- [License](#license) + + + +## Getting started + +📝 We recommend that you take the time to read this README in full before doing +anything, so that you have a good overview of what you are going to do and avoid +wrongly anticipating any steps. + + + +### Dev requirements + +After this point we expect you to have [mise](https://mise.jdx.dev/) installed on your machine. + +Use mise to install Python and poetry + +```shell +mise install + +# You can then test that they are available +python --version +poetry --version ``` -## Requirements +Install the Python dev dependencies and set up the pre-commit hooks + +```shell +poetry install +pre-commit install +``` -- Python 3.12+ + ## Usage + + ### Parse content + + ```python import magicparse @@ -110,7 +155,9 @@ rows = magicparse.parse(data="...", schema=schema) ``` -### Register a custom transform and parse content + + +### Register a custom transform ```python from uuid import UUID @@ -138,7 +185,9 @@ rows = magicparse.parse("13ec10cc-cc7e-4ee9-b091-9caa6d11aeb2", schema) assert rows == [{"shop-guid": "13ec10cc-cc7e-4ee9-b091-9caa6d11aeb2"}] ``` -### Register a custom schema and parse content + + +### Register a custom schema ```python import magicparse @@ -165,6 +214,8 @@ rows = magicparse.parse("Joe|William|Jack|Averell", schema) assert rows == [{"name": "Joe"}, {"name": "William"}, {"name": "Jack"}, {"name": "Averell"}] ``` + + ### Stream 
parsing For large files, you can use streaming to process data incrementally: @@ -192,6 +243,8 @@ for row in magicparse.stream_parse(data="...", schema=schema): print("Unknown type of row.") ``` + + ### Custom encoding By default, magicparse uses UTF-8 encoding. You can specify a different encoding: @@ -206,16 +259,20 @@ schema = { } ``` -## API + + +## API Reference + + ### File types - CSV (with or without header) - Columnar -### Fields + -#### Types +### Types - str - int @@ -243,6 +300,8 @@ schema = { - divide - round + + ### Computed Fields Types, Pre-processors, Post-processors and validator is same as Field @@ -254,7 +313,9 @@ Types, Pre-processors, Post-processors and validator is same as Field - multiply - coalesce -## Return Types + + +### Return Types The parser returns a list of row objects: @@ -262,7 +323,9 @@ The parser returns a list of row objects: - **`RowFailed`**: Failed to parse row with `errors` message - **`RowSkipped`**: Skipped row with `errors` message -## Error Handling + + +### Error Handling You can configure error handling for types, validators, and processors: @@ -281,50 +344,7 @@ Error handling options: - `"raise"` (default): Raise exception on error - `"skip-row"`: Skip the row and continue processing -## Docker - -The project includes Docker support: - -```bash -# Build and run with docker-compose -docker-compose up --build - -# Or build manually -docker build -t magicparse . -docker run -it magicparse -``` - -## Development - -### Setup - -```bash -# Install dependencies -poetry install - -# Run tests -poetry run pytest - -# Format code -poetry run black . - -# Lint code -poetry run flake8 -``` - -## Breaking Changes - -### Version 1.0.0 - -- output format is now a list of typed result. (Before it was a Tuple[dict, list[dict]]) that represent parsed values and errors). -### Version 0.16.0 - -- **Python 3.12+ required**: Upgraded from Python 3.11 to Python 3.12. Update your Python environment before upgrading. 
- -### Version 0.15.0 - -- **Python 3.11+ required**: Upgraded from Python 3.10 to Python 3.11. Update your Python environment before upgrading. - + ## License diff --git a/magicparse/__init__.py b/magicparse/__init__.py index 1f88d83..19b067a 100644 --- a/magicparse/__init__.py +++ b/magicparse/__init__.py @@ -33,9 +33,7 @@ ] -def parse( - data: Union[bytes, BytesIO], schema_options: Dict[str, Any] -) -> List[RowParsed | RowSkipped | RowFailed]: +def parse(data: Union[bytes, BytesIO], schema_options: Dict[str, Any]) -> List[RowParsed | RowSkipped | RowFailed]: schema_definition = Schema.build(schema_options) return schema_definition.parse(data) @@ -68,9 +66,7 @@ def register(items: Union[Registrable, List[Registrable]]) -> None: elif issubclass(item, Builder): Builder.register(item) else: - raise ValueError( - "transforms must be a subclass of Transform (or a list of it)" - ) + raise ValueError("transforms must be a subclass of Transform (or a list of it)") register(builtins_schemas) diff --git a/magicparse/builders.py b/magicparse/builders.py index c477dab..6e60a30 100644 --- a/magicparse/builders.py +++ b/magicparse/builders.py @@ -35,8 +35,7 @@ def __init__(self, on_error: OnError, fields: list[str]) -> None: or len(fields) < 2 ): raise ValueError( - "composite-processor 'concat': " - "'fields' parameter must be a list[str] with at least two elements" + "composite-processor 'concat': 'fields' parameter must be a list[str] with at least two elements" ) self.fields = fields @@ -53,13 +52,9 @@ class Divide(Builder): def __init__(self, on_error: OnError, numerator: str, denominator: str) -> None: super().__init__(on_error) if not numerator or not isinstance(numerator, str): - raise ValueError( - "builder 'divide': " "'numerator' parameter must be a non null str" - ) + raise ValueError("builder 'divide': 'numerator' parameter must be a non null str") if not denominator or not isinstance(denominator, str): - raise ValueError( - "builder 'divide': " "'denominator' 
parameter must be a non null str" - ) + raise ValueError("builder 'divide': 'denominator' parameter must be a non null str") self.numerator = numerator self.denominator = denominator @@ -75,13 +70,9 @@ class Multiply(Builder): def __init__(self, on_error: OnError, x_factor: str, y_factor: str) -> None: super().__init__(on_error) if not x_factor or not isinstance(x_factor, str): - raise ValueError( - "builder 'multiply': " "'x_factor' parameter must be a non null str" - ) + raise ValueError("builder 'multiply': 'x_factor' parameter must be a non null str") if not y_factor or not isinstance(y_factor, str): - raise ValueError( - "builder 'multiply': " "'y_factor' parameter must be a non null str" - ) + raise ValueError("builder 'multiply': 'y_factor' parameter must be a non null str") self.x_factor = x_factor self.y_factor = y_factor @@ -98,11 +89,7 @@ def __init__(self, on_error: OnError, fields: list[str]) -> None: super().__init__(on_error) if not fields: raise ValueError("parameters should defined fields to coalesce") - if ( - not isinstance(fields, list) - or not all(isinstance(field, str) for field in fields) - or len(fields) < 2 - ): + if not isinstance(fields, list) or not all(isinstance(field, str) for field in fields) or len(fields) < 2: raise ValueError("parameters should have two fields at least") self.fields = fields diff --git a/magicparse/fields.py b/magicparse/fields.py index 9c06a35..56b9d80 100644 --- a/magicparse/fields.py +++ b/magicparse/fields.py @@ -12,29 +12,21 @@ class Field(ABC): def __init__(self, key: str, options: dict) -> None: self.key = key - pre_processors = [ - PreProcessor.build(item) for item in options.get("pre-processors", []) - ] + pre_processors = [PreProcessor.build(item) for item in options.get("pre-processors", [])] type_converter = TypeConverter.build(options) validators = [Validator.build(item) for item in options.get("validators", [])] - post_processors = [ - PostProcessor.build(item) for item in 
options.get("post-processors", []) - ] + post_processors = [PostProcessor.build(item) for item in options.get("post-processors", [])] self.optional = options.get("optional", False) - self.transforms = ( - pre_processors + [type_converter] + validators + post_processors - ) + self.transforms = pre_processors + [type_converter] + validators + post_processors def _process_raw_value(self, raw_value: str) -> Result: if not raw_value: if self.optional: return Ok(value=None) else: - raise ValueError( - f"{self.key} field is required but the value was empty" - ) + raise ValueError(f"{self.key} field is required but the value was empty") for transform in self.transforms: try: raw_value = transform.apply(raw_value) diff --git a/magicparse/post_processors.py b/magicparse/post_processors.py index d8d62ba..9a475f7 100644 --- a/magicparse/post_processors.py +++ b/magicparse/post_processors.py @@ -29,10 +29,7 @@ class Divide(PostProcessor): def __init__(self, on_error: OnError, denominator: int) -> None: super().__init__(on_error) if denominator <= 0: - raise ValueError( - "post-processor 'divide': " - "'denominator' parameter must be a positive integer" - ) + raise ValueError("post-processor 'divide': 'denominator' parameter must be a positive integer") self.denominator = denominator @@ -50,10 +47,7 @@ class Round(PostProcessor): def __init__(self, on_error: OnError, precision: int) -> None: super().__init__(on_error) if precision < 0: - raise ValueError( - "post-processor 'round': " - "'precision' parameter must be a positive or zero integer" - ) + raise ValueError("post-processor 'round': 'precision' parameter must be a positive or zero integer") self.precision = precision diff --git a/magicparse/pre_processors.py b/magicparse/pre_processors.py index 2df95a4..f87238d 100644 --- a/magicparse/pre_processors.py +++ b/magicparse/pre_processors.py @@ -45,9 +45,7 @@ def apply(self, value: str) -> str: try: return self.values[value] except: - raise ValueError( - f"value '{value}' 
does not map to any values in [{self._keys}]" - ) + raise ValueError(f"value '{value}' does not map to any values in [{self._keys}]") @staticmethod def key() -> str: @@ -95,18 +93,14 @@ def __init__(self, on_error: OnError, pattern: str) -> None: super().__init__(on_error) pattern = re.compile(pattern) if "value" not in pattern.groupindex: - raise ValueError( - "regex-extract's pattern must contain a group named 'value'" - ) + raise ValueError("regex-extract's pattern must contain a group named 'value'") self.pattern = pattern def apply(self, value: str) -> str: match = re.match(self.pattern, value) if not match: - raise ValueError( - f"cannot extract value from pattern '{self.pattern.pattern}'" - ) + raise ValueError(f"cannot extract value from pattern '{self.pattern.pattern}'") return match.group("value") diff --git a/magicparse/schema.py b/magicparse/schema.py index 44598e0..e8d1fe9 100644 --- a/magicparse/schema.py +++ b/magicparse/schema.py @@ -34,9 +34,7 @@ class Schema(ABC): def __init__(self, options: Dict[str, Any]) -> None: self.fields = [Field.build(item) for item in options["fields"]] - self.computed_fields = [ - ComputedField.build(item) for item in options.get("computed-fields", []) - ] + self.computed_fields = [ComputedField.build(item) for item in options.get("computed-fields", [])] self.has_header = options.get("has_header", False) self.encoding = options.get("encoding", "utf-8") @@ -65,14 +63,10 @@ def register(cls, schema: "Schema") -> None: cls.registry[schema.key()] = schema - def parse( - self, data: Union[bytes, BytesIO] - ) -> List[RowParsed] | List[RowSkipped] | List[RowFailed]: + def parse(self, data: Union[bytes, BytesIO]) -> List[RowParsed] | List[RowSkipped] | List[RowFailed]: return list(self.stream_parse(data)) - def stream_parse( - self, data: Union[bytes, BytesIO] - ) -> Iterable[RowParsed | RowSkipped | RowFailed]: + def stream_parse(self, data: Union[bytes, BytesIO]) -> Iterable[RowParsed | RowSkipped | RowFailed]: if 
isinstance(data, bytes): stream = BytesIO(data) else: @@ -95,9 +89,7 @@ def stream_parse( yield fields continue - computed_fields = self.process_fields( - self.computed_fields, fields.values, row_number - ) + computed_fields = self.process_fields(self.computed_fields, fields.values, row_number) if not isinstance(computed_fields, RowParsed): yield computed_fields continue @@ -126,11 +118,7 @@ def process_fields( item[field.key] = parsed_value.value if errors: - return ( - RowSkipped(row_number, errors) - if skip_row - else RowFailed(row_number, errors) - ) + return RowSkipped(row_number, errors) if skip_row else RowFailed(row_number, errors) return RowParsed(row_number, item) diff --git a/poetry.lock b/poetry.lock index ffafb6e..46aca23 100644 --- a/poetry.lock +++ b/poetry.lock @@ -19,50 +19,6 @@ PyYAML = ">=3.10,<6.1" rsa = ">=3.1.2,<4.8" s3transfer = ">=0.10.0,<0.11.0" -[[package]] -name = "black" -version = "25.1.0" -description = "The uncompromising code formatter." -optional = false -python-versions = ">=3.9" -files = [ - {file = "black-25.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:759e7ec1e050a15f89b770cefbf91ebee8917aac5c20483bc2d80a6c3a04df32"}, - {file = "black-25.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0e519ecf93120f34243e6b0054db49c00a35f84f195d5bce7e9f5cfc578fc2da"}, - {file = "black-25.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:055e59b198df7ac0b7efca5ad7ff2516bca343276c466be72eb04a3bcc1f82d7"}, - {file = "black-25.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:db8ea9917d6f8fc62abd90d944920d95e73c83a5ee3383493e35d271aca872e9"}, - {file = "black-25.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a39337598244de4bae26475f77dda852ea00a93bd4c728e09eacd827ec929df0"}, - {file = "black-25.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:96c1c7cd856bba8e20094e36e0f948718dc688dba4a9d78c3adde52b9e6c2299"}, - {file = 
"black-25.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bce2e264d59c91e52d8000d507eb20a9aca4a778731a08cfff7e5ac4a4bb7096"}, - {file = "black-25.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:172b1dbff09f86ce6f4eb8edf9dede08b1fce58ba194c87d7a4f1a5aa2f5b3c2"}, - {file = "black-25.1.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:4b60580e829091e6f9238c848ea6750efed72140b91b048770b64e74fe04908b"}, - {file = "black-25.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1e2978f6df243b155ef5fa7e558a43037c3079093ed5d10fd84c43900f2d8ecc"}, - {file = "black-25.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3b48735872ec535027d979e8dcb20bf4f70b5ac75a8ea99f127c106a7d7aba9f"}, - {file = "black-25.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:ea0213189960bda9cf99be5b8c8ce66bb054af5e9e861249cd23471bd7b0b3ba"}, - {file = "black-25.1.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8f0b18a02996a836cc9c9c78e5babec10930862827b1b724ddfe98ccf2f2fe4f"}, - {file = "black-25.1.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:afebb7098bfbc70037a053b91ae8437c3857482d3a690fefc03e9ff7aa9a5fd3"}, - {file = "black-25.1.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:030b9759066a4ee5e5aca28c3c77f9c64789cdd4de8ac1df642c40b708be6171"}, - {file = "black-25.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:a22f402b410566e2d1c950708c77ebf5ebd5d0d88a6a2e87c86d9fb48afa0d18"}, - {file = "black-25.1.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:a1ee0a0c330f7b5130ce0caed9936a904793576ef4d2b98c40835d6a65afa6a0"}, - {file = "black-25.1.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f3df5f1bf91d36002b0a75389ca8663510cf0531cca8aa5c1ef695b46d98655f"}, - {file = "black-25.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:d9e6827d563a2c820772b32ce8a42828dc6790f095f441beef18f96aa6f8294e"}, - {file = "black-25.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:bacabb307dca5ebaf9c118d2d2f6903da0d62c9faa82bd21a33eecc319559355"}, - {file = "black-25.1.0-py3-none-any.whl", hash = "sha256:95e8176dae143ba9097f351d174fdaf0ccd29efb414b362ae3fd72bf0f710717"}, - {file = "black-25.1.0.tar.gz", hash = "sha256:33496d5cd1222ad73391352b4ae8da15253c5de89b93a80b3e2c8d9a19ec2666"}, -] - -[package.dependencies] -click = ">=8.0.0" -mypy-extensions = ">=0.4.3" -packaging = ">=22.0" -pathspec = ">=0.9.0" -platformdirs = ">=2" - -[package.extras] -colorama = ["colorama (>=0.4.3)"] -d = ["aiohttp (>=3.10)"] -jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"] -uvloop = ["uvloop (>=0.15.2)"] - [[package]] name = "botocore" version = "1.35.6" @@ -83,19 +39,16 @@ urllib3 = {version = ">=1.25.4,<2.2.0 || >2.2.0,<3", markers = "python_version > crt = ["awscrt (==0.21.2)"] [[package]] -name = "click" -version = "8.1.7" -description = "Composable command line interface toolkit" +name = "cfgv" +version = "3.4.0" +description = "Validate configuration and produce human readable error messages." 
optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "click-8.1.7-py3-none-any.whl", hash = "sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28"}, - {file = "click-8.1.7.tar.gz", hash = "sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de"}, + {file = "cfgv-3.4.0-py2.py3-none-any.whl", hash = "sha256:b7265b1f29fd3316bfcd2b330d63d024f2bfd8bcb8b0272f8e19a504856c48f9"}, + {file = "cfgv-3.4.0.tar.gz", hash = "sha256:e52591d4c5f5dead8e0f673fb16db7949d2cfb3f7da4582893288f0ded8fe560"}, ] -[package.dependencies] -colorama = {version = "*", markers = "platform_system == \"Windows\""} - [[package]] name = "colorama" version = "0.4.6" @@ -107,6 +60,17 @@ files = [ {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, ] +[[package]] +name = "distlib" +version = "0.4.0" +description = "Distribution utilities" +optional = false +python-versions = "*" +files = [ + {file = "distlib-0.4.0-py2.py3-none-any.whl", hash = "sha256:9659f7d87e46584a30b5780e43ac7a2143098441670ff0a49d5f9034c54a6c16"}, + {file = "distlib-0.4.0.tar.gz", hash = "sha256:feec40075be03a04501a973d81f633735b4b69f98b05450592310c0f401a4e0d"}, +] + [[package]] name = "docutils" version = "0.16" @@ -118,21 +82,32 @@ files = [ {file = "docutils-0.16.tar.gz", hash = "sha256:c2de3a60e9e7d07be26b7f2b00ca0309c207e06c100f9cc2a94931fc75a478fc"}, ] +[[package]] +name = "filelock" +version = "3.19.1" +description = "A platform independent file lock." 
+optional = false +python-versions = ">=3.9" +files = [ + {file = "filelock-3.19.1-py3-none-any.whl", hash = "sha256:d38e30481def20772f5baf097c122c3babc4fcdb7e14e57049eb9d88c6dc017d"}, + {file = "filelock-3.19.1.tar.gz", hash = "sha256:66eda1888b0171c998b35be2bcc0f6d75c388a7ce20c3f3f37aa8e96c2dddf58"}, +] + [[package]] name = "flake8" -version = "7.1.1" +version = "7.3.0" description = "the modular source code checker: pep8 pyflakes and co" optional = false -python-versions = ">=3.8.1" +python-versions = ">=3.9" files = [ - {file = "flake8-7.1.1-py2.py3-none-any.whl", hash = "sha256:597477df7860daa5aa0fdd84bf5208a043ab96b8e96ab708770ae0364dd03213"}, - {file = "flake8-7.1.1.tar.gz", hash = "sha256:049d058491e228e03e67b390f311bbf88fce2dbaa8fa673e7aea87b7198b8d38"}, + {file = "flake8-7.3.0-py2.py3-none-any.whl", hash = "sha256:b9696257b9ce8beb888cdbe31cf885c90d31928fe202be0889a7cdafad32f01e"}, + {file = "flake8-7.3.0.tar.gz", hash = "sha256:fe044858146b9fc69b551a4b490d69cf960fcb78ad1edcb84e7fbb1b4a8e3872"}, ] [package.dependencies] mccabe = ">=0.7.0,<0.8.0" -pycodestyle = ">=2.12.0,<2.13.0" -pyflakes = ">=3.2.0,<3.3.0" +pycodestyle = ">=2.14.0,<2.15.0" +pyflakes = ">=3.4.0,<3.5.0" [[package]] name = "flake8-pyproject" @@ -150,6 +125,20 @@ Flake8 = ">=5" [package.extras] dev = ["pyTest", "pyTest-cov"] +[[package]] +name = "identify" +version = "2.6.14" +description = "File identification library for Python" +optional = false +python-versions = ">=3.9" +files = [ + {file = "identify-2.6.14-py2.py3-none-any.whl", hash = "sha256:11a073da82212c6646b1f39bb20d4483bfb9543bd5566fec60053c4bb309bf2e"}, + {file = "identify-2.6.14.tar.gz", hash = "sha256:663494103b4f717cb26921c52f8751363dc89db64364cd836a9bf1535f53cd6a"}, +] + +[package.extras] +license = ["ukkonen"] + [[package]] name = "iniconfig" version = "2.0.0" @@ -184,14 +173,14 @@ files = [ ] [[package]] -name = "mypy-extensions" -version = "1.0.0" -description = "Type system extensions for programs checked with the mypy 
type checker." +name = "nodeenv" +version = "1.9.1" +description = "Node.js virtual environment builder" optional = false -python-versions = ">=3.5" +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" files = [ - {file = "mypy_extensions-1.0.0-py3-none-any.whl", hash = "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d"}, - {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"}, + {file = "nodeenv-1.9.1-py2.py3-none-any.whl", hash = "sha256:ba11c9782d29c27c70ffbdda2d7415098754709be8a7056d79a737cd901155c9"}, + {file = "nodeenv-1.9.1.tar.gz", hash = "sha256:6ec12890a2dab7946721edbfbcd91f3319c6ccc9aec47be7c7e6b7011ee6645f"}, ] [[package]] @@ -205,32 +194,21 @@ files = [ {file = "packaging-24.1.tar.gz", hash = "sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002"}, ] -[[package]] -name = "pathspec" -version = "0.12.1" -description = "Utility library for gitignore style pattern matching of file paths." -optional = false -python-versions = ">=3.8" -files = [ - {file = "pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08"}, - {file = "pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712"}, -] - [[package]] name = "platformdirs" -version = "4.2.2" +version = "4.4.0" description = "A small Python package for determining appropriate platform-specific dirs, e.g. a `user data dir`." 
optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" files = [ - {file = "platformdirs-4.2.2-py3-none-any.whl", hash = "sha256:2d7a1657e36a80ea911db832a8a6ece5ee53d8de21edd5cc5879af6530b1bfee"}, - {file = "platformdirs-4.2.2.tar.gz", hash = "sha256:38b7b51f512eed9e84a22788b4bce1de17c0adb134d6becb09836e37d8654cd3"}, + {file = "platformdirs-4.4.0-py3-none-any.whl", hash = "sha256:abd01743f24e5287cd7a5db3752faf1a2d65353f38ec26d98e25a6db65958c85"}, + {file = "platformdirs-4.4.0.tar.gz", hash = "sha256:ca753cf4d81dc309bc67b0ea38fd15dc97bc30ce419a7f58d13eb3bf14c4febf"}, ] [package.extras] -docs = ["furo (>=2023.9.10)", "proselint (>=0.13)", "sphinx (>=7.2.6)", "sphinx-autodoc-typehints (>=1.25.2)"] -test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.4.3)", "pytest-cov (>=4.1)", "pytest-mock (>=3.12)"] -type = ["mypy (>=1.8)"] +docs = ["furo (>=2024.8.6)", "proselint (>=0.14)", "sphinx (>=8.1.3)", "sphinx-autodoc-typehints (>=3)"] +test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=8.3.4)", "pytest-cov (>=6)", "pytest-mock (>=3.14)"] +type = ["mypy (>=1.14.1)"] [[package]] name = "pluggy" @@ -247,6 +225,24 @@ files = [ dev = ["pre-commit", "tox"] testing = ["pytest", "pytest-benchmark"] +[[package]] +name = "pre-commit" +version = "4.3.0" +description = "A framework for managing and maintaining multi-language pre-commit hooks." 
+optional = false +python-versions = ">=3.9" +files = [ + {file = "pre_commit-4.3.0-py2.py3-none-any.whl", hash = "sha256:2b0747ad7e6e967169136edffee14c16e148a778a54e4f967921aa1ebf2308d8"}, + {file = "pre_commit-4.3.0.tar.gz", hash = "sha256:499fe450cc9d42e9d58e606262795ecb64dd05438943c62b66f6a8673da30b16"}, +] + +[package.dependencies] +cfgv = ">=2.0.0" +identify = ">=1.0.0" +nodeenv = ">=0.11.1" +pyyaml = ">=5.1" +virtualenv = ">=20.10.0" + [[package]] name = "pyasn1" version = "0.6.0" @@ -260,24 +256,24 @@ files = [ [[package]] name = "pycodestyle" -version = "2.12.1" +version = "2.14.0" description = "Python style guide checker" optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" files = [ - {file = "pycodestyle-2.12.1-py2.py3-none-any.whl", hash = "sha256:46f0fb92069a7c28ab7bb558f05bfc0110dac69a0cd23c61ea0040283a9d78b3"}, - {file = "pycodestyle-2.12.1.tar.gz", hash = "sha256:6838eae08bbce4f6accd5d5572075c63626a15ee3e6f842df996bf62f6d73521"}, + {file = "pycodestyle-2.14.0-py2.py3-none-any.whl", hash = "sha256:dd6bf7cb4ee77f8e016f9c8e74a35ddd9f67e1d5fd4184d86c3b98e07099f42d"}, + {file = "pycodestyle-2.14.0.tar.gz", hash = "sha256:c4b5b517d278089ff9d0abdec919cd97262a3367449ea1c8b49b91529167b783"}, ] [[package]] name = "pyflakes" -version = "3.2.0" +version = "3.4.0" description = "passive checker of Python programs" optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" files = [ - {file = "pyflakes-3.2.0-py2.py3-none-any.whl", hash = "sha256:84b5be138a2dfbb40689ca07e2152deb896a65c3a3e24c251c5c62489568074a"}, - {file = "pyflakes-3.2.0.tar.gz", hash = "sha256:1c61603ff154621fb2a9172037d84dca3500def8c8b630657d1701f026f8af3f"}, + {file = "pyflakes-3.4.0-py2.py3-none-any.whl", hash = "sha256:f742a7dbd0d9cb9ea41e9a24a918996e8170c799fa528688d40dd582c8265f4f"}, + {file = "pyflakes-3.4.0.tar.gz", hash = "sha256:b24f96fafb7d2ab0ec5075b7350b3d2d2218eab42003821c06344973d3ea2f58"}, ] [[package]] @@ -390,6 +386,34 @@ files = [ 
[package.dependencies] pyasn1 = ">=0.1.3" +[[package]] +name = "ruff" +version = "0.13.1" +description = "An extremely fast Python linter and code formatter, written in Rust." +optional = false +python-versions = ">=3.7" +files = [ + {file = "ruff-0.13.1-py3-none-linux_armv6l.whl", hash = "sha256:b2abff595cc3cbfa55e509d89439b5a09a6ee3c252d92020bd2de240836cf45b"}, + {file = "ruff-0.13.1-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:4ee9f4249bf7f8bb3984c41bfaf6a658162cdb1b22e3103eabc7dd1dc5579334"}, + {file = "ruff-0.13.1-py3-none-macosx_11_0_arm64.whl", hash = "sha256:5c5da4af5f6418c07d75e6f3224e08147441f5d1eac2e6ce10dcce5e616a3bae"}, + {file = "ruff-0.13.1-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:80524f84a01355a59a93cef98d804e2137639823bcee2931f5028e71134a954e"}, + {file = "ruff-0.13.1-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ff7f5ce8d7988767dd46a148192a14d0f48d1baea733f055d9064875c7d50389"}, + {file = "ruff-0.13.1-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c55d84715061f8b05469cdc9a446aa6c7294cd4bd55e86a89e572dba14374f8c"}, + {file = "ruff-0.13.1-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:ac57fed932d90fa1624c946dc67a0a3388d65a7edc7d2d8e4ca7bddaa789b3b0"}, + {file = "ruff-0.13.1-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c366a71d5b4f41f86a008694f7a0d75fe409ec298685ff72dc882f882d532e36"}, + {file = "ruff-0.13.1-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f4ea9d1b5ad3e7a83ee8ebb1229c33e5fe771e833d6d3dcfca7b77d95b060d38"}, + {file = "ruff-0.13.1-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b0f70202996055b555d3d74b626406476cc692f37b13bac8828acff058c9966a"}, + {file = "ruff-0.13.1-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:f8cff7a105dad631085d9505b491db33848007d6b487c3c1979dd8d9b2963783"}, + {file = "ruff-0.13.1-py3-none-musllinux_1_2_aarch64.whl", hash 
= "sha256:9761e84255443316a258dd7dfbd9bfb59c756e52237ed42494917b2577697c6a"}, + {file = "ruff-0.13.1-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:3d376a88c3102ef228b102211ef4a6d13df330cb0f5ca56fdac04ccec2a99700"}, + {file = "ruff-0.13.1-py3-none-musllinux_1_2_i686.whl", hash = "sha256:cbefd60082b517a82c6ec8836989775ac05f8991715d228b3c1d86ccc7df7dae"}, + {file = "ruff-0.13.1-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:dd16b9a5a499fe73f3c2ef09a7885cb1d97058614d601809d37c422ed1525317"}, + {file = "ruff-0.13.1-py3-none-win32.whl", hash = "sha256:55e9efa692d7cb18580279f1fbb525146adc401f40735edf0aaeabd93099f9a0"}, + {file = "ruff-0.13.1-py3-none-win_amd64.whl", hash = "sha256:3a3fb595287ee556de947183489f636b9f76a72f0fa9c028bdcabf5bab2cc5e5"}, + {file = "ruff-0.13.1-py3-none-win_arm64.whl", hash = "sha256:c0bae9ffd92d54e03c2bf266f466da0a65e145f298ee5b5846ed435f6a00518a"}, + {file = "ruff-0.13.1.tar.gz", hash = "sha256:88074c3849087f153d4bb22e92243ad4c1b366d7055f98726bc19aa08dc12d51"}, +] + [[package]] name = "s3transfer" version = "0.10.2" @@ -435,7 +459,27 @@ h2 = ["h2 (>=4,<5)"] socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] zstd = ["zstandard (>=0.18.0)"] +[[package]] +name = "virtualenv" +version = "20.34.0" +description = "Virtual Python Environment builder" +optional = false +python-versions = ">=3.8" +files = [ + {file = "virtualenv-20.34.0-py3-none-any.whl", hash = "sha256:341f5afa7eee943e4984a9207c025feedd768baff6753cd660c857ceb3e36026"}, + {file = "virtualenv-20.34.0.tar.gz", hash = "sha256:44815b2c9dee7ed86e387b842a84f20b93f7f417f95886ca1996a72a4138eb1a"}, +] + +[package.dependencies] +distlib = ">=0.3.7,<1" +filelock = ">=3.12.2,<4" +platformdirs = ">=3.9.1,<5" + +[package.extras] +docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.2,!=7.3)", "sphinx-argparse (>=0.4)", "sphinxcontrib-towncrier (>=0.2.1a0)", "towncrier (>=23.6)"] +test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7)", 
"packaging (>=23.1)", "pytest (>=7.4)", "pytest-env (>=0.8.2)", "pytest-freezer (>=0.4.8)", "pytest-mock (>=3.11.1)", "pytest-randomly (>=3.12)", "pytest-timeout (>=2.1)", "setuptools (>=68)", "time-machine (>=2.10)"] + [metadata] lock-version = "2.0" python-versions = "^3.12" -content-hash = "90a338e1e25da79823b5fd3ba82a57631710e8dfe6f54cd1ec9751038b2e1f71" +content-hash = "fe8198ebec8153b02401f4c89ae4a2b26e49d6d07a463ff3003611527084c482" diff --git a/pyproject.toml b/pyproject.toml index b7095ad..0987855 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,20 +11,25 @@ repository = "https://github.com/ZeroGachis/magicparse" python = "^3.12" [tool.poetry.group.dev.dependencies] -black = "^25.0.0" -flake8 = "^7.0.0" pytest = "^8.0.0" awscli = "~1" flake8-pyproject = "~1.2.3" +ruff = "^0.13.1" +pre-commit = "^4.3.0" [build-system] requires = ["poetry-core>=1.2.0"] build-backend = "poetry.masonry.api" -[tool.flake8] -max-line-length = 88 -extend-ignore = ["E203", "E722"] -exclude = [".git/", ".pytest_cache/", ".venv"] + +[tool.ruff] +line-length = 120 +target-version = "py312" + +[tool.ruff.lint] +ignore = [ + "E722", # Allow for bare 'except' +] [tool.pytest.ini_options] -python_files = ["tests/*"] \ No newline at end of file +python_files = ["tests/*"] diff --git a/tests/test_builders.py b/tests/test_builders.py index ce941cd..2eff7a1 100644 --- a/tests/test_builders.py +++ b/tests/test_builders.py @@ -36,9 +36,7 @@ def test_without_parameter(self): def test_with_parameter(self): Builder.register(self.WithParamBuilder) - builder = Builder.build( - {"name": "with-param", "parameters": {"setting": "value"}} - ) + builder = Builder.build({"name": "with-param", "parameters": {"setting": "value"}}) assert isinstance(builder, self.WithParamBuilder) assert builder.setting == "value" @@ -73,34 +71,26 @@ def test_fields_params_has_less_than_two_field(self): Builder.build({"name": "concat", "parameters": {"fields": ["code"]}}) def test_field_not_present(self): - builder = 
Builder.build( - {"name": "concat", "parameters": {"fields": ["code_1", "code_2"]}} - ) + builder = Builder.build({"name": "concat", "parameters": {"fields": ["code_1", "code_2"]}}) with pytest.raises(KeyError): builder.apply({}) def test_concat_two_fields(self): - builder = Builder.build( - {"name": "concat", "parameters": {"fields": ["code_1", "code_2"]}} - ) + builder = Builder.build({"name": "concat", "parameters": {"fields": ["code_1", "code_2"]}}) result = builder.apply({"code_1": "X", "code_2": "Y"}) assert result == "XY" def test_concat_three_fields(self): - builder = Builder.build( - {"name": "concat", "parameters": {"fields": ["code_1", "code_2", "code_3"]}} - ) + builder = Builder.build({"name": "concat", "parameters": {"fields": ["code_1", "code_2", "code_3"]}}) result = builder.apply({"code_1": "X", "code_2": "Y", "code_3": "Z"}) assert result == "XYZ" def test_concat_integer(self): - builder = Builder.build( - {"name": "concat", "parameters": {"fields": ["code_1", "code_2"]}} - ) + builder = Builder.build({"name": "concat", "parameters": {"fields": ["code_1", "code_2"]}}) with pytest.raises(TypeError): builder.apply({"code_1": 1, "code_2": 2}) @@ -246,45 +236,26 @@ def test_empty_params(self): Builder.build({"name": "coalesce", "parameters": ""}) def test_fields_params_empty(self): - with pytest.raises( - ValueError, - match="parameters should defined fields to coalesce" - ): + with pytest.raises(ValueError, match="parameters should defined fields to coalesce"): Builder.build({"name": "coalesce", "parameters": {"fields": ""}}) def test_fields_params_not_a_list_of_str(self): - with pytest.raises( - ValueError, - match="parameters should have two fields at least" - ): + with pytest.raises(ValueError, match="parameters should have two fields at least"): Builder.build({"name": "coalesce", "parameters": {"fields": "xxx"}}) def test_fields_params_has_less_than_two_fields(self): - with pytest.raises( - ValueError, - match="parameters should have two fields 
at least" - ): + with pytest.raises(ValueError, match="parameters should have two fields at least"): Builder.build({"name": "coalesce", "parameters": {"fields": ["field"]}}) def test_return_first_non_empty_value(self): - coalesce = Builder.build( - { - "name": "coalesce", - "parameters": {"fields": ["field1", "field2"]} - } - ) + coalesce = Builder.build({"name": "coalesce", "parameters": {"fields": ["field1", "field2"]}}) result = coalesce.apply({"field1": "", "field2": "value"}) assert result == "value" def test_return_none_if_all_values_are_empty(self): - coalesce = Builder.build( - { - "name": "coalesce", - "parameters": {"fields": ["field1", "field2"]} - } - ) + coalesce = Builder.build({"name": "coalesce", "parameters": {"fields": ["field1", "field2"]}}) result = coalesce.apply({"field1": "", "field2": ""}) diff --git a/tests/test_computed_fields.py b/tests/test_computed_fields.py index 62dcd43..e517e13 100644 --- a/tests/test_computed_fields.py +++ b/tests/test_computed_fields.py @@ -29,7 +29,7 @@ def test_with_valid_builder(self): "name": "concat", "parameters": {"fields": ["code_1", "code_2"]}, }, - } + }, ) computed = field.parse({"code_1": "01", "code_2": "02"}) @@ -46,7 +46,7 @@ def test_error_format(self): "name": "concat", "parameters": {"fields": ["code_1", "code_2"]}, }, - } + }, ) with pytest.raises(KeyError) as error: diff --git a/tests/test_encodings.py b/tests/test_encodings.py index 5e48874..5b1841d 100644 --- a/tests/test_encodings.py +++ b/tests/test_encodings.py @@ -31,21 +31,13 @@ def test_exotic_encoding(self): ) rows = schema.parse( - "Да здравствует Владимир проклятый\n" - "Да здравствует Карл Маркс\n" - "Да здравствует Россия\n".encode("iso8859_5") + "Да здравствует Владимир проклятый\nДа здравствует Карл Маркс\nДа здравствует Россия\n".encode("iso8859_5") ) assert rows == [ - RowParsed( - row_number=1, values={"name": "Да здравствует Владимир проклятый"} - ), - RowParsed( - row_number=2, values={"name": "Да здравствует Карл Маркс"} - 
), - RowParsed( - row_number=3, values={"name": "Да здравствует Россия"} - ), + RowParsed(row_number=1, values={"name": "Да здравствует Владимир проклятый"}), + RowParsed(row_number=2, values={"name": "Да здравствует Карл Маркс"}), + RowParsed(row_number=3, values={"name": "Да здравствует Россия"}), ] @@ -96,16 +88,7 @@ def test_exotic_encoding(self): ) assert rows == [ - RowParsed( - row_number=1, - values={"name": "Да здравствует Владимир проклятый"} - ), - RowParsed( - row_number=2, - values={"name": "Да здравствует Карл Маркс "} - ), - RowParsed( - row_number=3, - values={"name": "Да здравствует Россия "} - ), + RowParsed(row_number=1, values={"name": "Да здравствует Владимир проклятый"}), + RowParsed(row_number=2, values={"name": "Да здравствует Карл Маркс "}), + RowParsed(row_number=3, values={"name": "Да здравствует Россия "}), ] diff --git a/tests/test_fields.py b/tests/test_fields.py index 8526162..a534334 100644 --- a/tests/test_fields.py +++ b/tests/test_fields.py @@ -23,10 +23,8 @@ def test_chain_transformations(): { "type": "str", "pre-processors": [{"name": "strip-whitespaces"}], - "validators": [ - {"name": "regex-matches", "parameters": {"pattern": "^mac .*$"}} - ], - } + "validators": [{"name": "regex-matches", "parameters": {"pattern": "^mac .*$"}}], + }, ) assert len(field.transforms) == 3 assert isinstance(field.transforms[0], StripWhitespaces) @@ -51,7 +49,7 @@ def test_chain_transformations_with_post_processors(): } ], "post-processors": [{"name": "divide", "parameters": {"denominator": 100}}], - } + }, ) assert len(field.transforms) == 3 assert isinstance(field.transforms[0], Replace) @@ -74,9 +72,7 @@ def test_csv_error_format(): def test_columnar_error_format(): - field = ColumnarField( - "ratio", {"type": "decimal", "column-start": 0, "column-length": 5} - ) + field = ColumnarField("ratio", {"type": "decimal", "column-start": 0, "column-length": 5}) with pytest.raises(ValueError) as error: field.parse("hello") @@ -102,7 +98,7 @@ def 
test_optional_field(): } ], "post-processors": [{"name": "divide", "parameters": {"denominator": 100}}], - } + }, ) assert field.parse("XXX150") == Ok(value=Decimal("1.50")) assert field.parse("") == Ok(value=None) @@ -114,7 +110,7 @@ def test_required_field(): { "type": "decimal", "optional": False, - } + }, ) assert field.parse("1.5") == Ok(value=Decimal("1.50")) @@ -124,25 +120,19 @@ def test_require_field_with_empty_value(): "pepito", { "type": "decimal", - } + }, ) - with pytest.raises( - ValueError, match="pepito field is required but the value was empty" - ): + with pytest.raises(ValueError, match="pepito field is required but the value was empty"): field.parse("") def test_field_without_key(): - with pytest.raises( - ValueError, match="key is required in field definition" - ): + with pytest.raises(ValueError, match="key is required in field definition"): Field.build({"type": "decimal"}) def test_field_without_position_or_column_definition(): - with pytest.raises( - ValueError, match="missing field position for field: 'field_key'" - ): + with pytest.raises(ValueError, match="missing field position for field: 'field_key'"): Field.build({"key": "field_key", "type": "decimal"}) diff --git a/tests/test_post_processors.py b/tests/test_post_processors.py index b6bd4e1..286f472 100644 --- a/tests/test_post_processors.py +++ b/tests/test_post_processors.py @@ -6,9 +6,7 @@ class TestBuild(TestCase): def test_divide(self): - pre_processor = PostProcessor.build( - {"name": "divide", "parameters": {"denominator": 100}} - ) + pre_processor = PostProcessor.build({"name": "divide", "parameters": {"denominator": 100}}) assert isinstance(pre_processor, Divide) assert pre_processor.denominator == 100 @@ -23,45 +21,31 @@ def test_no_name_provided(self): class TestDivide(TestCase): def test_fail_when_denominator_is_zero(self): - error_message = ( - "post-processor 'divide': " - "'denominator' parameter must be a positive integer" - ) + error_message = "post-processor 'divide': 
'denominator' parameter must be a positive integer" with pytest.raises(ValueError, match=error_message): PostProcessor.build({"name": "divide", "parameters": {"denominator": 0}}) def test_divide_int(self): - post_processor = PostProcessor.build( - {"name": "divide", "parameters": {"denominator": 100}} - ) + post_processor = PostProcessor.build({"name": "divide", "parameters": {"denominator": 100}}) assert post_processor.apply(150) == 1.5 def test_divide_float(self): - post_processor = PostProcessor.build( - {"name": "divide", "parameters": {"denominator": 100}} - ) + post_processor = PostProcessor.build({"name": "divide", "parameters": {"denominator": 100}}) assert post_processor.apply(1.63) == 0.0163 def test_divide_decimal(self): - post_processor = PostProcessor.build( - {"name": "divide", "parameters": {"denominator": 100}} - ) + post_processor = PostProcessor.build({"name": "divide", "parameters": {"denominator": 100}}) assert post_processor.apply(Decimal("1.63")) == Decimal("0.0163") class TestRound(TestCase): def test_with_negative_precision(self): - error_message = ( - "post-processor 'round': " - "'precision' parameter must be a positive or zero integer" - ) + error_message = "post-processor 'round': 'precision' parameter must be a positive or zero integer" with pytest.raises(ValueError, match=error_message): PostProcessor.build({"name": "round", "parameters": {"precision": -2}}) def test_with_valid_precision(self): - post_processor = PostProcessor.build( - {"name": "round", "parameters": {"precision": 2}} - ) + post_processor = PostProcessor.build({"name": "round", "parameters": {"precision": 2}}) assert post_processor.apply(3.14159265359) == 3.14 diff --git a/tests/test_pre_processors.py b/tests/test_pre_processors.py index 2f5fb6e..a633129 100644 --- a/tests/test_pre_processors.py +++ b/tests/test_pre_processors.py @@ -14,23 +14,17 @@ class TestBuild(TestCase): def test_left_pad_zeroes(self): - pre_processor = PreProcessor.build( - {"name": 
"left-pad-zeroes", "parameters": {"width": 10}} - ) + pre_processor = PreProcessor.build({"name": "left-pad-zeroes", "parameters": {"width": 10}}) assert isinstance(pre_processor, LeftPadZeroes) assert pre_processor.width == 10 def test_map(self): - pre_processor = PreProcessor.build( - {"name": "map", "parameters": {"values": {"input": "output"}}} - ) + pre_processor = PreProcessor.build({"name": "map", "parameters": {"values": {"input": "output"}}}) assert isinstance(pre_processor, Map) assert pre_processor.values == {"input": "output"} def test_replace(self): - pre_processor = PreProcessor.build( - {"name": "replace", "parameters": {"pattern": "aa", "replacement": "bb"}} - ) + pre_processor = PreProcessor.build({"name": "replace", "parameters": {"pattern": "aa", "replacement": "bb"}}) assert isinstance(pre_processor, Replace) assert pre_processor.pattern == "aa" assert pre_processor.replacement == "bb" @@ -40,9 +34,7 @@ def test_strip_whitespaces(self): assert isinstance(pre_processor, StripWhitespaces) def test_left_strip(self): - pre_processor = PreProcessor.build( - {"name": "left-strip", "parameters": {"characters": "0"}} - ) + pre_processor = PreProcessor.build({"name": "left-strip", "parameters": {"characters": "0"}}) assert isinstance(pre_processor, LeftStrip) def test_regex_extract(self): @@ -67,23 +59,17 @@ def test_no_name_provided(self): class TestLeftPadZeroes(TestCase): def test_do_nothing(self): - pre_processor = PreProcessor.build( - {"name": "left-pad-zeroes", "parameters": {"width": 10}} - ) + pre_processor = PreProcessor.build({"name": "left-pad-zeroes", "parameters": {"width": 10}}) assert pre_processor.apply("abcdefghij") == "abcdefghij" def test_pad(self): - pre_processor = PreProcessor.build( - {"name": "left-pad-zeroes", "parameters": {"width": 10}} - ) + pre_processor = PreProcessor.build({"name": "left-pad-zeroes", "parameters": {"width": 10}}) assert pre_processor.apply("abc") == "0000000abc" class TestMap(TestCase): def 
test_unknown_input(self): - pre_processor = PreProcessor.build( - {"name": "map", "parameters": {"values": {"A": "1", "B": "2"}}} - ) + pre_processor = PreProcessor.build({"name": "map", "parameters": {"values": {"A": "1", "B": "2"}}}) with pytest.raises( ValueError, match="value 'an input' does not map to any values in \\['A', 'B'\\]", @@ -91,23 +77,17 @@ def test_unknown_input(self): pre_processor.apply("an input") def test_known_input(self): - pre_processor = PreProcessor.build( - {"name": "map", "parameters": {"values": {"A": "1", "B": "2"}}} - ) + pre_processor = PreProcessor.build({"name": "map", "parameters": {"values": {"A": "1", "B": "2"}}}) assert pre_processor.apply("A") == "1" class TestReplace(TestCase): def test_pattern_not_found(self): - pre_processor = PreProcessor.build( - {"name": "replace", "parameters": {"pattern": "bbb", "replacement": "BBB"}} - ) + pre_processor = PreProcessor.build({"name": "replace", "parameters": {"pattern": "bbb", "replacement": "BBB"}}) assert pre_processor.apply("an input") == "an input" def test_success(self): - pre_processor = PreProcessor.build( - {"name": "replace", "parameters": {"pattern": "bbb", "replacement": "BBB"}} - ) + pre_processor = PreProcessor.build({"name": "replace", "parameters": {"pattern": "bbb", "replacement": "BBB"}}) assert pre_processor.apply("aaabbbccc") == "aaaBBBccc" @@ -123,15 +103,11 @@ def test_success(self): class TestLeftStrip(TestCase): def test_do_nothing(self): - pre_processor = PreProcessor.build( - {"name": "left-strip", "parameters": {"characters": "0"}} - ) + pre_processor = PreProcessor.build({"name": "left-strip", "parameters": {"characters": "0"}}) assert pre_processor.apply("12345") == "12345" def test_success(self): - pre_processor = PreProcessor.build( - {"name": "left-strip", "parameters": {"characters": "0"}} - ) + pre_processor = PreProcessor.build({"name": "left-strip", "parameters": {"characters": "0"}}) assert pre_processor.apply("0000012345") == "12345" @@ -141,9 
+117,7 @@ def test_build_without_value_group(self): ValueError, match=r"regex-extract's pattern must contain a group named 'value'", ): - PreProcessor.build( - {"name": "regex-extract", "parameters": {"pattern": "xxx"}} - ) + PreProcessor.build({"name": "regex-extract", "parameters": {"pattern": "xxx"}}) def test_pattern_not_found(self): pre_processor = PreProcessor.build( @@ -155,10 +129,7 @@ def test_pattern_not_found(self): with pytest.raises(ValueError) as error: pre_processor.apply("an input") - assert ( - error.value.args[0] - == "cannot extract value from pattern '^xxx(?P\\d{13})xxx$'" - ) + assert error.value.args[0] == "cannot extract value from pattern '^xxx(?P\\d{13})xxx$'" def test_pattern_found(self): pre_processor = PreProcessor.build( diff --git a/tests/test_schema.py b/tests/test_schema.py index 6868fe8..9f3ebc7 100644 --- a/tests/test_schema.py +++ b/tests/test_schema.py @@ -4,9 +4,7 @@ from magicparse import Schema from magicparse.post_processors import PostProcessor from magicparse.pre_processors import PreProcessor -from magicparse.schema import ( - ColumnarSchema, CsvSchema, RowParsed, RowFailed, RowSkipped -) +from magicparse.schema import ColumnarSchema, CsvSchema, RowParsed, RowFailed, RowSkipped from magicparse.fields import ColumnarField, CsvField import pytest from unittest import TestCase @@ -104,11 +102,16 @@ def test_error_display_row_number(self): ) rows = schema.parse(b"a") assert rows == [ - RowFailed(row_number=1, errors=[{ - "column-number": 1, - "field-key": "age", - "error": "value 'a' is not a valid integer", - }]) + RowFailed( + row_number=1, + errors=[ + { + "column-number": 1, + "field-key": "age", + "error": "value 'a' is not a valid integer", + } + ], + ) ] def test_errors_do_not_halt_parsing(self): @@ -121,11 +124,16 @@ def test_errors_do_not_halt_parsing(self): rows = schema.parse(b"1\na\n2") assert rows == [ RowParsed(row_number=1, values={"age": 1}), - RowFailed(row_number=2, errors=[{ - "column-number": 1, - 
"field-key": "age", - "error": "value 'a' is not a valid integer", - }]), + RowFailed( + row_number=2, + errors=[ + { + "column-number": 1, + "field-key": "age", + "error": "value 'a' is not a valid integer", + } + ], + ), RowParsed(row_number=3, values={"age": 2}), ] @@ -188,39 +196,45 @@ def test_error_display_row_number(self): schema = Schema.build( { "file_type": "columnar", - "fields": [ - {"key": "age", "type": "int", "column-start": 0, "column-length": 1} - ], + "fields": [{"key": "age", "type": "int", "column-start": 0, "column-length": 1}], } ) rows = schema.parse(b"a") assert rows == [ - RowFailed(row_number=1, errors=[{ - "column-start": 0, - "column-length": 1, - "field-key": "age", - "error": "value 'a' is not a valid integer", - }]) + RowFailed( + row_number=1, + errors=[ + { + "column-start": 0, + "column-length": 1, + "field-key": "age", + "error": "value 'a' is not a valid integer", + } + ], + ) ] def test_errors_do_not_halt_parsing(self): schema = Schema.build( { "file_type": "columnar", - "fields": [ - {"key": "age", "type": "int", "column-start": 0, "column-length": 1} - ], + "fields": [{"key": "age", "type": "int", "column-start": 0, "column-length": 1}], } ) rows = schema.parse(b"1\na\n2") assert rows == [ RowParsed(row_number=1, values={"age": 1}), - RowFailed(row_number=2, errors=[{ - "column-start": 0, - "column-length": 1, - "field-key": "age", - "error": "value 'a' is not a valid integer", - }]), + RowFailed( + row_number=2, + errors=[ + { + "column-start": 0, + "column-length": 1, + "field-key": "age", + "error": "value 'a' is not a valid integer", + } + ], + ), RowParsed(row_number=3, values={"age": 2}), ] @@ -252,7 +266,7 @@ def test_should_return_all_errors_in_a_row(self): "delimiter": ";", "fields": [ {"key": "age", "type": "int", "column-number": 1}, - {"key": "age2", "type": "int", "column-number": 2} + {"key": "age2", "type": "int", "column-number": 2}, ], } ) @@ -260,18 +274,21 @@ def test_should_return_all_errors_in_a_row(self): 
rows = schema.parse(b"a;a") assert rows == [ - RowFailed(row_number=1, errors=[ - { - "column-number": 1, - "field-key": "age", - "error": "value 'a' is not a valid integer", - }, - { - "column-number": 2, - "field-key": "age2", - "error": "value 'a' is not a valid integer", - } - ]) + RowFailed( + row_number=1, + errors=[ + { + "column-number": 1, + "field-key": "age", + "error": "value 'a' is not a valid integer", + }, + { + "column-number": 2, + "field-key": "age2", + "error": "value 'a' is not a valid integer", + }, + ], + ) ] def test_skip_is_prioritized_over_errors(self): @@ -285,14 +302,10 @@ def test_skip_is_prioritized_over_errors(self): "type": "int", "column-number": 1, "validators": [ - { - "name": "greater-than", - "parameters": {"threshold": 0}, - "on-error": "skip-row" - } + {"name": "greater-than", "parameters": {"threshold": 0}, "on-error": "skip-row"} ], }, - {"key": "age2", "type": "int", "column-number": 2} + {"key": "age2", "type": "int", "column-number": 2}, ], } ) @@ -300,18 +313,21 @@ def test_skip_is_prioritized_over_errors(self): rows = schema.parse(b"-1;a") assert rows == [ - RowSkipped(row_number=1, errors=[ - { - "column-number": 1, - "field-key": "age", - "error": "value must be greater than 0", - }, - { - "column-number": 2, - "field-key": "age2", - "error": "value 'a' is not a valid integer", - } - ]) + RowSkipped( + row_number=1, + errors=[ + { + "column-number": 1, + "field-key": "age", + "error": "value must be greater than 0", + }, + { + "column-number": 2, + "field-key": "age2", + "error": "value 'a' is not a valid integer", + }, + ], + ) ] @@ -325,9 +341,7 @@ def test_no_quote(self): } ) rows = schema.parse(b"column_1\n6.66") - assert rows == [ - RowParsed(row_number=2, values={"column_1": Decimal("6.66")}) - ] + assert rows == [RowParsed(row_number=2, values={"column_1": Decimal("6.66")})] def test_single_quote(self): schema = Schema.build( @@ -339,9 +353,7 @@ def test_single_quote(self): } ) rows = 
schema.parse(b"column_1\n'6.66'") - assert rows == [ - RowParsed(row_number=2, values={"column_1": Decimal("6.66")}) - ] + assert rows == [RowParsed(row_number=2, values={"column_1": Decimal("6.66")})] def test_double_quote(self): schema = Schema.build( @@ -353,9 +365,7 @@ def test_double_quote(self): } ) rows = schema.parse(b'column_1\n"6.66"') - assert rows == [ - RowParsed(row_number=2, values={"column_1": Decimal("6.66")}) - ] + assert rows == [RowParsed(row_number=2, values={"column_1": Decimal("6.66")})] def test_asymetrical_quote(self): schema = Schema.build( @@ -366,9 +376,7 @@ def test_asymetrical_quote(self): } ) rows = schema.parse(b'column_1\n"test ""quoting""') - assert rows == [ - RowParsed(row_number=2, values={"column_1": '"test ""quoting""'}) - ] + assert rows == [RowParsed(row_number=2, values={"column_1": '"test ""quoting""'})] class TestRegister(TestCase): @@ -412,7 +420,8 @@ def test_stream_parse_errors_do_not_halt_parsing(self): "field-key": "age", "error": "value 'a' is not a valid integer", } - ]), + ], + ), RowParsed(row_number=3, values={"age": 2}), ] @@ -507,7 +516,7 @@ def test_computed_fields_can_be_used_by_next_computed_fields(self): "denominator": "field_3", }, }, - } + }, ], } ) @@ -522,8 +531,8 @@ def test_computed_fields_can_be_used_by_next_computed_fields(self): "field_2": 4, "field_3": 2, "multiply_field_result": 12, - "divide_field_result": Decimal("6") - } + "divide_field_result": Decimal("6"), + }, ) ] @@ -533,18 +542,21 @@ def test_default_behavior_raise(self): schema = Schema.build( { "file_type": "csv", - "fields": [ - {"key": "age", "type": "int", "column-number": 1} - ], + "fields": [{"key": "age", "type": "int", "column-number": 1}], } ) rows = list(schema.stream_parse(b"a")) assert rows == [ - RowFailed(row_number=1, errors=[{ - "column-number": 1, - "field-key": "age", - "error": "value 'a' is not a valid integer", - }]) + RowFailed( + row_number=1, + errors=[ + { + "column-number": 1, + "field-key": "age", + "error": 
"value 'a' is not a valid integer", + } + ], + ) ] def test_skip_row(self): @@ -561,12 +573,18 @@ def test_skip_row(self): } ) rows = list(schema.stream_parse(b"a")) - assert rows == [RowSkipped(row_number=1, errors=[{ - "column-number": 1, - "field-key": "age", - "error": "value 'a' is not a valid integer", - } - ])] + assert rows == [ + RowSkipped( + row_number=1, + errors=[ + { + "column-number": 1, + "field-key": "age", + "error": "value 'a' is not a valid integer", + } + ], + ) + ] class TestHandleValidationError(TestCase): @@ -593,12 +611,16 @@ def test_default_behavior_raise(self): rows = list(schema.stream_parse(b"-1")) assert rows == [ - RowFailed(row_number=1, errors=[{ - "column-number": 1, - "field-key": "age", - "error": "value must be greater than 0", - } - ]) + RowFailed( + row_number=1, + errors=[ + { + "column-number": 1, + "field-key": "age", + "error": "value must be greater than 0", + } + ], + ) ] def test_skip_row(self): @@ -622,12 +644,18 @@ def test_skip_row(self): } ) rows = list(schema.stream_parse(b"-1")) - assert rows == [RowSkipped(row_number=1, errors=[{ - "column-number": 1, - "field-key": "age", - "error": "value must be greater than 0", - } - ])] + assert rows == [ + RowSkipped( + row_number=1, + errors=[ + { + "column-number": 1, + "field-key": "age", + "error": "value must be greater than 0", + } + ], + ) + ] class TestHandlePostProcessorError(TestCase): @@ -659,12 +687,18 @@ def test_default_behavior_raise(self): } ) rows = list(schema.stream_parse(b"1")) - assert rows == [RowFailed(row_number=1, errors=[{ - "column-number": 1, - "field-key": "age", - "error": "test error", - } - ])] + assert rows == [ + RowFailed( + row_number=1, + errors=[ + { + "column-number": 1, + "field-key": "age", + "error": "test error", + } + ], + ) + ] def test_skip_row(self): PostProcessor.register(self.FailPostProcessor) @@ -687,12 +721,18 @@ def test_skip_row(self): } ) rows = list(schema.stream_parse(b"1")) - assert rows == [RowSkipped(row_number=1, 
errors=[{ - "column-number": 1, - "field-key": "age", - "error": "test error", - } - ])] + assert rows == [ + RowSkipped( + row_number=1, + errors=[ + { + "column-number": 1, + "field-key": "age", + "error": "test error", + } + ], + ) + ] class TestHandlePreProcessorError(TestCase): @@ -724,12 +764,18 @@ def test_default_behavior_raise(self): } ) rows = list(schema.stream_parse(b"1")) - assert rows == [RowFailed(row_number=1, errors=[{ - "column-number": 1, - "field-key": "age", - "error": "test error", - } - ])] + assert rows == [ + RowFailed( + row_number=1, + errors=[ + { + "column-number": 1, + "field-key": "age", + "error": "test error", + } + ], + ) + ] def test_skip_row(self): PreProcessor.register(self.FailPreProcessor) @@ -752,9 +798,15 @@ def test_skip_row(self): } ) rows = list(schema.stream_parse(b"1")) - assert rows == [RowSkipped(row_number=1, errors=[{ - "column-number": 1, - "field-key": "age", - "error": "test error", - } - ])] + assert rows == [ + RowSkipped( + row_number=1, + errors=[ + { + "column-number": 1, + "field-key": "age", + "error": "test error", + } + ], + ) + ] diff --git a/tests/test_type_converters.py b/tests/test_type_converters.py index 4b561b3..63edab6 100644 --- a/tests/test_type_converters.py +++ b/tests/test_type_converters.py @@ -84,9 +84,7 @@ def test_transform_failed(self): class TestTime(TestCase): def test_transform(self): type_converter = TypeConverter.build({"type": "time"}) - assert type_converter.apply("10:12:03+03:00") == time( - 10, 12, 3, tzinfo=timezone(timedelta(hours=3)) - ) + assert type_converter.apply("10:12:03+03:00") == time(10, 12, 3, tzinfo=timezone(timedelta(hours=3))) def test_transform_failed(self): type_converter = TypeConverter.build({"type": "time"}) @@ -143,19 +141,13 @@ def test_int(self): assert type_converter.apply(None) is None def test_decimal(self): - type_converter = TypeConverter.build( - {"type": {"key": "decimal", "nullable": True}} - ) + type_converter = TypeConverter.build({"type": 
{"key": "decimal", "nullable": True}}) assert type_converter.apply(None) is None def test_time(self): - type_converter = TypeConverter.build( - {"type": {"key": "time", "nullable": True}} - ) + type_converter = TypeConverter.build({"type": {"key": "time", "nullable": True}}) assert type_converter.apply(None) is None def test_datetime(self): - type_converter = TypeConverter.build( - {"type": {"key": "datetime", "nullable": True}} - ) + type_converter = TypeConverter.build({"type": {"key": "datetime", "nullable": True}}) assert type_converter.apply(None) is None diff --git a/tests/test_validators.py b/tests/test_validators.py index 746634d..4cc90f0 100644 --- a/tests/test_validators.py +++ b/tests/test_validators.py @@ -18,9 +18,7 @@ def test_regex_matches(self): assert validator.pattern.pattern == "^\\d{13}$" def test_not_null_or_empty(self): - validator = Validator.build( - {"name": "not-null-or-empty"} - ) + validator = Validator.build({"name": "not-null-or-empty"}) assert isinstance(validator, NotNullOrEmpty) @@ -62,9 +60,7 @@ def test_does_not_match(self): "parameters": {"pattern": "^\\d{13}$"}, } ) - with pytest.raises( - ValueError, match=r"string does not match regex '\^\\d\{13\}\$'" - ): + with pytest.raises(ValueError, match=r"string does not match regex '\^\\d\{13\}\$'"): validator.apply("hello") @@ -88,33 +84,25 @@ def test_register(self): class TestGreaterThanValidator(TestCase): def test_it_successfully_returns_the_value_when_greater_than_threshold(self): - validator = Validator.build( - {"name": "greater-than", "parameters": {"threshold": 11}} - ) + validator = Validator.build({"name": "greater-than", "parameters": {"threshold": 11}}) assert validator.apply(12) == 12 def test_it_successfully_returns_the_value_when_greater_than_decimal_threshold( self, ): - validator = Validator.build( - {"name": "greater-than", "parameters": {"threshold": 11.4}} - ) + validator = Validator.build({"name": "greater-than", "parameters": {"threshold": 11.4}}) assert 
validator.apply(11.5) == 11.5 def test_it_raises_an_error_when_the_value_is_lower_than_threshold(self): - validator = Validator.build( - {"name": "greater-than", "parameters": {"threshold": 10}} - ) + validator = Validator.build({"name": "greater-than", "parameters": {"threshold": 10}}) with pytest.raises(ValueError, match="value must be greater than 10"): validator.apply(9.9999) def test_it_raises_an_error_when_the_value_is_equal_to_threshold(self): - validator = Validator.build( - {"name": "greater-than", "parameters": {"threshold": 10}} - ) + validator = Validator.build({"name": "greater-than", "parameters": {"threshold": 10}}) with pytest.raises(ValueError, match="value must be greater than 10"): validator.apply(10) @@ -122,24 +110,18 @@ def test_it_raises_an_error_when_the_value_is_equal_to_threshold(self): class TestNotNullOrEmptyValidator(TestCase): def test_success_returns_the_value(self): - validator = Validator.build( - {"name": "not-null-or-empty"} - ) + validator = Validator.build({"name": "not-null-or-empty"}) assert validator.apply("hello") == "hello" def test_raise_when_the_value_is_null(self): - validator = Validator.build( - {"name": "not-null-or-empty"} - ) + validator = Validator.build({"name": "not-null-or-empty"}) with pytest.raises(ValueError, match="value must not be null or empty"): validator.apply(None) def test_raises_when_the_value_is_empty(self): - validator = Validator.build( - {"name": "not-null-or-empty"} - ) + validator = Validator.build({"name": "not-null-or-empty"}) with pytest.raises(ValueError, match="value must not be null or empty"): validator.apply("")