ZeroGachis · ducdetronquito · Sep 22, 2025 · Sep 22, 2025 · Sep 22, 2025 · Sep 22, 2025
@@ -31,8 +31,8 @@ jobs:
       tailscale_enabled: false
       run_command: |
         poetry install --no-interaction --no-ansi --no-root --only dev
-        poetry run black magicparse/ --check --diff
-        poetry run flake8
+        ruff check --diff ./
+        ruff format --check --diff ./
     secrets: inherit
 
   unit-tests:

@@ -1,14 +1,15 @@
+fail_fast: true
 repos:
-  - repo: https://github.com/psf/black
-    rev: 22.6.0
+  - repo: local
     hooks:
-      - id: black
-        language_version: python3.9
-      - id: black
-        alias: black-on-ci
-        args: ["--check --diff"]
-        stages: [manual]
-  -   repo: https://github.com/pycqa/flake8
-      rev: 5.0.3
-      hooks:
-      -   id: flake8
+      - id: ruff-check
+        name: Ruff check
+        entry: poetry run ruff check
+        args: [--fix]
+        language: system
+        types: [file, python]
+      - id: ruff-format
+        name: Ruff format
+        entry: poetry run ruff format
+        language: system
+        types: [file, python]
@@ -2,20 +2,65 @@
 
 Declarative parser for structured data files.
 
-## Installation
+## Table of contents
 
-```bash
-poetry install magicparse
+- [Getting started](#getting-started)
+  - [Dev requirements](#dev-requirements)
+- [Usage](#usage)
+  - [Parse content](#parse-content)
+  - [Register a custom transform](#register-custom-transform)
+  - [Register a custom schema](#register-custom-schema)
+  - [Stream parsing](#stream-parsing)
+  - [Custom encoding](#custom-encoding)
+- [API Reference](#api-reference)
+  - [File types](#file-types)
+  - [Types](#types)
+  - [Computed fields](#computed-fields)
+  - [Return types](#return-types)
+  - [Error handling](#error-handling)
+- [License](#license)
+
+<a id="getting-started"></a>
+
+## Getting started
+
+📝 We recommend reading this README in its entirety before doing anything,
+so that you have a good overview of what you are going to do and avoid
+wrongly anticipating any steps.
+
+<a id="dev-requirements"></a>
+
+### Dev requirements
+
+From this point on, we expect you to have [mise](https://mise.jdx.dev/) installed on your machine.
+
+Use mise to install Python and Poetry:
+
+```shell
+mise install
+
+# You can then check that they are available
+python --version
+poetry --version
 ```
 
-## Requirements
+Install the Python dev dependencies and set up the pre-commit hooks:
+
+```shell
+poetry install
+pre-commit install
+```
 
-- Python 3.12+
+<a id="usage"></a>
 
 ## Usage
 
+<a id="parse-content"></a>
+
 ### Parse content
 
+
+
 ```python
 import magicparse
 
@@ -110,7 +155,9 @@ rows = magicparse.parse(data="...", schema=schema)
 ```
 
 
-### Register a custom transform and parse content
+<a id="register-custom-transform"></a>
+
+### Register a custom transform
 
 ```python
 from uuid import UUID
@@ -138,7 +185,9 @@ rows = magicparse.parse("13ec10cc-cc7e-4ee9-b091-9caa6d11aeb2", schema)
 assert rows == [{"shop-guid": "13ec10cc-cc7e-4ee9-b091-9caa6d11aeb2"}]
 ```
 
-### Register a custom schema and parse content
+<a id="register-custom-schema"></a>
+
+### Register a custom schema
 
 ```python
 import magicparse
@@ -165,6 +214,8 @@ rows = magicparse.parse("Joe|William|Jack|Averell", schema)
 assert rows == [{"name": "Joe"}, {"name": "William"}, {"name": "Jack"}, {"name": "Averell"}]
 ```
 
+<a id="stream-parsing"></a>
+
 ### Stream parsing
 
 For large files, you can use streaming to process data incrementally:
@@ -192,6 +243,8 @@ for row in magicparse.stream_parse(data="...", schema=schema):
             print("Unknown type of row.")
 ```
 
+<a id="custom-encoding"></a>
+
 ### Custom encoding
 
 By default, magicparse uses UTF-8 encoding. You can specify a different encoding:
@@ -206,16 +259,20 @@ schema = {
 }
 ```
 
-## API
+<a id="api-reference"></a>
+
+## API Reference
+
+<a id="file-types"></a>
 
 ### File types
 
 - CSV (with or without header)
 - Columnar
 
-### Fields
+<a id="types"></a>
 
-#### Types
+### Types
 
 - str
 - int
@@ -243,6 +300,8 @@ schema = {
 - divide
 - round
 
+<a id="computed-fields"></a>
+
 ### Computed Fields
 
 Types, Pre-processors, Post-processors and validators are the same as for Field
@@ -254,15 +313,19 @@ Types, Pre-processors, Post-processors and validator is same as Field
 - multiply
 - coalesce
 
-## Return Types
+<a id="return-types"></a>
+
+### Return Types
 
 The parser returns a list of row objects:
 
 - **`RowParsed`**: Successfully parsed row with `values` dict
 - **`RowFailed`**: Failed to parse row with `errors` message
 - **`RowSkipped`**: Skipped row with `errors` message
 
-## Error Handling
+<a id="error-handling"></a>
+
+### Error Handling
 
 You can configure error handling for types, validators, and processors:
 
@@ -281,50 +344,7 @@ Error handling options:
 - `"raise"` (default): Raise exception on error
 - `"skip-row"`: Skip the row and continue processing
 
-## Docker
-
-The project includes Docker support:
-
-```bash
-# Build and run with docker-compose
-docker-compose up --build
-
-# Or build manually
-docker build -t magicparse .
-docker run -it magicparse
-```
-
-## Development
-
-### Setup
-
-```bash
-# Install dependencies
-poetry install
-
-# Run tests
-poetry run pytest
-
-# Format code
-poetry run black .
-
-# Lint code
-poetry run flake8
-```
-
-## Breaking Changes
-
-### Version 1.0.0
-
-- output format is now a list of typed result. (Before it was a Tuple[dict, list[dict]]) that represent parsed values and errors).
-### Version 0.16.0
-
-- **Python 3.12+ required**: Upgraded from Python 3.11 to Python 3.12. Update your Python environment before upgrading.
-
-### Version 0.15.0
-
-- **Python 3.11+ required**: Upgraded from Python 3.10 to Python 3.11. Update your Python environment before upgrading.
-
+<a id="license"></a>
 
 ## License
 

@@ -33,9 +33,7 @@
 ]
 
 
-def parse(
-    data: Union[bytes, BytesIO], schema_options: Dict[str, Any]
-) -> List[RowParsed | RowSkipped | RowFailed]:
+def parse(data: Union[bytes, BytesIO], schema_options: Dict[str, Any]) -> List[RowParsed | RowSkipped | RowFailed]:
     schema_definition = Schema.build(schema_options)
     return schema_definition.parse(data)
 
@@ -68,9 +66,7 @@ def register(items: Union[Registrable, List[Registrable]]) -> None:
         elif issubclass(item, Builder):
             Builder.register(item)
         else:
-            raise ValueError(
-                "transforms must be a subclass of Transform (or a list of it)"
-            )
+            raise ValueError("transforms must be a subclass of Transform (or a list of it)")
 
 
 register(builtins_schemas)

@@ -35,8 +35,7 @@ def __init__(self, on_error: OnError, fields: list[str]) -> None:
             or len(fields) < 2
         ):
             raise ValueError(
-                "composite-processor 'concat': "
-                "'fields' parameter must be a list[str] with at least two elements"
+                "composite-processor 'concat': 'fields' parameter must be a list[str] with at least two elements"
             )
 
         self.fields = fields
@@ -53,13 +52,9 @@ class Divide(Builder):
     def __init__(self, on_error: OnError, numerator: str, denominator: str) -> None:
         super().__init__(on_error)
         if not numerator or not isinstance(numerator, str):
-            raise ValueError(
-                "builder 'divide': " "'numerator' parameter must be a non null str"
-            )
+            raise ValueError("builder 'divide': 'numerator' parameter must be a non null str")
         if not denominator or not isinstance(denominator, str):
-            raise ValueError(
-                "builder 'divide': " "'denominator' parameter must be a non null str"
-            )
+            raise ValueError("builder 'divide': 'denominator' parameter must be a non null str")
         self.numerator = numerator
         self.denominator = denominator
 
@@ -75,13 +70,9 @@ class Multiply(Builder):
     def __init__(self, on_error: OnError, x_factor: str, y_factor: str) -> None:
         super().__init__(on_error)
         if not x_factor or not isinstance(x_factor, str):
-            raise ValueError(
-                "builder 'multiply': " "'x_factor' parameter must be a non null str"
-            )
+            raise ValueError("builder 'multiply': 'x_factor' parameter must be a non null str")
         if not y_factor or not isinstance(y_factor, str):
-            raise ValueError(
-                "builder 'multiply': " "'y_factor' parameter must be a non null str"
-            )
+            raise ValueError("builder 'multiply': 'y_factor' parameter must be a non null str")
         self.x_factor = x_factor
         self.y_factor = y_factor
 
@@ -98,11 +89,7 @@ def __init__(self, on_error: OnError, fields: list[str]) -> None:
         super().__init__(on_error)
         if not fields:
             raise ValueError("parameters should defined fields to coalesce")
-        if (
-            not isinstance(fields, list)
-            or not all(isinstance(field, str) for field in fields)
-            or len(fields) < 2
-        ):
+        if not isinstance(fields, list) or not all(isinstance(field, str) for field in fields) or len(fields) < 2:
             raise ValueError("parameters should have two fields at least")
 
         self.fields = fields

@@ -12,29 +12,21 @@
 class Field(ABC):
     def __init__(self, key: str, options: dict) -> None:
         self.key = key
-        pre_processors = [
-            PreProcessor.build(item) for item in options.get("pre-processors", [])
-        ]
+        pre_processors = [PreProcessor.build(item) for item in options.get("pre-processors", [])]
         type_converter = TypeConverter.build(options)
         validators = [Validator.build(item) for item in options.get("validators", [])]
-        post_processors = [
-            PostProcessor.build(item) for item in options.get("post-processors", [])
-        ]
+        post_processors = [PostProcessor.build(item) for item in options.get("post-processors", [])]
 
         self.optional = options.get("optional", False)
 
-        self.transforms = (
-            pre_processors + [type_converter] + validators + post_processors
-        )
+        self.transforms = pre_processors + [type_converter] + validators + post_processors
 
     def _process_raw_value(self, raw_value: str) -> Result:
         if not raw_value:
             if self.optional:
                 return Ok(value=None)
             else:
-                raise ValueError(
-                    f"{self.key} field is required but the value was empty"
-                )
+                raise ValueError(f"{self.key} field is required but the value was empty")
         for transform in self.transforms:
             try:
                 raw_value = transform.apply(raw_value)

@@ -29,10 +29,7 @@ class Divide(PostProcessor):
     def __init__(self, on_error: OnError, denominator: int) -> None:
         super().__init__(on_error)
         if denominator <= 0:
-            raise ValueError(
-                "post-processor 'divide': "
-                "'denominator' parameter must be a positive integer"
-            )
+            raise ValueError("post-processor 'divide': 'denominator' parameter must be a positive integer")
 
         self.denominator = denominator
 
@@ -50,10 +47,7 @@ class Round(PostProcessor):
     def __init__(self, on_error: OnError, precision: int) -> None:
         super().__init__(on_error)
         if precision < 0:
-            raise ValueError(
-                "post-processor 'round': "
-                "'precision' parameter must be a positive or zero integer"
-            )
+            raise ValueError("post-processor 'round': 'precision' parameter must be a positive or zero integer")
 
         self.precision = precision