ZeroGachis · sGeeK44 · Sep 19, 2025 · Sep 18, 2025 · Sep 18, 2025 · Sep 17, 2025
@@ -16,5 +16,4 @@ _.python.venv = { path = ".venv", create = false }
 
 [tasks.test]
 description = "🐍 Run tests"
-depends = ["start_db"]
 run = "pytest -s"
@@ -1,6 +1,16 @@
 # magicparse 🛸
 
-Declarative parser
+Declarative parser for structured data files.
+
+## Installation
+
+```bash
+poetry add magicparse
+```
+
+## Requirements
+
+- Python 3.12+
 
 ## Usage
 
@@ -96,7 +106,7 @@ schema = {
 }
 
 
-rows, errors= magicparse.parse(data="...", schema=schema)
+rows = magicparse.parse(data="...", schema_options=schema)
 ```
 
 
@@ -124,9 +134,8 @@ schema = {
     ],
 }
 
-rows, errors = magicparse.parse("13ec10cc-cc7e-4ee9-b091-9caa6d11aeb2", schema)
+rows = magicparse.parse("13ec10cc-cc7e-4ee9-b091-9caa6d11aeb2", schema)
 assert rows == [{"shop-guid": "13ec10cc-cc7e-4ee9-b091-9caa6d11aeb2"}]
-assert not errors
 ```
 
 ### Register a custom schema and parse content
@@ -152,11 +161,51 @@ schema = {
     ]
 }
 
-rows, errors = magicparse.parse("Joe|William|Jack|Averell", schema)
-assert not errors
+rows = magicparse.parse("Joe|William|Jack|Averell", schema)
 assert rows == [{"name": "Joe"}, {"name": "William"}, {"name": "Jack"}, {"name": "Averell"}]
 ```
 
+### Stream parsing
+
+For large files, you can use streaming to process data incrementally:
+
+```python
+import magicparse
+
+schema = {
+    "file_type": "csv",
+    "fields": [
+        {"key": "name", "type": "str", "column-number": 1}
+    ]
+}
+
+# Process data in chunks
+for row in magicparse.stream_parse(data="...", schema_options=schema):
+    match row:
+        case magicparse.RowParsed(values):  
+            print(f"The values {values}.")
+        case magicparse.RowFailed(errors):
+            print(f"The errors {errors}.")
+        case magicparse.RowSkipped(reason):
+            print(f"The reason {reason}.")
+        case _:  
+            print("Unknown type of row.")
+```
+
+### Custom encoding
+
+By default, magicparse uses UTF-8 encoding. You can specify a different encoding:
+
+```python
+schema = {
+    "file_type": "csv",
+    "encoding": "iso8859_5",  # or any other encoding
+    "fields": [
+        {"key": "name", "type": "str", "column-number": 1}
+    ]
+}
+```
+
 ## API
 
 ### File types
@@ -187,6 +236,7 @@ assert rows == [{"name": "Joe"}, {"name": "William"}, {"name": "Jack"}, {"name":
 
 - regex-matches
 - greater-than
+- not-null-or-empty
 
 #### Post-processors
 
@@ -202,3 +252,66 @@ Types, Pre-processors, Post-processors and validator is same as Field
 - concat
 - divide
 - multiply
+- coalesce
+
+## Return Types
+
+The parser returns a list of row objects:
+
+- **`RowParsed`**: Successfully parsed row with `values` dict
+- **`RowFailed`**: Failed to parse row with `errors` message
+- **`RowSkipped`**: Skipped row with `errors` message
+
+## Error Handling
+
+You can configure error handling for types, validators, and processors:
+
+```python
+{
+    "key": "price",
+    "type": {
+        "key": "decimal",
+        "nullable": True,  # Allow null values
+        "on-error": "skip-row"  # Skip on error instead of raising
+    }
+}
+```
+
+Error handling options:
+- `"raise"` (default): Raise exception on error
+- `"skip-row"`: Skip the row and continue processing
+
+## Docker
+
+The project includes Docker support:
+
+```bash
+# Build and run with docker-compose
+docker-compose up --build
+
+# Or build manually
+docker build -t magicparse .
+docker run -it magicparse
+```
+
+## Development
+
+### Setup
+
+```bash
+# Install dependencies
+poetry install
+
+# Run tests
+poetry run pytest
+
+# Format code
+poetry run black .
+
+# Lint code
+poetry run flake8
+```
+
+## License
+
+This project is licensed under the MIT License.
@@ -1,6 +1,12 @@
 from io import BytesIO
 
-from .schema import ParsedRow, Schema, builtins as builtins_schemas
+from .schema import (
+    RowParsed,
+    RowFailed,
+    RowSkipped,
+    Schema,
+    builtins as builtins_schemas,
+)
 from .post_processors import PostProcessor, builtins as builtins_post_processors
 from .pre_processors import PreProcessor, builtins as builtins_pre_processors
 from .builders import (
@@ -9,7 +15,7 @@
 )
 from .transform import Transform
 from .type_converters import TypeConverter, builtins as builtins_type_converters
-from typing import Any, Dict, Iterable, List, Tuple, Union
+from typing import Any, Dict, Iterable, List, Union
 from .validators import Validator, builtins as builtins_validators
 
 
@@ -20,21 +26,23 @@
     "PostProcessor",
     "PreProcessor",
     "Schema",
-    "ParsedRow",
+    "RowParsed",
+    "RowSkipped",
+    "RowFailed",
     "Validator",
 ]
 
 
 def parse(
     data: Union[bytes, BytesIO], schema_options: Dict[str, Any]
-) -> Tuple[List[dict], List[dict]]:
+) -> List[RowParsed | RowSkipped | RowFailed]:
+    """Build a ``Schema`` from ``schema_options`` and return its ``parse(data)`` result."""
     schema_definition = Schema.build(schema_options)
     return schema_definition.parse(data)
 
 
 def stream_parse(
     data: Union[bytes, BytesIO], schema_options: Dict[str, Any]
-) -> Iterable[ParsedRow]:
+) -> Iterable[RowParsed | RowSkipped | RowFailed]:
+    """Build a ``Schema`` from ``schema_options`` and return its ``stream_parse(data)`` iterable."""
     schema_definition = Schema.build(schema_options)
     return schema_definition.stream_parse(data)
 

@@ -1,7 +1,7 @@
 from abc import ABC
 from decimal import Decimal
 
-from .transform import Transform
+from .transform import Transform, OnError
 
 
 class Builder(Transform, ABC):
@@ -17,14 +17,16 @@ def build(cls, options: dict) -> "Builder":
         except:
             raise ValueError(f"invalid builder '{name}'")
 
+        on_error = options.get("on-error", OnError.RAISE)
         if "parameters" in options:
-            return builder(**options["parameters"])
+            return builder(on_error=on_error, **options["parameters"])
         else:
-            return builder()
+            return builder(on_error=on_error)
 
 
 class Concat(Builder):
-    def __init__(self, fields: list[str]) -> None:
+    def __init__(self, on_error: OnError, fields: list[str]) -> None:
+        super().__init__(on_error)
         if (
             not fields
             or isinstance(fields, str)
@@ -48,7 +50,8 @@ def key() -> str:
 
 
 class Divide(Builder):
-    def __init__(self, numerator: str, denominator: str) -> None:
+    def __init__(self, on_error: OnError, numerator: str, denominator: str) -> None:
+        super().__init__(on_error)
         if not numerator or not isinstance(numerator, str):
             raise ValueError(
                 "builder 'divide': " "'numerator' parameter must be a non null str"
@@ -69,7 +72,8 @@ def key() -> str:
 
 
 class Multiply(Builder):
-    def __init__(self, x_factor: str, y_factor: str) -> None:
+    def __init__(self, on_error: OnError, x_factor: str, y_factor: str) -> None:
+        super().__init__(on_error)
         if not x_factor or not isinstance(x_factor, str):
             raise ValueError(
                 "builder 'multiply': " "'x_factor' parameter must be a non null str"
@@ -89,4 +93,29 @@ def key() -> str:
         return "multiply"
 
 
-builtins = [Concat, Divide, Multiply]
+class Coalesce(Builder):
+    """Builder that returns the first truthy value among several row fields."""
+
+    def __init__(self, on_error: OnError, fields: list[str]) -> None:
+        """Validate and store the field names to coalesce.
+
+        Raises:
+            ValueError: ``fields`` is empty/missing, is not a list of str,
+                or holds fewer than two names.
+        """
+        # on_error is handed unchanged to the parent initializer.
+        super().__init__(on_error)
+        # An empty or None ``fields`` gets its own, more specific message.
+        if not fields:
+            raise ValueError("parameters should defined fields to coalesce")
+        # Everything else (wrong container type, non-str item, single item)
+        # shares one message.
+        if (
+            not isinstance(fields, list)
+            or not all(isinstance(field, str) for field in fields)
+            or len(fields) < 2
+        ):
+            raise ValueError("parameters should have two fields at least")
+
+        self.fields = fields
+
+    def apply(self, row: dict) -> str:
+        """Return ``row[field]`` for the first field whose value is truthy.
+
+        Fields are tried in the order given at construction; ``None`` is
+        returned when every value is falsy.
+        """
+        # NOTE(review): the annotation says ``str`` but the fall-through
+        # returns None; the returned value is whatever type ``row`` holds.
+        for field in self.fields:
+            # Plain truthiness test: None and "" are skipped, and so is 0.
+            # A key missing from ``row`` is not handled here.
+            if row[field]:
+                return row[field]
+        return None
+
+    @staticmethod
+    def key() -> str:
+        """Return the name of this builder, ``"coalesce"``."""
+        return "coalesce"
+
+
+builtins = [Concat, Divide, Multiply, Coalesce]
@@ -6,6 +6,7 @@
 from .post_processors import PostProcessor
 from .pre_processors import PreProcessor
 from .validators import Validator
+from .transform import Ok, OnError, Result, SkipRow
 
 
 class Field(ABC):
@@ -26,24 +27,28 @@ def __init__(self, key: str, options: dict) -> None:
             pre_processors + [type_converter] + validators + post_processors
         )
 
-    def _process_raw_value(self, raw_value: str):
-        value = raw_value
+    def _process_raw_value(self, raw_value: str) -> Result:
+        """Run ``raw_value`` through every transform of this field, in order.
+
+        Returns ``Ok(value=None)`` for an empty value on an optional field,
+        ``SkipRow(exception=...)`` when a transform fails and its ``on_error``
+        equals ``OnError.SKIP_ROW.value``, and ``Ok`` holding the final
+        transformed value otherwise.
+
+        Raises:
+            ValueError: the value is empty and the field is not optional.
+            Exception: whatever a transform raised, when that transform is
+                not configured to skip the row.
+        """
         if not raw_value:
             if self.optional:
-                return None
+                return Ok(value=None)
             else:
                 raise ValueError(
                     f"{self.key} field is required but the value was empty"
                 )
         for transform in self.transforms:
-            value = transform.apply(value)
-        return value
+            try:
+                # Each transform receives the output of the previous one.
+                raw_value = transform.apply(raw_value)
+            except Exception as exc:
+                # The failing transform's own on_error setting decides
+                # between skipping the row and propagating the exception.
+                if transform.on_error == OnError.SKIP_ROW.value:
+                    return SkipRow(exception=exc)
+                raise
+        return Ok(value=raw_value)
 
     @abstractmethod
-    def _read_raw_value(self, row) -> str:
+    def _read_raw_value(self, row: List[str] | dict) -> str:
         pass
 
-    def read_value(self, row):
+    def parse(self, row: List[str] | dict) -> Result:
+        """Read this field's raw value from ``row`` and return the processed ``Result``."""
         raw_value = self._read_raw_value(row)
         return self._process_raw_value(raw_value)
 
@@ -75,7 +80,7 @@ def __init__(self, key: str, options: dict) -> None:
         super().__init__(key, options)
         self.column_number = options["column-number"]
 
-    def _read_raw_value(self, row: List[str]) -> str:
+    def _read_raw_value(self, row: List[str] | dict) -> str:
         return row[self.column_number - 1]
 
     def error(self, exception: Exception) -> dict:
@@ -93,7 +98,7 @@ def __init__(self, key: str, options: dict) -> None:
         self.column_length = options["column-length"]
         self.column_end = self.column_start + self.column_length
 
-    def _read_raw_value(self, row: str) -> str:
+    def _read_raw_value(self, row: str | dict) -> str:
         return row[self.column_start : self.column_end]
 
     def error(self, exception: Exception) -> dict:
@@ -110,7 +115,7 @@ def __init__(self, key: str, options: dict) -> None:
         super().__init__(key, options)
         self.builder = Builder.build(options["builder"])
 
-    def _read_raw_value(self, row) -> str:
+    def _read_raw_value(self, row: List[str] | dict) -> str:
         return self.builder.apply(row)
 
     def error(self, exception: Exception) -> dict: