From ae78f19972416ffa4d54f2a70534cf3458fa1eba Mon Sep 17 00:00:00 2001 From: ducdetronquito Date: Thu, 18 Sep 2025 10:31:16 +0200 Subject: [PATCH 01/11] chore: Remove missing step for mise test task --- .config/mise/config.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/.config/mise/config.toml b/.config/mise/config.toml index 14f6238..54eefee 100644 --- a/.config/mise/config.toml +++ b/.config/mise/config.toml @@ -16,5 +16,4 @@ _.python.venv = { path = ".venv", create = false } [tasks.test] description = "🐍 Run tests" -depends = ["start_db"] run = "pytest -s" From 375ce9cfc2c9f29f90c1a7b7e9672b6b40455f93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Perin?= Date: Thu, 18 Sep 2025 14:53:51 +0200 Subject: [PATCH 02/11] chore: Use pytest to assert exception --- tests/test_computed_fields.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_computed_fields.py b/tests/test_computed_fields.py index 239b468..b68a404 100644 --- a/tests/test_computed_fields.py +++ b/tests/test_computed_fields.py @@ -6,15 +6,15 @@ class TestBuild(TestCase): def test_without_builder(self): - with self.assertRaises(KeyError): + with pytest.raises(KeyError): ComputedField("output", {"type": "str"}) def test_not_iterable_value_for_builder(self): - with self.assertRaises(ValueError): + with pytest.raises(ValueError): ComputedField("output", {"type": "str", "builder": 1}) def test_bad_value_for_builder(self): - with self.assertRaises(ValueError): + with pytest.raises(ValueError): ComputedField("output", {"type": "str", "builder": "really"}) def test_with_valid_builder(self): From e08fa0a15b0443455423d0cc41eedc8461c5ff1f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Perin?= Date: Wed, 17 Sep 2025 15:13:46 +0200 Subject: [PATCH 03/11] feat: field type can be described with a complex structure --- magicparse/type_converters.py | 9 ++++++--- tests/test_type_converters.py | 6 +++++- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git
a/magicparse/type_converters.py b/magicparse/type_converters.py index 2f107fc..09cf701 100644 --- a/magicparse/type_converters.py +++ b/magicparse/type_converters.py @@ -8,14 +8,17 @@ class TypeConverter(Transform): @classmethod def build(cls, options) -> "TypeConverter": try: - _type = options["type"] + _typeDefinition = options["type"] + if isinstance(_typeDefinition, str): + _typeDefinition = {"key": _typeDefinition} + key = _typeDefinition["key"] except: raise ValueError("missing key 'type'") try: - return cls.registry[_type]() + return cls.registry[key]() except: - raise ValueError(f"invalid type '{_type}'") + raise ValueError(f"invalid type '{key}'") class StrConverter(TypeConverter): diff --git a/tests/test_type_converters.py b/tests/test_type_converters.py index c841047..744669f 100644 --- a/tests/test_type_converters.py +++ b/tests/test_type_converters.py @@ -15,7 +15,7 @@ ) -class TestBuild(TestCase): +class TestBuildFlattenType(TestCase): def test_str(self): type_converter = TypeConverter.build({"type": "str"}) assert isinstance(type_converter, StrConverter) @@ -44,6 +44,10 @@ def test_no_type_provided(self): with pytest.raises(ValueError, match="missing key 'type'"): TypeConverter.build({}) +class TestBuildComplexeType(TestCase): + def test_str(self): + type_converter = TypeConverter.build({"type": {"key": "str"}}) + assert isinstance(type_converter, StrConverter) class TestStr(TestCase): def test_apply(self): From ce1efb2f503ce3b476d2d506078a3bf752b7d3f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Perin?= Date: Wed, 17 Sep 2025 15:27:04 +0200 Subject: [PATCH 04/11] feat: Implement nullable type --- magicparse/type_converters.py | 31 +++++++++++++++++++++++-------- tests/test_type_converters.py | 17 +++++++++++++++++ 2 files changed, 40 insertions(+), 8 deletions(-) diff --git a/magicparse/type_converters.py b/magicparse/type_converters.py index 09cf701..a9c47e3 100644 --- a/magicparse/type_converters.py +++ b/magicparse/type_converters.py 
@@ -1,10 +1,25 @@ +from abc import abstractmethod from datetime import datetime, time from decimal import Decimal +from typing import Any from .transform import Transform class TypeConverter(Transform): + def __init__(self, options: dict) -> None: + self.nullable = options.get("nullable", False) + + def apply(self, value: str) -> str: + if value is None and self.nullable: + return None + + return self.convert(value) + + @abstractmethod + def convert(self, value: str) -> Any: + pass + @classmethod def build(cls, options) -> "TypeConverter": try: @@ -16,13 +31,13 @@ def build(cls, options) -> "TypeConverter": raise ValueError("missing key 'type'") try: - return cls.registry[key]() - except: - raise ValueError(f"invalid type '{key}'") + return cls.registry[key](_typeDefinition) + except Exception as e: + raise ValueError(f"invalid type '{key}': {e}") class StrConverter(TypeConverter): - def apply(self, value: str) -> str: + def convert(self, value: str) -> str: return value @staticmethod @@ -31,7 +46,7 @@ def key() -> str: class IntConverter(TypeConverter): - def apply(self, value: str) -> int: + def convert(self, value: str) -> int: try: return int(value) except: @@ -43,7 +58,7 @@ def key() -> str: class DecimalConverter(TypeConverter): - def apply(self, value: str) -> Decimal: + def convert(self, value: str) -> Decimal: try: return Decimal(value) except: @@ -55,7 +70,7 @@ def key() -> str: class TimeConverter(TypeConverter): - def apply(self, value: str) -> time: + def convert(self, value: str) -> time: try: parsed = time.fromisoformat(value) if parsed.tzinfo is None: @@ -70,7 +85,7 @@ def key() -> str: class DateTimeConverter(TypeConverter): - def apply(self, value: str) -> datetime: + def convert(self, value: str) -> datetime: try: parsed = datetime.fromisoformat(value) if parsed.tzinfo is None: diff --git a/tests/test_type_converters.py b/tests/test_type_converters.py index 744669f..9991543 100644 --- a/tests/test_type_converters.py +++ 
b/tests/test_type_converters.py @@ -133,3 +133,20 @@ def test_register(self): type_converter = TypeConverter.build({"type": "guid"}) assert isinstance(type_converter, self.GuidConverter) + +class TestNullableField(TestCase): + def test_int(self): + type_converter = TypeConverter.build({"type": {"key": "int", "nullable": True}}) + assert type_converter.apply(None) is None + + def test_decimal(self): + type_converter = TypeConverter.build({"type": {"key": "decimal", "nullable": True}}) + assert type_converter.apply(None) is None + + def test_time(self): + type_converter = TypeConverter.build({"type": {"key": "time", "nullable": True}}) + assert type_converter.apply(None) is None + + def test_datetime(self): + type_converter = TypeConverter.build({"type": {"key": "datetime", "nullable": True}}) + assert type_converter.apply(None) is None From 85987d5492cf007998590007387c4997a51215d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Perin?= Date: Wed, 17 Sep 2025 15:32:10 +0200 Subject: [PATCH 05/11] feat: add not-null-or-empty validator --- magicparse/validators.py | 13 ++++++++++++- tests/test_validators.py | 34 +++++++++++++++++++++++++++++++++- 2 files changed, 45 insertions(+), 2 deletions(-) diff --git a/magicparse/validators.py b/magicparse/validators.py index f55e079..de13187 100644 --- a/magicparse/validators.py +++ b/magicparse/validators.py @@ -51,4 +51,15 @@ def key() -> str: return "greater-than" -builtins = [GreaterThan, RegexMatches] +class NotNullOrEmpty(Validator): + def apply(self, value: str) -> str: + if not value: + raise ValueError("value must not be null or empty") + return value + + @staticmethod + def key() -> str: + return "not-null-or-empty" + + +builtins = [GreaterThan, RegexMatches, NotNullOrEmpty] diff --git a/tests/test_validators.py b/tests/test_validators.py index cd75991..00eabe5 100644 --- a/tests/test_validators.py +++ b/tests/test_validators.py @@ -1,5 +1,5 @@ from decimal import Decimal -from magicparse.validators import 
GreaterThan, RegexMatches, Validator +from magicparse.validators import GreaterThan, NotNullOrEmpty, RegexMatches, Validator import pytest import re from unittest import TestCase @@ -16,6 +16,13 @@ def test_regex_matches(self): assert isinstance(validator, RegexMatches) assert isinstance(validator.pattern, re.Pattern) assert validator.pattern.pattern == "^\\d{13}$" + + def test_not_null_or_empty(self): + validator = Validator.build( + { "name": "not-null-or-empty" } + ) + + assert isinstance(validator, NotNullOrEmpty) def test_unknown(self): with pytest.raises(ValueError, match="invalid validator 'anything'"): @@ -111,3 +118,28 @@ def test_it_raises_an_error_when_the_value_is_equal_to_threshold(self): with pytest.raises(ValueError, match="value must be greater than 10"): validator.apply(10) + + +class TestNotNullOrEmptyValidator(TestCase): + def test_it_successfully_returns_the_value_when_the_value_is_not_null_or_empty(self): + validator = Validator.build( + {"name": "not-null-or-empty"} + ) + + assert validator.apply("hello") == "hello" + + def test_it_raises_an_error_when_the_value_is_null(self): + validator = Validator.build( + {"name": "not-null-or-empty"} + ) + + with pytest.raises(ValueError, match="value must not be null or empty"): + validator.apply(None) + + def test_it_raises_an_error_when_the_value_is_empty(self): + validator = Validator.build( + {"name": "not-null-or-empty"} + ) + + with pytest.raises(ValueError, match="value must not be null or empty"): + validator.apply("") \ No newline at end of file From 86adbf882d5c5564602c71e48ff8bbae0078ecb4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Perin?= Date: Wed, 17 Sep 2025 20:34:43 +0200 Subject: [PATCH 06/11] feat: Add error handling in Transformer --- magicparse/builders.py | 22 ++-- magicparse/fields.py | 13 +- magicparse/post_processors.py | 17 +-- magicparse/pre_processors.py | 34 +++--- magicparse/schema.py | 60 +++++---- magicparse/transform.py | 42 ++++++- 
magicparse/type_converters.py | 24 ++-- magicparse/validators.py | 19 +-- tests/test_builders.py | 29 ++--- tests/test_computed_fields.py | 8 +- tests/test_fields.py | 19 +-- tests/test_post_processors.py | 10 +- tests/test_pre_processors.py | 26 ++-- tests/test_schema.py | 224 ++++++++++++++++++++++++++++++++++ tests/test_type_converters.py | 75 +++++++----- tests/test_validators.py | 30 ++--- 16 files changed, 484 insertions(+), 168 deletions(-) diff --git a/magicparse/builders.py b/magicparse/builders.py index 12a8357..3513f5c 100644 --- a/magicparse/builders.py +++ b/magicparse/builders.py @@ -1,7 +1,7 @@ from abc import ABC from decimal import Decimal -from .transform import Transform +from .transform import Transform, OnError class Builder(Transform, ABC): @@ -17,14 +17,16 @@ def build(cls, options: dict) -> "Builder": except: raise ValueError(f"invalid builder '{name}'") + on_error = options.get("on-error", OnError.RAISE) if "parameters" in options: - return builder(**options["parameters"]) + return builder(on_error=on_error, **options["parameters"]) else: - return builder() + return builder(on_error=on_error) class Concat(Builder): - def __init__(self, fields: list[str]) -> None: + def __init__(self, on_error: OnError, fields: list[str]) -> None: + super().__init__(on_error) if ( not fields or isinstance(fields, str) @@ -39,7 +41,7 @@ def __init__(self, fields: list[str]) -> None: self.fields = fields - def apply(self, row: dict) -> str: + def transform(self, row: dict) -> str: return "".join(row[field] for field in self.fields) @staticmethod @@ -48,7 +50,8 @@ def key() -> str: class Divide(Builder): - def __init__(self, numerator: str, denominator: str) -> None: + def __init__(self, on_error: OnError, numerator: str, denominator: str) -> None: + super().__init__(on_error) if not numerator or not isinstance(numerator, str): raise ValueError( "builder 'divide': " "'numerator' parameter must be a non null str" @@ -60,7 +63,7 @@ def __init__(self, numerator: 
str, denominator: str) -> None: self.numerator = numerator self.denominator = denominator - def apply(self, row: dict) -> Decimal: + def transform(self, row: dict) -> Decimal: return row[self.numerator] / row[self.denominator] @staticmethod @@ -69,7 +72,8 @@ def key() -> str: class Multiply(Builder): - def __init__(self, x_factor: str, y_factor: str) -> None: + def __init__(self, on_error: OnError, x_factor: str, y_factor: str) -> None: + super().__init__(on_error) if not x_factor or not isinstance(x_factor, str): raise ValueError( "builder 'multiply': " "'x_factor' parameter must be a non null str" @@ -81,7 +85,7 @@ def __init__(self, x_factor: str, y_factor: str) -> None: self.x_factor = x_factor self.y_factor = y_factor - def apply(self, row: dict): + def transform(self, row: dict): return row[self.x_factor] * row[self.y_factor] @staticmethod diff --git a/magicparse/fields.py b/magicparse/fields.py index 944e41b..ef1fba1 100644 --- a/magicparse/fields.py +++ b/magicparse/fields.py @@ -6,6 +6,7 @@ from .post_processors import PostProcessor from .pre_processors import PreProcessor from .validators import Validator +from .transform import Ok, Result class Field(ABC): @@ -26,15 +27,17 @@ def __init__(self, key: str, options: dict) -> None: pre_processors + [type_converter] + validators + post_processors ) - def _process_raw_value(self, raw_value: str): - value = raw_value + def _process_raw_value( + self, raw_value: str + ) -> Result: if not raw_value: if self.optional: - return None + return Ok(value=None) else: raise ValueError( f"{self.key} field is required but the value was empty" ) + value = Ok(value=raw_value) for transform in self.transforms: value = transform.apply(value) return value @@ -43,7 +46,7 @@ def _process_raw_value(self, raw_value: str): def _read_raw_value(self, row) -> str: pass - def read_value(self, row): + def parse(self, row) -> Result: raw_value = self._read_raw_value(row) return self._process_raw_value(raw_value) @@ -111,7 +114,7 @@ def 
__init__(self, key: str, options: dict) -> None: self.builder = Builder.build(options["builder"]) def _read_raw_value(self, row) -> str: - return self.builder.apply(row) + return self.builder.transform(row) def error(self, exception: Exception) -> dict: return { diff --git a/magicparse/post_processors.py b/magicparse/post_processors.py index 2e901dd..9052349 100644 --- a/magicparse/post_processors.py +++ b/magicparse/post_processors.py @@ -1,4 +1,4 @@ -from .transform import Transform +from .transform import Transform, OnError from decimal import Decimal from typing import TypeVar @@ -16,16 +16,18 @@ def build(cls, options: dict) -> "PostProcessor": except: raise ValueError(f"invalid post-processor '{name}'") + on_error = options.get("on-error", OnError.RAISE) if "parameters" in options: - return post_processor(**options["parameters"]) + return post_processor(on_error=on_error, **options["parameters"]) else: - return post_processor() + return post_processor(on_error=on_error) class Divide(PostProcessor): Number = TypeVar("Number", int, float, Decimal) - def __init__(self, denominator: int) -> None: + def __init__(self, on_error: OnError, denominator: int) -> None: + super().__init__(on_error) if denominator <= 0: raise ValueError( "post-processor 'divide': " @@ -34,7 +36,7 @@ def __init__(self, denominator: int) -> None: self.denominator = denominator - def apply(self, value: Number) -> Number: + def transform(self, value: Number) -> Number: return value / self.denominator @staticmethod @@ -45,7 +47,8 @@ def key() -> str: class Round(PostProcessor): Number = TypeVar("Number", int, float, Decimal) - def __init__(self, precision: int) -> None: + def __init__(self, on_error: OnError, precision: int) -> None: + super().__init__(on_error) if precision < 0: raise ValueError( "post-processor 'round': " @@ -54,7 +57,7 @@ def __init__(self, precision: int) -> None: self.precision = precision - def apply(self, value: Number) -> Number: + def transform(self, value: Number) -> 
Number: return round(value, self.precision) @staticmethod diff --git a/magicparse/pre_processors.py b/magicparse/pre_processors.py index cc1f25f..be13752 100644 --- a/magicparse/pre_processors.py +++ b/magicparse/pre_processors.py @@ -1,5 +1,5 @@ import re -from .transform import Transform +from .transform import Transform, OnError class PreProcessor(Transform): @@ -15,17 +15,19 @@ def build(cls, options: dict) -> "PreProcessor": except: raise ValueError(f"invalid pre-processor '{name}'") + on_error = options.get("on-error", OnError.RAISE) if "parameters" in options: - return pre_processor(**options["parameters"]) + return pre_processor(on_error=on_error, **options["parameters"]) else: - return pre_processor() + return pre_processor(on_error=on_error) class LeftPadZeroes(PreProcessor): - def __init__(self, width: int) -> None: + def __init__(self, on_error: OnError, width: int) -> None: + super().__init__(on_error) self.width = width - def apply(self, value: str) -> str: + def transform(self, value: str) -> str: return value.zfill(self.width) @staticmethod @@ -34,11 +36,12 @@ def key() -> str: class Map(PreProcessor): - def __init__(self, values: dict) -> None: + def __init__(self, on_error: OnError, values: dict) -> None: + super().__init__(on_error) self.values = values self._keys = ", ".join(f"'{key}'" for key in self.values.keys()) - def apply(self, value: str) -> str: + def transform(self, value: str) -> str: try: return self.values[value] except: @@ -52,11 +55,12 @@ def key() -> str: class Replace(PreProcessor): - def __init__(self, pattern: str, replacement: str) -> None: + def __init__(self, on_error: OnError, pattern: str, replacement: str) -> None: + super().__init__(on_error) self.pattern = pattern self.replacement = replacement - def apply(self, value: str) -> str: + def transform(self, value: str) -> str: return value.replace(self.pattern, self.replacement) @staticmethod @@ -65,7 +69,7 @@ def key() -> str: class StripWhitespaces(PreProcessor): - def 
apply(self, value: str) -> str: + def transform(self, value: str) -> str: return value.strip() @staticmethod @@ -74,10 +78,11 @@ def key() -> str: class LeftStrip(PreProcessor): - def __init__(self, characters: str) -> None: + def __init__(self, on_error: OnError, characters: str) -> None: + super().__init__(on_error) self.characters = characters - def apply(self, value: str) -> str: + def transform(self, value: str) -> str: return value.lstrip(self.characters) @staticmethod @@ -86,7 +91,8 @@ def key() -> str: class RegexExtract(PreProcessor): - def __init__(self, pattern: str) -> None: + def __init__(self, on_error: OnError, pattern: str) -> None: + super().__init__(on_error) pattern = re.compile(pattern) if "value" not in pattern.groupindex: raise ValueError( @@ -95,7 +101,7 @@ def __init__(self, pattern: str) -> None: self.pattern = pattern - def apply(self, value: str) -> str: + def transform(self, value: str) -> str: match = re.match(self.pattern, value) if not match: raise ValueError( diff --git a/magicparse/schema.py b/magicparse/schema.py index 3941743..10470e5 100644 --- a/magicparse/schema.py +++ b/magicparse/schema.py @@ -2,6 +2,8 @@ from abc import ABC, abstractmethod import csv from dataclasses import dataclass + +from magicparse.transform import SkipRow from .fields import Field, ComputedField from io import BytesIO from typing import Any, Dict, List, Tuple, Union, Iterable @@ -82,29 +84,41 @@ def stream_parse(self, data: Union[bytes, BytesIO]) -> Iterable[ParsedRow]: if not any(row): continue - errors = [] - item = {} - for field in self.fields: - try: - value = field.read_value(row) - except Exception as exc: - errors.append({"row-number": row_number, **field.error(exc)}) - continue - - item[field.key] = value - - for computed_field in self.computed_fields: - try: - value = computed_field.read_value(item) - except Exception as exc: - errors.append( - {"row-number": row_number, **computed_field.error(exc)} - ) - continue - - item[computed_field.key] 
= value - - yield ParsedRow(row_number, item, errors) + parsed_fields = self.process_fields(self.fields, row, row_number) + if isinstance(parsed_fields, SkipRow): + continue + + computed_fields = self.process_fields( + self.computed_fields, parsed_fields.values, row_number + ) + if isinstance(computed_fields, SkipRow): + continue + + yield ParsedRow( + row_number, + {**parsed_fields.values, **computed_fields.values}, + parsed_fields.errors + computed_fields.errors, + ) + + def process_fields( + self, fields: List[Field], row: List[str], row_number: int + ) -> ParsedRow | SkipRow: + item = {} + errors = [] + for field in fields: + try: + parsed_value = field.parse(row) + + except Exception as exc: + errors.append({"row-number": row_number, **field.error(exc)}) + continue + + if isinstance(parsed_value, SkipRow): + return parsed_value + + item[field.key] = parsed_value.value + + return ParsedRow(row_number, item, errors) class CsvSchema(Schema): diff --git a/magicparse/transform.py b/magicparse/transform.py index 1e25c63..21614d5 100644 --- a/magicparse/transform.py +++ b/magicparse/transform.py @@ -1,13 +1,53 @@ from abc import ABC, abstractclassmethod, abstractmethod, abstractstaticmethod +from dataclasses import dataclass +from enum import Enum +from typing import Any + + +@dataclass(frozen=True, slots=True) +class Ok: + value: Any + + +@dataclass(frozen=True, slots=True) +class SkipRow: + pass + + +type Result = Ok | SkipRow + + + +class OnError(Enum): + RAISE = "raise" + SKIP_ROW = "skip-row" class Transform(ABC): + def __init__(self, on_error: OnError) -> None: + self.on_error = on_error + @abstractclassmethod def build(cls, options: dict) -> "Transform": pass + def apply( + self, last_result: Result + ) -> Result: + if isinstance(last_result, SkipRow): + return last_result + + try: + return Ok( + value=self.transform(last_result.value) + ) + except Exception: + if self.on_error == OnError.SKIP_ROW.value: + return SkipRow() + raise + @abstractmethod - def 
apply(self, value): + def transform(self, value: Any | None) -> Any | None: pass @abstractstaticmethod diff --git a/magicparse/type_converters.py b/magicparse/type_converters.py index a9c47e3..0a3b1d6 100644 --- a/magicparse/type_converters.py +++ b/magicparse/type_converters.py @@ -4,13 +4,15 @@ from typing import Any from .transform import Transform +from .transform import OnError class TypeConverter(Transform): - def __init__(self, options: dict) -> None: - self.nullable = options.get("nullable", False) - - def apply(self, value: str) -> str: + def __init__(self, nullable: bool, on_error: OnError) -> None: + super().__init__(on_error) + self.nullable = nullable + + def transform(self, value: str | None) -> Any | None: if value is None and self.nullable: return None @@ -23,15 +25,19 @@ def convert(self, value: str) -> Any: @classmethod def build(cls, options) -> "TypeConverter": try: - _typeDefinition = options["type"] - if isinstance(_typeDefinition, str): - _typeDefinition = {"key": _typeDefinition} - key = _typeDefinition["key"] + type = options["type"] + if isinstance(type, str): + key = type + type = {} + else: + key = type.pop("key") except: raise ValueError("missing key 'type'") + nullable = type.pop("nullable", False) + on_error = type.pop("on-error", OnError.RAISE) try: - return cls.registry[key](_typeDefinition) + return cls.registry[key](nullable, on_error, **type) except Exception as e: raise ValueError(f"invalid type '{key}': {e}") diff --git a/magicparse/validators.py b/magicparse/validators.py index de13187..c5e23c3 100644 --- a/magicparse/validators.py +++ b/magicparse/validators.py @@ -1,5 +1,5 @@ from decimal import Decimal -from .transform import Transform +from .transform import Transform, OnError import re @@ -16,17 +16,19 @@ def build(cls, options: dict) -> "Validator": except: raise ValueError(f"invalid validator '{name}'") + on_error = options.setdefault("on-error", OnError.RAISE) if "parameters" in options: - return 
validator(**options["parameters"]) + return validator(on_error=on_error, **options["parameters"]) else: - return validator() + return validator(on_error=on_error) class RegexMatches(Validator): - def __init__(self, pattern: str) -> None: + def __init__(self, on_error: str, pattern: str) -> None: + super().__init__(on_error) self.pattern = re.compile(pattern) - def apply(self, value: str) -> str: + def transform(self, value: str | None) -> str: if re.match(self.pattern, value): return value @@ -38,10 +40,11 @@ def key() -> str: class GreaterThan(Validator): - def __init__(self, threshold: float) -> None: + def __init__(self, on_error: str, threshold: float) -> None: + super().__init__(on_error) self.threshold = Decimal(threshold) - def apply(self, value: Decimal) -> Decimal: + def transform(self, value: Decimal) -> Decimal: if value > self.threshold: return value raise ValueError(f"value must be greater than {self.threshold}") @@ -52,7 +55,7 @@ def key() -> str: class NotNullOrEmpty(Validator): - def apply(self, value: str) -> str: + def transform(self, value: str) -> str: if not value: raise ValueError("value must not be null or empty") return value diff --git a/tests/test_builders.py b/tests/test_builders.py index 869f614..0e839f4 100644 --- a/tests/test_builders.py +++ b/tests/test_builders.py @@ -12,18 +12,19 @@ class WithoutParamBuilder(Builder): def key() -> str: return "without-param" - def apply(self, value): + def transform(self, value): pass class WithParamBuilder(Builder): - def __init__(self, setting: str) -> None: + def __init__(self, on_error: str, setting: str) -> None: + super().__init__(on_error) self.setting = setting @staticmethod def key() -> str: return "with-param" - def apply(self, value): + def transform(self, value): pass def test_without_parameter(self): @@ -76,14 +77,14 @@ def test_field_not_present(self): {"name": "concat", "parameters": {"fields": ["code_1", "code_2"]}} ) with pytest.raises(KeyError): - builder.apply({}) + 
builder.transform({}) def test_concat_two_fields(self): builder = Builder.build( {"name": "concat", "parameters": {"fields": ["code_1", "code_2"]}} ) - result = builder.apply({"code_1": "X", "code_2": "Y"}) + result = builder.transform({"code_1": "X", "code_2": "Y"}) assert result == "XY" @@ -92,7 +93,7 @@ def test_concat_three_fields(self): {"name": "concat", "parameters": {"fields": ["code_1", "code_2", "code_3"]}} ) - result = builder.apply({"code_1": "X", "code_2": "Y", "code_3": "Z"}) + result = builder.transform({"code_1": "X", "code_2": "Y", "code_3": "Z"}) assert result == "XYZ" @@ -102,7 +103,7 @@ def test_concat_integer(self): ) with pytest.raises(TypeError): - builder.apply({"code_1": 1, "code_2": 2}) + builder.transform({"code_1": 1, "code_2": 2}) class TestDivide(TestCase): @@ -140,7 +141,7 @@ def test_field_not_present(self): } ) with pytest.raises(KeyError): - builder.apply({}) + builder.transform({}) def test_numerator_not_valid(self): builder = Builder.build( @@ -150,7 +151,7 @@ def test_numerator_not_valid(self): } ) with pytest.raises(TypeError): - builder.apply({"price": "e", "price_by_unit": 1}) + builder.transform({"price": "e", "price_by_unit": 1}) def test_denominator_not_valid(self): builder = Builder.build( @@ -160,7 +161,7 @@ def test_denominator_not_valid(self): } ) with pytest.raises(TypeError): - builder.apply({"price": 1, "price_by_unit": "ee"}) + builder.transform({"price": 1, "price_by_unit": "ee"}) def test_valid_param(self): builder = Builder.build( @@ -170,7 +171,7 @@ def test_valid_param(self): } ) - result = builder.apply({"price": 1, "price_by_unit": 2}) + result = builder.transform({"price": 1, "price_by_unit": 2}) assert result == Decimal("0.5") @@ -210,7 +211,7 @@ def test_field_not_present(self): } ) with pytest.raises(KeyError): - builder.apply({}) + builder.transform({}) def test_x_y_factor_not_valid(self): builder = Builder.build( @@ -220,7 +221,7 @@ def test_x_y_factor_not_valid(self): } ) with 
pytest.raises(TypeError): - builder.apply({"price": "e", "unit": "e"}) + builder.transform({"price": "e", "unit": "e"}) def test_valid_param(self): builder = Builder.build( @@ -230,6 +231,6 @@ def test_valid_param(self): } ) - result = builder.apply({"price": 1.5, "unit": 2}) + result = builder.transform({"price": 1.5, "unit": 2}) assert result == 3 diff --git a/tests/test_computed_fields.py b/tests/test_computed_fields.py index b68a404..62dcd43 100644 --- a/tests/test_computed_fields.py +++ b/tests/test_computed_fields.py @@ -3,6 +3,8 @@ from magicparse.fields import ComputedField from unittest import TestCase +from magicparse.transform import Ok + class TestBuild(TestCase): def test_without_builder(self): @@ -30,9 +32,9 @@ def test_with_valid_builder(self): } ) - computed = field.read_value({"code_1": "01", "code_2": "02"}) + computed = field.parse({"code_1": "01", "code_2": "02"}) - assert computed == "0102" + assert computed == Ok(value="0102") def test_error_format(self): field = ComputedField( @@ -48,6 +50,6 @@ def test_error_format(self): ) with pytest.raises(KeyError) as error: - field.read_value({}) + field.parse({}) assert field.error(error.value) == {"error": "code_1", "field-key": "output"} diff --git a/tests/test_fields.py b/tests/test_fields.py index 249675e..1d8d8ac 100644 --- a/tests/test_fields.py +++ b/tests/test_fields.py @@ -1,6 +1,7 @@ from decimal import Decimal import pytest +from magicparse.transform import Ok from magicparse.type_converters import DecimalConverter, StrConverter from magicparse.fields import ColumnarField, CsvField, Field from magicparse.post_processors import Divide @@ -32,8 +33,8 @@ def test_chain_transformations(): assert isinstance(field.transforms[1], StrConverter) assert isinstance(field.transforms[2], RegexMatches) - result = field.read_value(" mac adam ") - assert result == "mac adam" + result = field.parse(" mac adam ") + assert result == Ok(value="mac adam") def test_chain_transformations_with_post_processors(): @@ 
-56,14 +57,14 @@ def test_chain_transformations_with_post_processors(): assert isinstance(field.transforms[0], Replace) assert isinstance(field.transforms[1], DecimalConverter) assert isinstance(field.transforms[2], Divide) - assert field.read_value("XXX150") == Decimal("1.50") + assert field.parse("XXX150") == Ok(value=Decimal("1.50")) def test_csv_error_format(): field = CsvField("ratio", {"type": "decimal", "column-number": 1}) with pytest.raises(ValueError) as error: - field.read_value("hello") + field.parse("hello") assert field.error(error.value) == { "column-number": 1, @@ -78,7 +79,7 @@ def test_columnar_error_format(): ) with pytest.raises(ValueError) as error: - field.read_value("hello") + field.parse("hello") assert field.error(error.value) == { "column-start": 0, @@ -103,8 +104,8 @@ def test_optional_field(): "post-processors": [{"name": "divide", "parameters": {"denominator": 100}}], } ) - assert field.read_value("XXX150") == Decimal("1.50") - assert field.read_value("") is None + assert field.parse("XXX150") == Ok(value=Decimal("1.50")) + assert field.parse("") == Ok(value=None) def test_required_field(): @@ -115,7 +116,7 @@ def test_required_field(): "optional": False, } ) - assert field.read_value("1.5") == Decimal("1.50") + assert field.parse("1.5") == Ok(value=Decimal("1.50")) def test_require_field_with_empty_value(): @@ -128,7 +129,7 @@ def test_require_field_with_empty_value(): with pytest.raises( ValueError, match="pepito field is required but the value was empty" ): - field.read_value("") + field.parse("") def test_field_without_key(): diff --git a/tests/test_post_processors.py b/tests/test_post_processors.py index b6bd4e1..d65f8a5 100644 --- a/tests/test_post_processors.py +++ b/tests/test_post_processors.py @@ -34,19 +34,19 @@ def test_divide_int(self): post_processor = PostProcessor.build( {"name": "divide", "parameters": {"denominator": 100}} ) - assert post_processor.apply(150) == 1.5 + assert post_processor.transform(150) == 1.5 def 
test_divide_float(self): post_processor = PostProcessor.build( {"name": "divide", "parameters": {"denominator": 100}} ) - assert post_processor.apply(1.63) == 0.0163 + assert post_processor.transform(1.63) == 0.0163 def test_divide_decimal(self): post_processor = PostProcessor.build( {"name": "divide", "parameters": {"denominator": 100}} ) - assert post_processor.apply(Decimal("1.63")) == Decimal("0.0163") + assert post_processor.transform(Decimal("1.63")) == Decimal("0.0163") class TestRound(TestCase): @@ -62,7 +62,7 @@ def test_with_valid_precision(self): post_processor = PostProcessor.build( {"name": "round", "parameters": {"precision": 2}} ) - assert post_processor.apply(3.14159265359) == 3.14 + assert post_processor.transform(3.14159265359) == 3.14 class TestRegister(TestCase): @@ -71,7 +71,7 @@ class NoThanksPostProcessor(PostProcessor): def key() -> str: return "no-thanks" - def apply(self, value): + def transform(self, value): return f"{value} ? No thanks" def test_register(self): diff --git a/tests/test_pre_processors.py b/tests/test_pre_processors.py index 2f5fb6e..553cc24 100644 --- a/tests/test_pre_processors.py +++ b/tests/test_pre_processors.py @@ -70,13 +70,13 @@ def test_do_nothing(self): pre_processor = PreProcessor.build( {"name": "left-pad-zeroes", "parameters": {"width": 10}} ) - assert pre_processor.apply("abcdefghij") == "abcdefghij" + assert pre_processor.transform("abcdefghij") == "abcdefghij" def test_pad(self): pre_processor = PreProcessor.build( {"name": "left-pad-zeroes", "parameters": {"width": 10}} ) - assert pre_processor.apply("abc") == "0000000abc" + assert pre_processor.transform("abc") == "0000000abc" class TestMap(TestCase): @@ -88,13 +88,13 @@ def test_unknown_input(self): ValueError, match="value 'an input' does not map to any values in \\['A', 'B'\\]", ): - pre_processor.apply("an input") + pre_processor.transform("an input") def test_known_input(self): pre_processor = PreProcessor.build( {"name": "map", "parameters": 
{"values": {"A": "1", "B": "2"}}} ) - assert pre_processor.apply("A") == "1" + assert pre_processor.transform("A") == "1" class TestReplace(TestCase): @@ -102,23 +102,23 @@ def test_pattern_not_found(self): pre_processor = PreProcessor.build( {"name": "replace", "parameters": {"pattern": "bbb", "replacement": "BBB"}} ) - assert pre_processor.apply("an input") == "an input" + assert pre_processor.transform("an input") == "an input" def test_success(self): pre_processor = PreProcessor.build( {"name": "replace", "parameters": {"pattern": "bbb", "replacement": "BBB"}} ) - assert pre_processor.apply("aaabbbccc") == "aaaBBBccc" + assert pre_processor.transform("aaabbbccc") == "aaaBBBccc" class TestStripWhitespaces(TestCase): def test_do_nothing(self): pre_processor = PreProcessor.build({"name": "strip-whitespaces"}) - assert pre_processor.apply("an input") == "an input" + assert pre_processor.transform("an input") == "an input" def test_success(self): pre_processor = PreProcessor.build({"name": "strip-whitespaces"}) - assert pre_processor.apply(" an input ") == "an input" + assert pre_processor.transform(" an input ") == "an input" class TestLeftStrip(TestCase): @@ -126,13 +126,13 @@ def test_do_nothing(self): pre_processor = PreProcessor.build( {"name": "left-strip", "parameters": {"characters": "0"}} ) - assert pre_processor.apply("12345") == "12345" + assert pre_processor.transform("12345") == "12345" def test_success(self): pre_processor = PreProcessor.build( {"name": "left-strip", "parameters": {"characters": "0"}} ) - assert pre_processor.apply("0000012345") == "12345" + assert pre_processor.transform("0000012345") == "12345" class TestRegexExtract(TestCase): @@ -153,7 +153,7 @@ def test_pattern_not_found(self): } ) with pytest.raises(ValueError) as error: - pre_processor.apply("an input") + pre_processor.transform("an input") assert ( error.value.args[0] @@ -167,7 +167,7 @@ def test_pattern_found(self): "parameters": {"pattern": "^xxx(?P\\d{13})xxx$"}, } ) - 
pre_processor.apply("xxx9780201379624xxx") == "9780201379624" + pre_processor.transform("xxx9780201379624xxx") == "9780201379624" class TestRegister(TestCase): @@ -176,7 +176,7 @@ class YesPreProcessor(PreProcessor): def key() -> str: return "yes" - def apply(self, value): + def transform(self, value): return f"YES {value}" def test_register(self): diff --git a/tests/test_schema.py b/tests/test_schema.py index 622473d..235c1f3 100644 --- a/tests/test_schema.py +++ b/tests/test_schema.py @@ -1,6 +1,9 @@ from decimal import Decimal +from typing import Any from magicparse import Schema +from magicparse.post_processors import PostProcessor +from magicparse.pre_processors import PreProcessor from magicparse.schema import ColumnarSchema, CsvSchema, ParsedRow from magicparse.fields import ColumnarField, CsvField import pytest @@ -411,3 +414,224 @@ def test_concat(self): "field_2": "B", "computed_field": "AB", } + + +class TestHandleTypeError(TestCase): + def test_default_behavior_raise(self): + schema = Schema.build( + { + "file_type": "csv", + "fields": [ + {"key": "age", "type": "int", "column-number": 1} + ], + } + ) + rows = list(schema.stream_parse(b"a")) + assert rows == [ + ParsedRow(row_number=1, values={}, errors=[ + { + "row-number": 1, + "column-number": 1, + "field-key": "age", + "error": "value 'a' is not a valid integer", + } + ]) + ] + + def test_skip_row(self): + schema = Schema.build( + { + "file_type": "csv", + "fields": [ + { + "key": "age", + "type": {"key": "int", "on-error": "skip-row"}, + "column-number": 1, + } + ], + } + ) + rows = list(schema.stream_parse(b"a")) + assert rows == [] + + +class TestHandleValidationError(TestCase): + def test_default_behavior_raise(self): + schema = Schema.build( + { + "file_type": "csv", + "fields": [ + { + "key": "age", + "type": "int", + "column-number": 1, + "validators": [ + { + "name": "greater-than", + "parameters": {"threshold": 0}, + } + ], + } + ], + } + ) + + rows = list(schema.stream_parse(b"-1")) + + 
assert rows == [ + ParsedRow(row_number=1, values={}, errors=[ + { + "row-number": 1, + "column-number": 1, + "field-key": "age", + "error": "value must be greater than 0", + } + ]) + ] + + def test_skip_row(self): + schema = Schema.build( + { + "file_type": "csv", + "fields": [ + { + "key": "age", + "type": "int", + "column-number": 1, + "validators": [ + { + "name": "greater-than", + "parameters": {"threshold": 0}, + "on-error": "skip-row", + } + ], + } + ], + } + ) + rows = list(schema.stream_parse(b"-1")) + assert rows == [] + + +class TestHandlePostProcessorError(TestCase): + class FailPostProcessor(PostProcessor): + def transform(self, value: Any) -> Any: + raise ValueError("test error") + + @staticmethod + def key() -> str: + return "fail-post-processor" + + def test_default_behavior_raise(self): + PostProcessor.register(self.FailPostProcessor) + schema = Schema.build( + { + "file_type": "csv", + "fields": [ + { + "key": "age", + "type": "int", + "column-number": 1, + "post-processors": [ + { + "name": "fail-post-processor", + } + ], + } + ], + } + ) + rows = list(schema.stream_parse(b"1")) + assert rows == [ParsedRow(row_number=1, values={}, errors=[ + { + "row-number": 1, + "column-number": 1, + "field-key": "age", + "error": "test error", + } + ])] + + def test_skip_row(self): + PostProcessor.register(self.FailPostProcessor) + schema = Schema.build( + { + "file_type": "csv", + "fields": [ + { + "key": "age", + "type": "int", + "column-number": 1, + "post-processors": [ + { + "name": "fail-post-processor", + "on-error": "skip-row", + } + ], + } + ], + } + ) + rows = list(schema.stream_parse(b"1")) + assert rows == [] + + +class TestHandlePreProcessorError(TestCase): + class FailPreProcessor(PreProcessor): + def transform(self, value: Any) -> Any: + raise ValueError("test error") + + @staticmethod + def key() -> str: + return "fail-pre-processor" + + def test_default_behavior_raise(self): + PreProcessor.register(self.FailPreProcessor) + schema = 
Schema.build( + { + "file_type": "csv", + "fields": [ + { + "key": "age", + "type": "int", + "column-number": 1, + "pre-processors": [ + { + "name": "fail-pre-processor", + } + ], + } + ], + } + ) + rows = list(schema.stream_parse(b"1")) + assert rows == [ParsedRow(row_number=1, values={}, errors=[ + { + "row-number": 1, + "column-number": 1, + "field-key": "age", + "error": "test error", + } + ])] + + def test_skip_row(self): + PreProcessor.register(self.FailPreProcessor) + schema = Schema.build( + { + "file_type": "csv", + "fields": [ + { + "key": "age", + "type": "int", + "column-number": 1, + "pre-processors": [ + { + "name": "fail-pre-processor", + "on-error": "skip-row", + } + ], + } + ], + } + ) + rows = list(schema.stream_parse(b"1")) + assert rows == [] diff --git a/tests/test_type_converters.py b/tests/test_type_converters.py index 9991543..c36fb47 100644 --- a/tests/test_type_converters.py +++ b/tests/test_type_converters.py @@ -44,79 +44,81 @@ def test_no_type_provided(self): with pytest.raises(ValueError, match="missing key 'type'"): TypeConverter.build({}) + class TestBuildComplexeType(TestCase): def test_str(self): type_converter = TypeConverter.build({"type": {"key": "str"}}) assert isinstance(type_converter, StrConverter) + class TestStr(TestCase): - def test_apply(self): + def test_transform(self): type_converter = TypeConverter.build({"type": "str"}) - assert type_converter.apply("hello") == "hello" + assert type_converter.transform("hello") == "hello" class TestInt(TestCase): - def test_apply(self): + def test_transform(self): type_converter = TypeConverter.build({"type": "int"}) - assert type_converter.apply("153") == 153 + assert type_converter.transform("153") == 153 - def test_apply_failed(self): + def test_transform_failed(self): type_converter = TypeConverter.build({"type": "int"}) with pytest.raises(ValueError, match="value 'abc' is not a valid integer"): - type_converter.apply("abc") + type_converter.transform("abc") class 
TestDecimal(TestCase): - def test_apply(self): + def test_transform(self): type_converter = TypeConverter.build({"type": "decimal"}) - assert type_converter.apply("153.56") == Decimal("153.56") + assert type_converter.transform("153.56") == Decimal("153.56") - def test_apply_failed(self): + def test_transform_failed(self): type_converter = TypeConverter.build({"type": "decimal"}) with pytest.raises(ValueError, match="value 'abc' is not a valid decimal"): - type_converter.apply("abc") + type_converter.transform("abc") class TestTime(TestCase): - def test_apply(self): + def test_transform(self): type_converter = TypeConverter.build({"type": "time"}) - assert type_converter.apply("10:12:03+03:00") == time( + assert type_converter.transform("10:12:03+03:00") == time( 10, 12, 3, tzinfo=timezone(timedelta(hours=3)) ) - def test_apply_failed(self): + def test_transform_failed(self): type_converter = TypeConverter.build({"type": "time"}) with pytest.raises(ValueError): - type_converter.apply("Invalid") + type_converter.transform("Invalid") - def test_apply_naive_time(self): + def test_transform_naive_time(self): type_converter = TypeConverter.build({"type": "time"}) with pytest.raises(ValueError): - type_converter.apply("10:12:03") + type_converter.transform("10:12:03") class TestDateTime(TestCase): - def test_apply(self): + def test_transform(self): type_converter = TypeConverter.build({"type": "datetime"}) - assert type_converter.apply("2022-01-12T10:12:03+03:00") == datetime( + assert type_converter.transform("2022-01-12T10:12:03+03:00") == datetime( 2022, 1, 12, 10, 12, 3, tzinfo=timezone(timedelta(hours=3)) ) - def test_apply_failed(self): + def test_transform_failed(self): type_converter = TypeConverter.build({"type": "datetime"}) with pytest.raises(ValueError): - type_converter.apply("Invalid") + type_converter.transform("Invalid") - def test_apply_naive_time(self): + def test_transform_naive_time(self): type_converter = TypeConverter.build({"type": "datetime"}) 
with pytest.raises(ValueError): - type_converter.apply("2022-01-12T10:12:03") + type_converter.transform("2022-01-12T10:12:03") class TestRegister(TestCase): @@ -125,7 +127,7 @@ class GuidConverter(TypeConverter): def key() -> str: return "guid" - def apply(self, value): + def convert(self, value): return UUID(value) def test_register(self): @@ -134,19 +136,26 @@ def test_register(self): type_converter = TypeConverter.build({"type": "guid"}) assert isinstance(type_converter, self.GuidConverter) + class TestNullableField(TestCase): def test_int(self): type_converter = TypeConverter.build({"type": {"key": "int", "nullable": True}}) - assert type_converter.apply(None) is None - + assert type_converter.transform(None) is None + def test_decimal(self): - type_converter = TypeConverter.build({"type": {"key": "decimal", "nullable": True}}) - assert type_converter.apply(None) is None - + type_converter = TypeConverter.build( + {"type": {"key": "decimal", "nullable": True}} + ) + assert type_converter.transform(None) is None + def test_time(self): - type_converter = TypeConverter.build({"type": {"key": "time", "nullable": True}}) - assert type_converter.apply(None) is None - + type_converter = TypeConverter.build( + {"type": {"key": "time", "nullable": True}} + ) + assert type_converter.transform(None) is None + def test_datetime(self): - type_converter = TypeConverter.build({"type": {"key": "datetime", "nullable": True}}) - assert type_converter.apply(None) is None + type_converter = TypeConverter.build( + {"type": {"key": "datetime", "nullable": True}} + ) + assert type_converter.transform(None) is None diff --git a/tests/test_validators.py b/tests/test_validators.py index 00eabe5..d5b3e58 100644 --- a/tests/test_validators.py +++ b/tests/test_validators.py @@ -16,10 +16,10 @@ def test_regex_matches(self): assert isinstance(validator, RegexMatches) assert isinstance(validator.pattern, re.Pattern) assert validator.pattern.pattern == "^\\d{13}$" - + def 
test_not_null_or_empty(self): validator = Validator.build( - { "name": "not-null-or-empty" } + {"name": "not-null-or-empty"} ) assert isinstance(validator, NotNullOrEmpty) @@ -53,7 +53,7 @@ def test_match(self): } ) - assert validator.apply("9780201379624") == "9780201379624" + assert validator.transform("9780201379624") == "9780201379624" def test_does_not_match(self): validator = Validator.build( @@ -65,7 +65,7 @@ def test_does_not_match(self): with pytest.raises( ValueError, match=r"string does not match regex '\^\\d\{13\}\$'" ): - validator.apply("hello") + validator.transform("hello") class TestRegister(TestCase): @@ -74,7 +74,7 @@ class IsTheAnswerValidator(Validator): def key() -> str: return "is-the-answer" - def apply(self, value): + def transform(self, value): if value == 42: return value raise ValueError(f"{value} is not the answer !") @@ -92,7 +92,7 @@ def test_it_successfully_returns_the_value_when_greater_than_threshold(self): {"name": "greater-than", "parameters": {"threshold": 11}} ) - assert validator.apply(12) == 12 + assert validator.transform(12) == 12 def test_it_successfully_returns_the_value_when_greater_than_decimal_threshold( self, @@ -101,7 +101,7 @@ def test_it_successfully_returns_the_value_when_greater_than_decimal_threshold( {"name": "greater-than", "parameters": {"threshold": 11.4}} ) - assert validator.apply(11.5) == 11.5 + assert validator.transform(11.5) == 11.5 def test_it_raises_an_error_when_the_value_is_lower_than_threshold(self): validator = Validator.build( @@ -109,7 +109,7 @@ def test_it_raises_an_error_when_the_value_is_lower_than_threshold(self): ) with pytest.raises(ValueError, match="value must be greater than 10"): - validator.apply(9.9999) + validator.transform(9.9999) def test_it_raises_an_error_when_the_value_is_equal_to_threshold(self): validator = Validator.build( @@ -117,29 +117,29 @@ def test_it_raises_an_error_when_the_value_is_equal_to_threshold(self): ) with pytest.raises(ValueError, match="value must be 
greater than 10"): - validator.apply(10) + validator.transform(10) class TestNotNullOrEmptyValidator(TestCase): - def test_it_successfully_returns_the_value_when_the_value_is_not_null_or_empty(self): + def test_success_returns_the_value(self): validator = Validator.build( {"name": "not-null-or-empty"} ) - assert validator.apply("hello") == "hello" + assert validator.transform("hello") == "hello" - def test_it_raises_an_error_when_the_value_is_null(self): + def test_raise_when_the_value_is_null(self): validator = Validator.build( {"name": "not-null-or-empty"} ) with pytest.raises(ValueError, match="value must not be null or empty"): - validator.apply(None) + validator.transform(None) - def test_it_raises_an_error_when_the_value_is_empty(self): + def test_raises_when_the_value_is_empty(self): validator = Validator.build( {"name": "not-null-or-empty"} ) with pytest.raises(ValueError, match="value must not be null or empty"): - validator.apply("") \ No newline at end of file + validator.transform("") From 04efe86ed7ed3dba8cc454824a080ee561dd8013 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Perin?= Date: Thu, 18 Sep 2025 17:36:48 +0200 Subject: [PATCH 07/11] feat: Return type row to know if row is success, skip or in error --- magicparse/__init__.py | 18 ++- magicparse/fields.py | 4 +- magicparse/schema.py | 69 ++++++----- magicparse/transform.py | 15 +-- tests/test_encodings.py | 52 ++++++--- tests/test_schema.py | 245 +++++++++++++++++++++++++--------------- 6 files changed, 253 insertions(+), 150 deletions(-) diff --git a/magicparse/__init__.py b/magicparse/__init__.py index 0d42af9..1f88d83 100644 --- a/magicparse/__init__.py +++ b/magicparse/__init__.py @@ -1,6 +1,12 @@ from io import BytesIO -from .schema import ParsedRow, Schema, builtins as builtins_schemas +from .schema import ( + RowParsed, + RowFailed, + RowSkipped, + Schema, + builtins as builtins_schemas, +) from .post_processors import PostProcessor, builtins as builtins_post_processors from 
.pre_processors import PreProcessor, builtins as builtins_pre_processors from .builders import ( @@ -9,7 +15,7 @@ ) from .transform import Transform from .type_converters import TypeConverter, builtins as builtins_type_converters -from typing import Any, Dict, Iterable, List, Tuple, Union +from typing import Any, Dict, Iterable, List, Union from .validators import Validator, builtins as builtins_validators @@ -20,21 +26,23 @@ "PostProcessor", "PreProcessor", "Schema", - "ParsedRow", + "RowParsed", + "RowSkipped", + "RowFailed", "Validator", ] def parse( data: Union[bytes, BytesIO], schema_options: Dict[str, Any] -) -> Tuple[List[dict], List[dict]]: +) -> List[RowParsed | RowSkipped | RowFailed]: schema_definition = Schema.build(schema_options) return schema_definition.parse(data) def stream_parse( data: Union[bytes, BytesIO], schema_options: Dict[str, Any] -) -> Iterable[ParsedRow]: +) -> Iterable[RowParsed | RowSkipped | RowFailed]: schema_definition = Schema.build(schema_options) return schema_definition.stream_parse(data) diff --git a/magicparse/fields.py b/magicparse/fields.py index ef1fba1..8a3795a 100644 --- a/magicparse/fields.py +++ b/magicparse/fields.py @@ -27,9 +27,7 @@ def __init__(self, key: str, options: dict) -> None: pre_processors + [type_converter] + validators + post_processors ) - def _process_raw_value( - self, raw_value: str - ) -> Result: + def _process_raw_value(self, raw_value: str) -> Result: if not raw_value: if self.optional: return Ok(value=None) diff --git a/magicparse/schema.py b/magicparse/schema.py index 10470e5..13535a0 100644 --- a/magicparse/schema.py +++ b/magicparse/schema.py @@ -6,13 +6,24 @@ from magicparse.transform import SkipRow from .fields import Field, ComputedField from io import BytesIO -from typing import Any, Dict, List, Tuple, Union, Iterable +from typing import Any, Dict, List, Union, Iterable @dataclass(frozen=True, slots=True) -class ParsedRow: +class RowParsed: row_number: int values: dict + + 
+@dataclass(frozen=True, slots=True) +class RowSkipped: + row_number: int + errors: list[dict] + + +@dataclass(frozen=True, slots=True) +class RowFailed: + row_number: int errors: list[dict] @@ -54,19 +65,14 @@ def register(cls, schema: "Schema") -> None: cls.registry[schema.key()] = schema - def parse(self, data: Union[bytes, BytesIO]) -> Tuple[List[dict], List[dict]]: - items = [] - errors = [] - - for parsed_row in self.stream_parse(data): - if parsed_row.errors: - errors.extend(parsed_row.errors) - else: - items.append(parsed_row.values) + def parse( + self, data: Union[bytes, BytesIO] + ) -> List[RowParsed] | List[RowSkipped] | List[RowFailed]: + return list(self.stream_parse(data)) - return items, errors - - def stream_parse(self, data: Union[bytes, BytesIO]) -> Iterable[ParsedRow]: + def stream_parse( + self, data: Union[bytes, BytesIO] + ) -> Iterable[RowParsed | RowSkipped | RowFailed]: if isinstance(data, bytes): stream = BytesIO(data) else: @@ -84,41 +90,48 @@ def stream_parse(self, data: Union[bytes, BytesIO]) -> Iterable[ParsedRow]: if not any(row): continue - parsed_fields = self.process_fields(self.fields, row, row_number) - if isinstance(parsed_fields, SkipRow): + fields = self.process_fields(self.fields, row, row_number) + if not isinstance(fields, RowParsed): + yield fields continue computed_fields = self.process_fields( - self.computed_fields, parsed_fields.values, row_number + self.computed_fields, fields.values, row_number ) - if isinstance(computed_fields, SkipRow): + if not isinstance(computed_fields, RowParsed): + yield computed_fields continue - yield ParsedRow( - row_number, - {**parsed_fields.values, **computed_fields.values}, - parsed_fields.errors + computed_fields.errors, - ) + yield RowParsed(row_number, {**fields.values, **computed_fields.values}) def process_fields( self, fields: List[Field], row: List[str], row_number: int - ) -> ParsedRow | SkipRow: + ) -> RowParsed | RowSkipped | RowFailed: item = {} errors = [] + skip_row = 
False for field in fields: try: parsed_value = field.parse(row) - except Exception as exc: - errors.append({"row-number": row_number, **field.error(exc)}) + errors.append(field.error(exc)) continue if isinstance(parsed_value, SkipRow): - return parsed_value + skip_row = True + errors.append(field.error(parsed_value.exception)) + continue item[field.key] = parsed_value.value - return ParsedRow(row_number, item, errors) + if errors: + return ( + RowSkipped(row_number, errors) + if skip_row + else RowFailed(row_number, errors) + ) + + return RowParsed(row_number, item) class CsvSchema(Schema): diff --git a/magicparse/transform.py b/magicparse/transform.py index 21614d5..28dd4bc 100644 --- a/magicparse/transform.py +++ b/magicparse/transform.py @@ -11,13 +11,12 @@ class Ok: @dataclass(frozen=True, slots=True) class SkipRow: - pass + exception: Exception type Result = Ok | SkipRow - class OnError(Enum): RAISE = "raise" SKIP_ROW = "skip-row" @@ -31,19 +30,15 @@ def __init__(self, on_error: OnError) -> None: def build(cls, options: dict) -> "Transform": pass - def apply( - self, last_result: Result - ) -> Result: + def apply(self, last_result: Result) -> Result: if isinstance(last_result, SkipRow): return last_result try: - return Ok( - value=self.transform(last_result.value) - ) - except Exception: + return Ok(value=self.transform(last_result.value)) + except Exception as exc: if self.on_error == OnError.SKIP_ROW.value: - return SkipRow() + return SkipRow(exception=exc) raise @abstractmethod diff --git a/tests/test_encodings.py b/tests/test_encodings.py index de5df1f..5e48874 100644 --- a/tests/test_encodings.py +++ b/tests/test_encodings.py @@ -1,6 +1,7 @@ from unittest import TestCase from magicparse import Schema +from magicparse.schema import RowParsed class TestCsvEncoding(TestCase): @@ -12,10 +13,13 @@ def test_default_encoding(self): } ) - rows, errors = schema.parse("José\n李\n💩\n".encode("utf-8")) + rows = schema.parse("José\n李\n💩\n".encode("utf-8")) - assert 
len(errors) == 0 - assert rows == [{"name": "José"}, {"name": "李"}, {"name": "💩"}] + assert rows == [ + RowParsed(row_number=1, values={"name": "José"}), + RowParsed(row_number=2, values={"name": "李"}), + RowParsed(row_number=3, values={"name": "💩"}), + ] def test_exotic_encoding(self): schema = Schema.build( @@ -26,17 +30,22 @@ def test_exotic_encoding(self): } ) - rows, errors = schema.parse( + rows = schema.parse( "Да здравствует Владимир проклятый\n" "Да здравствует Карл Маркс\n" "Да здравствует Россия\n".encode("iso8859_5") ) - assert len(errors) == 0 assert rows == [ - {"name": "Да здравствует Владимир проклятый"}, - {"name": "Да здравствует Карл Маркс"}, - {"name": "Да здравствует Россия"}, + RowParsed( + row_number=1, values={"name": "Да здравствует Владимир проклятый"} + ), + RowParsed( + row_number=2, values={"name": "Да здравствует Карл Маркс"} + ), + RowParsed( + row_number=3, values={"name": "Да здравствует Россия"} + ), ] @@ -56,10 +65,13 @@ def test_default_encoding(self): } ) - rows, errors = schema.parse("José\n李 \n💩 \n".encode("utf-8")) + rows = schema.parse("José\n李 \n💩 \n".encode("utf-8")) - assert len(errors) == 0 - assert rows == [{"name": "José"}, {"name": "李 "}, {"name": "💩 "}] + assert rows == [ + RowParsed(row_number=1, values={"name": "José"}), + RowParsed(row_number=2, values={"name": "李 "}), + RowParsed(row_number=3, values={"name": "💩 "}), + ] def test_exotic_encoding(self): schema = Schema.build( @@ -77,15 +89,23 @@ def test_exotic_encoding(self): } ) - rows, errors = schema.parse( + rows = schema.parse( "Да здравствует Владимир проклятый\n" "Да здравствует Карл Маркс \n" "Да здравствует Россия \n".encode("iso8859_5") ) - assert len(errors) == 0 assert rows == [ - {"name": "Да здравствует Владимир проклятый"}, - {"name": "Да здравствует Карл Маркс "}, - {"name": "Да здравствует Россия "}, + RowParsed( + row_number=1, + values={"name": "Да здравствует Владимир проклятый"} + ), + RowParsed( + row_number=2, + values={"name": "Да 
здравствует Карл Маркс "} + ), + RowParsed( + row_number=3, + values={"name": "Да здравствует Россия "} + ), ] diff --git a/tests/test_schema.py b/tests/test_schema.py index 235c1f3..e3fe86a 100644 --- a/tests/test_schema.py +++ b/tests/test_schema.py @@ -4,7 +4,7 @@ from magicparse import Schema from magicparse.post_processors import PostProcessor from magicparse.pre_processors import PreProcessor -from magicparse.schema import ColumnarSchema, CsvSchema, ParsedRow +from magicparse.schema import ColumnarSchema, CsvSchema, RowParsed, RowFailed, RowSkipped from magicparse.fields import ColumnarField, CsvField import pytest from unittest import TestCase @@ -64,15 +64,13 @@ def test_with_no_data(self): "fields": [{"key": "name", "type": "str", "column-number": 1}], } ) - rows, errors = schema.parse(b"") - assert not rows - assert not errors + rows = schema.parse(b"") + assert rows == [] def test_with_no_field_definition(self): schema = Schema.build({"file_type": "csv", "fields": []}) - rows, errors = schema.parse(b"a,b,c") - assert rows == [{}] - assert not errors + rows = schema.parse(b"a,b,c") + assert rows == [RowParsed(row_number=1, values={})] def test_without_header(self): schema = Schema.build( @@ -81,9 +79,8 @@ def test_without_header(self): "fields": [{"key": "name", "type": "str", "column-number": 1}], } ) - rows, errors = schema.parse(b"1") - assert rows == [{"name": "1"}] - assert not errors + rows = schema.parse(b"1") + assert rows == [RowParsed(row_number=1, values={"name": "1"})] def test_with_header(self): schema = Schema.build( @@ -93,9 +90,8 @@ def test_with_header(self): "fields": [{"key": "name", "type": "str", "column-number": 1}], } ) - rows, errors = schema.parse(b"column_name\n1") - assert rows == [{"name": "1"}] - assert not errors + rows = schema.parse(b"column_name\n1") + assert rows == [RowParsed(row_number=2, values={"name": "1"})] def test_error_display_row_number(self): schema = Schema.build( @@ -104,15 +100,13 @@ def 
test_error_display_row_number(self): "fields": [{"key": "age", "type": "int", "column-number": 1}], } ) - rows, errors = schema.parse(b"a") - assert not rows - assert errors == [ - { - "row-number": 1, + rows = schema.parse(b"a") + assert rows == [ + RowFailed(row_number=1, errors=[{ "column-number": 1, "field-key": "age", "error": "value 'a' is not a valid integer", - } + }]) ] def test_errors_do_not_halt_parsing(self): @@ -122,15 +116,15 @@ def test_errors_do_not_halt_parsing(self): "fields": [{"key": "age", "type": "int", "column-number": 1}], } ) - rows, errors = schema.parse(b"1\na\n2") - assert rows == [{"age": 1}, {"age": 2}] - assert errors == [ - { - "row-number": 2, + rows = schema.parse(b"1\na\n2") + assert rows == [ + RowParsed(row_number=1, values={"age": 1}), + RowFailed(row_number=2, errors=[{ "column-number": 1, "field-key": "age", "error": "value 'a' is not a valid integer", - } + }]), + RowParsed(row_number=3, values={"age": 2}), ] def test_parse_should_skip_empty_lines(self): @@ -140,13 +134,12 @@ def test_parse_should_skip_empty_lines(self): "fields": [{"key": "name", "type": "str", "column-number": 1}], } ) - rows, errors = schema.parse( + rows = schema.parse( b"""1 """ ) - assert rows == [{"name": "1"}] - assert not errors + assert rows == [RowParsed(row_number=1, values={"name": "1"})] class TestColumnarParse(TestCase): @@ -164,15 +157,13 @@ def test_with_no_data(self): ], } ) - rows, errors = schema.parse(b"") - assert not rows - assert not errors + rows = schema.parse(b"") + assert rows == [] def test_with_no_field_definition(self): schema = Schema.build({"file_type": "columnar", "fields": []}) - rows, errors = schema.parse(b"a") - assert rows == [{}] - assert not errors + rows = schema.parse(b"a") + assert rows == [RowParsed(row_number=1, values={})] def test_parse(self): schema = Schema.build( @@ -188,9 +179,8 @@ def test_parse(self): ], } ) - rows, errors = schema.parse(b"1") - assert rows == [{"name": "1"}] - assert not errors + rows = 
schema.parse(b"1") + assert rows == [RowParsed(row_number=1, values={"name": "1"})] def test_error_display_row_number(self): schema = Schema.build( @@ -201,16 +191,14 @@ def test_error_display_row_number(self): ], } ) - rows, errors = schema.parse(b"a") - assert not rows - assert errors == [ - { - "row-number": 1, + rows = schema.parse(b"a") + assert rows == [ + RowFailed(row_number=1, errors=[{ "column-start": 0, "column-length": 1, "field-key": "age", "error": "value 'a' is not a valid integer", - } + }]) ] def test_errors_do_not_halt_parsing(self): @@ -222,16 +210,16 @@ def test_errors_do_not_halt_parsing(self): ], } ) - rows, errors = schema.parse(b"1\na\n2") - assert rows == [{"age": 1}, {"age": 2}] - assert errors == [ - { - "row-number": 2, + rows = schema.parse(b"1\na\n2") + assert rows == [ + RowParsed(row_number=1, values={"age": 1}), + RowFailed(row_number=2, errors=[{ "column-start": 0, "column-length": 1, "field-key": "age", "error": "value 'a' is not a valid integer", - } + }]), + RowParsed(row_number=3, values={"age": 2}), ] def test_parse_should_skip_empty_lines(self): @@ -248,13 +236,81 @@ def test_parse_should_skip_empty_lines(self): ], } ) - rows, errors = schema.parse( + rows = schema.parse( b"""8013109C """ ) - assert rows == [{"name": "8013109C"}] - assert not errors + assert rows == [RowParsed(row_number=1, values={"name": "8013109C"})] + + def test_should_return_all_errors_in_a_row(self): + schema = Schema.build( + { + "file_type": "csv", + "delimiter": ";", + "fields": [ + {"key": "age", "type": "int", "column-number": 1}, + {"key": "age2", "type": "int", "column-number": 2} + ], + } + ) + + rows = schema.parse(b"a;a") + + assert rows == [ + RowFailed(row_number=1, errors=[ + { + "column-number": 1, + "field-key": "age", + "error": "value 'a' is not a valid integer", + }, + { + "column-number": 2, + "field-key": "age2", + "error": "value 'a' is not a valid integer", + } + ]) + ] + + def test_skip_is_prioritized_over_errors(self): + schema = 
Schema.build( + { + "file_type": "csv", + "delimiter": ";", + "fields": [ + { + "key": "age", + "type": "int", + "column-number": 1, + "validators": [ + { + "name": "greater-than", + "parameters": {"threshold": 0}, + "on-error": "skip-row" + } + ], + }, + {"key": "age2", "type": "int", "column-number": 2} + ], + } + ) + + rows = schema.parse(b"-1;a") + + assert rows == [ + RowSkipped(row_number=1, errors=[ + { + "column-number": 1, + "field-key": "age", + "error": "value must be greater than 0", + }, + { + "column-number": 2, + "field-key": "age2", + "error": "value 'a' is not a valid integer", + } + ]) + ] class TestQuotingSetting(TestCase): @@ -266,9 +322,10 @@ def test_no_quote(self): "fields": [{"key": "column_1", "type": "decimal", "column-number": 1}], } ) - rows, errors = schema.parse(b"column_1\n6.66") - assert rows == [{"column_1": Decimal("6.66")}] - assert not errors + rows = schema.parse(b"column_1\n6.66") + assert rows == [ + RowParsed(row_number=2, values={"column_1": Decimal("6.66")}) + ] def test_single_quote(self): schema = Schema.build( @@ -279,9 +336,10 @@ def test_single_quote(self): "fields": [{"key": "column_1", "type": "decimal", "column-number": 1}], } ) - rows, errors = schema.parse(b"column_1\n'6.66'") - assert rows == [{"column_1": Decimal("6.66")}] - assert not errors + rows = schema.parse(b"column_1\n'6.66'") + assert rows == [ + RowParsed(row_number=2, values={"column_1": Decimal("6.66")}) + ] def test_double_quote(self): schema = Schema.build( @@ -292,9 +350,10 @@ def test_double_quote(self): "fields": [{"key": "column_1", "type": "decimal", "column-number": 1}], } ) - rows, errors = schema.parse(b'column_1\n"6.66"') - assert rows == [{"column_1": Decimal("6.66")}] - assert not errors + rows = schema.parse(b'column_1\n"6.66"') + assert rows == [ + RowParsed(row_number=2, values={"column_1": Decimal("6.66")}) + ] def test_asymetrical_quote(self): schema = Schema.build( @@ -304,8 +363,10 @@ def test_asymetrical_quote(self): "fields": 
[{"key": "column_1", "type": "str", "column-number": 1}], } ) - rows, errors = schema.parse(b'column_1\n"test ""quoting""') - assert rows == [{"column_1": '"test ""quoting""'}] + rows = schema.parse(b'column_1\n"test ""quoting""') + assert rows == [ + RowParsed(row_number=2, values={"column_1": '"test ""quoting""'}) + ] class TestRegister(TestCase): @@ -340,20 +401,17 @@ def test_stream_parse_errors_do_not_halt_parsing(self): ) rows = list(schema.stream_parse(b"1\na\n2")) assert rows == [ - ParsedRow(row_number=1, values={"age": 1}, errors=[]), - ParsedRow( + RowParsed(row_number=1, values={"age": 1}), + RowFailed( row_number=2, - values={}, errors=[ { - "row-number": 2, "column-number": 1, "field-key": "age", "error": "value 'a' is not a valid integer", } - ], - ), - ParsedRow(row_number=3, values={"age": 2}, errors=[]), + ]), + RowParsed(row_number=3, values={"age": 2}), ] def test_stream_parse_with_header_first_row_number_is_2(self): @@ -428,14 +486,11 @@ def test_default_behavior_raise(self): ) rows = list(schema.stream_parse(b"a")) assert rows == [ - ParsedRow(row_number=1, values={}, errors=[ - { - "row-number": 1, + RowFailed(row_number=1, errors=[{ "column-number": 1, "field-key": "age", "error": "value 'a' is not a valid integer", - } - ]) + }]) ] def test_skip_row(self): @@ -452,7 +507,12 @@ def test_skip_row(self): } ) rows = list(schema.stream_parse(b"a")) - assert rows == [] + assert rows == [RowSkipped(row_number=1, errors=[{ + "column-number": 1, + "field-key": "age", + "error": "value 'a' is not a valid integer", + } + ])] class TestHandleValidationError(TestCase): @@ -479,9 +539,7 @@ def test_default_behavior_raise(self): rows = list(schema.stream_parse(b"-1")) assert rows == [ - ParsedRow(row_number=1, values={}, errors=[ - { - "row-number": 1, + RowFailed(row_number=1, errors=[{ "column-number": 1, "field-key": "age", "error": "value must be greater than 0", @@ -510,7 +568,12 @@ def test_skip_row(self): } ) rows = list(schema.stream_parse(b"-1")) 
- assert rows == [] + assert rows == [RowSkipped(row_number=1, errors=[{ + "column-number": 1, + "field-key": "age", + "error": "value must be greater than 0", + } + ])] class TestHandlePostProcessorError(TestCase): @@ -542,9 +605,7 @@ def test_default_behavior_raise(self): } ) rows = list(schema.stream_parse(b"1")) - assert rows == [ParsedRow(row_number=1, values={}, errors=[ - { - "row-number": 1, + assert rows == [RowFailed(row_number=1, errors=[{ "column-number": 1, "field-key": "age", "error": "test error", @@ -572,7 +633,12 @@ def test_skip_row(self): } ) rows = list(schema.stream_parse(b"1")) - assert rows == [] + assert rows == [RowSkipped(row_number=1, errors=[{ + "column-number": 1, + "field-key": "age", + "error": "test error", + } + ])] class TestHandlePreProcessorError(TestCase): @@ -604,9 +670,7 @@ def test_default_behavior_raise(self): } ) rows = list(schema.stream_parse(b"1")) - assert rows == [ParsedRow(row_number=1, values={}, errors=[ - { - "row-number": 1, + assert rows == [RowFailed(row_number=1, errors=[{ "column-number": 1, "field-key": "age", "error": "test error", @@ -634,4 +698,9 @@ def test_skip_row(self): } ) rows = list(schema.stream_parse(b"1")) - assert rows == [] + assert rows == [RowSkipped(row_number=1, errors=[{ + "column-number": 1, + "field-key": "age", + "error": "test error", + } + ])] From 5f2294df7aaf7fbfe2ac0885d340731b5e15e5ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Perin?= Date: Thu, 18 Sep 2025 22:18:30 +0200 Subject: [PATCH 08/11] feat: Add coalesce computed field --- magicparse/builders.py | 27 ++++++++++++++++++++- tests/test_builders.py | 55 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 81 insertions(+), 1 deletion(-) diff --git a/magicparse/builders.py b/magicparse/builders.py index 3513f5c..f68b4b7 100644 --- a/magicparse/builders.py +++ b/magicparse/builders.py @@ -93,4 +93,29 @@ def key() -> str: return "multiply" -builtins = [Concat, Divide, Multiply] +class Coalesce(Builder): + 
def __init__(self, on_error: OnError, fields: list[str]) -> None: + super().__init__(on_error) + if not fields: + raise ValueError("parameters should defined fields to coalesce") + if ( + not isinstance(fields, list) + or not all(isinstance(field, str) for field in fields) + or len(fields) < 2 + ): + raise ValueError("parameters should have two fields at least") + + self.fields = fields + + def transform(self, row: dict) -> str: + for field in self.fields: + if row[field]: + return row[field] + return None + + @staticmethod + def key() -> str: + return "coalesce" + + +builtins = [Concat, Divide, Multiply, Coalesce] diff --git a/tests/test_builders.py b/tests/test_builders.py index 0e839f4..4e537a8 100644 --- a/tests/test_builders.py +++ b/tests/test_builders.py @@ -234,3 +234,58 @@ def test_valid_param(self): result = builder.transform({"price": 1.5, "unit": 2}) assert result == 3 + + +class TestCoalesce(TestCase): + def test_no_params(self): + with pytest.raises(TypeError): + Builder.build({"name": "coalesce"}) + + def test_empty_params(self): + with pytest.raises(TypeError): + Builder.build({"name": "coalesce", "parameters": ""}) + + def test_fields_params_empty(self): + with pytest.raises( + ValueError, + match="parameters should defined fields to coalesce" + ): + Builder.build({"name": "coalesce", "parameters": {"fields": ""}}) + + def test_fields_params_not_a_list_of_str(self): + with pytest.raises( + ValueError, + match="parameters should have two fields at least" + ): + Builder.build({"name": "coalesce", "parameters": {"fields": "xxx"}}) + + def test_fields_params_has_less_than_two_fields(self): + with pytest.raises( + ValueError, + match="parameters should have two fields at least" + ): + Builder.build({"name": "coalesce", "parameters": {"fields": ["field"]}}) + + def test_return_first_non_empty_value(self): + coalesce = Builder.build( + { + "name": "coalesce", + "parameters": {"fields": ["field1", "field2"]} + } + ) + + result = 
coalesce.transform({"field1": "", "field2": "value"}) + + assert result == "value" + + def test_return_none_if_all_values_are_empty(self): + coalesce = Builder.build( + { + "name": "coalesce", + "parameters": {"fields": ["field1", "field2"]} + } + ) + + result = coalesce.transform({"field1": "", "field2": ""}) + + assert result is None From 309985018c94730518153f092b11a49edb0b5e89 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Perin?= Date: Fri, 19 Sep 2025 08:29:56 +0200 Subject: [PATCH 09/11] fix: ComputedField can have access to previous computed field values --- magicparse/fields.py | 10 ++++----- magicparse/schema.py | 3 ++- tests/test_fields.py | 2 +- tests/test_schema.py | 52 ++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 60 insertions(+), 7 deletions(-) diff --git a/magicparse/fields.py b/magicparse/fields.py index 8a3795a..aff41c4 100644 --- a/magicparse/fields.py +++ b/magicparse/fields.py @@ -41,10 +41,10 @@ def _process_raw_value(self, raw_value: str) -> Result: return value @abstractmethod - def _read_raw_value(self, row) -> str: + def _read_raw_value(self, row: List[str] | dict) -> str: pass - def parse(self, row) -> Result: + def parse(self, row: List[str] | dict) -> Result: raw_value = self._read_raw_value(row) return self._process_raw_value(raw_value) @@ -76,7 +76,7 @@ def __init__(self, key: str, options: dict) -> None: super().__init__(key, options) self.column_number = options["column-number"] - def _read_raw_value(self, row: List[str]) -> str: + def _read_raw_value(self, row: List[str] | dict) -> str: return row[self.column_number - 1] def error(self, exception: Exception) -> dict: @@ -94,7 +94,7 @@ def __init__(self, key: str, options: dict) -> None: self.column_length = options["column-length"] self.column_end = self.column_start + self.column_length - def _read_raw_value(self, row: str) -> str: + def _read_raw_value(self, row: str | dict) -> str: return row[self.column_start : self.column_end] def error(self, 
exception: Exception) -> dict: @@ -111,7 +111,7 @@ def __init__(self, key: str, options: dict) -> None: super().__init__(key, options) self.builder = Builder.build(options["builder"]) - def _read_raw_value(self, row) -> str: + def _read_raw_value(self, row: List[str] | dict) -> str: return self.builder.transform(row) def error(self, exception: Exception) -> dict: diff --git a/magicparse/schema.py b/magicparse/schema.py index 13535a0..44598e0 100644 --- a/magicparse/schema.py +++ b/magicparse/schema.py @@ -112,7 +112,8 @@ def process_fields( skip_row = False for field in fields: try: - parsed_value = field.parse(row) + source = row | item if isinstance(row, dict) else row + parsed_value = field.parse(source) except Exception as exc: errors.append(field.error(exc)) continue diff --git a/tests/test_fields.py b/tests/test_fields.py index 1d8d8ac..8526162 100644 --- a/tests/test_fields.py +++ b/tests/test_fields.py @@ -10,7 +10,7 @@ class DummyField(Field): - def _read_raw_value(self, row: str) -> str: + def _read_raw_value(self, row: str | dict) -> str: return row def error(self, exception: Exception): diff --git a/tests/test_schema.py b/tests/test_schema.py index e3fe86a..eb669d5 100644 --- a/tests/test_schema.py +++ b/tests/test_schema.py @@ -473,6 +473,58 @@ def test_concat(self): "computed_field": "AB", } + def test_computed_fields_can_be_used_by_next_computed_fields(self): + schema = Schema.build( + { + "file_type": "csv", + "delimiter": ";", + "fields": [ + {"key": "field_1", "type": "int", "column-number": 1}, + {"key": "field_2", "type": "int", "column-number": 2}, + {"key": "field_3", "type": "int", "column-number": 3}, + ], + "computed-fields": [ + { + "key": "multiply_field_result", + "type": "int", + "builder": { + "name": "multiply", + "parameters": { + "x_factor": "field_1", + "y_factor": "field_2", + }, + }, + }, + { + "key": "divide_field_result", + "type": "decimal", + "builder": { + "name": "divide", + "parameters": { + "numerator": 
"multiply_field_result", + "denominator": "field_3", + }, + }, + } + ], + } + ) + + rows = list(schema.stream_parse(b"3;4;2")) + + assert rows == [ + RowParsed( + row_number=1, + values={ + "field_1": 3, + "field_2": 4, + "field_3": 2, + "multiply_field_result": 12, + "divide_field_result": Decimal("6") + } + ) + ] + class TestHandleTypeError(TestCase): def test_default_behavior_raise(self): From ffbb2e4be3e7cb26a5deca3af206d4ef22916070 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Perin?= Date: Fri, 19 Sep 2025 09:00:16 +0200 Subject: [PATCH 10/11] chore: Update README to match with current lib state --- README.md | 125 +++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 119 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 9a7d703..560de99 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,16 @@ # magicparse 🛸 -Declarative parser +Declarative parser for structured data files. + +## Installation + +```bash +poetry add magicparse +``` + +## Requirements + +- Python 3.12+ ## Usage @@ -96,7 +106,7 @@ schema = { } -rows, errors= magicparse.parse(data="...", schema=schema) +rows = magicparse.parse(data="...", schema=schema) ``` @@ -124,9 +134,8 @@ schema = { ], } -rows, errors = magicparse.parse("13ec10cc-cc7e-4ee9-b091-9caa6d11aeb2", schema) +rows = magicparse.parse("13ec10cc-cc7e-4ee9-b091-9caa6d11aeb2", schema) assert rows == [{"shop-guid": "13ec10cc-cc7e-4ee9-b091-9caa6d11aeb2"}] -assert not errors ``` ### Register a custom schema and parse content @@ -152,11 +161,51 @@ schema = { ] } -rows, errors = magicparse.parse("Joe|William|Jack|Averell", schema) -assert not errors +rows = magicparse.parse("Joe|William|Jack|Averell", schema) assert rows == [{"name": "Joe"}, {"name": "William"}, {"name": "Jack"}, {"name": "Averell"}] ``` +### Stream parsing + +For large files, you can use streaming to process data incrementally: + +```python +import magicparse + +schema = { + "file_type": "csv", + "fields": [ + {"key": 
"name", "type": "str", "column-number": 1} + ] +} + +# Process data in chunks +for row in magicparse.stream_parse(data="...", schema=schema): + match row: + case magicparse.RowParsed(values): + print(f"The values {values}.") + case magicparse.RowFailed(errors): + print(f"The errors {errors}.") + case magicparse.RowSkipped(errors): + print(f"The errors {errors}.") + case _: + print("Unknown type of row.") +``` + +### Custom encoding + +By default, magicparse uses UTF-8 encoding. You can specify a different encoding: + +```python +schema = { + "file_type": "csv", + "encoding": "iso8859_5", # or any other encoding + "fields": [ + {"key": "name", "type": "str", "column-number": 1} + ] +} +``` + ## API ### File types @@ -187,6 +236,7 @@ assert rows == [{"name": "Joe"}, {"name": "William"}, {"name": "Jack"}, {"name": - regex-matches - greater-than +- not-null-or-empty #### Post-processors @@ -202,3 +252,66 @@ Types, Pre-processors, Post-processors and validator is same as Field - concat - divide - multiply +- coalesce + +## Return Types + +The parser returns a list of row objects: + +- **`RowParsed`**: Successfully parsed row with `values` dict +- **`RowFailed`**: Failed to parse row with `errors` message +- **`RowSkipped`**: Skipped row with `errors` message + +## Error Handling + +You can configure error handling for types, validators, and processors: + +```python +{ + "key": "price", + "type": { + "key": "decimal", + "nullable": True, # Allow null values + "on-error": "skip-row" # Skip on error instead of raising + } +} +``` + +Error handling options: +- `"raise"` (default): Raise exception on error +- `"skip-row"`: Skip the row and continue processing + +## Docker + +The project includes Docker support: + +```bash +# Build and run with docker-compose +docker-compose up --build + +# Or build manually +docker build -t magicparse . 
+docker run -it magicparse +``` + +## Development + +### Setup + +```bash +# Install dependencies +poetry install + +# Run tests +poetry run pytest + +# Format code +poetry run black . + +# Lint code +poetry run flake8 +``` + +## License + +This project is licensed under the MIT License. From 31b8e4bd6b0fab181ef459d594a935ab2567ed96 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Perin?= Date: Fri, 19 Sep 2025 10:26:46 +0200 Subject: [PATCH 11/11] refactor: Move out of transformer skiping logic --- magicparse/builders.py | 8 ++++---- magicparse/fields.py | 14 +++++++++----- magicparse/post_processors.py | 4 ++-- magicparse/pre_processors.py | 12 ++++++------ magicparse/transform.py | 13 +------------ magicparse/type_converters.py | 2 +- magicparse/validators.py | 6 +++--- tests/test_builders.py | 30 +++++++++++++++--------------- tests/test_post_processors.py | 10 +++++----- tests/test_pre_processors.py | 26 +++++++++++++------------- tests/test_schema.py | 8 +++++--- tests/test_type_converters.py | 30 +++++++++++++++--------------- tests/test_validators.py | 20 ++++++++++---------- 13 files changed, 89 insertions(+), 94 deletions(-) diff --git a/magicparse/builders.py b/magicparse/builders.py index f68b4b7..c477dab 100644 --- a/magicparse/builders.py +++ b/magicparse/builders.py @@ -41,7 +41,7 @@ def __init__(self, on_error: OnError, fields: list[str]) -> None: self.fields = fields - def transform(self, row: dict) -> str: + def apply(self, row: dict) -> str: return "".join(row[field] for field in self.fields) @staticmethod @@ -63,7 +63,7 @@ def __init__(self, on_error: OnError, numerator: str, denominator: str) -> None: self.numerator = numerator self.denominator = denominator - def transform(self, row: dict) -> Decimal: + def apply(self, row: dict) -> Decimal: return row[self.numerator] / row[self.denominator] @staticmethod @@ -85,7 +85,7 @@ def __init__(self, on_error: OnError, x_factor: str, y_factor: str) -> None: self.x_factor = x_factor 
self.y_factor = y_factor - def transform(self, row: dict): + def apply(self, row: dict): return row[self.x_factor] * row[self.y_factor] @staticmethod @@ -107,7 +107,7 @@ def __init__(self, on_error: OnError, fields: list[str]) -> None: self.fields = fields - def transform(self, row: dict) -> str: + def apply(self, row: dict) -> str: for field in self.fields: if row[field]: return row[field] diff --git a/magicparse/fields.py b/magicparse/fields.py index aff41c4..9c06a35 100644 --- a/magicparse/fields.py +++ b/magicparse/fields.py @@ -6,7 +6,7 @@ from .post_processors import PostProcessor from .pre_processors import PreProcessor from .validators import Validator -from .transform import Ok, Result +from .transform import Ok, OnError, Result, SkipRow class Field(ABC): @@ -35,10 +35,14 @@ def _process_raw_value(self, raw_value: str) -> Result: raise ValueError( f"{self.key} field is required but the value was empty" ) - value = Ok(value=raw_value) for transform in self.transforms: - value = transform.apply(value) - return value + try: + raw_value = transform.apply(raw_value) + except Exception as exc: + if transform.on_error == OnError.SKIP_ROW.value: + return SkipRow(exception=exc) + raise + return Ok(value=raw_value) @abstractmethod def _read_raw_value(self, row: List[str] | dict) -> str: @@ -112,7 +116,7 @@ def __init__(self, key: str, options: dict) -> None: self.builder = Builder.build(options["builder"]) def _read_raw_value(self, row: List[str] | dict) -> str: - return self.builder.transform(row) + return self.builder.apply(row) def error(self, exception: Exception) -> dict: return { diff --git a/magicparse/post_processors.py b/magicparse/post_processors.py index 9052349..d8d62ba 100644 --- a/magicparse/post_processors.py +++ b/magicparse/post_processors.py @@ -36,7 +36,7 @@ def __init__(self, on_error: OnError, denominator: int) -> None: self.denominator = denominator - def transform(self, value: Number) -> Number: + def apply(self, value: Number) -> Number: 
return value / self.denominator @staticmethod @@ -57,7 +57,7 @@ def __init__(self, on_error: OnError, precision: int) -> None: self.precision = precision - def transform(self, value: Number) -> Number: + def apply(self, value: Number) -> Number: return round(value, self.precision) @staticmethod diff --git a/magicparse/pre_processors.py b/magicparse/pre_processors.py index be13752..2df95a4 100644 --- a/magicparse/pre_processors.py +++ b/magicparse/pre_processors.py @@ -27,7 +27,7 @@ def __init__(self, on_error: OnError, width: int) -> None: super().__init__(on_error) self.width = width - def transform(self, value: str) -> str: + def apply(self, value: str) -> str: return value.zfill(self.width) @staticmethod @@ -41,7 +41,7 @@ def __init__(self, on_error: OnError, values: dict) -> None: self.values = values self._keys = ", ".join(f"'{key}'" for key in self.values.keys()) - def transform(self, value: str) -> str: + def apply(self, value: str) -> str: try: return self.values[value] except: @@ -60,7 +60,7 @@ def __init__(self, on_error: OnError, pattern: str, replacement: str) -> None: self.pattern = pattern self.replacement = replacement - def transform(self, value: str) -> str: + def apply(self, value: str) -> str: return value.replace(self.pattern, self.replacement) @staticmethod @@ -69,7 +69,7 @@ def key() -> str: class StripWhitespaces(PreProcessor): - def transform(self, value: str) -> str: + def apply(self, value: str) -> str: return value.strip() @staticmethod @@ -82,7 +82,7 @@ def __init__(self, on_error: OnError, characters: str) -> None: super().__init__(on_error) self.characters = characters - def transform(self, value: str) -> str: + def apply(self, value: str) -> str: return value.lstrip(self.characters) @staticmethod @@ -101,7 +101,7 @@ def __init__(self, on_error: OnError, pattern: str) -> None: self.pattern = pattern - def transform(self, value: str) -> str: + def apply(self, value: str) -> str: match = re.match(self.pattern, value) if not match: raise 
ValueError( diff --git a/magicparse/transform.py b/magicparse/transform.py index 28dd4bc..dcc68c6 100644 --- a/magicparse/transform.py +++ b/magicparse/transform.py @@ -30,19 +30,8 @@ def __init__(self, on_error: OnError) -> None: def build(cls, options: dict) -> "Transform": pass - def apply(self, last_result: Result) -> Result: - if isinstance(last_result, SkipRow): - return last_result - - try: - return Ok(value=self.transform(last_result.value)) - except Exception as exc: - if self.on_error == OnError.SKIP_ROW.value: - return SkipRow(exception=exc) - raise - @abstractmethod - def transform(self, value: Any | None) -> Any | None: + def apply(self, value: Any) -> Any: pass @abstractstaticmethod diff --git a/magicparse/type_converters.py b/magicparse/type_converters.py index 0a3b1d6..945276d 100644 --- a/magicparse/type_converters.py +++ b/magicparse/type_converters.py @@ -12,7 +12,7 @@ def __init__(self, nullable: bool, on_error: OnError) -> None: super().__init__(on_error) self.nullable = nullable - def transform(self, value: str | None) -> Any | None: + def apply(self, value: str | None) -> Any | None: if value is None and self.nullable: return None diff --git a/magicparse/validators.py b/magicparse/validators.py index c5e23c3..a777998 100644 --- a/magicparse/validators.py +++ b/magicparse/validators.py @@ -28,7 +28,7 @@ def __init__(self, on_error: str, pattern: str) -> None: super().__init__(on_error) self.pattern = re.compile(pattern) - def transform(self, value: str | None) -> str: + def apply(self, value: str | None) -> str: if re.match(self.pattern, value): return value @@ -44,7 +44,7 @@ def __init__(self, on_error: str, threshold: float) -> None: super().__init__(on_error) self.threshold = Decimal(threshold) - def transform(self, value: Decimal) -> Decimal: + def apply(self, value: Decimal) -> Decimal: if value > self.threshold: return value raise ValueError(f"value must be greater than {self.threshold}") @@ -55,7 +55,7 @@ def key() -> str: class 
NotNullOrEmpty(Validator): - def transform(self, value: str) -> str: + def apply(self, value: str) -> str: if not value: raise ValueError("value must not be null or empty") return value diff --git a/tests/test_builders.py b/tests/test_builders.py index 4e537a8..ce941cd 100644 --- a/tests/test_builders.py +++ b/tests/test_builders.py @@ -12,7 +12,7 @@ class WithoutParamBuilder(Builder): def key() -> str: return "without-param" - def transform(self, value): + def apply(self, value): pass class WithParamBuilder(Builder): @@ -24,7 +24,7 @@ def __init__(self, on_error: str, setting: str) -> None: def key() -> str: return "with-param" - def transform(self, value): + def apply(self, value): pass def test_without_parameter(self): @@ -77,14 +77,14 @@ def test_field_not_present(self): {"name": "concat", "parameters": {"fields": ["code_1", "code_2"]}} ) with pytest.raises(KeyError): - builder.transform({}) + builder.apply({}) def test_concat_two_fields(self): builder = Builder.build( {"name": "concat", "parameters": {"fields": ["code_1", "code_2"]}} ) - result = builder.transform({"code_1": "X", "code_2": "Y"}) + result = builder.apply({"code_1": "X", "code_2": "Y"}) assert result == "XY" @@ -93,7 +93,7 @@ def test_concat_three_fields(self): {"name": "concat", "parameters": {"fields": ["code_1", "code_2", "code_3"]}} ) - result = builder.transform({"code_1": "X", "code_2": "Y", "code_3": "Z"}) + result = builder.apply({"code_1": "X", "code_2": "Y", "code_3": "Z"}) assert result == "XYZ" @@ -103,7 +103,7 @@ def test_concat_integer(self): ) with pytest.raises(TypeError): - builder.transform({"code_1": 1, "code_2": 2}) + builder.apply({"code_1": 1, "code_2": 2}) class TestDivide(TestCase): @@ -141,7 +141,7 @@ def test_field_not_present(self): } ) with pytest.raises(KeyError): - builder.transform({}) + builder.apply({}) def test_numerator_not_valid(self): builder = Builder.build( @@ -151,7 +151,7 @@ def test_numerator_not_valid(self): } ) with pytest.raises(TypeError): - 
builder.transform({"price": "e", "price_by_unit": 1}) + builder.apply({"price": "e", "price_by_unit": 1}) def test_denominator_not_valid(self): builder = Builder.build( @@ -161,7 +161,7 @@ def test_denominator_not_valid(self): } ) with pytest.raises(TypeError): - builder.transform({"price": 1, "price_by_unit": "ee"}) + builder.apply({"price": 1, "price_by_unit": "ee"}) def test_valid_param(self): builder = Builder.build( @@ -171,7 +171,7 @@ def test_valid_param(self): } ) - result = builder.transform({"price": 1, "price_by_unit": 2}) + result = builder.apply({"price": 1, "price_by_unit": 2}) assert result == Decimal("0.5") @@ -211,7 +211,7 @@ def test_field_not_present(self): } ) with pytest.raises(KeyError): - builder.transform({}) + builder.apply({}) def test_x_y_factor_not_valid(self): builder = Builder.build( @@ -221,7 +221,7 @@ def test_x_y_factor_not_valid(self): } ) with pytest.raises(TypeError): - builder.transform({"price": "e", "unit": "e"}) + builder.apply({"price": "e", "unit": "e"}) def test_valid_param(self): builder = Builder.build( @@ -231,7 +231,7 @@ def test_valid_param(self): } ) - result = builder.transform({"price": 1.5, "unit": 2}) + result = builder.apply({"price": 1.5, "unit": 2}) assert result == 3 @@ -274,7 +274,7 @@ def test_return_first_non_empty_value(self): } ) - result = coalesce.transform({"field1": "", "field2": "value"}) + result = coalesce.apply({"field1": "", "field2": "value"}) assert result == "value" @@ -286,6 +286,6 @@ def test_return_none_if_all_values_are_empty(self): } ) - result = coalesce.transform({"field1": "", "field2": ""}) + result = coalesce.apply({"field1": "", "field2": ""}) assert result is None diff --git a/tests/test_post_processors.py b/tests/test_post_processors.py index d65f8a5..b6bd4e1 100644 --- a/tests/test_post_processors.py +++ b/tests/test_post_processors.py @@ -34,19 +34,19 @@ def test_divide_int(self): post_processor = PostProcessor.build( {"name": "divide", "parameters": {"denominator": 100}} ) - 
assert post_processor.transform(150) == 1.5 + assert post_processor.apply(150) == 1.5 def test_divide_float(self): post_processor = PostProcessor.build( {"name": "divide", "parameters": {"denominator": 100}} ) - assert post_processor.transform(1.63) == 0.0163 + assert post_processor.apply(1.63) == 0.0163 def test_divide_decimal(self): post_processor = PostProcessor.build( {"name": "divide", "parameters": {"denominator": 100}} ) - assert post_processor.transform(Decimal("1.63")) == Decimal("0.0163") + assert post_processor.apply(Decimal("1.63")) == Decimal("0.0163") class TestRound(TestCase): @@ -62,7 +62,7 @@ def test_with_valid_precision(self): post_processor = PostProcessor.build( {"name": "round", "parameters": {"precision": 2}} ) - assert post_processor.transform(3.14159265359) == 3.14 + assert post_processor.apply(3.14159265359) == 3.14 class TestRegister(TestCase): @@ -71,7 +71,7 @@ class NoThanksPostProcessor(PostProcessor): def key() -> str: return "no-thanks" - def transform(self, value): + def apply(self, value): return f"{value} ? 
No thanks" def test_register(self): diff --git a/tests/test_pre_processors.py b/tests/test_pre_processors.py index 553cc24..2f5fb6e 100644 --- a/tests/test_pre_processors.py +++ b/tests/test_pre_processors.py @@ -70,13 +70,13 @@ def test_do_nothing(self): pre_processor = PreProcessor.build( {"name": "left-pad-zeroes", "parameters": {"width": 10}} ) - assert pre_processor.transform("abcdefghij") == "abcdefghij" + assert pre_processor.apply("abcdefghij") == "abcdefghij" def test_pad(self): pre_processor = PreProcessor.build( {"name": "left-pad-zeroes", "parameters": {"width": 10}} ) - assert pre_processor.transform("abc") == "0000000abc" + assert pre_processor.apply("abc") == "0000000abc" class TestMap(TestCase): @@ -88,13 +88,13 @@ def test_unknown_input(self): ValueError, match="value 'an input' does not map to any values in \\['A', 'B'\\]", ): - pre_processor.transform("an input") + pre_processor.apply("an input") def test_known_input(self): pre_processor = PreProcessor.build( {"name": "map", "parameters": {"values": {"A": "1", "B": "2"}}} ) - assert pre_processor.transform("A") == "1" + assert pre_processor.apply("A") == "1" class TestReplace(TestCase): @@ -102,23 +102,23 @@ def test_pattern_not_found(self): pre_processor = PreProcessor.build( {"name": "replace", "parameters": {"pattern": "bbb", "replacement": "BBB"}} ) - assert pre_processor.transform("an input") == "an input" + assert pre_processor.apply("an input") == "an input" def test_success(self): pre_processor = PreProcessor.build( {"name": "replace", "parameters": {"pattern": "bbb", "replacement": "BBB"}} ) - assert pre_processor.transform("aaabbbccc") == "aaaBBBccc" + assert pre_processor.apply("aaabbbccc") == "aaaBBBccc" class TestStripWhitespaces(TestCase): def test_do_nothing(self): pre_processor = PreProcessor.build({"name": "strip-whitespaces"}) - assert pre_processor.transform("an input") == "an input" + assert pre_processor.apply("an input") == "an input" def test_success(self): pre_processor = 
PreProcessor.build({"name": "strip-whitespaces"}) - assert pre_processor.transform(" an input ") == "an input" + assert pre_processor.apply(" an input ") == "an input" class TestLeftStrip(TestCase): @@ -126,13 +126,13 @@ def test_do_nothing(self): pre_processor = PreProcessor.build( {"name": "left-strip", "parameters": {"characters": "0"}} ) - assert pre_processor.transform("12345") == "12345" + assert pre_processor.apply("12345") == "12345" def test_success(self): pre_processor = PreProcessor.build( {"name": "left-strip", "parameters": {"characters": "0"}} ) - assert pre_processor.transform("0000012345") == "12345" + assert pre_processor.apply("0000012345") == "12345" class TestRegexExtract(TestCase): @@ -153,7 +153,7 @@ def test_pattern_not_found(self): } ) with pytest.raises(ValueError) as error: - pre_processor.transform("an input") + pre_processor.apply("an input") assert ( error.value.args[0] @@ -167,7 +167,7 @@ def test_pattern_found(self): "parameters": {"pattern": "^xxx(?P\\d{13})xxx$"}, } ) - pre_processor.transform("xxx9780201379624xxx") == "9780201379624" + pre_processor.apply("xxx9780201379624xxx") == "9780201379624" class TestRegister(TestCase): @@ -176,7 +176,7 @@ class YesPreProcessor(PreProcessor): def key() -> str: return "yes" - def transform(self, value): + def apply(self, value): return f"YES {value}" def test_register(self): diff --git a/tests/test_schema.py b/tests/test_schema.py index eb669d5..6868fe8 100644 --- a/tests/test_schema.py +++ b/tests/test_schema.py @@ -4,7 +4,9 @@ from magicparse import Schema from magicparse.post_processors import PostProcessor from magicparse.pre_processors import PreProcessor -from magicparse.schema import ColumnarSchema, CsvSchema, RowParsed, RowFailed, RowSkipped +from magicparse.schema import ( + ColumnarSchema, CsvSchema, RowParsed, RowFailed, RowSkipped +) from magicparse.fields import ColumnarField, CsvField import pytest from unittest import TestCase @@ -630,7 +632,7 @@ def test_skip_row(self): class 
TestHandlePostProcessorError(TestCase): class FailPostProcessor(PostProcessor): - def transform(self, value: Any) -> Any: + def apply(self, value: Any) -> Any: raise ValueError("test error") @staticmethod @@ -695,7 +697,7 @@ def test_skip_row(self): class TestHandlePreProcessorError(TestCase): class FailPreProcessor(PreProcessor): - def transform(self, value: Any) -> Any: + def apply(self, value: Any) -> Any: raise ValueError("test error") @staticmethod diff --git a/tests/test_type_converters.py b/tests/test_type_converters.py index c36fb47..4b561b3 100644 --- a/tests/test_type_converters.py +++ b/tests/test_type_converters.py @@ -54,37 +54,37 @@ def test_str(self): class TestStr(TestCase): def test_transform(self): type_converter = TypeConverter.build({"type": "str"}) - assert type_converter.transform("hello") == "hello" + assert type_converter.apply("hello") == "hello" class TestInt(TestCase): def test_transform(self): type_converter = TypeConverter.build({"type": "int"}) - assert type_converter.transform("153") == 153 + assert type_converter.apply("153") == 153 def test_transform_failed(self): type_converter = TypeConverter.build({"type": "int"}) with pytest.raises(ValueError, match="value 'abc' is not a valid integer"): - type_converter.transform("abc") + type_converter.apply("abc") class TestDecimal(TestCase): def test_transform(self): type_converter = TypeConverter.build({"type": "decimal"}) - assert type_converter.transform("153.56") == Decimal("153.56") + assert type_converter.apply("153.56") == Decimal("153.56") def test_transform_failed(self): type_converter = TypeConverter.build({"type": "decimal"}) with pytest.raises(ValueError, match="value 'abc' is not a valid decimal"): - type_converter.transform("abc") + type_converter.apply("abc") class TestTime(TestCase): def test_transform(self): type_converter = TypeConverter.build({"type": "time"}) - assert type_converter.transform("10:12:03+03:00") == time( + assert type_converter.apply("10:12:03+03:00") == 
time( 10, 12, 3, tzinfo=timezone(timedelta(hours=3)) ) @@ -92,19 +92,19 @@ def test_transform_failed(self): type_converter = TypeConverter.build({"type": "time"}) with pytest.raises(ValueError): - type_converter.transform("Invalid") + type_converter.apply("Invalid") def test_transform_naive_time(self): type_converter = TypeConverter.build({"type": "time"}) with pytest.raises(ValueError): - type_converter.transform("10:12:03") + type_converter.apply("10:12:03") class TestDateTime(TestCase): def test_transform(self): type_converter = TypeConverter.build({"type": "datetime"}) - assert type_converter.transform("2022-01-12T10:12:03+03:00") == datetime( + assert type_converter.apply("2022-01-12T10:12:03+03:00") == datetime( 2022, 1, 12, 10, 12, 3, tzinfo=timezone(timedelta(hours=3)) ) @@ -112,13 +112,13 @@ def test_transform_failed(self): type_converter = TypeConverter.build({"type": "datetime"}) with pytest.raises(ValueError): - type_converter.transform("Invalid") + type_converter.apply("Invalid") def test_transform_naive_time(self): type_converter = TypeConverter.build({"type": "datetime"}) with pytest.raises(ValueError): - type_converter.transform("2022-01-12T10:12:03") + type_converter.apply("2022-01-12T10:12:03") class TestRegister(TestCase): @@ -140,22 +140,22 @@ def test_register(self): class TestNullableField(TestCase): def test_int(self): type_converter = TypeConverter.build({"type": {"key": "int", "nullable": True}}) - assert type_converter.transform(None) is None + assert type_converter.apply(None) is None def test_decimal(self): type_converter = TypeConverter.build( {"type": {"key": "decimal", "nullable": True}} ) - assert type_converter.transform(None) is None + assert type_converter.apply(None) is None def test_time(self): type_converter = TypeConverter.build( {"type": {"key": "time", "nullable": True}} ) - assert type_converter.transform(None) is None + assert type_converter.apply(None) is None def test_datetime(self): type_converter = TypeConverter.build( 
{"type": {"key": "datetime", "nullable": True}} ) - assert type_converter.transform(None) is None + assert type_converter.apply(None) is None diff --git a/tests/test_validators.py b/tests/test_validators.py index d5b3e58..746634d 100644 --- a/tests/test_validators.py +++ b/tests/test_validators.py @@ -53,7 +53,7 @@ def test_match(self): } ) - assert validator.transform("9780201379624") == "9780201379624" + assert validator.apply("9780201379624") == "9780201379624" def test_does_not_match(self): validator = Validator.build( @@ -65,7 +65,7 @@ def test_does_not_match(self): with pytest.raises( ValueError, match=r"string does not match regex '\^\\d\{13\}\$'" ): - validator.transform("hello") + validator.apply("hello") class TestRegister(TestCase): @@ -74,7 +74,7 @@ class IsTheAnswerValidator(Validator): def key() -> str: return "is-the-answer" - def transform(self, value): + def apply(self, value): if value == 42: return value raise ValueError(f"{value} is not the answer !") @@ -92,7 +92,7 @@ def test_it_successfully_returns_the_value_when_greater_than_threshold(self): {"name": "greater-than", "parameters": {"threshold": 11}} ) - assert validator.transform(12) == 12 + assert validator.apply(12) == 12 def test_it_successfully_returns_the_value_when_greater_than_decimal_threshold( self, @@ -101,7 +101,7 @@ def test_it_successfully_returns_the_value_when_greater_than_decimal_threshold( {"name": "greater-than", "parameters": {"threshold": 11.4}} ) - assert validator.transform(11.5) == 11.5 + assert validator.apply(11.5) == 11.5 def test_it_raises_an_error_when_the_value_is_lower_than_threshold(self): validator = Validator.build( @@ -109,7 +109,7 @@ def test_it_raises_an_error_when_the_value_is_lower_than_threshold(self): ) with pytest.raises(ValueError, match="value must be greater than 10"): - validator.transform(9.9999) + validator.apply(9.9999) def test_it_raises_an_error_when_the_value_is_equal_to_threshold(self): validator = Validator.build( @@ -117,7 +117,7 @@ def 
test_it_raises_an_error_when_the_value_is_equal_to_threshold(self): ) with pytest.raises(ValueError, match="value must be greater than 10"): - validator.transform(10) + validator.apply(10) class TestNotNullOrEmptyValidator(TestCase): @@ -126,7 +126,7 @@ def test_success_returns_the_value(self): {"name": "not-null-or-empty"} ) - assert validator.transform("hello") == "hello" + assert validator.apply("hello") == "hello" def test_raise_when_the_value_is_null(self): validator = Validator.build( @@ -134,7 +134,7 @@ def test_raise_when_the_value_is_null(self): ) with pytest.raises(ValueError, match="value must not be null or empty"): - validator.transform(None) + validator.apply(None) def test_raises_when_the_value_is_empty(self): validator = Validator.build( @@ -142,4 +142,4 @@ def test_raises_when_the_value_is_empty(self): ) with pytest.raises(ValueError, match="value must not be null or empty"): - validator.transform("") + validator.apply("")