diff --git a/.config/mise/config.toml b/.config/mise/config.toml index 14f6238..54eefee 100644 --- a/.config/mise/config.toml +++ b/.config/mise/config.toml @@ -16,5 +16,4 @@ _.python.venv = { path = ".venv", create = false } [tasks.test] description = "🐍 Run tests" -depends = ["start_db"] run = "pytest -s" diff --git a/README.md b/README.md index 9a7d703..560de99 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,16 @@ # magicparse 🛸 -Declarative parser +Declarative parser for structured data files. + +## Installation + +```bash +pip install magicparse +``` + +## Requirements + +- Python 3.12+ ## Usage @@ -96,7 +106,7 @@ schema = { } -rows, errors= magicparse.parse(data="...", schema=schema) +rows = magicparse.parse(data="...", schema=schema) ``` @@ -124,9 +134,8 @@ schema = { ], } -rows, errors = magicparse.parse("13ec10cc-cc7e-4ee9-b091-9caa6d11aeb2", schema) +rows = magicparse.parse("13ec10cc-cc7e-4ee9-b091-9caa6d11aeb2", schema) assert rows == [{"shop-guid": "13ec10cc-cc7e-4ee9-b091-9caa6d11aeb2"}] -assert not errors ``` ### Register a custom schema and parse content @@ -152,11 +161,51 @@ schema = { ] } -rows, errors = magicparse.parse("Joe|William|Jack|Averell", schema) -assert not errors +rows = magicparse.parse("Joe|William|Jack|Averell", schema) assert rows == [{"name": "Joe"}, {"name": "William"}, {"name": "Jack"}, {"name": "Averell"}] ``` +### Stream parsing + +For large files, you can use streaming to process data incrementally: + +```python +import magicparse + +schema = { + "file_type": "csv", + "fields": [ + {"key": "name", "type": "str", "column-number": 1} + ] +} + +# Process data in chunks +for row in magicparse.stream_parse(data="...", schema=schema): + match row: + case magicparse.RowParsed(values=values): + print(f"The values {values}.") + case magicparse.RowFailed(errors=errors): + print(f"The errors {errors}.") + case magicparse.RowSkipped(errors=errors): + print(f"The errors {errors}.") + case _: + print("Unknown type of row.") +``` + +### Custom encoding + 
+By default, magicparse uses UTF-8 encoding. You can specify a different encoding: + +```python +schema = { + "file_type": "csv", + "encoding": "iso8859_5", # or any other encoding + "fields": [ + {"key": "name", "type": "str", "column-number": 1} + ] +} +``` + ## API ### File types @@ -187,6 +236,7 @@ assert rows == [{"name": "Joe"}, {"name": "William"}, {"name": "Jack"}, {"name": - regex-matches - greater-than +- not-null-or-empty #### Post-processors @@ -202,3 +252,66 @@ Types, Pre-processors, Post-processors and validator is same as Field - concat - divide - multiply +- coalesce + +## Return Types + +The parser returns a list of row objects: + +- **`RowParsed`**: Successfully parsed row with `values` dict +- **`RowFailed`**: Failed to parse row with `errors` message +- **`RowSkipped`**: Skipped row with `errors` message + +## Error Handling + +You can configure error handling for types, validators, and processors: + +```python +{ + "key": "price", + "type": { + "key": "decimal", + "nullable": True, # Allow null values + "on-error": "skip-row" # Skip on error instead of raising + } +} +``` + +Error handling options: +- `"raise"` (default): Raise exception on error +- `"skip-row"`: Skip the row and continue processing + +## Docker + +The project includes Docker support: + +```bash +# Build and run with docker-compose +docker-compose up --build + +# Or build manually +docker build -t magicparse . +docker run -it magicparse +``` + +## Development + +### Setup + +```bash +# Install dependencies +poetry install + +# Run tests +poetry run pytest + +# Format code +poetry run black . + +# Lint code +poetry run flake8 +``` + +## License + +This project is licensed under the MIT License. 
diff --git a/magicparse/__init__.py b/magicparse/__init__.py index 0d42af9..1f88d83 100644 --- a/magicparse/__init__.py +++ b/magicparse/__init__.py @@ -1,6 +1,12 @@ from io import BytesIO -from .schema import ParsedRow, Schema, builtins as builtins_schemas +from .schema import ( + RowParsed, + RowFailed, + RowSkipped, + Schema, + builtins as builtins_schemas, +) from .post_processors import PostProcessor, builtins as builtins_post_processors from .pre_processors import PreProcessor, builtins as builtins_pre_processors from .builders import ( @@ -9,7 +15,7 @@ ) from .transform import Transform from .type_converters import TypeConverter, builtins as builtins_type_converters -from typing import Any, Dict, Iterable, List, Tuple, Union +from typing import Any, Dict, Iterable, List, Union from .validators import Validator, builtins as builtins_validators @@ -20,21 +26,23 @@ "PostProcessor", "PreProcessor", "Schema", - "ParsedRow", + "RowParsed", + "RowSkipped", + "RowFailed", "Validator", ] def parse( data: Union[bytes, BytesIO], schema_options: Dict[str, Any] -) -> Tuple[List[dict], List[dict]]: +) -> List[RowParsed | RowSkipped | RowFailed]: schema_definition = Schema.build(schema_options) return schema_definition.parse(data) def stream_parse( data: Union[bytes, BytesIO], schema_options: Dict[str, Any] -) -> Iterable[ParsedRow]: +) -> Iterable[RowParsed | RowSkipped | RowFailed]: schema_definition = Schema.build(schema_options) return schema_definition.stream_parse(data) diff --git a/magicparse/builders.py b/magicparse/builders.py index 12a8357..c477dab 100644 --- a/magicparse/builders.py +++ b/magicparse/builders.py @@ -1,7 +1,7 @@ from abc import ABC from decimal import Decimal -from .transform import Transform +from .transform import Transform, OnError class Builder(Transform, ABC): @@ -17,14 +17,16 @@ def build(cls, options: dict) -> "Builder": except: raise ValueError(f"invalid builder '{name}'") + on_error = options.get("on-error", OnError.RAISE) if "parameters" 
in options: - return builder(**options["parameters"]) + return builder(on_error=on_error, **options["parameters"]) else: - return builder() + return builder(on_error=on_error) class Concat(Builder): - def __init__(self, fields: list[str]) -> None: + def __init__(self, on_error: OnError, fields: list[str]) -> None: + super().__init__(on_error) if ( not fields or isinstance(fields, str) @@ -48,7 +50,8 @@ def key() -> str: class Divide(Builder): - def __init__(self, numerator: str, denominator: str) -> None: + def __init__(self, on_error: OnError, numerator: str, denominator: str) -> None: + super().__init__(on_error) if not numerator or not isinstance(numerator, str): raise ValueError( "builder 'divide': " "'numerator' parameter must be a non null str" @@ -69,7 +72,8 @@ def key() -> str: class Multiply(Builder): - def __init__(self, x_factor: str, y_factor: str) -> None: + def __init__(self, on_error: OnError, x_factor: str, y_factor: str) -> None: + super().__init__(on_error) if not x_factor or not isinstance(x_factor, str): raise ValueError( "builder 'multiply': " "'x_factor' parameter must be a non null str" @@ -89,4 +93,29 @@ def key() -> str: return "multiply" -builtins = [Concat, Divide, Multiply] +class Coalesce(Builder): + def __init__(self, on_error: OnError, fields: list[str]) -> None: + super().__init__(on_error) + if not fields: + raise ValueError("parameters should defined fields to coalesce") + if ( + not isinstance(fields, list) + or not all(isinstance(field, str) for field in fields) + or len(fields) < 2 + ): + raise ValueError("parameters should have two fields at least") + + self.fields = fields + + def apply(self, row: dict) -> str: + for field in self.fields: + if row[field]: + return row[field] + return None + + @staticmethod + def key() -> str: + return "coalesce" + + +builtins = [Concat, Divide, Multiply, Coalesce] diff --git a/magicparse/fields.py b/magicparse/fields.py index 944e41b..9c06a35 100644 --- a/magicparse/fields.py +++ 
b/magicparse/fields.py @@ -6,6 +6,7 @@ from .post_processors import PostProcessor from .pre_processors import PreProcessor from .validators import Validator +from .transform import Ok, OnError, Result, SkipRow class Field(ABC): @@ -26,24 +27,28 @@ def __init__(self, key: str, options: dict) -> None: pre_processors + [type_converter] + validators + post_processors ) - def _process_raw_value(self, raw_value: str): - value = raw_value + def _process_raw_value(self, raw_value: str) -> Result: if not raw_value: if self.optional: - return None + return Ok(value=None) else: raise ValueError( f"{self.key} field is required but the value was empty" ) for transform in self.transforms: - value = transform.apply(value) - return value + try: + raw_value = transform.apply(raw_value) + except Exception as exc: + if transform.on_error == OnError.SKIP_ROW.value: + return SkipRow(exception=exc) + raise + return Ok(value=raw_value) @abstractmethod - def _read_raw_value(self, row) -> str: + def _read_raw_value(self, row: List[str] | dict) -> str: pass - def read_value(self, row): + def parse(self, row: List[str] | dict) -> Result: raw_value = self._read_raw_value(row) return self._process_raw_value(raw_value) @@ -75,7 +80,7 @@ def __init__(self, key: str, options: dict) -> None: super().__init__(key, options) self.column_number = options["column-number"] - def _read_raw_value(self, row: List[str]) -> str: + def _read_raw_value(self, row: List[str] | dict) -> str: return row[self.column_number - 1] def error(self, exception: Exception) -> dict: @@ -93,7 +98,7 @@ def __init__(self, key: str, options: dict) -> None: self.column_length = options["column-length"] self.column_end = self.column_start + self.column_length - def _read_raw_value(self, row: str) -> str: + def _read_raw_value(self, row: str | dict) -> str: return row[self.column_start : self.column_end] def error(self, exception: Exception) -> dict: @@ -110,7 +115,7 @@ def __init__(self, key: str, options: dict) -> None: 
super().__init__(key, options) self.builder = Builder.build(options["builder"]) - def _read_raw_value(self, row) -> str: + def _read_raw_value(self, row: List[str] | dict) -> str: return self.builder.apply(row) def error(self, exception: Exception) -> dict: diff --git a/magicparse/post_processors.py b/magicparse/post_processors.py index 2e901dd..d8d62ba 100644 --- a/magicparse/post_processors.py +++ b/magicparse/post_processors.py @@ -1,4 +1,4 @@ -from .transform import Transform +from .transform import Transform, OnError from decimal import Decimal from typing import TypeVar @@ -16,16 +16,18 @@ def build(cls, options: dict) -> "PostProcessor": except: raise ValueError(f"invalid post-processor '{name}'") + on_error = options.get("on-error", OnError.RAISE) if "parameters" in options: - return post_processor(**options["parameters"]) + return post_processor(on_error=on_error, **options["parameters"]) else: - return post_processor() + return post_processor(on_error=on_error) class Divide(PostProcessor): Number = TypeVar("Number", int, float, Decimal) - def __init__(self, denominator: int) -> None: + def __init__(self, on_error: OnError, denominator: int) -> None: + super().__init__(on_error) if denominator <= 0: raise ValueError( "post-processor 'divide': " @@ -45,7 +47,8 @@ def key() -> str: class Round(PostProcessor): Number = TypeVar("Number", int, float, Decimal) - def __init__(self, precision: int) -> None: + def __init__(self, on_error: OnError, precision: int) -> None: + super().__init__(on_error) if precision < 0: raise ValueError( "post-processor 'round': " diff --git a/magicparse/pre_processors.py b/magicparse/pre_processors.py index cc1f25f..2df95a4 100644 --- a/magicparse/pre_processors.py +++ b/magicparse/pre_processors.py @@ -1,5 +1,5 @@ import re -from .transform import Transform +from .transform import Transform, OnError class PreProcessor(Transform): @@ -15,14 +15,16 @@ def build(cls, options: dict) -> "PreProcessor": except: raise ValueError(f"invalid 
pre-processor '{name}'") + on_error = options.get("on-error", OnError.RAISE) if "parameters" in options: - return pre_processor(**options["parameters"]) + return pre_processor(on_error=on_error, **options["parameters"]) else: - return pre_processor() + return pre_processor(on_error=on_error) class LeftPadZeroes(PreProcessor): - def __init__(self, width: int) -> None: + def __init__(self, on_error: OnError, width: int) -> None: + super().__init__(on_error) self.width = width def apply(self, value: str) -> str: @@ -34,7 +36,8 @@ def key() -> str: class Map(PreProcessor): - def __init__(self, values: dict) -> None: + def __init__(self, on_error: OnError, values: dict) -> None: + super().__init__(on_error) self.values = values self._keys = ", ".join(f"'{key}'" for key in self.values.keys()) @@ -52,7 +55,8 @@ def key() -> str: class Replace(PreProcessor): - def __init__(self, pattern: str, replacement: str) -> None: + def __init__(self, on_error: OnError, pattern: str, replacement: str) -> None: + super().__init__(on_error) self.pattern = pattern self.replacement = replacement @@ -74,7 +78,8 @@ def key() -> str: class LeftStrip(PreProcessor): - def __init__(self, characters: str) -> None: + def __init__(self, on_error: OnError, characters: str) -> None: + super().__init__(on_error) self.characters = characters def apply(self, value: str) -> str: @@ -86,7 +91,8 @@ def key() -> str: class RegexExtract(PreProcessor): - def __init__(self, pattern: str) -> None: + def __init__(self, on_error: OnError, pattern: str) -> None: + super().__init__(on_error) pattern = re.compile(pattern) if "value" not in pattern.groupindex: raise ValueError( diff --git a/magicparse/schema.py b/magicparse/schema.py index 3941743..44598e0 100644 --- a/magicparse/schema.py +++ b/magicparse/schema.py @@ -2,15 +2,28 @@ from abc import ABC, abstractmethod import csv from dataclasses import dataclass + +from magicparse.transform import SkipRow from .fields import Field, ComputedField from io import 
BytesIO -from typing import Any, Dict, List, Tuple, Union, Iterable +from typing import Any, Dict, List, Union, Iterable @dataclass(frozen=True, slots=True) -class ParsedRow: +class RowParsed: row_number: int values: dict + + +@dataclass(frozen=True, slots=True) +class RowSkipped: + row_number: int + errors: list[dict] + + +@dataclass(frozen=True, slots=True) +class RowFailed: + row_number: int errors: list[dict] @@ -52,19 +65,14 @@ def register(cls, schema: "Schema") -> None: cls.registry[schema.key()] = schema - def parse(self, data: Union[bytes, BytesIO]) -> Tuple[List[dict], List[dict]]: - items = [] - errors = [] - - for parsed_row in self.stream_parse(data): - if parsed_row.errors: - errors.extend(parsed_row.errors) - else: - items.append(parsed_row.values) - - return items, errors + def parse( + self, data: Union[bytes, BytesIO] + ) -> List[RowParsed] | List[RowSkipped] | List[RowFailed]: + return list(self.stream_parse(data)) - def stream_parse(self, data: Union[bytes, BytesIO]) -> Iterable[ParsedRow]: + def stream_parse( + self, data: Union[bytes, BytesIO] + ) -> Iterable[RowParsed | RowSkipped | RowFailed]: if isinstance(data, bytes): stream = BytesIO(data) else: @@ -82,29 +90,49 @@ def stream_parse(self, data: Union[bytes, BytesIO]) -> Iterable[ParsedRow]: if not any(row): continue - errors = [] - item = {} - for field in self.fields: - try: - value = field.read_value(row) - except Exception as exc: - errors.append({"row-number": row_number, **field.error(exc)}) - continue - - item[field.key] = value - - for computed_field in self.computed_fields: - try: - value = computed_field.read_value(item) - except Exception as exc: - errors.append( - {"row-number": row_number, **computed_field.error(exc)} - ) - continue - - item[computed_field.key] = value - - yield ParsedRow(row_number, item, errors) + fields = self.process_fields(self.fields, row, row_number) + if not isinstance(fields, RowParsed): + yield fields + continue + + computed_fields = 
self.process_fields( + self.computed_fields, fields.values, row_number + ) + if not isinstance(computed_fields, RowParsed): + yield computed_fields + continue + + yield RowParsed(row_number, {**fields.values, **computed_fields.values}) + + def process_fields( + self, fields: List[Field], row: List[str], row_number: int + ) -> RowParsed | RowSkipped | RowFailed: + item = {} + errors = [] + skip_row = False + for field in fields: + try: + source = row | item if isinstance(row, dict) else row + parsed_value = field.parse(source) + except Exception as exc: + errors.append(field.error(exc)) + continue + + if isinstance(parsed_value, SkipRow): + skip_row = True + errors.append(field.error(parsed_value.exception)) + continue + + item[field.key] = parsed_value.value + + if errors: + return ( + RowSkipped(row_number, errors) + if skip_row + else RowFailed(row_number, errors) + ) + + return RowParsed(row_number, item) class CsvSchema(Schema): diff --git a/magicparse/transform.py b/magicparse/transform.py index 1e25c63..dcc68c6 100644 --- a/magicparse/transform.py +++ b/magicparse/transform.py @@ -1,13 +1,37 @@ from abc import ABC, abstractclassmethod, abstractmethod, abstractstaticmethod +from dataclasses import dataclass +from enum import Enum +from typing import Any + + +@dataclass(frozen=True, slots=True) +class Ok: + value: Any + + +@dataclass(frozen=True, slots=True) +class SkipRow: + exception: Exception + + +type Result = Ok | SkipRow + + +class OnError(Enum): + RAISE = "raise" + SKIP_ROW = "skip-row" class Transform(ABC): + def __init__(self, on_error: OnError) -> None: + self.on_error = on_error + @abstractclassmethod def build(cls, options: dict) -> "Transform": pass @abstractmethod - def apply(self, value): + def apply(self, value: Any) -> Any: pass @abstractstaticmethod diff --git a/magicparse/type_converters.py b/magicparse/type_converters.py index 2f107fc..945276d 100644 --- a/magicparse/type_converters.py +++ b/magicparse/type_converters.py @@ -1,25 +1,49 @@ 
+from abc import abstractmethod from datetime import datetime, time from decimal import Decimal +from typing import Any from .transform import Transform +from .transform import OnError class TypeConverter(Transform): + def __init__(self, nullable: bool, on_error: OnError) -> None: + super().__init__(on_error) + self.nullable = nullable + + def apply(self, value: str | None) -> Any | None: + if value is None and self.nullable: + return None + + return self.convert(value) + + @abstractmethod + def convert(self, value: str) -> Any: + pass + @classmethod def build(cls, options) -> "TypeConverter": try: - _type = options["type"] + type = options["type"] + if isinstance(type, str): + key = type + type = {} + else: + key = type.pop("key") except: raise ValueError("missing key 'type'") + nullable = type.pop("nullable", False) + on_error = type.pop("on-error", OnError.RAISE) try: - return cls.registry[_type]() - except: - raise ValueError(f"invalid type '{_type}'") + return cls.registry[key](nullable, on_error, **type) + except Exception as e: + raise ValueError(f"invalid type '{key}': {e}") class StrConverter(TypeConverter): - def apply(self, value: str) -> str: + def convert(self, value: str) -> str: return value @staticmethod @@ -28,7 +52,7 @@ def key() -> str: class IntConverter(TypeConverter): - def apply(self, value: str) -> int: + def convert(self, value: str) -> int: try: return int(value) except: @@ -40,7 +64,7 @@ def key() -> str: class DecimalConverter(TypeConverter): - def apply(self, value: str) -> Decimal: + def convert(self, value: str) -> Decimal: try: return Decimal(value) except: @@ -52,7 +76,7 @@ def key() -> str: class TimeConverter(TypeConverter): - def apply(self, value: str) -> time: + def convert(self, value: str) -> time: try: parsed = time.fromisoformat(value) if parsed.tzinfo is None: @@ -67,7 +91,7 @@ def key() -> str: class DateTimeConverter(TypeConverter): - def apply(self, value: str) -> datetime: + def convert(self, value: str) -> datetime: 
try: parsed = datetime.fromisoformat(value) if parsed.tzinfo is None: diff --git a/magicparse/validators.py b/magicparse/validators.py index f55e079..a777998 100644 --- a/magicparse/validators.py +++ b/magicparse/validators.py @@ -1,5 +1,5 @@ from decimal import Decimal -from .transform import Transform +from .transform import Transform, OnError import re @@ -16,17 +16,19 @@ def build(cls, options: dict) -> "Validator": except: raise ValueError(f"invalid validator '{name}'") + on_error = options.setdefault("on-error", OnError.RAISE) if "parameters" in options: - return validator(**options["parameters"]) + return validator(on_error=on_error, **options["parameters"]) else: - return validator() + return validator(on_error=on_error) class RegexMatches(Validator): - def __init__(self, pattern: str) -> None: + def __init__(self, on_error: str, pattern: str) -> None: + super().__init__(on_error) self.pattern = re.compile(pattern) - def apply(self, value: str) -> str: + def apply(self, value: str | None) -> str: if re.match(self.pattern, value): return value @@ -38,7 +40,8 @@ def key() -> str: class GreaterThan(Validator): - def __init__(self, threshold: float) -> None: + def __init__(self, on_error: str, threshold: float) -> None: + super().__init__(on_error) self.threshold = Decimal(threshold) def apply(self, value: Decimal) -> Decimal: @@ -51,4 +54,15 @@ def key() -> str: return "greater-than" -builtins = [GreaterThan, RegexMatches] +class NotNullOrEmpty(Validator): + def apply(self, value: str) -> str: + if not value: + raise ValueError("value must not be null or empty") + return value + + @staticmethod + def key() -> str: + return "not-null-or-empty" + + +builtins = [GreaterThan, RegexMatches, NotNullOrEmpty] diff --git a/tests/test_builders.py b/tests/test_builders.py index 869f614..ce941cd 100644 --- a/tests/test_builders.py +++ b/tests/test_builders.py @@ -16,7 +16,8 @@ def apply(self, value): pass class WithParamBuilder(Builder): - def __init__(self, setting: str) 
-> None: + def __init__(self, on_error: str, setting: str) -> None: + super().__init__(on_error) self.setting = setting @staticmethod @@ -233,3 +234,58 @@ def test_valid_param(self): result = builder.apply({"price": 1.5, "unit": 2}) assert result == 3 + + +class TestCoalesce(TestCase): + def test_no_params(self): + with pytest.raises(TypeError): + Builder.build({"name": "coalesce"}) + + def test_empty_params(self): + with pytest.raises(TypeError): + Builder.build({"name": "coalesce", "parameters": ""}) + + def test_fields_params_empty(self): + with pytest.raises( + ValueError, + match="parameters should defined fields to coalesce" + ): + Builder.build({"name": "coalesce", "parameters": {"fields": ""}}) + + def test_fields_params_not_a_list_of_str(self): + with pytest.raises( + ValueError, + match="parameters should have two fields at least" + ): + Builder.build({"name": "coalesce", "parameters": {"fields": "xxx"}}) + + def test_fields_params_has_less_than_two_fields(self): + with pytest.raises( + ValueError, + match="parameters should have two fields at least" + ): + Builder.build({"name": "coalesce", "parameters": {"fields": ["field"]}}) + + def test_return_first_non_empty_value(self): + coalesce = Builder.build( + { + "name": "coalesce", + "parameters": {"fields": ["field1", "field2"]} + } + ) + + result = coalesce.apply({"field1": "", "field2": "value"}) + + assert result == "value" + + def test_return_none_if_all_values_are_empty(self): + coalesce = Builder.build( + { + "name": "coalesce", + "parameters": {"fields": ["field1", "field2"]} + } + ) + + result = coalesce.apply({"field1": "", "field2": ""}) + + assert result is None diff --git a/tests/test_computed_fields.py b/tests/test_computed_fields.py index 239b468..62dcd43 100644 --- a/tests/test_computed_fields.py +++ b/tests/test_computed_fields.py @@ -3,18 +3,20 @@ from magicparse.fields import ComputedField from unittest import TestCase +from magicparse.transform import Ok + class TestBuild(TestCase): def 
test_without_builder(self): - with self.assertRaises(KeyError): + with pytest.raises(KeyError): ComputedField("output", {"type": "str"}) def test_not_iterable_value_for_builder(self): - with self.assertRaises(ValueError): + with pytest.raises(ValueError): ComputedField("output", {"type": "str", "builder": 1}) def test_bad_value_for_builder(self): - with self.assertRaises(ValueError): + with pytest.raises(ValueError): ComputedField("output", {"type": "str", "builder": "really"}) def test_with_valid_builder(self): @@ -30,9 +32,9 @@ def test_with_valid_builder(self): } ) - computed = field.read_value({"code_1": "01", "code_2": "02"}) + computed = field.parse({"code_1": "01", "code_2": "02"}) - assert computed == "0102" + assert computed == Ok(value="0102") def test_error_format(self): field = ComputedField( @@ -48,6 +50,6 @@ def test_error_format(self): ) with pytest.raises(KeyError) as error: - field.read_value({}) + field.parse({}) assert field.error(error.value) == {"error": "code_1", "field-key": "output"} diff --git a/tests/test_encodings.py b/tests/test_encodings.py index de5df1f..5e48874 100644 --- a/tests/test_encodings.py +++ b/tests/test_encodings.py @@ -1,6 +1,7 @@ from unittest import TestCase from magicparse import Schema +from magicparse.schema import RowParsed class TestCsvEncoding(TestCase): @@ -12,10 +13,13 @@ def test_default_encoding(self): } ) - rows, errors = schema.parse("José\n李\n💩\n".encode("utf-8")) + rows = schema.parse("José\n李\n💩\n".encode("utf-8")) - assert len(errors) == 0 - assert rows == [{"name": "José"}, {"name": "李"}, {"name": "💩"}] + assert rows == [ + RowParsed(row_number=1, values={"name": "José"}), + RowParsed(row_number=2, values={"name": "李"}), + RowParsed(row_number=3, values={"name": "💩"}), + ] def test_exotic_encoding(self): schema = Schema.build( @@ -26,17 +30,22 @@ def test_exotic_encoding(self): } ) - rows, errors = schema.parse( + rows = schema.parse( "Да здравствует Владимир проклятый\n" "Да здравствует Карл Маркс\n" 
"Да здравствует Россия\n".encode("iso8859_5") ) - assert len(errors) == 0 assert rows == [ - {"name": "Да здравствует Владимир проклятый"}, - {"name": "Да здравствует Карл Маркс"}, - {"name": "Да здравствует Россия"}, + RowParsed( + row_number=1, values={"name": "Да здравствует Владимир проклятый"} + ), + RowParsed( + row_number=2, values={"name": "Да здравствует Карл Маркс"} + ), + RowParsed( + row_number=3, values={"name": "Да здравствует Россия"} + ), ] @@ -56,10 +65,13 @@ def test_default_encoding(self): } ) - rows, errors = schema.parse("José\n李 \n💩 \n".encode("utf-8")) + rows = schema.parse("José\n李 \n💩 \n".encode("utf-8")) - assert len(errors) == 0 - assert rows == [{"name": "José"}, {"name": "李 "}, {"name": "💩 "}] + assert rows == [ + RowParsed(row_number=1, values={"name": "José"}), + RowParsed(row_number=2, values={"name": "李 "}), + RowParsed(row_number=3, values={"name": "💩 "}), + ] def test_exotic_encoding(self): schema = Schema.build( @@ -77,15 +89,23 @@ def test_exotic_encoding(self): } ) - rows, errors = schema.parse( + rows = schema.parse( "Да здравствует Владимир проклятый\n" "Да здравствует Карл Маркс \n" "Да здравствует Россия \n".encode("iso8859_5") ) - assert len(errors) == 0 assert rows == [ - {"name": "Да здравствует Владимир проклятый"}, - {"name": "Да здравствует Карл Маркс "}, - {"name": "Да здравствует Россия "}, + RowParsed( + row_number=1, + values={"name": "Да здравствует Владимир проклятый"} + ), + RowParsed( + row_number=2, + values={"name": "Да здравствует Карл Маркс "} + ), + RowParsed( + row_number=3, + values={"name": "Да здравствует Россия "} + ), ] diff --git a/tests/test_fields.py b/tests/test_fields.py index 249675e..8526162 100644 --- a/tests/test_fields.py +++ b/tests/test_fields.py @@ -1,6 +1,7 @@ from decimal import Decimal import pytest +from magicparse.transform import Ok from magicparse.type_converters import DecimalConverter, StrConverter from magicparse.fields import ColumnarField, CsvField, Field from 
magicparse.post_processors import Divide @@ -9,7 +10,7 @@ class DummyField(Field): - def _read_raw_value(self, row: str) -> str: + def _read_raw_value(self, row: str | dict) -> str: return row def error(self, exception: Exception): @@ -32,8 +33,8 @@ def test_chain_transformations(): assert isinstance(field.transforms[1], StrConverter) assert isinstance(field.transforms[2], RegexMatches) - result = field.read_value(" mac adam ") - assert result == "mac adam" + result = field.parse(" mac adam ") + assert result == Ok(value="mac adam") def test_chain_transformations_with_post_processors(): @@ -56,14 +57,14 @@ def test_chain_transformations_with_post_processors(): assert isinstance(field.transforms[0], Replace) assert isinstance(field.transforms[1], DecimalConverter) assert isinstance(field.transforms[2], Divide) - assert field.read_value("XXX150") == Decimal("1.50") + assert field.parse("XXX150") == Ok(value=Decimal("1.50")) def test_csv_error_format(): field = CsvField("ratio", {"type": "decimal", "column-number": 1}) with pytest.raises(ValueError) as error: - field.read_value("hello") + field.parse("hello") assert field.error(error.value) == { "column-number": 1, @@ -78,7 +79,7 @@ def test_columnar_error_format(): ) with pytest.raises(ValueError) as error: - field.read_value("hello") + field.parse("hello") assert field.error(error.value) == { "column-start": 0, @@ -103,8 +104,8 @@ def test_optional_field(): "post-processors": [{"name": "divide", "parameters": {"denominator": 100}}], } ) - assert field.read_value("XXX150") == Decimal("1.50") - assert field.read_value("") is None + assert field.parse("XXX150") == Ok(value=Decimal("1.50")) + assert field.parse("") == Ok(value=None) def test_required_field(): @@ -115,7 +116,7 @@ def test_required_field(): "optional": False, } ) - assert field.read_value("1.5") == Decimal("1.50") + assert field.parse("1.5") == Ok(value=Decimal("1.50")) def test_require_field_with_empty_value(): @@ -128,7 +129,7 @@ def 
test_require_field_with_empty_value(): with pytest.raises( ValueError, match="pepito field is required but the value was empty" ): - field.read_value("") + field.parse("") def test_field_without_key(): diff --git a/tests/test_schema.py b/tests/test_schema.py index 622473d..6868fe8 100644 --- a/tests/test_schema.py +++ b/tests/test_schema.py @@ -1,7 +1,12 @@ from decimal import Decimal +from typing import Any from magicparse import Schema -from magicparse.schema import ColumnarSchema, CsvSchema, ParsedRow +from magicparse.post_processors import PostProcessor +from magicparse.pre_processors import PreProcessor +from magicparse.schema import ( + ColumnarSchema, CsvSchema, RowParsed, RowFailed, RowSkipped +) from magicparse.fields import ColumnarField, CsvField import pytest from unittest import TestCase @@ -61,15 +66,13 @@ def test_with_no_data(self): "fields": [{"key": "name", "type": "str", "column-number": 1}], } ) - rows, errors = schema.parse(b"") - assert not rows - assert not errors + rows = schema.parse(b"") + assert rows == [] def test_with_no_field_definition(self): schema = Schema.build({"file_type": "csv", "fields": []}) - rows, errors = schema.parse(b"a,b,c") - assert rows == [{}] - assert not errors + rows = schema.parse(b"a,b,c") + assert rows == [RowParsed(row_number=1, values={})] def test_without_header(self): schema = Schema.build( @@ -78,9 +81,8 @@ def test_without_header(self): "fields": [{"key": "name", "type": "str", "column-number": 1}], } ) - rows, errors = schema.parse(b"1") - assert rows == [{"name": "1"}] - assert not errors + rows = schema.parse(b"1") + assert rows == [RowParsed(row_number=1, values={"name": "1"})] def test_with_header(self): schema = Schema.build( @@ -90,9 +92,8 @@ def test_with_header(self): "fields": [{"key": "name", "type": "str", "column-number": 1}], } ) - rows, errors = schema.parse(b"column_name\n1") - assert rows == [{"name": "1"}] - assert not errors + rows = schema.parse(b"column_name\n1") + assert rows == 
[RowParsed(row_number=2, values={"name": "1"})] def test_error_display_row_number(self): schema = Schema.build( @@ -101,15 +102,13 @@ def test_error_display_row_number(self): "fields": [{"key": "age", "type": "int", "column-number": 1}], } ) - rows, errors = schema.parse(b"a") - assert not rows - assert errors == [ - { - "row-number": 1, + rows = schema.parse(b"a") + assert rows == [ + RowFailed(row_number=1, errors=[{ "column-number": 1, "field-key": "age", "error": "value 'a' is not a valid integer", - } + }]) ] def test_errors_do_not_halt_parsing(self): @@ -119,15 +118,15 @@ def test_errors_do_not_halt_parsing(self): "fields": [{"key": "age", "type": "int", "column-number": 1}], } ) - rows, errors = schema.parse(b"1\na\n2") - assert rows == [{"age": 1}, {"age": 2}] - assert errors == [ - { - "row-number": 2, + rows = schema.parse(b"1\na\n2") + assert rows == [ + RowParsed(row_number=1, values={"age": 1}), + RowFailed(row_number=2, errors=[{ "column-number": 1, "field-key": "age", "error": "value 'a' is not a valid integer", - } + }]), + RowParsed(row_number=3, values={"age": 2}), ] def test_parse_should_skip_empty_lines(self): @@ -137,13 +136,12 @@ def test_parse_should_skip_empty_lines(self): "fields": [{"key": "name", "type": "str", "column-number": 1}], } ) - rows, errors = schema.parse( + rows = schema.parse( b"""1 """ ) - assert rows == [{"name": "1"}] - assert not errors + assert rows == [RowParsed(row_number=1, values={"name": "1"})] class TestColumnarParse(TestCase): @@ -161,15 +159,13 @@ def test_with_no_data(self): ], } ) - rows, errors = schema.parse(b"") - assert not rows - assert not errors + rows = schema.parse(b"") + assert rows == [] def test_with_no_field_definition(self): schema = Schema.build({"file_type": "columnar", "fields": []}) - rows, errors = schema.parse(b"a") - assert rows == [{}] - assert not errors + rows = schema.parse(b"a") + assert rows == [RowParsed(row_number=1, values={})] def test_parse(self): schema = Schema.build( @@ -185,9 
+181,8 @@ def test_parse(self): ], } ) - rows, errors = schema.parse(b"1") - assert rows == [{"name": "1"}] - assert not errors + rows = schema.parse(b"1") + assert rows == [RowParsed(row_number=1, values={"name": "1"})] def test_error_display_row_number(self): schema = Schema.build( @@ -198,16 +193,14 @@ def test_error_display_row_number(self): ], } ) - rows, errors = schema.parse(b"a") - assert not rows - assert errors == [ - { - "row-number": 1, + rows = schema.parse(b"a") + assert rows == [ + RowFailed(row_number=1, errors=[{ "column-start": 0, "column-length": 1, "field-key": "age", "error": "value 'a' is not a valid integer", - } + }]) ] def test_errors_do_not_halt_parsing(self): @@ -219,16 +212,16 @@ def test_errors_do_not_halt_parsing(self): ], } ) - rows, errors = schema.parse(b"1\na\n2") - assert rows == [{"age": 1}, {"age": 2}] - assert errors == [ - { - "row-number": 2, + rows = schema.parse(b"1\na\n2") + assert rows == [ + RowParsed(row_number=1, values={"age": 1}), + RowFailed(row_number=2, errors=[{ "column-start": 0, "column-length": 1, "field-key": "age", "error": "value 'a' is not a valid integer", - } + }]), + RowParsed(row_number=3, values={"age": 2}), ] def test_parse_should_skip_empty_lines(self): @@ -245,13 +238,81 @@ def test_parse_should_skip_empty_lines(self): ], } ) - rows, errors = schema.parse( + rows = schema.parse( b"""8013109C """ ) - assert rows == [{"name": "8013109C"}] - assert not errors + assert rows == [RowParsed(row_number=1, values={"name": "8013109C"})] + + def test_should_return_all_errors_in_a_row(self): + schema = Schema.build( + { + "file_type": "csv", + "delimiter": ";", + "fields": [ + {"key": "age", "type": "int", "column-number": 1}, + {"key": "age2", "type": "int", "column-number": 2} + ], + } + ) + + rows = schema.parse(b"a;a") + + assert rows == [ + RowFailed(row_number=1, errors=[ + { + "column-number": 1, + "field-key": "age", + "error": "value 'a' is not a valid integer", + }, + { + "column-number": 2, + 
"field-key": "age2", + "error": "value 'a' is not a valid integer", + } + ]) + ] + + def test_skip_is_prioritized_over_errors(self): + schema = Schema.build( + { + "file_type": "csv", + "delimiter": ";", + "fields": [ + { + "key": "age", + "type": "int", + "column-number": 1, + "validators": [ + { + "name": "greater-than", + "parameters": {"threshold": 0}, + "on-error": "skip-row" + } + ], + }, + {"key": "age2", "type": "int", "column-number": 2} + ], + } + ) + + rows = schema.parse(b"-1;a") + + assert rows == [ + RowSkipped(row_number=1, errors=[ + { + "column-number": 1, + "field-key": "age", + "error": "value must be greater than 0", + }, + { + "column-number": 2, + "field-key": "age2", + "error": "value 'a' is not a valid integer", + } + ]) + ] class TestQuotingSetting(TestCase): @@ -263,9 +324,10 @@ def test_no_quote(self): "fields": [{"key": "column_1", "type": "decimal", "column-number": 1}], } ) - rows, errors = schema.parse(b"column_1\n6.66") - assert rows == [{"column_1": Decimal("6.66")}] - assert not errors + rows = schema.parse(b"column_1\n6.66") + assert rows == [ + RowParsed(row_number=2, values={"column_1": Decimal("6.66")}) + ] def test_single_quote(self): schema = Schema.build( @@ -276,9 +338,10 @@ def test_single_quote(self): "fields": [{"key": "column_1", "type": "decimal", "column-number": 1}], } ) - rows, errors = schema.parse(b"column_1\n'6.66'") - assert rows == [{"column_1": Decimal("6.66")}] - assert not errors + rows = schema.parse(b"column_1\n'6.66'") + assert rows == [ + RowParsed(row_number=2, values={"column_1": Decimal("6.66")}) + ] def test_double_quote(self): schema = Schema.build( @@ -289,9 +352,10 @@ def test_double_quote(self): "fields": [{"key": "column_1", "type": "decimal", "column-number": 1}], } ) - rows, errors = schema.parse(b'column_1\n"6.66"') - assert rows == [{"column_1": Decimal("6.66")}] - assert not errors + rows = schema.parse(b'column_1\n"6.66"') + assert rows == [ + RowParsed(row_number=2, values={"column_1": 
Decimal("6.66")}) + ] def test_asymetrical_quote(self): schema = Schema.build( @@ -301,8 +365,10 @@ def test_asymetrical_quote(self): "fields": [{"key": "column_1", "type": "str", "column-number": 1}], } ) - rows, errors = schema.parse(b'column_1\n"test ""quoting""') - assert rows == [{"column_1": '"test ""quoting""'}] + rows = schema.parse(b'column_1\n"test ""quoting""') + assert rows == [ + RowParsed(row_number=2, values={"column_1": '"test ""quoting""'}) + ] class TestRegister(TestCase): @@ -337,20 +403,17 @@ def test_stream_parse_errors_do_not_halt_parsing(self): ) rows = list(schema.stream_parse(b"1\na\n2")) assert rows == [ - ParsedRow(row_number=1, values={"age": 1}, errors=[]), - ParsedRow( + RowParsed(row_number=1, values={"age": 1}), + RowFailed( row_number=2, - values={}, errors=[ { - "row-number": 2, "column-number": 1, "field-key": "age", "error": "value 'a' is not a valid integer", } - ], - ), - ParsedRow(row_number=3, values={"age": 2}, errors=[]), + ]), + RowParsed(row_number=3, values={"age": 2}), ] def test_stream_parse_with_header_first_row_number_is_2(self): @@ -411,3 +474,287 @@ def test_concat(self): "field_2": "B", "computed_field": "AB", } + + def test_computed_fields_can_be_used_by_next_computed_fields(self): + schema = Schema.build( + { + "file_type": "csv", + "delimiter": ";", + "fields": [ + {"key": "field_1", "type": "int", "column-number": 1}, + {"key": "field_2", "type": "int", "column-number": 2}, + {"key": "field_3", "type": "int", "column-number": 3}, + ], + "computed-fields": [ + { + "key": "multiply_field_result", + "type": "int", + "builder": { + "name": "multiply", + "parameters": { + "x_factor": "field_1", + "y_factor": "field_2", + }, + }, + }, + { + "key": "divide_field_result", + "type": "decimal", + "builder": { + "name": "divide", + "parameters": { + "numerator": "multiply_field_result", + "denominator": "field_3", + }, + }, + } + ], + } + ) + + rows = list(schema.stream_parse(b"3;4;2")) + + assert rows == [ + RowParsed( 
+ row_number=1, + values={ + "field_1": 3, + "field_2": 4, + "field_3": 2, + "multiply_field_result": 12, + "divide_field_result": Decimal("6") + } + ) + ] + + +class TestHandleTypeError(TestCase): + def test_default_behavior_raise(self): + schema = Schema.build( + { + "file_type": "csv", + "fields": [ + {"key": "age", "type": "int", "column-number": 1} + ], + } + ) + rows = list(schema.stream_parse(b"a")) + assert rows == [ + RowFailed(row_number=1, errors=[{ + "column-number": 1, + "field-key": "age", + "error": "value 'a' is not a valid integer", + }]) + ] + + def test_skip_row(self): + schema = Schema.build( + { + "file_type": "csv", + "fields": [ + { + "key": "age", + "type": {"key": "int", "on-error": "skip-row"}, + "column-number": 1, + } + ], + } + ) + rows = list(schema.stream_parse(b"a")) + assert rows == [RowSkipped(row_number=1, errors=[{ + "column-number": 1, + "field-key": "age", + "error": "value 'a' is not a valid integer", + } + ])] + + +class TestHandleValidationError(TestCase): + def test_default_behavior_raise(self): + schema = Schema.build( + { + "file_type": "csv", + "fields": [ + { + "key": "age", + "type": "int", + "column-number": 1, + "validators": [ + { + "name": "greater-than", + "parameters": {"threshold": 0}, + } + ], + } + ], + } + ) + + rows = list(schema.stream_parse(b"-1")) + + assert rows == [ + RowFailed(row_number=1, errors=[{ + "column-number": 1, + "field-key": "age", + "error": "value must be greater than 0", + } + ]) + ] + + def test_skip_row(self): + schema = Schema.build( + { + "file_type": "csv", + "fields": [ + { + "key": "age", + "type": "int", + "column-number": 1, + "validators": [ + { + "name": "greater-than", + "parameters": {"threshold": 0}, + "on-error": "skip-row", + } + ], + } + ], + } + ) + rows = list(schema.stream_parse(b"-1")) + assert rows == [RowSkipped(row_number=1, errors=[{ + "column-number": 1, + "field-key": "age", + "error": "value must be greater than 0", + } + ])] + + +class 
TestHandlePostProcessorError(TestCase): + class FailPostProcessor(PostProcessor): + def apply(self, value: Any) -> Any: + raise ValueError("test error") + + @staticmethod + def key() -> str: + return "fail-post-processor" + + def test_default_behavior_raise(self): + PostProcessor.register(self.FailPostProcessor) + schema = Schema.build( + { + "file_type": "csv", + "fields": [ + { + "key": "age", + "type": "int", + "column-number": 1, + "post-processors": [ + { + "name": "fail-post-processor", + } + ], + } + ], + } + ) + rows = list(schema.stream_parse(b"1")) + assert rows == [RowFailed(row_number=1, errors=[{ + "column-number": 1, + "field-key": "age", + "error": "test error", + } + ])] + + def test_skip_row(self): + PostProcessor.register(self.FailPostProcessor) + schema = Schema.build( + { + "file_type": "csv", + "fields": [ + { + "key": "age", + "type": "int", + "column-number": 1, + "post-processors": [ + { + "name": "fail-post-processor", + "on-error": "skip-row", + } + ], + } + ], + } + ) + rows = list(schema.stream_parse(b"1")) + assert rows == [RowSkipped(row_number=1, errors=[{ + "column-number": 1, + "field-key": "age", + "error": "test error", + } + ])] + + +class TestHandlePreProcessorError(TestCase): + class FailPreProcessor(PreProcessor): + def apply(self, value: Any) -> Any: + raise ValueError("test error") + + @staticmethod + def key() -> str: + return "fail-pre-processor" + + def test_default_behavior_raise(self): + PreProcessor.register(self.FailPreProcessor) + schema = Schema.build( + { + "file_type": "csv", + "fields": [ + { + "key": "age", + "type": "int", + "column-number": 1, + "pre-processors": [ + { + "name": "fail-pre-processor", + } + ], + } + ], + } + ) + rows = list(schema.stream_parse(b"1")) + assert rows == [RowFailed(row_number=1, errors=[{ + "column-number": 1, + "field-key": "age", + "error": "test error", + } + ])] + + def test_skip_row(self): + PreProcessor.register(self.FailPreProcessor) + schema = Schema.build( + { + 
"file_type": "csv", + "fields": [ + { + "key": "age", + "type": "int", + "column-number": 1, + "pre-processors": [ + { + "name": "fail-pre-processor", + "on-error": "skip-row", + } + ], + } + ], + } + ) + rows = list(schema.stream_parse(b"1")) + assert rows == [RowSkipped(row_number=1, errors=[{ + "column-number": 1, + "field-key": "age", + "error": "test error", + } + ])] diff --git a/tests/test_type_converters.py b/tests/test_type_converters.py index c841047..4b561b3 100644 --- a/tests/test_type_converters.py +++ b/tests/test_type_converters.py @@ -15,7 +15,7 @@ ) -class TestBuild(TestCase): +class TestBuildFlattenType(TestCase): def test_str(self): type_converter = TypeConverter.build({"type": "str"}) assert isinstance(type_converter, StrConverter) @@ -45,18 +45,24 @@ def test_no_type_provided(self): TypeConverter.build({}) +class TestBuildComplexeType(TestCase): + def test_str(self): + type_converter = TypeConverter.build({"type": {"key": "str"}}) + assert isinstance(type_converter, StrConverter) + + class TestStr(TestCase): - def test_apply(self): + def test_transform(self): type_converter = TypeConverter.build({"type": "str"}) assert type_converter.apply("hello") == "hello" class TestInt(TestCase): - def test_apply(self): + def test_transform(self): type_converter = TypeConverter.build({"type": "int"}) assert type_converter.apply("153") == 153 - def test_apply_failed(self): + def test_transform_failed(self): type_converter = TypeConverter.build({"type": "int"}) with pytest.raises(ValueError, match="value 'abc' is not a valid integer"): @@ -64,11 +70,11 @@ def test_apply_failed(self): class TestDecimal(TestCase): - def test_apply(self): + def test_transform(self): type_converter = TypeConverter.build({"type": "decimal"}) assert type_converter.apply("153.56") == Decimal("153.56") - def test_apply_failed(self): + def test_transform_failed(self): type_converter = TypeConverter.build({"type": "decimal"}) with pytest.raises(ValueError, match="value 'abc' is not a 
valid decimal"): @@ -76,19 +82,19 @@ def test_apply_failed(self): class TestTime(TestCase): - def test_apply(self): + def test_transform(self): type_converter = TypeConverter.build({"type": "time"}) assert type_converter.apply("10:12:03+03:00") == time( 10, 12, 3, tzinfo=timezone(timedelta(hours=3)) ) - def test_apply_failed(self): + def test_transform_failed(self): type_converter = TypeConverter.build({"type": "time"}) with pytest.raises(ValueError): type_converter.apply("Invalid") - def test_apply_naive_time(self): + def test_transform_naive_time(self): type_converter = TypeConverter.build({"type": "time"}) with pytest.raises(ValueError): @@ -96,19 +102,19 @@ def test_apply_naive_time(self): class TestDateTime(TestCase): - def test_apply(self): + def test_transform(self): type_converter = TypeConverter.build({"type": "datetime"}) assert type_converter.apply("2022-01-12T10:12:03+03:00") == datetime( 2022, 1, 12, 10, 12, 3, tzinfo=timezone(timedelta(hours=3)) ) - def test_apply_failed(self): + def test_transform_failed(self): type_converter = TypeConverter.build({"type": "datetime"}) with pytest.raises(ValueError): type_converter.apply("Invalid") - def test_apply_naive_time(self): + def test_transform_naive_time(self): type_converter = TypeConverter.build({"type": "datetime"}) with pytest.raises(ValueError): @@ -121,7 +127,7 @@ class GuidConverter(TypeConverter): def key() -> str: return "guid" - def apply(self, value): + def convert(self, value): return UUID(value) def test_register(self): @@ -129,3 +135,27 @@ def test_register(self): type_converter = TypeConverter.build({"type": "guid"}) assert isinstance(type_converter, self.GuidConverter) + + +class TestNullableField(TestCase): + def test_int(self): + type_converter = TypeConverter.build({"type": {"key": "int", "nullable": True}}) + assert type_converter.apply(None) is None + + def test_decimal(self): + type_converter = TypeConverter.build( + {"type": {"key": "decimal", "nullable": True}} + ) + assert 
type_converter.apply(None) is None + + def test_time(self): + type_converter = TypeConverter.build( + {"type": {"key": "time", "nullable": True}} + ) + assert type_converter.apply(None) is None + + def test_datetime(self): + type_converter = TypeConverter.build( + {"type": {"key": "datetime", "nullable": True}} + ) + assert type_converter.apply(None) is None diff --git a/tests/test_validators.py b/tests/test_validators.py index cd75991..746634d 100644 --- a/tests/test_validators.py +++ b/tests/test_validators.py @@ -1,5 +1,5 @@ from decimal import Decimal -from magicparse.validators import GreaterThan, RegexMatches, Validator +from magicparse.validators import GreaterThan, NotNullOrEmpty, RegexMatches, Validator import pytest import re from unittest import TestCase @@ -17,6 +17,13 @@ def test_regex_matches(self): assert isinstance(validator.pattern, re.Pattern) assert validator.pattern.pattern == "^\\d{13}$" + def test_not_null_or_empty(self): + validator = Validator.build( + {"name": "not-null-or-empty"} + ) + + assert isinstance(validator, NotNullOrEmpty) + def test_unknown(self): with pytest.raises(ValueError, match="invalid validator 'anything'"): Validator.build({"name": "anything"}) @@ -111,3 +118,28 @@ def test_it_raises_an_error_when_the_value_is_equal_to_threshold(self): with pytest.raises(ValueError, match="value must be greater than 10"): validator.apply(10) + + +class TestNotNullOrEmptyValidator(TestCase): + def test_success_returns_the_value(self): + validator = Validator.build( + {"name": "not-null-or-empty"} + ) + + assert validator.apply("hello") == "hello" + + def test_raise_when_the_value_is_null(self): + validator = Validator.build( + {"name": "not-null-or-empty"} + ) + + with pytest.raises(ValueError, match="value must not be null or empty"): + validator.apply(None) + + def test_raises_when_the_value_is_empty(self): + validator = Validator.build( + {"name": "not-null-or-empty"} + ) + + with pytest.raises(ValueError, match="value must not be null 
or empty"): + validator.apply("")