Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/pullrequest-push.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ jobs:
poetry install --no-interaction --no-ansi --no-root --only dev
ruff check --diff ./
ruff format --check --diff ./
pyright
secrets: inherit

unit-tests:
Expand Down
5 changes: 5 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,8 @@ repos:
entry: poetry run ruff format
language: system
types: [file, python]
- id: pyright
name: Pyright
entry: poetry run pyright
language: system
types: [file, python]
15 changes: 7 additions & 8 deletions magicparse/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from collections.abc import Sequence
from io import BytesIO

from .schema import (
Expand All @@ -15,7 +16,7 @@
)
from .transform import Transform
from .type_converters import TypeConverter, builtins as builtins_type_converters
from typing import Any, Dict, Iterable, List, Union
from typing import Any, Iterable
from .validators import Validator, builtins as builtins_validators


Expand All @@ -33,23 +34,21 @@
]


def parse(data: Union[bytes, BytesIO], schema_options: Dict[str, Any]) -> List[RowParsed | RowSkipped | RowFailed]:
def parse(data: bytes | BytesIO, schema_options: dict[str, Any]) -> list[RowParsed | RowSkipped | RowFailed]:
schema_definition = Schema.build(schema_options)
return schema_definition.parse(data)


def stream_parse(
data: Union[bytes, BytesIO], schema_options: Dict[str, Any]
) -> Iterable[RowParsed | RowSkipped | RowFailed]:
def stream_parse(data: bytes | BytesIO, schema_options: dict[str, Any]) -> Iterable[RowParsed | RowSkipped | RowFailed]:
schema_definition = Schema.build(schema_options)
return schema_definition.stream_parse(data)


Registrable = Union[Schema, Transform]
Registrable = type[Schema] | type[Transform]


def register(items: Union[Registrable, List[Registrable]]) -> None:
if not isinstance(items, list):
def register(items: Registrable | Sequence[Registrable]) -> None:
if not isinstance(items, Sequence):
items = [items]

for item in items:
Expand Down
46 changes: 23 additions & 23 deletions magicparse/builders.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
from abc import ABC
from decimal import Decimal
from typing import Any, cast

from .transform import Transform, OnError


class Builder(Transform, ABC):
registry = dict[str, type["Builder"]]()

@classmethod
def build(cls, options: dict) -> "Builder":
def build(cls, options: dict[str, Any]) -> "Builder":
try:
name = options["name"]
except:
Expand All @@ -25,31 +28,28 @@ def build(cls, options: dict) -> "Builder":


class Concat(Builder):
def __init__(self, on_error: OnError, fields: list[str]) -> None:
def __init__(self, on_error: OnError, fields: Any) -> None:
super().__init__(on_error)
if (
not fields
or isinstance(fields, str)
or not isinstance(fields, list)
or not all(isinstance(field, str) for field in fields)
or len(fields) < 2
not isinstance(fields, list)
or not all(isinstance(field, str) for field in fields) # type: ignore[reportUnknownVariableType]
or len(fields) < 2 # type: ignore[reportUnknownVariableType]
):
raise ValueError(
"composite-processor 'concat': 'fields' parameter must be a list[str] with at least two elements"
)
self.fields = cast(list[str], fields)

self.fields = fields

def apply(self, row: dict) -> str:
return "".join(row[field] for field in self.fields)
def apply(self, value: dict[str, Any]) -> str:
return "".join(value[field] for field in self.fields)

@staticmethod
def key() -> str:
return "concat"


class Divide(Builder):
def __init__(self, on_error: OnError, numerator: str, denominator: str) -> None:
def __init__(self, on_error: OnError, numerator: Any, denominator: Any) -> None:
super().__init__(on_error)
if not numerator or not isinstance(numerator, str):
raise ValueError("builder 'divide': 'numerator' parameter must be a non null str")
Expand All @@ -58,16 +58,16 @@ def __init__(self, on_error: OnError, numerator: str, denominator: str) -> None:
self.numerator = numerator
self.denominator = denominator

def apply(self, row: dict) -> Decimal:
return row[self.numerator] / row[self.denominator]
def apply(self, value: dict[str, Any]) -> Decimal:
return value[self.numerator] / value[self.denominator]

@staticmethod
def key() -> str:
return "divide"


class Multiply(Builder):
def __init__(self, on_error: OnError, x_factor: str, y_factor: str) -> None:
def __init__(self, on_error: OnError, x_factor: Any, y_factor: Any) -> None:
super().__init__(on_error)
if not x_factor or not isinstance(x_factor, str):
raise ValueError("builder 'multiply': 'x_factor' parameter must be a non null str")
Expand All @@ -76,28 +76,28 @@ def __init__(self, on_error: OnError, x_factor: str, y_factor: str) -> None:
self.x_factor = x_factor
self.y_factor = y_factor

def apply(self, row: dict):
return row[self.x_factor] * row[self.y_factor]
def apply(self, value: dict[str, Any]):
return value[self.x_factor] * value[self.y_factor]

@staticmethod
def key() -> str:
return "multiply"


class Coalesce(Builder):
def __init__(self, on_error: OnError, fields: list[str]) -> None:
def __init__(self, on_error: OnError, fields: Any) -> None:
super().__init__(on_error)
if not fields:
raise ValueError("parameters should defined fields to coalesce")
if not isinstance(fields, list) or not all(isinstance(field, str) for field in fields) or len(fields) < 2:
if not isinstance(fields, list) or not all(isinstance(field, str) for field in fields) or len(fields) < 2: # type: ignore[reportUnknownVariableType]
raise ValueError("parameters should have two fields at least")

self.fields = fields
self.fields = cast(list[str], fields)

def apply(self, row: dict) -> str:
def apply(self, value: dict[str, Any]) -> str | None:
for field in self.fields:
if row[field]:
return row[field]
if value[field]:
return value[field]
return None

@staticmethod
Expand Down
38 changes: 19 additions & 19 deletions magicparse/fields.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from abc import ABC, abstractmethod
from typing import List
from typing import Any

from .builders import Builder
from .type_converters import TypeConverter
Expand All @@ -10,7 +10,7 @@


class Field(ABC):
def __init__(self, key: str, options: dict) -> None:
def __init__(self, key: str, options: dict[str, Any]) -> None:
self.key = key
pre_processors = [PreProcessor.build(item) for item in options.get("pre-processors", [])]
type_converter = TypeConverter.build(options)
Expand All @@ -37,19 +37,19 @@ def _process_raw_value(self, raw_value: str) -> Result:
return Ok(value=raw_value)

@abstractmethod
def _read_raw_value(self, row: List[str] | dict) -> str:
def _read_raw_value(self, row: Any) -> str:
pass

def parse(self, row: List[str] | dict) -> Result:
def parse(self, row: str | list[str] | dict[str, Any]) -> Result:
raw_value = self._read_raw_value(row)
return self._process_raw_value(raw_value)

@abstractmethod
def error(self, exception: Exception):
def error(self, exception: Exception) -> dict[str, Any]:
pass

@classmethod
def build(cls, options: dict) -> "Field":
def build(cls, options: dict[str, Any]) -> "Field":
options = options.copy()
key = options.pop("key", None)
if not key:
Expand All @@ -68,14 +68,14 @@ def build(cls, options: dict) -> "Field":


class CsvField(Field):
def __init__(self, key: str, options: dict) -> None:
def __init__(self, key: str, options: dict[str, Any]) -> None:
super().__init__(key, options)
self.column_number = options["column-number"]
self.column_number = int(options["column-number"])

def _read_raw_value(self, row: List[str] | dict) -> str:
def _read_raw_value(self, row: list[str]) -> str:
return row[self.column_number - 1]

def error(self, exception: Exception) -> dict:
def error(self, exception: Exception) -> dict[str, Any]:
return {
"column-number": self.column_number,
"field-key": self.key,
Expand All @@ -84,16 +84,16 @@ def error(self, exception: Exception) -> dict:


class ColumnarField(Field):
def __init__(self, key: str, options: dict) -> None:
def __init__(self, key: str, options: dict[str, Any]) -> None:
super().__init__(key, options)
self.column_start = options["column-start"]
self.column_length = options["column-length"]
self.column_start = int(options["column-start"])
self.column_length = int(options["column-length"])
self.column_end = self.column_start + self.column_length

def _read_raw_value(self, row: str | dict) -> str:
def _read_raw_value(self, row: str) -> str:
return row[self.column_start : self.column_end]

def error(self, exception: Exception) -> dict:
def error(self, exception: Exception) -> dict[str, Any]:
return {
"column-start": self.column_start,
"column-length": self.column_length,
Expand All @@ -103,21 +103,21 @@ def error(self, exception: Exception) -> dict:


class ComputedField(Field):
def __init__(self, key: str, options: dict) -> None:
def __init__(self, key: str, options: dict[str, Any]) -> None:
super().__init__(key, options)
self.builder = Builder.build(options["builder"])

def _read_raw_value(self, row: List[str] | dict) -> str:
def _read_raw_value(self, row: dict[str, Any]) -> str:
return self.builder.apply(row)

def error(self, exception: Exception) -> dict:
def error(self, exception: Exception) -> dict[str, Any]:
return {
"field-key": self.key,
"error": exception.args[0],
}

@classmethod
def build(cls, options: dict) -> "ComputedField":
def build(cls, options: dict[str, Any]) -> "ComputedField":
key = options.pop("key", None)
if not key:
raise ValueError("key is required in computed field definition")
Expand Down
13 changes: 7 additions & 6 deletions magicparse/post_processors.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
from .transform import Transform, OnError
from decimal import Decimal
from typing import TypeVar
from typing import Any


class PostProcessor(Transform):
registry = dict[str, type["PostProcessor"]]()

@classmethod
def build(cls, options: dict) -> "PostProcessor":
def build(cls, options: dict[str, Any]) -> "PostProcessor":
try:
name = options["name"]
except:
Expand All @@ -23,9 +25,10 @@ def build(cls, options: dict) -> "PostProcessor":
return post_processor(on_error=on_error)


class Divide(PostProcessor):
Number = TypeVar("Number", int, float, Decimal)
type Number = int | float | Decimal


class Divide(PostProcessor):
def __init__(self, on_error: OnError, denominator: int) -> None:
super().__init__(on_error)
if denominator <= 0:
Expand All @@ -42,8 +45,6 @@ def key() -> str:


class Round(PostProcessor):
Number = TypeVar("Number", int, float, Decimal)

def __init__(self, on_error: OnError, precision: int) -> None:
super().__init__(on_error)
if precision < 0:
Expand Down
13 changes: 8 additions & 5 deletions magicparse/pre_processors.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
import re
from typing import Any
from .transform import Transform, OnError


class PreProcessor(Transform):
registry = dict[str, type["PreProcessor"]]()

@classmethod
def build(cls, options: dict) -> "PreProcessor":
def build(cls, options: dict[str, Any]) -> "PreProcessor":
try:
name = options["name"]
except:
Expand Down Expand Up @@ -36,7 +39,7 @@ def key() -> str:


class Map(PreProcessor):
def __init__(self, on_error: OnError, values: dict) -> None:
def __init__(self, on_error: OnError, values: dict[str, Any]) -> None:
super().__init__(on_error)
self.values = values
self._keys = ", ".join(f"'{key}'" for key in self.values.keys())
Expand Down Expand Up @@ -91,11 +94,11 @@ def key() -> str:
class RegexExtract(PreProcessor):
def __init__(self, on_error: OnError, pattern: str) -> None:
super().__init__(on_error)
pattern = re.compile(pattern)
if "value" not in pattern.groupindex:
_pattern = re.compile(pattern)
if "value" not in _pattern.groupindex:
raise ValueError("regex-extract's pattern must contain a group named 'value'")

self.pattern = pattern
self.pattern = _pattern

def apply(self, value: str) -> str:
match = re.match(self.pattern, value)
Expand Down
Empty file added magicparse/py.typed
Empty file.
Loading