diff --git a/docs/detailed_guidance/domain_types.md b/docs/detailed_guidance/domain_types.md
index df382de..62279b1 100644
--- a/docs/detailed_guidance/domain_types.md
+++ b/docs/detailed_guidance/domain_types.md
@@ -4,24 +4,24 @@ Domain types are custom defined pydantic types that solve common problems with u
This might include Postcodes, NHS Numbers, dates with specific formats etc.
Below is a list of defined types, their output type and any contraints. Nested beneath them are any constraints that area allowed and their default values if there are any.
-| Defined Type | Output Type | Contraints & Defaults |
-| ------------ | ----------- | --------------------- |
-| NHSNumber | str |
-| permissive_nhs_number | str |
warn_on_test_numbers = False |
-| Postcode | str |
-| OrgId | str |
-| conformatteddate | date | date_format: strge: datele: dategt: datelt: date |
-| formatteddatetime | datetime | date_format: str timezone_treatment: one_of ["forbid", "permit", "require] = "permit" |
-| reportingperiod | date | reporting_period_type: one_of ["start", "end"]date_format: str = "%Y-%m-%d" |
-| alphanumeric | str | min_digits : NonNegativeInt = 1max_digits: PositiveInt = 1 |
-| identifier | str | min_digits : NonNegativeInt = 1max_digits: PositiveInt = 1
+| Defined Type | Output Type | Constraints & Defaults | Supported Implementations |
+| ------------ | ----------- | --------------------- | ------------------------- |
+| NHSNumber | str | | Spark, DuckDB |
+| permissive_nhs_number | str | warn_on_test_numbers = False | Spark, DuckDB |
+| Postcode | str | | Spark, DuckDB |
+| OrgId | str | | Spark, DuckDB |
+| conformatteddate | date | date_format: str<br>ge: date<br>le: date<br>gt: date<br>lt: date | Spark, DuckDB |
+| formatteddatetime | datetime | date_format: str<br>timezone_treatment: one_of ["forbid", "permit", "require"] = "permit" | Spark, DuckDB |
+| formattedtime | time | time_format: str<br>timezone_treatment: one_of ["forbid", "permit", "require"] = "permit" | DuckDB |
+| reportingperiod | date | reporting_period_type: one_of ["start", "end"]<br>date_format: str = "%Y-%m-%d" | Spark, DuckDB |
+| alphanumeric | str | min_digits: NonNegativeInt = 1<br>max_digits: PositiveInt = 1 | Spark, DuckDB |
+| identifier | str | min_digits: NonNegativeInt = 1<br>max_digits: PositiveInt = 1 | Spark, DuckDB |
-Other types that are allowed include:
+**Other types that are allowed include:**
- str
- int
- date
- datetime
- Decimal
- float
-
-And any types that are included in [pydantic version 1.10](https://docs.pydantic.dev/1.10/usage/types/#pydantic-types)
+- Any types that are included in [pydantic version 1.10](https://docs.pydantic.dev/1.10/usage/types/#pydantic-types)
diff --git a/src/dve/core_engine/backends/implementations/duckdb/__init__.py b/src/dve/core_engine/backends/implementations/duckdb/__init__.py
index d731064..996ec80 100644
--- a/src/dve/core_engine/backends/implementations/duckdb/__init__.py
+++ b/src/dve/core_engine/backends/implementations/duckdb/__init__.py
@@ -1,4 +1,5 @@
"""Implementation of duckdb backend"""
+
from dve.core_engine.backends.implementations.duckdb.readers.json import DuckDBJSONReader
from dve.core_engine.backends.readers import register_reader
diff --git a/src/dve/core_engine/backends/implementations/duckdb/duckdb_helpers.py b/src/dve/core_engine/backends/implementations/duckdb/duckdb_helpers.py
index ea1901e..a261f7b 100644
--- a/src/dve/core_engine/backends/implementations/duckdb/duckdb_helpers.py
+++ b/src/dve/core_engine/backends/implementations/duckdb/duckdb_helpers.py
@@ -4,7 +4,7 @@
"""Helper objects for duckdb data contract implementation"""
from collections.abc import Generator, Iterator
from dataclasses import is_dataclass
-from datetime import date, datetime
+from datetime import date, datetime, time
from decimal import Decimal
from pathlib import Path
from typing import Any, ClassVar, Union
@@ -87,6 +87,7 @@ def __call__(self):
date: ddbtyp.DATE,
datetime: ddbtyp.TIMESTAMP,
Decimal: DDBDecimal()(),
+ time: ddbtyp.TIME,
}
"""A mapping of Python types to the equivalent DuckDB types."""
diff --git a/src/dve/core_engine/backends/implementations/spark/spark_helpers.py b/src/dve/core_engine/backends/implementations/spark/spark_helpers.py
index 921b04e..7cb7b17 100644
--- a/src/dve/core_engine/backends/implementations/spark/spark_helpers.py
+++ b/src/dve/core_engine/backends/implementations/spark/spark_helpers.py
@@ -12,14 +12,7 @@
from dataclasses import dataclass, is_dataclass
from decimal import Decimal
from functools import wraps
-from typing import (
- Any,
- ClassVar,
- Optional,
- TypeVar,
- Union,
- overload,
-)
+from typing import Any, ClassVar, Optional, TypeVar, Union, overload
from delta.exceptions import ConcurrentAppendException, DeltaConcurrentModificationException
from pydantic import BaseModel
diff --git a/src/dve/core_engine/backends/readers/xml.py b/src/dve/core_engine/backends/readers/xml.py
index bd7b8e4..5de23c4 100644
--- a/src/dve/core_engine/backends/readers/xml.py
+++ b/src/dve/core_engine/backends/readers/xml.py
@@ -3,14 +3,7 @@
import re
from collections.abc import Collection, Iterator
-from typing import (
- IO,
- Any,
- GenericAlias, # type: ignore
- Optional,
- Union,
- overload
-)
+from typing import IO, Any, GenericAlias, Optional, Union, overload # type: ignore
import polars as pl
from lxml import etree # type: ignore
diff --git a/src/dve/core_engine/backends/utilities.py b/src/dve/core_engine/backends/utilities.py
index 9319780..bfa6f90 100644
--- a/src/dve/core_engine/backends/utilities.py
+++ b/src/dve/core_engine/backends/utilities.py
@@ -2,10 +2,10 @@
import sys
from dataclasses import is_dataclass
-from datetime import date, datetime
+from datetime import date, datetime, time
from decimal import Decimal
-from typing import Any, ClassVar, Union
from typing import GenericAlias # type: ignore
+from typing import Any, ClassVar, Union
import polars as pl # type: ignore
from polars.datatypes.classes import DataTypeClass as PolarsType
@@ -33,13 +33,16 @@
date: pl.Date, # type: ignore
datetime: pl.Datetime, # type: ignore
Decimal: pl.Utf8, # type: ignore
+ time: pl.Time, # type: ignore
}
"""A mapping of Python types to the equivalent Polars types."""
def stringify_type(type_: Union[type, GenericAlias]) -> type:
"""Stringify an individual type."""
- if isinstance(type_, type) and not isinstance(type_, GenericAlias): # A model, return the contents. # pylint: disable=C0301
+ if isinstance(type_, type) and not isinstance(
+ type_, GenericAlias
+ ): # A model, return the contents. # pylint: disable=C0301
if issubclass(type_, BaseModel):
return stringify_model(type_)
diff --git a/src/dve/core_engine/message.py b/src/dve/core_engine/message.py
index d81acde..7dd4f02 100644
--- a/src/dve/core_engine/message.py
+++ b/src/dve/core_engine/message.py
@@ -2,8 +2,8 @@
import copy
import datetime as dt
-import operator
import json
+import operator
from collections.abc import Callable
from decimal import Decimal
from functools import reduce
diff --git a/src/dve/core_engine/type_hints.py b/src/dve/core_engine/type_hints.py
index a6c0c44..ac6cf2a 100644
--- a/src/dve/core_engine/type_hints.py
+++ b/src/dve/core_engine/type_hints.py
@@ -6,12 +6,14 @@
from pathlib import Path
from queue import Queue as ThreadQueue
from typing import TYPE_CHECKING, Any, List, Optional, TypeVar, Union # pylint: disable=W1901
-# TODO - cannot remove List from Typing. See L60 for details.
from pyspark.sql import DataFrame
from pyspark.sql.types import StructType
from typing_extensions import Literal, ParamSpec, get_args
+# TODO - cannot remove List from Typing. See L60 for details.
+
+
if TYPE_CHECKING: # pragma: no cover
from dve.core_engine.message import FeedbackMessage
diff --git a/src/dve/metadata_parser/domain_types.py b/src/dve/metadata_parser/domain_types.py
index c944278..3153d26 100644
--- a/src/dve/metadata_parser/domain_types.py
+++ b/src/dve/metadata_parser/domain_types.py
@@ -392,6 +392,97 @@ def __get_validators__(cls) -> Iterator[classmethod]:
yield cls.validate # type: ignore
+class FormattedTime(dt.time):
+    """A time, provided as a time, a datetime or a string in a specific format."""
+
+    # NOTE(review): the class preceding this one defines `__get_validators__`, this one does
+    # not - confirm pydantic really calls `validate`, else the settings below are not enforced.
+    TIME_FORMAT: ClassVar[Optional[str]] = None
+    """The specific format of the time."""
+    TIMEZONE_TREATMENT: ClassVar[Literal["forbid", "permit", "require"]] = "permit"
+    """How to treat the presence of timezone-related information."""
+    DEFAULT_PATTERNS: Sequence[str] = list(
+        # 12 hour patterns go first: `strptime` only lets `%p` adjust the hour next to `%I`,
+        # so matching `%H...%p` first would read "01:00:00pm" as 01:00. The meridiem is
+        # mandatory here because `%I` alone would turn a bare "12:00:00" into midnight.
+        map(
+            "".join,
+            itertools.product(
+                ("%I:%M:%S", "%I%M%S"),
+                ("", ".%f"),
+                ("%p", " %p", "%p%z", " %p%z", "%z%p", "%z %p"),
+            ),
+        )
+    ) + list(
+        # 24 hour patterns. `%p` has no effect next to `%H`; those variants are only kept so
+        # input such as "13:00:00pm" stays accepted. `%P` is dropped: `strptime` rejects it.
+        map(
+            "".join,
+            itertools.product(
+                ("%H:%M:%S", "%H%M%S"),
+                ("", ".%f"),
+                ("%p", ""),
+                ("%z", ""),
+            ),
+        )
+    )
+    """A sequence of time format patterns to try if `TIME_FORMAT` is unset."""
+
+    @classmethod
+    def convert_to_time(cls, value: dt.datetime) -> dt.time:
+        """Convert a `datetime.datetime` to a `datetime.time`, retaining any timezone info."""
+        # `timetz` copies `tzinfo` across, and gives a naive time for a naive datetime.
+        return value.timetz()
+
+    @classmethod
+    def parse_time(cls, string: str) -> dt.time:
+        """Try each of `DEFAULT_PATTERNS` in turn, raising `ValueError` if none match."""
+        string = string.strip()
+        if string.endswith("Z"):  # Convert 'zulu' time to UTC.
+            string = string[:-1] + "+00:00"
+
+        # The first pattern that parses wins; a mismatch raises `ValueError` and we move on.
+        for pattern in cls.DEFAULT_PATTERNS:
+            try:
+                return cls.convert_to_time(dt.datetime.strptime(string, pattern))
+            except ValueError:
+                continue
+
+        raise ValueError("Unable to parse provided time")
+
+    @classmethod
+    def validate(cls, value: Union[dt.time, dt.datetime, str]) -> Optional[dt.time]:
+        """
+        Validate a passed time, datetime or string; `None` is handed back unchanged.
+
+        Strings are parsed with `TIME_FORMAT` when it is set, otherwise with `parse_time`.
+        Raises `ValueError` if parsing fails or the result breaks `TIMEZONE_TREATMENT`.
+        """
+        if value is None:
+            return value
+
+        if isinstance(value, dt.time):
+            new_time = value
+        elif isinstance(value, dt.datetime):
+            new_time = cls.convert_to_time(value)
+        elif cls.TIME_FORMAT is not None:
+            try:
+                new_time = cls.convert_to_time(dt.datetime.strptime(value, cls.TIME_FORMAT))
+            except ValueError as err:
+                raise ValueError(
+                    f"Unable to parse provided time in format {cls.TIME_FORMAT}"
+                ) from err
+        else:
+            new_time = cls.parse_time(value)
+
+        if cls.TIMEZONE_TREATMENT == "forbid" and new_time.tzinfo:
+            raise ValueError("Provided time has timezone, but this is forbidden for this field")
+        if cls.TIMEZONE_TREATMENT == "require" and not new_time.tzinfo:
+            raise ValueError("Provided time missing timezone, but this is required for this field")
+
+        return new_time
+
+
@lru_cache()
@validate_arguments
def formatteddatetime(
@@ -412,6 +503,23 @@ def formatteddatetime(
return type("FormattedDatetime", (FormattedDatetime, *FormattedDatetime.__bases__), dict_)
+@lru_cache()
+@validate_arguments
+def formattedtime(
+    time_format: Optional[str] = None,
+    timezone_treatment: Literal["forbid", "permit", "require"] = "permit",
+) -> type[FormattedTime]:
+    """Build a `FormattedTime` class carrying the given time format and timezone treatment."""
+    if time_format is None and timezone_treatment == "permit":
+        return FormattedTime  # Both settings match the base class, so reuse it as-is.
+
+    # Clone the class namespace, then overlay the two requested settings.
+    overrides = {"TIME_FORMAT": time_format, "TIMEZONE_TREATMENT": timezone_treatment}
+    namespace = {**FormattedTime.__dict__, **overrides}
+
+    return type("FormattedTime", (FormattedTime, *FormattedTime.__bases__), namespace)
+
+
class ReportingPeriod(dt.date):
"""A reporting period field, with the type of reporting period supplied"""
diff --git a/src/dve/metadata_parser/model_generator.py b/src/dve/metadata_parser/model_generator.py
index 53a82d8..7681b7f 100644
--- a/src/dve/metadata_parser/model_generator.py
+++ b/src/dve/metadata_parser/model_generator.py
@@ -72,6 +72,7 @@ def constr(
"identifier": domain_types.identifier,
"orgid": domain_types.OrgID,
"formatteddatetime": domain_types.formatteddatetime,
+ "formattedtime": domain_types.formattedtime,
"conformatteddate": domain_types.conformatteddate,
"reportingperiodstart": domain_types.reportingperiod(reporting_period_type="start"),
"reportingperiodend": domain_types.reportingperiod(reporting_period_type="end"),
diff --git a/tests/test_core_engine/test_backends/fixtures.py b/tests/test_core_engine/test_backends/fixtures.py
index 1f9ac23..14369b9 100644
--- a/tests/test_core_engine/test_backends/fixtures.py
+++ b/tests/test_core_engine/test_backends/fixtures.py
@@ -2,7 +2,7 @@
# pylint: disable=redefined-outer-name
import json
-from datetime import date, datetime
+from datetime import date, datetime, time
from pathlib import Path
from tempfile import TemporaryDirectory
from typing import Any, Dict, Iterator, List, Tuple
@@ -83,10 +83,10 @@ def temp_duckdb_dir():
@pytest.fixture
def temp_csv_file(temp_duckdb_dir: Path):
- header: str = "ID,varchar_field,bigint_field,date_field,timestamp_field"
+ header: str = "ID,varchar_field,bigint_field,date_field,timestamp_field,time_field"
typed_data = [
- [1, "hi", 3, date(2023, 1, 3), datetime(2023, 1, 3, 12, 0, 3)],
- [2, "bye", 4, date(2023, 3, 7), datetime(2023, 5, 9, 15, 21, 53)],
+ [1, "hi", 3, date(2023, 1, 3), datetime(2023, 1, 3, 12, 0, 3), time(12, 0, 0)],
+ [2, "bye", 4, date(2023, 3, 7), datetime(2023, 5, 9, 15, 21, 53), time(13, 0 ,0)],
]
class SimpleModel(BaseModel):
@@ -95,6 +95,7 @@ class SimpleModel(BaseModel):
bigint_field: int
date_field: date
timestamp_field: datetime
+ time_field: time
with open(temp_duckdb_dir.joinpath("dummy.csv"), mode="w") as csv_file:
csv_file.write(header + "\n")
diff --git a/tests/test_core_engine/test_backends/test_implementations/test_duckdb/test_data_contract.py b/tests/test_core_engine/test_backends/test_implementations/test_duckdb/test_data_contract.py
index 23f1534..5093150 100644
--- a/tests/test_core_engine/test_backends/test_implementations/test_duckdb/test_data_contract.py
+++ b/tests/test_core_engine/test_backends/test_implementations/test_duckdb/test_data_contract.py
@@ -41,6 +41,14 @@ def test_duckdb_data_contract_csv(temp_csv_file):
"bigint_field": "NonNegativeInt",
"date_field": "date",
"timestamp_field": "datetime",
+ "time_field": {
+ "description": "test",
+ "callable": "formattedtime",
+ "constraints": {
+ "time_format": "%Y-%m-%d",
+ "timezone_treatment": "forbid"
+ }
+ }
},
"reader_config": {
".csv": {
diff --git a/tests/test_model_generation/test_domain_types.py b/tests/test_model_generation/test_domain_types.py
index 9db587a..6ceee74 100644
--- a/tests/test_model_generation/test_domain_types.py
+++ b/tests/test_model_generation/test_domain_types.py
@@ -307,3 +307,56 @@ def test_reportingperiod_raises(field, value):
data = {field: value}
with pytest.raises(ValueError):
model = ReportingPeriodModel(**data)
+
+
+@pytest.mark.parametrize(
+ ["time_to_validate", "time_format", "timezone_treatment", "expected"],
+ [
+ ["23:00:00", "%H:%M:%S", "forbid", dt.time(23, 0, 0)],
+ ["11:00:00", "%I:%M:%S", "forbid", dt.time(11, 0, 0)],
+ ["23:00:00Z", None, "require", dt.time(23, 0, 0, tzinfo=UTC)],
+ ["12:00:00Zam", None, "permit", dt.time(0, 0, 0, tzinfo=UTC)],
+ ["12:00:00pm", None, "forbid", dt.time(12, 0, 0)],
+ # The next case is not great: a date-only format parses but effectively returns an incorrect
+ # (midnight) time. However, this would be down to user error in setting up the dischema.
+ ["1970-01-01", "%Y-%m-%d", "forbid", dt.time(0, 0)],
+ [dt.datetime(2025, 12, 1, 13, 0, 5), "%H:%M:%S", "forbid", dt.time(13, 0, 5)],
+ [dt.datetime(2025, 12, 1, 13, 0, 5, tzinfo=UTC), "%H:%M:%S", "require", dt.time(13, 0, 5, tzinfo=UTC)],
+ [dt.time(13, 0, 0), "%H:%M:%S", "forbid", dt.time(13, 0, 0)],
+ [dt.time(13, 0, 0, tzinfo=UTC), "%H:%M:%S", "permit", dt.time(13, 0, 0, tzinfo=UTC)],
+ [dt.time(13, 0, 0, tzinfo=UTC), "%H:%M:%S", "require", dt.time(13, 0, 0, tzinfo=UTC)],
+ ]
+)
+def test_formattedtime(
+ time_to_validate: str | dt.datetime | dt.time,
+ time_format: str | None,
+ timezone_treatment: str,
+ expected: dt.time
+):
+ """Test that valid time strings, datetimes and times validate to the expected time."""
+ time_type = hct.formattedtime(time_format, timezone_treatment)
+ assert time_type.validate(time_to_validate) == expected
+
+
+@pytest.mark.parametrize(
+    ["time_to_validate", "time_format", "timezone_treatment"],
+    [
+        ["1970-01-01", "%H:%M:%S", "forbid"],
+        ["not a time", None, "permit"],  # no format set: every default pattern fails
+        ["23:00:00", "%I:%M:%S", "permit"],
+        ["23:00:00", "%H:%M:%S", "require"],
+        ["23:00:00Z", "%I:%M:%S", "forbid"],
+        [dt.datetime(2025, 12, 1, 13, 0, 5, tzinfo=UTC), "%H:%M:%S", "forbid"],
+        [dt.time(13, 0, 5, tzinfo=UTC), "%H:%M:%S", "forbid"],
+    ],
+)
+def test_formattedtime_raises(
+    time_to_validate: str | dt.datetime | dt.time, time_format: str | None, timezone_treatment: str
+):
+    """
+    Test incorrect serialised objects can be handled correctly when attempting to parse into time
+    objects.
+    """
+    time_type = hct.formattedtime(time_format, timezone_treatment)
+    with pytest.raises(ValueError):
+        time_type.validate(time_to_validate)  # pylint: disable=W0106