diff --git a/changelog.md b/changelog.md index 8ee7d087..c3ac517b 100644 --- a/changelog.md +++ b/changelog.md @@ -1,7 +1,8 @@ # Zepben Python SDK ## [1.1.0] - UNRELEASED ### Breaking Changes -* None. +* Updated `EwbDataFilePaths` to be an abstract class that supports variants. Added `LocalEwbDataFilePaths` which is a local file system implementation of + `EwbDataFilePaths`, and should be used in place of the old `EwbDataFilePaths`. ### New Features * None. @@ -176,7 +177,7 @@ * `RegulatingControl.ratedCurrent` * `Sensor.relayFunctions` * `UsagePoint.approvedInverterCapacity` -* using `EquipmentTreeBuilder` more then once per interpreter will no longer cause the `roots` to contain more objects then it should due to `_roots` being a +* using `EquipmentTreeBuilder` more then once per interpreter will no longer cause the `roots` to contain more objects then it should due to `_roots` being a class var * Errors when initiating gRPC connections will now properly be propagated to users. diff --git a/src/zepben/ewb/__init__.py b/src/zepben/ewb/__init__.py index 5647c3e1..a92c89cc 100644 --- a/src/zepben/ewb/__init__.py +++ b/src/zepben/ewb/__init__.py @@ -352,6 +352,7 @@ from zepben.ewb.database.paths.database_type import * from zepben.ewb.database.paths.ewb_data_file_paths import * +from zepben.ewb.database.paths.local_ewb_data_file_paths import * from zepben.ewb.database.sql.column import * from zepben.ewb.database.sqlite.tables.sqlite_table import * diff --git a/src/zepben/ewb/database/paths/ewb_data_file_paths.py b/src/zepben/ewb/database/paths/ewb_data_file_paths.py index 36230ae7..90d7947e 100644 --- a/src/zepben/ewb/database/paths/ewb_data_file_paths.py +++ b/src/zepben/ewb/database/paths/ewb_data_file_paths.py @@ -1,202 +1,89 @@ -# Copyright 2024 Zeppelin Bend Pty Ltd +# Copyright 2025 Zeppelin Bend Pty Ltd # This Source Code Form is subject to the terms of the Mozilla Public # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at https://mozilla.org/MPL/2.0/. __all__ = ['EwbDataFilePaths'] +from abc import ABC, abstractmethod from datetime import date, timedelta from pathlib import Path -from typing import Callable, Iterator, Optional, List +from typing import Optional, List, Generator from zepben.ewb import require from zepben.ewb.database.paths.database_type import DatabaseType -class EwbDataFilePaths: +class EwbDataFilePaths(ABC): """Provides paths to all the various data files / folders used by EWB.""" - def __init__(self, base_dir: Path, - create_path: bool = False, - create_directories_func: Callable[[Path], None] = lambda it: it.mkdir(parents=True), - is_directory: Callable[[Path], bool] = Path.is_dir, - exists: Callable[[Path], bool] = Path.exists, - list_files: Callable[[Path], Iterator[Path]] = Path.iterdir): - """ - :param base_dir: The root directory of the EWB data structure. - :param create_path: Create the root directory (and any missing parent folders) if it does not exist. 
- """ - self.create_directories_func = create_directories_func - self.is_directory = is_directory - self.exists = exists - self.list_files = list_files - self._base_dir = base_dir - - if create_path: - self.create_directories_func(base_dir) - - require(self.is_directory(base_dir), lambda: f"base_dir must be a directory") - - @property - def base_dir(self): - """The root directory of the EWB data structure.""" - return self._base_dir - - def customer(self, database_date: date) -> Path: - """ - Determine the path to the "customers" database for the specified date. - - :param database_date: The :class:`date` to use for the "customers" database. - :return: The :class:`path` to the "customers" database for the specified date. - """ - return self._to_dated_path(database_date, DatabaseType.CUSTOMER.file_descriptor) - - def diagram(self, database_date: date) -> Path: - """ - Determine the path to the "diagrams" database for the specified date. - - :param database_date: The :class:`date` to use for the "diagrams" database. - :return: The :class:`path` to the "diagrams" database for the specified date. - """ - return self._to_dated_path(database_date, DatabaseType.DIAGRAM.file_descriptor) - - def measurement(self, database_date: date) -> Path: - """ - Determine the path to the "measurements" database for the specified date. - - :param database_date: The :class:`date` to use for the "measurements" database. - :return: The :class:`path` to the "measurements" database for the specified date. - """ - return self._to_dated_path(database_date, DatabaseType.MEASUREMENT.file_descriptor) - - def network_model(self, database_date: date) -> Path: - """ - Determine the path to the "network model" database for the specified date. - - :param database_date: The :class:`date` to use for the "network model" database. - :return: The :class:`path` to the "network model" database for the specified date. - """ - return self._to_dated_path(database_date, DatabaseType.NETWORK_MODEL.file_descriptor) - - def tile_cache(self, database_date: date) -> Path: - """ - Determine the path to the "tile cache" database for the specified date. - - :param database_date: The :class:`date` to use for the "tile cache" database. - :return: The :class:`path` to the "tile cache" database for the specified date. - """ - return self._to_dated_path(database_date, DatabaseType.TILE_CACHE.file_descriptor) - - def energy_reading(self, database_date: date) -> Path: - """ - Determine the path to the "energy readings" database for the specified date. - - :param database_date: The :class:`date` to use for the "energy readings" database. - :return: The :class:`path` to the "energy readings" database for the specified date. - """ - return self._to_dated_path(database_date, DatabaseType.ENERGY_READING.file_descriptor) - - def energy_readings_index(self) -> Path: - """ - Determine the path to the "energy readings index" database. - - :return: The :class:`path` to the "energy readings index" database. - """ - return self._base_dir.joinpath(f"{DatabaseType.ENERGY_READINGS_INDEX.file_descriptor}.sqlite") + VARIANTS_PATH: str = "variants" + """ + The folder containing the variants. Will be placed under the dated folder alongside the network model database. + """ - def load_aggregator_meters_by_date(self) -> Path: + def resolve(self, database_type: DatabaseType, database_date: Optional[date] = None, variant: Optional[str] = None) -> Path: """ - Determine the path to the "load aggregator meters-by-date" database. 
+ Resolves the :class:`Path` to the database file for the specified :class:`DatabaseType`, within the specified `database_date` + and optional `variant` when `DatabaseType.per_date` is set to true. - :return: The :class:`path` to the "load aggregator meters-by-date" database. - """ - return self._base_dir.joinpath(f"{DatabaseType.LOAD_AGGREGATOR_METERS_BY_DATE.file_descriptor}.sqlite") - - def weather_reading(self) -> Path: - """ - Determine the path to the "weather readings" database. + :param database_type: The :class:`DatabaseType` to use for the database :class:`Path`. + :param database_date: The :class:`date` to use for the database :class:`Path`. Required when `database_type.per_date` is true, otherwise must be `None`. + :param variant: The optional name of the variant containing the database. - :return: The :class:`path` to the "weather readings" database. + :return: The :class:`Path` to the :class:`DatabaseType` database file. """ - return self._base_dir.joinpath(f"{DatabaseType.WEATHER_READING.file_descriptor}.sqlite") - - def results_cache(self) -> Path: - """ - Determine the path to the "results cache" database. - - :return: The :class:`path` to the "results cache" database. - """ - return self._base_dir.joinpath(f"{DatabaseType.RESULTS_CACHE.file_descriptor}.sqlite") + if database_date is not None: + require(database_type.per_date, lambda: "database_type must have its per_date set to True to use this method with a database_date.") + if variant is not None: + return self.resolve_database(self._to_dated_variant_path(database_type, database_date, variant)) + else: + return self.resolve_database(self._to_dated_path(database_type, database_date)) + else: + require(not database_type.per_date, lambda: "database_type must have its per_date set to False to use this method without a database_date.") + return self.resolve_database(Path(self._database_name(database_type))) + @abstractmethod def create_directories(self, database_date: date) -> Path: """ Create the directories required to have a valid path for the specified date. :param database_date: The :class:`date` required in the path. - :return: The :class:`path` to the directory for the `database_date`. - """ - date_path = self._base_dir.joinpath(str(database_date)) - if self.exists(date_path): - return date_path - else: - self.create_directories_func(date_path) - return date_path - - def _to_dated_path(self, database_date: date, file: str) -> Path: - return self._base_dir.joinpath(str(database_date), f"{database_date}-{file}.sqlite") - - def _check_exists(self, database_type: DatabaseType, database_date: date) -> bool: + :return: The :class:`Path` to the directory for the `database_date`. """ - Check if a database of the specified type and date exists. + raise NotImplemented - :param database_type: The type of database to search for. - :param database_date: The date to check. - :return: `True` if a database of the specified `database_type` and `database_date` exists in the date path. 
- """ - if not database_type.per_date: - raise ValueError("INTERNAL ERROR: Should only be calling `checkExists` for `perDate` files.") - - if database_type == DatabaseType.CUSTOMER: - model_path = self.customer(database_date) - elif database_type == DatabaseType.DIAGRAM: - model_path = self.diagram(database_date) - elif database_type == DatabaseType.MEASUREMENT: - model_path = self.measurement(database_date) - elif database_type == DatabaseType.NETWORK_MODEL: - model_path = self.network_model(database_date) - elif database_type == DatabaseType.TILE_CACHE: - model_path = self.tile_cache(database_date) - elif database_type == DatabaseType.ENERGY_READING: - model_path = self.energy_reading(database_date) - else: - raise ValueError( - "INTERNAL ERROR: Should only be calling `check_exists` for `per_date` files, which should all be covered above, so go ahead and add it.") - return self.exists(model_path) - - def find_closest(self, database_type: DatabaseType, max_days_to_search: int = 999, target_date: date = date.today(), search_forwards: bool = False) -> \ - Optional[date]: + def find_closest( + self, + database_type: DatabaseType, + max_days_to_search: int = 999999, + target_date: date = date.today(), + search_forwards: bool = False + ) -> Optional[date]: """ Find the closest date with a usable database of the specified type. :param database_type: The type of database to search for. :param max_days_to_search: The maximum number of days to search for a valid database. - :param target_date: The target :class:`date`. Defaults to today. - :param search_forwards: Indicates the search should also look forwards in time from `start_date` for a valid file. Defaults to reverse search only. - :return: The closest :class:`date` to `database_date` with a valid database of `database_type` within the search parameters, or `None` if no valid database was found. + :param target_date: The target date. Defaults to today. + :param search_forwards: Indicates the search should also look forwards in time from `target_date` for a valid file. Defaults to reverse search only. + + :return: The closest :class:`date` to `target_date` with a valid database of `database_type` within the search parameters, or null if no valid database + was found. """ if not database_type.per_date: return None - if self._check_exists(database_type, target_date): + descendants = list(self.enumerate_descendants()) + if self._check_exists(descendants, database_type, target_date): return target_date offset = 1 - while offset <= max_days_to_search: offset_days = timedelta(offset) try: previous_date = target_date - offset_days - if self._check_exists(database_type, previous_date): + if self._check_exists(descendants, database_type, previous_date): return previous_date except OverflowError: pass @@ -204,34 +91,102 @@ def find_closest(self, database_type: DatabaseType, max_days_to_search: int = 99 if search_forwards: try: forward_date = target_date + offset_days - if self._check_exists(database_type, forward_date): + if self._check_exists(descendants, database_type, forward_date): return forward_date except OverflowError: pass + offset += 1 + return None - def _get_available_dates_for(self, database_type: DatabaseType) -> List[date]: + def get_available_dates_for(self, database_type: DatabaseType) -> List[date]: + """ + Find available databases specified by :class:`DatabaseType` in data path. + + :param database_type: The type of database to search for. 
+ + :return: list of :class:`date`'s for which this specified :class:`DatabaseType` databases exist in the data path. + """ if not database_type.per_date: raise ValueError( - "INTERNAL ERROR: Should only be calling `_get_available_dates_for` for `per_date` files.") + "INTERNAL ERROR: Should only be calling `get_available_dates_for` for `per_date` files, " + "which should all be covered above, so go ahead and add it." + ) to_return = list() - for file in self.list_files(self._base_dir): - if self.is_directory(file): + for it in self.enumerate_descendants(): + if it.name.endswith(self._database_name(database_type)): try: - database_date = date.fromisoformat(file.name) - if self.exists(self._to_dated_path(database_date, database_type.file_descriptor)): - to_return.append(database_date) + to_return.append(date.fromisoformat(it.parent.name)) except ValueError: pass + + return sorted(to_return) + + def get_available_variants_for(self, target_date: date = date.today()) -> List[str]: + """ + Find available variants for the specified `target_date` in data path. + + :param target_date: The target date. Defaults to today. + + :return: list of variant names that exist in the data path for the specified `target_date`. + """ + to_return = list() + + for it in self.enumerate_descendants(): + try: + if (str(it.parent.name).lower() == self.VARIANTS_PATH) and (str(it.parent.parent.name) == str(target_date)): + to_return.append(str(it.name)) + except ValueError: + pass + return sorted(to_return) - def get_network_model_databases(self) -> List[date]: + @abstractmethod + def enumerate_descendants(self) -> Generator[Path, None, None]: + """ + Lists the child items of source location. + + :return: generator of child items. + """ + raise NotImplemented + + @abstractmethod + def resolve_database(self, path: Path) -> Path: """ - Find available network-model databases in data path. + Resolves the database in the specified source :class:`Path`. - :return: A list of :class:`date`'s for which network-model databases exist in the data path. + :param path: :class:`Path` to the source database file. + :return: :class:`Path` to the local database file. """ - return self._get_available_dates_for(DatabaseType.NETWORK_MODEL) + raise NotImplemented + + def _check_exists(self, descendants: List[Path], database_type: DatabaseType, database_date: date) -> bool: + """ + Check if a database :class:`Path` of the specified :class:`DatabaseType` and :class:`date` exists. + + :param descendants: A list of :class:`Path` representing the descendant paths. + :param database_type: The type of database to search for. + :param database_date: The date to check. + + :return: True if a database of the specified `database_type` and `database_date` exits in the date path. 
+ """ + for cp in descendants: + if cp.is_relative_to(self._to_dated_path(database_type, database_date)): + return True + + return False + + def _to_dated_path(self, database_type: DatabaseType, database_date: date) -> Path: + date_str = str(database_date) + return Path(date_str).joinpath(f"{date_str}-{self._database_name(database_type)}") + + def _to_dated_variant_path(self, database_type: DatabaseType, database_date: date, variant: str) -> Path: + date_str = str(database_date) + return Path(date_str).joinpath(self.VARIANTS_PATH, variant, f"{date_str}-{self._database_name(database_type)}") + + @staticmethod + def _database_name(database_type: DatabaseType) -> str: + return f"{database_type.file_descriptor}.sqlite" diff --git a/src/zepben/ewb/database/paths/local_ewb_data_file_paths.py b/src/zepben/ewb/database/paths/local_ewb_data_file_paths.py new file mode 100644 index 00000000..01ffa9be --- /dev/null +++ b/src/zepben/ewb/database/paths/local_ewb_data_file_paths.py @@ -0,0 +1,58 @@ +# Copyright 2025 Zeppelin Bend Pty Ltd +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. + +__all__ = ['LocalEwbDataFilePaths'] + +from datetime import date +from pathlib import Path +from typing import Callable, Generator, Union + +from zepben.ewb import require +from zepben.ewb.database.paths.ewb_data_file_paths import EwbDataFilePaths + + +class LocalEwbDataFilePaths(EwbDataFilePaths): + """Provides paths to all the various data files / folders in the local file system used by EWB.""" + + def __init__( + self, + base_dir: Union[Path, str], + create_path: bool = False, + create_directories_func: Callable[[Path], None] = lambda it: it.mkdir(parents=True), + is_directory: Callable[[Path], bool] = Path.is_dir, + exists: Callable[[Path], bool] = Path.exists, + list_files: Callable[[Path], Generator[Path, None, None]] = Path.iterdir, + ): + """ + :param base_dir: The root directory of the EWB data structure. + :param create_path: Create the root directory (and any missing parent folders) if it does not exist. + :param create_directories_func: Function for directory creation. + :param is_directory: Function to determine if the supplied path is a directory . + :param exists: Function to determine if the supplied path exists. + :param list_files: Function for listing directories and files under the supplied path. 
+ """ + self._base_dir = Path(base_dir) + self._create_directories_func = create_directories_func + self._exists = exists + self._list_files = list_files + + if create_path: + self._create_directories_func(base_dir) + + require(is_directory(base_dir), lambda: f"base_dir must be a directory") + + def create_directories(self, database_date: date) -> Path: + date_path = self._base_dir.joinpath(str(database_date)) + if not self._exists(date_path): + self._create_directories_func(date_path) + + return date_path + + def enumerate_descendants(self) -> Generator[Path, None, None]: + for it in self._list_files(self._base_dir): + yield it + + def resolve_database(self, path: Path) -> Path: + return self._base_dir.joinpath(path) diff --git a/test/database/paths/test_ewb_data_file_paths.py b/test/database/paths/test_ewb_data_file_paths.py deleted file mode 100644 index 62a31c9e..00000000 --- a/test/database/paths/test_ewb_data_file_paths.py +++ /dev/null @@ -1,260 +0,0 @@ -# Copyright 2024 Zeppelin Bend Pty Ltd -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at https://mozilla.org/MPL/2.0/. -from datetime import date, timedelta -from pathlib import Path -from typing import Iterator -from unittest.mock import Mock - -from pytest import raises - -from zepben.ewb import EwbDataFilePaths, DatabaseType - -base_dir = Path("/not/real/path/") - - -def expected_dated_path(expected_date, file_descriptor): - return Path(f"{base_dir}/{expected_date}/{expected_date}-{file_descriptor}.sqlite") - - -def expected_path(file_descriptor): - return Path(f"{base_dir}/{file_descriptor}.sqlite") - - -def test_validates_directory_is_valid_at_construction(): - mock_is_directory = Mock(return_value=True) - EwbDataFilePaths(base_dir, is_directory=mock_is_directory) - mock_is_directory.assert_called_once_with(base_dir) - - mock_is_directory.reset_mock() - mock_is_directory.return_value = False - with raises(ValueError, match="base_dir must be a directory"): - EwbDataFilePaths(base_dir, is_directory=mock_is_directory) - mock_is_directory.assert_called_once_with(base_dir) - - -def test_creates_missing_root_directory_if_requested(): - mock_create_dir = Mock(side_effect=lambda to_create: to_create) - - EwbDataFilePaths(base_dir, create_path=False, is_directory=lambda _: True, create_directories_func=mock_create_dir) - mock_create_dir.assert_not_called() - - EwbDataFilePaths(base_dir, create_path=True, is_directory=lambda _: True, create_directories_func=mock_create_dir) - mock_create_dir.assert_called_once_with(base_dir) - - -def test_formats_paths(): - ewb_paths = EwbDataFilePaths(base_dir, - create_path=False, - create_directories_func=lambda _: None, - is_directory=lambda _: True, - exists=lambda _: True, - list_files=lambda _: iter(list()) - ) - - test_date = date(4444, 5, 6) - - assert ewb_paths.customer(test_date) == expected_dated_path(test_date, "customers") - assert ewb_paths.diagram(test_date) == expected_dated_path(test_date, "diagrams") - assert ewb_paths.measurement(test_date) == expected_dated_path(test_date, "measurements") - assert ewb_paths.network_model(test_date) == expected_dated_path(test_date, "network-model") - assert ewb_paths.tile_cache(test_date) == expected_dated_path(test_date, "tile-cache") - assert ewb_paths.energy_reading(test_date) == expected_dated_path(test_date, "load-readings") - - assert ewb_paths.energy_readings_index() == expected_path("load-readings-index") - assert 
ewb_paths.load_aggregator_meters_by_date() == expected_path("load-aggregator-mbd") - assert ewb_paths.weather_reading() == expected_path("weather-readings") - assert ewb_paths.results_cache() == expected_path("results-cache") - - -def test_creates_data_directories_if_they_dont_exist(): - test_date = date(1111, 2, 3) - - expected_date_path = Path(str(base_dir), str(test_date)) - - mock_exists = Mock(return_value=True) - - mock_create_dir = Mock(side_effect=lambda to_create: to_create) - - ewb_paths = EwbDataFilePaths(base_dir, - create_path=False, - create_directories_func=mock_create_dir, - is_directory=lambda _: True, - exists=mock_exists, - list_files=lambda _: iter(list()) - ) - - # if the date directory already exists - assert ewb_paths.create_directories(test_date) == expected_date_path - mock_exists.assert_called_once_with(expected_date_path) - mock_create_dir.assert_not_called() - - mock_exists.reset_mock() - mock_exists.return_value = False - - # if the date directory needs to be created - assert ewb_paths.create_directories(test_date) == expected_date_path - mock_exists.assert_called_once_with(expected_date_path) - mock_create_dir.assert_called_once_with(expected_date_path) - - -def test_finds_specified_date_if_it_exists(): - test_date = date(2222, 3, 4) - for db_type in DatabaseType: - if db_type.per_date: - validate_closest_by_exists_calls(db_type, test_date, test_date, 10, False, 1, True, lambda _: True) - # confirm finds the database on the exact day if max_days_to_search = 0 - validate_closest_by_exists_calls(db_type, test_date, test_date, 0, False, 1, True, lambda _: True) - else: - validate_closest_by_exists_calls(db_type, test_date, test_date, 10, False, 0, False, lambda _: True) - - -def test_finds_previous_date_if_it_exists_and_today_is_missing(): - actual_date = date(2000, 5, 5) - search_date = date(2000, 5, 7) - for db_type in DatabaseType: - if db_type.per_date: - validate_closest_by_exists_calls(db_type, actual_date, search_date, 10, False, 3, True) - - -def validate_closest_by_exists_calls(database_type: DatabaseType, db_date: date, search_date: date, max_days_to_search: int, search_forwards: bool, - expected_exist_calls: int, expect_to_find: bool, my_exists=None): - if my_exists is None: - def my_exists(to_test: Path) -> bool: - return to_test == expected_dated_path(db_date, database_type.file_descriptor) - - mock_exists = Mock(side_effect=my_exists) - - ewb_paths = EwbDataFilePaths(base_dir, - create_path=False, - create_directories_func=Mock(), - is_directory=lambda _: True, - exists=mock_exists, - list_files=lambda _: iter(list()) - ) - - expected_return = None - if expect_to_find: - expected_return = db_date - - assert ewb_paths.find_closest(database_type, max_days_to_search=max_days_to_search, target_date=search_date, - search_forwards=search_forwards) == expected_return - assert mock_exists.call_count == expected_exist_calls - - -def test_doesnt_find_files_outside_the_search_window(): - actual_date = date(2000, 5, 5) - search_date = date(2000, 5, 16) - for db_type in DatabaseType: - if db_type.per_date: - validate_closest_by_exists_calls(db_type, actual_date, search_date, 10, False, 11, expect_to_find=False) - - -def test_can_search_forwards_in_time(): - previous_date = date(2000, 5, 5) - search_date = date(2000, 5, 8) - forward_date = date(2000, 5, 10) - - for db_type in DatabaseType: - if db_type.per_date: - my_list = [expected_dated_path(previous_date, db_type.file_descriptor), expected_dated_path(forward_date, db_type.file_descriptor)] - - 
validate_closest_by_exists_calls(db_type, forward_date, search_date, 10, True, 5, expect_to_find=True, my_exists=lambda to_test: to_test in my_list) - - -def test_closest_date_using_default_parameters(): - previous_date = date.today() - timedelta(days=2) - forward_date = date.today() + timedelta(days=1) - - for db_type in DatabaseType: - if db_type.per_date: - my_list = [expected_dated_path(previous_date, db_type.file_descriptor), expected_dated_path(forward_date, db_type.file_descriptor)] - - mock_exists = Mock(side_effect=lambda to_test: to_test in my_list) - - ewb_paths = EwbDataFilePaths(base_dir, - create_path=False, - create_directories_func=Mock(), - is_directory=lambda _: True, - exists=mock_exists, - list_files=lambda _: iter(list()) - ) - - assert ewb_paths.find_closest(db_type) == previous_date - assert mock_exists.call_count == 3 - - -def test_get_available_dates_for_accepts_date_types(): - for db_type in DatabaseType: - if db_type.per_date: - validate_get_available_dates_for(db_type) - - -def test_get_available_dates_for_throws_on_non_date_type(): - for db_type in DatabaseType: - if not db_type.per_date: - with raises(ValueError, match="INTERNAL ERROR: Should only be calling `_get_available_dates_for` for `per_date` files."): - validate_get_available_dates_for(db_type) - - -def test_get_available_dates_for_sorts_the_returned_dates(): - unsorted_dates = ["2001-02-03", "2032-05-07", "2009-05-09", "2009-05-08"] - - ewb_paths = EwbDataFilePaths(base_dir, - create_path=False, - create_directories_func=Mock(), - is_directory=Mock(return_value=True), - exists=Mock(return_value=True), - list_files=Mock(return_value=iter([Path(str(base_dir), x) for x in unsorted_dates])) - ) - sorted_dates = [ - date.fromisoformat("2001-02-03"), - date.fromisoformat("2009-05-08"), - date.fromisoformat("2009-05-09"), - date.fromisoformat("2032-05-07")] - assert ewb_paths._get_available_dates_for(DatabaseType.NETWORK_MODEL) == sorted_dates - - -def validate_get_available_dates_for(db_type: DatabaseType): - usable_directories = ["2001-02-03", "2001-02-04", "2011-03-09"] - empty_directories = ["2111-11-11", "2222-12-14"] - non_date_directories = ["other_data", "2002-02-04-backup", "backup-2011-03-09"] - non_directory_files = ["config.json", "other", "run.sh", "1234-11-22"] - - all_files = usable_directories + empty_directories + non_date_directories + non_directory_files - date_directories = usable_directories + empty_directories - - def my_list_dir(to_list: Path) -> Iterator[Path]: - if to_list == base_dir: - return iter([Path(str(base_dir), x) for x in all_files]) - - def my_is_directory(to_test: Path) -> bool: - if to_test in [Path(str(base_dir), x) for x in non_directory_files]: - return False - return True - - def my_exists(to_test: Path) -> bool: - if to_test in [expected_dated_path(date.fromisoformat(x), db_type.file_descriptor) for x in empty_directories]: - return False - return True - - mock_list_dir = Mock(side_effect=my_list_dir) - mock_is_directory = Mock(side_effect=my_is_directory) - mock_exists = Mock(side_effect=my_exists) - - ewb_paths = EwbDataFilePaths(base_dir, - create_path=False, - create_directories_func=Mock(), - is_directory=mock_is_directory, - exists=mock_exists, - list_files=mock_list_dir - ) - - assert [date.fromisoformat(x) for x in usable_directories] == ewb_paths._get_available_dates_for(db_type) - - mock_list_dir.assert_called_once_with(base_dir) - for it in all_files: - mock_is_directory.assert_any_call(Path(str(base_dir), it)) - if it in date_directories: - 
mock_exists.assert_any_call(expected_dated_path(date.fromisoformat(it), db_type.file_descriptor)) diff --git a/test/database/paths/test_local_ewb_data_file_paths.py b/test/database/paths/test_local_ewb_data_file_paths.py new file mode 100644 index 00000000..8f38d9ee --- /dev/null +++ b/test/database/paths/test_local_ewb_data_file_paths.py @@ -0,0 +1,264 @@ +# Copyright 2024 Zeppelin Bend Pty Ltd +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +from datetime import date, timedelta +from pathlib import Path +from typing import List, Generator, Optional +from unittest.mock import Mock + +from pytest import raises + +from zepben.ewb import LocalEwbDataFilePaths, DatabaseType, EwbDataFilePaths + + +class TestLocalEwbDataFilePaths: + + def setup_method(self): + self.today = date.today() + self.base_dir = Path("/not/real/path/") + self.descendant_paths: List[Path] = [] + + self.mock_create_directories = Mock(side_effect=lambda path: path) + self.mock_is_directory = Mock(return_value=True) + self.mock_exists = Mock(return_value=True) + + def list_files(_: Path) -> Generator[Path, None, None]: + for it in self.descendant_paths: + yield it + + self.ewb_paths = LocalEwbDataFilePaths( + self.base_dir, + create_path=False, + create_directories_func=self.mock_create_directories, + is_directory=self.mock_is_directory, + exists=self.mock_exists, + list_files=list_files + ) + + def test_validates_directory_is_valid_at_construction(self): + self.mock_is_directory.assert_called_once_with(self.base_dir) + + self.mock_is_directory.reset_mock() + self.mock_is_directory.return_value = False + + with raises(ValueError, match="base_dir must be a directory"): + LocalEwbDataFilePaths(self.base_dir, is_directory=self.mock_is_directory) + + self.mock_is_directory.assert_called_once_with(self.base_dir) + + def test_creates_missing_root_directory_if_requested(self): + # Assert the default __init__ version didn't try and create the + self.mock_create_directories.assert_not_called() + + LocalEwbDataFilePaths(self.base_dir, create_path=True, is_directory=self.mock_is_directory, create_directories_func=self.mock_create_directories) + self.mock_create_directories.assert_called_once_with(self.base_dir) + + def test_formats_paths(self): + for database_type in DatabaseType: + if database_type.per_date: + assert self.ewb_paths.resolve(database_type, self.today) == self._expected_dated_path(self.today, database_type.file_descriptor) + else: + with raises(ValueError, match="database_type must have its per_date set to True to use this method with a database_date."): + self.ewb_paths.resolve(database_type, self.today) + + for database_type in DatabaseType: + if not database_type.per_date: + assert self.ewb_paths.resolve(database_type) == Path(f"{self.base_dir}/{database_type.file_descriptor}.sqlite") + else: + with raises(ValueError, match="database_type must have its per_date set to False to use this method without a database_date."): + self.ewb_paths.resolve(database_type) + + def test_creates_data_directories_if_they_dont_exist(self): + date_dir = self.base_dir.joinpath(str(self.today)) + + assert self.ewb_paths.create_directories(self.today) == date_dir + + self.mock_create_directories.assert_not_called() + self.mock_exists.return_value = False + + assert self.ewb_paths.create_directories(self.today) == date_dir + + 
self.mock_create_directories.assert_called_once_with(date_dir) + + def test_finds_specified_date_if_it_exists(self): + for database_type in DatabaseType: + if database_type.per_date: + self.descendant_paths.append(Path(str(self.today)).joinpath(f"{self.today}-{database_type.file_descriptor}.sqlite")) + else: + self.descendant_paths.append(Path(f"{database_type.file_descriptor}.sqlite")) + + self._validate_closest(self.today) + + def test_finds_previous_date_if_it_exists_and_today_is_missing(self): + # NOTE: We want to use two days ago rather than yesterday to make sure it searches more than one day. + two_days_ago = self.today - timedelta(days=2) + + # Files for 2 days ago. + for database_type in DatabaseType: + if database_type.per_date: + self.descendant_paths.append(Path(str(two_days_ago)).joinpath(f"{two_days_ago}-{database_type.file_descriptor}.sqlite")) + + self._validate_closest(two_days_ago) + + def test_doesnt_find_files_outside_the_search_window(self): + eleven_days_ago = self.today - timedelta(days=11) + + # Files for 11 days ago. + for database_type in DatabaseType: + if database_type.per_date: + self.descendant_paths.append(Path(str(eleven_days_ago)).joinpath(f"{eleven_days_ago}-{database_type.file_descriptor}.sqlite")) + + # Doesn't find files for 11 days ago (outside the 10-day search). + self._validate_closest(None) + + def test_can_search_forwards_in_time(self): + # NOTE: We want to use two days from now rather than tomorrow to make sure it searches more than one day. We also use + # three days ago to make sure it is searching outwards from the date, not into the past then the future. + two_days_from_now = self.today + timedelta(days=2) + three_days_ago = self.today - timedelta(days=3) + + # Files for 2 days from now and 3 days ago. + for database_type in DatabaseType: + if database_type.per_date: + self.descendant_paths.append(Path(str(two_days_from_now)).joinpath(f"{two_days_from_now}-{database_type.file_descriptor}.sqlite")) + self.descendant_paths.append(Path(str(three_days_ago)).joinpath(f"{three_days_ago}-{database_type.file_descriptor}.sqlite")) + + self._validate_closest(two_days_from_now, search_forwards=True) + + def test_closest_date_using_default_parameters(self): + tomorrow = self.today + timedelta(days=1) + two_days_ago = self.today - timedelta(days=2) + + # Files for tomorrow and 2 days ago. + for database_type in DatabaseType: + if database_type.per_date: + self.descendant_paths.append(Path(str(tomorrow)).joinpath(f"{tomorrow}-{database_type.file_descriptor}.sqlite")) + self.descendant_paths.append(Path(str(two_days_ago)).joinpath(f"{two_days_ago}-{database_type.file_descriptor}.sqlite")) + + # Should find two days ago as it doesn't search forward by default. 
+ for database_type in DatabaseType: + if database_type.per_date: + assert self.ewb_paths.find_closest(DatabaseType.NETWORK_MODEL) == two_days_ago + + def test_get_available_dates_for_accepts_date_types(self): + for db_type in DatabaseType: + if db_type.per_date: + self._validate_get_available_dates_for(db_type) + else: + with raises(ValueError, match="INTERNAL ERROR: Should only be calling `get_available_dates_for` for `per_date` files."): + self._validate_get_available_dates_for(db_type) + + def test_get_available_dates_for_sorts_the_returned_dates(self): + self.descendant_paths = [ + Path("2001-02-03", "network-model.sqlite"), + Path("2032-05-07", "network-model.sqlite"), + Path("2009-05-09", "network-model.sqlite"), + Path("2009-05-08", "network-model.sqlite"), + ] + + assert self.ewb_paths.get_available_dates_for(DatabaseType.NETWORK_MODEL) == [ + date.fromisoformat("2001-02-03"), + date.fromisoformat("2009-05-08"), + date.fromisoformat("2009-05-09"), + date.fromisoformat("2032-05-07"), + ] + + def test_enumerate_descendants(self): + self.descendant_paths = [ + Path(str(self.today), f"{self.today}-network-model.sqlite"), + Path(str(self.today), f"{self.today}-customer.sqlite"), + Path("results-cache.sqlite"), + Path("weather-readings.sqlite") + ] + + result = list(self.ewb_paths.enumerate_descendants()) + assert len(result) == len(self.descendant_paths) + for it in result: + assert it in self.descendant_paths, f"{it} - all listed files should have been found in the results." + + def test_resolve_database(self): + path = "2333-11-22" + assert self.ewb_paths.resolve_database(Path(path)) == self.base_dir.joinpath(path) + + def test_resolves_variant_databases(self): + def to_variant_path(variant: str, db_type: DatabaseType) -> Path: + return self.base_dir.joinpath(str(self.today)).joinpath(EwbDataFilePaths.VARIANTS_PATH) \ + .joinpath(variant).joinpath(f"{self.today}-{db_type.file_descriptor}.sqlite") + + for database_type in DatabaseType: + if database_type.per_date: + assert self.ewb_paths.resolve(database_type, self.today, "my-variant1") == to_variant_path("my-variant1", database_type) + assert self.ewb_paths.resolve(database_type, self.today, "my-variant2") == to_variant_path("my-variant2", database_type) + else: + with raises(ValueError, match="database_type must have its per_date set to True to use this method with a database_date."): + self.ewb_paths.resolve(database_type, self.today, "my-variant") + + def test_can_request_variants_for_a_day(self): + yesterday = self.today - timedelta(days=1) + + self.descendant_paths = [ + Path(str(yesterday), EwbDataFilePaths.VARIANTS_PATH, "my-variant-1"), + Path(str(yesterday), EwbDataFilePaths.VARIANTS_PATH, "my-variant-2"), + + Path(str(self.today), EwbDataFilePaths.VARIANTS_PATH, "my-variant-2"), + Path(str(self.today), EwbDataFilePaths.VARIANTS_PATH, "my-variant-3"), + ] + + assert self.ewb_paths.get_available_variants_for(yesterday) == ["my-variant-1", "my-variant-2"] + + # No date will default to today. + assert self.ewb_paths.get_available_variants_for() == ["my-variant-2", "my-variant-3"] + + # Should return an empty list if there are no variants for the specified date. 
+ assert self.ewb_paths.get_available_variants_for(self.today - timedelta(days=2)) == [] + + def test_variant_databases_dont_count_for_the_exists_check_for_find_nearest(self): + t1 = self.today - timedelta(days=1) + t2 = self.today - timedelta(days=2) + + for database_type in DatabaseType: + if database_type.per_date: + self.descendant_paths.append(Path(str(t1), EwbDataFilePaths.VARIANTS_PATH, "my-variant", f"{t1}-{database_type.file_descriptor}.sqlite")) + + for database_type in DatabaseType: + if database_type.per_date: + assert self.ewb_paths.find_closest(database_type, max_days_to_search=3) is None + + for database_type in DatabaseType: + if database_type.per_date: + self.descendant_paths.append(Path(str(t2), f"{t2}-{database_type.file_descriptor}.sqlite")) + + for database_type in DatabaseType: + if database_type.per_date: + assert self.ewb_paths.find_closest(database_type, max_days_to_search=3) == t2 + + def test_only_folders_under_variants_are_included(self): + yesterday = self.today - timedelta(days=1) + + self.descendant_paths.append(Path(str(yesterday), "not-variant", "my-variant-1")) + + assert self.ewb_paths.get_available_variants_for(yesterday) == [] + + def _expected_dated_path(self, expected_date, file_descriptor): + return Path(f"{self.base_dir}/{expected_date}/{expected_date}-{file_descriptor}.sqlite") + + def _validate_closest(self, expected_date: Optional[date], search_forwards: bool = False): + for database_type in DatabaseType: + if database_type.per_date: + assert self.ewb_paths.find_closest(database_type, 10, self.today, search_forwards) == expected_date + else: + assert self.ewb_paths.find_closest(database_type, 10, self.today, search_forwards) is None + + def _validate_get_available_dates_for(self, db_type: DatabaseType): + usable_directories = ["2001-02-03", "2001-02-04", "2011-03-09"] + empty_directories = ["2111-11-11", "2222-12-14"] + non_date_directories = ["other_data", "2002-02-04-backup", "backup-2011-03-09"] + non_directory_files = ["config.json", "other", "run.sh", "1234-11-22"] + + other_paths = empty_directories + non_date_directories + non_directory_files + + self.descendant_paths += [Path(self.base_dir, it, f"{db_type.file_descriptor}.sqlite") for it in usable_directories] + self.descendant_paths += [Path(self.base_dir, it) for it in other_paths] + + assert self.ewb_paths.get_available_dates_for(db_type) == [date.fromisoformat(it) for it in usable_directories]
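
Taken together, these changes split the old concrete `EwbDataFilePaths` into an abstract base (path resolution plus date/variant searching) and `LocalEwbDataFilePaths` (local file-system access via `enumerate_descendants`, `resolve_database` and `create_directories`). Below is a minimal usage sketch of the new API; the `/data/ewb` directory is a placeholder, and only classes and signatures introduced in this diff are used.

```python
from datetime import date
from pathlib import Path

from zepben.ewb import DatabaseType, LocalEwbDataFilePaths

# Placeholder data directory; the constructor requires base_dir to be an existing
# directory (or pass create_path=True to create it and any missing parents).
paths = LocalEwbDataFilePaths(Path("/data/ewb"))

# Dated (per_date) databases resolve to <base_dir>/<date>/<date>-<descriptor>.sqlite.
network_db = paths.resolve(DatabaseType.NETWORK_MODEL, date(2025, 1, 1))

# Supplying a variant name resolves under <base_dir>/<date>/variants/<variant>/ instead.
variant_db = paths.resolve(DatabaseType.NETWORK_MODEL, date(2025, 1, 1), "my-variant")

# Non-dated databases must be resolved without a date.
results_cache = paths.resolve(DatabaseType.RESULTS_CACHE)

# Closest date with a usable network-model database, searching backwards from today.
closest = paths.find_closest(DatabaseType.NETWORK_MODEL, max_days_to_search=30)

# Variant names available for a given day.
variants = paths.get_available_variants_for(date(2025, 1, 1))
```

Because the base class depends only on `enumerate_descendants`, `resolve_database` and `create_directories`, an alternative storage backend could implement just those three methods and reuse the shared date, variant and resolution logic unchanged.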