From 28807357777d2028317e414219bf53ae990692ec Mon Sep 17 00:00:00 2001 From: Debasish Pal <48341250+debpal@users.noreply.github.com> Date: Wed, 22 Oct 2025 15:00:52 +0300 Subject: [PATCH 01/12] Added pymoo Python package for model calibration --- pyproject.toml | 3 ++- requirements.txt | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index d8b6883..c8ca94c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,8 +33,9 @@ classifiers = [ "Topic :: Scientific/Engineering :: Hydrology" ] dependencies = [ + "pydantic", "SALib", - "pydantic" + "pymoo" ] [project.urls] diff --git a/requirements.txt b/requirements.txt index 9e7cf3a..74dd4cf 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,5 @@ pytest pytest-cov +pydantic SALib -pydantic \ No newline at end of file +pymoo \ No newline at end of file From 821e5cd40737e9bd99e234dc2634d144ed7ab85b Mon Sep 17 00:00:00 2001 From: Debasish Pal <48341250+debpal@users.noreply.github.com> Date: Wed, 22 Oct 2025 15:02:15 +0300 Subject: [PATCH 02/12] Added Calibration tab and reshuffle existing tabs --- mkdocs.yml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/mkdocs.yml b/mkdocs.yml index 64a147f..7b2f1f3 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -38,14 +38,15 @@ nav: - User Guide: - SWAT+ Simulation: userguide/swatplus_simulation.md - Sensitivity Interface: userguide/sensitivity_interface.md + - Model Calibration: userguide/model_calibration.md - Data Analysis: userguide/data_analysis.md - API Reference: - TxtinoutReader: api/txtinout_reader.md - - DataManager: api/data_manager.md - - SensitivityAnalyzer: api/sensitivity_analyzer.md - PerformanceMetrics: api/performance_metrics.md - - pySWATPlus.types: api/types.md + - SensitivityAnalyzer: api/sensitivity_analyzer.md + - Calibration: api/calibration.md + - DataManager: api/data_manager.md - Contributor Guide: CONTRIBUTING.md From 9e1671f1510c6d061d93329c803b2a82c83771dc Mon Sep 17 
00:00:00 2001 From: Debasish Pal <48341250+debpal@users.noreply.github.com> Date: Wed, 22 Oct 2025 15:03:17 +0300 Subject: [PATCH 03/12] Added new calibration features --- README.md | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index feddbbc..4025791 100644 --- a/README.md +++ b/README.md @@ -35,10 +35,12 @@ ## ✨ Key Features -- Modify model parameters through the `calibration.cal` file. -- Run SWAT+ simulations seamlessly. -- Compute performance metrics using widely adopted indicators. -- Perform sensitivity analysis on model parameters using the [SALib](https://github.com/SALib/SALib) Python package, with support for parallel computation; currently, only Sobol sampling and Sobol indices are supported. +- Run `SWAT+` simulations by modifying model parameters through the `calibration.cal` file.. +- Evaluate model performance against observed data using widely recognized statistical indicators. +- Perform sensitivity analysis on model parameters using the [`SALib`](https://github.com/SALib/SALib) Python package. +- Calibrate model parameters through multi-objective optimization and evolutionary algorithms using the [`pymoo`](https://github.com/anyoptimization/pymoo) Python package. +- Execute sensitivity analysis and model calibration through high-level interfaces with built-in parallel computation support. +- Analyze outputs from model simulations, sensitivity analyses, and calibrations. 
## 📥 Install pySWATPlus From 071f51b1d104b1590fbdd12d13c6ecab6fbf48a1 Mon Sep 17 00:00:00 2001 From: Debasish Pal <48341250+debpal@users.noreply.github.com> Date: Wed, 22 Oct 2025 15:04:37 +0300 Subject: [PATCH 04/12] Added Calibration class and its userguide --- docs/api/calibration.md | 2 + docs/api/types.md | 4 - docs/index.md | 10 +- docs/userguide/data_analysis.md | 24 +++-- docs/userguide/model_calibration.md | 123 ++++++++++++++++++++++++ docs/userguide/sensitivity_interface.md | 12 ++- 6 files changed, 154 insertions(+), 21 deletions(-) create mode 100644 docs/api/calibration.md delete mode 100644 docs/api/types.md create mode 100644 docs/userguide/model_calibration.md diff --git a/docs/api/calibration.md b/docs/api/calibration.md new file mode 100644 index 0000000..67bce19 --- /dev/null +++ b/docs/api/calibration.md @@ -0,0 +1,2 @@ +::: pySWATPlus.Calibration + diff --git a/docs/api/types.md b/docs/api/types.md deleted file mode 100644 index caefe4c..0000000 --- a/docs/api/types.md +++ /dev/null @@ -1,4 +0,0 @@ -::: pySWATPlus.types.ModifyType - -::: pySWATPlus.types.BoundType - diff --git a/docs/index.md b/docs/index.md index 5a3eadc..befd45c 100644 --- a/docs/index.md +++ b/docs/index.md @@ -8,10 +8,12 @@ ## ✨ Key Features -- Modify model parameters through the `calibration.cal` file. -- Run SWAT+ simulations seamlessly. -- Compute performance metrics using widely adopted indicators. -- Perform sensitivity analysis on model parameters using the [SALib](https://github.com/SALib/SALib) Python package, with support for parallel computation; currently, only Sobol sampling and Sobol indices are supported. +- Run `SWAT+` simulations by modifying model parameters through the `calibration.cal` file.. +- Evaluate model performance against observed data using widely recognized statistical indicators. +- Perform sensitivity analysis on model parameters using the [`SALib`](https://github.com/SALib/SALib) Python package. 
+- Calibrate model parameters through multi-objective optimization and evolutionary algorithms using the [`pymoo`](https://github.com/anyoptimization/pymoo) Python package. +- Execute sensitivity analysis and model calibration through high-level interfaces with built-in parallel computation support. +- Analyze outputs from model simulations, sensitivity analyses, and calibrations. ## 📥 Install pySWATPlus diff --git a/docs/userguide/data_analysis.md b/docs/userguide/data_analysis.md index 2579b7a..e9d8cb9 100644 --- a/docs/userguide/data_analysis.md +++ b/docs/userguide/data_analysis.md @@ -20,10 +20,22 @@ output = pySWATPlus.DataManager().simulated_timeseries_df( usecols=['name', 'flo_out'], json_file=r"C:\Users\Username\output_folder\tmp.json" ) - -print(output) ``` +## Statistics from Daily Time Series + +Monthly and yearly statistics such as maximum, minimum, mean, and standard deviation derived from daily time series data help summarize and interpret simulation variability over time. +The following interface computes monthly and yearly statistical summaries for a Hydrological Response Unit (HRU) based on daily simulated flow discharge data. These metrics provide insight into seasonal patterns, flow extremes, and overall hydrological stability. + +```python +output = pySWATPlus.DataManager().simulated_timeseries_df( + sim_file=r"C:\Users\Username\custom_folder\channel_sd_day.txt", + has_units=True, + gis_id=561, + sim_col='flo_out', + output_dir=r"C:\Users\Username\output_dir" +) +``` ## Read Sensitivity Simulation Data @@ -49,14 +61,6 @@ output = pySWATPlus.DataManager().read_sensitive_dfs( For a selected `DataFrame`, scenario metrics across all simulations can be computed by comparing model outputs with observed data. 
-NSE: Nash–Sutcliffe Efficiency -KGE: Kling–Gupta Efficiency -MSE: Mean Squared Error -RMSE: Root Mean Squared Error -PBIAS: Percent Bias -MARE: Mean Absolute Relative Error - - - To get the mapping between available indicators and their abbreviations: diff --git a/docs/userguide/model_calibration.md b/docs/userguide/model_calibration.md new file mode 100644 index 0000000..9ca3d28 --- /dev/null +++ b/docs/userguide/model_calibration.md @@ -0,0 +1,123 @@ +# Model Calibration + +This tutorial demonstrates how to perform `SWAT+` model calibration to optimize sensitive parameters based on observed data. + +Before running the calibration, ensure the model is properly configured. This includes setting the simulation timeline, managing file outputs, and fixing non-sensitive parameters so that they remain constant across all scenarios. A detailed explanation of these steps is provided in the [`Configuration Settings`](https://swat-model.github.io/pySWATPlus/userguide/sensitivity_interface/#configuration-settings) section. + +Leveraging the multi-objective optimization and evolutionary algorithms available in the [`pymoo`](https://github.com/anyoptimization/pymoo) Python package, +the calibration interface offers flexible options for optimizing model parameters: + +- Single- or multi-objective optimization (e.g., single objective such as flow discharge, or multiple objectives such as flow discharge and nitrogen concentration together). +- Multiple algorithm options for both single- and multi-objective optimization: + + - Single-objective algorithms + + - Genetic Alogorithm + - [Differential Evolution Algorithm](https://doi.org/10.1007/3-540-31306-0) + + - Multi-objective algorithms + + - [Non-dominated sorted Genetic Algorithm-II](https://doi.org/10.1109/4235.996017) + - [Adaptive Geometry Estimation based Multi-objective Evolutionary Algorithm - II](https://doi.org/10.1145/3512290.3528732). 
+ +- Five indicators for comparing simulated and observed values: + + - Nash–Sutcliffe Efficiency + - Kling–Gupta Efficiency + - Mean Squared Error + - Root Mean Squared Error + - Mean Absolute Relative Error + +- Parallel computation support for faster processing. + +- Automatic saving of optimization history for each generation, enabling analysis of optimization progress, convergence trends, performance indicators, and visualization. + + +The interface provides a [`Calibration`](https://swat-model.github.io/pySWATPlus/api/model_calibration/) class that must be initialized with the required parameters. +This class includes the `parameter_optimization` method, which performs parameter optimization using multi-objective algorithms, evolutionary strategies, and parallel computation. + +The following code provides an example of optimizing flow discharge for both daily and monthly time-series data using multi-objective evolutionary computation. +The usage of both daily and monthly flow discharge is just for illustrative purposes on how multi-objective optimization can be performed. Users should replace monthly flow +discharge by nitorgen or phosporus concentration according to their needs. 
+ + +```python +# Calibration parameter space +parameters = [ + { + 'name': 'esco', + 'change_type': 'absval', + 'lower_bound': 0, + 'upper_bound': 1 + }, + { + 'name': 'perco', + 'change_type': 'absval', + 'lower_bound': 0, + 'upper_bound': 1 + } +] + + +# Extract data configuration +extract_data = { + 'channel_sd_day.txt': { + 'has_units': True, + 'apply_filter': {'name': ['cha561']} + }, + 'channel_sd_mon.txt': { + 'has_units': True, + 'ref_day': 1, + 'apply_filter': {'name': ['cha561']} + } +} + +# Observe data configuration +observe_data = { + 'channel_sd_day.txt': { + 'obs_file': r"C:\Users\Username\observed_folder\discharge_daily.csv", + 'date_format': '%Y-%m-%d' + }, + 'channel_sd_mon.txt': { + 'obs_file': r"C:\Users\Username\observed_folder\discharge_monthly.csv", + 'date_format': '%Y-%m-%d' + } +} + +# Objective configuration +objectives = { + 'channel_sd_day.txt': { + 'sim_col': 'flo_out', + 'obs_col': 'discharge', + 'indicator': 'NSE' + }, + 'channel_sd_mon.txt': { + 'sim_col': 'flo_out', + 'obs_col': 'mean', + 'indicator': 'RMSE' + } +} + +# Model calibration +if __name__ == '__main__': + calibration = pySWATPlus.Calibration( + parameters=parameters, + calsim_dir=r"C:\Users\Username\simulation_calibration", + txtinout_dir=r"C:\Users\Username\project\Scenarios\Default\TxtInOut", + extract_data=extract_data, + observe_data=observe_data, + objectives=objectives, + algorithm='NSGA2', + n_gen=2, + pop_size=5 + ) + + output = calibration.parameter_optimization() +``` + + +!!! tip "Troubleshooting Parallel Processing Errors" + If you encounter an error related to `concurrent.futures.ProcessPoolExecutor` and `multiprocessing` without a clear description, + try closing the current command terminal and restarting it. This issue can occasionally occur due to lingering background processes + or locked resources from previous runs. 
+ diff --git a/docs/userguide/sensitivity_interface.md b/docs/userguide/sensitivity_interface.md index d8472fa..eccb5b5 100644 --- a/docs/userguide/sensitivity_interface.md +++ b/docs/userguide/sensitivity_interface.md @@ -1,12 +1,12 @@ # Sensitivity Interface -Sensitivity interface helps quantify how variation in input parameters affects model outputs. This tutorial demonstrates how to perform sensitivity analysis on SWAT+ model parameters. +The sensitivity interface helps quantify how variations in input parameters affect `SWAT+` model outputs. This tutorial demonstrates how to perform sensitivity analysis on selected parameters. ## Configuration Settings -Before running a sensitivity simulation, you must define the necessary configuration settings. -These settings specify key parameters such as the simulation timeline, output print options, and other essential model controls. +Before running a sensitivity simulation, configure the necessary settings, such as the simulation period, output print options, and non-targeted parameters, +so that these parameters remain fixed across all scenarios. ```python @@ -108,6 +108,12 @@ if __name__ == '__main__': print(output) ``` + +!!! tip "Troubleshooting Parallel Processing Errors" + If you encounter an error related to `concurrent.futures.ProcessPoolExecutor` and `multiprocessing` without a clear description, + try closing the current command terminal and restarting it. This issue can occasionally occur due to lingering background processes + or locked resources from previous runs. + ## Sensitivity Indices Sensitivity indices (first, second, and total orders) are computed using the indicators available in the `pySWATPlus.PerformanceMetrics` class, along with their confidence intervals. 
From 72979afbace7f2a2be7ca4db88db96400a77883a Mon Sep 17 00:00:00 2001 From: Debasish Pal <48341250+debpal@users.noreply.github.com> Date: Wed, 22 Oct 2025 15:16:13 +0300 Subject: [PATCH 05/12] Renamed types.py to newtype.py because types is an existing module in python and difficult to import some cases --- pySWATPlus/{types.py => newtype.py} | 49 ----------------------------- 1 file changed, 49 deletions(-) rename pySWATPlus/{types.py => newtype.py} (73%) diff --git a/pySWATPlus/types.py b/pySWATPlus/newtype.py similarity index 73% rename from pySWATPlus/types.py rename to pySWATPlus/newtype.py index 7fd91db..d8da9b1 100644 --- a/pySWATPlus/types.py +++ b/pySWATPlus/newtype.py @@ -55,29 +55,6 @@ def check_bounds( - `units` (Iterable[int]): Optional. List of unit IDs to which the parameter change should be constrained. - `conditions` (dict[str, list[str]]): Optional. Conditions to apply when changing the parameter. Supported keys include `'hsg'`, `'texture'`, `'plant'`, and `'landuse'`, each mapped to a list of allowed values. - -Example: - ```python - parameters = [ - { - 'name': 'cn2', - 'change_type': 'pctchg', - 'value': 50, - }, - { - 'name': 'perco', - 'change_type': 'absval', - 'value': 0.5, - 'conditions': {'hsg': ['A']} - }, - { - 'name': 'bf_max', - 'change_type': 'absval', - 'value': 0.3, - 'units': range(1, 194) - } - ] - ``` ''' BoundType: typing.TypeAlias = list[dict[str, typing.Any]] @@ -92,30 +69,4 @@ def check_bounds( - `units` (Iterable[int]): Optional. List of unit IDs to which the parameter change should be constrained. - `conditions` (dict[str, list[str]]): Optional. Conditions to apply when changing the parameter. Supported keys include `'hsg'`, `'texture'`, `'plant'`, and `'landuse'`, each mapped to a list of allowed values. 
- -Example: - ```python - parameters = [ - { - 'name': 'cn2', - 'change_type': 'pctchg', - 'lower_bound': 25, - 'upper_bound': 75, - }, - { - 'name': 'perco', - 'change_type': 'absval', - 'lower_bound': 0, - 'upper_bound': 1, - 'conditions': {'hsg': ['A']} - }, - { - 'name': 'bf_max', - 'change_type': 'absval', - 'lower_bound': 0.1, - 'upper_bound': 2.0, - 'units': range(1, 194) - } - ] - ``` ''' From 6f61942aa098a854cece9bc49a0d54c8b0d119dd Mon Sep 17 00:00:00 2001 From: Debasish Pal <48341250+debpal@users.noreply.github.com> Date: Wed, 22 Oct 2025 15:17:42 +0300 Subject: [PATCH 06/12] Modified and reshuffled methods --- pySWATPlus/txtinout_reader.py | 49 ++++----- pySWATPlus/utils.py | 200 ++++++++++++++++++++++++++-------- pySWATPlus/validators.py | 90 +++++++++++++-- 3 files changed, 255 insertions(+), 84 deletions(-) diff --git a/pySWATPlus/txtinout_reader.py b/pySWATPlus/txtinout_reader.py index 819e577..80c728d 100644 --- a/pySWATPlus/txtinout_reader.py +++ b/pySWATPlus/txtinout_reader.py @@ -3,7 +3,7 @@ import pathlib import typing import logging -from .types import ModifyType, ModifyDict +from . import newtype from . import utils from . import validators @@ -51,8 +51,8 @@ def __init__( # Raise error on .exe file if len(exe_files) != 1: - raise TypeError( - 'Expected exactly one .exe file in the parent folder, but found none or multiple' + raise ValueError( + f'Expected exactly one ".exe" file in directory {str(tio_dir)}, but found none or multiple' ) # TxtInOut directory path @@ -71,11 +71,8 @@ def enable_object_in_print_prt( allow_unavailable_object: bool = False ) -> None: ''' - Update or add an object in the `print.prt` file with specified time frequency flags. - - This method modifies the `print.prt` file in a SWAT+ project to enable or disable output - for a specific object (or all objects if `obj` is None) at specified time frequencies - (daily, monthly, yearly, or average annual). 
If the object does not exist in the file + Update the `print.prt` file to enable or disable output for a specific object (or all objects if `obj` is None) + at specified time frequencies (daily, monthly, yearly, or average annual). If the object does not exist in the file and `obj` is not None, it is appended to the end of the file. Note: @@ -150,7 +147,7 @@ def enable_object_in_print_prt( if obj is None: # Update all objects - new_print_prt += utils._build_line_to_add( + new_print_prt += utils._print_prt_line_add( obj=line_obj, daily=daily, monthly=monthly, @@ -159,7 +156,7 @@ def enable_object_in_print_prt( ) elif line_obj == obj: # Already 'obj' exist, replace it in same position - new_print_prt += utils._build_line_to_add( + new_print_prt += utils._print_prt_line_add( obj=line_obj, daily=daily, monthly=monthly, @@ -171,7 +168,7 @@ def enable_object_in_print_prt( new_print_prt += line if not found and obj is not None: - new_print_prt += utils._build_line_to_add( + new_print_prt += utils._print_prt_line_add( obj=obj, daily=daily, monthly=monthly, @@ -574,7 +571,7 @@ def copy_required_files( def _write_calibration_file( self, - parameters: list[ModifyDict] + parameters: list[newtype.ModifyDict] ) -> None: ''' Writes `calibration.cal` file with parameter changes. 
@@ -614,10 +611,10 @@ def _write_calibration_file( units = change.units # Convert to compact representation - compacted_units = utils._compact_units(units) if units else [] + compacted_units = utils._dict_units_compact(units) if units else [] # get conditions - parsed_conditions = utils._parse_conditions(change) + parsed_conditions = utils._dict_conditions_parse(change) calibration_cal_rows.append( { @@ -655,7 +652,7 @@ def _write_calibration_file( if col == 'NAME': line += f'{row[col]:<{col_widths[col]}}' # left-align elif col == 'VAL' and isinstance(row[col], float): - line += utils._format_val_field(typing.cast(float, row[col])) # special VAL formatting + line += utils._calibration_val_field_str(typing.cast(float, row[col])) # special VAL formatting else: line += f'{row[col]:>{col_widths[col]}}' # right-align numeric columns @@ -744,13 +741,13 @@ def _apply_swat_configuration( ''' # Ensure both begin and end dates are given - validators._ensure_together( + validators._variables_defined_or_none( begin_date=begin_date, end_date=end_date ) # Ensure both begin and end print dates are given - validators._ensure_together( + validators._variables_defined_or_none( print_begin_date=print_begin_date, print_end_date=print_end_date ) @@ -825,7 +822,8 @@ def _apply_swat_configuration( for sub_key, sub_val in val.items(): if sub_key not in key_dict: raise KeyError( - f'Invalids sub-key "{sub_key}" for key "{key}" in print_prt_control, expected sub-keys are [{", ".join(key_dict.keys())}]' + f'Invalids sub-key "{sub_key}" for key "{key}" in print_prt_control, ' + f'expected sub-keys are [{", ".join(key_dict.keys())}]' ) key_dict[sub_key] = sub_val self.enable_object_in_print_prt( @@ -890,7 +888,7 @@ def _run_swat_exe( def run_swat( self, sim_dir: typing.Optional[str | pathlib.Path] = None, - parameters: typing.Optional[ModifyType] = None, + parameters: typing.Optional[newtype.ModifyType] = None, begin_date: typing.Optional[str] = None, end_date: typing.Optional[str] = None, 
simulation_timestep: typing.Optional[int] = None, @@ -908,7 +906,7 @@ def run_swat( sim_dir (str | pathlib.Path): Path to the directory where the simulation will be done. If None, the simulation runs directly in the current folder. - parameters (ModifyType): List of dictionaries specifying parameter changes in the `calibration.cal` file. + parameters (newtype.ModifyType): List of dictionaries specifying parameter changes in the `calibration.cal` file. Each dictionary contain the following keys: - `name` (str): **Required.** Name of the parameter in the `cal_parms.cal` file. @@ -1025,16 +1023,11 @@ def run_swat( # Create calibration.cal file if parameters is not None: - # Validate unique dictionaries for calibration parameters - validators._calibration_list_contain_unique_dict( - parameters=parameters + # List of ModifyDict objects + params = utils._parameters_modify_dict_list( + parameters=parameters, ) - # Check structural validity of input calibration parameters - params = [ - ModifyDict(**param) for param in parameters - ] - # Check if input calibration parameters exists in cal_parms.cal validators._calibration_parameters( input_dir=reader.root_dir, diff --git a/pySWATPlus/utils.py b/pySWATPlus/utils.py index 309dca9..d461155 100644 --- a/pySWATPlus/utils.py +++ b/pySWATPlus/utils.py @@ -5,10 +5,11 @@ import pathlib import typing import collections.abc -from .types import ModifyDict +from . import newtype +from . import validators -def _build_line_to_add( +def _print_prt_line_add( obj: str, daily: bool, monthly: bool, @@ -16,7 +17,7 @@ def _build_line_to_add( avann: bool ) -> str: ''' - Format lines for `print.prt` file + Append a new line to the contents of the `print.prt` file string. ''' print_periodicity = { @@ -38,7 +39,7 @@ def _date_str_to_object( date_str: str ) -> datetime.date: ''' - Convert a date string in 'DD-Mon-YYYY' format to a `datetime.date` object + Convert a date string in 'DD-Mon-YYYY' format to a `datetime.date` object. 
''' date_fmt = '%d-%b-%Y' @@ -52,42 +53,13 @@ def _date_str_to_object( return get_date -def _format_val_field( - value: float -) -> str: - ''' - Format a number for the VAL column: - - 16 characters total: 1 leading space + 15-character numeric field - - Right-aligned - - Fixed-point if integer part fits; scientific if too large - ''' - - # Convert to string without formatting - s = str(value) - - if len(s) > 15: - # Use scientific notation - formatted = f'{value:.6e}' - else: - # If it fits, just use normal string - formatted = s - - # Right-align to 16 characters - return f'{formatted:>16}' - - -def _compact_units( +def _dict_units_compact( unit_list: collections.abc.Iterable[int] ) -> list[int]: ''' - Compact a 1-based list of unit IDs into SWAT units syntax. - - Consecutive unit IDs are represented as a range using negative numbers: - - - Single units are listed as positive numbers. - - Consecutive ranges are represented as [start, -end]. - - All IDs must be 1-based (Fortran-style). + Compact the `units` key in `pySWATPlus.newtype.ModifyDict` or `pySWATPlus.newtype.BoundDict`, + where single units are represented as positive numbers and consecutive ranges as [start, -end], + to be added to the `calibration.cal` file for the corresponding parameter. ''' if not unit_list: @@ -120,11 +92,12 @@ def _compact_units( return compact -def _parse_conditions( - parameters: ModifyDict +def _dict_conditions_parse( + parameters: newtype.ModifyDict ) -> list[str]: ''' - Parse the conditions that must be added to that parameter in calibration.cal file + Parse the `conditions` key in `pySWATPlus.newtype.ModifyDict` or `pySWATPlus.newtype.BoundDict` + to be added to the `calibration.cal` file for the corresponding parameter. 
''' conditions = parameters.conditions @@ -139,11 +112,35 @@ def _parse_conditions( return conditions_parsed +def _calibration_val_field_str( + value: float +) -> str: + ''' + Format a number for the VAL column to the `calibration.cal` file: + - 16 characters total: 1 leading space + 15-character numeric field + - Right-aligned + - Fixed-point if integer part fits; scientific if too large + ''' + + # Convert to string without formatting + s = str(value) + + if len(s) > 15: + # Use scientific notation + formatted = f'{value:.6e}' + else: + # If it fits, just use normal string + formatted = s + + # Right-align to 16 characters + return f'{formatted:>16}' + + def _df_clean( df: pandas.DataFrame ) -> pandas.DataFrame: ''' - Clean a DataFrame by stripping whitespace from column names and string values. + Clean a `DataFrame` by stripping whitespace from column names and string values. ''' # Strip spaces from column names @@ -162,7 +159,7 @@ def _df_extract( skiprows: typing.Optional[list[int]] = None ) -> pandas.DataFrame: ''' - Extract a DataFrame from `input_file` using multiple parsing strategies. + Extract a `DataFrame` from `input_file` using multiple parsing strategies. ''' if input_file.suffix.lower() == '.csv': @@ -210,6 +207,14 @@ def _df_observe( and `obs_col` (the observed values). 
''' + # Check input variables type + validators._variable_origin_static_type( + vars_types=typing.get_type_hints( + obj=_df_observe + ), + vars_values=locals() + ) + # DataFrame obs_df = pandas.read_csv( filepath_or_buffer=obs_file, @@ -228,7 +233,8 @@ def _df_observe( def _df_normalize( - df: pandas.DataFrame + df: pandas.DataFrame, + norm_col: str ) -> pandas.DataFrame: ''' Normalize the values in the input `DataFrame` using the formula `(df - min) / (max - min)`, @@ -237,16 +243,16 @@ def _df_normalize( ''' # Minimum and maximum values - df_min = df.min().min() - df_max = df.max().max() + norm_min = df[norm_col].min() + norm_max = df[norm_col].max() # Normalized DataFrame - norm_df = (df - df_min) / (df_max - df_min) + norm_df = (df - norm_min) / (norm_max - norm_min) return norm_df -def _retrieve_sensitivity_output( +def _sensitivity_output_retrieval( sensim_file: pathlib.Path, df_name: str, add_problem: bool, @@ -287,3 +293,103 @@ def _retrieve_sensitivity_output( output['sample'] = sensitivity_sim['sample'] return output + + +def _parameters_modify_dict_list( + parameters: list[dict[str, typing.Any]], +) -> list[newtype.ModifyDict]: + ''' + Convert each dictionary in the `parameters` list into a `pySWATPlus.newtype.ModifyDict` object. 
+ ''' + + # Validate the "parameters" list contains unique dictionaries + validators._parameters_contain_unique_dict( + parameters=parameters + ) + + valid_keys = [ + 'name', + 'value', + 'change_type', + 'units', + 'conditions' + ] + + param_list = [] + for param in parameters: + for key in param: + if key not in valid_keys: + raise KeyError( + f'Invalid key "{key}" for {json.dumps(param)} in "parameters"; ' + f'expected keys are {json.dumps(valid_keys)}' + ) + param_list.append(newtype.ModifyDict(**param)) + + return param_list + + +def _parameters_bound_dict_list( + parameters: list[dict[str, typing.Any]], +) -> list[newtype.BoundDict]: + ''' + Convert each dictionary in the `parameters` list into a `pySWATPlus.newtype.BoundDict` object. + ''' + + # Validate the "parameters" list contains unique dictionaries + validators._parameters_contain_unique_dict( + parameters=parameters + ) + + valid_keys = [ + 'name', + 'change_type', + 'lower_bound', + 'upper_bound', + 'units', + 'conditions' + ] + + param_list = [] + for param in parameters: + for key in param: + if key not in valid_keys: + raise KeyError( + f'Invalid key "{key}" for {json.dumps(param)} in "parameters"; ' + f'expected keys are {json.dumps(valid_keys)}' + ) + param_list.append(newtype.BoundDict(**param)) + + return param_list + + +def _parameters_name_with_counter( + parameters: list[newtype.BoundDict] +) -> list[str]: + ''' + Add a counter with parameter name if same calibration parameter appears + multiple times in `pySWATPlus.newtype.BoundType` list. 
+ ''' + + # Count variables + count_vars = collections.Counter( + p.name for p in parameters + ) + + # Intialize dictionary to keeps track the count of variables + current_count = { + v: 0 for v in list(count_vars) + } + + # List of unique name with counter + name_counter = [] + for param in parameters: + p_name = param.name + if count_vars[p_name] == 1: + # Keep same name if occur only once in the list + name_counter.append(p_name) + else: + # Add counter suffix if occur multiple times + current_count[p_name] = current_count[p_name] + 1 + name_counter.append(f'{p_name}|{current_count[p_name]}') + + return name_counter diff --git a/pySWATPlus/validators.py b/pySWATPlus/validators.py index eeaa8c0..a569751 100644 --- a/pySWATPlus/validators.py +++ b/pySWATPlus/validators.py @@ -4,7 +4,7 @@ import types import datetime import json -from .types import ModifyDict, BoundDict +from . import newtype def _variable_origin_static_type( @@ -120,7 +120,7 @@ def _date_within_range( return None -def _calibration_list_contain_unique_dict( +def _parameters_contain_unique_dict( parameters: list[dict[str, typing.Any]] ) -> None: ''' @@ -143,7 +143,7 @@ def _calibration_list_contain_unique_dict( def _calibration_units( input_dir: pathlib.Path, - param_change: ModifyDict + param_change: newtype.ModifyDict ) -> None: ''' Validate units for a given parameter change against calibration parameters. @@ -189,7 +189,7 @@ def _calibration_units( usecols=['id'] ) - # Id's in the files are consecutive (and starting from 1). We can use the length of the df to check if all units are present + # Use the DataFrame length to check if all units starting from 1 are present max_unit = max(units) if len(df) < max_unit: raise ValueError( @@ -203,7 +203,7 @@ def _calibration_units( def _calibration_conditions( input_dir: pathlib.Path, - param_change: ModifyDict + param_change: newtype.ModifyDict ) -> None: ''' Validate conditions for a given parameter change against calibration parameters. 
@@ -252,7 +252,7 @@ def _calibration_conditions( def _calibration_conditions_and_units( input_dir: pathlib.Path, - parameters: list[ModifyDict] + parameters: list[newtype.ModifyDict] ) -> None: ''' Check the following: @@ -283,7 +283,7 @@ def _calibration_conditions_and_units( def _calibration_parameters( input_dir: pathlib.Path, - parameters: list[BoundDict] | list[ModifyDict] + parameters: list[newtype.BoundDict] | list[newtype.ModifyDict] ) -> None: ''' Validate existence of input calibration parameters in `cal_parms.cal`. @@ -324,12 +324,14 @@ def _json_extension( return None -def _ensure_together(**kwargs: typing.Any) -> None: +def _variables_defined_or_none( + **kwargs: typing.Any +) -> None: ''' Ensure that either all or none of the given arguments are provided (not None). Example: - _ensure_together(begin_date=begin, end_date=end) + _ensure_together(begin_date=begin_date, end_date=end_date) ''' total = len(kwargs) @@ -342,3 +344,73 @@ def _ensure_together(**kwargs: typing.Any) -> None: raise ValueError( f'Arguments [{all_args}] must be provided together, but missing: {missing}' ) + + return None + + +def _simulation_preliminary_setup( + sim_dir: pathlib.Path, + tio_dir: pathlib.Path, + parameters: list[newtype.BoundDict] | list[newtype.ModifyDict] +) -> None: + + # Check validity of simulation directory path + _dir_path( + input_dir=sim_dir + ) + + # Check simulation directory is empty + _dir_empty( + input_dir=sim_dir + ) + + # Validate parameter names + _calibration_parameters( + input_dir=tio_dir, + parameters=parameters + ) + + return None + + +def _extract_data_config( + extract_data: dict[str, dict[str, typing.Any]], +) -> None: + ''' + Validate `extract_data` configuration. 
+ ''' + + # List of valid sub-keys of sub-dictionaries + valid_subkeys = [ + 'has_units', + 'begin_date', + 'end_date', + 'ref_day', + 'ref_month', + 'apply_filter', + 'usecols' + ] + + # Iterate dictionary + for file_key, file_dict in extract_data.items(): + # Check type of a sub-dictionary + if not isinstance(file_dict, dict): + raise TypeError( + f'Expected "{file_key}" in extract_data must be a dictionary, ' + f'but got type "{type(file_dict).__name__}"' + ) + # Check mandatory 'has_units' sub-key in sub-dictionary + if 'has_units' not in file_dict: + raise KeyError( + f'Key has_units is missing for "{file_key}" in extract_data' + ) + # Iterate sub-key + for sub_key in file_dict: + # Check valid sub-key + if sub_key not in valid_subkeys: + raise KeyError( + f'Invalid sub-key "{sub_key}" for "{file_key}" in extract_data; ' + f'expected sub-keys are {json.dumps(valid_subkeys)}' + ) + + return None From 6b3f1d305db65fb95a45f9744e81678d6fbea14e Mon Sep 17 00:00:00 2001 From: Debasish Pal <48341250+debpal@users.noreply.github.com> Date: Wed, 22 Oct 2025 15:18:55 +0300 Subject: [PATCH 07/12] Added private method for simulation in separate CPU --- pySWATPlus/cpu.py | 85 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) create mode 100644 pySWATPlus/cpu.py diff --git a/pySWATPlus/cpu.py b/pySWATPlus/cpu.py new file mode 100644 index 0000000..986e622 --- /dev/null +++ b/pySWATPlus/cpu.py @@ -0,0 +1,85 @@ +import numpy +import shutil +import typing +import pathlib +from .txtinout_reader import TxtinoutReader +from .data_manager import DataManager +from . 
import newtype + + +def _simulation_output( + track_sim: int, + var_array: numpy.typing.NDArray[numpy.float64], + num_sim: int, + var_names: list[str], + sim_dir: pathlib.Path, + tio_dir: pathlib.Path, + params_bounds: list[newtype.BoundDict], + extract_data: dict[str, dict[str, typing.Any]], + clean_setup: bool +) -> dict[str, typing.Any]: + ''' + Run parallel simulations for sensitivity analysis and calibration, assigning each process + to a dedicated logical CPU for optimized performance. + ''' + + # Dictionary mapping for sensitivity simulation name and variable + var_dict = { + var_names[i]: float(var_array[i]) for i in range(len(var_names)) + } + + # Create ParameterType dictionary to write calibration.cal file + params_sim = [] + for i, param in enumerate(params_bounds): + params_sim.append( + { + 'name': param.name, + 'change_type': param.change_type, + 'value': var_dict[var_names[i]], + 'units': param.units, + 'conditions': param.conditions + } + ) + + # Display start of current simulation for tracking + print( + f'Started simulation: {track_sim}/{num_sim}', + flush=True + ) + + # Create simulation directory + cpu_dir = f'sim_{track_sim}' + cpu_path = sim_dir / cpu_dir + cpu_path.mkdir() + + # Output simulation dictionary + cpu_output = { + 'dir': cpu_dir, + 'array': var_array + } + + # Initialize TxtinoutReader class + tio_reader = TxtinoutReader( + tio_dir=tio_dir + ) + + # Run SWAT+ model in CPU directory + tio_reader.run_swat( + sim_dir=cpu_path, + parameters=params_sim + ) + + # Extract simulated data + for sim_fname, sim_fdict in extract_data.items(): + sim_file = cpu_path / sim_fname + df = DataManager().simulated_timeseries_df( + sim_file=sim_file, + **sim_fdict + ) + cpu_output[f'{sim_file.stem}_df'] = df + + # Remove simulation directory + if clean_setup: + shutil.rmtree(cpu_path, ignore_errors=True) + + return cpu_output From 53a46f52fd8447f6e09a831f3aacbd9b9576332f Mon Sep 17 00:00:00 2001 From: Debasish Pal 
<48341250+debpal@users.noreply.github.com> Date: Wed, 22 Oct 2025 15:20:56 +0300 Subject: [PATCH 08/12] Moved some functions to utils.py and validators.py due to repetition --- pySWATPlus/performance_metrics.py | 34 ++-- pySWATPlus/sensitivity_analyzer.py | 293 ++++++++--------------------- 2 files changed, 103 insertions(+), 224 deletions(-) diff --git a/pySWATPlus/performance_metrics.py b/pySWATPlus/performance_metrics.py index b1ae16b..2ac3ea9 100644 --- a/pySWATPlus/performance_metrics.py +++ b/pySWATPlus/performance_metrics.py @@ -275,8 +275,8 @@ def scenario_indicators( Compute performance indicators for sample scenarios obtained using the method [`simulation_by_sample_parameters`](https://swat-model.github.io/pySWATPlus/api/sensitivity_analyzer/#pySWATPlus.SensitivityAnalyzer.simulation_by_sample_parameters). - Before computing the indicators, simulated and observed values are normalized using the formula `(v - min_v) / (max_v - min_v)`, - where `min_v` and `max_v` represent the minimum and maximum of all simulated and observed values combined. + Before computing the indicators, simulated and observed values are normalized using the formula `(v - min_o) / (max_o - min_o)`, + where `min_o` and `max_o` represent the minimum and maximum of observed values, respectively. The method returns a dictionary with two keys: @@ -284,6 +284,12 @@ def scenario_indicators( - `indicator`: A `DataFrame` containing the `Scenario` column and one column per indicator, with scenario indices and corresponding indicator values. + Before computing the indicators, both simulated and observed values are normalized using the formula + `(v - min_o) / (max_o - min_o)`, where `min_o` and `max_o` represent the minimum and maximum of observed values, respectively. + + Note: + All negative and `None` observed values are removed before computing `min_o` and `max_o` to prevent errors during normalization. 
+ Args: sensim_file (str | pathlib.Path): Path to the `sensitivity_simulation.json` file produced by `simulation_by_sobol_sample`. @@ -296,8 +302,7 @@ def scenario_indicators( date_format (str): Date format of the `date` column in `obs_file`, used to parse `datetime.date` objects from date strings. - obs_col (str): Name of the column in `obs_file` containing observed data. All negative and `None` observed values are removed - due to the normalization of observed and similated values before computing indicators. + obs_col (str): Name of the column in `obs_file` containing observed data. indicators (list[str]): List of performance indicators to compute. Available options: @@ -340,7 +345,7 @@ def scenario_indicators( obs_df.columns = ['date', 'obs'] # Retrieve sensitivity output - sensitivity_sim = utils._retrieve_sensitivity_output( + sensitivity_sim = utils._sensitivity_output_retrieval( sensim_file=pathlib.Path(sensim_file).resolve(), df_name=df_name, add_problem=True, @@ -348,7 +353,7 @@ def scenario_indicators( ) # Empty DataFrame to store scenario indicators - inct_df = pandas.DataFrame( + ind_df = pandas.DataFrame( columns=indicators ) @@ -364,7 +369,8 @@ def scenario_indicators( ) # Normalized DataFrame norm_df = utils._df_normalize( - df=merge_df[['sim', 'obs']] + df=merge_df[['sim', 'obs']], + norm_col='obs' ) # Iterate indicators for indicator in indicators: @@ -373,21 +379,21 @@ def scenario_indicators( self, f'compute_{indicator.lower()}' ) - # indicator value + # Indicator value key_indicator = indicator_method( df=norm_df, sim_col='sim', obs_col='obs' ) - # Store error in DataFrame - inct_df.loc[key, indicator] = key_indicator + # Store indicator value in DataFrame + ind_df.loc[key, indicator] = key_indicator # Reset index to scenario column scnro_col = 'Scenario' - inct_df = inct_df.reset_index( + ind_df = ind_df.reset_index( names=[scnro_col] ) - inct_df[scnro_col] = inct_df[scnro_col].astype(int) + ind_df[scnro_col] = ind_df[scnro_col].astype(int) # 
Save DataFrame if json_file is not None: @@ -397,7 +403,7 @@ def scenario_indicators( json_file=json_file ) # Write DataFrame to the JSON file - inct_df.to_json( + ind_df.to_json( path_or_buf=json_file, orient='records', indent=4 @@ -406,7 +412,7 @@ def scenario_indicators( # Output dictionary output = { 'problem': sensitivity_sim['problem'], - 'indicator': inct_df + 'indicator': ind_df } return output diff --git a/pySWATPlus/sensitivity_analyzer.py b/pySWATPlus/sensitivity_analyzer.py index ae13e5a..1d81443 100644 --- a/pySWATPlus/sensitivity_analyzer.py +++ b/pySWATPlus/sensitivity_analyzer.py @@ -8,13 +8,12 @@ import time import copy import json -import shutil -import collections -from .txtinout_reader import TxtinoutReader -from .data_manager import DataManager -from .types import BoundType, BoundDict from .performance_metrics import PerformanceMetrics +from .txtinout_reader import TxtinoutReader +from . import newtype from . import validators +from . import utils +from . import cpu class SensitivityAnalyzer: @@ -22,71 +21,22 @@ class SensitivityAnalyzer: Provide functionality for sensitivity analyzis. ''' - def _validate_extract_data_config( - self, - extract_data: dict[str, dict[str, typing.Any]], - ) -> None: - ''' - Validate `extract_data` configuration. 
- ''' - - valid_subkeys = [ - 'has_units', - 'begin_date', - 'end_date', - 'ref_day', - 'ref_month', - 'apply_filter', - 'usecols' - ] - for sim_fname, sim_fdict in extract_data.items(): - if not isinstance(sim_fdict, dict): - raise TypeError( - f'Expected "{sim_fname}" in simulation_date must be a dictionary, but got type "{type(sim_fdict).__name__}"' - ) - if 'has_units' not in sim_fdict: - raise KeyError( - f'Key has_units is missing for "{sim_fname}" in extract_data' - ) - for sim_fkey in sim_fdict: - if sim_fkey not in valid_subkeys: - raise ValueError( - f'Invalid key "{sim_fkey}" for "{sim_fname}" in extract_data; expected subkeys are {valid_subkeys}' - ) - - return None - def _create_sobol_problem( self, - params_bounds: list[BoundDict] + params_bounds: list[newtype.BoundDict] ) -> dict[str, typing.Any]: - ''' Prepare Sobol problem dictionary for sensitivity analysis. ''' - # Count variables - count_vars = collections.Counter( - p.name for p in params_bounds + # List of parameter names with counter if required + var_names = utils._parameters_name_with_counter( + parameters=params_bounds ) - # Intialize dictionary to keeps track the count of variables - current_count = { - v: 0 for v in list(count_vars) - } - # List of unique names and bounds of paramters - var_names = [] var_bounds = [] for param in params_bounds: - p_name = param.name - if count_vars[p_name] == 1: - # Keep same name if occur only once in the list - var_names.append(p_name) - else: - # Add counter suffix if occur multiple times - current_count[p_name] = current_count[p_name] + 1 - var_names.append(f'{p_name}|{current_count[p_name]}') var_bounds.append([param.lower_bound, param.upper_bound]) # Sobol problem @@ -98,83 +48,6 @@ def _create_sobol_problem( return problem - def _cpu_simulation( - self, - track_sim: int, - var_array: numpy.typing.NDArray[numpy.float64], - num_sim: int, - var_names: list[str], - sensim_dir: pathlib.Path, - txtinout_dir: pathlib.Path, - params_bounds: 
list[BoundDict], - extract_data: dict[str, dict[str, typing.Any]], - clean_setup: bool - ) -> dict[str, typing.Any]: - ''' - Execute the simulation on a dedicated logical CPU. - ''' - - # Dictionary mapping for sensitivity simulation name and variable - var_dict = { - var_names[i]: float(var_array[i]) for i in range(len(var_names)) - } - - # Create ParameterType dictionary to write calibration.cal file - params_sim = [] - for i, param in enumerate(params_bounds): - params_sim.append( - { - 'name': param.name, - 'change_type': param.change_type, - 'value': var_dict[var_names[i]], - 'units': param.units, - 'conditions': param.conditions - } - ) - - # Tracking simulation - print( - f'Started simulation: {track_sim}/{num_sim}', - flush=True - ) - - # Create simulation directory - cpu_dir = f'sim_{track_sim}' - cpu_path = sensim_dir / cpu_dir - cpu_path.mkdir() - - # Output simulation dictionary - cpu_output = { - 'dir': cpu_dir, - 'array': var_array - } - - # Initialize TxtinoutReader class - txtinout_reader = TxtinoutReader( - tio_dir=txtinout_dir - ) - - # Run SWAT+ model in CPU directory - txtinout_reader.run_swat( - sim_dir=cpu_path, - parameters=params_sim - ) - - # Extract simulated data - for sim_fname, sim_fdict in extract_data.items(): - sim_file = cpu_path / sim_fname - df = DataManager().simulated_timeseries_df( - sim_file=sim_file, - **sim_fdict - ) - cpu_output[f'{sim_file.stem}_df'] = df - - # Remove simulation directory - if clean_setup: - shutil.rmtree(cpu_path, ignore_errors=True) - - return cpu_output - def _save_output_in_json( self, sensim_dir: pathlib.Path, @@ -214,7 +87,7 @@ def _save_output_in_json( def simulation_by_sample_parameters( self, - parameters: BoundType, + parameters: newtype.BoundType, sample_number: int, sensim_dir: str | pathlib.Path, txtinout_dir: str | pathlib.Path, @@ -241,7 +114,7 @@ def simulation_by_sample_parameters( the sample array, and the simulation results for further analysis. 
Args: - parameters (BoundType): List of dictionaries defining parameter configurations for sensitivity simulations. + parameters (newtype.BoundType): List of dictionaries defining parameter configurations for sensitivity simulations. Each dictionary contain the following keys: - `name` (str): **Required.** Name of the parameter in the `cal_parms.cal` file. @@ -249,34 +122,33 @@ def simulation_by_sample_parameters( - `lower_bound` (float): **Required.** Lower bound for the parameter. - `upper_bound` (float): **Required.** Upper bound for the parameter. - `units` (Iterable[int]): Optional. List of unit IDs to which the parameter change should be constrained. - - `conditions` (dict[str, list[str]]): Optional. Conditions to apply when changing the parameter, - specified as a dictionary mapping condition types to lists of values. - - Example: - ```python - parameters = [ - { - 'name': 'cn2', - 'change_type': 'pctchg', - 'lower_bound': 25, - 'upper_bound': 75, - }, - { - 'name': 'perco', - 'change_type': 'absval', - 'lower_bound': 0, - 'upper_bound': 1, - 'conditions': {'hsg': ['A']} - }, - { - 'name': 'bf_max', - 'change_type': 'absval', - 'lower_bound': 0.1, - 'upper_bound': 2.0, - 'units': range(1, 194) - } - ] - ``` + - `conditions` (dict[str, list[str]]): Optional. Conditions to apply when changing the parameter. + Supported keys include `'hsg'`, `'texture'`, `'plant'`, and `'landuse'`, each mapped to a list of allowed values. + + ```python + parameters = [ + { + 'name': 'cn2', + 'change_type': 'pctchg', + 'lower_bound': 25, + 'upper_bound': 75, + }, + { + 'name': 'perco', + 'change_type': 'absval', + 'lower_bound': 0, + 'upper_bound': 1, + 'conditions': {'hsg': ['A']} + }, + { + 'name': 'bf_max', + 'change_type': 'absval', + 'lower_bound': 0.1, + 'upper_bound': 2.0, + 'units': range(1, 194) + } + ] + ``` sample_number (int): sample_number (int): Determines the number of samples. 
Generates an array of length `2^N * (D + 1)`, where `D` is the number of parameter changes @@ -286,11 +158,12 @@ def simulation_by_sample_parameters( Raises an error if the folder is not empty. This precaution helps prevent data deletion, overwriting directories, and issues with reading required data files not generated by the simulation. - txtinout_dir (str | pathlib.Path): Path to the `TxtInOut` directory. Raises an error if the folder does not contain exactly one SWAT+ executable `.exe` file. + txtinout_dir (str | pathlib.Path): Path to the `TxtInOut` directory containing the required files for SWAT+ simulation. extract_data (dict[str, dict[str, typing.Any]]): A nested dictionary specifying how to extract data from SWAT+ simulation output files. The top-level keys are filenames of the output files, without paths (e.g., `channel_sd_day.txt`). Each key must map to a non-empty dictionary - containing the following subkeys, as defined in [`simulated_timeseries_df`](https://swat-model.github.io/pySWATPlus/api/data_manager/#pySWATPlus.DataManager.simulated_timeseries_df): + containing the following sub-keys, which correspond to the input variables within the method + [`simulated_timeseries_df`](https://swat-model.github.io/pySWATPlus/api/data_manager/#pySWATPlus.DataManager.simulated_timeseries_df): - `has_units` (bool): **Required.** If `True`, the third line of the simulated file contains units for the columns. - `begin_date` (str): Optional. Start date in `DD-Mon-YYYY` format (e.g., 01-Jan-2010). Defaults to the earliest date in the simulated file. @@ -334,7 +207,7 @@ def simulation_by_sample_parameters( - `time`: A dictionary containing time-related statistics: - `sample_length`: Total number of samples, including duplicates. - - `total_time_sec`: Total time in seconds for the simulation. + - `time_sec`: Total time in seconds for the simulation. - `time_per_sample_sec`: Average simulation time per sample in seconds. 
- `problem`: The problem definition dictionary passed to `Sobol` sampling, containing: @@ -346,7 +219,7 @@ def simulation_by_sample_parameters( - `sample`: The sampled array of parameter sets used in the simulations. - - `simulation`: Dictionary mapping simulation indices (integers from 1 to `sample_length`) to output sub-dictionaries with the following keys: + - `simulation`: Dictionary mapping scenario indices (integers from 1 to `sample_length`) to output sub-dictionaries with the following keys: - `var`: Dictionary mapping each variable name (from `var_names`) to the actual value used in that simulation. - `dir`: Name of the directory (e.g., `sim_`) where the simulation was executed. This is useful when `clean_setup` is `False`, as it allows users @@ -370,8 +243,8 @@ def simulation_by_sample_parameters( - The computation progress can be tracked through the following `console` messages, where the simulation index ranges from 1 to the total number of unique simulations: - - `Started simulation: /` - - `Completed simulation: /` + - `Started simulation: /` + - `Completed simulation: /` - The disk space on the computer for `sensim_dir` must be sufficient to run parallel simulations (at least `max_workers` times the size of the `TxtInOut` folder). 
@@ -389,42 +262,35 @@ def simulation_by_sample_parameters( vars_values=locals() ) - # Absolute path - txtinout_dir = pathlib.Path(txtinout_dir).resolve() + # Absolute directory path sensim_dir = pathlib.Path(sensim_dir).resolve() + txtinout_dir = pathlib.Path(txtinout_dir).resolve() - # Check validity of directory path - validators._dir_path( - input_dir=txtinout_dir - ) - validators._dir_path( - input_dir=sensim_dir - ) - - # Check sensim_dir is empty - validators._dir_empty( - input_dir=sensim_dir + # Validate initialization of TxtinoutReader class + tmp_reader = TxtinoutReader( + tio_dir=txtinout_dir ) - # Validate extract_data configuration - self._validate_extract_data_config( - extract_data=extract_data - ) + # Disable CSV print to save time + tmp_reader.disable_csv_print() - # Validate unique dictionaries for parameters - validators._calibration_list_contain_unique_dict( + # List of BoundDict objects + params_bounds = utils._parameters_bound_dict_list( parameters=parameters ) - # Validate input keys in dictionaries for sensitive parameters - params_bounds = [ - BoundDict(**param) for param in parameters - ] - validators._calibration_parameters( - input_dir=txtinout_dir, + # Validate configuration of simulation parameters + validators._simulation_preliminary_setup( + sim_dir=sensim_dir, + tio_dir=txtinout_dir, parameters=params_bounds ) + # Validate extract_data configuration + validators._extract_data_config( + extract_data=extract_data + ) + # problem dictionary problem = self._create_sobol_problem( params_bounds=params_bounds @@ -448,13 +314,13 @@ def simulation_by_sample_parameters( # Number of unique simulations num_sim = len(unique_array) - # Sensitivity simulation in separate CPU + # Simulation in separate CPU cpu_sim = functools.partial( - self._cpu_simulation, + cpu._simulation_output, num_sim=num_sim, var_names=copy_problem['names'], - sensim_dir=sensim_dir, - txtinout_dir=txtinout_dir, + sim_dir=sensim_dir, + tio_dir=txtinout_dir, 
params_bounds=params_bounds, extract_data=extract_data, clean_setup=clean_setup @@ -468,12 +334,12 @@ def simulation_by_sample_parameters( executor.submit(cpu_sim, idx, arr) for idx, arr in enumerate(unique_array, start=1) ] for future in concurrent.futures.as_completed(futures): - # Message for completion of individual simulation for better tracking + # Message simulation completion for tracking print(f'Completed simulation: {futures.index(future) + 1}/{num_sim}', flush=True) # Collect simulation results - f_r = future.result() - cpu_dict[tuple(f_r['array'])] = { - k: v for k, v in f_r.items() if k != 'array' + future_result = future.result() + cpu_dict[tuple(future_result['array'])] = { + k: v for k, v in future_result.items() if k != 'array' } # Generate sensitivity simulation output for all sample_array from unique_array outputs @@ -496,7 +362,7 @@ def simulation_by_sample_parameters( required_time = time.time() - start_time time_stats = { 'sample_length': len(sample_array), - 'total_time_sec': round(required_time), + 'time_sec': round(required_time), 'time_per_sample_sec': round(required_time / len(sample_array), 1), } @@ -535,7 +401,14 @@ def parameter_sensitivity_indices( The method returns a dictionary with two keys: - `problem`: The definition dictionary passed to sampling. - - `sensitivty_indices`: A dictionary where each key is an indicator name and the corresponding value is the computed sensitivity indices. + - `sensitivty_indices`: A dictionary where each key is an indicator name and the corresponding value contains the computed sensitivity indices obtained using the + [`SALib.analyze.sobol.analyze`](https://salib.readthedocs.io/en/latest/api/SALib.analyze.html#SALib.analyze.sobol.analyze) method. + + The sensitivity indices are computed for the specified list of indicators. 
Before computing the indicators, both simulated and observed values are normalized using the formula + `(v - min_o) / (max_o - min_o)`, where `min_o` and `max_o` represent the minimum and maximum of observed values, respectively. + + Note: + All negative and `None` observed values are removed before computing `min_o` and `max_o` to prevent errors during normalization. Args: sensim_file (str | pathlib.Path): Path to the `sensitivity_simulation.json` file produced by `simulation_by_sample_parameters`. @@ -549,7 +422,7 @@ def parameter_sensitivity_indices( date_format (str): Date format of the `date` column in `obs_file`, used to parse `datetime.date` objects from date strings. - obs_col (str): Name of the column in `obs_file` containing observed data. All negative and `None` observed values are removed before analysis. + obs_col (str): Name of the column in `obs_file` containing observed data. indicators (list[str]): List of indicators to compute sensitivity indices. Available options: @@ -576,7 +449,7 @@ def parameter_sensitivity_indices( ) # Problem and indicators - prob_inct = PerformanceMetrics().scenario_indicators( + prob_ind = PerformanceMetrics().scenario_indicators( sensim_file=sensim_file, df_name=df_name, sim_col=sim_col, @@ -585,8 +458,8 @@ def parameter_sensitivity_indices( obs_col=obs_col, indicators=indicators ) - problem = prob_inct['problem'] - indicator_df = prob_inct['indicator'] + problem = prob_ind['problem'] + indicator_df = prob_ind['indicator'] # Sensitivity indices sensitivity_indices = {} From d860c8543829f562ff1ba05b250332fb3a8d0eed Mon Sep 17 00:00:00 2001 From: Debasish Pal <48341250+debpal@users.noreply.github.com> Date: Wed, 22 Oct 2025 15:21:55 +0300 Subject: [PATCH 09/12] Add new method to compute monthly and yearl summary statistics from daily time series data --- pySWATPlus/data_manager.py | 138 ++++++++++++++++++++++++++++++++++++- 1 file changed, 137 insertions(+), 1 deletion(-) diff --git a/pySWATPlus/data_manager.py 
b/pySWATPlus/data_manager.py index 1322639..4bf064d 100644 --- a/pySWATPlus/data_manager.py +++ b/pySWATPlus/data_manager.py @@ -189,6 +189,142 @@ def simulated_timeseries_df( return df + def hru_stats_from_daily_simulation( + self, + sim_file: str | pathlib.Path, + has_units: bool, + gis_id: int, + sim_col: str, + output_dir: typing.Optional[str | pathlib.Path] = None + ) -> dict[str, pandas.DataFrame]: + ''' + Compute monthly and yearly statistical summaries for a Hydrological Response Unit (HRU) from daily simulation time series data. + + The method returns a dictionary containing two keys, `monthly` and `yearly`, whose values are `DataFrame` objects. + Each `DataFrame` includes the following columns: + + - `date`: The first day of the corresponding month or year. + - `min`: Minimum simulated value within the time window. + - `max`: Maximum simulated value within the time window. + - `mean`: Mean simulated value within the time window. + - `std`: Standard deviation of simulated values within the time window. + + The statistics are computed using daily values between the first and last dates (both inclusive) + of each month or year. The `date` column represents the first day of the corresponding period + (e.g., 01-Jan-2012, 01-Feb-2012 for monthly; 01-Jan-2012 for yearly). + + If the first or last record in the input file does not align exactly with the start or end + of a month or year, the statistics are computed for the available portion of that period. + In such cases, the `date` column represents the first available date for that partial period. + + Args: + sim_file (str | pathlib.Path): Path to the input file containing time series data generated by + the method [`run_swat`](https://swat-model.github.io/pySWATPlus/api/txtinout_reader/#pySWATPlus.TxtinoutReader.run_swat). + The file must contain `yr`, `mon`, and `day` columns. + + has_units (bool): If `True`, the third line of the input file contains column units. 
+ + gis_id (int): Unique identifier for the Hydrological Response Unit (HRU) found in the `gis_id`. + + sim_col (str): Name of the column containing simulated values. + + output_dir (str | pathlib.Path): Directory path to save the computed results as two following JSON files. + If `None` (default), the results are not saved. + + - `statistics_monthly.json`: Contains the monthly statistical `DataFrame` (with `date` formatted as `DD-Mon-YYYY`). + - `statistics_yearly.json`: Contains the yearly statistical `DataFrame` (with `date` formatted as `DD-Mon-YYYY`). + + Returns: + Dictionary with two keys: + + - `monthly`: `DataFrame` containing monthly statistics, with `date` as `datetime.date` objects. + - `yearly`: `DataFrame` containing yearly statistics, with `date` as `datetime.date` objects. + ''' + + # Check input variables type + validators._variable_origin_static_type( + vars_types=typing.get_type_hints( + obj=self.hru_stats_from_daily_simulation + ), + vars_values=locals() + ) + + # Check input file contains daily time series data + sim_file = pathlib.Path(sim_file).resolve() + if not sim_file.stem.endswith('_day'): + raise ValueError( + f'Statistical summary applies only to daily time series files ending with "_day"; received file name "{sim_file.stem}"' + ) + + # Validate directory path + if output_dir is not None: + validators._dir_path( + input_dir=pathlib.Path(output_dir).resolve() + ) + + # Simulated DataFrame + df = self.simulated_timeseries_df( + sim_file=sim_file, + has_units=has_units, + apply_filter={ + 'gis_id': [gis_id] + }, + usecols=[sim_col] + ) + + # Frequncy abbreviations + freq_abb = { + 'monthly': 'MS', + 'yearly': 'YS' + } + + # Date column + date_col = 'date' + + output = {} + # Iterate frequency + for freq in freq_abb: + # Start date + start_date = df[date_col].min() + # End date + end_date = df[date_col].max() + # Time frequency representation days + freq_day = pandas.date_range( + start=start_date, + end=end_date, + freq=freq_abb[freq] + ) 
+ freq_day = pandas.Series(freq_day).dt.date + # Add start and end date + freq_day = pandas.Series( + [start_date] + freq_day.tolist() + [end_date] + ) + # Get unique date if repitation of start and end dates + freq_day = freq_day.unique() + # Frequency DataFrame + freq_df = pandas.DataFrame() + for idx, dates in enumerate(zip(freq_day[:-1], freq_day[1:])): + idx_df = df[(df[date_col] >= dates[0]) & (df[date_col] < dates[1])] + freq_df.loc[idx, 'date'] = dates[0] + freq_df.loc[idx, 'max'] = idx_df[sim_col].max() + freq_df.loc[idx, 'min'] = idx_df[sim_col].min() + freq_df.loc[idx, 'mean'] = idx_df[sim_col].mean() + freq_df.loc[idx, 'std'] = idx_df[sim_col].std() + # Insert the DataFrame in the output dictionary + output[freq] = freq_df + # Save the DataFrame + if output_dir is not None: + save_file = pathlib.Path(output_dir).resolve() / f'statistics_{freq}.json' + copy_df = copy.deepcopy(freq_df) + copy_df[date_col] = copy_df[date_col].apply(lambda x: x.strftime('%d-%b-%Y')) + copy_df.to_json( + path_or_buf=save_file, + orient='records', + indent=4 + ) + + return output + def read_sensitive_dfs( self, sensim_file: str | pathlib.Path, @@ -235,7 +371,7 @@ def read_sensitive_dfs( sensim_file = pathlib.Path(sensim_file).resolve() # Sensitiivty output data - output = utils._retrieve_sensitivity_output( + output = utils._sensitivity_output_retrieval( sensim_file=sensim_file, df_name=df_name, add_problem=add_problem, From ab032ce5be33b4989371e960db30b520b96146ff Mon Sep 17 00:00:00 2001 From: Debasish Pal <48341250+debpal@users.noreply.github.com> Date: Wed, 22 Oct 2025 15:23:02 +0300 Subject: [PATCH 10/12] Added Calibration class for model calibration --- pySWATPlus/__init__.py | 2 + pySWATPlus/calibration.py | 640 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 642 insertions(+) create mode 100644 pySWATPlus/calibration.py diff --git a/pySWATPlus/__init__.py b/pySWATPlus/__init__.py index 84979d3..b51f5ec 100644 --- a/pySWATPlus/__init__.py +++ 
import concurrent.futures
import copy
import functools
import importlib
import json
import pathlib
import typing

import numpy
import pymoo.core.problem
import pymoo.optimize

from .txtinout_reader import TxtinoutReader
from .performance_metrics import PerformanceMetrics
from . import validators
from . import utils
from . import newtype
from . import cpu


class Calibration(pymoo.core.problem.Problem):  # type: ignore[misc]
    '''
    Warning:
        This class is currently under development.

    A `Problem` subclass from the [`pymoo`](https://github.com/anyoptimization/pymoo) Python package
    for performing model calibration against observed data using multi-objective optimization
    and evolutionary algorithms.

    Args:
        parameters (newtype.BoundType): List of dictionaries defining parameter configurations for calibration simulations.
            Each dictionary contains the following keys:

            - `name` (str): **Required.** Name of the parameter in the `cal_parms.cal` file.
            - `change_type` (str): **Required.** Type of change to apply. Must be one of 'absval', 'abschg', or 'pctchg'.
            - `lower_bound` (float): **Required.** Lower bound for the parameter.
            - `upper_bound` (float): **Required.** Upper bound for the parameter.
            - `units` (Iterable[int]): Optional. List of unit IDs to which the parameter change should be constrained.
            - `conditions` (dict[str, list[str]]): Optional. Conditions to apply when changing the parameter.
              Supported keys include `'hsg'`, `'texture'`, `'plant'`, and `'landuse'`, each mapped to a list of allowed values.

            ```python
            parameters = [
                {
                    'name': 'cn2',
                    'change_type': 'pctchg',
                    'lower_bound': 25,
                    'upper_bound': 75,
                },
                {
                    'name': 'perco',
                    'change_type': 'absval',
                    'lower_bound': 0,
                    'upper_bound': 1,
                    'conditions': {'hsg': ['A']}
                },
                {
                    'name': 'bf_max',
                    'change_type': 'absval',
                    'lower_bound': 0.1,
                    'upper_bound': 2.0,
                    'units': range(1, 194)
                }
            ]
            ```

        calsim_dir (str | pathlib.Path): Path to the directory where simulations for each individual in each generation will be performed.
            Raises an error if the folder is not empty. This precaution helps prevent data deletion, overwriting directories,
            and issues with reading required data files not generated by the simulation.

        txtinout_dir (str | pathlib.Path): Path to the `TxtInOut` directory containing the required files for SWAT+ simulation.

        extract_data (dict[str, dict[str, typing.Any]]): A nested dictionary specifying how to extract data from SWAT+ simulation output files.
            The top-level keys are filenames of the output files, without paths (e.g., `channel_sd_day.txt`). Each key must map to a non-empty dictionary
            containing the following sub-keys, which correspond to the input variables within the method
            [`simulated_timeseries_df`](https://swat-model.github.io/pySWATPlus/api/data_manager/#pySWATPlus.DataManager.simulated_timeseries_df):

            - `has_units` (bool): **Required.** If `True`, the third line of the simulated file contains units for the columns.
            - `begin_date` (str): Optional. Start date in `DD-Mon-YYYY` format (e.g., 01-Jan-2010). Defaults to the earliest date in the simulated file.
            - `end_date` (str): Optional. End date in `DD-Mon-YYYY` format (e.g., 31-Dec-2013). Defaults to the latest date in the simulated file.
            - `ref_day` (int): Optional. Reference day for monthly and yearly time series.
              If `None` (default), the last day of the month or year is used, obtained from simulation. Not applicable to daily time series files (ending with `_day`).
            - `ref_month` (int): Optional. Reference month for yearly time series. If `None` (default), the last month of the year is used, obtained from simulation.
              Not applicable to monthly time series files (ending with `_mon`).
            - `apply_filter` (dict[str, list[typing.Any]]): Optional. Each key is a column name and the corresponding value
              is a list of allowed values for filtering rows in the DataFrame. By default, no filtering is applied.
              An error is raised if filtering produces an empty DataFrame.

            !!! note
                The sub-key `usecols` should **not** be included here. Although no error will be raised, it will be ignored during class initialization
                because the `sim_col` sub-key from the `objectives` input is automatically used as `usecols`. Including it manually has no effect.

            ```python
            extract_data = {
                'channel_sd_day.txt': {
                    'has_units': True,
                    'begin_date': '01-Jun-2014',
                    'end_date': '01-Oct-2016',
                    'apply_filter': {'gis_id': [561]}
                },
                'channel_sd_mon.txt': {
                    'has_units': True,
                    'apply_filter': {'name': ['cha561']}
                }
            }
            ```

        observe_data (dict[str, dict[str, str]]): A nested dictionary specifying observed data configuration. The top-level keys
            are same as keys of `extract_data` (e.g., `channel_sd_day.txt`). Each key must map to a non-empty dictionary containing the following sub-keys:

            - `obs_file` (str): **Required.** Path to the CSV file containing observed data. The file must include a `date` column with comma as the
              separator to read the `DataFrame` by the [`pandas.read_csv`](https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html#pandas-read-csv)
              method. The `date` column will be used to merge simulated and observed data.
            - `date_format` (str): **Required.** Date format of the `date` column in `obs_file`, used to parse `datetime.date` objects from date strings.

            ```python
            observe_data = {
                'channel_sd_day.txt': {
                    'obs_file': "C:\\Users\\Username\\observed_data\\discharge_daily.csv",
                    'date_format': '%Y-%m-%d'
                },
                'channel_sd_mon.txt': {
                    'obs_file': "C:\\Users\\Username\\observed_data\\discharge_monthly.csv",
                    'date_format': '%Y-%m-%d'
                }
            }
            ```

        objectives (dict[str, dict[str, str]]): A nested dictionary specifying objectives configuration. The top-level keys
            are same as keys of `extract_data` (e.g., `channel_sd_day.txt`). Each key must map to a non-empty dictionary containing the following sub-keys:

            - `sim_col` (str): **Required.** Name of the column containing simulated values.
            - `obs_col` (str): **Required.** Name of the column containing observed values.
            - `indicator` (str): **Required.** Name of the performance indicator used for optimization.
              Available options with their optimization direction are listed below:

                - `NSE`: Nash–Sutcliffe Efficiency (**maximize**).
                - `KGE`: Kling–Gupta Efficiency (**maximize**).
                - `MSE`: Mean Squared Error (**minimize**).
                - `RMSE`: Root Mean Squared Error (**minimize**).
                - `MARE`: Mean Absolute Relative Error (**minimize**).

            !!! tip
                Avoid using `MARE` if `obs_col` contains zero values, as it will cause a division-by-zero error.

            ```python
            objectives = {
                'channel_sd_day.txt': {
                    'sim_col': 'flo_out',
                    'obs_col': 'discharge',
                    'indicator': 'NSE'
                },
                'channel_sd_mon.txt': {
                    'sim_col': 'flo_out',
                    'obs_col': 'discharge',
                    'indicator': 'MSE'
                }
            }
            ```

        algorithm (str): Name of the algorithm. Available options:

            - `GA`: Genetic Algorithm (**single-objective**).
            - `DE`: [Differential Evolution Algorithm](https://doi.org/10.1007/3-540-31306-0) (**single-objective**).
            - `NSGA2`: [Non-dominated sorted Genetic Algorithm - II](https://doi.org/10.1109/4235.996017) (**multi-objective**).
            - `AGEMOEA2`: [Adaptive Geometry Estimation based Multi-objective Evolutionary Algorithm - II](https://doi.org/10.1145/3512290.3528732) (**multi-objective**).

            !!! Note
                Multi-objective algorithms can be used for single-objective optimization, but not vice versa.

        n_gen (int): Number of generations that the algorithm will run.

        pop_size (int): Number of individuals in each generation.

        max_workers (int): Number of logical CPUs to use for parallel processing. If `None` (default), all available logical CPUs are used.

            !!! tip
                Simulation efficiency can be improved by choosing `pop_size` strategically.
                The total number of simulations during optimization equals `n_gen × pop_size`.
                Simulations within a generation are executed in parallel batches depending on CPU availability.
                For example, with 16 CPUs and a `pop_size` of 20, two batches will be required (16 + 4);
                therefore, selecting a `pop_size` of 16 or 32 may improve efficiency.
    '''

    def __init__(
        self,
        parameters: newtype.BoundType,
        calsim_dir: str | pathlib.Path,
        txtinout_dir: str | pathlib.Path,
        extract_data: dict[str, dict[str, typing.Any]],
        observe_data: dict[str, dict[str, str]],
        objectives: dict[str, dict[str, str]],
        algorithm: str,
        n_gen: int,
        pop_size: int,
        max_workers: typing.Optional[int] = None
    ) -> None:

        # Check input variables type
        validators._variable_origin_static_type(
            vars_types=typing.get_type_hints(
                obj=self.__class__.__init__
            ),
            vars_values=locals()
        )

        # Absolute directory paths
        calsim_dir = pathlib.Path(calsim_dir).resolve()
        txtinout_dir = pathlib.Path(txtinout_dir).resolve()

        # Check same top-level keys in dictionaries
        if not (extract_data.keys() == observe_data.keys() == objectives.keys()):
            raise KeyError(
                'Mismatch of key names. Ensure extract_data, observe_data, and objectives have identical top-level keys.'
            )

        # Validate initialization of TxtinoutReader class
        tmp_reader = TxtinoutReader(
            tio_dir=txtinout_dir
        )

        # Disable CSV print to save time
        tmp_reader.disable_csv_print()

        # List of BoundDict objects
        params_bounds = utils._parameters_bound_dict_list(
            parameters=parameters
        )

        # Validate configuration of simulation parameters
        validators._simulation_preliminary_setup(
            sim_dir=calsim_dir,
            tio_dir=txtinout_dir,
            parameters=params_bounds
        )

        # Validate objectives configuration
        self._validate_objectives_config(
            objectives=objectives
        )

        # Validate observe_data configuration
        self._validate_observe_data_config(
            observe_data=observe_data
        )

        # Dictionary of observed DataFrames
        observe_dict = {}
        for key in observe_data:
            key_df = utils._df_observe(
                obs_file=pathlib.Path(observe_data[key]['obs_file']).resolve(),
                date_format=observe_data[key]['date_format'],
                obs_col=objectives[key]['obs_col']
            )
            key_df.columns = ['date', 'obs']
            observe_dict[key.split('.')[0] + '_df'] = key_df

        # Validate extract_data configuration
        for key in extract_data:
            # The objective's simulated column is the only column extracted
            extract_data[key]['usecols'] = [objectives[key]['sim_col']]
        validators._extract_data_config(
            extract_data=extract_data
        )

        # Change variable name with counter (for multiple occurrence only) in BoundType
        var_names = utils._parameters_name_with_counter(
            parameters=params_bounds
        )

        # List of variable lower and upper bounds
        var_lb = []
        var_ub = []
        for param in params_bounds:
            var_lb.append(param.lower_bound)
            var_ub.append(param.upper_bound)

        # Initialize parameters
        self.params_bounds = params_bounds
        self.extract_data = extract_data
        self.objectives = objectives
        self.var_names = var_names
        self.observe_dict = observe_dict
        self.calsim_dir = calsim_dir
        self.txtinout_dir = txtinout_dir
        self.algorithm = algorithm
        self.n_gen = n_gen
        self.pop_size = pop_size
        self.total_sim = n_gen * pop_size
        self.max_workers = max_workers
        self.track_gen = 0

        # Access properties and methods from Problem class
        super().__init__(
            n_var=len(params_bounds),
            n_obj=len(objectives),
            xl=numpy.array(var_lb),
            xu=numpy.array(var_ub)
        )

    def _evaluate(
        self,
        x: numpy.typing.NDArray[numpy.float64],
        out: dict[str, numpy.typing.NDArray[numpy.float64]],
        *args: typing.Any,
        **kwargs: typing.Any
    ) -> None:
        '''
        Evaluate the objective values for one generation of the population.

        Runs one SWAT+ simulation per individual in parallel processes, merges
        each simulated time series with the observed data, and writes the
        objective array to `out["F"]` as required by `pymoo`.
        '''

        # Track starting simulation number for each generation
        start_sim = self.track_gen * self.pop_size + 1

        # Display start of current generation number
        self.track_gen = self.track_gen + 1
        print(
            f'\nStarted generation: {self.track_gen}/{self.n_gen}\n'
        )

        # Simulation in separate CPU
        cpu_sim = functools.partial(
            cpu._simulation_output,
            num_sim=self.total_sim,
            var_names=self.var_names,
            sim_dir=self.calsim_dir,
            tio_dir=self.txtinout_dir,
            params_bounds=self.params_bounds,
            extract_data=self.extract_data,
            clean_setup=True
        )

        # Assign model simulation in individual computer CPU and collect results
        cpu_dict = {}
        with concurrent.futures.ProcessPoolExecutor(max_workers=self.max_workers) as executor:
            # Multicore simulation; map each future to its simulation number
            # so completion messages do not need an O(n) list lookup
            future_to_sim = {
                executor.submit(cpu_sim, idx, arr): idx
                for idx, arr in enumerate(x, start=start_sim)
            }
            for future in concurrent.futures.as_completed(future_to_sim):
                # Display end of current simulation for tracking
                print(f'Completed simulation: {future_to_sim[future]}/{self.total_sim}', flush=True)
                # Collect simulation results, keyed by the parameter array
                future_result = future.result()
                cpu_dict[tuple(future_result['array'])] = {
                    k: v for k, v in future_result.items() if k != 'array'
                }

        # Mapping between objectives and their directions
        objs_dirs = self._objectives_directions()

        # An empty list to store population objectives in current generation
        gen_objs = []

        # Iterate population array in the current generation
        for pop in x:
            # Empty list to collect individual population objectives
            pop_objs = []
            # Simulation output for the population
            pop_sim = cpu_dict[tuple(pop)]
            # Iterate objectives
            for obj in self.objectives:
                # Objective indicator
                obj_ind = self.objectives[obj]['indicator']
                # Objective key name to extract simulated and observed DataFrames
                obj_key = obj.split('.')[0] + '_df'
                # Simulated DataFrame
                sim_df = pop_sim[obj_key]
                sim_df.columns = ['date', 'sim']
                # Observed DataFrame
                obs_df = self.observe_dict[obj_key]
                # Merge simulated and observed DataFrames by 'date' column
                merge_df = sim_df.merge(
                    right=obs_df,
                    how='inner',
                    on='date'
                )
                # Normalized DataFrame
                norm_df = utils._df_normalize(
                    df=merge_df[['sim', 'obs']],
                    norm_col='obs'
                )
                # Indicator method from abbreviation
                indicator_method = getattr(
                    PerformanceMetrics(),
                    f'compute_{obj_ind.lower()}'
                )
                # Indicator value computed from method
                ind_val = indicator_method(
                    df=norm_df,
                    sim_col='sim',
                    obs_col='obs'
                )
                # Objective value based on maximize or minimize direction;
                # pymoo always minimizes, so maximized indicators are negated
                obj_val = - ind_val if objs_dirs[obj_ind] == 'max' else ind_val
                # Store objective value for sample
                pop_objs.append(obj_val)
            # Store sample objectives in population objective list
            gen_objs.append(pop_objs)

        # Array of objective values for the current generation
        out["F"] = numpy.array(gen_objs)

        # Print end of current generation number
        print(
            f'\nCompleted generation: {self.track_gen}/{self.n_gen}\n'
        )

    def _objectives_directions(
        self
    ) -> dict[str, str]:
        '''
        Provide a dictionary mapping optimization objectives to their respective directions.
        '''

        objs_dirs = {
            'NSE': 'max',
            'KGE': 'max',
            'MSE': 'min',
            'RMSE': 'min',
            'MARE': 'min'
        }

        return objs_dirs

    def _validate_observe_data_config(
        self,
        observe_data: dict[str, dict[str, str]],
    ) -> None:
        '''
        Validate `observe_data` configuration.

        Raises:
            TypeError: If a top-level value is not a dictionary.
            ValueError: If a sub-dictionary does not hold exactly the required keys.
            KeyError: If a sub-dictionary contains an unrecognized key.
        '''

        # List of valid sub-keys of sub-dictionaries
        valid_subkeys = [
            'obs_file',
            'date_format'
        ]

        # Iterate dictionary
        for file_key, file_dict in observe_data.items():
            # Check type of a sub-dictionary
            if not isinstance(file_dict, dict):
                raise TypeError(
                    f'Expected "{file_key}" in observe_data must be a dictionary, '
                    f'but got type "{type(file_dict).__name__}"'
                )
            # Check sub-dictionary length
            if len(file_dict) != 2:
                raise ValueError(
                    f'Length of "{file_key}" sub-dictionary in observe_data must be 2, '
                    f'but got {len(file_dict)}'
                )
            # Iterate sub-key
            for sub_key in file_dict:
                # Check valid sub-key
                if sub_key not in valid_subkeys:
                    raise KeyError(
                        f'Invalid sub-key "{sub_key}" for "{file_key}" in observe_data; '
                        f'expected sub-keys are {json.dumps(valid_subkeys)}'
                    )

        return None

    def _validate_objectives_config(
        self,
        objectives: dict[str, dict[str, str]],
    ) -> None:
        '''
        Validate `objectives` configuration.

        Raises:
            TypeError: If a top-level value is not a dictionary.
            ValueError: If a sub-dictionary does not hold exactly the required keys,
                or if the `indicator` value is not a supported indicator.
            KeyError: If a sub-dictionary contains an unrecognized key.
        '''

        # List of valid sub-keys of sub-dictionaries
        valid_subkeys = [
            'sim_col',
            'obs_col',
            'indicator'
        ]

        # PBIAS has no fixed optimization direction, so it cannot be an objective
        valid_indicators = [
            key for key in PerformanceMetrics().indicator_names if key != 'PBIAS'
        ]

        # Iterate dictionary
        for file_key, file_dict in objectives.items():
            # Check type of a sub-dictionary
            if not isinstance(file_dict, dict):
                raise TypeError(
                    f'Expected "{file_key}" in "objectives" must be a dictionary, '
                    f'but got type "{type(file_dict).__name__}"'
                )
            # Check sub-dictionary length
            if len(file_dict) != 3:
                raise ValueError(
                    f'Length of "{file_key}" sub-dictionary in "objectives" must be 3, '
                    f'but got {len(file_dict)}'
                )
            # Iterate sub-key
            for sub_key in file_dict:
                # Check valid sub-key
                if sub_key not in valid_subkeys:
                    raise KeyError(
                        f'Invalid sub-key "{sub_key}" for "{file_key}" in "objectives"; '
                        f'expected sub-keys are {json.dumps(valid_subkeys)}'
                    )
                if sub_key == 'indicator' and file_dict[sub_key] not in valid_indicators:
                    raise ValueError(
                        f'Invalid "indicator" value "{file_dict[sub_key]}" for "{file_key}" in "objectives"; '
                        f'expected indicators are {valid_indicators}'
                    )

        return None

    def _algorithm_class(
        self,
        algorithm: str
    ) -> type:
        '''
        Retrieve the optimization algorithm class from the `pymoo` package.

        Only the module of the requested algorithm is imported, instead of
        eagerly importing all supported algorithm modules.

        Raises:
            NameError: If `algorithm` is not a supported algorithm name.
        '''

        # Dictionary mapping between algorithm name and module path
        api_module = {
            'GA': 'pymoo.algorithms.soo.nonconvex.ga',
            'DE': 'pymoo.algorithms.soo.nonconvex.de',
            'NSGA2': 'pymoo.algorithms.moo.nsga2',
            'AGEMOEA2': 'pymoo.algorithms.moo.age2'
        }

        # Check algorithm name is valid
        if algorithm not in api_module:
            raise NameError(
                f'Invalid algorithm "{algorithm}"; valid names are {list(api_module.keys())}'
            )

        # Algorithm class (lazy import of the selected module only)
        alg_class = typing.cast(
            type,
            getattr(importlib.import_module(api_module[algorithm]), algorithm)
        )

        return alg_class

    def parameter_optimization(
        self
    ) -> dict[str, typing.Any]:
        '''
        Run the optimization using the configured algorithm, population size, and number of generations.

        This method executes the optimization process and returns a dictionary containing the optimized
        parameters, corresponding objective values, and total execution time.

        Two JSON files are saved in the input directory `calsim_dir`.

        The file `optimization_history.json` stores the optimization history. Each key in this
        file is an integer starting from 1, representing the generation number. The corresponding value
        is a sub-dictionary with two keys: `pop` for the population data (decision variables) and `obj`
        for the objective function values. This file is useful for analyzing optimization progress,
        convergence trends, performance indicators, and visualization.

        The file `optimization_result.json` contains the final output dictionary described below.

        Returns:
            Dictionary with the following keys:

            - `algorithm`: Name of the algorithm.
            - `generation`: Number of generations to run.
            - `population`: Number of individuals in each generation.
            - `total_simulation`: Number of total simulations.
            - `time_sec`: Total execution time in seconds.
            - `variables`: Array of optimized decision variables.
            - `objectives`: Array of objective values corresponding to the optimized decision variables.

        Note:
            - For multi-objective optimization, the number of `variables` and `objectives` may exceed one,
              representing non-dominated solutions where a solution cannot be improved in one objective
              without worsening another.

            - The computation progress can be tracked through the following `console` messages.
              The simulation index ranges from 1 to the total number of simulations, which equals the product
              of the population size and the number of generations:

                - `Started generation: /`
                - `Started simulation: /`
                - `Completed simulation: /`
                - `Completed generation: /`

            - The disk space on the computer for `calsim_dir` must be sufficient to run
              parallel simulations (at least `pop_size` times the size of the `TxtInOut` folder).
              Otherwise, no error will be raised by the system, but simulation outputs may not be generated.
        '''

        # Algorithm object
        alg_class = self._algorithm_class(
            algorithm=self.algorithm
        )
        alg_object = alg_class(
            pop_size=self.pop_size
        )

        # Optimization result
        result = pymoo.optimize.minimize(
            problem=self,
            algorithm=alg_object,
            termination=('n_gen', self.n_gen),
            save_history=True
        )

        # Sign of objective directions: maximized indicators were negated in
        # _evaluate, so multiply by -1 to report them on their natural scale
        objs_dirs = self._objectives_directions()
        dir_list = [
            objs_dirs[v['indicator']] for v in self.objectives.values()
        ]
        dir_sign = numpy.where(numpy.array(dir_list) == 'max', -1, 1)

        # Save optimization history for each generation
        opt_hist = {}
        for i, gen in enumerate(result.history, start=1):
            opt_hist[i] = {
                'pop': gen.pop.get('X').tolist(),
                'obj': (gen.pop.get('F') * dir_sign).tolist()
            }
        json_file = self.calsim_dir / 'optimization_history.json'
        with open(json_file, 'w') as output_write:
            json.dump(opt_hist, output_write, indent=4)

        # Optimized output of parameters, objectives, and execution times
        required_time = round(result.exec_time)
        opt_output = {
            'algorithm': self.algorithm,
            'generation': self.n_gen,
            'population': self.pop_size,
            'total_simulation': self.pop_size * self.n_gen,
            'time_sec': required_time,
            'variables': result.X,
            'objectives': result.F * dir_sign
        }

        # Save optimized outputs (arrays converted to lists for JSON)
        save_output = copy.deepcopy(opt_output)
        save_output = {
            k: v.tolist() if k.startswith(('var', 'obj')) else v for k, v in save_output.items()
        }
        json_file = self.calsim_dir / 'optimization_result.json'
        with open(json_file, 'w') as output_write:
            json.dump(save_output, output_write, indent=4)

        return opt_output
tests/test_data_manager.py | 15 ++ tests/test_newtype.py | 57 +++++ tests/test_sensitivity_analyzer.py | 6 +- tests/test_txtinout_reader.py | 100 +++++---- tests/test_types.py | 27 --- tests/test_utils.py | 24 +-- tests/test_validators.py | 32 +-- 9 files changed, 487 insertions(+), 101 deletions(-) create mode 100644 tests/test_calibration.py create mode 100644 tests/test_newtype.py delete mode 100644 tests/test_types.py diff --git a/tests/TxtInOut/print.prt b/tests/TxtInOut/print.prt index ffe5bd6..c561cb3 100644 --- a/tests/TxtInOut/print.prt +++ b/tests/TxtInOut/print.prt @@ -4,7 +4,7 @@ nyskip day_start yrc_start day_end yrc_end interval aa_int_cnt 0 csvout dbout cdfout -y n n +n n n crop_yld mgtout hydcon fdcout b n n n objects daily monthly yearly avann diff --git a/tests/test_calibration.py b/tests/test_calibration.py new file mode 100644 index 0000000..4c7d499 --- /dev/null +++ b/tests/test_calibration.py @@ -0,0 +1,325 @@ +import os +import pySWATPlus +import pytest +import tempfile + + +def test_calibration(): + + # set up TxtInOut directory path + txtinout_dir = os.path.join(os.path.dirname(__file__), 'TxtInOut') + + # initialize TxtinoutReader class + txtinout_reader = pySWATPlus.TxtinoutReader( + tio_dir=txtinout_dir + ) + + # Calibration parameters + parameters = [ + { + 'name': 'perco', + 'change_type': 'absval', + 'lower_bound': 0, + 'upper_bound': 1 + } + ] + # Extract data configuration + extract_data = { + 'channel_sd_mon.txt': { + 'has_units': True, + 'ref_day': 1, + 'apply_filter': {'name': ['cha561']} + } + } + + # Observe data configuration + observe_data = { + 'channel_sd_mon.txt': { + 'obs_file': os.path.join(txtinout_dir, 'a_observe_discharge_monthly.csv'), + 'date_format': '%Y-%m-%d' + } + } + + # Objective configuration + objectives = { + 'channel_sd_mon.txt': { + 'sim_col': 'flo_out', + 'obs_col': 'mean', + 'indicator': 'RMSE' + } + } + + with tempfile.TemporaryDirectory() as tmp1_dir: + # Copy required files to a simulation directory 
+ sim_dir = txtinout_reader.copy_required_files( + sim_dir=tmp1_dir + ) + # Intialize TxtinOutReader class by simulation direcotry + sim_reader = pySWATPlus.TxtinoutReader( + tio_dir=sim_dir + ) + # Disable all objects for daily time series file in print.prt to save time and space + sim_reader.enable_object_in_print_prt( + obj=None, + daily=False, + monthly=True, + yearly=True, + avann=True + ) + # Set begin and end year + sim_reader.set_simulation_period( + begin_date='01-Jan-2010', + end_date='31-Dec-2013' + ) + # Set warmup year + sim_reader.set_warmup_year( + warmup=1 + ) + + # Pass: calibration of model parameters + with tempfile.TemporaryDirectory() as tmp2_dir: + calibration = pySWATPlus.Calibration( + parameters=parameters, + calsim_dir=tmp2_dir, + txtinout_dir=tmp1_dir, + extract_data=extract_data, + observe_data=observe_data, + objectives=objectives, + algorithm='NSGA2', + n_gen=2, + pop_size=3 + ) + + output = calibration.parameter_optimization() + + assert isinstance(output, dict) + assert len(output) == 7 + assert 'variables' in output + assert 'objectives' in output + assert os.path.exists(os.path.join(tmp2_dir, 'optimization_history.json')) + assert os.path.exists(os.path.join(tmp2_dir, 'optimization_result.json')) + + +def test_error_calibration(): + + # set up TxtInOut directory path + txtinout_dir = os.path.join(os.path.dirname(__file__), 'TxtInOut') + + # initialize TxtinoutReader class + txtinout_reader = pySWATPlus.TxtinoutReader( + tio_dir=txtinout_dir + ) + + # Calibration parameters + parameters = [ + { + 'name': 'perco', + 'change_type': 'absval', + 'lower_bound': 0, + 'upper_bound': 1 + } + ] + # Extract data configuration + extract_data = { + 'channel_sd_mon.txt': { + 'has_units': True, + 'ref_day': 1, + 'apply_filter': {'name': ['cha561']} + } + } + + # Observe data configuration + observe_data = { + 'channel_sd_mon.txt': { + 'obs_file': os.path.join(txtinout_dir, 'a_observe_discharge_monthly.csv'), + 'date_format': '%Y-%m-%d' + } + } + 
+ # Objective configuration + objectives = { + 'channel_sd_mon.txt': { + 'sim_col': 'flo_out', + 'obs_col': 'mean', + 'indicator': 'RMSE' + } + } + + with tempfile.TemporaryDirectory() as tmp1_dir: + # Copy required files to a simulation directory + txtinout_reader.copy_required_files( + sim_dir=tmp1_dir + ) + + with tempfile.TemporaryDirectory() as tmp2_dir: + + # Error: mismatch of top-level key names + with pytest.raises(Exception) as exc_info: + pySWATPlus.Calibration( + parameters=parameters, + calsim_dir=tmp2_dir, + txtinout_dir=tmp1_dir, + extract_data=extract_data, + observe_data=observe_data, + objectives={ + 'channel_sd_monn.txt': { + 'sim_col': 'flo_out', + 'obs_col': 'mean', + 'indicator': 'RMSE' + } + }, + algorithm='NSGA2', + n_gen=2, + pop_size=5 + ) + assert exc_info.value.args[0] == 'Mismatch of key names. Ensure extract_data, observe_data, and objectives have identical top-level keys.' + + # Error: invalid value type of top-key in observe_data + with pytest.raises(Exception) as exc_info: + pySWATPlus.Calibration( + parameters=parameters, + calsim_dir=tmp2_dir, + txtinout_dir=tmp1_dir, + extract_data=extract_data, + observe_data={ + 'channel_sd_mon.txt': [] + }, + objectives=objectives, + algorithm='NSGA2', + n_gen=2, + pop_size=5 + ) + assert 'Expected "channel_sd_mon.txt" in observe_data must be a dictionary' in exc_info.value.args[0] + + # Error: invalid length of sub-dictionary in observe_data + with pytest.raises(Exception) as exc_info: + pySWATPlus.Calibration( + parameters=parameters, + calsim_dir=tmp2_dir, + txtinout_dir=tmp1_dir, + extract_data=extract_data, + observe_data={ + 'channel_sd_mon.txt': { + 'date_format': '%Y-%m-%d' + } + }, + objectives=objectives, + algorithm='NSGA2', + n_gen=2, + pop_size=5 + ) + assert 'Length of "channel_sd_mon.txt" sub-dictionary in observe_data must be 2' in exc_info.value.args[0] + + # Error: invalid sub-key in observe_data + with pytest.raises(Exception) as exc_info: + pySWATPlus.Calibration( + 
parameters=parameters, + calsim_dir=tmp2_dir, + txtinout_dir=tmp1_dir, + extract_data=extract_data, + observe_data={ + 'channel_sd_mon.txt': { + 'obs_file': os.path.join(txtinout_dir, 'a_observe_discharge_monthly.csv'), + 'date_formatt': '%Y-%m-%d' + } + }, + objectives=objectives, + algorithm='NSGA2', + n_gen=2, + pop_size=5 + ) + assert 'Invalid sub-key "date_formatt" for "channel_sd_mon.txt" in observe_data' in exc_info.value.args[0] + + # Error: invalid value type of top-key in objectives + with pytest.raises(Exception) as exc_info: + pySWATPlus.Calibration( + parameters=parameters, + calsim_dir=tmp2_dir, + txtinout_dir=tmp1_dir, + extract_data=extract_data, + observe_data=observe_data, + objectives={ + 'channel_sd_mon.txt': [] + }, + algorithm='NSGA2', + n_gen=2, + pop_size=5 + ) + assert 'Expected "channel_sd_mon.txt" in "objectives" must be a dictionary' in exc_info.value.args[0] + + # Error: invalid length of sub-dictionary in objectives + with pytest.raises(Exception) as exc_info: + pySWATPlus.Calibration( + parameters=parameters, + calsim_dir=tmp2_dir, + txtinout_dir=tmp1_dir, + extract_data=extract_data, + observe_data=observe_data, + objectives={ + 'channel_sd_mon.txt': { + 'sim_col': 'flo_out', + 'obs_col': 'mean' + } + }, + algorithm='NSGA2', + n_gen=2, + pop_size=5 + ) + assert 'Length of "channel_sd_mon.txt" sub-dictionary in "objectives" must be 3' in exc_info.value.args[0] + + # Error: invalid sub-key in objectives + with pytest.raises(Exception) as exc_info: + pySWATPlus.Calibration( + parameters=parameters, + calsim_dir=tmp2_dir, + txtinout_dir=tmp1_dir, + extract_data=extract_data, + observe_data=observe_data, + objectives={ + 'channel_sd_mon.txt': { + 'sim_col': 'flo_out', + 'obs_col': 'mean', + 'indicatorr': 'RMSE' + } + }, + algorithm='NSGA2', + n_gen=2, + pop_size=5 + ) + assert 'Invalid sub-key "indicatorr" for "channel_sd_mon.txt" in "objectives"' in exc_info.value.args[0] + + # Error: invalid indicator in objectives + with 
pytest.raises(Exception) as exc_info: + pySWATPlus.Calibration( + parameters=parameters, + calsim_dir=tmp2_dir, + txtinout_dir=tmp1_dir, + extract_data=extract_data, + observe_data=observe_data, + objectives={ + 'channel_sd_mon.txt': { + 'sim_col': 'flo_out', + 'obs_col': 'mean', + 'indicator': 'RMSEE' + } + }, + algorithm='NSGA2', + n_gen=2, + pop_size=5 + ) + assert 'Invalid "indicator" value "RMSEE" for "channel_sd_mon.txt" in "objectives"' in exc_info.value.args[0] + + # Error: invalid algorithm name + with pytest.raises(Exception) as exc_info: + pySWATPlus.Calibration( + parameters=parameters, + calsim_dir=tmp2_dir, + txtinout_dir=tmp1_dir, + extract_data=extract_data, + observe_data=observe_data, + objectives=objectives, + algorithm='NSGA', + n_gen=2, + pop_size=5 + ).parameter_optimization() + assert 'Invalid algorithm "NSGA"' in exc_info.value.args[0] diff --git a/tests/test_data_manager.py b/tests/test_data_manager.py index 1edc5c0..c8aad0d 100644 --- a/tests/test_data_manager.py +++ b/tests/test_data_manager.py @@ -122,3 +122,18 @@ def test_simulated_timeseries_df( json_file='ext_invalid.txt' ) assert exc_info.value.args[0] == 'Expected ".json" extension for "json_file", but got ".txt"' + + +def test_error_hru_stats_from_daily_simulation( + data_manager +): + + # Error: invalid JSON file extension to save the DataFrame + with pytest.raises(Exception) as exc_info: + data_manager.hru_stats_from_daily_simulation( + sim_file='channel_sd_mon.txt', + has_units=True, + gis_id=561, + sim_col='flo_out' + ) + assert exc_info.value.args[0] == 'Statistical summary applies only to daily time series files ending with "_day"; received file name "channel_sd_mon"' diff --git a/tests/test_newtype.py b/tests/test_newtype.py new file mode 100644 index 0000000..21d2d3c --- /dev/null +++ b/tests/test_newtype.py @@ -0,0 +1,57 @@ +import pytest +import pySWATPlus + + +def test_error_basedict(): + + # Error: unit value is less than or equal to 0 + with pytest.raises(Exception) as 
exc_info: + pySWATPlus.newtype.BaseDict( + name='esco', + change_type='absval', + units=[-1, 0, 2] + ) + assert 'For parameter "esco": all values for "units" must be > 0' in str(exc_info.value) + + +def test_error_bounddict(): + + # Error: upper_bound is smaller than lower_bound in BoundDict + with pytest.raises(Exception) as exc_info: + pySWATPlus.newtype.BoundDict( + name='perco', + change_type='absval', + lower_bound=1.0, + upper_bound=0.5 + ) + assert 'For parameter "perco": upper_bound=0.5 must be greater than lower_bound=1.0' in str(exc_info.value) + + +def test_error_parameters(): + + # Error: invalid key in "parameters" list for ModifyDict type + with pytest.raises(Exception) as exc_info: + pySWATPlus.utils._parameters_modify_dict_list( + parameters=[ + { + 'name': 'perco', + 'val': 0.3, + 'change_type': 'pctchg' + } + ] + ) + assert 'Invalid key "val"' in exc_info.value.args[0] + + # Error: invalid key in "parameters" list for BoundDict type + with pytest.raises(Exception) as exc_info: + pySWATPlus.utils._parameters_bound_dict_list( + parameters=[ + { + 'name': 'perco', + 'change_type': 'pctchg', + 'lower_boundd': 0, + 'upper_bound': 1 + } + ] + ) + assert 'Invalid key "lower_boundd"' in exc_info.value.args[0] diff --git a/tests/test_sensitivity_analyzer.py b/tests/test_sensitivity_analyzer.py index d00e7eb..5ecf2b5 100644 --- a/tests/test_sensitivity_analyzer.py +++ b/tests/test_sensitivity_analyzer.py @@ -178,7 +178,7 @@ def test_simulation_by_sample_parameters( 'channel_sd_yr.txt': [] } ) - assert exc_info.value.args[0] == 'Expected "channel_sd_yr.txt" in simulation_date must be a dictionary, but got type "list"' + assert exc_info.value.args[0] == 'Expected "channel_sd_yr.txt" in extract_data must be a dictionary, but got type "list"' # Error: missing has_units subkey for key in extract_data with pytest.raises(Exception) as exc_info: sensitivity_analyzer.simulation_by_sample_parameters( @@ -205,7 +205,7 @@ def test_simulation_by_sample_parameters( } } ) 
- assert 'Invalid key "begin_datee" for "channel_sd_yr.txt" in extract_data' in exc_info.value.args[0] + assert 'Invalid sub-key "begin_datee" for "channel_sd_yr.txt" in extract_data' in exc_info.value.args[0] def test_error_scenario_indicators( @@ -247,7 +247,7 @@ def test_create_sobol_problem( ] params_bounds = [ - pySWATPlus.types.BoundDict(**param) for param in parameters + pySWATPlus.newtype.BoundDict(**param) for param in parameters ] output = sensitivity_analyzer._create_sobol_problem( diff --git a/tests/test_txtinout_reader.py b/tests/test_txtinout_reader.py index 69acdaa..8967cc1 100644 --- a/tests/test_txtinout_reader.py +++ b/tests/test_txtinout_reader.py @@ -1,5 +1,4 @@ import os -import pandas import pySWATPlus import pytest import tempfile @@ -76,10 +75,10 @@ def test_run_swat( simulation_timestep=0, warmup=1, print_prt_control={ - 'channel_sd': {'daily': False}, - 'basin_wb': {} + 'channel_sd': {}, + 'basin_wb': {'daily': False} }, - print_begin_date='01-Feb-2010', + print_begin_date='01-Feb-2011', print_end_date='31-Dec-2011', print_interval=1 ) @@ -91,8 +90,6 @@ def test_run_swat( skiprows=[0, 2], ) - assert pandas.api.types.is_integer_dtype(df['jday']) - # Pass: read CSV file csv_df = pySWATPlus.utils._df_extract( input_file=sim2_dir / 'channel_sd_yr.csv', @@ -104,6 +101,25 @@ def test_run_swat( assert list(df.columns) == list(csv_df.columns) assert all(df.dtypes == csv_df.dtypes) + # Pass: monthly and yearly summary statistics from daily time series + stats_dict = pySWATPlus.DataManager().hru_stats_from_daily_simulation( + sim_file=sim2_dir / 'channel_sd_day.txt', + has_units=True, + gis_id=561, + sim_col='flo_out', + output_dir=sim2_dir + ) + assert isinstance(stats_dict, dict) + assert len(stats_dict) == 2 + + # Error: reading TXT file + with pytest.raises(Exception) as exc_info: + pySWATPlus.DataManager().simulated_timeseries_df( + sim_file=sim2_dir / 'channel_sd_day', + has_units=True + ) + assert 'Error reading the file' in 
exc_info.value.args[0] + # Pass: adding invalid object with flag sim2_reader = pySWATPlus.TxtinoutReader( tio_dir=sim2_dir @@ -153,7 +169,7 @@ def test_error_txtinoutreader_class(): pySWATPlus.TxtinoutReader( tio_dir=tmp_dir ) - assert exc_info.value.args[0] == 'Expected exactly one .exe file in the parent folder, but found none or multiple' + assert 'Expected exactly one ".exe" file in directory' in exc_info.value.args[0] def test_error_enable_object_in_print_prt( @@ -519,41 +535,41 @@ def test_write_calibration_file( ) par_change = [ - pySWATPlus.types.ModifyDict(name='cn2', change_type='pctchg', value=50), - pySWATPlus.types.ModifyDict(name='cn3_swf', change_type='absval', value=0.5), - pySWATPlus.types.ModifyDict(name='ovn', change_type='pctchg', value=50), - pySWATPlus.types.ModifyDict(name='lat_ttime', change_type='absval', value=100), - pySWATPlus.types.ModifyDict(name='latq_co', change_type='absval', value=0.5), - pySWATPlus.types.ModifyDict(name='lat_len', change_type='pctchg', value=50), - pySWATPlus.types.ModifyDict(name='canmx', change_type='absval', value=50), - pySWATPlus.types.ModifyDict(name='esco', change_type='absval', value=0.5), - pySWATPlus.types.ModifyDict(name='epco', change_type='absval', value=0.5), - - pySWATPlus.types.ModifyDict(name='perco', change_type='absval', value=0.5, conditions={'hsg': ['A']}), - pySWATPlus.types.ModifyDict(name='perco', change_type='absval', value=0.5, conditions={'hsg': ['B']}), - pySWATPlus.types.ModifyDict(name='perco', change_type='absval', value=0.5, conditions={'hsg': ['C']}), - pySWATPlus.types.ModifyDict(name='perco', change_type='absval', value=0.5, conditions={'hsg': ['D']}), - - pySWATPlus.types.ModifyDict(name='z', change_type='pctchg', value=20), - pySWATPlus.types.ModifyDict(name='bd', change_type='pctchg', value=50), - pySWATPlus.types.ModifyDict(name='awc', change_type='pctchg', value=100), - pySWATPlus.types.ModifyDict(name='k', change_type='pctchg', value=100), - - 
pySWATPlus.types.ModifyDict(name='surlag', change_type='absval', value=10), - pySWATPlus.types.ModifyDict(name='evrch', change_type='absval', value=0.8), - pySWATPlus.types.ModifyDict(name='evlai', change_type='absval', value=5), - pySWATPlus.types.ModifyDict(name='ffcb', change_type='absval', value=0.5), - - pySWATPlus.types.ModifyDict(name='chn', change_type='absval', value=0.05), - pySWATPlus.types.ModifyDict(name='chk', change_type='absval', value=100), - - pySWATPlus.types.ModifyDict(name='alpha', change_type='absval', value=0.3, units=list(range(1, 143))), - pySWATPlus.types.ModifyDict(name='bf_max', change_type='absval', value=0.3, units=list(range(1, 143))), - pySWATPlus.types.ModifyDict(name='deep_seep', change_type='absval', value=0.1, units=list(range(1, 143))), - pySWATPlus.types.ModifyDict(name='sp_yld', change_type='absval', value=0.2, units=list(range(1, 143))), - pySWATPlus.types.ModifyDict(name='flo_min', change_type='absval', value=10, units=list(range(1, 143))), - pySWATPlus.types.ModifyDict(name='revap_co', change_type='absval', value=0.1, units=list(range(1, 143))), - pySWATPlus.types.ModifyDict(name='revap_min', change_type='absval', value=5, units=list(range(1, 143))), + pySWATPlus.newtype.ModifyDict(name='cn2', change_type='pctchg', value=50), + pySWATPlus.newtype.ModifyDict(name='cn3_swf', change_type='absval', value=0.5), + pySWATPlus.newtype.ModifyDict(name='ovn', change_type='pctchg', value=50), + pySWATPlus.newtype.ModifyDict(name='lat_ttime', change_type='absval', value=100), + pySWATPlus.newtype.ModifyDict(name='latq_co', change_type='absval', value=0.5), + pySWATPlus.newtype.ModifyDict(name='lat_len', change_type='pctchg', value=50), + pySWATPlus.newtype.ModifyDict(name='canmx', change_type='absval', value=50), + pySWATPlus.newtype.ModifyDict(name='esco', change_type='absval', value=0.5), + pySWATPlus.newtype.ModifyDict(name='epco', change_type='absval', value=0.5), + + pySWATPlus.newtype.ModifyDict(name='perco', 
change_type='absval', value=0.5, conditions={'hsg': ['A']}), + pySWATPlus.newtype.ModifyDict(name='perco', change_type='absval', value=0.5, conditions={'hsg': ['B']}), + pySWATPlus.newtype.ModifyDict(name='perco', change_type='absval', value=0.5, conditions={'hsg': ['C']}), + pySWATPlus.newtype.ModifyDict(name='perco', change_type='absval', value=0.5, conditions={'hsg': ['D']}), + + pySWATPlus.newtype.ModifyDict(name='z', change_type='pctchg', value=20), + pySWATPlus.newtype.ModifyDict(name='bd', change_type='pctchg', value=50), + pySWATPlus.newtype.ModifyDict(name='awc', change_type='pctchg', value=100), + pySWATPlus.newtype.ModifyDict(name='k', change_type='pctchg', value=100), + + pySWATPlus.newtype.ModifyDict(name='surlag', change_type='absval', value=10), + pySWATPlus.newtype.ModifyDict(name='evrch', change_type='absval', value=0.8), + pySWATPlus.newtype.ModifyDict(name='evlai', change_type='absval', value=5), + pySWATPlus.newtype.ModifyDict(name='ffcb', change_type='absval', value=0.5), + + pySWATPlus.newtype.ModifyDict(name='chn', change_type='absval', value=0.05), + pySWATPlus.newtype.ModifyDict(name='chk', change_type='absval', value=100), + + pySWATPlus.newtype.ModifyDict(name='alpha', change_type='absval', value=0.3, units=list(range(1, 143))), + pySWATPlus.newtype.ModifyDict(name='bf_max', change_type='absval', value=0.3, units=list(range(1, 143))), + pySWATPlus.newtype.ModifyDict(name='deep_seep', change_type='absval', value=0.1, units=list(range(1, 143))), + pySWATPlus.newtype.ModifyDict(name='sp_yld', change_type='absval', value=0.2, units=list(range(1, 143))), + pySWATPlus.newtype.ModifyDict(name='flo_min', change_type='absval', value=10, units=list(range(1, 143))), + pySWATPlus.newtype.ModifyDict(name='revap_co', change_type='absval', value=0.1, units=list(range(1, 143))), + pySWATPlus.newtype.ModifyDict(name='revap_min', change_type='absval', value=5, units=list(range(1, 143))), ] # Run the method diff --git a/tests/test_types.py 
b/tests/test_types.py deleted file mode 100644 index 745ad3a..0000000 --- a/tests/test_types.py +++ /dev/null @@ -1,27 +0,0 @@ -import pytest -import pySWATPlus - - -def test_error_basedict(): - - # Error: unit value is less than or equal to 0 - with pytest.raises(Exception) as exc_info: - pySWATPlus.types.BaseDict( - name='esco', - change_type='absval', - units=[-1, 0, 2] - ) - assert 'For parameter "esco": all values for "units" must be > 0' in str(exc_info.value) - - -def test_error_bounddict(): - - # Error: upper_bound is smaller than lower_bound in BoundDict - with pytest.raises(Exception) as exc_info: - pySWATPlus.types.BoundDict( - name='perco', - change_type='absval', - lower_bound=1.0, - upper_bound=0.5 - ) - assert 'For parameter "perco": upper_bound=0.5 must be greater than lower_bound=1.0' in str(exc_info.value) diff --git a/tests/test_utils.py b/tests/test_utils.py index 1240619..770f845 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -30,12 +30,12 @@ (-12345678901234, ' -12345678901234'), ] ) -def test_format_val_field_edge_cases( +def test_calibration_val_field_str( value, expected ): - result = pySWATPlus.utils._format_val_field(value) + result = pySWATPlus.utils._calibration_val_field_str(value) # Check total length = 16 assert len(result) == 16 @@ -44,34 +44,34 @@ def test_format_val_field_edge_cases( assert result == expected -def test_compact_units(): +def test_dict_units_compact(): # --- empty input --- - assert pySWATPlus.utils._compact_units([]) == [] + assert pySWATPlus.utils._dict_units_compact([]) == [] # --- id 0 in array --- with pytest.raises(Exception) as exc_info: - pySWATPlus.utils._compact_units([0, 1]) + pySWATPlus.utils._dict_units_compact([0, 1]) assert exc_info.value.args[0] == 'All unit IDs must be 1-based (Fortran-style).' 
# --- single element --- - assert pySWATPlus.utils._compact_units([1]) == [1] + assert pySWATPlus.utils._dict_units_compact([1]) == [1] # --- consecutive sequence --- - assert pySWATPlus.utils._compact_units([1, 2, 3, 4]) == [1, -4] + assert pySWATPlus.utils._dict_units_compact([1, 2, 3, 4]) == [1, -4] # --- non-consecutive numbers --- - assert pySWATPlus.utils._compact_units([1, 2, 3, 5]) == [1, -3, 5] + assert pySWATPlus.utils._dict_units_compact([1, 2, 3, 5]) == [1, -3, 5] # --- unordered input --- - assert pySWATPlus.utils._compact_units([5, 2, 4, 1, 3]) == [1, -5] + assert pySWATPlus.utils._dict_units_compact([5, 2, 4, 1, 3]) == [1, -5] # --- input with duplicates --- - assert pySWATPlus.utils._compact_units([3, 3, 1, 1, 2]) == [1, -3] + assert pySWATPlus.utils._dict_units_compact([3, 3, 1, 1, 2]) == [1, -3] # --- large range --- large_range = list(range(1, 1001)) - assert pySWATPlus.utils._compact_units(large_range) == [1, -1000] + assert pySWATPlus.utils._dict_units_compact(large_range) == [1, -1000] # --- single non-consecutive elements --- - assert pySWATPlus.utils._compact_units([1, 2, 4, 6]) == [1, -2, 4, 6] + assert pySWATPlus.utils._dict_units_compact([1, 2, 4, 6]) == [1, -2, 4, 6] diff --git a/tests/test_validators.py b/tests/test_validators.py index f4863e8..bb76e53 100644 --- a/tests/test_validators.py +++ b/tests/test_validators.py @@ -7,10 +7,10 @@ @pytest.fixture(scope='class') def txtinout_reader(): - # set up TxtInOut direcotry path + # TxtInOut directory path tio_dir = os.path.join(os.path.dirname(__file__), 'TxtInOut') - # initialize TxtinoutReader class + # Initialize TxtinoutReader class output = pySWATPlus.TxtinoutReader( tio_dir=tio_dir ) @@ -24,7 +24,7 @@ def test_calibration_parameters( # Pass: parameter exists parameters = [ - pySWATPlus.types.ModifyDict(**{'name': 'cn2', 'value': 0.5, 'change_type': 'absval'}) + pySWATPlus.newtype.ModifyDict(**{'name': 'cn2', 'value': 0.5, 'change_type': 'absval'}) ]
pySWATPlus.validators._calibration_parameters( input_dir=txtinout_reader.root_dir, @@ -33,7 +33,7 @@ def test_calibration_parameters( # Error: parameter does not exist parameters = [ - pySWATPlus.types.ModifyDict(**{'name': 'obj_that_doesnt_exist', 'value': 0.5, 'change_type': 'absval'}) + pySWATPlus.newtype.ModifyDict(**{'name': 'obj_that_doesnt_exist', 'value': 0.5, 'change_type': 'absval'}) ] with pytest.raises(ValueError, match='obj_that_doesnt_exist'): pySWATPlus.validators._calibration_parameters( @@ -47,28 +47,28 @@ def test_calibration_units( ): # Pass: parameter that supports units - param_change = pySWATPlus.types.ModifyDict(name='cn2', change_type='pctchg', value=-50, units=[1, 2, 3]) + param_change = pySWATPlus.newtype.ModifyDict(name='cn2', change_type='pctchg', value=-50, units=[1, 2, 3]) pySWATPlus.validators._calibration_units( input_dir=txtinout_reader.root_dir, param_change=param_change ) # Pass: parameter that supports units with range - param_change = pySWATPlus.types.ModifyDict(name='cn2', change_type='pctchg', value=-50, units=range(1, 4)) + param_change = pySWATPlus.newtype.ModifyDict(name='cn2', change_type='pctchg', value=-50, units=range(1, 4)) pySWATPlus.validators._calibration_units( input_dir=txtinout_reader.root_dir, param_change=param_change ) # Pass: parameter that supports units with set - param_change = pySWATPlus.types.ModifyDict(name='cn2', change_type='pctchg', value=-50, units={1, 2, 3}) + param_change = pySWATPlus.newtype.ModifyDict(name='cn2', change_type='pctchg', value=-50, units={1, 2, 3}) pySWATPlus.validators._calibration_units( input_dir=txtinout_reader.root_dir, param_change=param_change ) # Error: Parameter that does not support units - param_change = pySWATPlus.types.ModifyDict(name='organicn', change_type='pctchg', value=-50, units=[1, 2, 3]) + param_change = pySWATPlus.newtype.ModifyDict(name='organicn', change_type='pctchg', value=-50, units=[1, 2, 3]) with pytest.raises(Exception) as exc_info: 
pySWATPlus.validators._calibration_units( input_dir=txtinout_reader.root_dir, @@ -83,7 +83,7 @@ def test_calibration_units( sep=r'\s+', usecols=['id'] ) - param_change = pySWATPlus.types.ModifyDict(name='cn2', change_type='pctchg', value=-50, units=[len(df) + 1]) + param_change = pySWATPlus.newtype.ModifyDict(name='cn2', change_type='pctchg', value=-50, units=[len(df) + 1]) with pytest.raises(Exception) as exc_info: pySWATPlus.validators._calibration_units( input_dir=txtinout_reader.root_dir, @@ -97,7 +97,7 @@ def test_calibration_conditions( ): # Pass: No conditions - param_change = pySWATPlus.types.ModifyDict(name="cn2", change_type="pctchg", value=-50) + param_change = pySWATPlus.newtype.ModifyDict(name="cn2", change_type="pctchg", value=-50) pySWATPlus.validators._calibration_conditions( input_dir=txtinout_reader.root_dir, param_change=param_change @@ -120,7 +120,7 @@ def test_calibration_conditions( 'landuse': [valid_landuse[0]] } - param_change = pySWATPlus.types.ModifyDict(name='cn2', change_type='pctchg', value=-50, conditions=conditions) + param_change = pySWATPlus.newtype.ModifyDict(name='cn2', change_type='pctchg', value=-50, conditions=conditions) pySWATPlus.validators._calibration_conditions( input_dir=txtinout_reader.root_dir, param_change=param_change @@ -131,7 +131,7 @@ def test_calibration_conditions( 'invalid_cond': ['A', 'B'], } - param_change = pySWATPlus.types.ModifyDict(name='cn2', change_type='pctchg', value=-50, conditions=conditions) + param_change = pySWATPlus.newtype.ModifyDict(name='cn2', change_type='pctchg', value=-50, conditions=conditions) with pytest.raises(Exception) as exc_info: pySWATPlus.validators._calibration_conditions( input_dir=txtinout_reader.root_dir, @@ -144,7 +144,7 @@ def test_calibration_conditions( conditions = { 'hsg': ['invalid'], } - param_change = pySWATPlus.types.ModifyDict(name='cn2', change_type='pctchg', value=-50, conditions=conditions) + param_change = pySWATPlus.newtype.ModifyDict(name='cn2', 
change_type='pctchg', value=-50, conditions=conditions) with pytest.raises(Exception) as exc_info: pySWATPlus.validators._calibration_conditions( input_dir=txtinout_reader.root_dir, @@ -155,7 +155,7 @@ def test_calibration_conditions( conditions = { 'texture': ['invalid'], } - param_change = pySWATPlus.types.ModifyDict(name='cn2', change_type='pctchg', value=-50, conditions=conditions) + param_change = pySWATPlus.newtype.ModifyDict(name='cn2', change_type='pctchg', value=-50, conditions=conditions) with pytest.raises(Exception) as exc_info: pySWATPlus.validators._calibration_conditions( input_dir=txtinout_reader.root_dir, @@ -166,7 +166,7 @@ def test_calibration_conditions( conditions = { 'plant': ['invalid'], } - param_change = pySWATPlus.types.ModifyDict(name='cn2', change_type='pctchg', value=-50, conditions=conditions) + param_change = pySWATPlus.newtype.ModifyDict(name='cn2', change_type='pctchg', value=-50, conditions=conditions) with pytest.raises(Exception) as exc_info: pySWATPlus.validators._calibration_conditions( input_dir=txtinout_reader.root_dir, @@ -177,7 +177,7 @@ def test_calibration_conditions( conditions = { 'landuse': ['invalid'], } - param_change = pySWATPlus.types.ModifyDict(name='cn2', change_type='pctchg', value=-50, conditions=conditions) + param_change = pySWATPlus.newtype.ModifyDict(name='cn2', change_type='pctchg', value=-50, conditions=conditions) with pytest.raises(Exception) as exc_info: pySWATPlus.validators._calibration_conditions( input_dir=txtinout_reader.root_dir, From febbfd4490a023e17e37e7ad3452870c73441422 Mon Sep 17 00:00:00 2001 From: Debasish Pal <48341250+debpal@users.noreply.github.com> Date: Wed, 22 Oct 2025 15:37:44 +0300 Subject: [PATCH 12/12] Remove an algorithm for numba support requirement --- docs/userguide/model_calibration.md | 1 - pySWATPlus/calibration.py | 8 +++----- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/docs/userguide/model_calibration.md b/docs/userguide/model_calibration.md index 
9ca3d28..0d02a15 100644 --- a/docs/userguide/model_calibration.md +++ b/docs/userguide/model_calibration.md @@ -18,7 +18,6 @@ the calibration interface offers flexible options for optimizing model parameter - Multi-objective algorithms - [Non-dominated sorted Genetic Algorithm-II](https://doi.org/10.1109/4235.996017) - - [Adaptive Geometry Estimation based Multi-objective Evolutionary Algorithm - II](https://doi.org/10.1145/3512290.3528732). - Five indicators for comparing simulated and observed values: diff --git a/pySWATPlus/calibration.py b/pySWATPlus/calibration.py index 08aac1f..c8e1d93 100644 --- a/pySWATPlus/calibration.py +++ b/pySWATPlus/calibration.py @@ -161,7 +161,6 @@ class Calibration(pymoo.core.problem.Problem): # type: ignore[misc] - `GA`: Genetic Algorithm (**single-objective**). - `DE`: [Differential Evolution Algorithm](https://doi.org/10.1007/3-540-31306-0) (**single-objective**). - `NSGA2`: [Non-dominated sorted Genetic Algorithm - II](https://doi.org/10.1109/4235.996017) (**multi-objective**). - - `AGEMOEA2`: [Adaptive Geometry Estimation based Multi-objective Evolutionary Algorithm - II](https://doi.org/10.1145/3512290.3528732) (**multi-objective**). !!! Note Multi-objective algorithms can be used for single-objective optimization, but not vice versa. 
@@ -307,7 +306,7 @@ def _evaluate( # Display start of current generation number self.track_gen = self.track_gen + 1 print( - f'{"\n"}Started generation: {self.track_gen}/{self.n_gen}{"\n"}' + f'\nStarted generation: {self.track_gen}/{self.n_gen}\n' ) # Simulation in separate CPU @@ -395,7 +394,7 @@ def _evaluate( # Print end of current generation number print( - f'{"\n"}Completed generation: {self.track_gen}/{self.n_gen}{"\n"}' + f'\nCompleted generation: {self.track_gen}/{self.n_gen}\n' ) def _objectives_directions( @@ -515,8 +514,7 @@ def _algorithm_class( api_module = { 'GA': importlib.import_module('pymoo.algorithms.soo.nonconvex.ga'), 'DE': importlib.import_module('pymoo.algorithms.soo.nonconvex.de'), - 'NSGA2': importlib.import_module('pymoo.algorithms.moo.nsga2'), - 'AGEMOEA2': importlib.import_module('pymoo.algorithms.moo.age2') + 'NSGA2': importlib.import_module('pymoo.algorithms.moo.nsga2') } # Check algorithm name is valid