From f999ab7b922bb12bd0bfbc98530556aebb3bfff8 Mon Sep 17 00:00:00 2001 From: "M. A. Kowalski" Date: Mon, 16 Jun 2025 16:49:40 +0100 Subject: [PATCH 1/6] feat: change output format to CSV Closes #35 Writes a set of CSV files to a folder instead of 'pickle'ing the output data class. This is more human-readable and should make reading data by post-processing scripts easier. The drawback is that if new members of an unsupported type are introduced they may not be written (a warning is raised in that case) and adding an extra case to the export functions might be necessary. --- src/dementpy.py | 2 +- src/initialization.py | 52 +++++++++++++++++++++++++++++++++++++++- src/output.py | 55 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 107 insertions(+), 2 deletions(-) diff --git a/src/dementpy.py b/src/dementpy.py index c04f8b6..8447bb5 100644 --- a/src/dementpy.py +++ b/src/dementpy.py @@ -101,6 +101,6 @@ def main(): #...export the Output_init object to the output_folder using the export() funtion in the utility module os.chdir('../'+output_folder) - export(Output_init, site, outname) + Output_init.export(outname) main() \ No newline at end of file diff --git a/src/initialization.py b/src/initialization.py index e0da301..ed6338f 100644 --- a/src/initialization.py +++ b/src/initialization.py @@ -6,6 +6,10 @@ import pandas as pd import numpy as np +import warnings +import numbers +from pathlib import Path + from substrate import Substrate from monomer import Monomer from enzyme import Enzyme @@ -163,4 +167,50 @@ def initialize_data(runtime_parameters, site): 'Psi': daily_psi # water potential } - return Data_Dictionary \ No newline at end of file + return Data_Dictionary + + +def export_initialization_dict(base_path: Path | str, d: dict) -> None: + """Export contents of the initialisation directory to a folder. 
+ + Writes each of the items of a type below to a separate CSV file + - pandas.DataFrame + - pandas.Series + - numpy.ndarray of rank 2 or below + All scalar numbers are grouped in a single CSV 'scalars.csv' file. + + Note: + All other items are ignored following a warning! + If you need them written you need to add an extra entry. + """ + + # Create space for output + base_path = Path(base_path) + base_path.mkdir(parents=True, exist_ok=True) + + # Collect all scalar numbers + scalar_numbers = dict() + + for name, member in d.items(): + if isinstance(member, (pd.DataFrame, pd.Series)): + fname = name + ".csv" + member.to_csv(base_path / fname) + elif isinstance(member, np.ndarray): + if len(member.shape) <= 2: + fname = name + ".csv" + np.savetxt(fname, member, delimiter=",") + else: + warnings.warn( + f"Member '{name}' of initialisation dictionary could not be saved since " + f"it is an array of rank higher than 2 (rank: {len(member.shape)})." + ) + elif isinstance(member, numbers.Number): + scalar_numbers[name] = member + else: + warnings.warn( + f"Initialisation member '{name}' has unsupported type '{type(member)}'. " + f"It has not been exported to the output directory '{base_path}'." + ) + + # Print numbers + pd.Series(scalar_numbers).to_csv(base_path / "scalars.csv") diff --git a/src/output.py b/src/output.py index ac555f2..5971c5c 100644 --- a/src/output.py +++ b/src/output.py @@ -1,6 +1,12 @@ # output.py module dealing with outputs of DEMENTpy. 
# Bin Wang, January, 2020 +from pathlib import Path +import warnings +import numbers + +from initialization import export_initialization_dict + import numpy as np import pandas as pd @@ -293,3 +299,52 @@ def microbes_tradeoff(self, ecosystem, year, day): GY_grid = ecosystem.Microbe_C_Gain.groupby(level=0,sort=False).sum() GY_grid.name = self.cycle*year + (day+1) self.Growth_yield = pd.concat([self.Growth_yield,GY_grid],axis=1,sort=False) + + + def export(self, base_path: Path | str) -> None: + """Export contents of the output file to a directory. + + Exports each class member of type pandas.DataFrame to a separate CSV file. + All pandas.Series members are combined in a DataFrame and printed to the 'series.csv' file. + Similarly all scalar numerical members are grouped in 'scalars.csv'. + + Parameters: + base_path : Path + A path that names the root directory where contents will be exported. + If the directory does not exist it will be created. + """ + # Create space for output + base_path = Path(base_path) + base_path.mkdir(parents=True, exist_ok=True) + + # Collect all series and scalar data + # We will dump them at the end + series_data = dict() + scalar_numbers = dict() + + for name, member in vars(self).items(): + if isinstance(member, pd.DataFrame): + fname = name + ".csv" + member.to_csv(base_path / fname) + elif isinstance(member, pd.Series): + series_data[name] = member + elif isinstance(member, numbers.Number): + scalar_numbers[name] = member + elif name == "Initialization": + # Special case - Initialization dictionary + # Serialise it to a subfolder + path = base_path / name + export_initialization_dict(path, member) + else: + warnings.warn( + f"Output member '{name}' has unsupported type '{type(member)}'. " + f"It has not been exported to the output directory '{base_path}'." 
+ ) + + # If it happens that Series have different lengths they will be padded + # with missing data labels (NaNs) + series_data = pd.concat(series_data, axis=1) + series_data.to_csv(base_path / "series.csv") + + # Print numbers + pd.Series(scalar_numbers).to_csv(base_path / "scalars.csv") From a833749a96241be2105fdee6deb22e4136dad0db Mon Sep 17 00:00:00 2001 From: Jeth Walkup <161084507+jgwalkup@users.noreply.github.com> Date: Tue, 20 Jan 2026 13:01:17 -0500 Subject: [PATCH 2/6] Add export_to_netcdf and rename export method Refactor export methods to support CSV and NetCDF formats. --- src/output.py | 86 +++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 84 insertions(+), 2 deletions(-) diff --git a/src/output.py b/src/output.py index 5971c5c..adc96dd 100644 --- a/src/output.py +++ b/src/output.py @@ -5,10 +5,12 @@ import warnings import numbers -from initialization import export_initialization_dict +from initialization import export_initialization_dict_to_csv +from initialization import export_initialization_dict_to_netcdf import numpy as np import pandas as pd +import xarray as xr class Output(): """ @@ -301,7 +303,7 @@ def microbes_tradeoff(self, ecosystem, year, day): self.Growth_yield = pd.concat([self.Growth_yield,GY_grid],axis=1,sort=False) - def export(self, base_path: Path | str) -> None: + def export_to_csv(self, base_path: Path | str) -> None: """Export contents of the output file to a directory. Exports each class member of type pandas.DataFrame to a separate CSV file. @@ -348,3 +350,83 @@ def export(self, base_path: Path | str) -> None: # Print numbers pd.Series(scalar_numbers).to_csv(base_path / "scalars.csv") + + def export_to_netcdf(self, base_path: Path | str) -> None: + """Export contents of the output file to a directory in NetCDF format. + - Each pandas.DataFrame member is saved to a separate .nc file. + - All pandas.Series members are combined and saved to a single 'series.nc' file. 
+ - All scalar numerical members are grouped and saved to 'scalars.nc'. + + Parameters: + base_path : Path + A path that names the root directory where contents will be exported. + If the directory does not exist it will be created. + """ + # Create space for output + base_path = Path(base_path) + base_path.mkdir(parents=True, exist_ok=True) + + # Collect all series and scalar data + series_data = dict() + scalar_numbers = dict() + + for name, member in vars(self).items(): + # convert each DataFrame to an xarray Dataset and save to .nc + if isinstance(member, pd.DataFrame): + # Ensure column names are strings + member.columns = member.columns.astype(str) + if member.index.name is not None: + member.index.name = str(member.index.name) + fname = name + ".nc" # use the .nc extension + try: + xarray_member = xr.Dataset.from_dataframe(member) + xarray_member.to_netcdf(base_path / fname) + except Exception as e: + warnings.warn( + f"Could not export DataFrame '{name}' to NetCDF. Error: {e}" + ) + + elif isinstance(member, pd.Series): + series_data[name] = member + + elif isinstance(member, numbers.Number): + scalar_numbers[name] = member + + elif name == "Initialization": + # Special case - Initialization dictionary + # Serialise it to a subfolder + path = base_path / name + export_initialization_dict_to_netcdf(path, member) + elif isinstance(member, pd.Series): + xrmember = xr.DataArray(member) + fname = name + ".nc" + xrmember.to_netcdf(base_path / fname) + + else: + warnings.warn( + f"Output member '{name}' has unsupported type '{type(member)}'. " + f"It has not been exported to the output directory '{base_path}'." + ) + + # process and save Series + if series_data: + try: + # Combine all Series into a single DataFrame. + combined_series_df = pd.concat(series_data, axis=1) + # Convert the combined DataFrame to an xarray Dataset. + series_dataset = xr.Dataset.from_dataframe(combined_series_df) + # Save the Series Dataset to a single NetCDF file. 
+ series_dataset.to_netcdf(base_path / "series.nc") + except ValueError as e: + # This handles the "duplicate labels" error if it occurs. + warnings.warn( + f"Could not export combined series due to an error: {e}. " + "Consider cleaning the index of your Series data first." + ) + + if scalar_numbers: + # Create an xarray Dataset directly from the dictionary of scalars. + # Each key will become a variable in the NetCDF file. + scalars_dataset = xr.Dataset(scalar_numbers) + # Save the scalars Dataset to a NetCDF file. + scalars_dataset.to_netcdf(base_path / "scalars.nc") From 086224d2690dba052da97479d834ee150158da58 Mon Sep 17 00:00:00 2001 From: Jeth Walkup <161084507+jgwalkup@users.noreply.github.com> Date: Tue, 20 Jan 2026 13:04:56 -0500 Subject: [PATCH 3/6] Add NetCDF export functionality and rename export method Added a new method to export data to NetCDF format and renamed the existing export function. --- src/initialization.py | 85 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 84 insertions(+), 1 deletion(-) diff --git a/src/initialization.py b/src/initialization.py index ed6338f..4f4c115 100644 --- a/src/initialization.py +++ b/src/initialization.py @@ -5,10 +5,12 @@ """ import pandas as pd import numpy as np +import xarray as xr import warnings import numbers from pathlib import Path +from typing import Union from substrate import Substrate from monomer import Monomer @@ -170,7 +172,7 @@ def initialize_data(runtime_parameters, site): return Data_Dictionary -def export_initialization_dict(base_path: Path | str, d: dict) -> None: +def export_initialization_dict_to_csv(base_path: Path | str, d: dict) -> None: """Export contents of the initialisation directory to a folder. 
Writes each of the items of a type below to a separate CSV file @@ -214,3 +216,84 @@ def export_initialization_dict(base_path: Path | str, d: dict) -> None: # Print numbers pd.Series(scalar_numbers).to_csv(base_path / "scalars.csv") + + def export_to_netcdf(self, base_path: Path | str) -> None: + """Export contents of the output file to a directory in NetCDF format. + - Each pandas.DataFrame member is saved to a separate .nc file. + - All pandas.Series members are combined and saved to a single 'series.nc' file. + - All scalar numerical members are grouped and saved to 'scalars.nc'. + + Parameters: + base_path : Path + A path that names the root directory where contents will be exported. + If the directory does not exist it will be created. + """ + # Create space for output + base_path = Path(base_path) + base_path.mkdir(parents=True, exist_ok=True) + + # Collect all series and scalar data + series_data = dict() + scalar_numbers = dict() + + for name, member in vars(self).items(): + # convert each DataFrame to an xarray Dataset and save to .nc + if isinstance(member, pd.DataFrame): + # Ensure column names are strings + member.columns = member.columns.astype(str) + if member.index.name is not None: + member.index.name = str(member.index.name) + fname = name + ".nc" # use the .nc extension + try: + xarray_member = xr.Dataset.from_dataframe(member) + xarray_member.to_netcdf(base_path / fname) + except Exception as e: + warnings.warn( + f"Could not export DataFrame '{name}' to NetCDF. 
Error: {e}" + ) + + elif isinstance(member, pd.Series): + series_data[name] = member + + elif isinstance(member, numbers.Number): + scalar_numbers[name] = member + + elif name == "Initialization": + # Special case - Initialization dictionary + # Serialise it to a subfolder + path = base_path / name + export_initialization_dict_to_netcdf(path, member) + elif isinstance(member, pd.Series): + xrmember = xr.DataArray(member) + fname = name + ".nc" + xrmember.to_netcdf(base_path / fname) + + else: + warnings.warn( + f"Output member '{name}' has unsupported type '{type(member)}'. " + f"It has not been exported to the output directory '{base_path}'." + ) + + # process and save Series + if series_data: + try: + # Combine all Series into a single DataFrame. + combined_series_df = pd.concat(series_data, axis=1) + # Convert the combined DataFrame to an xarray Dataset. + series_dataset = xr.Dataset.from_dataframe(combined_series_df) + # Save the Series Dataset to a single NetCDF file. + series_dataset.to_netcdf(base_path / "series.nc") + except ValueError as e: + # This handles the "duplicate labels" error if it occurs. + warnings.warn( + f"Could not export combined series due to an error: {e}. " + "Consider cleaning the index of your Series data first." + ) + + if scalar_numbers: + # Create an xarray Dataset directly from the dictionary of scalars. + # Each key will become a variable in the NetCDF file. + scalars_dataset = xr.Dataset(scalar_numbers) + # Save the scalars Dataset to a NetCDF file. + scalars_dataset.to_netcdf(base_path / "scalars.nc") + From 1396740c775b7e82d3a423d8ab05edd1537d8d79 Mon Sep 17 00:00:00 2001 From: Jeth Walkup <161084507+jgwalkup@users.noreply.github.com> Date: Tue, 20 Jan 2026 13:09:12 -0500 Subject: [PATCH 4/6] Implement conditional output format for exports Add export format option for output files. The user can now specify in the runtime.txt file if they want model output files to be in NetCDF (default) or CSV format. 
--- src/dementpy.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/dementpy.py b/src/dementpy.py index 8447bb5..f1b97d9 100644 --- a/src/dementpy.py +++ b/src/dementpy.py @@ -45,6 +45,7 @@ def main(): pulse = int(runtime.loc['pulse',1]) # number of pulses cycle = int(runtime.loc['end_time',1]) # number of time steps in each pulse interval = int(runtime.loc['interval',1]) # interval of time step to record outputs + Export_format = int(runtime.loc['output_CSV',1]) # output file format: 1 = CSV, any other value = NetCDF mode = int(runtime.loc['dispersal',1]) # 0:'default' or 1:'dispersal' #...Initialize data by calling the Function: Initialize_Data() @@ -101,6 +102,9 @@ def main(): #...export the Output_init object to the output_folder using the export() funtion in the utility module os.chdir('../'+output_folder) - Output_init.export(outname) + if Export_format == 1: + Output_init.export_to_csv(outname) + else: + Output_init.export_to_netcdf(outname) -main() \ No newline at end of file +main() From bc8220e35a395c94fe834cd9a274e0094ee2895d Mon Sep 17 00:00:00 2001 From: Jeth Walkup <161084507+jgwalkup@users.noreply.github.com> Date: Tue, 20 Jan 2026 13:10:24 -0500 Subject: [PATCH 5/6] Add output_CSV parameter to runtime.txt Added 'output_CSV' parameter to runtime configuration to indicate the file format of model outputs. 
--- input/runtime.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/input/runtime.txt b/input/runtime.txt index 79e31f9..ac8237a 100755 --- a/input/runtime.txt +++ b/input/runtime.txt @@ -24,4 +24,5 @@ Input_NH4 0 Input_PO4 0 direct 0.95 dist 1 -interval 1 \ No newline at end of file +interval 1 +output_CSV 1 From ccc588dedf28cfc7ec263b75e8a7b24a1635f847 Mon Sep 17 00:00:00 2001 From: Jeth Walkup <161084507+jgwalkup@users.noreply.github.com> Date: Tue, 20 Jan 2026 17:06:05 -0500 Subject: [PATCH 6/6] Add xarray dependency to pyproject.toml --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 85bd93d..f9578a7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,6 +7,7 @@ requires-python = ">=3.12" dependencies = [ "pandas>=2.2.3", "scipy>=1.15.2", + "xarray>=2025.12.0" ] [dependency-groups]