diff --git a/CHANGELOG.md b/CHANGELOG.md index 4d8fa3539..f77d93272 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ - Added capability to have transport models that require user input parameters - Add geologic hydrogen surface processing converter - Add baseclass for caching functionality +- Added postprocessing function to save timeseries - Minor reorg for profast tools - Removed hydrogen tank cost and performance models that were unused diff --git a/docs/user_guide/postprocessing_results.md b/docs/user_guide/postprocessing_results.md index 4df3ef15b..5c1e47578 100644 --- a/docs/user_guide/postprocessing_results.md +++ b/docs/user_guide/postprocessing_results.md @@ -88,3 +88,87 @@ print(model.prob.get_val("electrolyzer.total_hydrogen_produced", units='kg')) This will print the total hydrogen produced by the electrolyzer in kg. The `get_val` method is used to access the value of the variable in the `prob` object. The `units` argument is used to specify the units of the value to be returned. + +### Saving outputs + +The time series outputs can be saved to a csv output using the `save_case_timeseries_as_csv` function. If no variables are specified, then the function saves all time series variables in the simulation. Otherwise, the specified variables are saved. + +The `vars_to_save` argument supports three different input formats: + +1. **List of variable names** - saves variables with their default units +2. **Dictionary with units** - keys are variable names, values are the desired units +3. 
**Dictionary with options** - keys are variable names, values are dictionaries with `"units"` and/or `"alternative_name"` keys + +#### Example 1: Save all timeseries data + +```python +from h2integrate.core.h2integrate_model import H2IntegrateModel +from h2integrate.postprocess.sql_timeseries_to_csv import save_case_timeseries_as_csv + +# Create and run a H2Integrate model +model = H2IntegrateModel("top_level_config.yaml") +model.run() +model.post_process() + +# Save all timeseries data to a csv file +timeseries_data = save_case_timeseries_as_csv(model.recorder_path) +``` + +#### Example 2: Specify variables as a list + +When providing a list of variable names, the function uses the default units for each variable. + +```python +# Get a subset of timeseries data using a list of variable names +output_vars = [ + "electrolyzer.hydrogen_out", + "hopp.electricity_out", + "ammonia.ammonia_out", + "h2_storage.hydrogen_out", +] + +# Don't save subset of timeseries to a csv file using save_to_file=False +timeseries_data = save_case_timeseries_as_csv( + model.recorder_path, vars_to_save=output_vars, save_to_file=False +) +``` + +#### Example 3: Specify variables with custom units + +When providing a dictionary with variable names as keys and unit strings as values, the function converts each variable to the specified units. + +```python +# Specify variables with custom units +vars_with_units = { + "ammonia.hydrogen_in": "kg/h", + "h2_storage.hydrogen_in": "kg/h", + "electrolyzer.electricity_in": "kW", +} + +timeseries_data = save_case_timeseries_as_csv( + model.recorder_path, vars_to_save=vars_with_units, save_to_file=False +) +``` + +#### Example 4: Specify variables with alternative column names + +When providing a dictionary with variable names as keys and dictionaries as values, you can specify both custom units and alternative column names for the output DataFrame. 
+ +```python +# Specify variables with alternative names and/or units +vars_with_options = { + "electrolyzer.hydrogen_out": {"alternative_name": "Electrolyzer Hydrogen Output"}, + "hopp.electricity_out": {"units": "kW", "alternative_name": "Plant Electricity Output"}, + "ammonia.ammonia_out": {"alternative_name": None}, # Uses default variable name + "h2_storage.hydrogen_out": {"alternative_name": "H2 Storage Hydrogen Output"}, +} + +timeseries_data = save_case_timeseries_as_csv( + model.recorder_path, vars_to_save=vars_with_options, save_to_file=False +) +# Resulting columns: "Electrolyzer Hydrogen Output (kg/h)", "Plant Electricity Output (kW)", etc. +``` + +```{note} +The `electricity_base_unit` argument (default: `"MW"`) controls the units used for electricity-based variables when no specific units are provided. Valid options are `"W"`, `"kW"`, `"MW"`, or `"GW"`. +``` diff --git a/examples/02_texas_ammonia/driver_config.yaml b/examples/02_texas_ammonia/driver_config.yaml index d3b06a03e..22fa1d24f 100644 --- a/examples/02_texas_ammonia/driver_config.yaml +++ b/examples/02_texas_ammonia/driver_config.yaml @@ -3,3 +3,20 @@ description: "This analysis runs a hybrid plant to match the first example in H2 general: folder_output: outputs + +recorder: + # required inputs + flag: True #record outputs + file: "cases.sql" #this file will be written to the folder `outputs` + + # optional but recommended inputs + overwrite_recorder: True #If True, do not create a unique recorder file for subsequent runs. Defaults to False. + recorder_attachment: "model" #"driver" or "model", defaults to "model". Use "driver" if running a parallel simulation. 
+ includes: ["*"] #include everything + excludes: ["*resource_data*"] #exclude resource data + + # below are optional and defaulted to the OpenMDAO default + # record_inputs: True #defaults to True + # record_outputs: True #defaults to True + # record_residuals: True #defaults to True + # options_excludes: #this is only used if recorder_attachment is "model" diff --git a/examples/02_texas_ammonia/run_texas_ammonia_plant.py b/examples/02_texas_ammonia/run_texas_ammonia_plant.py index b4b53b5ec..14c766e51 100644 --- a/examples/02_texas_ammonia/run_texas_ammonia_plant.py +++ b/examples/02_texas_ammonia/run_texas_ammonia_plant.py @@ -1,16 +1,35 @@ import numpy as np from h2integrate.core.h2integrate_model import H2IntegrateModel +from h2integrate.postprocess.sql_timeseries_to_csv import save_case_timeseries_as_csv # Create a H2Integrate model model = H2IntegrateModel("02_texas_ammonia.yaml") + # Set battery demand profile to electrolyzer capacity # TODO: Update with demand module once it is developed demand_profile = np.ones(8760) * 640.0 model.setup() model.prob.set_val("battery.electricity_demand", demand_profile, units="MW") + # Run the model model.run() model.post_process() + +# Save all timeseries data to a csv file +timeseries_data = save_case_timeseries_as_csv(model.recorder_path) + +# Get a subset of timeseries data +vars_to_save = [ + "electrolyzer.hydrogen_out", + "hopp.electricity_out", + "ammonia.ammonia_out", + "h2_storage.hydrogen_out", +] + +# Don't save subset of timeseries to a csv file using save_to_file=False +timeseries_data = save_case_timeseries_as_csv( + model.recorder_path, vars_to_save=vars_to_save, save_to_file=False +) diff --git a/h2integrate/core/pose_optimization.py b/h2integrate/core/pose_optimization.py index e2ae0fc5b..2cc367e6d 100644 --- a/h2integrate/core/pose_optimization.py +++ b/h2integrate/core/pose_optimization.py @@ -448,8 +448,8 @@ def set_recorders(self, opt_prob): for current optimization problem Returns: - opt_prob 
(openmdao problem instance): openmdao problem instance for - current optimization problem edited to include a set up recorder + recorder_path (Path or None): Path to the recorder file if recorder is enabled, + None otherwise """ folder_output = self.config["general"]["folder_output"] @@ -525,7 +525,7 @@ def set_recorders(self, opt_prob): opt_prob.model.recording_options["excludes"] = self.config["recorder"].get( "excludes", ["*resource_data"] ) - return + return recorder_path if recorder_attachment == "driver": recorder_options += [ @@ -550,6 +550,7 @@ def set_recorders(self, opt_prob): "excludes", ["*resource_data"] ) return recorder_path + return None def set_restart(self, opt_prob): diff --git a/h2integrate/postprocess/sql_timeseries_to_csv.py b/h2integrate/postprocess/sql_timeseries_to_csv.py new file mode 100644 index 000000000..c8dd2d62c --- /dev/null +++ b/h2integrate/postprocess/sql_timeseries_to_csv.py @@ -0,0 +1,230 @@ +from pathlib import Path + +import numpy as np +import pandas as pd +import openmdao.api as om + + +def check_get_units_for_var(case, var, electricity_base_unit: str, user_specified_unit=None): + """Check the units for a variable within a case, with the following logic: + + 0) If ``user_specified_unit`` is a string, get the variable value in units of + ``user_specified_unit`` then continue to Step 5. + If ``user_specified_unit`` is None, continue to Step 1. + 1) Get the default units of the variable. Continue to Step 2. + 2) Check if the default units contain electricity units. + If the default units do contain an electricity unit, then continue to Step 3. + Otherwise, continue to Step 4. + 3) Replace the default electricity unit in the default units with ``electricity_base_unit``. + Get the variable value in units of the updated units and continue to Step 5. + 4) Get the variable value in the default units and continue to Step 5. + 5) Return the variable value and the corresponding units. 
+ + Args: + case (om.recorders.case.Case): OpenMDAO case object. + var (str): variable name + electricity_base_unit (str): Units to save any electricity-based profiles in. + Must be either "W", "kW", "MW", or "GW". + user_specified_unit (str | None, optional): Units to return the variable value in, + overriding the variable's default units. Defaults to None. + + Returns: + 2-element tuple containing + + - **val** (np.ndarray | list | tuple): value of the `var` in units `var_unit`. + - **var_unit** (str): units that `val` is returned in. + """ + electricity_type_units = ["W", "kW", "MW", "GW"] + # 0) check if user_specified_unit is not None + if user_specified_unit is not None: + # Get the variable value in units of ``user_specified_unit`` + val = case.get_val(var, units=user_specified_unit) + return val, user_specified_unit + + # 1) Get the default units of the variable + var_unit = case._get_units(var) + + # 2) Check if the default units contain electricity units. + is_electric = any(electricity_unit in var_unit for electricity_unit in electricity_type_units) + if is_electric: + var_electricity_unit = [ + electricity_unit + for electricity_unit in electricity_type_units + if electricity_unit in var_unit + ] + # 3) Replace the default electricity unit in var_unit with electricity_base_unit + new_var_unit = var_unit.replace(var_electricity_unit[-1], electricity_base_unit) + val = case.get_val(var, units=new_var_unit) + return val, new_var_unit + + # 4) Get the variable value in the default units + val = case.get_val(var, units=var_unit) + + # 5) Return the variable value and the corresponding units + return val, var_unit + + +def save_case_timeseries_as_csv( + sql_fpath: Path | str, + case_index: int = -1, + electricity_base_unit="MW", + vars_to_save: dict | list = {}, + save_to_file: bool = True, +): + """Summarize timeseries data from a case within an sql recorder file to a DataFrame + and save to csv file if `save_to_file` is True. + + Each column is a variable, each row is a timestep.
+ Column names are formatted as: + + - "{promoted variable name} ({units})" for continuous variables + + Args: + sql_fpath (Path | str): Filepath to sql recorder file. + case_index (int, optional): Index of the case in the sql file to save results for. + Defaults to -1. + electricity_base_unit (str, optional): Units to save any electricity-based profiles in. + Must be either "W", "kW", "MW", or "GW". Defaults to "MW". + vars_to_save (dict | list, optional): An empty list or dictionary indicates to save + all the timeseries variables in the case. If a list, should be a list of variable names + to save. If a dictionary, should have keys of variable names and either values of units + for the corresponding variable or a dictionary containing the keys "units" and/or + "alternative_name". Defaults to {}. + save_to_file (bool, optional): Whether to save the summary csv file to the same + folder as the sql file(s). Defaults to True. + + Raises: + ValueError: if electricity_base_unit is not "W", "kW", "MW", or "GW". + FileNotFoundError: If the sql file does not exist or multiple sql files have the same name. + ValueError: If no valid timeseries variables are input with vars_to_save and + vars_to_save is not an empty list or dictionary. + + Returns: + pd.DataFrame: summary of timeseries results from the sql file. + """ + electricity_type_units = ["W", "kW", "MW", "GW"] + if electricity_base_unit not in electricity_type_units: + msg = ( + f"Invalid input for electricity_base_unit {electricity_base_unit}. " + f"Valid options are {electricity_type_units}." + ) + raise ValueError(msg) + + sql_fpath = Path(sql_fpath) + + # check if multiple sql files exist with the same name and suffix. 
+ sql_files = list(Path(sql_fpath.parent).glob(f"{sql_fpath.name}*")) + + # check that at least one sql file exists + if len(sql_files) == 0: + raise FileNotFoundError(f"{sql_fpath} file does not exist.") + + # check if a metadata file is contained in sql_files + contains_meta_sql = any("_meta" in sql_file.suffix for sql_file in sql_files) + if contains_meta_sql: + # remove metadata file from filelist + sql_files = [sql_file for sql_file in sql_files if "_meta" not in sql_file.suffix] + + # check that only one sql file was input + if len(sql_files) > 1: + msg = ( + f"{sql_fpath} points to {len(sql_files)} different sql files, please specify the " + f"filepath of a single sql file." + ) + raise FileNotFoundError(msg) + + # load the sql file and extract cases + cr = om.CaseReader(Path(sql_files[0])) + case = cr.get_case(case_index) + + # get list of input and output names + output_var_dict = case.list_outputs(val=False, out_stream=None, return_format="dict") + input_var_dict = case.list_inputs(val=False, out_stream=None, return_format="dict") + + # create list of variables to loop through + var_list = [v["prom_name"] for v in output_var_dict.values()] + var_list += [v["prom_name"] for v in input_var_dict.values()] + var_list.sort() + + # if vars_to_save is not empty, then only include the variables in var_list + if vars_to_save: + if isinstance(vars_to_save, dict): + varnames_to_save = list(vars_to_save.keys()) + var_list = [v for v in var_list if v in varnames_to_save] + if isinstance(vars_to_save, list): + var_list = [v for v in var_list if v in vars_to_save] + + if len(var_list) == 0: + raise ValueError("No variables were found to be saved") + + # initialize output dictionaries + var_to_values = {} # variable to the value + var_to_units = {} # variable to the units + var_to_alternative_names = [] # alternative name for each saved variable, in insertion order + for var in var_list: + if var in var_to_values: + # don't duplicate data + continue + + # get the value + val = case.get_val(var) + +
# Skip costs that are per year of plant life (not per timestep) + if "varopex" in var.lower() or "annual_fixed_costs" in var.lower(): + continue + + # skip discrete inputs/outputs (like resource data) + if isinstance(val, (dict, pd.DataFrame, pd.Series)): + continue + + # skip scalar data + if isinstance(val, (int, float, str, bool)): + continue + + if isinstance(val, (np.ndarray, list, tuple)): + if len(val) > 1: + user_units = None + alternative_name = None + # Only do this if vars_to_save is a dict and it is not empty + if vars_to_save and isinstance(vars_to_save, dict): + # Only do this if the vars_to_save[var] is a dict for units and alternative name + if isinstance(vars_to_save[var], dict): + user_units = vars_to_save[var].get("units", None) + alternative_name = vars_to_save[var].get("alternative_name", None) + # Otherwise, just pull the units directly + # This means that you can only specify units by itself, not alternative names + # Should we make all of these be entered as dicts then? 
+ else: + user_units = vars_to_save.get(var, None) + + var_val, var_units = check_get_units_for_var( + case, var, electricity_base_unit, user_specified_unit=user_units + ) + var_to_units[var] = var_units + var_to_values[var] = var_val + var_to_alternative_names.append(alternative_name) + + # map alternative names to variable names if not None + alt_name_mapper = { + old_name: new_name if new_name is not None else old_name + for old_name, new_name in zip(var_to_values.keys(), var_to_alternative_names) + } + # update var_to_values and var_to_units with alternative names + var_to_values = {alt_name_mapper[k]: v for k, v in var_to_values.items()} + var_to_units = {alt_name_mapper[k]: v for k, v in var_to_units.items()} + + # rename columns to include units + column_rename_mapper = { + v_name: f"{v_name} ({v_units})" for v_name, v_units in var_to_units.items() + } + + results = pd.DataFrame(var_to_values) + + results = results.rename(columns=column_rename_mapper) + + # save file to csv + if save_to_file: + csv_fname = f"{sql_fpath.name.replace('.sql','_').strip('_')}_Case{case_index}.csv" + output_fpath = sql_fpath.parent / csv_fname + results.to_csv(output_fpath, index=False) + + return results diff --git a/h2integrate/postprocess/test/__init__.py b/h2integrate/postprocess/test/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/h2integrate/postprocess/test/test_sql_timeseries_to_csv.py b/h2integrate/postprocess/test/test_sql_timeseries_to_csv.py new file mode 100644 index 000000000..993c31764 --- /dev/null +++ b/h2integrate/postprocess/test/test_sql_timeseries_to_csv.py @@ -0,0 +1,117 @@ +import os + +from pytest import fixture + +from h2integrate import EXAMPLE_DIR +from h2integrate.core.h2integrate_model import H2IntegrateModel +from h2integrate.postprocess.sql_timeseries_to_csv import save_case_timeseries_as_csv + + +@fixture +def run_example_02_sql_fpath(): + # check if case file exists, if so, return the filepath + sql_fpath = EXAMPLE_DIR / 
"02_texas_ammonia" / "outputs" / "cases.sql" + if sql_fpath.exists(): + return sql_fpath + else: + os.chdir(EXAMPLE_DIR / "02_texas_ammonia") + # Create a H2Integrate model + h2i = H2IntegrateModel("02_texas_ammonia.yaml") + + # Run the model + h2i.run() + + return h2i.recorder_path.absolute() + + +def test_save_csv_all_results(subtests, run_example_02_sql_fpath): + expected_csv_fpath = EXAMPLE_DIR / "02_texas_ammonia" / "outputs" / "cases_Case-1.csv" + res = save_case_timeseries_as_csv(run_example_02_sql_fpath, save_to_file=True) + + with subtests.test("Check number of columns"): + assert len(res.columns.to_list()) == 35 + + with subtests.test("Check number of rows"): + assert len(res) == 8760 + + with subtests.test("CSV File exists"): + assert expected_csv_fpath.exists() + + +def test_make_df_from_varname_list(subtests, run_example_02_sql_fpath): + vars_to_save = [ + "electrolyzer.hydrogen_out", + "combiner.electricity_out", + "ammonia.ammonia_out", + "h2_storage.hydrogen_out", + ] + + res = save_case_timeseries_as_csv( + run_example_02_sql_fpath, vars_to_save=vars_to_save, save_to_file=False + ) + + with subtests.test("Check number of columns"): + assert len(res.columns.to_list()) == len(vars_to_save) + + with subtests.test("Check number of rows"): + assert len(res) == 8760 + + with subtests.test("All vars in dataframe"): + colnames_no_units = [c.split("(")[0].strip() for c in res.columns.to_list()] + assert all(var_name in colnames_no_units for var_name in vars_to_save) + + +def test_make_df_from_varname_unit_dict(subtests, run_example_02_sql_fpath): + vars_units_to_save = { + "ammonia.hydrogen_in": "kg/h", + "h2_storage.hydrogen_in": "kg/h", + "electrolyzer.electricity_in": "kW", + } + + res = save_case_timeseries_as_csv( + run_example_02_sql_fpath, vars_to_save=vars_units_to_save, save_to_file=False + ) + + with subtests.test("Check number of columns"): + assert len(res.columns.to_list()) == len(vars_units_to_save) + + with subtests.test("Check number of 
rows"): + assert len(res) == 8760 + + with subtests.test("All vars in dataframe"): + expected_colnames = [ + f"{v_name} ({v_unit})" for v_name, v_unit in vars_units_to_save.items() + ] + assert all(c_name in res.columns.to_list() for c_name in expected_colnames) + + +def test_alternative_column_names(subtests, run_example_02_sql_fpath): + vars_to_save = { + "electrolyzer.hydrogen_out": {"alternative_name": "Electrolyzer Hydrogen Output"}, + "combiner.electricity_out": {"units": "kW", "alternative_name": "Plant Electricity Output"}, + "ammonia.ammonia_out": {"alternative_name": None}, + "h2_storage.hydrogen_out": {"alternative_name": "H2 Storage Hydrogen Output"}, + } + + res = save_case_timeseries_as_csv( + run_example_02_sql_fpath, + vars_to_save=vars_to_save, + save_to_file=False, + ) + + expected_name_list = [ + "Electrolyzer Hydrogen Output (kg/h)", + "Plant Electricity Output (kW)", + "ammonia.ammonia_out (kg/h)", + "H2 Storage Hydrogen Output (kg/h)", + ] + + with subtests.test("Check number of columns"): + assert len(res.columns.to_list()) == len(vars_to_save) + + with subtests.test("Check number of rows"): + assert len(res) == 8760 + + with subtests.test("All vars in dataframe with units"): + expected_colnames = [f"{v_name}" for v_name in expected_name_list] + assert all(c_name in res.columns.to_list() for c_name in expected_colnames)