Merged
Changes from all commits
19 commits
bbdd582
Setting up testing runs
ChristopherCaradonna Sep 25, 2025
cf4fd58
progress
ChristopherCaradonna Oct 29, 2025
14316b4
Plotting updates - works for measure comparisons, but needs cleanup
ChristopherCaradonna Nov 5, 2025
4185c1d
Move AWS/S3 methods to comstock.py
ChristopherCaradonna Nov 6, 2025
7f605d1
Move S3 retrieval methods to comstock.py
ChristopherCaradonna Nov 6, 2025
50b35fd
works with both states and counties but not yet multiple at one time
ChristopherCaradonna Nov 7, 2025
d529345
refactor code to accommodate multiple state/county queries together f…
ChristopherCaradonna Nov 7, 2025
f234281
Update compare_upgrades-test_timeseries_plots.py
ChristopherCaradonna Nov 7, 2025
6e878d5
Add AMI plot functionality and some cleanup
ChristopherCaradonna Nov 14, 2025
2854224
Adds warning if an applicability list for timeseries is empty instead…
ChristopherCaradonna Nov 18, 2025
5b49d3e
Merge branch 'ccaradon/sdr_2025_r4' into ccaradon/sdr_2025_r4_fix_tim…
ChristopherCaradonna Nov 19, 2025
925eccb
Merge branch 'ccaradon/sdr_2025_r4' into ccaradon/sdr_2025_r4_fix_tim…
ChristopherCaradonna Dec 22, 2025
d64e60a
Update postprocessing/comstockpostproc/plotting_mixin.py
ChristopherCaradonna Dec 22, 2025
9d337a2
Update postprocessing/comstockpostproc/comstock.py
ChristopherCaradonna Dec 22, 2025
1014b95
Update postprocessing/comstockpostproc/plotting_mixin.py
ChristopherCaradonna Dec 22, 2025
3ef0577
Update postprocessing/comstockpostproc/plotting_mixin.py
ChristopherCaradonna Dec 22, 2025
14650c5
PR review updates
ChristopherCaradonna Dec 30, 2025
168b72c
Merge branch 'ccaradon/sdr_2025_r4_fix_timeseries_plots' of https://g…
ChristopherCaradonna Dec 30, 2025
043aec3
Various updates from PR comments
ChristopherCaradonna Dec 30, 2025
122 changes: 93 additions & 29 deletions postprocessing/compare_comstock_to_ami.py.template
@@ -12,37 +12,101 @@ logger = logging.getLogger(__name__)
def main():
# ComStock run
comstock = cspp.ComStock(
s3_base_dir='eulp/comstock_core',
comstock_run_name='ami_comparison',
comstock_run_version='ami_comparison',
comstock_year=2018,
truth_data_version='v01',
buildstock_csv_name='buildstock.csv',
acceptable_failure_percentage=0.9,
drop_failed_runs=True,
color_hex='#0072B2',
skip_missing_columns=True,
athena_table_name='ami_comparison',
reload_from_csv=False,
include_upgrades=False
)

# CBECS
s3_base_dir='com-sdr', # If run not on S3, download results_up**.parquet manually
comstock_run_name='rtuadv_v11', # Name of the run on S3
comstock_run_version='rtuadv_v11', # Use whatever you want to see in plot and folder names
comstock_year=2018, # Typically don't change this
athena_table_name=None, # Typically don't change this
truth_data_version='v01', # Typically don't change this
buildstock_csv_name='buildstock.csv', # Download buildstock.csv manually
acceptable_failure_percentage=0.25, # Can increase this when testing and high failure rates are OK
drop_failed_runs=True, # False if you want to evaluate which runs failed in raw output data
color_hex='#0072B2', # Color used to represent this run in plots
skip_missing_columns=True, # False if you want to ensure you have all data specified for export
reload_from_cache=False, # True if CSV already made and want faster reload times
include_upgrades=False, # False if not looking at upgrades
# output_dir = 's3://oedi-data-lake/nrel-pds-building-stock/end-use-load-profiles-for-us-building-stock/2025/comstock_amy2018_release_1'
)


# Stock Estimation for Apportionment:
stock_estimate = cspp.Apportion(
stock_estimation_version='2025R3', # Only updated when a new stock estimate is published
truth_data_version='v01', # Typically don't change this
reload_from_cache=False # Set to True if you have already run apportionment and want consistent values between postprocessing runs
)

# Scale ComStock runs to the 'truth data' from StockE V3 estimates using bucket-based apportionment
base_sim_outs = comstock.get_sim_outs_for_upgrade(0)
comstock.create_allocated_weights(stock_estimate, base_sim_outs, reload_from_cache=False)

# CBECS
cbecs = cspp.CBECS(
cbecs_year = 2018,
truth_data_version='v01',
color_hex='#009E73',
reload_from_csv=False
cbecs_year=2018, # 2012 and 2018 currently available
truth_data_version='v01', # Typically don't change this
color_hex='#009E73', # Color used to represent CBECS in plots
reload_from_csv=True # True if CSV already made and want faster reload times
)

# Scale ComStock run to CBECS 2018 AND remove non-ComStock buildings from CBECS

# First scale the ComStock run to the 'truth data' from StockE V3 estimates using bucket-based apportionment
# Then scale the ComStock run to CBECS 2018 AND remove non-ComStock buildings from CBECS
# This is how weights in the models are set to represent national energy consumption
comstock.add_national_scaling_weights(cbecs, remove_non_comstock_bldg_types_from_cbecs=True)
base_sim_outs = comstock.get_sim_outs_for_upgrade(0)
alloc_wts = comstock.get_allocated_weights()
comstock.create_allocated_weights_scaled_to_cbecs(cbecs, base_sim_outs, alloc_wts, remove_non_comstock_bldg_types_from_cbecs=True)
comstock.create_allocated_weights_plus_util_bills_for_upgrade(0)


# county resolution, files by state and county
county_resolution = {
'geo_top_dir': 'by_state_and_county',
'partition_cols': {
comstock.STATE_ABBRV: 'state',
comstock.COUNTY_ID: 'county',
},
'aggregation_levels': [comstock.COUNTY_ID],
'data_types': ['full'],
'file_types': ['parquet'],
}

# state level resolution, one single national file
state_resolution = {
'geo_top_dir': 'national_by_state',
'partition_cols': {},
'aggregation_levels': [[comstock.STATE_ABBRV, comstock.CZ_ASHRAE]],
'data_types': ['full'], # other options: 'detailed', 'basic'; if using multiple, order from most to least detailed (see example below)
'file_types': ['parquet'], # file format(s) for the exported data
}
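# Example (illustrative only; assumes 'full' is the most detailed of the three
# data types) of requesting multiple data types in the required order:
# 'data_types': ['full', 'detailed', 'basic'],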

# specify the export level
# IMPORTANT: if making county-level timeseries plots, you must export county-level data to S3; this does not happen automatically.
geo_exports = [county_resolution] # or [state_resolution], or both

for geo_export in geo_exports:
# write files locally as needed - usually not needed for AMI comparison plots
#comstock.export_metadata_and_annual_results_for_upgrade(upgrade_id=0, geo_exports=[geo_export])

# Also write to S3 if making timeseries plots
s3_dir = f"s3://{comstock.s3_base_dir}/{comstock.comstock_run_name}/{comstock.comstock_run_name}"
s3_output_dir = comstock.setup_fsspec_filesystem(s3_dir, aws_profile_name=None)
comstock.export_metadata_and_annual_results_for_upgrade(upgrade_id=0, geo_exports=[geo_export], output_dir=s3_output_dir)
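# Aside (illustrative, not the repo's API): setup_fsspec_filesystem presumably
# returns an fsspec/s3fs-backed output location. A minimal sketch resolving the
# same URL directly with fsspec, assuming s3fs is installed:
#   import fsspec
#   fs, path = fsspec.core.url_to_fs(s3_dir)  # fs: S3FileSystem; path: bucket/prefix
#   fs.ls(path)  # list objects under the export prefix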

# write select results to S3 for Athena/Glue when needed for timeseries plots
s3_dir = f"s3://{comstock.s3_base_dir}/{comstock.comstock_run_name}/{comstock.comstock_run_name}"
database = "enduse"
crawler_name = comstock.comstock_run_name # used to set name of crawler, cannot include slashes
workgroup = "eulp" # Athena workgroup to use
glue_service_role = "service-role/AWSGlueServiceRole-default"

# Create Athena/Glue tables from the exported parquet files
comstock.create_sightglass_tables(
s3_location=f"{s3_dir}/metadata_and_annual_results_aggregates",
dataset_name=crawler_name,
database_name=database,
glue_service_role=glue_service_role)
comstock.fix_timeseries_tables(crawler_name, database)
comstock.create_views(crawler_name, database, workgroup)
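# Aside (hypothetical, not part of this PR): once the Glue tables and views
# exist, the aggregates can be spot-checked from Athena, e.g. with awswrangler
# (the exact table name depends on what the crawler registers):
#   import awswrangler as wr
#   df = wr.athena.read_sql_query(
#       f'SELECT * FROM "{database}"."{crawler_name}" LIMIT 10',
#       database=database, workgroup=workgroup)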

# Export CBECS and ComStock data to wide and long formats for Tableau and to skip processing later
cbecs.export_to_csv_wide() # May comment this out after running once
comstock.export_to_csv_wide() # May comment this out after running once

# AMI
ami = cspp.AMI(
truth_data_version='v01',
@@ -52,8 +116,8 @@ def main():

# comparison
comparison = cspp.ComStockToAMIComparison(comstock, ami, make_comparison_plots=True)
comparison.export_plot_data_to_csv_wide()
#comparison.export_plot_data_to_csv_wide()

# Code to execute the script
if __name__ == "__main__":
main()
140 changes: 140 additions & 0 deletions postprocessing/compare_upgrades-test_timeseries_plots.py
@@ -0,0 +1,140 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import logging
import comstockpostproc as cspp

logging.basicConfig(level='INFO') # Use DEBUG, INFO, or WARNING
logger = logging.getLogger(__name__)

def main():
# ComStock run
comstock = cspp.ComStock(
s3_base_dir='com-sdr', # If run not on S3, download results_up**.parquet manually
comstock_run_name='pump_v9', # Name of the run on S3
comstock_run_version='pump_v9', # Use whatever you want to see in plot and folder names
comstock_year=2018, # Typically don't change this
athena_table_name=None, # Typically don't change this
truth_data_version='v01', # Typically don't change this
buildstock_csv_name='buildstock.csv', # Download buildstock.csv manually
acceptable_failure_percentage=0.05, # Can increase this when testing and high failure rates are OK
drop_failed_runs=True, # False if you want to evaluate which runs failed in raw output data
color_hex='#0072B2', # Color used to represent this run in plots
skip_missing_columns=True, # False if you want to ensure you have all data specified for export
reload_from_cache=False, # True if CSV already made and want faster reload times
include_upgrades=True, # False if not looking at upgrades
upgrade_ids_to_skip=[], # Use [1, 3] etc. to exclude certain upgrades
make_timeseries_plots=False,
timeseries_locations_to_plot={
'MN': 'Minnesota', # specify location (either county ID or state ID) and corresponding name for plots and folders.
#'MA':'Massachusetts',
#'OR': 'Oregon',
#'LA': 'Louisiana',
#'AZ': 'Arizona',
#'TN': 'Tennessee',
('MA', 'NH', 'CT', 'VT', 'RI'): 'New England', # example of multiple states together - using tuples as keys
#'G4900350': 'Salt Lake City',
#'G2500250': 'Boston', # if specifying a county, you must export county level data to S3
#'G4804530': 'Austin',
('G2500250', 'G4804530'): 'Baustin' # multiple counties together - using tuples as keys
},

upgrade_ids_for_comparison={} # Use {'<Name you want for comparison run folder>': [0, 1, 2]}; add as many upgrade IDs as needed, but plots look strange with more than 5
#output_dir = 's3://oedi-data-lake/nrel-pds-building-stock/end-use-load-profiles-for-us-building-stock/2025/comstock_amy2018_release_1'
)
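# Aside (illustrative only): timeseries_locations_to_plot mixes string keys
# (single location) and tuple keys (grouped locations). One way downstream code
# can treat both uniformly is to normalize every key to a list of location IDs:
#   for key, name in comstock.timeseries_locations_to_plot.items():
#       location_ids = [key] if isinstance(key, str) else list(key)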

# Stock Estimation for Apportionment:
stock_estimate = cspp.Apportion(
stock_estimation_version='2025R3', # Only updated when a new stock estimate is published
truth_data_version='v01', # Typically don't change this
reload_from_cache=False, # Set to True if you have already run apportionment and want consistent values between postprocessing runs
#output_dir = 's3://oedi-data-lake/nrel-pds-building-stock/end-use-load-profiles-for-us-building-stock/2025/comstock_amy2018_release_1'
)

# Scale ComStock runs to the 'truth data' from StockE V3 estimates using bucket-based apportionment
base_sim_outs = comstock.get_sim_outs_for_upgrade(0)
comstock.create_allocated_weights(stock_estimate, base_sim_outs, reload_from_cache=False)

# CBECS
cbecs = cspp.CBECS(
cbecs_year=2018, # 2012 and 2018 currently available
truth_data_version='v01', # Typically don't change this
color_hex='#009E73', # Color used to represent CBECS in plots
reload_from_csv=False # True if CSV already made and want faster reload times
)

# Scale ComStock to CBECS 2018 AND remove non-ComStock buildings from CBECS
base_sim_outs = comstock.get_sim_outs_for_upgrade(0)
alloc_wts = comstock.get_allocated_weights()
comstock.create_allocated_weights_scaled_to_cbecs(cbecs, base_sim_outs, alloc_wts, remove_non_comstock_bldg_types_from_cbecs=True)

# Add utility bills onto allocated weights
for upgrade_id in comstock.upgrade_ids_to_process:
# up_sim_outs = comstock.get_sim_outs_for_upgrade(upgrade_id)
# up_alloc_wts = comstock.get_allocated_weights_scaled_to_cbecs_for_upgrade(upgrade_id)
comstock.create_allocated_weights_plus_util_bills_for_upgrade(upgrade_id)

# Specify geo exports

# county resolution, files by state and county
county_resolution = {
'geo_top_dir': 'by_state_and_county',
'partition_cols': {
comstock.STATE_ABBRV: 'state',
comstock.COUNTY_ID: 'county',
},
'aggregation_levels': [comstock.COUNTY_ID], # for full tract resolution, use in.nhgis_tract_gisjoin
'data_types': ['full'],
'file_types': ['parquet'],
}

# state level resolution, one single national file
state_resolution = {
'geo_top_dir': 'national_by_state',
'partition_cols': {},
'aggregation_levels': [[comstock.STATE_ABBRV, comstock.CZ_ASHRAE]],
'data_types': ['full'], # other options: 'detailed', 'basic'; if using multiple, order from most to least detailed
'file_types': ['parquet'], # file format(s) for the exported data
}

# specify the export level
# IMPORTANT: if making county-level timeseries plots, you must export county-level data to S3; this does not happen automatically.
geo_exports = [county_resolution] # or [state_resolution], or both (see example below)
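# Both resolutions can be exported in one pass; each entry is handled
# independently by the loop below:
# geo_exports = [county_resolution, state_resolution]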

for geo_export in geo_exports:
for upgrade_id in comstock.upgrade_ids_to_process:
#if upgrade_id == 0:
# continue
#comstock.export_metadata_and_annual_results_for_upgrade(upgrade_id, [geo_export])

# Also write to S3 if making timeseries plots
if comstock.make_timeseries_plots: # TODO: force geo exports to county data if county timeseries is requested.
s3_dir = f"s3://{comstock.s3_base_dir}/{comstock.comstock_run_name}/{comstock.comstock_run_name}"
s3_output_dir = comstock.setup_fsspec_filesystem(s3_dir, aws_profile_name=None)
comstock.export_metadata_and_annual_results_for_upgrade(upgrade_id=upgrade_id, geo_exports=[geo_export], output_dir=s3_output_dir)

# write select results to S3 for Athena/Glue when needed for timeseries plots
if comstock.make_timeseries_plots:
s3_dir = f"s3://{comstock.s3_base_dir}/{comstock.comstock_run_name}/{comstock.comstock_run_name}"
database = "enduse"
crawler_name = comstock.comstock_run_name # used to set name of crawler, cannot include slashes
workgroup = "eulp" # Athena workgroup to use
glue_service_role = "service-role/AWSGlueServiceRole-default"

# Create Athena/Glue tables from the exported parquet files
comstock.create_sightglass_tables(s3_location=f"{s3_dir}/metadata_and_annual_results_aggregates",
dataset_name=crawler_name,
database_name=database,
glue_service_role=glue_service_role)
comstock.fix_timeseries_tables(crawler_name, database)
comstock.create_views(crawler_name, database, workgroup)

# Create measure run comparisons; only use if run has measures
comparison = cspp.ComStockMeasureComparison(comstock, timeseries_locations_to_plot=comstock.timeseries_locations_to_plot, make_comparison_plots=comstock.make_comparison_plots, make_timeseries_plots=comstock.make_timeseries_plots)

# Export dictionaries corresponding to the exported columns
#comstock.export_data_and_enumeration_dictionary()

# Code to execute the script
if __name__ == "__main__":
main()