From b572ca565dfad61ec27376042ebaa72d01e56963 Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Mon, 17 Nov 2025 14:43:58 -0800 Subject: [PATCH 1/8] CSV Forcings: Store file name, and report it as source of unconvertible data, rather than reporting uninitialized catchment_id --- include/forcing/CsvPerFeatureForcingProvider.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/forcing/CsvPerFeatureForcingProvider.hpp b/include/forcing/CsvPerFeatureForcingProvider.hpp index b8d05cb338..a625733988 100644 --- a/include/forcing/CsvPerFeatureForcingProvider.hpp +++ b/include/forcing/CsvPerFeatureForcingProvider.hpp @@ -35,9 +35,9 @@ class CsvPerFeatureForcingProvider : public data_access::GenericDataProvider CsvPerFeatureForcingProvider(forcing_params forcing_config):start_date_time_epoch(forcing_config.simulation_start_t), end_date_time_epoch(forcing_config.simulation_end_t), current_date_time_epoch(forcing_config.simulation_start_t), - forcing_vector_index(-1) + forcing_file_name(forcing_config.path) { - read_csv(forcing_config.path); + read_csv(forcing_file_name); } // BEGIN DataProvider interface methods @@ -172,7 +172,7 @@ class CsvPerFeatureForcingProvider : public data_access::GenericDataProvider } catch (const std::runtime_error& e) { data_access::unit_conversion_exception uce(e.what()); - uce.provider_model_name = "CsvPerFeatureProvider " + std::to_string(catchment_id); + uce.provider_model_name = "CsvPerFeatureProvider (file '" + forcing_file_name + "')"; uce.provider_bmi_var_name = output_name; uce.provider_units = available_forcings_units[output_name]; uce.unconverted_values.push_back(value); From dce7e80cd6dde7d4d0c6f72108a1865fb9f68b4e Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Mon, 17 Nov 2025 14:44:31 -0800 Subject: [PATCH 2/8] CSV Forcings: Clear out unused member variables and function --- .../forcing/CsvPerFeatureForcingProvider.hpp | 40 ------------------- 1 file changed, 40 deletions(-) diff --git 
a/include/forcing/CsvPerFeatureForcingProvider.hpp b/include/forcing/CsvPerFeatureForcingProvider.hpp index a625733988..7119801737 100644 --- a/include/forcing/CsvPerFeatureForcingProvider.hpp +++ b/include/forcing/CsvPerFeatureForcingProvider.hpp @@ -29,9 +29,6 @@ class CsvPerFeatureForcingProvider : public data_access::GenericDataProvider { public: - typedef struct tm time_type; - - CsvPerFeatureForcingProvider(forcing_params forcing_config):start_date_time_epoch(forcing_config.simulation_start_t), end_date_time_epoch(forcing_config.simulation_end_t), current_date_time_epoch(forcing_config.simulation_start_t), @@ -237,34 +234,6 @@ class CsvPerFeatureForcingProvider : public data_access::GenericDataProvider private: - /** - * @brief Checks forcing vector index bounds and adjusts index if out of vector bounds - * /// \todo: Bounds checking is based on precipitation vector. Consider potential for vectors of different sizes and indices. - */ - inline void check_forcing_vector_index_bounds() - { - std::stringstream ss; - //Check if forcing index is less than zero and if so, set to zero. - if (forcing_vector_index < 0) - { - forcing_vector_index = 0; - /// \todo: Return appropriate warning - ss << "WARNING: Forcing vector index is less than zero. Therefore, setting index to zero." << std::endl; - LOG(ss.str(), LogLevel::SEVERE); ss.str(""); - } - - //Check if forcing index is greater than or equal to the size of the size of the time vector and if so, set to zero. - else if (forcing_vector_index >= time_epoch_vector.size()) - { - forcing_vector_index = time_epoch_vector.size() - 1; - /// \todo: Return appropriate warning - ss << "WARNING: Reached beyond the size of the forcing vector. Therefore, setting index to last value of the vector." << std::endl; - LOG(ss.str(), LogLevel::SEVERE); ss.str(""); - } - - return; - } - /** * Get the current value of a forcing param identified by its name. 
* @@ -427,16 +396,7 @@ class CsvPerFeatureForcingProvider : public data_access::GenericDataProvider /// \todo: Consider making epoch time the iterator std::vector time_epoch_vector; - int forcing_vector_index; - - /// \todo: Are these used? - double precipitation_rate_meters_per_second; - double air_temperature_fahrenheit; - double latitude; //latitude (degrees_north) - double longitude; //longitude (degrees_east) - int catchment_id; - int day_of_year; std::string forcing_file_name; time_t start_date_time_epoch; From 62e36d748e2a917f24cd30bfaa8468187665ae55 Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Mon, 17 Nov 2025 14:51:38 -0800 Subject: [PATCH 3/8] CSV Forcings: Improve const-safety and avoid a stray copy --- include/forcing/CsvPerFeatureForcingProvider.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/forcing/CsvPerFeatureForcingProvider.hpp b/include/forcing/CsvPerFeatureForcingProvider.hpp index 7119801737..d97dc02005 100644 --- a/include/forcing/CsvPerFeatureForcingProvider.hpp +++ b/include/forcing/CsvPerFeatureForcingProvider.hpp @@ -269,7 +269,7 @@ class CsvPerFeatureForcingProvider : public data_access::GenericDataProvider * Reads only data within the specified model start and end date-times. 
* @param file_name Forcing file name */ - void read_csv(std::string file_name) + void read_csv(std::string const& file_name) { int time_col_index = 0; //std::map col_indices; From 2594d8c5ee39b2e5b0a530ffbe833b2659d65001 Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Mon, 17 Nov 2025 15:18:28 -0800 Subject: [PATCH 4/8] CSV Forcings: get_value_for_param_name: const-ify and tighten up error checking --- .../forcing/CsvPerFeatureForcingProvider.hpp | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/include/forcing/CsvPerFeatureForcingProvider.hpp b/include/forcing/CsvPerFeatureForcingProvider.hpp index d97dc02005..46306c7749 100644 --- a/include/forcing/CsvPerFeatureForcingProvider.hpp +++ b/include/forcing/CsvPerFeatureForcingProvider.hpp @@ -241,21 +241,23 @@ class CsvPerFeatureForcingProvider : public data_access::GenericDataProvider * @param index The index of the desired forcing time step from which to obtain the value. * @return The particular param's value at the given forcing time step. 
*/ - inline double get_value_for_param_name(const std::string& name, int index) { - if (index >= time_epoch_vector.size() ) { + inline double get_value_for_param_name(const std::string& name, int index) const { + if (index < 0 || index >= time_epoch_vector.size() ) { std::string throw_msg; throw_msg.assign("Forcing had bad index " + std::to_string(index) + " for value lookup of " + name); LOG(throw_msg, LogLevel::WARNING); throw std::out_of_range(throw_msg); } - std::string can_name = name; - if(data_access::WellKnownFields.count(can_name) > 0){ - auto t = data_access::WellKnownFields.find(can_name)->second; - can_name = std::get<0>(t); + std::string const* can_name = &name; + + auto wkf_iter = data_access::WellKnownFields.find(name); + if (wkf_iter != data_access::WellKnownFields.end()) { + can_name = &std::get<0>(wkf_iter->second); } - if (forcing_vectors.count(can_name) > 0) { - return forcing_vectors[can_name].at(index); + auto forcings_iter = forcing_vectors.find(*can_name); + if (forcings_iter != forcing_vectors.end()) { + return forcings_iter->second.at(index); } else { std::string throw_msg; throw_msg.assign("Cannot get forcing value for unrecognized parameter name '" + name + "'."); From 12ebd2289cfa29b59fe2b588117f7adb9ee9a982 Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Wed, 19 Nov 2025 10:49:55 -0800 Subject: [PATCH 5/8] CSV Forcings: check for consistent time strides between successive rows --- include/forcing/CsvPerFeatureForcingProvider.hpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/include/forcing/CsvPerFeatureForcingProvider.hpp b/include/forcing/CsvPerFeatureForcingProvider.hpp index 46306c7749..848af06f54 100644 --- a/include/forcing/CsvPerFeatureForcingProvider.hpp +++ b/include/forcing/CsvPerFeatureForcingProvider.hpp @@ -388,6 +388,14 @@ class CsvPerFeatureForcingProvider : public data_access::GenericDataProvider LOG(ss.str(), LogLevel::SEVERE); ss.str(""); //std::string throw_msg; throw_msg.assign("Error: Forcing data ends 
before the model end time."); } + + time_t duration = record_duration(); + for (size_t i = 1; i < time_epoch_vector.size(); ++i) { + time_t difference = time_epoch_vector[i] - time_epoch_vector[i-1]; + if (difference != duration) { + Logger::logMsgAndThrowError("Time intervals in forcing file '" + forcing_file_name + "' are not constant at row " + std::to_string(i)); + } + } } std::vector available_forcings; From 5a80a87b9ddc724f19c13a2b28f0b3a8f9b874f7 Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Wed, 19 Nov 2025 11:20:20 -0800 Subject: [PATCH 6/8] CSV Forcings: Move implementation to a .cpp file --- .../forcing/CsvPerFeatureForcingProvider.hpp | 307 +----------------- src/forcing/CMakeLists.txt | 1 + src/forcing/CsvPerFeatureForcingProvider.cpp | 294 +++++++++++++++++ 3 files changed, 307 insertions(+), 295 deletions(-) create mode 100644 src/forcing/CsvPerFeatureForcingProvider.cpp diff --git a/include/forcing/CsvPerFeatureForcingProvider.hpp b/include/forcing/CsvPerFeatureForcingProvider.hpp index 848af06f54..6e7a565df0 100644 --- a/include/forcing/CsvPerFeatureForcingProvider.hpp +++ b/include/forcing/CsvPerFeatureForcingProvider.hpp @@ -29,13 +29,7 @@ class CsvPerFeatureForcingProvider : public data_access::GenericDataProvider { public: - CsvPerFeatureForcingProvider(forcing_params forcing_config):start_date_time_epoch(forcing_config.simulation_start_t), - end_date_time_epoch(forcing_config.simulation_end_t), - current_date_time_epoch(forcing_config.simulation_start_t), - forcing_file_name(forcing_config.path) - { - read_csv(forcing_file_name); - } + CsvPerFeatureForcingProvider(forcing_params forcing_config); // BEGIN DataProvider interface methods @@ -44,29 +38,21 @@ class CsvPerFeatureForcingProvider : public data_access::GenericDataProvider * * @return The inclusive beginning of the period of time over which this instance can provide this data. 
*/ - long get_data_start_time() const override { - //FIXME: Trace this back and you will find that it is the simulation start time, not having anything to do with the forcing at all. - // Apparently this "worked", but at a minimum the description above is false. - return start_date_time_epoch; - } + long get_data_start_time() const override; /** * @brief the exclusive ending of the period of time over which this instance can provide data for this forcing. * * @return The exclusive ending of the period of time over which this instance can provide this data. */ - long get_data_stop_time() const override { - return end_date_time_epoch; - } + long get_data_stop_time() const override; /** * @brief the duration of one record of this forcing source * * @return The duration of one record of this forcing source */ - long record_duration() const override { - return time_epoch_vector[1] - time_epoch_vector[0]; - } + long record_duration() const override; /** * Get the index of the forcing time step that contains the given point in time. @@ -77,31 +63,7 @@ class CsvPerFeatureForcingProvider : public data_access::GenericDataProvider * @return The index of the forcing time step that contains the given point in time. * @throws std::out_of_range If the given point is not in any time step. */ - size_t get_ts_index_for_time(const time_t &epoch_time) const override { - if (epoch_time < start_date_time_epoch) { - std::string throw_msg; throw_msg.assign("Forcing had bad pre-start time for index query: " + std::to_string(epoch_time)); - LOG(throw_msg, LogLevel::WARNING); - throw std::out_of_range(throw_msg); - } - size_t i = 0; - // 1 hour - time_t seconds_in_time_step = 3600; - time_t time = start_date_time_epoch; - while (epoch_time >= time + seconds_in_time_step && time < end_date_time_epoch) { - i++; - time += seconds_in_time_step; - } - // The end_date_time_epoch is the epoch value of the BEGINNING of the last time step, not its end. 
- // I.e., to make sure we cover it, we have to go another time step beyond. - if (time >= end_date_time_epoch + 3600) { - std::string throw_msg; throw_msg.assign("Forcing had bad beyond-end time for index query: " + std::to_string(epoch_time)); - LOG(throw_msg, LogLevel::WARNING); - throw std::out_of_range(throw_msg); - } - else { - return i; - } - } + size_t get_ts_index_for_time(const time_t &epoch_time) const override; /** * Get the value of a forcing property for an arbitrary time period, converting units if needed. @@ -113,104 +75,11 @@ class CsvPerFeatureForcingProvider : public data_access::GenericDataProvider * @return The value of the forcing property for the described time period, with units converted if needed. * @throws std::out_of_range If data for the time period is not available. */ - double get_value(const CatchmentAggrDataSelector& selector, data_access::ReSampleMethod m) override - { - size_t current_index; - long time_remaining = selector.get_duration_secs(); - auto init_time = selector.get_init_time(); - auto output_name = selector.get_variable_name(); - auto output_units = selector.get_output_units(); + double get_value(const CatchmentAggrDataSelector& selector, data_access::ReSampleMethod m) override; - try { - current_index = get_ts_index_for_time(init_time); - } - catch (const std::out_of_range &e) { - std::string throw_msg; throw_msg.assign("Forcing had bad init_time " + std::to_string(init_time) + " for value request"); - LOG(throw_msg, LogLevel::WARNING); - throw std::out_of_range(throw_msg); - } - - std::vector involved_time_step_values; - - std::vector involved_time_step_seconds; - long ts_involved_s; - - time_t first_time_step_start_epoch = start_date_time_epoch + (current_index * 3600); - // Handle the first time step differently, since we need to do more to figure out how many seconds came from it - // Total time step size minus the offset of the beginning, before the init time - ts_involved_s = 3600 - (init_time - 
first_time_step_start_epoch); - - involved_time_step_seconds.push_back(ts_involved_s); - involved_time_step_values.push_back(get_value_for_param_name(output_name, current_index)); - time_remaining -= ts_involved_s; - current_index++; - - while (time_remaining > 0) { - if(current_index >= time_epoch_vector.size()) - return involved_time_step_values[involved_time_step_values.size()-1]; //TODO: Is this the right answer? Is returning any value off the end of the range valid? - ts_involved_s = time_remaining > 3600 ? 3600 : time_remaining; - involved_time_step_seconds.push_back(ts_involved_s); - involved_time_step_values.push_back(get_value_for_param_name(output_name, current_index)); - time_remaining -= ts_involved_s; - current_index++; - - } - double value = 0; - for (size_t i = 0; i < involved_time_step_values.size(); ++i) { - if (is_param_sum_over_time_step(output_name)) - value += involved_time_step_values[i] * ((double)involved_time_step_seconds[i] / 3600.0); - else - value += involved_time_step_values[i] * ((double)involved_time_step_seconds[i] / (double)selector.get_duration_secs()); - } - - // Convert units - try { - return UnitsHelper::get_converted_value(available_forcings_units[output_name], value, output_units); - } - catch (const std::runtime_error& e) { - data_access::unit_conversion_exception uce(e.what()); - uce.provider_model_name = "CsvPerFeatureProvider (file '" + forcing_file_name + "')"; - uce.provider_bmi_var_name = output_name; - uce.provider_units = available_forcings_units[output_name]; - uce.unconverted_values.push_back(value); - throw uce; - } - } - - virtual std::vector get_values(const CatchmentAggrDataSelector& selector, data_access::ReSampleMethod m) override - { - return std::vector(1, get_value(selector, m)); - } + virtual std::vector get_values(const CatchmentAggrDataSelector& selector, data_access::ReSampleMethod m) override; - - /** - * Get whether a param's value is an aggregate sum over the entire time step. 
- * - * Certain params, like rain fall, are aggregated sums over an entire time step. Others, such as pressure, are not - * such sums and instead something else like an instantaneous reading or an average value over the time step. - * - * It may be the case that forcing data is needed for some discretization different than the forcing time step. - * These values can be calculated (or at least approximated), but doing so requires knowing which values are summed - * versus not. - * - * @param name The name of the forcing param for which the current value is desired. - * @return Whether the param's value is an aggregate sum. - */ - inline bool is_param_sum_over_time_step(const std::string& name) const { - if (name == CSDMS_STD_NAME_RAIN_VOLUME_FLUX) { - return true; - } - if (name == CSDMS_STD_NAME_SOLAR_SHORTWAVE) { - return true; - } - if (name == CSDMS_STD_NAME_SOLAR_LONGWAVE) { - return true; - } - if (name == CSDMS_STD_NAME_LIQUID_EQ_PRECIP_RATE) { - return true; - } - return false; - } + bool is_param_sum_over_time_step(const std::string& name) const; /** * Get whether a property's per-time-step values are each an aggregate sum over the entire time step. @@ -224,13 +93,9 @@ class CsvPerFeatureForcingProvider : public data_access::GenericDataProvider * @param name The name of the forcing property for which the current value is desired. * @return Whether the property's value is an aggregate sum. */ - inline bool is_property_sum_over_time_step(const std::string& name) const override { - return is_param_sum_over_time_step(name); - } + bool is_property_sum_over_time_step(const std::string& name) const override; - boost::span get_available_variable_names() const override { - return available_forcings; - } + boost::span get_available_variable_names() const override; private: @@ -241,162 +106,14 @@ class CsvPerFeatureForcingProvider : public data_access::GenericDataProvider * @param index The index of the desired forcing time step from which to obtain the value. 
* @return The particular param's value at the given forcing time step. */ - inline double get_value_for_param_name(const std::string& name, int index) const { - if (index < 0 || index >= time_epoch_vector.size() ) { - std::string throw_msg; throw_msg.assign("Forcing had bad index " + std::to_string(index) + " for value lookup of " + name); - LOG(throw_msg, LogLevel::WARNING); - throw std::out_of_range(throw_msg); - } - - std::string const* can_name = &name; - - auto wkf_iter = data_access::WellKnownFields.find(name); - if (wkf_iter != data_access::WellKnownFields.end()) { - can_name = &std::get<0>(wkf_iter->second); - } - - auto forcings_iter = forcing_vectors.find(*can_name); - if (forcings_iter != forcing_vectors.end()) { - return forcings_iter->second.at(index); - } - else { - std::string throw_msg; throw_msg.assign("Cannot get forcing value for unrecognized parameter name '" + name + "'."); - LOG(throw_msg, LogLevel::WARNING); - throw std::runtime_error(throw_msg); - } - } + double get_value_for_param_name(const std::string& name, int index) const; /** * @brief Read Forcing Data from CSV * Reads only data within the specified model start and end date-times. * @param file_name Forcing file name */ - void read_csv(std::string const& file_name) - { - int time_col_index = 0; - //std::map col_indices; - std::vector*> local_valvec_index = {}; - - //Call CSVReader constuctor - CSVReader reader(file_name); - - //Get the data from CSV File - std::vector > data_list = reader.getData(); - - // Process the header (first) row.. - int col_num = 0; - for (const auto& col_head : data_list[0]){ - //std::cerr << s << std::endl; - if(col_head == "Time" || col_head == "time"){ - time_col_index = col_num; - local_valvec_index.push_back(nullptr); // make sure the column indices line up! 
- } else { - std::string var_name = col_head; - std::string units = ""; - - boost::trim(var_name); // remove leading/trailing ws - const auto var_name_close = var_name.back(); - if (var_name_close == ']' || var_name_close == ')') { - // found closing bracket/parenth - - const bool is_bracket = var_name_close == ']'; - const size_t var_name_open = is_bracket ? var_name.rfind('[') : var_name.rfind('('); - if (var_name_open != std::string::npos) { - // found matching opening bracket/parenth - - units = var_name.substr(var_name_open + 1); - units.pop_back(); // remove closing bracket - - var_name = var_name.substr(0, var_name_open); - boost::trim(var_name); // trim again in case of ws between name and units - } - } - - LOG("CsvProvider has variable '" + var_name + "' with units '" + units + "'", LogLevel::DEBUG); - - auto wkf = data_access::WellKnownFields.find(var_name); - if(wkf != data_access::WellKnownFields.end()){ - units = units.empty() ? std::get<1>(wkf->second) : units; - auto wkf_name = std::get<0>(wkf->second); - LOG("CsvProvider has well-known name '" + wkf_name + "' for variable '" + var_name + "' with units '" + units + "'", LogLevel::DEBUG); - available_forcings.push_back(var_name); // Allow lookup by non-canonical name - available_forcings_units[var_name] = units; // Allow lookup of units by non-canonical name - var_name = wkf_name; // Use the CSDMS name from here on - } - - forcing_vectors[var_name] = {}; - local_valvec_index.push_back(&(forcing_vectors[var_name])); - available_forcings.push_back(var_name); - available_forcings_units[var_name] = units; - } - col_num++; - } - - time_t current_row_date_time_epoch; - //Iterate through CSV starting on the second row - int i = 1; - for (i = 1; i < data_list.size(); i++) - { - //Row vector - std::vector& vec = data_list[i]; - - struct tm current_row_date_time_utc = tm(); - std::string time_str = vec[time_col_index]; - //TODO: Support more time string formats? 
This is basically ISO8601 but not complete, support TZ? - strptime(time_str.c_str(), "%Y-%m-%d %H:%M:%S", &current_row_date_time_utc); - - //Convert current row date-time UTC to epoch time - current_row_date_time_epoch = timegm(&current_row_date_time_utc); - - //TODO: I am not sure this is a concern of this object. If forcing is retrieved that doesn't cover the - //needed time period, isn't that the requester's concern? (Methods exist to check this...) - //Ensure that forcing data covers the entire model period. Otherwise, throw an error. - if (i == 1 && start_date_time_epoch < current_row_date_time_epoch) - { - struct tm start_date_tm; - gmtime_r(&start_date_time_epoch, &start_date_tm); - - char tm_buff[128]; - strftime(tm_buff, 128, "%Y-%m-%d %H:%M:%S", &start_date_tm); - std::string throw_msg; throw_msg.assign("Error: Forcing data " + file_name + " begins after the model start time:" + std::string(tm_buff) + " < " + time_str); - LOG(throw_msg, LogLevel::WARNING); - throw std::out_of_range(throw_msg); - } - - - if (start_date_time_epoch <= current_row_date_time_epoch && current_row_date_time_epoch <= end_date_time_epoch) - { - time_epoch_vector.push_back(current_row_date_time_epoch); - - int c = -1; - for (auto& s : vec){ - c++; - if(c == time_col_index) - continue; - boost::algorithm::trim(s); - local_valvec_index[c]->push_back(boost::lexical_cast(s)); // This is supposed to update the vector in the map... - } - - } - - } - if (i <= 1 || current_row_date_time_epoch < end_date_time_epoch) - { - /// \todo TODO: Return appropriate error - std::stringstream ss; - ss << "WARNING: Forcing data ends before the model end time." 
<< std::endl; - LOG(ss.str(), LogLevel::SEVERE); ss.str(""); - //std::string throw_msg; throw_msg.assign("Error: Forcing data ends before the model end time."); - } - - time_t duration = record_duration(); - for (size_t i = 1; i < time_epoch_vector.size(); ++i) { - time_t difference = time_epoch_vector[i] - time_epoch_vector[i-1]; - if (difference != duration) { - Logger::logMsgAndThrowError("Time intervals in forcing file '" + forcing_file_name + "' are not constant at row " + std::to_string(i)); - } - } - } + void read_csv(std::string const& file_name); std::vector available_forcings; std::unordered_map available_forcings_units; diff --git a/src/forcing/CMakeLists.txt b/src/forcing/CMakeLists.txt index f64456c873..4371bec62a 100644 --- a/src/forcing/CMakeLists.txt +++ b/src/forcing/CMakeLists.txt @@ -21,6 +21,7 @@ target_link_libraries(forcing PUBLIC ) target_sources(forcing PRIVATE "${CMAKE_CURRENT_LIST_DIR}/NullForcingProvider.cpp") +target_sources(forcing PRIVATE "${CMAKE_CURRENT_LIST_DIR}/CsvPerFeatureForcingProvider.cpp") if(NGEN_WITH_NETCDF) target_sources(forcing PRIVATE "${CMAKE_CURRENT_LIST_DIR}/NetCDFPerFeatureDataProvider.cpp") diff --git a/src/forcing/CsvPerFeatureForcingProvider.cpp b/src/forcing/CsvPerFeatureForcingProvider.cpp new file mode 100644 index 0000000000..d53066ad54 --- /dev/null +++ b/src/forcing/CsvPerFeatureForcingProvider.cpp @@ -0,0 +1,294 @@ +#include + + CsvPerFeatureForcingProvider::CsvPerFeatureForcingProvider(forcing_params forcing_config):start_date_time_epoch(forcing_config.simulation_start_t), + end_date_time_epoch(forcing_config.simulation_end_t), + current_date_time_epoch(forcing_config.simulation_start_t), + forcing_file_name(forcing_config.path) + { + read_csv(forcing_file_name); + } + + long CsvPerFeatureForcingProvider::get_data_start_time() const { + //FIXME: Trace this back and you will find that it is the simulation start time, not having anything to do with the forcing at all. 
+ // Apparently this "worked", but at a minimum the description above is false. + return start_date_time_epoch; + } + + long CsvPerFeatureForcingProvider::get_data_stop_time() const { + return end_date_time_epoch; + } + + long CsvPerFeatureForcingProvider::record_duration() const { + return time_epoch_vector[1] - time_epoch_vector[0]; + } + + size_t CsvPerFeatureForcingProvider::get_ts_index_for_time(const time_t &epoch_time) const { + if (epoch_time < start_date_time_epoch) { + std::string throw_msg; throw_msg.assign("Forcing had bad pre-start time for index query: " + std::to_string(epoch_time)); + LOG(throw_msg, LogLevel::WARNING); + throw std::out_of_range(throw_msg); + } + size_t i = 0; + // 1 hour + time_t seconds_in_time_step = 3600; + time_t time = start_date_time_epoch; + while (epoch_time >= time + seconds_in_time_step && time < end_date_time_epoch) { + i++; + time += seconds_in_time_step; + } + // The end_date_time_epoch is the epoch value of the BEGINNING of the last time step, not its end. + // I.e., to make sure we cover it, we have to go another time step beyond. 
+ if (time >= end_date_time_epoch + 3600) { + std::string throw_msg; throw_msg.assign("Forcing had bad beyond-end time for index query: " + std::to_string(epoch_time)); + LOG(throw_msg, LogLevel::WARNING); + throw std::out_of_range(throw_msg); + } + else { + return i; + } + } + + double CsvPerFeatureForcingProvider::get_value(const CatchmentAggrDataSelector& selector, data_access::ReSampleMethod m) + { + size_t current_index; + long time_remaining = selector.get_duration_secs(); + auto init_time = selector.get_init_time(); + auto output_name = selector.get_variable_name(); + auto output_units = selector.get_output_units(); + + try { + current_index = get_ts_index_for_time(init_time); + } + catch (const std::out_of_range &e) { + std::string throw_msg; throw_msg.assign("Forcing had bad init_time " + std::to_string(init_time) + " for value request"); + LOG(throw_msg, LogLevel::WARNING); + throw std::out_of_range(throw_msg); + } + + std::vector involved_time_step_values; + + std::vector involved_time_step_seconds; + long ts_involved_s; + + time_t first_time_step_start_epoch = start_date_time_epoch + (current_index * 3600); + // Handle the first time step differently, since we need to do more to figure out how many seconds came from it + // Total time step size minus the offset of the beginning, before the init time + ts_involved_s = 3600 - (init_time - first_time_step_start_epoch); + + involved_time_step_seconds.push_back(ts_involved_s); + involved_time_step_values.push_back(get_value_for_param_name(output_name, current_index)); + time_remaining -= ts_involved_s; + current_index++; + + while (time_remaining > 0) { + if(current_index >= time_epoch_vector.size()) + return involved_time_step_values[involved_time_step_values.size()-1]; //TODO: Is this the right answer? Is returning any value off the end of the range valid? + ts_involved_s = time_remaining > 3600 ? 
3600 : time_remaining; + involved_time_step_seconds.push_back(ts_involved_s); + involved_time_step_values.push_back(get_value_for_param_name(output_name, current_index)); + time_remaining -= ts_involved_s; + current_index++; + + } + double value = 0; + for (size_t i = 0; i < involved_time_step_values.size(); ++i) { + if (is_param_sum_over_time_step(output_name)) + value += involved_time_step_values[i] * ((double)involved_time_step_seconds[i] / 3600.0); + else + value += involved_time_step_values[i] * ((double)involved_time_step_seconds[i] / (double)selector.get_duration_secs()); + } + + // Convert units + try { + return UnitsHelper::get_converted_value(available_forcings_units[output_name], value, output_units); + } + catch (const std::runtime_error& e) { + data_access::unit_conversion_exception uce(e.what()); + uce.provider_model_name = "CsvPerFeatureProvider (file '" + forcing_file_name + "')"; + uce.provider_bmi_var_name = output_name; + uce.provider_units = available_forcings_units[output_name]; + uce.unconverted_values.push_back(value); + throw uce; + } + } + + std::vector CsvPerFeatureForcingProvider::get_values(const CatchmentAggrDataSelector& selector, data_access::ReSampleMethod m) + { + return std::vector(1, get_value(selector, m)); + } + + bool CsvPerFeatureForcingProvider::is_param_sum_over_time_step(const std::string& name) const { + if (name == CSDMS_STD_NAME_RAIN_VOLUME_FLUX) { + return true; + } + if (name == CSDMS_STD_NAME_SOLAR_SHORTWAVE) { + return true; + } + if (name == CSDMS_STD_NAME_SOLAR_LONGWAVE) { + return true; + } + if (name == CSDMS_STD_NAME_LIQUID_EQ_PRECIP_RATE) { + return true; + } + return false; + } + + bool CsvPerFeatureForcingProvider::is_property_sum_over_time_step(const std::string& name) const { + return is_param_sum_over_time_step(name); + } + + boost::span CsvPerFeatureForcingProvider::get_available_variable_names() const { + return available_forcings; + } + + double 
CsvPerFeatureForcingProvider::get_value_for_param_name(const std::string& name, int index) const { + if (index < 0 || index >= time_epoch_vector.size() ) { + std::string throw_msg; throw_msg.assign("Forcing had bad index " + std::to_string(index) + " for value lookup of " + name); + LOG(throw_msg, LogLevel::WARNING); + throw std::out_of_range(throw_msg); + } + + std::string const* can_name = &name; + + auto wkf_iter = data_access::WellKnownFields.find(name); + if (wkf_iter != data_access::WellKnownFields.end()) { + can_name = &std::get<0>(wkf_iter->second); + } + + auto forcings_iter = forcing_vectors.find(*can_name); + if (forcings_iter != forcing_vectors.end()) { + return forcings_iter->second.at(index); + } + else { + std::string throw_msg; throw_msg.assign("Cannot get forcing value for unrecognized parameter name '" + name + "'."); + LOG(throw_msg, LogLevel::WARNING); + throw std::runtime_error(throw_msg); + } + } + + void CsvPerFeatureForcingProvider::read_csv(std::string const& file_name) + { + int time_col_index = 0; + //std::map col_indices; + std::vector*> local_valvec_index = {}; + + //Call CSVReader constuctor + CSVReader reader(file_name); + + //Get the data from CSV File + std::vector > data_list = reader.getData(); + + // Process the header (first) row.. + int col_num = 0; + for (const auto& col_head : data_list[0]){ + //std::cerr << s << std::endl; + if(col_head == "Time" || col_head == "time"){ + time_col_index = col_num; + local_valvec_index.push_back(nullptr); // make sure the column indices line up! + } else { + std::string var_name = col_head; + std::string units = ""; + + boost::trim(var_name); // remove leading/trailing ws + const auto var_name_close = var_name.back(); + if (var_name_close == ']' || var_name_close == ')') { + // found closing bracket/parenth + + const bool is_bracket = var_name_close == ']'; + const size_t var_name_open = is_bracket ? 
var_name.rfind('[') : var_name.rfind('('); + if (var_name_open != std::string::npos) { + // found matching opening bracket/parenth + + units = var_name.substr(var_name_open + 1); + units.pop_back(); // remove closing bracket + + var_name = var_name.substr(0, var_name_open); + boost::trim(var_name); // trim again in case of ws between name and units + } + } + + LOG("CsvProvider has variable '" + var_name + "' with units '" + units + "'", LogLevel::DEBUG); + + auto wkf = data_access::WellKnownFields.find(var_name); + if(wkf != data_access::WellKnownFields.end()){ + units = units.empty() ? std::get<1>(wkf->second) : units; + auto wkf_name = std::get<0>(wkf->second); + LOG("CsvProvider has well-known name '" + wkf_name + "' for variable '" + var_name + "' with units '" + units + "'", LogLevel::DEBUG); + available_forcings.push_back(var_name); // Allow lookup by non-canonical name + available_forcings_units[var_name] = units; // Allow lookup of units by non-canonical name + var_name = wkf_name; // Use the CSDMS name from here on + } + + forcing_vectors[var_name] = {}; + local_valvec_index.push_back(&(forcing_vectors[var_name])); + available_forcings.push_back(var_name); + available_forcings_units[var_name] = units; + } + col_num++; + } + + time_t current_row_date_time_epoch; + //Iterate through CSV starting on the second row + int i = 1; + for (i = 1; i < data_list.size(); i++) + { + //Row vector + std::vector& vec = data_list[i]; + + struct tm current_row_date_time_utc = tm(); + std::string time_str = vec[time_col_index]; + //TODO: Support more time string formats? This is basically ISO8601 but not complete, support TZ? + strptime(time_str.c_str(), "%Y-%m-%d %H:%M:%S", &current_row_date_time_utc); + + //Convert current row date-time UTC to epoch time + current_row_date_time_epoch = timegm(&current_row_date_time_utc); + + //TODO: I am not sure this is a concern of this object. If forcing is retrieved that doesn't cover the + //needed time period, isn't that the requester's concern?
(Methods exist to check this...) + //Ensure that forcing data covers the entire model period. Otherwise, throw an error. + if (i == 1 && start_date_time_epoch < current_row_date_time_epoch) + { + struct tm start_date_tm; + gmtime_r(&start_date_time_epoch, &start_date_tm); + + char tm_buff[128]; + strftime(tm_buff, 128, "%Y-%m-%d %H:%M:%S", &start_date_tm); + std::string throw_msg; throw_msg.assign("Error: Forcing data " + file_name + " begins after the model start time:" + std::string(tm_buff) + " < " + time_str); + LOG(throw_msg, LogLevel::WARNING); + throw std::out_of_range(throw_msg); + } + + + if (start_date_time_epoch <= current_row_date_time_epoch && current_row_date_time_epoch <= end_date_time_epoch) + { + time_epoch_vector.push_back(current_row_date_time_epoch); + + int c = -1; + for (auto& s : vec){ + c++; + if(c == time_col_index) + continue; + boost::algorithm::trim(s); + local_valvec_index[c]->push_back(boost::lexical_cast(s)); // This is supposed to update the vector in the map... + } + + } + + } + if (i <= 1 || current_row_date_time_epoch < end_date_time_epoch) + { + /// \todo TODO: Return appropriate error + std::stringstream ss; + ss << "WARNING: Forcing data ends before the model end time." 
<< std::endl; + LOG(ss.str(), LogLevel::SEVERE); ss.str(""); + //std::string throw_msg; throw_msg.assign("Error: Forcing data ends before the model end time."); + } + + time_t duration = record_duration(); + for (size_t i = 1; i < time_epoch_vector.size(); ++i) { + time_t difference = time_epoch_vector[i] - time_epoch_vector[i-1]; + if (difference != duration) { + Logger::logMsgAndThrowError("Time intervals in forcing file '" + forcing_file_name + "' are not constant at row " + std::to_string(i)); + } + } + } From 1dab0eb2644877de0ffb7611076854589a119400 Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Wed, 19 Nov 2025 11:21:17 -0800 Subject: [PATCH 7/8] CSV Forcings: Reformat .cpp file --- src/forcing/CsvPerFeatureForcingProvider.cpp | 496 +++++++++---------- 1 file changed, 245 insertions(+), 251 deletions(-) diff --git a/src/forcing/CsvPerFeatureForcingProvider.cpp b/src/forcing/CsvPerFeatureForcingProvider.cpp index d53066ad54..e1d6f1cb1c 100644 --- a/src/forcing/CsvPerFeatureForcingProvider.cpp +++ b/src/forcing/CsvPerFeatureForcingProvider.cpp @@ -1,294 +1,288 @@ #include - CsvPerFeatureForcingProvider::CsvPerFeatureForcingProvider(forcing_params forcing_config):start_date_time_epoch(forcing_config.simulation_start_t), - end_date_time_epoch(forcing_config.simulation_end_t), - current_date_time_epoch(forcing_config.simulation_start_t), - forcing_file_name(forcing_config.path) - { - read_csv(forcing_file_name); +CsvPerFeatureForcingProvider::CsvPerFeatureForcingProvider(forcing_params forcing_config) + : start_date_time_epoch(forcing_config.simulation_start_t) + , end_date_time_epoch(forcing_config.simulation_end_t) + , current_date_time_epoch(forcing_config.simulation_start_t) + , forcing_file_name(forcing_config.path) +{ + read_csv(forcing_file_name); +} + +long CsvPerFeatureForcingProvider::get_data_start_time() const { + //FIXME: Trace this back and you will find that it is the simulation start time, not having anything to do with the forcing at all. 
+ // Apparently this "worked", but at a minimum the description above is false. + return start_date_time_epoch; +} + +long CsvPerFeatureForcingProvider::get_data_stop_time() const { + return end_date_time_epoch; +} + +long CsvPerFeatureForcingProvider::record_duration() const { + return time_epoch_vector[1] - time_epoch_vector[0]; +} + +size_t CsvPerFeatureForcingProvider::get_ts_index_for_time(const time_t &epoch_time) const { + if (epoch_time < start_date_time_epoch) { + std::string throw_msg; throw_msg.assign("Forcing had bad pre-start time for index query: " + std::to_string(epoch_time)); + LOG(throw_msg, LogLevel::WARNING); + throw std::out_of_range(throw_msg); } - - long CsvPerFeatureForcingProvider::get_data_start_time() const { - //FIXME: Trace this back and you will find that it is the simulation start time, not having anything to do with the forcing at all. - // Apparently this "worked", but at a minimum the description above is false. - return start_date_time_epoch; + size_t i = 0; + // 1 hour + time_t seconds_in_time_step = 3600; + time_t time = start_date_time_epoch; + while (epoch_time >= time + seconds_in_time_step && time < end_date_time_epoch) { + i++; + time += seconds_in_time_step; } - - long CsvPerFeatureForcingProvider::get_data_stop_time() const { - return end_date_time_epoch; + // The end_date_time_epoch is the epoch value of the BEGINNING of the last time step, not its end. + // I.e., to make sure we cover it, we have to go another time step beyond. 
+ if (time >= end_date_time_epoch + 3600) { + std::string throw_msg; throw_msg.assign("Forcing had bad beyond-end time for index query: " + std::to_string(epoch_time)); + LOG(throw_msg, LogLevel::WARNING); + throw std::out_of_range(throw_msg); } - - long CsvPerFeatureForcingProvider::record_duration() const { - return time_epoch_vector[1] - time_epoch_vector[0]; + else { + return i; } +} - size_t CsvPerFeatureForcingProvider::get_ts_index_for_time(const time_t &epoch_time) const { - if (epoch_time < start_date_time_epoch) { - std::string throw_msg; throw_msg.assign("Forcing had bad pre-start time for index query: " + std::to_string(epoch_time)); - LOG(throw_msg, LogLevel::WARNING); - throw std::out_of_range(throw_msg); - } - size_t i = 0; - // 1 hour - time_t seconds_in_time_step = 3600; - time_t time = start_date_time_epoch; - while (epoch_time >= time + seconds_in_time_step && time < end_date_time_epoch) { - i++; - time += seconds_in_time_step; - } - // The end_date_time_epoch is the epoch value of the BEGINNING of the last time step, not its end. - // I.e., to make sure we cover it, we have to go another time step beyond. 
- if (time >= end_date_time_epoch + 3600) { - std::string throw_msg; throw_msg.assign("Forcing had bad beyond-end time for index query: " + std::to_string(epoch_time)); - LOG(throw_msg, LogLevel::WARNING); - throw std::out_of_range(throw_msg); - } - else { - return i; - } - } +double CsvPerFeatureForcingProvider::get_value(const CatchmentAggrDataSelector& selector, data_access::ReSampleMethod m) { + size_t current_index; + long time_remaining = selector.get_duration_secs(); + auto init_time = selector.get_init_time(); + auto output_name = selector.get_variable_name(); + auto output_units = selector.get_output_units(); - double CsvPerFeatureForcingProvider::get_value(const CatchmentAggrDataSelector& selector, data_access::ReSampleMethod m) - { - size_t current_index; - long time_remaining = selector.get_duration_secs(); - auto init_time = selector.get_init_time(); - auto output_name = selector.get_variable_name(); - auto output_units = selector.get_output_units(); + try { + current_index = get_ts_index_for_time(init_time); + } + catch (const std::out_of_range &e) { + std::string throw_msg; throw_msg.assign("Forcing had bad init_time " + std::to_string(init_time) + " for value request"); + LOG(throw_msg, LogLevel::WARNING); + throw std::out_of_range(throw_msg); + } - try { - current_index = get_ts_index_for_time(init_time); - } - catch (const std::out_of_range &e) { - std::string throw_msg; throw_msg.assign("Forcing had bad init_time " + std::to_string(init_time) + " for value request"); - LOG(throw_msg, LogLevel::WARNING); - throw std::out_of_range(throw_msg); - } + std::vector involved_time_step_values; - std::vector involved_time_step_values; + std::vector involved_time_step_seconds; + long ts_involved_s; - std::vector involved_time_step_seconds; - long ts_involved_s; + time_t first_time_step_start_epoch = start_date_time_epoch + (current_index * 3600); + // Handle the first time step differently, since we need to do more to figure out how many seconds came from 
it + // Total time step size minus the offset of the beginning, before the init time + ts_involved_s = 3600 - (init_time - first_time_step_start_epoch); - time_t first_time_step_start_epoch = start_date_time_epoch + (current_index * 3600); - // Handle the first time step differently, since we need to do more to figure out how many seconds came from it - // Total time step size minus the offset of the beginning, before the init time - ts_involved_s = 3600 - (init_time - first_time_step_start_epoch); + involved_time_step_seconds.push_back(ts_involved_s); + involved_time_step_values.push_back(get_value_for_param_name(output_name, current_index)); + time_remaining -= ts_involved_s; + current_index++; + while (time_remaining > 0) { + if(current_index >= time_epoch_vector.size()) + return involved_time_step_values[involved_time_step_values.size()-1]; //TODO: Is this the right answer? Is returning any value off the end of the range valid? + ts_involved_s = time_remaining > 3600 ? 3600 : time_remaining; involved_time_step_seconds.push_back(ts_involved_s); involved_time_step_values.push_back(get_value_for_param_name(output_name, current_index)); time_remaining -= ts_involved_s; current_index++; - while (time_remaining > 0) { - if(current_index >= time_epoch_vector.size()) - return involved_time_step_values[involved_time_step_values.size()-1]; //TODO: Is this the right answer? Is returning any value off the end of the range valid? - ts_involved_s = time_remaining > 3600 ? 
3600 : time_remaining; - involved_time_step_seconds.push_back(ts_involved_s); - involved_time_step_values.push_back(get_value_for_param_name(output_name, current_index)); - time_remaining -= ts_involved_s; - current_index++; - - } - double value = 0; - for (size_t i = 0; i < involved_time_step_values.size(); ++i) { - if (is_param_sum_over_time_step(output_name)) - value += involved_time_step_values[i] * ((double)involved_time_step_seconds[i] / 3600.0); - else - value += involved_time_step_values[i] * ((double)involved_time_step_seconds[i] / (double)selector.get_duration_secs()); - } - - // Convert units - try { - return UnitsHelper::get_converted_value(available_forcings_units[output_name], value, output_units); - } - catch (const std::runtime_error& e) { - data_access::unit_conversion_exception uce(e.what()); - uce.provider_model_name = "CsvPerFeatureProvider (file '" + forcing_file_name + "')"; - uce.provider_bmi_var_name = output_name; - uce.provider_units = available_forcings_units[output_name]; - uce.unconverted_values.push_back(value); - throw uce; - } } - - std::vector CsvPerFeatureForcingProvider::get_values(const CatchmentAggrDataSelector& selector, data_access::ReSampleMethod m) - { - return std::vector(1, get_value(selector, m)); + double value = 0; + for (size_t i = 0; i < involved_time_step_values.size(); ++i) { + if (is_param_sum_over_time_step(output_name)) + value += involved_time_step_values[i] * ((double)involved_time_step_seconds[i] / 3600.0); + else + value += involved_time_step_values[i] * ((double)involved_time_step_seconds[i] / (double)selector.get_duration_secs()); } - bool CsvPerFeatureForcingProvider::is_param_sum_over_time_step(const std::string& name) const { - if (name == CSDMS_STD_NAME_RAIN_VOLUME_FLUX) { - return true; - } - if (name == CSDMS_STD_NAME_SOLAR_SHORTWAVE) { - return true; - } - if (name == CSDMS_STD_NAME_SOLAR_LONGWAVE) { - return true; - } - if (name == CSDMS_STD_NAME_LIQUID_EQ_PRECIP_RATE) { - return true; - } - return 
false; + // Convert units + try { + return UnitsHelper::get_converted_value(available_forcings_units[output_name], value, output_units); } - - bool CsvPerFeatureForcingProvider::is_property_sum_over_time_step(const std::string& name) const { - return is_param_sum_over_time_step(name); - } - - boost::span CsvPerFeatureForcingProvider::get_available_variable_names() const { - return available_forcings; + catch (const std::runtime_error& e) { + data_access::unit_conversion_exception uce(e.what()); + uce.provider_model_name = "CsvPerFeatureProvider (file '" + forcing_file_name + "')"; + uce.provider_bmi_var_name = output_name; + uce.provider_units = available_forcings_units[output_name]; + uce.unconverted_values.push_back(value); + throw uce; } +} - double CsvPerFeatureForcingProvider::get_value_for_param_name(const std::string& name, int index) const { - if (index < 0 || index >= time_epoch_vector.size() ) { - std::string throw_msg; throw_msg.assign("Forcing had bad index " + std::to_string(index) + " for value lookup of " + name); - LOG(throw_msg, LogLevel::WARNING); - throw std::out_of_range(throw_msg); - } +std::vector CsvPerFeatureForcingProvider::get_values(const CatchmentAggrDataSelector& selector, data_access::ReSampleMethod m) { + return std::vector(1, get_value(selector, m)); +} - std::string const* can_name = &name; +bool CsvPerFeatureForcingProvider::is_param_sum_over_time_step(const std::string& name) const { + if (name == CSDMS_STD_NAME_RAIN_VOLUME_FLUX) { + return true; + } + if (name == CSDMS_STD_NAME_SOLAR_SHORTWAVE) { + return true; + } + if (name == CSDMS_STD_NAME_SOLAR_LONGWAVE) { + return true; + } + if (name == CSDMS_STD_NAME_LIQUID_EQ_PRECIP_RATE) { + return true; + } + return false; +} + +bool CsvPerFeatureForcingProvider::is_property_sum_over_time_step(const std::string& name) const { + return is_param_sum_over_time_step(name); +} + +boost::span CsvPerFeatureForcingProvider::get_available_variable_names() const { + return available_forcings; +} 
+ +double CsvPerFeatureForcingProvider::get_value_for_param_name(const std::string& name, int index) const { + if (index < 0 || index >= time_epoch_vector.size() ) { + std::string throw_msg; throw_msg.assign("Forcing had bad index " + std::to_string(index) + " for value lookup of " + name); + LOG(throw_msg, LogLevel::WARNING); + throw std::out_of_range(throw_msg); + } - auto wkf_iter = data_access::WellKnownFields.find(name); - if (wkf_iter != data_access::WellKnownFields.end()) { - can_name = &std::get<0>(wkf_iter->second); - } + std::string const* can_name = &name; - auto forcings_iter = forcing_vectors.find(*can_name); - if (forcings_iter != forcing_vectors.end()) { - return forcings_iter->second.at(index); - } - else { - std::string throw_msg; throw_msg.assign("Cannot get forcing value for unrecognized parameter name '" + name + "'."); - LOG(throw_msg, LogLevel::WARNING); - throw std::runtime_error(throw_msg); - } + auto wkf_iter = data_access::WellKnownFields.find(name); + if (wkf_iter != data_access::WellKnownFields.end()) { + can_name = &std::get<0>(wkf_iter->second); } - void CsvPerFeatureForcingProvider::read_csv(std::string const& file_name) - { - int time_col_index = 0; - //std::map col_indices; - std::vector*> local_valvec_index = {}; - - //Call CSVReader constuctor - CSVReader reader(file_name); - - //Get the data from CSV File - std::vector > data_list = reader.getData(); - - // Process the header (first) row.. - int col_num = 0; - for (const auto& col_head : data_list[0]){ - //std::cerr << s << std::endl; - if(col_head == "Time" || col_head == "time"){ - time_col_index = col_num; - local_valvec_index.push_back(nullptr); // make sure the column indices line up! 
- } else { - std::string var_name = col_head; - std::string units = ""; - - boost::trim(var_name); // remove leading/trailing ws - const auto var_name_close = var_name.back(); - if (var_name_close == ']' || var_name_close == ')') { - // found closing bracket/parenth - - const bool is_bracket = var_name_close == ']'; - const size_t var_name_open = is_bracket ? var_name.rfind('[') : var_name.rfind('('); - if (var_name_open != std::string::npos) { - // found matching opening bracket/parenth - - units = var_name.substr(var_name_open + 1); - units.pop_back(); // remove closing bracket - - var_name = var_name.substr(0, var_name_open); - boost::trim(var_name); // trim again in case of ws between name and units - } + auto forcings_iter = forcing_vectors.find(*can_name); + if (forcings_iter != forcing_vectors.end()) { + return forcings_iter->second.at(index); + } + else { + std::string throw_msg; throw_msg.assign("Cannot get forcing value for unrecognized parameter name '" + name + "'."); + LOG(throw_msg, LogLevel::WARNING); + throw std::runtime_error(throw_msg); + } +} + +void CsvPerFeatureForcingProvider::read_csv(std::string const& file_name) { + int time_col_index = 0; + //std::map col_indices; + std::vector*> local_valvec_index = {}; + + //Call CSVReader constuctor + CSVReader reader(file_name); + + //Get the data from CSV File + std::vector > data_list = reader.getData(); + + // Process the header (first) row.. + int col_num = 0; + for (const auto& col_head : data_list[0]){ + //std::cerr << s << std::endl; + if(col_head == "Time" || col_head == "time"){ + time_col_index = col_num; + local_valvec_index.push_back(nullptr); // make sure the column indices line up! 
+ } else { + std::string var_name = col_head; + std::string units = ""; + + boost::trim(var_name); // remove leading/trailing ws + const auto var_name_close = var_name.back(); + if (var_name_close == ']' || var_name_close == ')') { + // found closing bracket/parenth + + const bool is_bracket = var_name_close == ']'; + const size_t var_name_open = is_bracket ? var_name.rfind('[') : var_name.rfind('('); + if (var_name_open != std::string::npos) { + // found matching opening bracket/parenth + + units = var_name.substr(var_name_open + 1); + units.pop_back(); // remove closing bracket + + var_name = var_name.substr(0, var_name_open); + boost::trim(var_name); // trim again in case of ws between name and units } + } - LOG("CsvProvider has variable '" + var_name + "' with units '" + units + "'", LogLevel::DEBUG); - - auto wkf = data_access::WellKnownFields.find(var_name); - if(wkf != data_access::WellKnownFields.end()){ - units = units.empty() ? std::get<1>(wkf->second) : units; - auto wkf_name = std::get<0>(wkf->second); - LOG("CsvProvider has well-known name '" + wkf_name + "' for variable '" + var_name + "' with units '" + units + "'", LogLevel::DEBUG); - available_forcings.push_back(var_name); // Allow lookup by non-canonical name - available_forcings_units[var_name] = units; // Allow lookup of units by non-canonical name - var_name = wkf_name; // Use the CSDMS name from here on - } + LOG("CsvProvider has variable '" + var_name + "' with units '" + units + "'", LogLevel::DEBUG); - forcing_vectors[var_name] = {}; - local_valvec_index.push_back(&(forcing_vectors[var_name])); - available_forcings.push_back(var_name); - available_forcings_units[var_name] = units; + auto wkf = data_access::WellKnownFields.find(var_name); + if(wkf != data_access::WellKnownFields.end()){ + units = units.empty() ? 
std::get<1>(wkf->second) : units; + auto wkf_name = std::get<0>(wkf->second); + LOG("CsvProvider has well-known name '" + wkf_name + "' for variable '" + var_name + "' with units '" + units + "'", LogLevel::DEBUG); + available_forcings.push_back(var_name); // Allow lookup by non-canonical name + available_forcings_units[var_name] = units; // Allow lookup of units by non-canonical name + var_name = wkf_name; // Use the CSDMS name from here on } - col_num++; + + forcing_vectors[var_name] = {}; + local_valvec_index.push_back(&(forcing_vectors[var_name])); + available_forcings.push_back(var_name); + available_forcings_units[var_name] = units; } + col_num++; + } - time_t current_row_date_time_epoch; - //Iterate through CSV starting on the second row - int i = 1; - for (i = 1; i < data_list.size(); i++) - { - //Row vector - std::vector& vec = data_list[i]; - - struct tm current_row_date_time_utc = tm(); - std::string time_str = vec[time_col_index]; - //TODO: Support more time string formats? This is basically ISO8601 but not complete, support TZ? - strptime(time_str.c_str(), "%Y-%m-%d %H:%M:%S", &current_row_date_time_utc); - - //Convert current row date-time UTC to epoch time - current_row_date_time_epoch = timegm(&current_row_date_time_utc); - - //TODO: I am not sure this is a concern of this object. If forcing is retrieved that doesn't cover the - //needed time period, isn't that the requester's concern? (Methods exist to check this...) - //Ensure that forcing data covers the entire model period. Otherwise, throw an error.
- if (i == 1 && start_date_time_epoch < current_row_date_time_epoch) - { - struct tm start_date_tm; - gmtime_r(&start_date_time_epoch, &start_date_tm); - - char tm_buff[128]; - strftime(tm_buff, 128, "%Y-%m-%d %H:%M:%S", &start_date_tm); - std::string throw_msg; throw_msg.assign("Error: Forcing data " + file_name + " begins after the model start time:" + std::string(tm_buff) + " < " + time_str); - LOG(throw_msg, LogLevel::WARNING); - throw std::out_of_range(throw_msg); - } + time_t current_row_date_time_epoch; + //Iterate through CSV starting on the second row + int i = 1; + for (i = 1; i < data_list.size(); i++) { + //Row vector + std::vector& vec = data_list[i]; + + struct tm current_row_date_time_utc = tm(); + std::string time_str = vec[time_col_index]; + //TODO: Support more time string formats? This is basically ISO8601 but not complete, support TZ? + strptime(time_str.c_str(), "%Y-%m-%d %H:%M:%S", &current_row_date_time_utc); + + //Convert current row date-time UTC to epoch time + current_row_date_time_epoch = timegm(&current_row_date_time_utc); + + //TODO: I am not sure this is a concern of this object. If forcing is retrieved that doesn't cover the + //needed time period, isn't that the requester's concern? (Methods exist to check this...) + //Ensure that forcing data covers the entire model period. Otherwise, throw an error.
+ if (i == 1 && start_date_time_epoch < current_row_date_time_epoch) { + struct tm start_date_tm; + gmtime_r(&start_date_time_epoch, &start_date_tm); + + char tm_buff[128]; + strftime(tm_buff, 128, "%Y-%m-%d %H:%M:%S", &start_date_tm); + std::string throw_msg; throw_msg.assign("Error: Forcing data " + file_name + " begins after the model start time:" + std::string(tm_buff) + " < " + time_str); + LOG(throw_msg, LogLevel::WARNING); + throw std::out_of_range(throw_msg); + } - - if (start_date_time_epoch <= current_row_date_time_epoch && current_row_date_time_epoch <= end_date_time_epoch) - { - time_epoch_vector.push_back(current_row_date_time_epoch); - - int c = -1; - for (auto& s : vec){ - c++; - if(c == time_col_index) - continue; - boost::algorithm::trim(s); - local_valvec_index[c]->push_back(boost::lexical_cast(s)); // This is supposed to update the vector in the map... - } + if (start_date_time_epoch <= current_row_date_time_epoch && current_row_date_time_epoch <= end_date_time_epoch) { + time_epoch_vector.push_back(current_row_date_time_epoch); + + int c = -1; + for (auto& s : vec){ + c++; + if(c == time_col_index) + continue; + boost::algorithm::trim(s); + local_valvec_index[c]->push_back(boost::lexical_cast(s)); // This is supposed to update the vector in the map... } } - if (i <= 1 || current_row_date_time_epoch < end_date_time_epoch) - { - /// \todo TODO: Return appropriate error - std::stringstream ss; - ss << "WARNING: Forcing data ends before the model end time." 
<< std::endl; - LOG(ss.str(), LogLevel::SEVERE); ss.str(""); - //std::string throw_msg; throw_msg.assign("Error: Forcing data ends before the model end time."); - } - time_t duration = record_duration(); - for (size_t i = 1; i < time_epoch_vector.size(); ++i) { - time_t difference = time_epoch_vector[i] - time_epoch_vector[i-1]; - if (difference != duration) { - Logger::logMsgAndThrowError("Time intervals in forcing file '" + forcing_file_name + "' are not constant at row " + std::to_string(i)); - } + } + if (i <= 1 || current_row_date_time_epoch < end_date_time_epoch) { + /// \todo TODO: Return appropriate error + std::stringstream ss; + ss << "WARNING: Forcing data ends before the model end time." << std::endl; + LOG(ss.str(), LogLevel::SEVERE); ss.str(""); + //std::string throw_msg; throw_msg.assign("Error: Forcing data ends before the model end time."); + } + + time_t duration = record_duration(); + for (size_t i = 1; i < time_epoch_vector.size(); ++i) { + time_t difference = time_epoch_vector[i] - time_epoch_vector[i-1]; + if (difference != duration) { + Logger::logMsgAndThrowError("Time intervals in forcing file '" + forcing_file_name + "' are not constant at row " + std::to_string(i)); } } +} From 4250e887f0111d71ef33e7a5870e9c407e4f8457 Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Wed, 19 Nov 2025 13:24:06 -0800 Subject: [PATCH 8/8] CSV Forcings: Trap and report more time/duration errors and limitations --- src/forcing/CsvPerFeatureForcingProvider.cpp | 21 +++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/src/forcing/CsvPerFeatureForcingProvider.cpp b/src/forcing/CsvPerFeatureForcingProvider.cpp index e1d6f1cb1c..70378a3103 100644 --- a/src/forcing/CsvPerFeatureForcingProvider.cpp +++ b/src/forcing/CsvPerFeatureForcingProvider.cpp @@ -50,12 +50,14 @@ size_t CsvPerFeatureForcingProvider::get_ts_index_for_time(const time_t &epoch_ } double CsvPerFeatureForcingProvider::get_value(const CatchmentAggrDataSelector& selector, 
data_access::ReSampleMethod m) { - size_t current_index; - long time_remaining = selector.get_duration_secs(); auto init_time = selector.get_init_time(); + auto duration = selector.get_duration_secs(); auto output_name = selector.get_variable_name(); auto output_units = selector.get_output_units(); + size_t current_index; + long time_remaining = selector.get_duration_secs(); + try { current_index = get_ts_index_for_time(init_time); } @@ -81,15 +83,17 @@ double CsvPerFeatureForcingProvider::get_value(const CatchmentAggrDataSelector& current_index++; while (time_remaining > 0) { - if(current_index >= time_epoch_vector.size()) - return involved_time_step_values[involved_time_step_values.size()-1]; //TODO: Is this the right answer? Is returning any value off the end of the range valid? + if (current_index >= time_epoch_vector.size()) { + // XXX There may be callers almost-reasonably relying on being able to request data extending one record_duration() past the end + Logger::logMsgAndThrowError("Requested forcing value runs past the end of the data from CSV file '" + forcing_file_name + "'"); + } ts_involved_s = time_remaining > 3600 ? 3600 : time_remaining; involved_time_step_seconds.push_back(ts_involved_s); involved_time_step_values.push_back(get_value_for_param_name(output_name, current_index)); time_remaining -= ts_involved_s; current_index++; - } + double value = 0; for (size_t i = 0; i < involved_time_step_values.size(); ++i) { if (is_param_sum_over_time_step(output_name)) @@ -273,12 +277,15 @@ void CsvPerFeatureForcingProvider::read_csv(std::string const& file_name) { if (i <= 1 || current_row_date_time_epoch < end_date_time_epoch) { /// \todo TODO: Return appropriate error std::stringstream ss; - ss << "WARNING: Forcing data ends before the model end time." 
<< std::endl; - LOG(ss.str(), LogLevel::SEVERE); ss.str(""); + ss << "CSV Forcing data ends before the model end time in file '" << forcing_file_name << "'"; + LOG(ss.str(), LogLevel::SEVERE); //std::string throw_msg; throw_msg.assign("Error: Forcing data ends before the model end time."); } time_t duration = record_duration(); + if (duration != 3600) { + Logger::logMsgAndThrowError("CSV reader is hard-coded for hour-long records"); + } for (size_t i = 1; i < time_epoch_vector.size(); ++i) { time_t difference = time_epoch_vector[i] - time_epoch_vector[i-1]; if (difference != duration) {