From b492d9e640c8641b51aba8d82b19c444f6736557 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=B4me=20Eyraud?= Date: Wed, 22 Oct 2025 17:08:42 -0400 Subject: [PATCH 1/3] Ideas and comments --- nvbench/cupti_profiler.cxx | 34 +++++++++++++++++++-------------- nvbench/detail/measure_cupti.cu | 10 ++++++++++ 2 files changed, 30 insertions(+), 14 deletions(-) diff --git a/nvbench/cupti_profiler.cxx b/nvbench/cupti_profiler.cxx index 6ce1cf72..d0eee5b2 100644 --- a/nvbench/cupti_profiler.cxx +++ b/nvbench/cupti_profiler.cxx @@ -68,11 +68,17 @@ cupti_profiler::cupti_profiler(nvbench::device_info device, std::vector metric_names with the metrics that are available within the GPU. + Failing gracefully will enable the measurement of other metrics if a metric is not available + + Using NVPW_MetricsEvaluator_GetMetricNames to see what metrics are available, and removing from the vector the ones that are not available. + + */ initialize_nvpw(); initialize_config_image(); initialize_counter_data_prefix_image(); initialize_counter_data_image(); - m_available = true; } @@ -149,7 +155,7 @@ void cupti_profiler::initialize_nvpw() namespace { - +// Eval Requests converts a single Human Readable CUPTI metric to a CUPTI metric evaluation request. 
class eval_request { NVPW_MetricsEvaluator *evaluator_ptr; @@ -169,7 +175,7 @@ class eval_request nvpw_call(NVPW_MetricsEvaluator_ConvertMetricNameToMetricEvalRequest(¶ms)); } - + // Gets the dependencies of a metric name, i.e. what specific pointers are needed for a metric [[nodiscard]] std::vector get_raw_dependencies() { std::vector raw_dependencies; @@ -195,7 +201,7 @@ class eval_request NVPW_MetricEvalRequest request; }; - +// Is responsible for the initialisation of the metric evaluator, so it is used to do eval requests class metric_evaluator { bool initialized{}; @@ -214,7 +220,6 @@ class metric_evaluator NVPW_CUDA_MetricsEvaluator_CalculateScratchBufferSize_Params_STRUCT_SIZE; scratch_buffer_param.pChipName = chip_name.c_str(); scratch_buffer_param.pCounterAvailabilityImage = counter_availability_image; - nvpw_call(NVPW_CUDA_MetricsEvaluator_CalculateScratchBufferSize(&scratch_buffer_param)); scratch_buffer.resize(scratch_buffer_param.scratchBufferSize); @@ -260,7 +265,7 @@ class metric_evaluator namespace { - +// Gets the rawMetrics for each high-level metric and puts them into a RawMetricsRequest vector [[nodiscard]] std::vector get_raw_metric_requests(const std::string &chip_name, const std::vector &metric_names, @@ -298,7 +303,7 @@ get_raw_metric_requests(const std::string &chip_name, class metrics_config { bool initialized{}; - + // create the availability image for the chip name void create(const std::string &chip_name, const std::uint8_t *availability_image) { NVPW_CUDA_RawMetricsConfig_Create_V2_Params params{}; @@ -313,7 +318,7 @@ class metrics_config raw_metrics_config = params.pRawMetricsConfig; initialized = true; } - + // Put the availability image into the raw_metrics_config void set_availability_image(const std::uint8_t *availability_image) { NVPW_RawMetricsConfig_SetCounterAvailability_Params params{}; @@ -324,7 +329,7 @@ class metrics_config nvpw_call(NVPW_RawMetricsConfig_SetCounterAvailability(¶ms)); } - + // Create a new group of metrics to 
measure void begin_config_group() { NVPW_RawMetricsConfig_BeginPassGroup_Params params{}; @@ -334,7 +339,7 @@ class metrics_config nvpw_call(NVPW_RawMetricsConfig_BeginPassGroup(¶ms)); } - + // Add the array of rawMetrics to the actual config void add_metrics(const std::vector &raw_metric_requests) { NVPW_RawMetricsConfig_AddMetrics_Params params{}; @@ -346,7 +351,7 @@ class metrics_config nvpw_call(NVPW_RawMetricsConfig_AddMetrics(¶ms)); } - + // End the config group configuration void end_config_group() { NVPW_RawMetricsConfig_EndPassGroup_Params params{}; @@ -356,7 +361,7 @@ class metrics_config nvpw_call(NVPW_RawMetricsConfig_EndPassGroup(¶ms)); } - + // Finalize the image for the configuration void generate() { NVPW_RawMetricsConfig_GenerateConfigImage_Params params{}; @@ -368,6 +373,7 @@ class metrics_config } public: + // Initializes a metric config pass with the RawMetricRequests and builds its config image metrics_config(const std::string &chip_name, const std::vector &raw_metric_requests, const std::uint8_t *availability_image) @@ -380,7 +386,7 @@ class metrics_config end_config_group(); generate(); } - + // Retrieve the config image [[nodiscard]] std::vector get_config_image() { NVPW_RawMetricsConfig_GetConfigImage_Params params{}; @@ -691,7 +697,7 @@ void cupti_profiler::process_user_loop() cupti_call(cuptiProfilerEndSession(¶ms)); } } - +//Instead of returning a std::vector, maybe returning a map string -> double, with the string being the .pct name. 
std::vector cupti_profiler::get_counter_values() { metric_evaluator evaluator(m_chip_name, diff --git a/nvbench/detail/measure_cupti.cu b/nvbench/detail/measure_cupti.cu index 24028f2c..e6969ba9 100644 --- a/nvbench/detail/measure_cupti.cu +++ b/nvbench/detail/measure_cupti.cu @@ -159,7 +159,16 @@ std::vector add_metrics(nvbench::state &state) } } // namespace +/* + struct CustomCuptiMetrics{ + const char *metric_name; + const char *name; + const char *hint; + const char *description; + const double divider; + } +*/ measure_cupti_base::measure_cupti_base(state &exec_state) // clang-format off // (formatter doesn't handle `try :` very well...) @@ -167,6 +176,7 @@ try : m_state{exec_state} , m_launch{exec_state.get_cuda_stream()} , m_cupti{*m_state.get_device(), add_metrics(m_state)} + //Inside the state, have a std::vector, so when calling add_metrics, it adds the ones created by the user. {} // clang-format on catch (const std::exception &ex) From dd110d4acc977644b92c05a2a393554e05622b1f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=B4me=20Eyraud?= Date: Wed, 29 Oct 2025 11:09:55 -0400 Subject: [PATCH 2/3] trying to list metrics --- nvbench/cupti_profiler.cuh | 2 ++ nvbench/cupti_profiler.cxx | 28 ++++++++++++++++++++++++++-- 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/nvbench/cupti_profiler.cuh b/nvbench/cupti_profiler.cuh index 214706a7..d3351a8e 100644 --- a/nvbench/cupti_profiler.cuh +++ b/nvbench/cupti_profiler.cuh @@ -66,6 +66,7 @@ class cupti_profiler // Counter data std::vector m_metric_names; + std::vector m_verified_metric_names; std::vector m_data_image_prefix; std::vector m_config_image; std::vector m_data_image; @@ -116,6 +117,7 @@ private: void initialize_profiler(); void initialize_chip_name(); void initialize_availability_image(); + void verify_metric_names(); static void initialize_nvpw(); void initialize_config_image(); void initialize_counter_data_prefix_image(); diff --git a/nvbench/cupti_profiler.cxx 
b/nvbench/cupti_profiler.cxx index d0eee5b2..dea2f00f 100644 --- a/nvbench/cupti_profiler.cxx +++ b/nvbench/cupti_profiler.cxx @@ -146,6 +146,15 @@ void cupti_profiler::initialize_availability_image() cupti_call(cuptiProfilerGetCounterAvailability(¶ms)); } +void cupti_profiler::verify_metric_names() +{ + metric_evaluator evaluator(m_chip_name, m_availability_image.data()); + m_verified_metric_names = evaluator.list_metrics(); + for(std::string item:m_verified_metric_names){ + printf("%s\n", item.c_str());//TOCHECK + } +} + void cupti_profiler::initialize_nvpw() { NVPW_InitializeHost_Params params{}; @@ -239,6 +248,21 @@ class metric_evaluator evaluator_ptr = evaluator_params.pMetricsEvaluator; initialized = true; } + std::vector list_metrics(){ + std::vector available_metrics; + NVPW_MetricsEvaluator_GetMetricNames_Params list_metrics_params{}; + list_metrics_params.structSize = NVPW_MetricsEvaluator_GetMetricNames_Params_STRUCT_SIZE; + list_metrics_params.metricType = NVPW_MetricType::NVPW_METRIC_TYPE_THROUGHPUT; + list_metrics_params.pMetricsEvaluator = evaluator_ptr; + available_metrics.resize(list_metrics_params.numMetrics); + + nvpw_call(NVPW_MetricsEvaluator_GetMetricNames(&list_metrics_params)); + for (size_t metric_ix = 0; metric_ix < list_metrics_params.numMetrics; metric_ix++){ + size_t start_metric_ix = list_metrics_params.pMetricNameBeginIndices[metric_ix]; + available_metrics.push_back(&list_metrics_params.pMetricNames[start_metric_ix]); + } + return available_metrics; + } ~metric_evaluator() { @@ -303,7 +327,7 @@ get_raw_metric_requests(const std::string &chip_name, class metrics_config { bool initialized{}; - // create the availability image for the chip name + // Inits a rawMetricsConfig from an availability_image or chip_name void create(const std::string &chip_name, const std::uint8_t *availability_image) { NVPW_CUDA_RawMetricsConfig_Create_V2_Params params{}; @@ -514,7 +538,7 @@ void cupti_profiler::initialize_counter_data_prefix_image() 
namespace { - +// From the data_image_prefix, get the counter data image size [[nodiscard]] std::size_t get_counter_data_image_size(CUpti_Profiler_CounterDataImageOptions *options) { From 0639221528e582926ee761d32322900ab9f7eca4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=B4me=20Eyraud?= Date: Wed, 29 Oct 2025 12:16:44 -0400 Subject: [PATCH 3/3] listing available metrics is working --- nvbench/cupti_profiler.cxx | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/nvbench/cupti_profiler.cxx b/nvbench/cupti_profiler.cxx index dea2f00f..1fb7217e 100644 --- a/nvbench/cupti_profiler.cxx +++ b/nvbench/cupti_profiler.cxx @@ -68,6 +68,7 @@ cupti_profiler::cupti_profiler(nvbench::device_info device, std::vector metric_names with the metrics that are available within the GPU. Failing gracefully will enable the measurement of other metrics if a metric is not available @@ -146,14 +147,6 @@ void cupti_profiler::initialize_availability_image() cupti_call(cuptiProfilerGetCounterAvailability(¶ms)); } -void cupti_profiler::verify_metric_names() -{ - metric_evaluator evaluator(m_chip_name, m_availability_image.data()); - m_verified_metric_names = evaluator.list_metrics(); - for(std::string item:m_verified_metric_names){ - printf("%s\n", item.c_str());//TOCHECK - } -} void cupti_profiler::initialize_nvpw() { @@ -448,6 +441,17 @@ class metrics_config } // namespace +void cupti_profiler::verify_metric_names() +{ + metric_evaluator evaluator(m_chip_name, m_availability_image.data()); + m_verified_metric_names = evaluator.list_metrics(); + printf("Metrics \n");//TEMPORARY - Asserting functionality + for(std::string item:m_verified_metric_names){ + printf("%s\n", item.c_str()); + } + printf("---------------------\n"); +} + void cupti_profiler::initialize_config_image() { m_config_image = metrics_config(m_chip_name,