From 75b946566119cda2977182370da5e31aa3def9dd Mon Sep 17 00:00:00 2001 From: Arav Agarwal Date: Fri, 20 Feb 2026 12:49:21 -0500 Subject: [PATCH 1/5] Update summarizer to utilize id_json_file for public ids --- .../result_summarizer/result_summarizer.py | 63 ++++++++++++++++++- 1 file changed, 62 insertions(+), 1 deletion(-) diff --git a/mlperf_logging/result_summarizer/result_summarizer.py b/mlperf_logging/result_summarizer/result_summarizer.py index 1cd3fb7d..4cdcec8c 100644 --- a/mlperf_logging/result_summarizer/result_summarizer.py +++ b/mlperf_logging/result_summarizer/result_summarizer.py @@ -13,6 +13,7 @@ import itertools import pandas as pd import yaml +import hashlib from ..compliance_checker import mlp_compliance from ..compliance_checker.mlp_compliance import usage_choices, rule_choices @@ -691,6 +692,57 @@ def _fill_empty_benchmark_scores( benchmark_scores[benchmark] = None +def _add_id_to_summary(summary, id_json_path): + """Add public ids to the summary file based on the json in id_json_path, which is a list of sha256 ids where the position in the list is the id. If id_json_path is specified but the file does not exist, it is created from scratch. + + Args: + summary (pd.DataFrame): Summary dataframe. + id_json_path (str): Path to json file. 
+ """ + + id_json = [] + if os.path.exists(id_json_path): + with open(id_json_path, 'r') as f: + id_json = json.load(f) + + print(id_json) + + def get_hash(row): + columns_for_hashing = [ + 'division', + 'submitter', + 'system', + 'number_of_nodes', + 'host_processor_model_name', + 'host_processors_count', + 'accelerator_model_name', + 'accelerators_count', + 'framework' + ] + to_hash = ''.join(str(row[c]) for c in columns_for_hashing) + return hashlib.sha256(to_hash.encode('utf-8')).hexdigest() + + summary['hash'] = summary.apply(get_hash, axis=1) + + id_list = [] + + for elem in summary['hash']: + if elem in id_json: + id_list.append(id_json.index(elem) + 1) + else: + id_json.append(elem) + id_list.append(len(id_json)) + + summary['id'] = id_list + + with open(id_json_path, 'w') as f: + json.dump(id_json, f, indent=4) + + return summary + + + + def summarize_results(folder, usage, ruleset, csv_file=None, **kwargs): """Summarizes a set of results. @@ -837,6 +889,8 @@ def _check_and_update_system_specs(desc_keys, column_name, query=None): return strong_scaling_summary, weak_scaling_summary, power_summary, power_weak_scaling_summary + + def get_parser(): parser = argparse.ArgumentParser( prog='mlperf_logging.result_summarizer', @@ -857,6 +911,11 @@ def get_parser(): type=str, choices=rule_choices(), help='the ruleset such as 0.6.0, 0.7.0, or 1.0.0') + + parser.add_argument('--id_json_path', + type=str, + help='Path to id_json file to map runs to public ids. 
If specified but path is not found, file is created from scratch.') + parser.add_argument('--werror', action='store_true', help='Treat warnings as errors') @@ -874,6 +933,7 @@ def get_parser(): '--xlsx', type=str, help='Exports a xlsx of the results to the path specified') + return parser @@ -1042,7 +1102,8 @@ def _print_and_write(summaries, weak_scaling=False, mode='w', power = False): # Sort rows by their values summaries = summaries.sort_values(by=cols) - print(summaries) + summaries = _add_id_to_summary(summaries, args.id_json_path) + if args.csv is not None: csv = args.csv assert csv.endswith(".csv") From ba4e282695da25d2a9c82061f828598654998b32 Mon Sep 17 00:00:00 2001 From: Arav Agarwal Date: Fri, 20 Feb 2026 12:52:22 -0500 Subject: [PATCH 2/5] Add default for id_json --- mlperf_logging/result_summarizer/result_summarizer.py | 1 + 1 file changed, 1 insertion(+) diff --git a/mlperf_logging/result_summarizer/result_summarizer.py b/mlperf_logging/result_summarizer/result_summarizer.py index 4cdcec8c..b63e89e1 100644 --- a/mlperf_logging/result_summarizer/result_summarizer.py +++ b/mlperf_logging/result_summarizer/result_summarizer.py @@ -914,6 +914,7 @@ def get_parser(): parser.add_argument('--id_json_path', type=str, + default="id_list.json", help='Path to id_json file to map runs to public ids. 
If specified but path is not found, file is created from scratch.') parser.add_argument('--werror', From 32e29c0ca0dc09ac6dd0fad7df4151922a53de5a Mon Sep 17 00:00:00 2001 From: Arav Agarwal Date: Mon, 23 Feb 2026 18:58:20 -0500 Subject: [PATCH 3/5] Add error handling in case id_list points to existing non-compliant file --- mlperf_logging/result_summarizer/result_summarizer.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/mlperf_logging/result_summarizer/result_summarizer.py b/mlperf_logging/result_summarizer/result_summarizer.py index b63e89e1..aa65582f 100644 --- a/mlperf_logging/result_summarizer/result_summarizer.py +++ b/mlperf_logging/result_summarizer/result_summarizer.py @@ -704,7 +704,10 @@ def _add_id_to_summary(summary, id_json_path): if os.path.exists(id_json_path): with open(id_json_path, 'r') as f: id_json = json.load(f) - + + if not isinstance(id_json, list): + raise ValueError(f"id_json file {id_json_path} is not a list. Either delete the file or change the argument to a file that doesn't exist.") + print(id_json) def get_hash(row): From e40aefc17493891b2bc7109ef73d99155d5d6af0 Mon Sep 17 00:00:00 2001 From: Arav Agarwal Date: Wed, 25 Feb 2026 12:16:41 -0500 Subject: [PATCH 4/5] Remove debug statements --- mlperf_logging/result_summarizer/result_summarizer.py | 1 - 1 file changed, 1 deletion(-) diff --git a/mlperf_logging/result_summarizer/result_summarizer.py b/mlperf_logging/result_summarizer/result_summarizer.py index aa65582f..254bbd74 100644 --- a/mlperf_logging/result_summarizer/result_summarizer.py +++ b/mlperf_logging/result_summarizer/result_summarizer.py @@ -708,7 +708,6 @@ def _add_id_to_summary(summary, id_json_path): if not isinstance(id_json, list): raise ValueError(f"id_json file {id_json_path} is not a list. 
Either delete the file or change the argument to a file that doesn't exist.") - print(id_json) def get_hash(row): columns_for_hashing = [ From 014f69d1479e874f09e3b9c8140969eb8fa65803 Mon Sep 17 00:00:00 2001 From: Arav Agarwal Date: Wed, 25 Feb 2026 12:40:47 -0500 Subject: [PATCH 5/5] Ensure backwards compatibility by making feature opt-in --- mlperf_logging/result_summarizer/result_summarizer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mlperf_logging/result_summarizer/result_summarizer.py b/mlperf_logging/result_summarizer/result_summarizer.py index 254bbd74..2e8c19af 100644 --- a/mlperf_logging/result_summarizer/result_summarizer.py +++ b/mlperf_logging/result_summarizer/result_summarizer.py @@ -916,7 +916,6 @@ def get_parser(): parser.add_argument('--id_json_path', type=str, - default="id_list.json", help='Path to id_json file to map runs to public ids. If specified but path is not found, file is created from scratch.') parser.add_argument('--werror', @@ -1105,7 +1104,8 @@ def _print_and_write(summaries, weak_scaling=False, mode='w', power = False): # Sort rows by their values summaries = summaries.sort_values(by=cols) - summaries = _add_id_to_summary(summaries, args.id_json_path) + if args.id_json_path is not None: + summaries = _add_id_to_summary(summaries, args.id_json_path) if args.csv is not None: csv = args.csv