From 75b946566119cda2977182370da5e31aa3def9dd Mon Sep 17 00:00:00 2001
From: Arav Agarwal <aravagar@umich.edu>
Date: Fri, 20 Feb 2026 12:49:21 -0500
Subject: [PATCH 1/5] Update summarizer to utilize id_json_file for public ids

---
 .../result_summarizer/result_summarizer.py    | 63 ++++++++++++++++++-
 1 file changed, 62 insertions(+), 1 deletion(-)

diff --git a/mlperf_logging/result_summarizer/result_summarizer.py b/mlperf_logging/result_summarizer/result_summarizer.py
index 1cd3fb7d..4cdcec8c 100644
--- a/mlperf_logging/result_summarizer/result_summarizer.py
+++ b/mlperf_logging/result_summarizer/result_summarizer.py
@@ -13,6 +13,7 @@
 import itertools
 import pandas as pd
 import yaml
+import hashlib
 
 from ..compliance_checker import mlp_compliance
 from ..compliance_checker.mlp_compliance import usage_choices, rule_choices
@@ -691,6 +692,57 @@ def _fill_empty_benchmark_scores(
                 benchmark_scores[benchmark] = None
 
 
+def _add_id_to_summary(summary, id_json_path):
+    """Add public ids to the summary based on the JSON file at id_json_path, which holds a list of sha256 hashes where each hash's 1-based position in the list is its public id. Hashes not yet in the list are appended and the file is rewritten; if the file does not exist, it is created from scratch.
+
+    Args:
+        summary (pd.DataFrame): Summary dataframe.
+        id_json_path (str): Path to json file.
+    """
+    
+    id_json = []
+    if os.path.exists(id_json_path):
+        with open(id_json_path, 'r') as f:
+            id_json = json.load(f)
+    
+    print(id_json)
+    
+    def get_hash(row):
+        columns_for_hashing = [    
+            'division',
+            'submitter',
+            'system',
+            'number_of_nodes',
+            'host_processor_model_name',
+            'host_processors_count',
+            'accelerator_model_name',
+            'accelerators_count',
+            'framework'
+        ]
+        to_hash = ''.join(str(row[c]) for c in columns_for_hashing)
+        return hashlib.sha256(to_hash.encode('utf-8')).hexdigest()
+    
+    summary['hash'] = summary.apply(get_hash, axis=1)
+    
+    id_list = []
+
+    for elem in summary['hash']:
+        if elem in id_json:
+            id_list.append(id_json.index(elem) + 1)
+        else:
+            id_json.append(elem)
+            id_list.append(len(id_json))
+    
+    summary['id'] = id_list
+
+    with open(id_json_path, 'w') as f:
+        json.dump(id_json, f, indent=4)
+    
+    return summary
+
+     
+
+
 def summarize_results(folder, usage, ruleset, csv_file=None, **kwargs):
     """Summarizes a set of results.
 
@@ -837,6 +889,8 @@ def _check_and_update_system_specs(desc_keys, column_name, query=None):
     return strong_scaling_summary, weak_scaling_summary, power_summary, power_weak_scaling_summary
 
 
+
+
 def get_parser():
     parser = argparse.ArgumentParser(
         prog='mlperf_logging.result_summarizer',
@@ -857,6 +911,11 @@ def get_parser():
                         type=str,
                         choices=rule_choices(),
                         help='the ruleset such as 0.6.0, 0.7.0, or 1.0.0')
+    
+    parser.add_argument('--id_json_path',
+                        type=str,
+                        help='Path to id_json file to map runs to public ids. If specified but path is not found, file is created from scratch.')
+
     parser.add_argument('--werror',
                         action='store_true',
                         help='Treat warnings as errors')
@@ -874,6 +933,7 @@ def get_parser():
         '--xlsx',
         type=str,
         help='Exports a xlsx of the results to the path specified')
+    
 
     return parser
 
@@ -1042,7 +1102,8 @@ def _print_and_write(summaries, weak_scaling=False, mode='w', power = False):
 
             # Sort rows by their values
             summaries = summaries.sort_values(by=cols)
-            print(summaries)
+            summaries = _add_id_to_summary(summaries, args.id_json_path)
+            
             if args.csv is not None:
                 csv = args.csv
                 assert csv.endswith(".csv")

From ba4e282695da25d2a9c82061f828598654998b32 Mon Sep 17 00:00:00 2001
From: Arav Agarwal <aravagar@umich.edu>
Date: Fri, 20 Feb 2026 12:52:22 -0500
Subject: [PATCH 2/5] Add default for id_json

---
 mlperf_logging/result_summarizer/result_summarizer.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/mlperf_logging/result_summarizer/result_summarizer.py b/mlperf_logging/result_summarizer/result_summarizer.py
index 4cdcec8c..b63e89e1 100644
--- a/mlperf_logging/result_summarizer/result_summarizer.py
+++ b/mlperf_logging/result_summarizer/result_summarizer.py
@@ -914,6 +914,7 @@ def get_parser():
     
     parser.add_argument('--id_json_path',
                         type=str,
+                        default="id_list.json",
                         help='Path to id_json file to map runs to public ids. If specified but path is not found, file is created from scratch.')
 
     parser.add_argument('--werror',

From 32e29c0ca0dc09ac6dd0fad7df4151922a53de5a Mon Sep 17 00:00:00 2001
From: Arav Agarwal <aravagar@umich.edu>
Date: Mon, 23 Feb 2026 18:58:20 -0500
Subject: [PATCH 3/5] Add error handling in case id_list points to existing
 non-compliant file

---
 mlperf_logging/result_summarizer/result_summarizer.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/mlperf_logging/result_summarizer/result_summarizer.py b/mlperf_logging/result_summarizer/result_summarizer.py
index b63e89e1..aa65582f 100644
--- a/mlperf_logging/result_summarizer/result_summarizer.py
+++ b/mlperf_logging/result_summarizer/result_summarizer.py
@@ -704,7 +704,10 @@ def _add_id_to_summary(summary, id_json_path):
     if os.path.exists(id_json_path):
         with open(id_json_path, 'r') as f:
             id_json = json.load(f)
-    
+        
+        if not isinstance(id_json, list):
+            raise ValueError(f"id_json file {id_json_path} is not a list. Either delete the file or change the argument to a file that doesn't exist.")
+
     print(id_json)
     
     def get_hash(row):

From e40aefc17493891b2bc7109ef73d99155d5d6af0 Mon Sep 17 00:00:00 2001
From: Arav Agarwal <aravagar@umich.edu>
Date: Wed, 25 Feb 2026 12:16:41 -0500
Subject: [PATCH 4/5] Remove debug statements

---
 mlperf_logging/result_summarizer/result_summarizer.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/mlperf_logging/result_summarizer/result_summarizer.py b/mlperf_logging/result_summarizer/result_summarizer.py
index aa65582f..254bbd74 100644
--- a/mlperf_logging/result_summarizer/result_summarizer.py
+++ b/mlperf_logging/result_summarizer/result_summarizer.py
@@ -708,7 +708,6 @@ def _add_id_to_summary(summary, id_json_path):
         if not isinstance(id_json, list):
             raise ValueError(f"id_json file {id_json_path} is not a list. Either delete the file or change the argument to a file that doesn't exist.")
 
-    print(id_json)
     
     def get_hash(row):
         columns_for_hashing = [    

From 014f69d1479e874f09e3b9c8140969eb8fa65803 Mon Sep 17 00:00:00 2001
From: Arav Agarwal <aravagar@umich.edu>
Date: Wed, 25 Feb 2026 12:40:47 -0500
Subject: [PATCH 5/5] Ensure backwards compatibility by making feature opt-in

---
 mlperf_logging/result_summarizer/result_summarizer.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mlperf_logging/result_summarizer/result_summarizer.py b/mlperf_logging/result_summarizer/result_summarizer.py
index 254bbd74..2e8c19af 100644
--- a/mlperf_logging/result_summarizer/result_summarizer.py
+++ b/mlperf_logging/result_summarizer/result_summarizer.py
@@ -916,7 +916,6 @@ def get_parser():
     
     parser.add_argument('--id_json_path',
                         type=str,
-                        default="id_list.json",
                         help='Path to id_json file to map runs to public ids. If specified but path is not found, file is created from scratch.')
 
     parser.add_argument('--werror',
@@ -1105,7 +1104,8 @@ def _print_and_write(summaries, weak_scaling=False, mode='w', power = False):
 
             # Sort rows by their values
             summaries = summaries.sort_values(by=cols)
-            summaries = _add_id_to_summary(summaries, args.id_json_path)
+            if args.id_json_path is not None:  
+                summaries = _add_id_to_summary(summaries, args.id_json_path)
             
             if args.csv is not None:
                 csv = args.csv