Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
f1df5f8
add deepseek constant
denys-fridman Dec 2, 2025
d2f27fe
add deepseek to compliance check
denys-fridman Dec 2, 2025
63db84d
rm closed_bert.yaml
denys-fridman Feb 6, 2026
1de3bbe
update deepseek values
denys-fridman Feb 6, 2026
3e965eb
rm closed_retinanet.yaml
denys-fridman Feb 6, 2026
bcbef0a
rm unused configs + update deepseek
denys-fridman Feb 6, 2026
3eca23b
fix deepseek name
denys-fridman Feb 6, 2026
ec1d0aa
fix deepseek name
denys-fridman Feb 6, 2026
cded0c8
+DEEPSEEK_V3 -> +DEEPSEEK_V3_671B
denys-fridman Feb 6, 2026
c528ecb
add deepseek_v3_671b RCP support for training 6.0
denys-fridman Feb 24, 2026
adff4f6
update deepseek rcps: set Creator to NVIDIA and Platform to GB300
denys-fridman Feb 24, 2026
749650a
rename deepseek_v3_671b -> deepseekv3_671b across files and filenames
denys-fridman Feb 25, 2026
e8300ba
add opt_base_learning_rate check for deepseekv3_671b (sqrt scaling fr…
denys-fridman Feb 25, 2026
1dc9e8d
add max_steps and decay_steps checks for deepseekv3_671b
denys-fridman Feb 25, 2026
d09e52f
apply same checks to open_deepseekv3_671b.yaml
denys-fridman Feb 25, 2026
1941059
update deepseekv3_671b target loss to 4.05
denys-fridman Feb 25, 2026
9ddf73a
set deepseekv3_671b submission runs to 5
denys-fridman Feb 25, 2026
3bb7acf
set deepseekv3_671b submission runs to 3
denys-fridman Feb 26, 2026
c3204bc
update deepseekv3_671b target loss to 3.6
denys-fridman Feb 26, 2026
b972604
update deepseekv3_671b platform to GB300 NVL72
denys-fridman Feb 26, 2026
e50cf5e
update deepseekv3_671b RCPs: use BS 15360/16384/18432
denys-fridman Feb 26, 2026
29f9b8b
add global_batch_size >= 15360 check for deepseekv3_671b
denys-fridman Feb 26, 2026
d11d808
restore checks and POSTs in open_deepseekv3_671b.yaml
denys-fridman Feb 26, 2026
48ca186
rename DEEPSEEK_V3_671B -> DEEPSEEKV3_671B in constants.py
denys-fridman Feb 26, 2026
462b442
remove checks from open_deepseekv3_671b.yaml
denys-fridman Feb 26, 2026
5ad02fb
rename DeepSeek-V3-671B -> DeepSeekV3-671B in result_summarizer config
denys-fridman Feb 26, 2026
46bcffb
Update platform value
ShriyaRishab Feb 26, 2026
a46ecdb
Update platform description in JSON file
ShriyaRishab Feb 26, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions mlperf_logging/benchmark_meta.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
# TODO: Update with official values
'llama31_8b': 10,
'flux1': 10,
'deepseekv3_671b': 3,
},

'hpc' : {
Expand Down Expand Up @@ -158,11 +159,12 @@
],
'6.0': [
'llama31_8b',
'dlrm_dcnv2',
'dlrm_dcnv2',
'flux1',
'llama2_70b_lora',
'llama31_405b'
]
'llama31_405b',
'deepseekv3_671b'
]
},

'hpc': {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
- KEY:
NAME: submission_benchmark
REQ: EXACTLY_ONE
CHECK: " v['value'] in ['flux1', 'dlrm_dcnv2', 'llama31_8b', 'llama2_70b_lora', 'llama31_405b'] "
CHECK: " v['value'] in ['flux1', 'dlrm_dcnv2', 'llama31_8b', 'llama2_70b_lora', 'llama31_405b', 'deepseekv3_671b'] "
POST: " enqueue_config('training_6.0.0/closed_{}.yaml'.format(v['value'])) "

- KEY:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
# Hyperparameter compliance checks for the closed-division DeepSeek-V3 671B
# benchmark (training 6.0.0). Each KEY is evaluated by the MLPerf logging
# compliance checker: CHECK runs with the logged record bound to v, and POST
# stores values into the shared state dict s for use by later checks.
# Ordering matters: a CHECK that reads s must appear after the KEY whose POST
# populated it (e.g. opt_base_learning_rate reads s['global_batch_size']).

- KEY:
    NAME: global_batch_size
    REQ: EXACTLY_ONE
    # Smallest batch size allowed; reference RCPs exist for 15360/16384/18432.
    CHECK: " v['value'] >= 15360 "
    POST: >
      s['global_batch_size'] = v['value']

- KEY:
    NAME: max_sequence_length
    REQ: EXACTLY_ONE
    CHECK: " v['value'] == 4096 "

- KEY:
    NAME: opt_name
    REQ: EXACTLY_ONE
    CHECK: " v['value'] == 'adamw' "

- KEY:
    NAME: opt_base_learning_rate
    REQ: EXACTLY_ONE
    # Square-root LR scaling anchored at 2.4e-5 for global batch size 16384;
    # the 1e-9 absolute tolerance absorbs rounding in submitted values.
    CHECK: " abs(v['value'] - 2.4e-05 * (s['global_batch_size'] / 16384) ** 0.5) < 1e-9 "

- KEY:
    NAME: max_steps
    REQ: EXACTLY_ONE
    CHECK: " v['value'] == 12000 "
    POST: >
      s['max_steps'] = v['value']

- KEY:
    NAME: opt_learning_rate_warmup_steps
    REQ: EXACTLY_ONE
    CHECK: " v['value'] == 4 "
    POST: >
      s['opt_learning_rate_warmup_steps'] = v['value']

- KEY:
    NAME: opt_learning_rate_decay_steps
    REQ: EXACTLY_ONE
    # Decay must span every step after warmup.
    CHECK: " v['value'] == s['max_steps'] - s['opt_learning_rate_warmup_steps'] "

- KEY:
    NAME: opt_learning_rate_decay_schedule
    REQ: EXACTLY_ONE
    CHECK: " v['value'] == 'cosine with linear warmup' "

- KEY:
    NAME: opt_adamw_beta_1
    REQ: EXACTLY_ONE
    CHECK: " v['value'] == 0.9 "

- KEY:
    NAME: opt_adamw_beta_2
    REQ: EXACTLY_ONE
    CHECK: " v['value'] == 0.95 "

- KEY:
    NAME: opt_adamw_epsilon
    REQ: EXACTLY_ONE
    CHECK: " v['value'] == 1e-08 "

- KEY:
    NAME: opt_adamw_weight_decay
    REQ: EXACTLY_ONE
    CHECK: " v['value'] == 0.1 "

- KEY:
    NAME: opt_gradient_clip_norm
    REQ: EXACTLY_ONE
    CHECK: " v['value'] == 1.0 "

- KEY:
    NAME: gradient_accumulation_steps
    REQ: EXACTLY_ONE
    # Accumulation factor is free in the closed division; only positivity is checked.
    CHECK: " v['value'] > 0 "

- KEY:
    NAME: eval_samples
    REQ: EXACTLY_ONE
    CHECK: " v['value'] == 1024 "

- KEY:
    NAME: eval_accuracy
    REQ: AT_LEAST_ONE
    # Every eval record must carry 'samples_count' metadata (consumed by the
    # RCP checker); at least one record must reach the 3.6 target loss.
    CHECK:
      - "'samples_count' in v['metadata']"
    ATLEAST_ONE_CHECK: "(v['value'] <= 3.6) and v['value'] > 0.0"
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@
- KEY:
NAME: submission_benchmark
REQ: EXACTLY_ONE
CHECK: " v['value'] in ['flux1', 'dlrm_dcnv2', 'llama31_8b', 'llama2_70b_lora', 'llama31_405b'] "
CHECK: " v['value'] in ['flux1', 'dlrm_dcnv2', 'llama31_8b', 'llama2_70b_lora', 'llama31_405b', 'deepseekv3_671b'] "
POST: " enqueue_config('training_6.0.0/open_{}.yaml'.format(v['value'])) "
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
# Hyperparameter compliance checks for the open-division DeepSeek-V3 671B
# benchmark (training 6.0.0). The open division permits hyperparameter tuning,
# so most optimizer keys below only require presence (REQ) with no value CHECK.

- KEY:
    NAME: global_batch_size
    REQ: EXACTLY_ONE
    CHECK: " v['value'] >= 15360 "
    POST: >
      s['global_batch_size'] = v['value']

- KEY:
    NAME: max_sequence_length
    REQ: EXACTLY_ONE
    CHECK: " v['value'] == 4096 "

- KEY:
    NAME: opt_name
    REQ: EXACTLY_ONE
    CHECK: " v['value'] == 'adamw' "

- KEY:
    NAME: max_steps
    REQ: EXACTLY_ONE

- KEY:
    NAME: opt_base_learning_rate
    REQ: EXACTLY_ONE

- KEY:
    NAME: opt_learning_rate_warmup_steps
    REQ: EXACTLY_ONE

- KEY:
    NAME: opt_learning_rate_decay_steps
    REQ: EXACTLY_ONE

- KEY:
    NAME: opt_learning_rate_decay_schedule
    REQ: EXACTLY_ONE

- KEY:
    NAME: opt_adamw_beta_1
    REQ: EXACTLY_ONE

- KEY:
    NAME: opt_adamw_beta_2
    REQ: EXACTLY_ONE

- KEY:
    NAME: opt_adamw_epsilon
    REQ: EXACTLY_ONE

- KEY:
    NAME: opt_adamw_weight_decay
    REQ: EXACTLY_ONE

- KEY:
    NAME: opt_gradient_clip_norm
    REQ: EXACTLY_ONE

- KEY:
    NAME: gradient_accumulation_steps
    REQ: EXACTLY_ONE
    CHECK: " v['value'] > 0 "

- KEY:
    NAME: eval_samples
    REQ: EXACTLY_ONE
    CHECK: " v['value'] == 1024 "

- KEY:
    NAME: eval_accuracy
    REQ: AT_LEAST_ONE
    CHECK:
      # Fix: was "'epoch_num' in v['metadata']". The closed-division config for
      # this benchmark and the rcp_checker both read 'samples_count' from
      # eval_accuracy metadata for deepseekv3_671b, so the open division must
      # log the same field or downstream parsing fails.
      - "'samples_count' in v['metadata']"
    ATLEAST_ONE_CHECK: "(v['value'] <= 3.6) and v['value'] > 0.0"

1 change: 1 addition & 0 deletions mlperf_logging/mllog/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@
LLAMA31_405B = "llama31_405b"
LLAMA31_8B = "llama31_8b"
FLUX1 = "flux1"
DEEPSEEKV3_671B = "deepseekv3_671b"

# Constant values - model info
ADAGRAD = "adagrad"
Expand Down
3 changes: 2 additions & 1 deletion mlperf_logging/rcp_checker/rcp_checker.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ def is_version_at_least(version, min_version):
'flux1': 10,
'llama31_405b': 3,
'llama31_8b': 10,
'deepseekv3_671b': 3,
},
"hpc": {
'cosmoflow': 10,
Expand Down Expand Up @@ -109,7 +110,7 @@ def read_submission_file(result_file, ruleset, use_train_samples):
eval_metric = json.loads(eval_accuracy_str)["metadata"]["metric"]
eval_score = json.loads(eval_accuracy_str)["value"]
stable_diffusion_eval_results[eval_step][eval_metric] = eval_score
elif benchmark in {"llama2_70b_lora", "flux1", "llama31_405b", "llama31_8b"} and ("eval_error" in str or "eval_accuracy" in str):
elif benchmark in {"llama2_70b_lora", "flux1", "llama31_405b", "llama31_8b", "deepseekv3_671b"} and ("eval_error" in str or "eval_accuracy" in str):
eval_accuracy_str = str
conv_epoch = json.loads(eval_accuracy_str)["metadata"]["samples_count"]
eval_score = json.loads(eval_accuracy_str)["value"]
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
{
"deepseekv3_671b_ref_15360":
{
"Benchmark": "deepseekv3_671b",
"Creator": "NVIDIA",
"When": "Reference RCPs before 6.0 submission",
"Platform": "64 NVIDIA GB300 nodes",
"Precision": "BF16",
"BS": 15360,
"Hyperparams": {
"opt_base_learning_rate": 0.000023238,
"opt_learning_rate_warmup_steps": 4,
"max_steps": 12000,
"gradient_accumulation_steps": 240
},
"Epochs to converge": [
// TODO(dfridman)
]
},

"deepseekv3_671b_ref_16384":
{
"Benchmark": "deepseekv3_671b",
"Creator": "NVIDIA",
"When": "Reference RCPs before 6.0 submission",
"Platform": "64 NVIDIA GB300 nodes",
"Precision": "BF16",
"BS": 16384,
"Hyperparams": {
"opt_base_learning_rate": 0.000024,
"opt_learning_rate_warmup_steps": 4,
"max_steps": 12000,
"gradient_accumulation_steps": 256
},
"Epochs to converge": [
// TODO(dfridman)
]
},

"deepseekv3_671b_ref_18432":
{
"Benchmark": "deepseekv3_671b",
"Creator": "NVIDIA",
"When": "Reference RCPs before 6.0 submission",
"Platform": "64 NVIDIA GB300 nodes",
"Precision": "BF16",
"BS": 18432,
"Hyperparams": {
"opt_base_learning_rate": 0.000025456,
"opt_learning_rate_warmup_steps": 4,
"max_steps": 12000,
"gradient_accumulation_steps": 288
},
"Epochs to converge": [
// TODO(dfridman)
]
}
}
3 changes: 2 additions & 1 deletion mlperf_logging/result_summarizer/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -103,11 +103,12 @@ columns:
llama31_405b: ["Benchmark results (minutes)", "LLM", "C4", "Llama31-405B"]
default: [" ", " ", " "]
"6.0.0":
dlrm_dcnv2: ["Benchmark results (minutes)", "Recommendation", "1TB Multihot Clickthrough", "DLRM DCNv2"]
dlrm_dcnv2: ["Benchmark results (minutes)", "Recommendation", "1TB Multihot Clickthrough", "DLRM DCNv2"]
flux1: ["Benchmark results (minutes)", "Text to image", "CC12M and Coco-2014 for eval", "Flux1"]
llama2_70b_lora: ["Benchmark results (minutes)", "LLM-Finetune", "SCROLSS Gov Report", "LLama2-70B-LoRA"]
llama31_8b: ["Benchmark results (minutes)", "Small LLM", "C4", "Llama31-8b"]
llama31_405b: ["Benchmark results (minutes)", "LLM", "C4", "Llama31-405B"]
deepseekv3_671b: ["Benchmark results (minutes)", "LLM", "C4", "DeepSeekV3-671B"]
default: [" ", " ", " "]

hpc:
Expand Down