Skip to content

Commit

Permalink
Update for v4.1: Add new seeds + update checker + update compliance t…
Browse files Browse the repository at this point in the history
…est table (mlcommons#1736)
  • Loading branch information
pgmpablo157321 authored Jun 20, 2024
1 parent 44ae828 commit a536cd2
Show file tree
Hide file tree
Showing 5 changed files with 239 additions and 34 deletions.
5 changes: 3 additions & 2 deletions compliance/nvidia/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,5 +37,6 @@ The `run_verification.py` found in each test directory will copy the test files
| 3d-unet | [TEST01](./TEST01/), [TEST05](./TEST05/) |
| rnnt | [TEST01](./TEST01/), [TEST05](./TEST05/) |
| gpt-j | - |
| stable-diffusion-xl | - |
| Llama2-70b | [TEST06]() |
| stable-diffusion-xl | [TEST01](./TEST01/), [TEST04](./TEST04/), [TEST05](./TEST05/) |
| Llama2-70b | [TEST06](./TEST06/) |
| mixtral-8x7b | [TEST06](./TEST06/) |
12 changes: 6 additions & 6 deletions mlperf.conf
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,13 @@ stable-diffusion-xl.*.performance_sample_count_override = 5000
3d-unet.*.performance_sample_count_override = 0

# Set seeds. The seeds will be distributed two weeks before the submission.
*.*.qsl_rng_seed = 13281865557512327830
*.*.sample_index_rng_seed = 198141574272810017
*.*.schedule_rng_seed = 7575108116881280410
*.*.qsl_rng_seed = 3066443479025735752
*.*.sample_index_rng_seed = 10688027786191513374
*.*.schedule_rng_seed = 14962580496156340209
# Set seeds for TEST_05. The seeds will be distributed two weeks before the submission.
*.*.test05_qsl_rng_seed = 2376919268182438552
*.*.test05_sample_index_rng_seed = 11176391829184272374
*.*.test05_schedule_rng_seed = 3911940905271271337
*.*.test05_qsl_rng_seed = 16799458546791641818
*.*.test05_sample_index_rng_seed = 5453809927556429288
*.*.test05_schedule_rng_seed = 5435552105434836064


*.SingleStream.target_latency_percentile = 90
Expand Down
2 changes: 1 addition & 1 deletion text_to_image/tools/sample_ids.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def get_args():
"--n", type=int, default=10, help="Dataset download location"
)
parser.add_argument(
"--seed", "-s", type=int, default=926019364, help="Dataset download location"
"--seed", "-s", type=int, default=633994880, help="Dataset download location"
)
args = parser.parse_args()
return args
Expand Down
20 changes: 10 additions & 10 deletions text_to_image/tools/sample_ids.txt
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
4459
4015
2705
1682
4048
4683
3757
1578
3319
95
4655
2569
1303
109
4509
3009
2179
1826
2094
3340
234 changes: 219 additions & 15 deletions tools/submission/submission_checker.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,169 @@
"stable-diffusion-xl": {"SingleStream": 1024, "Server": 270336, "Offline": 1}
},
},
"v4.1": {
"models": [
"resnet",
"retinanet",
"bert-99",
"bert-99.9",
"dlrm-v2-99",
"dlrm-v2-99.9",
"3d-unet-99",
"3d-unet-99.9",
"gptj-99",
"gptj-99.9",
"llama2-70b-99",
"llama2-70b-99.9",
"stable-diffusion-xl",
"mixtral-8x7b"
],
"required-scenarios-datacenter": {
"resnet": ["Server", "Offline"],
"retinanet": ["Server", "Offline"],
"bert-99": ["Server", "Offline"],
"bert-99.9": ["Server", "Offline"],
"dlrm-v2-99": ["Server", "Offline"],
"dlrm-v2-99.9": ["Server", "Offline"],
"3d-unet-99": ["Offline"],
"3d-unet-99.9": ["Offline"],
"gptj-99": ["Server", "Offline"],
"gptj-99.9": ["Server", "Offline"],
"llama2-70b-99": ["Server", "Offline"],
"llama2-70b-99.9": ["Server", "Offline"],
"stable-diffusion-xl": ["Server", "Offline"],
"mixtral-8x7b": ["Server", "Offline"]
},
"optional-scenarios-datacenter": {},
"required-scenarios-edge": {
"resnet": ["SingleStream", "MultiStream", "Offline"],
"retinanet": ["SingleStream", "MultiStream", "Offline"],
"bert-99": ["SingleStream", "Offline"],
"3d-unet-99": ["SingleStream", "Offline"],
"3d-unet-99.9": ["SingleStream", "Offline"],
"gptj-99": ["SingleStream", "Offline"],
"gptj-99.9": ["SingleStream", "Offline"],
"stable-diffusion-xl": ["SingleStream", "Offline"],
},
"optional-scenarios-edge": {},
"required-scenarios-datacenter-edge": {
"resnet": ["SingleStream", "Offline", "MultiStream", "Server"],
"retinanet": ["SingleStream", "Offline", "MultiStream", "Server"],
"bert-99": ["SingleStream", "Offline", "Server"],
"bert-99.9": ["Offline", "Server"],
"dlrm-v2-99": ["Offline", "Server"],
"dlrm-v2-99.9": ["Offline", "Server"],
"3d-unet-99": ["SingleStream", "Offline"],
"3d-unet-99.9": ["SingleStream", "Offline"],
"gptj-99": ["SingleStream", "Offline", "Server"],
"gptj-99.9": ["SingleStream", "Offline", "Server"],
"llama2-70b-99": ["Server", "Offline"],
"llama2-70b-99.9": ["Server", "Offline"],
"stable-diffusion-xl": ["SingleStream", "Offline", "Server"],
"mixtral-8x7b": ["SingleStream""Server", "Offline"]
},
"optional-scenarios-datacenter-edge": {},
"accuracy-target": {
"resnet": ("acc", 76.46 * 0.99),
"retinanet": ("mAP", 37.55 * 0.99),
"bert-99": ("F1", 90.874 * 0.99),
"bert-99.9": ("F1", 90.874 * 0.999),
"dlrm-v2-99": ("AUC", 80.31 * 0.99),
"dlrm-v2-99.9": ("AUC", 80.31 * 0.999),
"3d-unet-99": ("DICE", 0.86170 * 0.99),
"3d-unet-99.9": ("DICE", 0.86170 * 0.999),
"gptj-99" : ("ROUGE1", 42.9865 * 0.99, "ROUGE2", 20.1235 * 0.99, "ROUGEL", 29.9881 * 0.99, "GEN_LEN", 4016878*0.9),
"gptj-99.9" : ("ROUGE1", 42.9865 * 0.999, "ROUGE2", 20.1235 * 0.999, "ROUGEL", 29.9881 * 0.999, "GEN_LEN", 4016878*0.9),
"llama2-70b-99" : ("ROUGE1", 44.4312 * 0.99, "ROUGE2", 22.0352 * 0.99, "ROUGEL", 28.6162 * 0.99, "TOKENS_PER_SAMPLE", 294.45*0.9),
"llama2-70b-99.9" : ("ROUGE1", 44.4312 * 0.999, "ROUGE2", 22.0352 * 0.999, "ROUGEL", 28.6162 * 0.999, "TOKENS_PER_SAMPLE", 294.45*0.9),
"stable-diffusion-xl": ("CLIP_SCORE", 31.68631873, "FID_SCORE", 23.01085758),
# TODO: Mixtral metrics
# "mixtral-8x7b" : ("ROUGE1", X * 0.99, "ROUGE2", X * 0.99, "ROUGEL", X * 0.99, "TOKENS_PER_SAMPLE", X * 0.9, "gsm8k_accuracy": 73.78*0.99, "mbxp_accuracy": 60.12 * 0.99),
},
"accuracy-upper-limit": {
"stable-diffusion-xl": ("CLIP_SCORE", 31.81331801, "FID_SCORE", 23.95007626),
"llama2-70b-99" : ("TOKENS_PER_SAMPLE", 294.45*1.1),
"llama2-70b-99.9" : ("TOKENS_PER_SAMPLE", 294.45*1.1)
# "mixtral-8x7b" :("TOKENS_PER_SAMPLE", X * 0.9)
},
"performance-sample-count": {
"resnet": 1024,
"retinanet": 64,
"bert-99": 10833,
"bert-99.9": 10833,
"dlrm-v2-99": 204800,
"dlrm-v2-99.9": 204800,
"3d-unet-99": 43,
"3d-unet-99.9": 43,
"gptj-99": 13368,
"gptj-99.9": 13368,
"llama2-70b-99": 24576,
"llama2-70b-99.9": 24576,
"stable-diffusion-xl": 5000,
"mixtral-8x7b": 15000,
},
# TODO: Update this list.
"model_mapping": {
# map model names to the official mlperf model class
"ssd-resnet34": "retinanet",
"mobilenet": "resnet",
"resnet50": "resnet"
},
"seeds": {
# v4.1 official seeds (kept in sync with mlperf.conf)
"qsl_rng_seed": 3066443479025735752,
"sample_index_rng_seed": 10688027786191513374,
"schedule_rng_seed": 14962580496156340209,
},
"test05_seeds": {
# v4.1 official TEST05 seeds (kept in sync with mlperf.conf)
"qsl_rng_seed": 16799458546791641818,
"sample_index_rng_seed": 5453809927556429288,
"schedule_rng_seed": 5435552105434836064,
},
"ignore_errors": [],
"latency-constraint": {
"resnet": {"Server": 15000000},
"retinanet": {"Server": 100000000},
"bert-99": {"Server": 130000000},
"bert-99.9": {"Server": 130000000},
"dlrm-v2-99": {"Server": 60000000},
"dlrm-v2-99.9": {"Server": 60000000},
"gptj-99": {"Server": 20000000000},
"gptj-99.9": {"Server": 20000000000},
"llama2-70b-99": {"Server": 20000000000},
"llama2-70b-99.9": {"Server": 20000000000},
"stable-diffusion-xl" : {"Server": 20000000000}
# TODO: Mixtral metrics
# "mixtral-8x7b" : {"Server": 20000000000}
},
"min-queries": {
"resnet": {
"SingleStream": 1024,
"MultiStream": 270336,
"Server": 270336,
"Offline": 1,
},
"retinanet": {
"SingleStream": 1024,
"MultiStream": 270336,
"Server": 270336,
"Offline": 1,
},
"bert-99": {"SingleStream": 1024, "Server": 270336, "Offline": 1},
"bert-99.9": {"SingleStream": 1024, "Server": 270336, "Offline": 1},
"dlrm-v2-99": {"Server": 270336, "Offline": 1},
"dlrm-v2-99.9": {"Server": 270336, "Offline": 1},
"3d-unet-99": {"SingleStream": 1024, "Offline": 1},
"3d-unet-99.9": {"SingleStream": 1024, "Offline": 1},
"gptj-99": {"SingleStream": 1024, "Server": 270336, "Offline": 1},
"gptj-99.9": {"SingleStream": 1024, "Server": 270336, "Offline": 1},
"llama2-70b-99": {"SingleStream": 1024, "Server": 270336, "Offline": 1},
"llama2-70b-99.9": {"SingleStream": 1024, "Server": 270336, "Offline": 1},
"stable-diffusion-xl": {"SingleStream": 1024, "Server": 270336, "Offline": 1},
"mixtral-8x7b": {"SingleStream": 1024, "Server": 270336, "Offline": 1},
},
},
}

VALID_DIVISIONS = ["open", "closed", "network"]
Expand Down Expand Up @@ -221,6 +384,20 @@
"3319",
"95"
]
},
"v4.1": {
"images": [
"4655",
"2569",
"1303",
"109",
"4509",
"3009",
"2179",
"1826",
"2094",
"3340"
]
}
}
}
Expand Down Expand Up @@ -255,7 +432,8 @@
"gptj-99.9": 13368,
"llama2-70b-99": 24576,
"llama2-70b-99.9": 24576,
"stable-diffusion-xl": 5000
"stable-diffusion-xl": 5000,
"mixtral-8x7b": 15000
}

SCENARIO_MAPPING = {
Expand Down Expand Up @@ -302,8 +480,8 @@
},
"v4.1": {
"llama2-70b-99": {
"Offline": "result_tokens_per_second",
"Server": "result_completed_tokens_per_second",
"Offline": "result_tokens_per_second",
"Server": "result_completed_tokens_per_second",
},
"llama2-70b-99.9": {
"Offline": "result_tokens_per_second",
Expand All @@ -316,16 +494,33 @@
"gptj-99.9": {
"Offline": "result_inferred_tokens_per_second",
"Server": "result_inferred_completed_tokens_per_second",
},
"mixtral-8x7b": {
"Offline": "result_tokens_per_second",
"Server": "result_completed_tokens_per_second",
}
}
}

LLAMA2_LATENCY_LIMITS = {
# We might add interactive in the next round. Latency in ns
"conversational": {
"ttft": 2000 * 1000000,
"tpot": 200 * 1000000
}
# Per-model latency limits for interactive LLM serving, in nanoseconds.
# Each model maps to named constraint profiles; the "conversational" profile
# bounds ttft (time to first token) and tpot (time per output token).
_NS_PER_MS = 1000000

LLM_LATENCY_LIMITS = {
    benchmark: {
        "conversational": {
            "ttft": 2000 * _NS_PER_MS,
            "tpot": 200 * _NS_PER_MS,
        }
    }
    for benchmark in ("llama2-70b-99", "llama2-70b-99.9")
}
# TODO: enable once mixtral interactive limits are finalized:
# LLM_LATENCY_LIMITS["mixtral-8x7b"] = {
#     "conversational": {"ttft": 2000 * _NS_PER_MS, "tpot": 200 * _NS_PER_MS}
# }

ACC_PATTERN = {
Expand Down Expand Up @@ -799,13 +994,13 @@ def check_accuracy_dir(config, model, path, verbose):
return is_valid, result_acc


def extra_check_llama2(mlperf_log, scenario):
def extra_check_llm(mlperf_log, scenario, model):
if (mlperf_log["requested_use_token_latencies"]):
if scenario == "Offline":
# For offline no further checks are necessary
return None, True
else:
for constraint, limits in LLAMA2_LATENCY_LIMITS.items():
for constraint, limits in LLM_LATENCY_LIMITS[model].items():
if mlperf_log["result_first_token_99.00_percentile_latency_ns"] < limits["ttft"] and mlperf_log["result_time_per_output_token_99.00_percentile_ns"] < limits["tpot"]:
return constraint, True
else:
Expand Down Expand Up @@ -867,8 +1062,8 @@ def check_performance_dir(
res = float(mlperf_log[RESULT_FIELD_BENCHMARK_OVERWRITE[version][model][scenario]])


if model in ["llama2-70b-99", "llama2-70b-99.9"]:
llama_constraint, is_valid = extra_check_llama2(mlperf_log, scenario_fixed)
if model in ["llama2-70b-99", "llama2-70b-99.9", "mixtral-8x7b"]:
llama_constraint, is_valid = extra_check_llm(mlperf_log, scenario_fixed, model)

latency_99_percentile = mlperf_log["result_99.00_percentile_latency_ns"]
latency_mean = mlperf_log["result_mean_latency_ns"]
Expand Down Expand Up @@ -2344,8 +2539,7 @@ def check_compliance_dir(
"gptj-99.9",
"llama2-70b-99",
"llama2-70b-99.9",
"stable-diffusion-xl"

"mixtral-8x7b"
]:
test_list.remove("TEST04")

Expand All @@ -2355,13 +2549,23 @@ def check_compliance_dir(
"llama2-70b-99",
"llama2-70b-99.9",
"stable-diffusion-xl"
"mixtral-8x7b"
]:
test_list.remove("TEST05")

if model in [
"gptj-99",
"gptj-99.9",
"llama2-70b-99",
"llama2-70b-99.9",
"mixtral-8x7b"
]:
test_list.remove("TEST01")

if model in [
"llama2-70b-99",
"llama2-70b-99.9",
"mixtral-8x7b"
]:
test_list.append("TEST06")

Expand Down

0 comments on commit a536cd2

Please sign in to comment.