-
Notifications
You must be signed in to change notification settings - Fork 0
/
exp_dis_train.py
75 lines (66 loc) · 2.18 KB
/
exp_dis_train.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
from util import Util, colors, markers
import matplotlib.pyplot as plt
# Model key -> top-level directory that holds that model's benchmark results.
MODEL_TO_DIR = dict(
    bert="text_classification_fp16",
    swin="Swin-Transformer",
    GPT="GPT",
)

# GPU count -> prefix placed in front of the hardware name when building the
# results path; single-GPU runs carry no prefix at all.
GPU_CNT_TO_NUM = {count: ("" if count == 1 else count) for count in (1, 2, 4)}

# Algorithm identifiers as they are matched against the result records
# (None stands for the unmodified baseline).
ALGOS = [None, "ckpt", "L1"]

# Algorithm identifier -> name used as the legend label and as the key of the
# collected data.
ALG_TO_NAME = {
    None: "exact",
    "ckpt": "checkpoint",
    "L1": "quantize",
}

# Display name -> key used to look up the marker and color in `util`.
ALGNAME_NORMALIZE = dict(
    exact="org",
    checkpoint="ckpt",
    quantize="quantize",
)

# File extension of the saved figure.
suffix = "pdf"
def _build_record_filter(model, alg):
    """Return the predicate that selects the result records belonging to *alg*.

    The record layout differs per model: "swin" records carry a separate
    ``ckpt`` flag, "GPT" records store the algorithm under ``alg``, and every
    other model stores it under ``algorithm``.
    """
    if model == "swin":
        if alg == "ckpt":
            # Checkpointing runs are identified by the flag alone.
            # Equality (not truthiness) is kept so matching is unchanged.
            return lambda obj: obj["ckpt"] == True  # noqa: E712
        return lambda obj: obj["algorithm"] == alg and obj["ckpt"] == False  # noqa: E712
    if model == "GPT":
        return lambda obj: obj["alg"] == alg
    return lambda obj: obj["algorithm"] == alg


def collect_max_tpt(hardware, model):
    """Collect the best throughput per algorithm and GPU count.

    For each GPU count in (1, 2, 4) the model's ``speed_results.json`` is read
    through ``Util.load_data`` and, for every algorithm in ``ALGOS``, the
    largest "ips" value among the matching records is kept.

    Args:
        hardware: Hardware name used in the results directory (e.g. "v100").
        model: Key of ``MODEL_TO_DIR`` ("bert", "swin" or "GPT").

    Returns:
        A dict ``{algorithm display name: {gpu_cnt: max ips}}``.

    Raises:
        KeyError: If *model* is not a key of ``MODEL_TO_DIR``.
        ValueError: If no record matches an algorithm for some GPU count.
    """
    # Seed every algorithm up front so the result always has all the keys.
    data = {ALG_TO_NAME[alg]: {} for alg in ALGOS}
    for gpu_cnt in [1, 2, 4]:
        results_path = (
            f"{MODEL_TO_DIR[model]}/{GPU_CNT_TO_NUM[gpu_cnt]}{hardware}"
            "/results/speed_results.json"
        )
        for alg in ALGOS:
            cond = _build_record_filter(model, alg)
            # Presumably a mapping batch_size -> ips; confirm against Util.load_data.
            btimes = Util.load_data(results_path, "batch_size", "ips", cond)
            print(alg, btimes)
            throughputs = list(btimes.values())
            if not throughputs:
                # A bare max() on an empty sequence gives no hint about which
                # configuration is missing from the results file.
                raise ValueError(
                    f"no speed results for algorithm {alg!r} with {gpu_cnt} GPU(s) "
                    f"in {results_path}"
                )
            data[ALG_TO_NAME[alg]][gpu_cnt] = max(throughputs)
    return data
def plot_gpu_tpt(model, hardware, data):
    """Plot max throughput against the number of GPUs, one line per algorithm.

    Args:
        model: Model name; used only in the output file name.
        hardware: Hardware name; used only in the output file name.
        data: Dict ``{algorithm display name: {gpu_cnt: max throughput}}``
            holding an entry for every value of ``ALG_TO_NAME``.

    The figure is written to
    ``graphs/implications/{model}_{hardware}_gpu.{suffix}``; the directory is
    created when it does not exist yet.
    """
    import os

    fig, ax = plt.subplots()
    fig.set_size_inches(4, 4)
    for alg in ALG_TO_NAME.values():
        style_key = ALGNAME_NORMALIZE[alg]
        per_gpu = data[alg]
        # Take the x labels from the data itself so labels and values can
        # never drift apart in order or length.
        gpu_labels = [str(gpu_cnt) for gpu_cnt in per_gpu]
        throughputs = list(per_gpu.values())
        ax.plot(
            gpu_labels,
            throughputs,
            f"-{markers[style_key]}",
            label=alg,
            color=colors[style_key],
            markersize=10,
        )
    ax.tick_params(axis='x', labelsize=18)
    ax.tick_params(axis='y', labelsize=18)
    ax.set_xlabel("number of GPUs", size=16)
    ax.set_ylabel("max throughput (records/s)", size=16)
    ax.legend()
    fig.tight_layout()

    out_path = f"graphs/implications/{model}_{hardware}_gpu.{suffix}"
    # savefig does not create missing directories.
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    fig.savefig(out_path)
if __name__ == "__main__":
    # Script entry point: gather the best throughput numbers for one
    # hardware/model combination, echo them, then render the scaling plot.
    hardware, model = "v100", "GPT"
    data = collect_max_tpt(hardware, model)
    print(data)
    plot_gpu_tpt(model, hardware, data)