benchmark.py (forked from hclhkbu/dlbench)
import argparse
import sys,os,time
import subprocess
import collect_gpu_power as cgp
from threading import Thread
# Parse arguments
def str2bool(v):
    # argparse's type=bool treats any non-empty string (including "False") as True,
    # so parse the flag value explicitly
    return str(v).lower() in ('true', '1', 'yes')

parser = argparse.ArgumentParser(description='Benchmark deep learning tools')
parser.add_argument('-config', type=str, help='Path to the config file')
parser.add_argument('-post', type=str2bool, default=False, help='Post results to our server. You should keep it False')
parser.add_argument('-debug', type=str2bool, default=False, help='Print debug output from benchmark.py')
args = parser.parse_args()
if args.debug: print "[DEBUG] args: " + str(args)
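# Example invocation (hypothetical config path):
#   python benchmark.py -config config.conf -debug True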
# Parse config file
config_experiments = False  # True while parsing lines inside the "{ ... }" experiments block
experiments = ''
host_file = None
flag = ''
tools = ''
cpu_name = ''
gpu_name = ''
cuda_driver = ''
cudnn = ''
cuda = ''
cpu_count = '1'
if args.config is not None:
    with open(args.config) as f:
        content = f.readlines()
    #print content
    for line in content:
        line = line.split('#')[0].replace('\t','').replace('\n','').replace(' ', '')
        if len(line) < 1 or "None" in line:
            continue
        if not config_experiments:
            if "flag:" in line:
                flag = line.split(':')[1]
            elif "tools:" in line:
                tools = line.split(':')[1].split(',')
            elif "{" in line:
                config_experiments = True
            elif "host_file:" in line:
                host_file = line.split(':')[1]
            elif "cpu_name:" in line:
                cpu_name = line.split(':')[1]
            elif "gpu_name:" in line:
                gpu_name = line.split(':')[1]
            elif "cuda_driver:" in line:
                cuda_driver = line.split(':')[1]
            elif "cudnn:" in line:
                cudnn = line.split(':')[1]
            elif "cuda:" in line:
                cuda = line.split(':')[1]
        else:
            if "}" in line:
                config_experiments = False
                experiments = experiments[:len(experiments)-1].replace('\t','').replace(' ','').split(':')
            else:
                experiments += line + ':'
else:
    print("Please add -config <path to your config file>")
    sys.exit(0)
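# A minimal config sketch (hypothetical values) that the parser above accepts;
# '#' starts a comment, whitespace is stripped, and lines containing "None" are skipped:
#
#   flag: test_run
#   tools: tensorflow,caffe
#   host_file: None
#   cpu_name: E5-2630
#   gpu_name: TitanX
#   cuda: 8.0
#   cudnn: 5.1
#   cuda_driver: 367.48
#   experiments: {
#       fc;fcn5;0;1;1024;2;60000;0.05
#       cnn;alexnet;-1;1;64;2;50000;0.01
#   }
#
# Each experiment line holds 8 ';'-separated fields:
#   netType;network;devId;deviceCount;batchSize;numEpochs;epochSize;lr
# A devId of -1 selects a CPU run; otherwise the listed GPU id is used.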
post_flags = " -f " + flag + " -P " + cpu_name + " -r " + cuda_driver + " -C " + cuda + " -D " + cudnn
if args.debug:
print "[DEBUG] Defalut post flags:" + str(post_flags)
print "[DEBUG] Tool(s):" + str(tools)
print "[DEBUG] Experiments:" + str(experiments)
# Benchmark each tool
root_path = os.path.dirname(os.path.abspath(__file__))
host_name = subprocess.check_output("hostname", shell=True).strip().split('\n')[0]
if not os.path.exists(root_path + "/logs/"):
    os.system("rm -rf logs")
    print "Creating log directory... " + root_path + "/logs/"
    os.system("mkdir logs")
if args.debug:
    print "[DEBUG] Benchmark running on: " + host_name
    print "[DEBUG] Root path: " + root_path
for tool in tools:
    work_dir = root_path + "/tools/" + tool
    for experiment in experiments:
        os.chdir(work_dir)
        # Experiment fields: netType;network;devId;deviceCount;batchSize;numEpochs;epochSize;lr
        exp_args = experiment.split(";")
        device_name = ''
        device_count = exp_args[3]
        log_file = ''
        if "-1" in exp_args[2]:
            # devId of -1 means a CPU-only run
            device_name = cpu_name
            log_file = tool + "-" + exp_args[0] + "-" + exp_args[1] + "-" + device_name + "-c" + exp_args[3] + "-" + "b" + exp_args[4] + "-"
        else:
            device_name = gpu_name
            log_file = tool + "-" + exp_args[0] + "-" + exp_args[1] + "-" + device_name + "-devId" + exp_args[2] + "-c" + exp_args[3] + "-" + "b" + exp_args[4] + "-"
        print "\n-------Benchmarking " + tool + " " + exp_args[1] + "-------"
        log_file += time.ctime() + "-" + host_name + ".log"
        log_file = log_file.replace(" ", "_")
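        # e.g. (hypothetical values): tensorflow-fc-fcn5-TitanX-devId0-c1-b1024-Mon_Jan_15_12:00:00_2018-host1.log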
        power_log_file = '%s/logs/power_%s' % (root_path, log_file)
        bm_script = "python " + tool + "bm.py"
        bm_script += " -netType " + exp_args[0] + " -log " + log_file + " -batchSize " + exp_args[4] + " -network " + exp_args[1] + " -lr " + exp_args[7]
        if "-1" in exp_args[2]:
            bm_script += " -devId " + exp_args[2] + " -numEpochs " + exp_args[5] + " -epochSize " + exp_args[6] + " -cpuCount " + exp_args[3]
            post_flags += " -c " + cpu_name
        else:
            bm_script += " -devId " + exp_args[2] + " -numEpochs " + exp_args[5] + " -epochSize " + exp_args[6] + " -gpuCount " + exp_args[3]
            post_flags += " -d " + gpu_name
        if host_file is not None and len(host_file) > 4:
            bm_script += " -hostFile " + host_file
        print bm_script
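        # The generated command looks like (hypothetical values):
        #   python tensorflowbm.py -netType fc -log <log_file> -batchSize 1024 -network fcn5 -lr 0.05 -devId 0 -numEpochs 2 -epochSize 60000 -gpuCount 1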
        try:
            # Sample GPU power in the background while the benchmark command runs
            thread = Thread(target=cgp.start_collecting_gpu_power, args=(bm_script, power_log_file))
            thread.start()
            result_args = subprocess.check_output(bm_script, shell=True).strip().split('\n')[0]
        except Exception as e:
            print "Benchmark failed with " + bm_script
            os.system("cat " + root_path + "/logs/" + log_file)
            continue
        power, mem = cgp.get_average_gpu_power_and_mem(gpu_name, power_log_file)
        post_flags += " " + result_args + " -b " + exp_args[4] + " -g " + exp_args[3] + " -e " + exp_args[6] + " -E " + exp_args[5]
        post_flags += " -l " + log_file + " -T " + tool + " -n " + exp_args[1]
        os.chdir(root_path)
        if args.post is True:
            post_script = "python post_record.py " + post_flags
            print post_script
            print(subprocess.check_output(post_script, shell=True).strip().split('\n')[0])
            # Reset post_flags for the next experiment
            post_flags = " -f " + flag + " -d " + device_name + " -P " + cpu_name + " -A " + str(mem) + " -r " + cuda_driver + " -C " + cuda + " -D " + cudnn + " -p " + str(power)
            post_script = ''
        else:
            print "Result:"
            print result_args
print "Done!"