forked from hclhkbu/NV-DVFS-Benchmark
-
Notifications
You must be signed in to change notification settings - Fork 0
/
dvfs_benchmark.py
143 lines (116 loc) · 5.75 KB
/
dvfs_benchmark.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
import os,sys
import argparse
import subprocess
import time
import re
import ConfigParser
import json
parser = argparse.ArgumentParser()
parser.add_argument('--benchmark-setting', type=str, help='gpu benchmark setting', default='p100')
parser.add_argument('--kernel-setting', type=str, help='kernels of benchmark', default='synthetic')
parser.add_argument('--app-root', type=str, help='folder of applications', default='applications/linux')
opt = parser.parse_args()
print opt
BS_SETTING = '%s.cfg' % opt.benchmark_setting
KS_SETTING = '%s.cfg' % opt.kernel_setting
APP_ROOT = opt.app_root
LOG_ROOT = 'logs/%s-%s' % (opt.benchmark_setting, opt.kernel_setting)
try:
os.makedirs(LOG_ROOT)
except OSError:
pass
# Reading benchmark settings
cf_bs = ConfigParser.SafeConfigParser()
cf_bs.read("configs/benchmarks/%s" % BS_SETTING)
cf_ks = ConfigParser.SafeConfigParser()
cf_ks.read("configs/kernels/%s" % KS_SETTING)
running_iters = cf_bs.getint("profile_control", "iters")
running_time = cf_bs.getint("profile_control", "secs")
nvIns_dev_id = cf_bs.getint("profile_control", "nvIns_device_id")
cuda_dev_id = cf_bs.getint("profile_control", "cuda_device_id")
pw_sample_int = cf_bs.getint("profile_control", "power_sample_interval")
rest_int = cf_bs.getint("profile_control", "rest_time")
metrics = json.loads(cf_bs.get("profile_control", "metrics"))
core_frequencies = json.loads(cf_bs.get("dvfs_control", "coreF"))
memory_frequencies = json.loads(cf_bs.get("dvfs_control", "memF"))
powerState = cf_bs.getint("dvfs_control", "powerState")
if powerState == 5:
freqState = 1
else:
freqState = powerState
# Read GPU application settings
benchmark_programs = cf_ks.sections()
print benchmark_programs
print metrics
print core_frequencies
print memory_frequencies
if 'linux' in sys.platform:
pw_sampling_cmd = 'nohup ./nvml_samples -device=%d -si=%d -output=%s/%s 1>null 2>&1 &'
app_exec_cmd = './%s/%s %s -device=%d -secs=%d >> %s/%s'
dvfs_cmd = 'gpu=%d fcore=%s fmem=%s ./adjustClock.sh' % (nvIns_dev_id, '%s', '%s')
kill_pw_cmd = 'killall nvml_samples'
elif 'win' in sys.platform:
pw_sampling_cmd = 'start /B nvml_samples.exe -device=%d -si=%d -output=%s/%s > nul'
app_exec_cmd = '%s\\%s %s -device=%d -secs=%d >> %s/%s'
if powerState !=0:
dvfs_cmd = 'nvidiaInspector.exe -forcepstate:%s,%d -setGpuClock:%s,%d,%s -setMemoryClock:%s,%d,%s' \
% (nvIns_dev_id, powerState, nvIns_dev_id, freqState, '%s', nvIns_dev_id, freqState, '%s')
else:
dvfs_cmd = 'nvidiaInspector.exe -setBaseClockOffset:%s,%d,%s -setMemoryClockOffset:%s,%d,%s' \
% (nvIns_dev_id, freqState, '%s', nvIns_dev_id, freqState, '%s')
kill_pw_cmd = 'tasklist|findstr "nvml_samples.exe" && taskkill /F /IM nvml_samples.exe'
for core_f in core_frequencies:
for mem_f in memory_frequencies:
# set specific frequency
command = dvfs_cmd % (core_f, mem_f)
print command
os.system(command)
time.sleep(rest_int)
for i, app in enumerate(benchmark_programs):
#if i <= 18:
# continue
args = json.loads(cf_ks.get(app, 'args'))
for argNo, arg in enumerate(args):
# arg, number = re.subn('-device=[0-9]*', '-device=%d' % cuda_dev_id, arg)
powerlog = 'benchmark_%s_core%d_mem%d_input%02d_power.log' % (app, core_f, mem_f, argNo)
perflog = 'benchmark_%s_core%d_mem%d_input%02d_perf.log' % (app, core_f, mem_f, argNo)
metricslog = 'benchmark_%s_core%d_mem%d_input%02d_metrics.log' % (app, core_f, mem_f, argNo)
# start record power data
os.system("echo \"arg:%s\" > %s/%s" % (arg, LOG_ROOT, powerlog))
command = pw_sampling_cmd % (nvIns_dev_id, pw_sample_int, LOG_ROOT, powerlog)
print command
os.system(command)
time.sleep(rest_int)
# execute program to collect power data
os.system("echo \"arg:%s\" > %s/%s" % (arg, LOG_ROOT, perflog))
command = app_exec_cmd % (APP_ROOT, app, arg, cuda_dev_id, running_time, LOG_ROOT, perflog)
print command
os.system(command)
time.sleep(rest_int)
# stop record power data
os.system(kill_pw_cmd)
# execute program to collect time data
command = 'nvprof --profile-child-processes %s/%s %s -device=%d -secs=5 >> %s/%s 2>&1' % (APP_ROOT, app, arg, cuda_dev_id, LOG_ROOT, perflog)
print command
os.system(command)
time.sleep(rest_int)
# collect grid and block settings
command = 'nvprof --print-gpu-trace --profile-child-processes %s/%s %s -device=%d -iters=10 > %s/%s 2>&1' % (APP_ROOT, app, arg, cuda_dev_id, LOG_ROOT, metricslog)
print command
os.system(command)
time.sleep(rest_int)
# execute program to collect metrics data
metCount = 0
# to be fixed, the stride should be a multiplier of the metric number
stride = 2
while metCount < len(metrics):
if metCount + stride > len(metrics):
metStr = ','.join(metrics[metCount:])
else:
metStr = ','.join(metrics[metCount:metCount + stride])
command = 'nvprof --devices %s --metrics %s %s/%s %s -device=%d -iters=10 >> %s/%s 2>&1' % (cuda_dev_id, metStr, APP_ROOT, app, arg, cuda_dev_id, LOG_ROOT, metricslog)
print command
os.system(command)
time.sleep(1)
metCount += stride
time.sleep(rest_int)