-
Notifications
You must be signed in to change notification settings - Fork 0
/
setup.py
174 lines (154 loc) · 9.57 KB
/
setup.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
from genericpath import exists
import json
import os
import shutil
import fileinput
import numpy as np
import sys
sys.path.insert(0, os.getcwd() + '/python_scripts')
from utils import *
from attention import *
from attention_output import *
from intermediate import *
from intermediate_output import *
from comm import *
from gmi import *
# Load the design configuration from config.json in the working directory.
with open('config.json', 'r') as f:
    json_object = json.load(f)

# Data directory passed as the first argument to the kernel builders below.
data_path = 'ibert_data/encoder'
# Number of encoder layers the generation loops below iterate over.
num_encoder_layer = 1

# Model dimensions, read from the configuration in a single pass.
(
    hidden_size,
    num_hidden_layers,
    num_attention_heads,
    intermediate_size,
    max_sentence_len,
) = (
    json_object[key]
    for key in (
        'hidden_size',
        'num_hidden_layers',
        'num_attention_heads',
        'intermediate_size',
        'max_sentence_len',
    )
)
# ---------------------------------------------------------------------------
# Derive the kernel/cluster layout from the configuration.
# Each step feeds the next, so the order of these calls must not change.
# ---------------------------------------------------------------------------

# Cluster id assigned to each layer.
cluster_id_per_layer = get_cluster_id_per_layer(json_object)

# Number of compute kernels in each layer.
num_compute_kernel_per_layer = get_num_compute_kernel_per_layer(
    json_object,
    cluster_id_per_layer,
)

# Number of compute kernels in each cluster.
num_compute_kernel_per_cluster = get_num_compute_kernel_per_cluster(
    json_object,
    num_compute_kernel_per_layer,
)

# Compute-kernel ids for each layer.
compute_kern_id_per_layer = get_compute_kern_id_per_layer(
    json_object,
    num_attention_heads,
    cluster_id_per_layer,
    num_compute_kernel_per_layer,
)

# Virtual kernels of each cluster: names, ids, destinations, sources, counts.
(
    virtual_kernel_per_cluster,
    virtual_kernel_id_per_cluster,
    virtual_kernel_dest_per_cluster,
    virtual_kernel_src_per_cluster,
    num_virtual_kernel_per_cluster,
) = get_virtual_kernel_per_cluster(
    json_object,
    num_compute_kernel_per_cluster,
    cluster_id_per_layer,
    compute_kern_id_per_layer,
)

# Comm kernels of each cluster, plus the total kernel count per cluster.
(
    comm_kernel_per_cluster,
    comm_kernel_id_per_cluster,
    comm_kernel_dest_per_cluster,
    comm_kernel_src_per_cluster,
    total_num_kernel_per_cluster,
) = get_comm_kernel_per_cluster(
    json_object,
    num_compute_kernel_per_cluster,
    num_virtual_kernel_per_cluster,
    cluster_id_per_layer,
    virtual_kernel_id_per_cluster,
    compute_kern_id_per_layer,
)

# Destination ids of the compute kernels in each layer.
compute_kern_dest_per_layer = get_compute_kern_dest_per_layer(
    json_object,
    num_attention_heads,
    cluster_id_per_layer,
    virtual_kernel_id_per_cluster,
    virtual_kernel_per_cluster,
    compute_kern_id_per_layer,
    comm_kernel_id_per_cluster,
)

# Source ids of the compute kernels in each layer.
compute_kern_src_per_layer = get_compute_kern_src_per_layer(
    json_object,
    num_attention_heads,
    cluster_id_per_layer,
    virtual_kernel_id_per_cluster,
    compute_kern_id_per_layer,
    comm_kernel_id_per_cluster,
)

# Destination cluster of each layer.
dest_cluster_per_layer = get_dest_cluster_per_layer(json_object, cluster_id_per_layer)

# Part of each layer, and from that the part of each cluster.
part_per_layer = get_part_per_layer(json_object)
part_per_cluster = get_part_per_cluster(json_object, cluster_id_per_layer, part_per_layer)
# Per-layer aliases.  The per-layer tables are indexed in this fixed order:
#   0..2  attention_0 / attention_1 / attention_2
#   3..4  attention_output_0 / attention_output_1
#   5     intermediate_0
#   6..7  intermediate_output_0 / intermediate_output_1

# Cluster id of each layer.
(
    attention_0_cluster_id,
    attention_1_cluster_id,
    attention_2_cluster_id,
    attention_output_0_cluster_id,
    attention_output_1_cluster_id,
    intermediate_0_cluster_id,
    intermediate_output_0_cluster_id,
    intermediate_output_1_cluster_id,
) = (cluster_id_per_layer[layer_idx] for layer_idx in range(8))

# Compute-kernel ids of each layer.
(
    attention_0_kern_id,
    attention_1_kern_id,
    attention_2_kern_id,
    attention_output_0_kern_id,
    attention_output_1_kern_id,
    intermediate_0_kern_id,
    intermediate_output_0_kern_id,
    intermediate_output_1_kern_id,
) = (compute_kern_id_per_layer[layer_idx] for layer_idx in range(8))

# Compute-kernel destinations of each layer.
(
    attention_0_kern_dest,
    attention_1_kern_dest,
    attention_2_kern_dest,
    attention_output_0_kern_dest,
    attention_output_1_kern_dest,
    intermediate_0_kern_dest,
    intermediate_output_0_kern_dest,
    intermediate_output_1_kern_dest,
) = (compute_kern_dest_per_layer[layer_idx] for layer_idx in range(8))

# Compute-kernel sources of each layer.
(
    attention_0_kern_src,
    attention_1_kern_src,
    attention_2_kern_src,
    attention_output_0_kern_src,
    attention_output_1_kern_src,
    intermediate_0_kern_src,
    intermediate_output_0_kern_src,
    intermediate_output_1_kern_src,
) = (compute_kern_src_per_layer[layer_idx] for layer_idx in range(8))
# Generate one build.sh per kernel under
# kern/layer_<layer>/cluster_<cluster>/kern_<kernel>/.
cwd = os.getcwd()
cluster = json_object['cluster']
for encoder_layer in range(num_encoder_layer):
    for i in range(len(cluster)):
        # Compute kernels + comm kernels + one extra slot.
        # NOTE(review): the "+ 1" is presumably the cluster's gmi (virtual)
        # kernel -- confirm against get_virtual_kernel_per_cluster.
        num_kernel = num_compute_kernel_per_cluster[i] + len(comm_kernel_id_per_cluster[i]) + 1
        for j in range(num_kernel):
            kern_path = 'kern/layer_' + str(encoder_layer) + '/cluster_' + str(i) + '/kern_' + str(j)
            # exist_ok avoids the check-then-create race of a separate
            # os.path.exists() test and keeps reruns idempotent.
            os.makedirs(kern_path, exist_ok=True)
            # The context manager guarantees the script is flushed and the
            # handle released even if generate_script raises.
            with open(kern_path + '/build.sh', 'w') as bash_file:
                generate_script(bash_file, cwd, kern_path)
# Write the top-level build.sh, which launches every per-kernel build script
# in the background.  The context manager makes sure the file is flushed and
# closed once all lines are written (the original never closed the handle).
with open('build.sh', 'w') as bash_file:
    for encoder_layer in range(num_encoder_layer):
        for i in range(len(cluster)):
            # Same kernel count as used when the per-kernel scripts were generated.
            num_kernel = num_compute_kernel_per_cluster[i] + len(comm_kernel_id_per_cluster[i]) + 1
            for j in range(num_kernel):
                kern_path = 'kern/layer_' + str(encoder_layer) + '/cluster_' + str(i) + '/kern_' + str(j)
                bash_file.write('for((i=0;i<1;i++)); do nohup bash ' + kern_path + '/build.sh' + ' & done' + '\n')
            # NOTE(review): the nesting level of this write was not recoverable
            # from the source; it is assumed to run once per cluster so that a
            # cluster's builds finish before the next cluster's start -- confirm.
            bash_file.write('wait\n\n')
# Emit the gmi (virtual) kernel of every cluster, for every encoder layer.
for encoder_layer in range(num_encoder_layer):
    num_gmi_clusters = len(virtual_kernel_per_cluster)
    for cluster_idx in range(num_gmi_clusters):
        build_gmi_kernel(
            data_path,
            part_per_cluster[cluster_idx],
            encoder_layer,
            cluster_idx,
            virtual_kernel_per_cluster[cluster_idx],
            virtual_kernel_id_per_cluster[cluster_idx],
            virtual_kernel_dest_per_cluster[cluster_idx],
            virtual_kernel_src_per_cluster[cluster_idx],
        )

# Emit every comm kernel of every cluster, for every encoder layer.
for encoder_layer in range(num_encoder_layer):
    num_comm_clusters = len(comm_kernel_per_cluster)
    for cluster_idx in range(num_comm_clusters):
        num_comm_kernels = len(comm_kernel_per_cluster[cluster_idx])
        for comm_idx in range(num_comm_kernels):
            build_comm_kernel(
                data_path,
                part_per_cluster[cluster_idx],
                encoder_layer,
                cluster_idx,
                comm_kernel_per_cluster[cluster_idx][comm_idx],
                comm_kernel_id_per_cluster[cluster_idx][comm_idx],
                comm_kernel_dest_per_cluster[cluster_idx][comm_idx],
                comm_kernel_src_per_cluster[cluster_idx][comm_idx],
            )
# Emit the compute kernels of each encoder layer, stage by stage.
# For every stage except the last, out_cluster is True when the following
# stage is placed on a different cluster than the current one.
for encoder_layer in range(num_encoder_layer):
    # attention_0 -> attention_1
    out_cluster = bool(attention_0_cluster_id != attention_1_cluster_id)
    attention_0(
        data_path,
        json_object['attention_0'],
        attention_0_kern_id,
        attention_0_kern_dest,
        dest_cluster_per_layer[0],
        hidden_size,
        num_attention_heads,
        max_sentence_len,
        encoder_layer,
        attention_0_cluster_id,
        out_cluster,
    )

    # attention_1 -> attention_2
    out_cluster = bool(attention_1_cluster_id != attention_2_cluster_id)
    attention_1(
        data_path,
        json_object['attention_1'],
        attention_1_kern_id,
        attention_1_kern_dest,
        attention_1_kern_src,
        dest_cluster_per_layer[1],
        hidden_size,
        num_attention_heads,
        max_sentence_len,
        encoder_layer,
        attention_1_cluster_id,
        out_cluster,
    )

    # attention_2 -> attention_output_0
    # attention_2 also receives the partition setting of attention_output_0.
    out_cluster = bool(attention_2_cluster_id != attention_output_0_cluster_id)
    partition = json_object['attention_output_0']['partition']
    attention_2(
        data_path,
        json_object['attention_2'],
        attention_2_kern_id,
        attention_2_kern_dest,
        attention_2_kern_src,
        dest_cluster_per_layer[2],
        hidden_size,
        num_attention_heads,
        max_sentence_len,
        encoder_layer,
        attention_2_cluster_id,
        partition,
        out_cluster,
    )

    # attention_output_0 -> attention_output_1
    out_cluster = bool(attention_output_0_cluster_id != attention_output_1_cluster_id)
    attention_output_0(
        data_path,
        json_object['attention_output_0'],
        attention_output_0_kern_id,
        attention_output_0_kern_dest,
        attention_output_0_kern_src,
        dest_cluster_per_layer[3],
        hidden_size,
        num_attention_heads,
        max_sentence_len,
        encoder_layer,
        attention_output_0_cluster_id,
        out_cluster,
    )

    # attention_output_1 -> intermediate_0
    out_cluster = bool(attention_output_1_cluster_id != intermediate_0_cluster_id)
    attention_output_1(
        data_path,
        json_object['attention_output_1'],
        attention_output_1_kern_id,
        attention_output_1_kern_dest,
        attention_output_1_kern_src,
        dest_cluster_per_layer[4],
        hidden_size,
        num_attention_heads,
        max_sentence_len,
        encoder_layer,
        attention_output_1_cluster_id,
        out_cluster,
    )

    # intermediate_0 -> intermediate_output_0
    out_cluster = bool(intermediate_0_cluster_id != intermediate_output_0_cluster_id)
    intermediate_0(
        data_path,
        json_object['intermediate_0'],
        intermediate_0_kern_id,
        intermediate_0_kern_dest,
        dest_cluster_per_layer[5],
        hidden_size,
        intermediate_size,
        encoder_layer,
        intermediate_0_cluster_id,
        out_cluster,
    )

    # intermediate_output_0 -> intermediate_output_1
    out_cluster = bool(intermediate_output_0_cluster_id != intermediate_output_1_cluster_id)
    intermediate_output_0(
        data_path,
        json_object['intermediate_output_0'],
        intermediate_output_0_kern_id,
        intermediate_output_0_kern_dest,
        dest_cluster_per_layer[6],
        hidden_size,
        intermediate_size,
        encoder_layer,
        intermediate_output_0_cluster_id,
        out_cluster,
    )

    # intermediate_output_1 is the last stage and takes no out_cluster flag.
    intermediate_output_1(
        data_path,
        json_object['intermediate_output_1'],
        intermediate_output_1_kern_id,
        intermediate_output_1_kern_dest,
        intermediate_output_1_kern_src,
        dest_cluster_per_layer[7],
        hidden_size,
        intermediate_size,
        encoder_layer,
        intermediate_output_1_cluster_id,
    )