-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathconvolution.config
120 lines (100 loc) · 3.78 KB
/
convolution.config
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
#========================================================
#| TEST CONFIGURATION VARIABLES |
#========================================================
# [*] TMIN: Minimum execution time for each convolution.
# If TEST is enabled, the TMIN value must be 0.0.
# NOTE(review): TMIN is 2.0 while TEST=T below, which
# conflicts with the rule above - confirm which one is intended.
#--------------------------------------------------------
TMIN=2.0
#--------------------------------------------------------
#--------------------------------------------------------
# [*] TEST: [ T:Enable, F:Disable ]
#--------------------------------------------------------
TEST=T
#--------------------------------------------------------
#--------------------------------------------------------
# [*] DEBUG: [ T:Enable, F:Disable ]
#--------------------------------------------------------
DEBUG=F
#--------------------------------------------------------
#--------------------------------------------------------
# [*] THREADS: [ 1-N ]
#--------------------------------------------------------
THREADS=8
#--------------------------------------------------------
#--------------------------------------------------------
# [*] Convolution Algorithm
#--------------------------------------------------------
# Available algorithms:
#--------------------------------------------------------
# [*] CONVDIRECT
# [*] CONVGEMM
# [*] LOWERING
# [*] WINOGRAD
ALGORITHM="CONVDIRECT"
#--------------------------------------------------------
#--------------------------------------------------------
# [*] Algorithm for Lowering & Winograd
#--------------------------------------------------------
# Available options for Lowering:
# [*] B3A2C0
# [*] A3B2C0
# [*] BLIS
# [*] OPENBLAS
GEMM="B3A2C0"
#--------------------------------------------------------
#----------------------------------------------------------
# [*] GEMM Loop Parallelization.
#----------------------------------------------------------
# Select the number of the loop to which the parallelization
# is applied.
# Possible values:
# [*] 3
# [*] 4
# [*] 5
#
# Warning: Only available for B3A2C0 and A3B2C0
#----------------------------------------------------------
PARALLEL_LOOP=4
#----------------------------------------------------------
#--------------------------------------------------------
# [*] MR-NR Values
#--------------------------------------------------------
# Micro-kernel configuration
#--------------------------------------------------------
MR=20
NR=4
#--------------------------------------------------------
#--------------------------------------------------------
# [*] MC-NC-KC Values
# Cache Level Configuration
# [*] Minimum value 32
# [*] If -1, the values are calculated based on the cache
# Model Level.
#--------------------------------------------------------
MC=-1
NC=-1
KC=-1
#--------------------------------------------------------
#--------------------------------------------------------
# [*] Micro-kernels BestOf Mode [ T:Enable, F:Disable ]
# WARNING: Only available for:
# [*] CONVDIRECT
# [*] CONVGEMM
# [*] LOWERING + B3A2C0
#--------------------------------------------------------
BESTOF=T
#--------------------------------------------------------
#--------------------------------------------------------
# [*] Model Level parameters
# To increase the performance, select a correct
# cache configuration file "ONLY FILE NAME".
#
# If your platform is not supported, please
# create your own configuration file.
#
# Predefined configurations are available in the
# "cache-arch" directory.
#--------------------------------------------------------
PLATFORM=NVIDIA_CARMEL
#--------------------------------------------------------
#========================================================
#========================================================