-
Notifications
You must be signed in to change notification settings - Fork 0
/
squeezenet.c
181 lines (156 loc) · 5.31 KB
/
squeezenet.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
/*
* Copyright (C) 2017 GreenWaves Technologies
* All rights reserved.
*
* This software may be modified and distributed under the terms
* of the BSD license. See the LICENSE file for details.
*
*/
#include <stdio.h>
#ifndef __EMUL__
/* PMSIS includes. */
#include "pmsis.h"
#endif
#include "squeezenet.h"
#include "squeezenetKernels.h"
#include "gaplib/ImgIO.h"
/* Two-level stringification: __XSTR lets its argument be macro-expanded
   first, then __STR turns the result into a string literal. */
#define __XSTR(__s) __STR(__s)
#define __STR(__s) #__s
/* Number of elements in the input image: width * height * colour planes. */
#define AT_INPUT_SIZE (AT_INPUT_WIDTH*AT_INPUT_HEIGHT*AT_INPUT_COLORS)
/* Number of entries in the result buffer; sizes ResOut and bounds the
   top-class scan in RunNetwork(). */
#define NUM_CLASSES 1001
/* Flash address variables, initialised to 0 and not otherwise touched in the
   visible part of this file.
   NOTE(review): presumably consumed by the generated kernels — confirm. */
AT_HYPERFLASH_FS_EXT_ADDR_TYPE __PREFIX(_L3_Flash) = 0;
AT_HYPERFLASH_FS_EXT_ADDR_TYPE __PREFIX(_L3_PrivilegedFlash) = 0;
/* Index and score of the best class; written by RunNetwork(), and max_class
   is compared against GROUND_TRUTH in start(). */
int max_class;
int max_value;
// Softmax always outputs Q15 short int even from 8 bit input
/* Result buffer: allocated in start() and handed to __PREFIX(CNN) in
   RunNetwork(). */
L2_MEM short int *ResOut;
/* Element type used when sizing the image buffer passed to
   ReadImageFromFile(). */
typedef unsigned char IMAGE_IN_T;
/* Path of the input image; set in start() from the stringified AT_IMAGE. */
char *ImageName;
#ifdef __EMUL__
/* Input buffer pointer, declared here only for __EMUL__ builds.
   NOTE(review): presumably declared by the generated kernels header in
   non-emulation builds — confirm. */
unsigned char * __restrict__ Input_1;
#endif
/*
 * Cluster task body: runs the generated network once and records the
 * best-scoring class.
 *
 * The network writes its output into the global ResOut buffer. The first
 * NUM_CLASSES entries are then scanned, and the index/score of the largest
 * entry strictly greater than 0 are stored in the globals max_class and
 * max_value (both end up 0 if no entry is above 0). Progress and the result
 * are reported through printf.
 */
static void RunNetwork()
{
    printf("Running on cluster\n");
#ifdef PERF
    printf("Start timer\n");
    gap_cl_starttimer();
    gap_cl_resethwtimer();
#endif

    /* The GPIO macros bracket the inference call. */
    GPIO_HIGH();
    __PREFIX(CNN)(ResOut);
    GPIO_LOW();
    printf("Runner completed\n");
    printf("\n");

    /* Check results: linear scan for the highest score; on ties the earliest
       index wins because the comparison is strict. */
    int best_idx = 0;
    int best_val = 0;
    int idx = 0;
    while (idx < NUM_CLASSES) {
        const int score = ResOut[idx];
        if (score > best_val) {
            best_val = score;
            best_idx = idx;
        }
        idx++;
    }
    max_class = best_idx;
    max_value = best_val;

    printf("Class detected: %d, with value: %d\n", max_class, max_value);
    printf("\n");
}
/*
 * Application body: brings up the cluster, builds the network, loads the
 * input image, runs one inference on the cluster and reports the outcome.
 *
 * Steps, in order:
 *   1. Open the cluster device and set the FC / CL / peripheral frequencies
 *      (and the chip voltage when VOLTAGE is defined).
 *   2. Allocate the cluster task descriptor and the ResOut result buffer.
 *   3. Call the generated graph constructor (must happen after the cluster
 *      is switched on).
 *   4. Read the image named by AT_IMAGE into Input_1 and print a checksum.
 *   5. Send RunNetwork to the cluster, then destruct the graph and close
 *      the cluster.
 *   6. Optionally print per-node performance (PERF), compare the prediction
 *      against GROUND_TRUTH, and check the cycle budget (PERF_CI).
 *
 * Returns 0 on success, 1 on a setup failure (cluster open, result buffer
 * allocation, graph construction, image load) and -1 on task allocation
 * failure, a wrong prediction or a performance regression.
 *
 * Early-error returns do not release the cluster, the task descriptor or
 * ResOut; main() returns straight after this function.
 */
int start(void)
{
    OPEN_GPIO_MEAS();
    ImageName = __XSTR(AT_IMAGE);

    struct pi_device cluster_dev;
    struct pi_cluster_conf conf;
    pi_cluster_conf_init(&conf);
    conf.cc_stack_size = STACK_SIZE;
    pi_open_from_conf(&cluster_dev, (void *)&conf);
    /* Do not carry on with a cluster that failed to open. */
    if (pi_cluster_open(&cluster_dev)) {
        printf("Cluster open failed!\n");
        return 1;
    }

    pi_freq_set(PI_FREQ_DOMAIN_FC, FREQ_FC*1000*1000);
    pi_freq_set(PI_FREQ_DOMAIN_CL, FREQ_CL*1000*1000);
    pi_freq_set(PI_FREQ_DOMAIN_PERIPH, FREQ_PE*1000*1000);
    /* The setters above are given Hz, so the read-back values are scaled
       down to match the "MHz" label.
       NOTE(review): assumes pi_freq_get() reports Hz, mirroring
       pi_freq_set() — confirm against the PMSIS API. */
    printf("Set FC Frequency = %d MHz, CL Frequency = %d MHz, PERIIPH Frequency = %d MHz\n",
           (int)(pi_freq_get(PI_FREQ_DOMAIN_FC)/(1000*1000)),
           (int)(pi_freq_get(PI_FREQ_DOMAIN_CL)/(1000*1000)),
           (int)(pi_freq_get(PI_FREQ_DOMAIN_PERIPH)/(1000*1000)));
#ifdef VOLTAGE
    /* NOTE(review): the voltage is set twice with identical arguments; kept
       as in the original in case it is deliberate — confirm and drop one
       call if it is a copy/paste leftover. */
    pi_pmu_voltage_set(PI_PMU_VOLTAGE_DOMAIN_CHIP, VOLTAGE);
    pi_pmu_voltage_set(PI_PMU_VOLTAGE_DOMAIN_CHIP, VOLTAGE);
    printf("Voltage: %dmV\n", VOLTAGE);
#endif

    struct pi_cluster_task *task = pi_l2_malloc(sizeof(struct pi_cluster_task));
    if (task == NULL) {
        printf("pi_cluster_task alloc Error!\n");
        pmsis_exit(-1);
        /* Never fall through to use a NULL task if pmsis_exit() returns. */
        return -1;
    }
    pi_cluster_task(task, (void (*)(void *))&RunNetwork, NULL);
    pi_cluster_task_stacks(task, NULL, SLAVE_STACK_SIZE);
#if defined(__GAP8__)
    task->entry = &RunNetwork;
    task->stack_size = STACK_SIZE;
    task->slave_stack_size = SLAVE_STACK_SIZE;
#endif
    printf("Stack sizes: %d %d\n", STACK_SIZE, SLAVE_STACK_SIZE);

    ResOut = (short int *) AT_L2_ALLOC(0, NUM_CLASSES*sizeof(short int));
    if (ResOut == NULL) {
        /* sizeof yields size_t; cast so the argument matches %ld. */
        printf("Failed to allocate Memory for Result (%ld bytes)\n", (long)(NUM_CLASSES*sizeof(short int)));
        return 1;
    }

    printf("Constructor\n");
    // IMPORTANT - MUST BE CALLED AFTER THE CLUSTER IS SWITCHED ON!!!!
    if (__PREFIX(CNN_Construct)())
    {
        printf("Graph constructor exited with error\n");
        return 1;
    }

    printf("Reading image\n");
    if (ReadImageFromFile(ImageName, AT_INPUT_WIDTH, AT_INPUT_HEIGHT, AT_INPUT_COLORS,
                          Input_1, AT_INPUT_SIZE*sizeof(IMAGE_IN_T), IMGIO_OUTPUT_CHAR, 0)) {
        printf("Failed to load image %s\n", ImageName);
        return 1;
    }
#ifdef IMAGE_SUB_128
    /* Shift every input byte by 128 (wraps modulo 256 on the unsigned type). */
    for (int i=0; i<AT_INPUT_SIZE; i++) Input_1[i] -= 128;
#endif
    printf("Finished reading image\n");

    /* Simple additive checksum of the input, printed for run-to-run comparison. */
    int input_checksum = 0;
    for (int i=0; i<AT_INPUT_SIZE; i++) input_checksum += Input_1[i];
    printf("input_checksum: %d\n", input_checksum);

    /* Run the network on the cluster, then tear everything down. */
    pi_cluster_send_task_to_cl(&cluster_dev, task);
    __PREFIX(CNN_Destruct)();
    pi_cluster_close(&cluster_dev);

#ifdef PERF
    unsigned int TotalCycles = 0, TotalOper = 0;
    {
        printf("\n");
        /* First pass accumulates the totals needed for the percentages. */
        for (unsigned int i=0; i<(sizeof(AT_GraphPerf)/sizeof(unsigned int)); i++) {
            TotalCycles += AT_GraphPerf[i]; TotalOper += AT_GraphOperInfosNames[i];
        }
        for (unsigned int i=0; i<(sizeof(AT_GraphPerf)/sizeof(unsigned int)); i++) {
            printf("%45s: Cycles: %12u, Cyc%%: %5.1f%%, Operations: %12u, Op%%: %5.1f%%, Operations/Cycle: %f\n", AT_GraphNodeNames[i], AT_GraphPerf[i], 100*((float) (AT_GraphPerf[i]) / TotalCycles), AT_GraphOperInfosNames[i], 100*((float) (AT_GraphOperInfosNames[i]) / TotalOper), ((float) AT_GraphOperInfosNames[i])/ AT_GraphPerf[i]);
        }
        printf("\n");
        printf("%45s: Cycles: %12u, Cyc%%: 100.0%%, Operations: %12u, Op%%: 100.0%%, Operations/Cycle: %f\n", "Total", TotalCycles, TotalOper, ((float) TotalOper)/ TotalCycles);
        printf("\n");
    }
#endif

#ifdef GROUND_TRUTH
    if (max_class != GROUND_TRUTH) {
        printf("Error class predicted: %d ground truth: %d\n", max_class, GROUND_TRUTH);
        pmsis_exit(-1);
        return -1;
    }
    else
        printf("Correct prediction\n");
#endif

#if defined(PERF_CI) && defined(PERF)
    if (TotalCycles > PERF_CI) {
        /* TotalCycles is unsigned, so it is printed with %u. */
        printf("Error in CI for performance: we expected to be faster: %u > %d\n", TotalCycles, PERF_CI);
        return -1;
    }
    printf("Performance Regression passed\n");
#endif

    printf("Ended\n");
    return 0;
}
/* Program entry point: prints the banner, runs start() and returns its
   status unchanged. */
int main(void)
{
    printf("\n\n\t *** NNTOOL SQUEEZENET ***\n\n");
    const int status = start();
    return status;
}