forked from Kagami/go-face
-
Notifications
You must be signed in to change notification settings - Fork 0
/
facerec.cc
272 lines (236 loc) · 8.91 KB
/
facerec.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
#include <shared_mutex>
#include <dlib/dnn.h>
#include <dlib/image_loader/image_loader.h>
#include <dlib/image_processing/frontal_face_detector.h>
#include <dlib/graph_utils.h>
#include "facerec.h"
#include "jpeg_mem_loader.h"
#include "classify.h"
// dlib names (layers, matrix, rectangle, ...) are used unqualified throughout this file.
using namespace dlib;
// Building blocks of the descriptor network (anet_type below).
//
// residual: applies `block` with stride 1 to the tag1-marked input and adds
// that input back through add_prev1.
template <template <int,template<typename>class,int,typename> class block, int N, template<typename>class BN, typename SUBNET>
using residual = add_prev1<block<N,BN,1,tag1<SUBNET>>>;
// residual_down: same idea with a stride-2 `block`; the shortcut branch goes
// through avg_pool<2,2,2,2> before being summed by add_prev2.
template <template <int,template<typename>class,int,typename> class block, int N, template<typename>class BN, typename SUBNET>
using residual_down = add_prev2<avg_pool<2,2,2,2,skip1<tag2<block<N,BN,2,tag1<SUBNET>>>>>>;
// block: two 3x3 convolutions with N filters, each wrapped in BN, with a relu
// in between. Only the first (innermost) convolution uses `stride`.
template <int N, template <typename> class BN, int stride, typename SUBNET>
using block = BN<con<N,3,3,1,1,relu<BN<con<N,3,3,stride,stride,SUBNET>>>>>;
// ares / ares_down: the residual units above with `affine` as BN, followed by relu.
template <int N, typename SUBNET> using ares = relu<residual<block,N,affine,SUBNET>>;
template <int N, typename SUBNET> using ares_down = relu<residual_down<block,N,affine,SUBNET>>;
// alevel0..alevel4: stacks of residual units with 256, 256, 128, 64 and 32
// filters. alevel4 is the only one without an ares_down unit.
template <typename SUBNET> using alevel0 = ares_down<256,SUBNET>;
template <typename SUBNET> using alevel1 = ares<256,ares<256,ares_down<256,SUBNET>>>;
template <typename SUBNET> using alevel2 = ares<128,ares<128,ares_down<128,SUBNET>>>;
template <typename SUBNET> using alevel3 = ares<64,ares<64,ares<64,ares_down<64,SUBNET>>>>;
template <typename SUBNET> using alevel4 = ares<32,ares<32,ares<32,SUBNET>>>;
// Building blocks of the CNN face detector (cnn_anet_type below).
// con5d: 5x5 convolution with stride 2; con5: 5x5 convolution with stride 1.
template <long num_filters, typename SUBNET> using con5d = con<num_filters,5,5,2,2,SUBNET>;
template <long num_filters, typename SUBNET> using con5 = con<num_filters,5,5,1,1,SUBNET>;
// downsampler: three con5d stages (16, 32, 32 filters), each followed by affine + relu.
template <typename SUBNET> using downsampler = relu<affine<con5d<32, relu<affine<con5d<32, relu<affine<con5d<16,SUBNET>>>>>>>>>;
// rcon5: 45-filter con5 followed by affine + relu.
template <typename SUBNET> using rcon5 = relu<affine<con5<45,SUBNET>>>;
// Type of FaceRec::cnn_net_, which is deserialized from
// "mmod_human_face_detector.dat" in the FaceRec constructor.
// NOTE(review): presumably this layout has to match the serialized model
// exactly, so do not edit it without replacing the model file - confirm.
using cnn_anet_type = loss_mmod<con<1,9,9,1,1,rcon5<rcon5<rcon5<downsampler<input_rgb_image_pyramid<pyramid_down<6>>>>>>>>;
// Type of FaceRec::net_, which is deserialized from
// "dlib_face_recognition_resnet_model_v1.dat" in the FaceRec constructor.
// The input layer is input_rgb_image_sized<150> and the last layer is
// fc_no_bias<128>; 128 is also the value of DESCR_LEN used by the C interface.
// NOTE(review): presumably this layout has to match the serialized model
// exactly - confirm before changing any layer.
using anet_type = loss_metric<fc_no_bias<128,avg_pool_everything<
alevel0<
alevel1<
alevel2<
alevel3<
alevel4<
max_pool<3,3,2,2,relu<affine<con<32,7,7,2,2,
input_rgb_image_sized<150>
>>>>>>>>>>>>;
// Layout of the flat arrays handed across the C interface.
// Each *_LEN is the number of elements stored per item and the matching
// *_SIZE is the same span expressed in bytes.

// One rectangle: left, top, right, bottom.
static constexpr size_t RECT_LEN = 4;
static constexpr size_t RECT_SIZE = RECT_LEN * sizeof(long);

// One face descriptor: 128 floats.
static constexpr size_t DESCR_LEN = 128;
static constexpr size_t DESCR_SIZE = DESCR_LEN * sizeof(float);

// One landmark point: x, y.
static constexpr size_t SHAPE_LEN = 2;
static constexpr size_t SHAPE_SIZE = SHAPE_LEN * sizeof(long);
// Forward declaration; the definition is further down in this file.
// Returns `count` randomly jittered copies of `img`. FaceRec::Recognize calls
// it when jittering is enabled (jittering > 0).
static std::vector<matrix<rgb_pixel>> jitter_image(
const matrix<rgb_pixel>& img,
int count
);
class FaceRec {
public:
FaceRec(const char* model_dir) {
detector_ = get_frontal_face_detector();
std::string dir = model_dir;
std::string shape_predictor_path = dir + "/shape_predictor_5_face_landmarks.dat";
std::string resnet_path = dir + "/dlib_face_recognition_resnet_model_v1.dat";
std::string cnn_resnet_path = dir + "/mmod_human_face_detector.dat";
deserialize(shape_predictor_path) >> sp_;
deserialize(resnet_path) >> net_;
deserialize(cnn_resnet_path) >> cnn_net_;
jittering = 0;
size = 150;
padding = 0.25;
}
std::tuple<std::vector<rectangle>, std::vector<descriptor>, std::vector<full_object_detection>>
Recognize(const matrix<rgb_pixel>& img,int max_faces,int type) {
std::vector<rectangle> rects;
std::vector<descriptor> descrs;
std::vector<full_object_detection> shapes;
if(type == 0) {
std::lock_guard<std::mutex> lock(detector_mutex_);
rects = detector_(img);
} else{
std::lock_guard<std::mutex> lock(cnn_net_mutex_);
auto dets = cnn_net_(img);
for (auto&& d : dets) {
rects.push_back(d.rect);
}
}
// Short circuit.
if (rects.size() == 0 || (max_faces > 0 && rects.size() > (size_t)max_faces))
return {std::move(rects), std::move(descrs), std::move(shapes)};
std::sort(rects.begin(), rects.end());
for (const auto& rect : rects) {
auto shape = sp_(img, rect);
shapes.push_back(shape);
matrix<rgb_pixel> face_chip;
extract_image_chip(img, get_face_chip_details(shape, size, padding), face_chip);
std::lock_guard<std::mutex> lock(net_mutex_);
if (jittering > 0) {
descrs.push_back(mean(mat(net_(jitter_image(std::move(face_chip), jittering)))));
} else {
descrs.push_back(net_(face_chip));
}
}
return {std::move(rects), std::move(descrs), std::move(shapes)};
}
void SetSamples(std::vector<descriptor>&& samples, std::vector<int>&& cats) {
std::unique_lock<std::shared_mutex> lock(samples_mutex_);
samples_ = std::move(samples);
cats_ = std::move(cats);
}
int Classify(const descriptor& test_sample, float tolerance) {
std::shared_lock<std::shared_mutex> lock(samples_mutex_);
return classify(samples_, cats_, test_sample, tolerance);
}
void Config(unsigned long new_size, double new_padding, int new_jittering) {
size = new_size;
padding = new_padding;
jittering = new_jittering;
}
private:
std::mutex detector_mutex_;
std::mutex net_mutex_;
std::mutex cnn_net_mutex_;
std::shared_mutex samples_mutex_;
frontal_face_detector detector_;
shape_predictor sp_;
anet_type net_;
cnn_anet_type cnn_net_;
std::vector<descriptor> samples_;
std::vector<int> cats_;
int jittering;
unsigned long size;
double padding;
};
// Plain C interface for Go.
// Creates a recognizer handle and loads the models found in `model_dir`.
//
// Returns NULL only when the handle itself cannot be allocated. Otherwise the
// returned handle either has `cls` set (success) or has `err_str`/`err_code`
// filled in (failure, `cls` stays NULL). Release the handle with
// facerec_free().
facerec* facerec_init(const char* model_dir) {
    facerec* rec = static_cast<facerec*>(calloc(1, sizeof(facerec)));
    if (rec == NULL)
        return NULL;

    // Constructing std::string from a null pointer is undefined behaviour,
    // so reject it before it reaches the FaceRec constructor.
    if (model_dir == NULL) {
        rec->err_str = strdup("model directory is NULL");
        rec->err_code = UNKNOWN_ERROR;
        return rec;
    }

    try {
        FaceRec* cls = new FaceRec(model_dir);
        rec->cls = static_cast<void*>(cls);
    } catch (const serialization_error& e) {
        rec->err_str = strdup(e.what());
        rec->err_code = SERIALIZATION_ERROR;
    } catch (const std::exception& e) {
        rec->err_str = strdup(e.what());
        rec->err_code = UNKNOWN_ERROR;
    } catch (...) {
        // No C++ exception may escape through the C interface.
        rec->err_str = strdup("unknown error");
        rec->err_code = UNKNOWN_ERROR;
    }
    return rec;
}
// C entry point: forwards the chip size, padding and jitter count to the
// FaceRec instance stored in `rec->cls`.
void facerec_config(facerec* rec, unsigned long size, double padding, int jittering) {
    static_cast<FaceRec*>(rec->cls)->Config(size, padding, jittering);
}
// Decodes the JPEG in `img_data` (`len` bytes), runs FaceRec::Recognize on it
// and packs the result into a heap-allocated faceret.
//
// Returns NULL only when the faceret itself cannot be allocated. On failure
// `err_str`/`err_code` are set and no result arrays are attached. On success
// `num_faces` is the number of descriptors; when it is non-zero:
//   rectangles  - num_faces * RECT_LEN longs (left, top, right, bottom)
//   descriptors - num_faces * DESCR_LEN floats
//   shapes      - num_faces * num_shapes * SHAPE_LEN longs (x, y per point)
// All buffers come from malloc/strdup.
faceret* facerec_recognize(facerec* rec, const uint8_t* img_data, int len, int max_faces,int type) {
    faceret* ret = static_cast<faceret*>(calloc(1, sizeof(faceret)));
    if (ret == NULL)
        return NULL;

    // A handle whose facerec_init() failed has cls == NULL.
    if (rec == NULL || rec->cls == NULL) {
        ret->err_str = strdup("recognizer is not initialized");
        ret->err_code = UNKNOWN_ERROR;
        return ret;
    }
    FaceRec* cls = static_cast<FaceRec*>(rec->cls);

    matrix<rgb_pixel> img;
    std::vector<rectangle> rects;
    std::vector<descriptor> descrs;
    std::vector<full_object_detection> shapes;

    try {
        // TODO(Kagami): Support more file types?
        load_mem_jpeg(img, img_data, len);
        std::tie(rects, descrs, shapes) = cls->Recognize(img, max_faces, type);
    } catch (const image_load_error& e) {
        ret->err_str = strdup(e.what());
        ret->err_code = IMAGE_LOAD_ERROR;
        return ret;
    } catch (const std::exception& e) {
        ret->err_str = strdup(e.what());
        ret->err_code = UNKNOWN_ERROR;
        return ret;
    } catch (...) {
        // No C++ exception may escape through the C interface.
        ret->err_str = strdup("unknown error");
        ret->err_code = UNKNOWN_ERROR;
        return ret;
    }

    // Descriptors are empty both when no face was found and when more than
    // max_faces were found; in either case there is nothing to export.
    const size_t num_faces = descrs.size();
    if (num_faces == 0)
        return ret;

    // Every shape comes from the same predictor, so the first one's part
    // count is used for all faces.
    const size_t num_parts = shapes[0].num_parts();
    const size_t shapes_bytes = num_faces * num_parts * SHAPE_SIZE;

    // Allocate everything up front so a failure cannot leave a half-filled
    // result behind.
    long* rect_buf = static_cast<long*>(malloc(num_faces * RECT_SIZE));
    float* descr_buf = static_cast<float*>(malloc(num_faces * DESCR_SIZE));
    long* shape_buf = static_cast<long*>(malloc(shapes_bytes));
    // malloc(0) may legitimately return NULL, hence the shapes_bytes check.
    if (rect_buf == NULL || descr_buf == NULL || (shape_buf == NULL && shapes_bytes > 0)) {
        free(rect_buf);
        free(descr_buf);
        free(shape_buf);
        ret->err_str = strdup("out of memory");
        ret->err_code = UNKNOWN_ERROR;
        return ret;
    }

    for (size_t i = 0; i < num_faces; i++) {
        long* rect_dst = rect_buf + i * RECT_LEN;
        rect_dst[0] = rects[i].left();
        rect_dst[1] = rects[i].top();
        rect_dst[2] = rects[i].right();
        rect_dst[3] = rects[i].bottom();

        // The descriptor is copied as DESCR_LEN contiguous floats.
        memcpy(descr_buf + i * DESCR_LEN, &descrs[i](0,0), DESCR_SIZE);

        long* shape_dst = shape_buf + i * num_parts * SHAPE_LEN;
        const auto& shape = shapes[i];
        for (size_t j = 0; j < num_parts; j++) {
            shape_dst[j*SHAPE_LEN] = shape.part(j).x();
            shape_dst[j*SHAPE_LEN+1] = shape.part(j).y();
        }
    }

    ret->num_faces = num_faces;
    ret->num_shapes = num_parts;
    ret->rectangles = rect_buf;
    ret->descriptors = descr_buf;
    ret->shapes = shape_buf;
    return ret;
}
// Replaces the recognizer's labelled samples.
//
// `c_samples` holds `len` descriptors back to back (DESCR_LEN floats each)
// and `c_cats` holds the `len` matching category ids. len == 0 clears the
// stored samples. The call is a no-op when the handle is unusable, when
// `len` is negative, or when `len` > 0 and either array is NULL.
void facerec_set_samples(
    facerec* rec,
    const float* c_samples,
    const int32_t* c_cats,
    int len
) {
    // A handle whose facerec_init() failed has cls == NULL.
    if (rec == NULL || rec->cls == NULL)
        return;
    // A negative length would turn into a huge size_t in reserve() and an
    // invalid iterator range for the category vector.
    if (len < 0)
        return;
    if (len > 0 && (c_samples == NULL || c_cats == NULL))
        return;

    FaceRec* cls = static_cast<FaceRec*>(rec->cls);

    std::vector<descriptor> samples;
    samples.reserve(len);
    for (int i = 0; i < len; i++) {
        descriptor sample = mat(c_samples + i*DESCR_LEN, DESCR_LEN, 1);
        samples.push_back(std::move(sample));
    }
    std::vector<int> cats(c_cats, c_cats + len);

    cls->SetSamples(std::move(samples), std::move(cats));
}
// C entry point: wraps the DESCR_LEN floats at `c_test_sample` in a
// descriptor and returns FaceRec::Classify's result for it.
int facerec_classify(facerec* rec, const float* c_test_sample, float tolerance) {
    const descriptor probe = mat(c_test_sample, DESCR_LEN, 1);
    FaceRec* recognizer = static_cast<FaceRec*>(rec->cls);
    return recognizer->Classify(probe, tolerance);
}
// Destroys the FaceRec instance held by `rec` (if any) and frees the handle.
// A NULL `rec` is accepted and ignored. `err_str` is not touched here.
void facerec_free(facerec* rec) {
    if (rec == nullptr)
        return;

    if (rec->cls != nullptr) {
        delete static_cast<FaceRec*>(rec->cls);
        rec->cls = nullptr;
    }
    free(rec);
}
// Produces `count` variations of `img` by calling the two-argument
// jitter_image(img, rnd) overload once per copy (slightly zoomed, rotated,
// translated and randomly mirrored versions, per the original note here).
// The random generator is thread_local, so each thread keeps its own state.
// A non-positive `count` yields an empty vector.
static std::vector<matrix<rgb_pixel>> jitter_image(
    const matrix<rgb_pixel>& img,
    int count
)
{
    thread_local dlib::rand rnd;

    std::vector<matrix<rgb_pixel>> jittered;
    for (int remaining = count; remaining > 0; --remaining) {
        jittered.push_back(jitter_image(img, rnd));
    }
    return jittered;
}