// DataTransformer.h
//
// Created by Michal Faber on 14/09/2017.
//
#ifndef DATA_TRANSFORMER_DATATRANSFORMER_H
#define DATA_TRANSFORMER_DATATRANSFORMER_H
#include <string>
#include <vector>
#include <opencv2/core/core.hpp>
#include <opencv2/opencv.hpp>
#include "utils.h"
using namespace cv;
using namespace std;
/**
 * @brief Settings bundle handed to CPMDataTransformer's constructor.
 *
 * Every field carries an in-class default, so a default-constructed instance
 * is fully initialized. The declaration order of the fields is unchanged.
 */
struct TransformationParameter {
  // For data pre-processing, we can do simple scaling and subtracting the
  // data mean, if provided. Note that the mean subtraction is always carried
  // out before scaling.
  float scale = 1;
  // Specify if we want to randomly mirror data.
  bool mirror = false;
  // Specify if we would like to randomly crop an image.
  int crop_size = 0;
  // mean_file and mean_value cannot be specified at the same time.
  std::string mean_file;
  // If specified, can be repeated once (it is then subtracted from all the
  // channels) or repeated the same number of times as there are channels
  // (each value is then subtracted from the corresponding channel).
  // NOTE: currently a single float although the description above calls for
  // an array. Initialized to 0 so that default construction never leaves an
  // indeterminate value behind.
  float mean_value = 0;

  int stride = 4;
  float scale_cvg = 0.5;
  int max_cvg_len = 50;
  int min_cvg_len = 50;
  bool opaque_coverage = true;
  std::string coverage = "gridbox_max";
  float flip_prob = 0.5;
  float max_rotate_degree = 5.0;
  bool visualize = false;
  int crop_size_x = 368;
  int crop_size_y = 368;
  float scale_prob = 0.5;
  float scale_min = 0.9;
  float scale_max = 1.1;
  float bbox_norm_factor = 300;
  std::string img_header = ".";
  // Force the decoded image to have 3 color channels.
  bool force_color = false;
  // Force the decoded image to have 1 color channel.
  bool force_gray = false;
  float target_dist = 1.0;
  float center_perterb_max = 10.0;
  float sigma = 7.0;
  float sigma_center = 21.0;

  // CLAHE-related settings (how they are applied is decided by the
  // implementation file, not by this header).
  float clahe_tile_size = 8.0;
  float clahe_clip_limit = 4.0;
  bool do_clahe = false;

  int num_parts = 14;
  int num_total_augs = 82;
  std::string aug_way = "rand";
  int gray = 0;
  int np_in_lmdb = 16;
  bool transform_body_joint = true;
};
class CPMDataTransformer {
public:
explicit CPMDataTransformer(const TransformationParameter& param);
struct AugmentSelection {
bool flip;
float degree;
Size crop;
float scale;
};
struct Joints {
vector<Point2f> joints;
vector<float> isVisible;
};
struct MetaData {
string dataset;
Size img_size;
bool isValidation;
int numOtherPeople;
int people_index;
int annolist_index;
int write_number;
int total_write_number;
int epoch;
Point2f objpos; //objpos_x(float), objpos_y (float)
float scale_self;
Joints joint_self; //(3*16)
vector<Point2f> objpos_other; //length is numOtherPeople
vector<float> scale_other; //length is numOtherPeople
vector<Joints> joint_others; //length is numOtherPeople
};
/**
* @brief Initialize the Random number generations if needed by the
* transformation.
*/
void InitRand();
/**
* @brief Generates a random integer from Uniform({0, 1, ..., n-1}).
*
* @param n
* The upperbound (exclusive) value of the random number.
* @return
* A uniformly random integer value from ({0, 1, ..., n-1}).
*/
int Rand(int n);
void TransformMetaJoints(MetaData& meta);
void TransformJoints(Joints& joints);
bool onPlane(Point p, Size img_size);
bool augmentation_flip(Mat& img, Mat& img_aug, Mat& mask_miss, Mat& mask_all, MetaData& meta, int mode);
float augmentation_rotate(Mat& img_src, Mat& img_aug, Mat& mask_miss, Mat& mask_all, MetaData& meta, int mode);
float augmentation_scale(Mat& img, Mat& img_temp, Mat& mask_miss, Mat& mask_all, MetaData& meta, int mode);
Size augmentation_croppad(Mat& img_temp, Mat& img_aug, Mat& mask_miss, Mat& mask_miss_aug, Mat& mask_all, Mat& mask_all_aug, MetaData& meta, int mode);
void generateLabelMap(double*, Mat&, MetaData meta);
void putGaussianMaps(double* entry, Point2f center, int stride, int grid_x, int grid_y, float sigma);
void putVecMaps(double* entryX, double* entryY, Mat& count, Point2f centerA, Point2f centerB, int stride, int grid_x, int grid_y, float sigma, int thre);
void clahe(Mat& img, int, int);
void dumpEverything(double* transformed_data, double* transformed_label, MetaData meta);
void ReadMetaData(MetaData& meta, const uchar *data, size_t offset3, size_t offset1);
void Transform_nv(const uchar *data, const int datum_channels, const int datum_height, const int datum_width, uchar* transformed_data, double* transformed_label);
void swapLeftRight(Joints& j);
void SetAugTable(int numData);
void RotatePoint(Point2f& p, Mat R);
protected:
// Tranformation parameters
TransformationParameter param_;
boost::shared_ptr<RNGen::RNG> rng_;
vector<vector<float> > aug_degs;
vector<vector<int> > aug_flips;
int np;
int np_in_lmdb;
bool is_table_set;
};
#endif //DATA_TRANSFORMER_DATATRANSFORMER_H