function [pr_curve] = tracker(base_path, target, target_sz, ...
	padding, kernel, lambda, output_sigma_factor, interp_factor, cell_size, ...
	features, cnn_model)
%TRACKER Deep Hyperspectral Kernelized/Dual Correlation Filter (DeepHKCF) tracking.
%   This function implements the tracking pipeline for the KCF (by
%   choosing a non-linear kernel) and the DCF (by choosing a linear
%   kernel), using CNN features fine-tuned on aerial vehicle detection.
%
%   It is meant to be called by the interface function RUN_TRACKER, which
%   sets up the parameters and loads the video information.
%
%   Parameters:
%    BASE_PATH is the location of the image files (must end with a slash
%     '/' or '\').
%    TARGET is a struct describing the target state: its position (fields
%     X and Y) and the frame range (fields FIRSTFRAME and LASTFRAME).
%    TARGET_SZ is the initial size of the target (in format [rows, columns]).
%    PADDING is the additional tracked region, for context, relative to
%     the target size.
%    KERNEL is a struct describing the kernel. The field TYPE must be one
%     of 'gaussian', 'polynomial' or 'linear'. The optional fields SIGMA,
%     POLY_A and POLY_B are the parameters of the Gaussian and polynomial
%     kernels.
%    LAMBDA is the regularization weight of the ridge regression.
%    OUTPUT_SIGMA_FACTOR is the spatial bandwidth of the regression
%     target, relative to the target size.
%    INTERP_FACTOR is the adaptation rate of the tracker.
%    CELL_SIZE is the number of pixels per cell (must be 1 if using raw
%     pixels).
%    FEATURES is a struct describing the used features (see GET_FEATURES).
%    CNN_MODEL is the deep convolutional neural network model used to
%     extract features from the region of interest.
%
%   Output:
%    PR_CURVE is the precision curve computed over the tracked positions
%     (see PRECISION_CURVE).
%
%   Joao F. Henriques, 2014 - Modified by Burak Uzkent, 2017
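%
%   Example call (a sketch with illustrative values only; RUN_TRACKER
%   builds the actual TARGET, FEATURES and CNN_MODEL structures):
%     kernel.type = 'gaussian';  kernel.sigma = 0.5;
%     features.deep_HSI = true;
%     pr_curve = tracker('sequences/seq1/', target, [30 40], 1.5, kernel, ...
%         1e-4, 0.1, 0.02, 4, features, cnn_model);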
	% Window size, taking padding into account.
	window_sz = floor(target_sz * (1 + padding));

	% Create regression labels, Gaussian shaped, with a bandwidth
	% proportional to target size.
	output_sigma = sqrt(prod(target_sz)) * output_sigma_factor / cell_size;
	if isfield(features, 'deep_HSI') && features.deep_HSI
		yf = fft2(gaussian_shaped_labels(output_sigma, ceil(window_sz / cell_size)));
	else
		yf = fft2(gaussian_shaped_labels(output_sigma, floor(window_sz / cell_size)));
	end
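	% The regression target is a 2-D Gaussian peaked on the target,
	%   y(m,n) = exp(-((m - m0)^2 + (n - n0)^2) / (2 * output_sigma^2)),
	% circularly shifted so the peak sits at the top-left element (as in
	% the original KCF code); yf holds its 2-D DFT.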
	% Store pre-computed cosine window - to avoid distortion due to FFT.
	cos_window = hann_window(size(yf,1))' * hann_window(size(yf,2));
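	% (The Hann taper smoothly attenuates the patch borders, reducing the
	% boundary discontinuities that the cyclic-shift model would otherwise
	% turn into spectral leakage.)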
	time = 0;         % to calculate FPS
	frameCounter = 1; % frame index
	for frame = target.firstFrame:target.lastFrame
		% Load the HSI image through a MAT-file handle (only the sampled
		% ROI is read from disk).
		flag_tr_sp = 0;
		imgHandle = matfile([base_path 'Image_' num2str(frame) '.mat']);
		tic();
		if frameCounter > 1
			% Apply homography to the previous position (platform motion).
			applyHomograpy(target, 1);
			% Sample an ROI of twice the padded window size, centered on
			% the predicted position, from the full image.
			xCoord = target.x - window_sz(1) : target.x + window_sz(1) - 1;
			yCoord = target.y - window_sz(2) : target.y + window_sz(2) - 1;
			xCoord = boundary_handling(xCoord);
			yCoord = boundary_handling(yCoord);
			hsi_roi = imgHandle.img(xCoord, yCoord, :);
			% Extract deep features once for the whole ROI.
			roi_deep_features = conv_features(hsi_roi, features, cell_size, cos_window, cnn_model);
			% Sample subwindows on a regular grid over the ROI.
			number_rois = 4;
			SubWindowsX = round(linspace(1, size(xCoord,2) - window_sz(1), number_rois));
			SubWindowsY = round(linspace(1, size(yCoord,2) - window_sz(2), number_rois));
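			% ROI-mapping strategy: rather than re-running the CNN for
			% every candidate window, features were computed once for the
			% large ROI above, and the correlation filter is now evaluated
			% on a number_rois-by-number_rois grid of subwindows cropped
			% directly from that single feature map.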
			for i = 1:number_rois % search through ROIs
				for j = 1:number_rois
					% Map the subwindow from image coordinates into
					% feature-map coordinates.
					x_in = ceil(SubWindowsX(i) * size(roi_deep_features,1) / size(hsi_roi,1));
					y_in = ceil(SubWindowsY(j) * size(roi_deep_features,2) / size(hsi_roi,2));
					x_end = ceil((SubWindowsX(i) + window_sz(1)) * size(roi_deep_features,1) / size(hsi_roi,1));
					y_end = ceil((SubWindowsY(j) + window_sz(2)) * size(roi_deep_features,2) / size(hsi_roi,2));
					features_roi = im_resize(roi_deep_features(x_in:x_end, y_in:y_end, :), [window_sz(1) window_sz(2)]);
					% Apply the Hanning window.
					features_roi = bsxfun(@times, features_roi, cos_window);
					% Record the subwindow center in full-image coordinates.
					SubWindowX{i,j} = xCoord(1) + SubWindowsX(i) + window_sz(1)/2;
					SubWindowY{i,j} = yCoord(1) + SubWindowsY(j) + window_sz(2)/2;
					% Convert the detection patch to the Fourier domain
					% (its size is unchanged).
					zf = fft2(features_roi);
					% Calculate the response of the classifier at all shifts.
					switch kernel.type
					case 'gaussian'
						kzf = gaussian_correlation(zf, model_xf, kernel.sigma);
					case 'polynomial'
						kzf = polynomial_correlation(zf, model_xf, kernel.poly_a, kernel.poly_b);
					case 'linear'
						kzf = linear_correlation(zf, model_xf);
					end
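					% In the Fourier domain, detection is a single
					% elementwise product (Henriques et al., 2014):
					%   response = F^{-1}( khat_xz .* alphahat ),
					% where khat_xz is the kernel correlation between the
					% test patch z and the model x; for the Gaussian kernel,
					%   k_xz = exp(-(||x||^2 + ||z||^2 - 2*F^{-1}(conj(xhat) .* zhat)) / sigma^2).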
					response = real(ifft2(model_alphaf .* kzf)); % equation for fast detection
					% The target location is at the maximum response. We must
					% take into account the fact that, if the target doesn't
					% move, the peak will appear at the top-left corner, not
					% at the center (this is discussed in the paper). The
					% responses wrap around cyclically.
					[vert(i,j), horiz(i,j)] = find(response == max(response(:)), 1);
					confidence(i,j) = max(response(:)); % tracker confidence
				end
			end
			% Shift the tracker to the new position, given by the most
			% confident subwindow.
			[iX, iY] = find(confidence == max(confidence(:)), 1);
			vert_delta = vert(iX, iY);
			horiz_delta = horiz(iX, iY);
			if vert_delta > size(zf,1) / 2 % wrap around to negative half-space of vertical axis
				vert_delta = vert_delta - size(zf,1);
			end
			if horiz_delta > size(zf,2) / 2 % same for horizontal axis
				horiz_delta = horiz_delta - size(zf,2);
			end
			target.x = SubWindowX{iX,iY} + cell_size * (vert_delta - 1);
			target.y = SubWindowY{iX,iY} + cell_size * (horiz_delta - 1);
		end
		if frameCounter > 1
			% Training window centered on the newly estimated position.
			xCoord_tr = target.x - window_sz(1)/2 : target.x + window_sz(1)/2 - 1;
			yCoord_tr = target.y - window_sz(2)/2 : target.y + window_sz(2)/2 - 1;
			xCoord_tr = boundary_handling(xCoord_tr);
			yCoord_tr = boundary_handling(yCoord_tr);
			% If the training window lies entirely inside the detection ROI,
			% reuse the feature map already computed for detection.
			xc_lg = all(ismember(xCoord_tr, xCoord));
			yc_lg = all(ismember(yCoord_tr, yCoord));
			if xc_lg && yc_lg
				% ROI mapping from image to feature-map coordinates.
				x_in = ceil((xCoord_tr(1) - xCoord(1)) * size(roi_deep_features,1) / size(hsi_roi,1));
				y_in = ceil((yCoord_tr(1) - yCoord(1)) * size(roi_deep_features,2) / size(hsi_roi,2));
				x_in = max(x_in, 1); y_in = max(y_in, 1); % guard against a zero index when the windows share an origin
				x_end = ceil((xCoord_tr(1) - xCoord(1) + window_sz(1)) * size(roi_deep_features,1) / size(hsi_roi,1));
				y_end = ceil((yCoord_tr(1) - yCoord(1) + window_sz(2)) * size(roi_deep_features,2) / size(hsi_roi,2));
				features_roi = im_resize(roi_deep_features(x_in:x_end, y_in:y_end, :), [window_sz(1) window_sz(2)]);
				flag_tr_sp = 1;
			end
		end
		% Otherwise, obtain a subwindow for training at the newly estimated
		% target position by sampling the ROI from the full image.
		if flag_tr_sp == 0
			xCoord = target.x - window_sz(1) : target.x + window_sz(1) - 1;
			yCoord = target.y - window_sz(2) : target.y + window_sz(2) - 1;
			xCoord = boundary_handling(xCoord);
			yCoord = boundary_handling(yCoord);
			hsi_roi = imgHandle.img(xCoord, yCoord, :);
			% Extract features and do ROI mapping: crop the central part of
			% the feature map (indices hard-coded for the expected map size).
			roi_deep_features = conv_features(hsi_roi, features, cell_size, cos_window, cnn_model);
			features_roi = im_resize(roi_deep_features(14:41, 14:41, :), [window_sz(1) window_sz(2)]);
		end
		% Apply the Hanning window.
		features_roi = bsxfun(@times, features_roi, cos_window);
		xf = fft2(features_roi);
		% Kernel ridge regression: calculate alphas (in the Fourier domain).
		switch kernel.type
		case 'gaussian'
			kf = gaussian_correlation(xf, xf, kernel.sigma);
		case 'polynomial'
			kf = polynomial_correlation(xf, xf, kernel.poly_a, kernel.poly_b);
		case 'linear'
			kf = linear_correlation(xf, xf);
		end
		alphaf = yf ./ (kf + lambda); % equation for fast training
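		% This is the closed-form kernel ridge regression solution,
		% diagonalized by the DFT (Henriques et al., 2014):
		%   alphahat = yhat ./ (khat_xx + lambda),
		% where khat_xx is the kernel autocorrelation of the training patch.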
		if frameCounter == 1 % first frame, train with a single image
			model_alphaf = alphaf; % initialize the model
			model_xf = xf;
		else
			% Subsequent frames: interpolate the model at rate interp_factor.
			model_alphaf = (1 - interp_factor) * model_alphaf + interp_factor * alphaf;
			model_xf = (1 - interp_factor) * model_xf + interp_factor * xf;
		end
		% Store the result for this frame.
		time = toc();
		results(frameCounter,:) = [target.x target.y frame time];
		frameCounter = frameCounter + 1;
	end
	% Compute the precision curve from the tracked positions.
	pr_curve = precision_curve(target, results);
end
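
% -------------------------------------------------------------------------
% The helpers used above (GAUSSIAN_SHAPED_LABELS, HANN_WINDOW,
% BOUNDARY_HANDLING, CONV_FEATURES, IM_RESIZE, the *_CORRELATION functions,
% APPLYHOMOGRAPY and PRECISION_CURVE) live elsewhere in the repository. As a
% minimal sketch of one assumption made here: HANN_WINDOW is taken to return
% a 1-by-N row vector, so that hann_window(M)' * hann_window(N) above yields
% an M-by-N cosine window, e.g.
%
%   function w = hann_window(N)
%       % Symmetric Hann taper over N samples, returned as a row vector.
%       w = 0.5 * (1 - cos(2 * pi * (0:N-1) / (N - 1)));
%   end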