-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathloss.py
332 lines (271 loc) · 12 KB
/
loss.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
class MS_SSIM_L1_LOSS(nn.Module):
    """Mix of an MS-SSIM term and a Gaussian-weighted L1 term.

    The returned value is the mean over all positions of
    ``compensation * (alpha * (1 - lM * prod(cs)) + (1 - alpha) * G(|x - y|) / data_range)``
    where ``G`` filters with the masks of the last sigma.

    The mask bank stores three identical masks per sigma, each with a single
    input channel, and every convolution uses ``groups=1``; ``forward``
    therefore requires single-channel inputs of shape ``[B, 1, H, W]``.

    The masks are placed on ``cuda_dev`` when CUDA is available (running on
    CPU is possible but slow) and are moved to the input's device on demand.

    Args:
        gaussian_sigmas: sequence of Gaussian sigmas; the last entry
            determines the filter size and the padding.
        data_range: dynamic range of the inputs.
        K: pair ``(K1, K2)`` used to build the stabilising constants
            ``C1 = (K1 * data_range) ** 2`` and ``C2 = (K2 * data_range) ** 2``.
        alpha: weight of the MS-SSIM term; the L1 term gets ``1 - alpha``.
        compensation: constant factor applied to the mixed loss.
        cuda_dev: CUDA device the masks are initially moved to, if CUDA is
            available.
    """

    def __init__(self, gaussian_sigmas=(0.5, 1.0, 2.0, 4.0, 8.0),
                 data_range=1.0,
                 K=(0.01, 0.03),
                 alpha=0.025,
                 compensation=200.0,
                 cuda_dev=0,):
        super(MS_SSIM_L1_LOSS, self).__init__()
        self.DR = data_range
        self.C1 = (K[0] * data_range) ** 2
        self.C2 = (K[1] * data_range) ** 2
        self.pad = int(2 * gaussian_sigmas[-1])
        self.alpha = alpha
        self.compensation = compensation
        filter_size = int(4 * gaussian_sigmas[-1] + 1)
        g_masks = torch.zeros((3 * len(gaussian_sigmas), 1, filter_size, filter_size))
        for idx, sigma in enumerate(gaussian_sigmas):
            # Three consecutive output channels share the same kernel; build
            # it once and broadcast it into the three slots.
            kernel = self._fspecial_gauss_2d(filter_size, sigma)
            g_masks[3 * idx:3 * idx + 3, 0, :, :] = kernel
        # Only touch CUDA when it exists so the loss can be built on CPU-only hosts.
        if torch.cuda.is_available():
            g_masks = g_masks.cuda(cuda_dev)
        # Non-persistent buffer: follows module.to()/cuda() without adding a
        # key to the state_dict.
        self.register_buffer("g_masks", g_masks, persistent=False)

    def _fspecial_gauss_1d(self, size, sigma):
        """Create 1-D gauss kernel

        Args:
            size (int): the size of gauss kernel
            sigma (float): sigma of normal distribution
        Returns:
            torch.Tensor: 1D kernel (size), normalised to sum to 1
        """
        coords = torch.arange(size).to(dtype=torch.float)
        coords -= size // 2
        g = torch.exp(-(coords ** 2) / (2 * sigma ** 2))
        g /= g.sum()
        return g.reshape(-1)

    def _fspecial_gauss_2d(self, size, sigma):
        """Create 2-D gauss kernel

        Args:
            size (int): the size of gauss kernel
            sigma (float): sigma of normal distribution
        Returns:
            torch.Tensor: 2D kernel (size x size), outer product of the 1D kernel
        """
        gaussian_vec = self._fspecial_gauss_1d(size, sigma)
        return torch.outer(gaussian_vec, gaussian_vec)

    def forward(self, x, y):
        """Compute the mixed loss between ``x`` and ``y``.

        Args:
            x: tensor of shape ``[B, 1, H, W]``.
            y: tensor of the same shape as ``x``.
        Returns:
            Scalar tensor: mean of the compensated mixed loss.
        Raises:
            ValueError: if ``x`` is not 4-dimensional.
        """
        if x.dim() != 4:
            raise ValueError(
                "Expected a 4-D input [B, C, H, W]. Got shape {}".format(tuple(x.shape)))

        # Align the masks with the input so CPU inputs (or inputs on another
        # GPU / in another float precision) do not fail inside conv2d.
        masks = self.g_masks.to(device=x.device)
        if x.is_floating_point():
            masks = masks.to(dtype=x.dtype)

        def blur(t):
            # One output channel per mask in the bank.
            return F.conv2d(t, masks, groups=1, padding=self.pad)

        mux = blur(x)
        muy = blur(y)

        mux2 = mux * mux
        muy2 = muy * muy
        muxy = mux * muy

        sigmax2 = blur(x * x) - mux2
        sigmay2 = blur(y * y) - muy2
        sigmaxy = blur(x * y) - muxy

        # l(j), cs(j) in MS-SSIM
        l = (2 * muxy + self.C1) / (mux2 + muy2 + self.C1)  # [B, 3 * num_sigmas, H, W]
        cs = (2 * sigmaxy + self.C2) / (sigmax2 + sigmay2 + self.C2)

        # The last three channels all come from the final sigma's mask.
        lM = l[:, -1, :, :] * l[:, -2, :, :] * l[:, -3, :, :]
        PIcs = cs.prod(dim=1)

        loss_ms_ssim = 1 - lM * PIcs  # [B, H, W]

        loss_l1 = F.l1_loss(x, y, reduction='none')  # same shape as x
        # Filter the L1 map with the three masks of the final sigma and average them.
        gaussian_l1 = F.conv2d(loss_l1, masks[-3:],
                               groups=1, padding=self.pad).mean(1)  # [B, H, W]

        loss_mix = self.alpha * loss_ms_ssim + (1 - self.alpha) * gaussian_l1 / self.DR
        loss_mix = self.compensation * loss_mix

        return loss_mix.mean()
from torchgeometry.image import get_gaussian_kernel2d
class SSIM(nn.Module):
    r"""Creates a criterion based on the Structural Similarity (SSIM)
    index between each element in the input `x` and target `y`.

    The index can be described as:

    .. math::

      \text{SSIM}(x, y) = \frac{(2\mu_x\mu_y+c_1)(2\sigma_{xy}+c_2)}
      {(\mu_x^2+\mu_y^2+c_1)(\sigma_x^2+\sigma_y^2+c_2)}

    where:
      - :math:`c_1=(k_1 L)^2` and :math:`c_2=(k_2 L)^2` are two variables to
        stabilize the division with weak denominator.
      - :math:`L` is the dynamic range of the pixel-values (typically this is
        :math:`2^{\#\text{bits per pixel}}-1`).

    the loss, or the Structural dissimilarity (DSSIM) can be finally described
    as:

    .. math::

      \text{loss}(x, y) = \frac{1 - \text{SSIM}(x, y)}{2}

    This module returns the loss (DSSIM), with :math:`1 - \text{SSIM}`
    clamped to :math:`[0, 1]` before halving.

    Arguments:
        window_size (int): the size of the kernel.
        max_val (float): the dynamic range of the images. Default: 1.
        reduction (str, optional): Specifies the reduction to apply to the
         output: 'none' | 'mean' | 'sum'. 'none': no reduction will be applied,
         'mean': the mean of the output is returned, 'sum': the output will be
         summed. Any other value leaves the output unreduced. Default: 'none'.

    Returns:
        Tensor: the DSSIM loss.

    Shape:
        - Input: :math:`(B, C, H, W)`
        - Target :math:`(B, C, H, W)`
        - Output: scalar for 'mean' / 'sum'; for 'none' a per-pixel map with
          one value per filtered position

    Examples::

        >>> input1 = torch.rand(1, 4, 5, 5)
        >>> input2 = torch.rand(1, 4, 5, 5)
        >>> criterion = SSIM(5, reduction='none')
        >>> loss = criterion(input1, input2)  # 1x4x5x5
    """

    def __init__(
            self,
            window_size: int,
            reduction: str = 'none',
            max_val: float = 1.0) -> None:
        super(SSIM, self).__init__()
        self.window_size: int = window_size
        self.max_val: float = max_val
        self.reduction: str = reduction

        # Gaussian window with a fixed sigma of 1.5 along both axes.
        self.window: torch.Tensor = get_gaussian_kernel2d(
            (window_size, window_size), (1.5, 1.5))
        self.padding: int = self.compute_zero_padding(window_size)

        self.C1: float = (0.01 * self.max_val) ** 2
        self.C2: float = (0.03 * self.max_val) ** 2

    @staticmethod
    def compute_zero_padding(kernel_size: int) -> int:
        """Computes zero padding."""
        return (kernel_size - 1) // 2

    def filter2D(
            self,
            input: torch.Tensor,
            kernel: torch.Tensor,
            channel: int) -> torch.Tensor:
        """Filter each of the ``channel`` channels of ``input`` with ``kernel``."""
        return F.conv2d(input, kernel, padding=self.padding, groups=channel)

    def forward(self, img1: torch.Tensor, img2: torch.Tensor) -> torch.Tensor:
        """Return the DSSIM loss between ``img1`` and ``img2``.

        Raises:
            TypeError: if either input is not a tensor.
            ValueError: if an input is not 4-D, or if the two inputs differ
                in shape, device or dtype.
        """
        named_inputs = (("img1", img1), ("img2", img2))
        for name, img in named_inputs:
            if not torch.is_tensor(img):
                raise TypeError("Input {} type is not a torch.Tensor. Got {}"
                                .format(name, type(img)))
        for name, img in named_inputs:
            if not len(img.shape) == 4:
                raise ValueError("Invalid {} shape, we expect BxCxHxW. Got: {}"
                                 .format(name, img.shape))
        # Each message reports both offending values.
        if not img1.shape == img2.shape:
            raise ValueError("img1 and img2 shapes must be the same. Got: {} and {}"
                             .format(img1.shape, img2.shape))
        if not img1.device == img2.device:
            raise ValueError("img1 and img2 must be in the same device. Got: {} and {}"
                             .format(img1.device, img2.device))
        if not img1.dtype == img2.dtype:
            raise ValueError("img1 and img2 must be in the same dtype. Got: {} and {}"
                             .format(img1.dtype, img2.dtype))

        # prepare kernel: one copy of the window per channel (depthwise filtering)
        b, c, h, w = img1.shape
        tmp_kernel: torch.Tensor = self.window.to(img1.device).to(img1.dtype)
        kernel: torch.Tensor = tmp_kernel.repeat(c, 1, 1, 1)

        # compute local mean per channel
        mu1: torch.Tensor = self.filter2D(img1, kernel, c)
        mu2: torch.Tensor = self.filter2D(img2, kernel, c)

        mu1_sq = mu1.pow(2)
        mu2_sq = mu2.pow(2)
        mu1_mu2 = mu1 * mu2

        # compute local sigma per channel
        sigma1_sq = self.filter2D(img1 * img1, kernel, c) - mu1_sq
        sigma2_sq = self.filter2D(img2 * img2, kernel, c) - mu2_sq
        sigma12 = self.filter2D(img1 * img2, kernel, c) - mu1_mu2

        ssim_map = ((2 * mu1_mu2 + self.C1) * (2 * sigma12 + self.C2)) / \
            ((mu1_sq + mu2_sq + self.C1) * (sigma1_sq + sigma2_sq + self.C2))

        loss = torch.clamp(1. - ssim_map, min=0, max=1) / 2.

        if self.reduction == 'mean':
            loss = torch.mean(loss)
        elif self.reduction == 'sum':
            loss = torch.sum(loss)
        # 'none' (and any unrecognised value) returns the unreduced map.
        return loss
######################
# functional interface
######################
def ssim(
        img1: torch.Tensor,
        img2: torch.Tensor,
        window_size: int,
        reduction: str = 'none',
        max_val: float = 1.0) -> torch.Tensor:
    r"""Functional form of the :class:`SSIM` criterion.

    Builds a one-off :class:`SSIM` module from ``window_size``, ``reduction``
    and ``max_val`` and applies it to ``img1`` and ``img2``.

    See :class:`SSIM` for details on the arguments and the returned tensor.
    """
    criterion = SSIM(window_size, reduction, max_val)
    return criterion(img1, img2)
def make_one_hot(input, num_classes):
    """Convert class index tensor to one hot encoding tensor.

    Args:
         input: A tensor of class indices of shape [N, 1, *]; any dtype that
            can be cast to int64 is accepted
         num_classes: An int of number of class
    Returns:
        A CPU tensor of shape [N, num_classes, *] holding 1 at each indexed
        class and 0 elsewhere
    """
    shape = list(input.shape)
    shape[1] = num_classes
    # scatter_ only accepts int64 indices, so label maps stored as
    # int32/uint8 are cast instead of raising.
    index = input.cpu().long()
    return torch.zeros(shape).scatter_(1, index, 1)
class BinaryDiceLoss(nn.Module):
    r"""Dice loss of binary class

    Per sample the loss is
    ``1 - (2 * sum(x * y) + smooth) / (sum(x^p) + sum(y^p) + smooth)``,
    so a prediction identical to a binary target yields a loss of 0 when
    ``p`` is 2.

    Args:
        smooth: A float number to smooth loss, and avoid NaN error, default: 1
        p: Denominator value: \sum{x^p} + \sum{y^p}, default: 2
        predict: A tensor of shape [N, *]
        target: A tensor of shape same with predict
        reduction: Reduction method to apply, return mean over batch if 'mean',
            return sum if 'sum', return a tensor of shape [N,] if 'none'
    Returns:
        Loss tensor according to arg reduction
    Raise:
        Exception if unexpected reduction
    """

    def __init__(self, smooth=1, p=2, reduction='mean'):
        super(BinaryDiceLoss, self).__init__()
        self.smooth = smooth
        self.p = p
        self.reduction = reduction

    def forward(self, predict, target):
        assert predict.shape[0] == target.shape[0], "predict & target batch size don't match"
        # Flatten everything except the batch dimension.
        predict = predict.contiguous().view(predict.shape[0], -1)
        target = target.contiguous().view(target.shape[0], -1)

        # The Dice coefficient carries a factor 2 on the overlap term;
        # without it the coefficient cannot reach 1 for a perfect match.
        num = 2 * torch.sum(torch.mul(predict, target), dim=1) + self.smooth
        den = torch.sum(predict.pow(self.p) + target.pow(self.p), dim=1) + self.smooth

        loss = 1 - num / den

        if self.reduction == 'mean':
            return loss.mean()
        elif self.reduction == 'sum':
            return loss.sum()
        elif self.reduction == 'none':
            return loss
        else:
            raise Exception('Unexpected reduction {}'.format(self.reduction))
class DiceLoss(nn.Module):
    """Dice loss, need one hot encode input

    ``predict`` is passed through a softmax over dim 1, a binary Dice loss is
    computed per class channel, optionally scaled by ``weight[i]``, and the
    sum is divided by the total number of classes (ignored class included).

    Args:
        weight: An array of shape [num_classes,]
        ignore_index: class index to ignore
        predict: A tensor of shape [N, C, *]
        target: A tensor of same shape with predict
        other args pass to BinaryDiceLoss
    Return:
        same as BinaryDiceLoss
    """

    def __init__(self, weight=None, ignore_index=None, **kwargs):
        super(DiceLoss, self).__init__()
        self.kwargs = kwargs
        self.weight = weight
        self.ignore_index = ignore_index

    def forward(self, predict, target):
        assert predict.shape == target.shape, 'predict & target shape do not match'
        num_classes = target.shape[1]
        # The weight check does not depend on the class, so do it once up front.
        if self.weight is not None:
            assert self.weight.shape[0] == num_classes, \
                'Expect weight shape [{}], get[{}]'.format(num_classes, self.weight.shape[0])

        dice = BinaryDiceLoss(**self.kwargs)
        total_loss = 0
        predict = F.softmax(predict, dim=1)

        for i in range(num_classes):
            if i == self.ignore_index:
                continue
            dice_loss = dice(predict[:, i], target[:, i])
            if self.weight is not None:
                # The attribute is ``weight``; the former ``self.weights``
                # lookup raised AttributeError whenever weights were given.
                dice_loss = dice_loss * self.weight[i]
            total_loss = total_loss + dice_loss

        return total_loss / num_classes