-
Notifications
You must be signed in to change notification settings - Fork 4
/
utils.py
219 lines (196 loc) · 7.96 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
import torch
import numpy as np
from PIL import Image
from torchvision import transforms
from torch.utils.data.dataset import Dataset
import random
import matplotlib.pyplot as plt
import os
import torch.nn as nn
import torch.nn.functional as F
import os
# Module-level side effect: this environment variable is set as soon as the
# module is imported.
# NOTE(review): presumably a workaround for the "duplicate OpenMP runtime"
# abort that occurs when several libraries bundle their own copy - confirm
# it is still required.
os.environ['KMP_DUPLICATE_LIB_OK'] = 'TRUE'
def seed_pytorch(seed=42):
    """Seed every random number source used by this module.

    The same seed is passed to Python's ``random`` module, to NumPy, and to
    ``torch.manual_seed`` / ``torch.cuda.manual_seed`` /
    ``torch.cuda.manual_seed_all``. It is also written, as a string, to the
    ``PYTHONHASHSEED`` environment variable.

    Args:
        seed (int): Seed value shared by all generators. Defaults to 42.
    """
    random.seed(seed)
    # Environment values must be text, hence the explicit conversion.
    os.environ['PYTHONHASHSEED'] = str(seed)
    # Remaining seeders all take the seed as their single argument; they are
    # invoked in the same order as before.
    for seeder in (
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    ):
        seeder(seed)
def random_crop(img, mask, patch_size):
    """Cut the same random square patch out of an image and its mask.

    If either side of ``img`` is shorter than ``patch_size``, both arrays are
    first zero-padded at the bottom/right so that each side is at least
    ``patch_size`` long. The pad widths are derived from ``img`` and applied
    unchanged to ``mask``.

    Args:
        img (np.ndarray): 2-D array (its shape is unpacked into two values).
        mask (np.ndarray): Array padded and sliced exactly like ``img``.
        patch_size (int): Side length of the square patch.

    Returns:
        tuple: ``(img_patch, mask_patch)``, both cut with the same window.
    """
    rows, cols = img.shape
    if rows < patch_size or cols < patch_size:
        # One pad spec for both arrays keeps image and mask aligned.
        pad_spec = (
            (0, max(rows, patch_size) - rows),
            (0, max(cols, patch_size) - cols),
        )
        img = np.pad(img, pad_spec, mode='constant')
        mask = np.pad(mask, pad_spec, mode='constant')
        rows, cols = img.shape

    # Draw the row offset before the column offset so the sequence of
    # random numbers consumed stays the same.
    top = random.randint(0, rows - patch_size)
    left = random.randint(0, cols - patch_size)
    window = (slice(top, top + patch_size), slice(left, left + patch_size))
    return img[window], mask[window]
def Normalized(img, img_norm_cfg):
    """Standardize ``img`` with the statistics stored in ``img_norm_cfg``.

    Args:
        img: Value or array to normalize.
        img_norm_cfg (dict): Mapping providing the keys ``'mean'`` and
            ``'std'``.

    Returns:
        ``(img - mean) / std``.
    """
    mean = img_norm_cfg['mean']
    std = img_norm_cfg['std']
    centered = img - mean
    return centered / std
def Denormalization(img, img_norm_cfg):
    """Map a standardized ``img`` back using ``img_norm_cfg``.

    Args:
        img: Normalized value or array.
        img_norm_cfg (dict): Mapping providing the keys ``'std'`` and
            ``'mean'``.

    Returns:
        ``img * std + mean``.
    """
    std = img_norm_cfg['std']
    mean = img_norm_cfg['mean']
    scaled = img * std
    return scaled + mean
class FocalLoss(nn.Module):
    """Focal loss module for heatmap targets.

    More details can be found in the `paper
    <https://arxiv.org/abs/1808.01244>`_
    Code is modified from `kp_utils.py
    <https://github.com/princeton-vl/CornerNet/blob/master/models/py_utils/kp_utils.py#L152>`_  # noqa: E501
    The element-wise loss is computed by ``focal_loss`` and then weighted and
    reduced by ``weight_reduce_loss``. Per the original notes, the target is
    a gaussian heatmap rather than a 0/1 binary target.

    Args:
        alpha (float): Power of prediction.
        gamma (float): Power of target for negative samples.
        reduction (str): Options are "none", "mean" and "sum".
        loss_weight (float): Loss weight of current loss.
    """

    def __init__(self, alpha=2.0, gamma=4.0, reduction='mean',
                 loss_weight=1.0):
        super().__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.reduction = reduction
        self.loss_weight = loss_weight

    def _single_loss(self, pred, target, weight, reduction, avg_factor):
        """Return the weighted, reduced loss for one prediction."""
        elementwise = self.loss_weight * focal_loss(
            pred, target, alpha=self.alpha, gamma=self.gamma)
        return weight_reduce_loss(elementwise, weight, reduction, avg_factor)

    def forward(self, preds, target, weight=None, avg_factor=None,
                reduction_override=None):
        """Compute the loss for one prediction or a sequence of predictions.

        Args:
            preds (torch.Tensor | list | tuple): A single prediction, or a
                list/tuple of predictions that are all compared against the
                same ``target``; their losses are added together.
            target (torch.Tensor): The learning target of the prediction.
            weight (torch.Tensor, optional): The weight of loss for each
                prediction. Defaults to None.
            avg_factor (int, optional): Average factor that is used to
                average the loss. Defaults to None.
            reduction_override (str, optional): The reduction method used to
                override the original reduction method of the loss.
                Defaults to None.

        Returns:
            The reduced loss. For a list/tuple input this is the running sum
            of the per-prediction losses, starting from the integer ``0``
            (so an empty sequence yields ``0``).
        """
        assert reduction_override in (None, 'none', 'mean', 'sum')
        reduction = reduction_override or self.reduction

        if isinstance(preds, (list, tuple)):
            accumulated = 0
            for pred in preds:
                accumulated = accumulated + self._single_loss(
                    pred, target, weight, reduction, avg_factor)
            return accumulated

        return self._single_loss(preds, target, weight, reduction, avg_factor)
def weight_reduce_loss(loss, weight=None, reduction='mean', avg_factor=None):
    """Apply an optional element-wise weight, then reduce the loss.

    Args:
        loss (Tensor): Element-wise loss.
        weight (Tensor): Element-wise weights; skipped when None.
        reduction (str): Same as built-in losses of PyTorch.
        avg_factor (float): Average factor when computing the mean of losses.

    Returns:
        Tensor: Processed loss values.

    Raises:
        ValueError: If ``avg_factor`` is given together with a reduction
            other than "mean" or "none".
    """
    weighted = loss if weight is None else loss * weight

    # Without an explicit averaging factor the plain reduction applies.
    if avg_factor is None:
        return reduce_loss(weighted, reduction)

    # With an averaging factor, "mean" divides the total by that factor...
    if reduction == 'mean':
        return weighted.sum() / avg_factor
    # ...and "none" leaves the element-wise values as they are.
    if reduction == 'none':
        return weighted
    raise ValueError('avg_factor can not be used with reduction="sum"')
def reduce_loss(loss, reduction):
    """Reduce an element-wise loss tensor according to ``reduction``.

    Args:
        loss (Tensor): Elementwise loss tensor.
        reduction (str): Options are "none", "mean" and "sum".

    Return:
        Tensor: Reduced loss tensor.
    """
    # The reduction name is translated to an integer code:
    # none: 0, elementwise_mean: 1, sum: 2
    code = F._Reduction.get_enum(reduction)
    if code == 0:
        return loss
    if code == 1:
        return loss.mean()
    if code == 2:
        return loss.sum()
    # Any other code falls through without a value.
    return None
def focal_loss(pred, target, alpha=2.0, gamma=4.0):
    """Element-wise focal loss between a prediction and a soft target.

    The result is the sum of two terms::

        positive = -log(pred + eps)     * (1 - pred)**alpha * target
        negative = -log(1 - pred + eps) * pred**alpha       * (1 - target)**gamma

    Args:
        pred (torch.Tensor): The prediction.
        target (torch.Tensor): The learning target of the prediction.
        alpha (float, optional): Exponent applied to ``pred`` (negative
            term) and to ``1 - pred`` (positive term). Defaults to 2.0.
        gamma (float, optional): Exponent applied to ``1 - target`` in the
            negative term. Defaults to 4.0.

    Returns:
        torch.Tensor: Unreduced loss.
    """
    eps = 1e-12  # added inside both logarithms so log(0) is never taken
    complement = 1 - pred

    positive_term = -(pred + eps).log() * complement.pow(alpha) * target
    negative_term = (
        -(complement + eps).log() * pred.pow(alpha) * (1 - target).pow(gamma)
    )
    return positive_term + negative_term
def get_img_norm_cfg(dataset_name, dataset_dir):
    """Return the normalization statistics (``mean``/``std``) for a dataset.

    Known dataset names map to hard-coded values. For any other name the
    statistics are computed from the images listed in
    ``<dataset_dir>/img_idx/train_<dataset_name>.txt`` and
    ``<dataset_dir>/img_idx/test_<dataset_name>.txt``: every listed image is
    loaded from ``<dataset_dir>/images/``, converted to PIL mode ``'I'`` and
    to float32, and the per-image means and per-image standard deviations
    are each averaged.

    Args:
        dataset_name (str): Name of the dataset.
        dataset_dir (str): Dataset root; only read for unknown names.

    Returns:
        dict: ``{'mean': float, 'std': float}``.
    """
    preset_stats = {
        'NUAA-SIRST': dict(mean=101.06385040283203, std=34.619606018066406),
        'NUDT-SIRST': dict(mean=107.80905151367188, std=33.02274703979492),
        'IRSTD-1K': dict(mean=87.4661865234375, std=39.71953201293945),
        'SIRST3': dict(mean=95.010, std=41.511),
        'NUDT-SIRST-Sea': dict(mean=43.62403869628906, std=18.91838264465332),
        'SIRST4': dict(mean=62.10432052612305, std=23.96998405456543),
    }
    if dataset_name in preset_stats:
        return preset_stats[dataset_name]

    # Unknown dataset: gather the image names of the train and test splits.
    image_names = []
    for index_prefix in ('/img_idx/train_', '/img_idx/test_'):
        with open(dataset_dir + index_prefix + dataset_name + '.txt', 'r') as f:
            image_names = image_names + f.read().splitlines()

    image_root = dataset_dir + '/images/'
    per_image_means = []
    per_image_stds = []
    for image_name in image_names:
        pixels = np.array(
            Image.open(image_root + image_name).convert('I'),
            dtype=np.float32,
        )
        per_image_means.append(pixels.mean())
        per_image_stds.append(pixels.std())

    img_norm_cfg = dict(
        mean=float(np.array(per_image_means).mean()),
        std=float(np.array(per_image_stds).mean()),
    )
    # NOTE(review): the source's indentation was lost; this print is assumed
    # to belong to the computed-statistics branch only - confirm.
    print(dataset_name + ':\t' + str(img_norm_cfg))
    return img_norm_cfg
def PadImg(img, times=32):
    """Zero-pad a 2-D array so both sides are multiples of ``times``.

    Padding is appended at the bottom and on the right. When both sides are
    already multiples of ``times`` the input object itself is returned.

    Args:
        img (np.ndarray): 2-D array (its shape is unpacked into two values).
        times (int): Required divisor of both sides. Defaults to 32.

    Returns:
        np.ndarray: The padded array, or ``img`` unchanged.
    """
    rows, cols = img.shape
    # Distance from each side length up to the next multiple of `times`
    # (zero when the side is already aligned).
    extra_rows = -rows % times
    extra_cols = -cols % times
    if extra_rows or extra_cols:
        img = np.pad(img, ((0, extra_rows), (0, extra_cols)), mode='constant')
    return img