-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathtransforms.py
277 lines (241 loc) · 11.5 KB
/
transforms.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
from torchvision.transforms import v2
import math
import warnings
from typing import Any, cast, Dict, List, Optional, Sequence, Tuple, Type, Union
import PIL.Image
import torch
from torchvision import tv_tensors
from torchvision.transforms import _functional_tensor as _FT
from torchvision.transforms.v2 import functional as F, InterpolationMode, Transform
from torchvision.transforms.v2.functional._meta import get_size
from torchvision.transforms.v2.functional._utils import _FillTypeJIT
from torchvision.transforms.v2._utils import _get_fill, _setup_size, query_size
# Union of every image/video representation the transforms below accept:
# raw tensors, PIL images, and torchvision tv_tensors.
ImageOrVideo = Union[torch.Tensor, PIL.Image.Image, tv_tensors.Image, tv_tensors.Video]
class TwoHotMixUp:
    """MixUp variant that keeps both targets as class indices ("two-hot")
    instead of mixing class-probability vectors.

    Reshapes (prefetch_factor * batch_size) samples into (prefetch_factor)
    batches and mixes them all at once; as a consequence, every batch in a
    prefetch group shares the same lam(bda) draw.
    """

    def __init__(self, alpha: float, prefetch_factor: int, batch_size: int):
        self.prefetch_factor = prefetch_factor
        self.batch_size = batch_size
        # alpha == 0 disables mixing entirely (identity pass-through).
        self._dist = torch.distributions.Beta(alpha, alpha) if alpha else None

    def __call__(self, images, labels):
        per_sample_shape = images.shape[1:]
        images = images.reshape(self.prefetch_factor, self.batch_size, *per_sample_shape)
        labels = labels.reshape(self.prefetch_factor, self.batch_size)
        if self._dist is None:
            # No mixing configured: lam is 1 and both targets are identical.
            return images, 1, labels, labels
        lam = self._dist.sample()
        # Pair each sample with its neighbor (roll along the batch dim) and
        # blend: lam * original + (1 - lam) * rolled. The in-place ops act on
        # the freshly rolled copy, so the caller's tensor is untouched.
        mixed = images.roll(1, dims=1).mul_(1.0 - lam).add_(images, alpha=lam)
        return mixed, lam, labels, labels.roll(1, dims=1)
class TFInceptionCrop(Transform):
    """TensorFlow-style Inception crop, i.e. tf.slice() with the bbox returned by
    tf.image.sample_distorted_bounding_box(). Note that get_params() is not supported.

    Samples a crop whose area lies in ``scale`` * (image area) and whose aspect
    ratio lies in ``ratio``, then resizes the crop to ``size``. Falls back to
    the whole image when no valid crop is found within ``max_attempts`` tries.
    """

    def __init__(
        self,
        size: Union[int, Sequence[int]],
        scale: Tuple[float, float] = (0.08, 1.0),
        ratio: Tuple[float, float] = (3.0 / 4.0, 4.0 / 3.0),
        interpolation: Union[InterpolationMode, int] = InterpolationMode.BILINEAR,
        antialias: Optional[bool] = True,
        max_attempts: int = 100,
    ) -> None:
        """
        Args:
            size: output (h, w); a single int is expanded to a square.
            scale: (min, max) crop area as a fraction of the input area.
            ratio: (min, max) aspect ratio (w / h) of the crop.
            interpolation: interpolation mode for the final resize.
            antialias: forwarded to the resize kernel.
            max_attempts: sampling attempts before falling back to the full image.
        """
        super().__init__()
        self.size = _setup_size(size, error_msg="Please provide only two dimensions (h, w) for size.")
        if not isinstance(scale, Sequence):
            raise TypeError("Scale should be a sequence")
        if not isinstance(ratio, Sequence):
            raise TypeError("Ratio should be a sequence")
        if (scale[0] > scale[1]) or (ratio[0] > ratio[1]):
            warnings.warn("Scale and ratio should be of kind (min, max)")
        self.scale = scale
        self.ratio = ratio
        self.interpolation = interpolation
        self.antialias = antialias
        self.max_attempts = max_attempts
        # Tensor copy of the ratio bounds, consumed by uniform_() in _get_params.
        self._ratio = torch.tensor(self.ratio)

    def _get_params(self, flat_inputs: List[Any]) -> Dict[str, Any]:
        """Almost line-by-line translation of the core logic of
        tensorflow/core/kernels/image/sample_distorted_bounding_box_op.cc

        Returns:
            Dict with ``top``, ``left``, ``height``, ``width`` of the crop box.
        """
        original_height, original_width = query_size(flat_inputs)
        original_area = original_height * original_width
        # Allowed crop-area range, as absolute pixel counts.
        min_area = self.scale[0] * original_area
        max_area = self.scale[1] * original_area
        ratio = self._ratio
        for _ in range(self.max_attempts):
            # Draw the target aspect ratio (w / h) uniformly from [min, max].
            aspect_ratio = torch.empty(1).uniform_(
                ratio[0],  # type: ignore[arg-type]
                ratio[1],  # type: ignore[arg-type]
            ).item()
            # Height bounds implied by the area bounds at this aspect ratio
            # (area = h * w = h^2 * aspect_ratio).
            min_height = round(math.sqrt(min_area / aspect_ratio))
            max_height = round(math.sqrt(max_area / aspect_ratio))
            # TODO(b/140767341): Rewrite the generation logic to be more tolerant
            # of floating point behavior.
            if round(max_height * aspect_ratio) > original_width:
                # We must find the smallest max_height satisfying
                # round(max_height * aspect_ratio) <= original_width:
                EPSILON = 0.0000001
                max_height = int((original_width + 0.5 - EPSILON) / aspect_ratio)
                # If due to some precision issues, we still cannot guarantee
                # round(max_height * aspect_ratio) <= original_width, subtract 1 from
                # max height.
                if round(max_height * aspect_ratio) > original_width:
                    max_height -= 1
            max_height = min(max_height, original_height)
            min_height = min(min_height, max_height)
            # We need to generate a random number in the closed range
            # [min_height, max_height].
            height = torch.randint(min_height, max_height + 1, size=(1,)).item()
            width = round(height * aspect_ratio)
            # Let us not fail if rounding error causes the area to be
            # outside the constraints.
            # Try first with a slightly bigger rectangle first.
            area = width * height
            if area < min_area:
                height += 1
                width = round(height * aspect_ratio)
                area = width * height
            # Let us not fail if rounding error causes the area to be
            # outside the constraints.
            # Try first with a slightly smaller rectangle first.
            if area > max_area:
                height -= 1
                width = round(height * aspect_ratio)
                area = width * height
            # Now, we explored all options to rectify small rounding errors.
            # If the constraints can be satisfied: break out of the loop.
            if 0 < width <= original_width and 0 < height <= original_height and min_area <= area <= max_area:
                # Uniformly place the box inside the image.
                i = torch.randint(0, original_height - height + 1, size=(1,)).item()
                j = torch.randint(0, original_width - width + 1, size=(1,)).item()
                break
        else:
            # Fallback to the entire image
            width = original_width
            height = original_height
            i = j = 0
        return dict(top=i, left=j, height=height, width=width)

    def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any:
        # Crop to the sampled box and resize to the target size in one kernel call.
        return self._call_kernel(
            F.resized_crop, inpt, **params, size=self.size, interpolation=self.interpolation, antialias=self.antialias
        )
# Implemented with references to big_vision and https://github.com/pytorch/vision/pull/6609
def _solarize_add(
    image: ImageOrVideo, addition: int = 0, threshold: int = 128
) -> ImageOrVideo:
    """Add `addition` to every pixel strictly below `threshold`.

    Shifted values are clipped to [0, max-for-dtype] and cast back to uint8;
    pixels at or above `threshold` pass through unchanged.
    """
    # Maximum representable pixel value: dtype-dependent for tensors, 255 otherwise.
    if isinstance(image, torch.Tensor):
        bound = _FT._max_value(image.dtype)
    else:
        bound = 255
    # Do the addition in int64 so it cannot overflow, then clamp and cast back.
    shifted = image.to(torch.int64) + addition
    shifted = shifted.clip(0, bound).to(torch.uint8)
    return torch.where(image < threshold, shifted, image)
def _cutout(
    image: ImageOrVideo,
    pad_size: int,
    replace: int = 0,
) -> ImageOrVideo:
    """Erase a random square (side up to 2 * pad_size) from the image.

    Translation of TF autoaugment's cutout: a center is sampled uniformly over
    the image and the square around it is clipped to the image bounds before
    being filled with `replace`.

    Args:
        image: image or video to cut a hole into.
        pad_size: half the side length of the erased square.
        replace: fill value(s) — a scalar or a per-channel sequence.
    """
    _, img_h, img_w = F.get_dimensions(image)
    # Sample the center location in the image where the zero mask will be applied.
    cutout_center_height = int(torch.randint(img_h, ()))
    cutout_center_width = int(torch.randint(img_w, ()))
    # Top/left corner of the clipped square and the margins on the far sides.
    lower_pad = max(0, cutout_center_height - pad_size)
    upper_pad = max(0, img_h - cutout_center_height - pad_size)
    left_pad = max(0, cutout_center_width - pad_size)
    right_pad = max(0, img_w - cutout_center_width - pad_size)
    cutout_shape = [img_h - (lower_pad + upper_pad), img_w - (left_pad + right_pad)]
    # reshape(-1, 1, 1) handles both scalar and per-channel `replace`. The
    # previous `.unsqueeze(1).unsqueeze(1)` raised IndexError for the scalar
    # default (a 0-d tensor has no dim 1 to unsqueeze); for sequences the
    # resulting (C, 1, 1) shape is identical to before.
    fill_value = torch.tensor(replace).reshape(-1, 1, 1)
    return F.erase(image, lower_pad, left_pad, cutout_shape[0], cutout_shape[1], fill_value)
class RandAugment17(v2.RandAugment):
    """RandAugment variant dispatching a 17-op set (the 14 torchvision ops plus
    Invert, SolarizeAdd and Cutout), with ShearX/Y and Solarize adjusted to
    follow the TF autoaugment reference semantics.
    """

    def _apply_image_or_video_transform(
        self,
        image: ImageOrVideo,
        transform_id: str,
        magnitude: float,
        interpolation: Union[InterpolationMode, int],
        fill: Dict[Union[Type, str], _FillTypeJIT],
    ) -> ImageOrVideo:
        """Apply the single op named `transform_id` at strength `magnitude`.

        Raises:
            ValueError: if `transform_id` is not one of the 17 supported ops.
        """
        # Note: this cast is wrong and is only here to make mypy happy (it disagrees with torchscript)
        image = cast(torch.Tensor, image)
        fill_ = _get_fill(fill, type(image))
        if transform_id == "Identity":
            return image
        elif transform_id == "ShearX":
            # magnitude should be arctan(magnitude)
            # official autoaug: (1, level, 0, 0, 1, 0)
            # https://github.com/tensorflow/models/blob/dd02069717128186b88afa8d857ce57d17957f03/research/autoaugment/augmentation_transforms.py#L290
            # compared to
            # torchvision: (1, tan(level), 0, 0, 1, 0)
            # https://github.com/pytorch/vision/blob/0c2373d0bba3499e95776e7936e207d8a1676e65/torchvision/transforms/functional.py#L976
            return F.affine(
                image,
                angle=0.0,
                translate=[0, 0],
                scale=1.0,
                shear=[math.degrees(math.atan(magnitude)), 0.0],
                interpolation=interpolation,
                fill=fill_,
                # center=[0, 0]: shear about the top-left corner as in the TF
                # reference, not torchvision's default image center.
                center=[0, 0],
            )
        elif transform_id == "ShearY":
            # magnitude should be arctan(magnitude)
            # See above
            return F.affine(
                image,
                angle=0.0,
                translate=[0, 0],
                scale=1.0,
                shear=[0.0, math.degrees(math.atan(magnitude))],
                interpolation=interpolation,
                fill=fill_,
                center=[0, 0],
            )
        elif transform_id == "TranslateX":
            # magnitude is an absolute pixel offset here.
            return F.affine(
                image,
                angle=0.0,
                translate=[int(magnitude), 0],
                scale=1.0,
                interpolation=interpolation,
                shear=[0.0, 0.0],
                fill=fill_,
            )
        elif transform_id == "TranslateY":
            return F.affine(
                image,
                angle=0.0,
                translate=[0, int(magnitude)],
                scale=1.0,
                interpolation=interpolation,
                shear=[0.0, 0.0],
                fill=fill_,
            )
        elif transform_id == "Rotate":
            return F.rotate(image, angle=magnitude, interpolation=interpolation, fill=fill_)
        elif transform_id == "Brightness":
            return F.adjust_brightness(image, brightness_factor=1.0 + magnitude)
        elif transform_id == "Color":
            return F.adjust_saturation(image, saturation_factor=1.0 + magnitude)
        elif transform_id == "Contrast":
            return F.adjust_contrast(image, contrast_factor=1.0 + magnitude)
        elif transform_id == "Sharpness":
            return F.adjust_sharpness(image, sharpness_factor=1.0 + magnitude)
        elif transform_id == "Posterize":
            return F.posterize(image, bits=int(magnitude))
        elif transform_id == "Solarize":
            # Here magnitude is a fraction of the dtype's max value, so the
            # threshold scales with the pixel range (255 for uint8/PIL).
            bound = _FT._max_value(image.dtype) if isinstance(image, torch.Tensor) else 255.0
            return F.solarize(image, threshold=bound * magnitude)
        elif transform_id == "AutoContrast":
            return F.autocontrast(image)
        elif transform_id == "Equalize":
            return F.equalize(image)
        elif transform_id == "Invert":
            return F.invert(image)
        elif transform_id == "SolarizeAdd":
            return _solarize_add(image, addition=int(magnitude))
        elif transform_id == "Cutout":
            return _cutout(image, pad_size=int(magnitude), replace=fill_)
        else:
            raise ValueError(f"No transform available for {transform_id}")