@@ -91,8 +91,9 @@ class Skew(Operation):
91
91
"""
92
92
Perform perspective skewing on images.
93
93
"""
94
def __init__(self, probability, skew_type="TILT", magnitude=None):
    """
    Configure a perspective skew operation.

    :param probability: Passed unchanged to ``Operation.__init__``.
    :param skew_type: Which family of skews ``perform_operation`` chooses
     from. One of ``"TILT"``, ``"TILT_LEFT_RIGHT"``, ``"TILT_TOP_BOTTOM"``,
     ``"CORNER"`` or ``"ALL"``.
    :param magnitude: Optional divisor applied to the longer image side to
     obtain a fixed skew amount. When left as ``None`` (or any falsy
     value) ``perform_operation`` picks a random amount instead.
    :raises ValueError: If ``skew_type`` is not one of the values above.
    """
    valid_skew_types = ("TILT", "TILT_LEFT_RIGHT", "TILT_TOP_BOTTOM", "CORNER", "ALL")

    # Reject a misspelt type here, where the mistake was made, rather than
    # letting it surface later as an unbound local inside perform_operation.
    if skew_type not in valid_skew_types:
        raise ValueError("skew_type must be one of %s, got %r"
                         % (", ".join(valid_skew_types), skew_type))

    Operation.__init__(self, probability)
    self.skew_type = skew_type
    self.magnitude = magnitude
97
98
98
99
def perform_operation(self, image):
    """
    Apply a perspective skew to ``image``.

    One or more corners of the image rectangle are pushed outwards,
    depending on ``self.skew_type``:

    - ``"TILT"``: tilt in one of four randomly chosen directions.
    - ``"TILT_LEFT_RIGHT"``: tilt left or right only.
    - ``"TILT_TOP_BOTTOM"``: tilt forwards or backwards only.
    - ``"CORNER"``: move one randomly chosen corner along one axis.
    - ``"ALL"``: move every corner by independent random amounts.

    If ``self.magnitude`` is truthy the displacement is the longer image
    side divided by it; otherwise it is a random whole number of pixels
    between 1 and the longer image side.

    :param image: The image to skew. Only its ``size`` and ``transform``
     members are used; a PIL image is assumed.
    :return: The result of ``image.transform`` with the output size set
     to ``image.size``.
    :raises ValueError: If ``self.skew_type`` is not one of the values
     listed above.
    """
    w, h = image.size

    # Corners are (horizontal, vertical) pairs in the order
    # top-left, top-right, bottom-right, bottom-left.
    original_plane = [(0, 0), (w, 0), (w, h), (0, h)]

    # Sign that moves each corner's coordinate away from the image.
    outward = [(-1, -1), (1, -1), (1, 1), (-1, 1)]

    max_skew_amount = max(w, h)
    if self.magnitude:
        skew_amount = max_skew_amount / self.magnitude
    else:
        skew_amount = random.randint(1, max_skew_amount)

    # moves[i] holds the [horizontal, vertical] displacement of corner i.
    moves = [[0, 0] for _ in original_plane]

    # Range of tilt directions each tilt type may draw from:
    # 0 = left, 1 = right, 2 = forward, 3 = backward.
    tilt_ranges = {"TILT": (0, 3),
                   "TILT_LEFT_RIGHT": (0, 1),
                   "TILT_TOP_BOTTOM": (2, 3)}

    if self.skew_type in tilt_ranges:
        low, high = tilt_ranges[self.skew_type]
        skew_direction = random.randint(low, high)

        # For every direction: the two corners that move and the axis
        # (0 = horizontal, 1 = vertical) they move along.
        tilts = {0: ((0, 3), 1),   # left tilt: left corners move vertically
                 1: ((1, 2), 1),   # right tilt: right corners move vertically
                 2: ((0, 1), 0),   # forward tilt: top corners move horizontally
                 3: ((2, 3), 0)}   # backward tilt: bottom corners move horizontally
        corners, axis = tilts[skew_direction]
        for corner in corners:
            moves[corner][axis] = skew_amount

    elif self.skew_type == "CORNER":
        # Eight possibilities: each of the four corners, moved either
        # horizontally (even values) or vertically (odd values).
        skew_direction = random.randint(0, 7)
        corner, axis = divmod(skew_direction, 2)
        moves[corner][axis] = skew_amount

    elif self.skew_type == "ALL":
        # random.randint needs whole numbers, but skew_amount is a float
        # once a magnitude has been applied; keep the bound at least 1 so
        # the range is never empty.
        bound = max(1, int(skew_amount))
        for corner in range(len(moves)):
            moves[corner] = [random.randint(1, bound), random.randint(1, bound)]

    else:
        raise ValueError("Unknown skew_type: %r" % (self.skew_type,))

    new_plane = [(px + sx * dx, py + sy * dy)
                 for (px, py), (sx, sy), (dx, dy)
                 in zip(original_plane, outward, moves)]

    # To calculate the coefficients required by PIL for the perspective skew,
    # see the following Stack Overflow discussion: https://goo.gl/sSgJdj
    # Each pair of corresponding corners contributes two rows of an 8x8
    # linear system whose solution is the eight coefficients.
    matrix = []
    for p1, p2 in zip(new_plane, original_plane):
        matrix.append([p1[0], p1[1], 1, 0, 0, 0, -p2[0] * p1[0], -p2[0] * p1[1]])
        matrix.append([0, 0, 0, p1[0], p1[1], 1, -p2[1] * p1[0], -p2[1] * p1[1]])

    # The builtin float is used because the np.float alias has been removed
    # from NumPy. The system is square, so it is solved directly.
    A = np.array(matrix, dtype=float)
    B = np.array(original_plane, dtype=float).reshape(8)
    perspective_skew_coefficients = np.linalg.solve(A, B).tolist()

    return image.transform(image.size,
                           Image.PERSPECTIVE,
                           perspective_skew_coefficients,
                           resample=Image.BICUBIC)
113
220
114
221
115
222
class Rotate (Operation ):
@@ -129,7 +236,9 @@ def perform_operation(self, image):
129
236
130
237
:math:`E = \\ frac{\\ frac{\\ sin{\\ theta_{a}}}{\\ sin{\\ theta_{b}}}\\ Big(X-\\ frac{\\ sin{\\ theta_{a}}}{\\ sin{\\ theta_{b}}} Y\\ Big)}{1-\\ frac{(\\ sin{\\ theta_{a}})^2}{(\\ sin{\\ theta_{b}})^2}}`
131
238
132
- which describes how :math:`E` is derived, and then follows :math:`B = Y - E` and :math:`A = \\ frac{\\ sin{\\ theta_{a}}}{\\ sin{\\ theta_{b}}} B`.
239
+ which describes how :math:`E` is derived, and then follows
240
+ :math:`B = Y - E` and
241
+ :math:`A = \\ frac{\\ sin{\\ theta_{a}}}{\\ sin{\\ theta_{b}}} B`.
133
242
134
243
:param image: The image to rotate.
135
244
:return: The rotated image.
@@ -154,41 +263,79 @@ def perform_operation(self, image):
154
263
angle_a = abs (self .rotation )
155
264
angle_b = 90 - angle_a
156
265
157
- # Python deals in radians so get our radians
158
- angle_a_rad = math .radians (angle_a )
159
- angle_b_rad = math .radians (angle_b )
266
+ # We need the sin of angle a and b a few times
267
+ sin_angle_a = math .sin ( math . radians (angle_a ) )
268
+ sin_angle_b = math .sin ( math . radians (angle_b ) )
160
269
161
- # Find the maximum area of the rectangle that could be cropped
162
- E = (math . sin ( angle_a_rad )) / ( math . sin ( angle_b_rad ) ) * \
163
- (Y - X * (math . sin ( angle_a_rad ) / math . sin ( angle_b_rad ) ))
164
- E = E / 1 - (math . sin ( angle_a_rad ) ** 2 / math . sin ( angle_b_rad ) ** 2 )
270
+ # Now we find the maximum area of the rectangle that could be cropped
271
+ E = (sin_angle_a / sin_angle_b ) * \
272
+ (Y - X * (sin_angle_a / sin_angle_b ))
273
+ E = E / 1 - (sin_angle_a ** 2 / sin_angle_b ** 2 )
165
274
B = X - E
166
- A = (math . sin ( angle_a_rad ) / math . sin ( angle_b_rad ) ) * B
275
+ A = (sin_angle_a / sin_angle_b ) * B
167
276
168
277
# Crop this area from the rotated image
169
- image = image .crop ((E , A , X - E , Y - A ))
278
+ image = image .crop ((int ( round ( E )), int ( round ( A )), int ( round ( X - E )), int ( round ( Y - A )) ))
170
279
171
280
# Return the image, re-sized to the size of the image passed originally
172
281
return image .resize ((x , y ), resample = Image .BICUBIC )
173
282
174
283
175
284
class RotateRange(Operation):
    """
    Rotate an image by a random whole number of degrees, crop away the
    empty corners the rotation introduces, and scale the result back to
    the size of the image that was passed in.
    """

    def __init__(self, probability, max_left_rotation, max_right_rotation):
        """
        :param probability: Passed unchanged to ``Operation.__init__``.
        :param max_left_rotation: Largest rotation, in degrees, on the
         negative side. The sign of the argument is ignored.
        :param max_right_rotation: Largest rotation, in degrees, on the
         positive side. The sign of the argument is ignored.
        """
        Operation.__init__(self, probability)
        self.max_left_rotation = -abs(max_left_rotation)   # Ensure always negative
        self.max_right_rotation = abs(max_right_rotation)  # Ensure always positive

    def perform_operation(self, image):
        """
        Rotate ``image`` by a random angle and crop out the empty corners.

        One candidate angle is drawn for each side and one of the two is
        then picked with equal chance. Candidates are at least 5 degrees
        away from zero whenever the configured maximum allows it.

        :param image: The image to rotate; a PIL image is assumed.
        :return: The rotated and cropped image, resized to the size of
         ``image``.
        """
        # Clamp the inner bound so a maximum below 5 degrees gives a valid
        # (single value) range instead of an empty one, which
        # random.randint rejects with ValueError.
        random_left = random.randint(self.max_left_rotation,
                                     max(self.max_left_rotation, -5))
        random_right = random.randint(min(self.max_right_rotation, 5),
                                      self.max_right_rotation)

        rotation = random_left if random.randint(0, 1) == 0 else random_right

        # Get size before we rotate
        x, y = image.size

        # Rotate, while expanding the canvas size
        image = image.rotate(rotation, expand=True, resample=Image.BICUBIC)

        # Get size after rotation, which includes the empty space
        X, Y = image.size

        # The two angles needed to find the area to crop
        angle_a = abs(rotation)
        angle_b = 90 - angle_a
        ratio = math.sin(math.radians(angle_a)) / math.sin(math.radians(angle_b))

        # E = ratio * (Y - X * ratio) / (1 - ratio ** 2)
        # The whole product is divided by (1 - ratio ** 2); writing it as
        # "E / 1 - ratio ** 2" subtracts instead of dividing.
        denominator = 1 - ratio ** 2
        if denominator == 0:
            # At exactly 45 degrees the quotient is 0 / 0. For a canvas of
            # exactly X = x*cos(a) + y*sin(a) by Y = x*sin(a) + y*cos(a)
            # the quotient simplifies to y*sin(a), so use that directly.
            E = y * math.sin(math.radians(angle_a))
        else:
            E = ratio * (Y - X * ratio) / denominator
        B = X - E
        A = ratio * B

        # Crop this area from the rotated image
        image = image.crop((int(round(E)), int(round(A)),
                            int(round(X - E)), int(round(Y - A))))

        # Return the image, re-sized to the size of the image passed originally
        return image.resize((x, y), resample=Image.BICUBIC)
192
339
193
340
194
341
class Resize (Operation ):
@@ -319,7 +466,7 @@ def perform_operation(self, image):
319
466
# And here we are using SciKit Image's `transform` class.
320
467
# shear_transformer = transform.AffineTransform(shear=amount_to_shear)
321
468
# image_sheared = transform.warp(image_array, shear_transformer)
322
-
469
+ #
323
470
# Because of warnings
324
471
# with warnings.catch_warnings():
325
472
# warnings.simplefilter("ignore")
@@ -328,28 +475,90 @@ def perform_operation(self, image):
328
475
329
476
width , height = image .size
330
477
331
- angle_to_shear = int (random .uniform (self .max_shear_left , self .max_shear_right ))
478
+ max_shear_left = - 20
479
+ max_shear_right = 20
332
480
481
+ angle_to_shear = int (random .uniform (max_shear_left - 1 , max_shear_right + 1 ))
482
+ if angle_to_shear != - 1 : angle_to_shear += 1
483
+
484
+ # We use the angle phi in radians later
333
485
phi = math .tan (math .radians (angle_to_shear ))
334
486
335
- # Here we need the unknown b, where a is
336
- # the height of the image and phi is the
337
- # angle we want to shear (our knowns):
338
- # b = tan(phi) * a
339
- shift_in_pixels = phi * height
487
+ # Alternative method
488
+ # Calculate our offset when cropping
489
+ # We know one angle, phi (angle_to_shear)
490
+ # We known theta = 180-90-phi
491
+ # We know one side, opposite (height of image)
492
+ # Adjacent is therefore:
493
+ # tan(theta) = opposite / adjacent
494
+ # A = opposite / tan(theta)
495
+ # theta = math.radians(180-90-angle_to_shear)
496
+ # A = height / math.tan(theta)
340
497
341
498
# Transformation matrices can be found here:
342
499
# https://en.wikipedia.org/wiki/Transformation_matrix
343
500
# The PIL affine transform expects the first two rows of
344
501
# any of the affine transformation matrices, seen here:
345
502
# https://en.wikipedia.org/wiki/Transformation_matrix#/media/File:2D_affine_transformation_matrix.svg
346
503
347
- # Note: PIL expects the inverse scale, so 1/scale_factor for example.
348
- return image .transform ((int (round (width + shift_in_pixels )), height ),
349
- Image .AFFINE ,
350
- (1 , phi , - shift_in_pixels ,
351
- 0 , 1 , 0 ),
352
- Image .BICUBIC )
504
+ directions = ["x" , "y" ]
505
+ direction = random .choice (directions )
506
+
507
+ if direction == "x" :
508
+ # Here we need the unknown b, where a is
509
+ # the height of the image and phi is the
510
+ # angle we want to shear (our knowns):
511
+ # b = tan(phi) * a
512
+ shift_in_pixels = phi * height
513
+
514
+ if shift_in_pixels > 0 :
515
+ shift_in_pixels = math .ceil (shift_in_pixels )
516
+ else :
517
+ shift_in_pixels = math .floor (shift_in_pixels )
518
+
519
+ # For negative tilts, we reverse phi and set offset to 0
520
+ # Also matrix offset differs from pixel shift for neg
521
+ # but not for pos so we will copy this value in case
522
+ # we need to change it
523
+ matrix_offset = shift_in_pixels
524
+ if angle_to_shear <= 0 :
525
+ shift_in_pixels = abs (shift_in_pixels )
526
+ matrix_offset = 0
527
+ phi = abs (phi ) * - 1
528
+
529
+ # Note: PIL expects the inverse scale, so 1/scale_factor for example.
530
+ transform_matrix = (1 , phi , - matrix_offset ,
531
+ 0 , 1 , 0 )
532
+
533
+ image = image .transform ((int (round (width + shift_in_pixels )), height ),
534
+ Image .AFFINE ,
535
+ transform_matrix ,
536
+ Image .BICUBIC )
537
+
538
+ image = image .crop ((abs (shift_in_pixels ), 0 , width , height ))
539
+
540
+ return image .resize ((width , height ), resample = Image .BICUBIC )
541
+
542
+ elif direction == "y" :
543
+ shift_in_pixels = phi * width
544
+
545
+ matrix_offset = shift_in_pixels
546
+ if angle_to_shear <= 0 :
547
+ shift_in_pixels = abs (shift_in_pixels )
548
+ matrix_offset = 0
549
+ phi = abs (phi ) * - 1
550
+
551
+ transform_matrix = (1 , 0 , 0 ,
552
+ phi , 1 , - matrix_offset )
553
+
554
+ image = image .transform ((width , int (round (height + shift_in_pixels ))),
555
+ Image .AFFINE ,
556
+ transform_matrix ,
557
+ Image .BICUBIC )
558
+
559
+ image = image .crop ((0 , abs (shift_in_pixels ), width , height ))
560
+
561
+ return image .resize ((width , height ), resample = Image .BICUBIC )
353
562
354
563
355
564
class Scale (Operation ):
0 commit comments