Skip to content

Commit 81b4686

Browse files
committed
New Skew class and associated operations.
1 parent 583797b commit 81b4686

File tree

4 files changed

+301
-45
lines changed

4 files changed

+301
-45
lines changed

Augmentor/Operations.py

+244-35
Original file line number | Diff line number | Diff line change
@@ -91,8 +91,9 @@ class Skew(Operation):
9191
"""
9292
Perform perspective skewing on images.
9393
"""
94-
def __init__(self, probability, magnitude):
94+
def __init__(self, probability, skew_type="TILT", magnitude=None):
9595
Operation.__init__(self, probability)
96+
self.skew_type = skew_type
9697
self.magnitude = magnitude
9798

9899
def perform_operation(self, image):
@@ -102,14 +103,120 @@ def perform_operation(self, image):
102103
:return: The transformed image.
103104
"""
104105

105-
# Use PIL to do this by generating the transform matrix
106+
w, h = image.size
106107

107-
skew_matrix = [0,0, 1,100, 100,1, 100,100]
108+
x1 = 0
109+
x2 = h
110+
y1 = 0
111+
y2 = w
112+
113+
original_plane = [(y1, x1), (y2, x1), (y2, x2), (y1, x2)]
114+
115+
max_skew_amount = max(w, h)
116+
117+
if not self.magnitude:
118+
skew_amount = random.randint(1, max_skew_amount)
119+
elif self.magnitude:
120+
max_skew_amount /= self.magnitude
121+
skew_amount = max_skew_amount
122+
123+
# We have two choices now: we tilt in one of four directions
124+
# or we skew a corner.
125+
126+
if self.skew_type == "TILT" or self.skew_type == "TILT_LEFT_RIGHT" or self.skew_type == "TILT_TOP_BOTTOM":
127+
128+
if self.skew_type == "TILT":
129+
skew_direction = random.randint(0, 3)
130+
elif self.skew_type == "TILT_LEFT_RIGHT":
131+
skew_direction = random.randint(0, 1)
132+
elif self.skew_type == "TILT_TOP_BOTTOM":
133+
skew_direction = random.randint(2, 3)
134+
135+
if skew_direction == 0:
136+
# Left Tilt
137+
new_plane = [(y1, x1 - skew_amount), # Top Left
138+
(y2, x1), # Top Right
139+
(y2, x2), # Bottom Right
140+
(y1, x2 + skew_amount)] # Bottom Left
141+
elif skew_direction == 1:
142+
# Right Tilt
143+
new_plane = [(y1, x1), # Top Left
144+
(y2, x1 - skew_amount), # Top Right
145+
(y2, x2 + skew_amount), # Bottom Right
146+
(y1, x2)] # Bottom Left
147+
elif skew_direction == 2:
148+
# Forward Tilt
149+
new_plane = [(y1 - skew_amount, x1), # Top Left
150+
(y2 + skew_amount, x1), # Top Right
151+
(y2, x2), # Bottom Right
152+
(y1, x2)] # Bottom Left
153+
elif skew_direction == 3:
154+
# Backward Tilt
155+
new_plane = [(y1, x1), # Top Left
156+
(y2, x1), # Top Right
157+
(y2 + skew_amount, x2), # Bottom Right
158+
(y1 - skew_amount, x2)] # Bottom Left
159+
160+
if self.skew_type == "CORNER":
161+
162+
skew_direction = random.randint(0, 7)
163+
164+
if skew_direction == 0:
165+
# Skew possibility 0
166+
new_plane = [(y1 - skew_amount, x1), (y2, x1), (y2, x2), (y1, x2)]
167+
elif skew_direction == 1:
168+
# Skew possibility 1
169+
new_plane = [(y1, x1 - skew_amount), (y2, x1), (y2, x2), (y1, x2)]
170+
elif skew_direction == 2:
171+
# Skew possibility 2
172+
new_plane = [(y1, x1), (y2 + skew_amount, x1), (y2, x2), (y1, x2)]
173+
elif skew_direction == 3:
174+
# Skew possibility 3
175+
new_plane = [(y1, x1), (y2, x1 - skew_amount), (y2, x2), (y1, x2)]
176+
elif skew_direction == 4:
177+
# Skew possibility 4
178+
new_plane = [(y1, x1), (y2, x1), (y2 + skew_amount, x2), (y1, x2)]
179+
elif skew_direction == 5:
180+
# Skew possibility 5
181+
new_plane = [(y1, x1), (y2, x1), (y2, x2 + skew_amount), (y1, x2)]
182+
elif skew_direction == 6:
183+
# Skew possibility 6
184+
new_plane = [(y1, x1), (y2, x1), (y2, x2), (y1 - skew_amount, x2)]
185+
elif skew_direction == 7:
186+
# Skew possibility 7
187+
new_plane = [(y1, x1), (y2, x1), (y2, x2), (y1, x2 + skew_amount)]
188+
189+
if self.skew_type == "ALL":
190+
# Not currently in use, as it makes little sense to skew by the same amount
191+
# in every direction if we have set magnitude manually.
192+
# It may make sense to keep this, if we ensure the skew_amount below is randomised
193+
# and cannot be manually set by the user.
194+
corners = dict()
195+
corners["top_left"] = (y1 - random.randint(1, skew_amount), x1 - random.randint(1, skew_amount))
196+
corners["top_right"] = (y2 + random.randint(1, skew_amount), x1 - random.randint(1, skew_amount))
197+
corners["bottom_right"] = (y2 + random.randint(1, skew_amount), x2 + random.randint(1, skew_amount))
198+
corners["bottom_left"] = (y1 - random.randint(1, skew_amount), x2 + random.randint(1, skew_amount))
199+
200+
new_plane = [corners["top_left"], corners["top_right"], corners["bottom_right"], corners["bottom_left"]]
108201

109202
# To calculate the coefficients required by PIL for the perspective skew,
110203
# see the following Stack Overflow discussion: https://goo.gl/sSgJdj
204+
matrix = []
205+
206+
for p1, p2 in zip(new_plane, original_plane):
207+
matrix.append([p1[0], p1[1], 1, 0, 0, 0, -p2[0] * p1[0], -p2[0] * p1[1]])
208+
matrix.append([0, 0, 0, p1[0], p1[1], 1, -p2[1] * p1[0], -p2[1] * p1[1]])
209+
210+
A = np.matrix(matrix, dtype=np.float)
211+
B = np.array(original_plane).reshape(8)
212+
213+
perspective_skew_coefficients_matrix = np.dot(np.linalg.inv(A.T * A) * A.T, B)
214+
perspective_skew_coefficients_matrix = np.array(perspective_skew_coefficients_matrix).reshape(8)
111215

112-
return image.transform(image.size, Image.PERSPECTIVE, skew_matrix, resample=Image.BICUBIC)
216+
return image.transform(image.size,
217+
Image.PERSPECTIVE,
218+
perspective_skew_coefficients_matrix,
219+
resample=Image.BICUBIC)
113220

114221

115222
class Rotate(Operation):
@@ -129,7 +236,9 @@ def perform_operation(self, image):
129236
130237
:math:`E = \\frac{\\frac{\\sin{\\theta_{a}}}{\\sin{\\theta_{b}}}\\Big(X-\\frac{\\sin{\\theta_{a}}}{\\sin{\\theta_{b}}} Y\\Big)}{1-\\frac{(\\sin{\\theta_{a}})^2}{(\\sin{\\theta_{b}})^2}}`
131238
132-
which describes how :math:`E` is derived, and then follows :math:`B = Y - E` and :math:`A = \\frac{\\sin{\\theta_{a}}}{\\sin{\\theta_{b}}} B`.
239+
which describes how :math:`E` is derived, and then follows
240+
:math:`B = Y - E` and
241+
:math:`A = \\frac{\\sin{\\theta_{a}}}{\\sin{\\theta_{b}}} B`.
133242
134243
:param image: The image to rotate.
135244
:return: The rotated image.
@@ -154,41 +263,79 @@ def perform_operation(self, image):
154263
angle_a = abs(self.rotation)
155264
angle_b = 90 - angle_a
156265

157-
# Python deals in radians so get our radians
158-
angle_a_rad = math.radians(angle_a)
159-
angle_b_rad = math.radians(angle_b)
266+
# We need the sines of angles a and b several times below
267+
sin_angle_a = math.sin(math.radians(angle_a))
268+
sin_angle_b = math.sin(math.radians(angle_b))
160269

161-
# Find the maximum area of the rectangle that could be cropped
162-
E = (math.sin(angle_a_rad)) / (math.sin(angle_b_rad)) * \
163-
(Y - X * (math.sin(angle_a_rad) / math.sin(angle_b_rad)))
164-
E = E / 1 - (math.sin(angle_a_rad) ** 2 / math.sin(angle_b_rad) ** 2)
270+
# Now we find the maximum area of the rectangle that could be cropped
271+
E = (sin_angle_a / sin_angle_b) * \
272+
(Y - X * (sin_angle_a / sin_angle_b))
273+
E = E / 1 - (sin_angle_a ** 2 / sin_angle_b ** 2)
165274
B = X - E
166-
A = (math.sin(angle_a_rad) / math.sin(angle_b_rad)) * B
275+
A = (sin_angle_a / sin_angle_b) * B
167276

168277
# Crop this area from the rotated image
169-
image = image.crop((E, A, X - E, Y - A))
278+
image = image.crop((int(round(E)), int(round(A)), int(round(X - E)), int(round(Y - A))))
170279

171280
# Return the image, re-sized to the size of the image passed originally
172281
return image.resize((x, y), resample=Image.BICUBIC)
173282

174283

175284
class RotateRange(Operation):
176-
def __init__(self, probability, rotate_range):
285+
def __init__(self, probability, max_left_rotation, max_right_rotation):
177286
Operation.__init__(self, probability)
178-
self.max_left_rotation = -abs(rotate_range[0]) # Ensure always negative
179-
self.max_right_rotation = abs(rotate_range[1]) # Ensure always positive
287+
self.max_left_rotation = -abs(max_left_rotation) # Ensure always negative
288+
self.max_right_rotation = abs(max_right_rotation) # Ensure always positive
180289

181290
def perform_operation(self, image):
182-
# This may be of use: http://stackoverflow.com/questions/34747946/rotating-a-square-in-pil
183-
random_left = random.randint(self.max_left_rotation, -1)
184-
random_right = random.randint(1, self.max_right_rotation)
291+
random_left = random.randint(self.max_left_rotation, -5)
292+
random_right = random.randint(5, self.max_right_rotation)
185293

186294
left_or_right = random.randint(0, 1)
187295

296+
rotation = 0
297+
188298
if left_or_right == 0:
189-
return image.rotate(random_left)
299+
rotation = random_left
190300
elif left_or_right == 1:
191-
return image.rotate(random_right)
301+
rotation = random_right
302+
303+
# Get size before we rotate
304+
x = image.size[0]
305+
y = image.size[1]
306+
307+
# Rotate, while expanding the canvas size
308+
image = image.rotate(rotation, expand=True, resample=Image.BICUBIC)
309+
310+
# Get size after rotation, which includes the empty space
311+
X = image.size[0]
312+
Y = image.size[1]
313+
314+
# Get our two angles needed for the calculation of the largest area
315+
angle_a = abs(rotation)
316+
angle_b = 90 - angle_a
317+
318+
# Python deals in radians so get our radians
319+
angle_a_rad = math.radians(angle_a)
320+
angle_b_rad = math.radians(angle_b)
321+
322+
# Calculate the sines
323+
angle_a_sin = math.sin(angle_a_rad)
324+
angle_b_sin = math.sin(angle_b_rad)
325+
326+
# Find the maximum area of the rectangle that could be cropped
327+
E = (math.sin(angle_a_rad)) / (math.sin(angle_b_rad)) * \
328+
(Y - X * (math.sin(angle_a_rad) / math.sin(angle_b_rad)))
329+
E = E / 1 - (math.sin(angle_a_rad) ** 2 / math.sin(angle_b_rad) ** 2)
330+
B = X - E
331+
A = (math.sin(angle_a_rad) / math.sin(angle_b_rad)) * B
332+
333+
# Crop this area from the rotated image
334+
# image = image.crop((E, A, X - E, Y - A))
335+
image = image.crop((int(round(E)), int(round(A)), int(round(X - E)), int(round(Y - A))))
336+
337+
# Return the image, re-sized to the size of the image passed originally
338+
return image.resize((x, y), resample=Image.BICUBIC)
192339

193340

194341
class Resize(Operation):
@@ -319,7 +466,7 @@ def perform_operation(self, image):
319466
# And here we are using SciKit Image's `transform` class.
320467
# shear_transformer = transform.AffineTransform(shear=amount_to_shear)
321468
# image_sheared = transform.warp(image_array, shear_transformer)
322-
469+
#
323470
# Because of warnings
324471
# with warnings.catch_warnings():
325472
# warnings.simplefilter("ignore")
@@ -328,28 +475,90 @@ def perform_operation(self, image):
328475

329476
width, height = image.size
330477

331-
angle_to_shear = int(random.uniform(self.max_shear_left, self.max_shear_right))
478+
max_shear_left = -20
479+
max_shear_right = 20
332480

481+
angle_to_shear = int(random.uniform(max_shear_left - 1, max_shear_right + 1))
482+
if angle_to_shear != -1: angle_to_shear += 1
483+
484+
# phi is the tangent of the shear angle (the shear factor), used later
333485
phi = math.tan(math.radians(angle_to_shear))
334486

335-
# Here we need the unknown b, where a is
336-
# the height of the image and phi is the
337-
# angle we want to shear (our knowns):
338-
# b = tan(phi) * a
339-
shift_in_pixels = phi * height
487+
# Alternative method
488+
# Calculate our offset when cropping
489+
# We know one angle, phi (angle_to_shear)
490+
# We know theta = 180-90-phi
491+
# We know one side, opposite (height of image)
492+
# Adjacent is therefore:
493+
# tan(theta) = opposite / adjacent
494+
# A = opposite / tan(theta)
495+
# theta = math.radians(180-90-angle_to_shear)
496+
# A = height / math.tan(theta)
340497

341498
# Transformation matrices can be found here:
342499
# https://en.wikipedia.org/wiki/Transformation_matrix
343500
# The PIL affine transform expects the first two rows of
344501
# any of the affine transformation matrices, seen here:
345502
# https://en.wikipedia.org/wiki/Transformation_matrix#/media/File:2D_affine_transformation_matrix.svg
346503

347-
# Note: PIL expects the inverse scale, so 1/scale_factor for example.
348-
return image.transform((int(round(width + shift_in_pixels)), height),
349-
Image.AFFINE,
350-
(1, phi, -shift_in_pixels,
351-
0, 1, 0),
352-
Image.BICUBIC)
504+
directions = ["x", "y"]
505+
direction = random.choice(directions)
506+
507+
if direction == "x":
508+
# Here we need the unknown b, where a is
509+
# the height of the image and phi is the
510+
# angle we want to shear (our knowns):
511+
# b = tan(phi) * a
512+
shift_in_pixels = phi * height
513+
514+
if shift_in_pixels > 0:
515+
shift_in_pixels = math.ceil(shift_in_pixels)
516+
else:
517+
shift_in_pixels = math.floor(shift_in_pixels)
518+
519+
# For non-positive shear angles (angle_to_shear <= 0), we force phi to be
520+
# non-positive and set the matrix offset to 0. The matrix offset differs
521+
# from the pixel shift for negative angles but not for positive ones, so
522+
# we copy the value here in case we need to change it.
523+
matrix_offset = shift_in_pixels
524+
if angle_to_shear <= 0:
525+
shift_in_pixels = abs(shift_in_pixels)
526+
matrix_offset = 0
527+
phi = abs(phi) * -1
528+
529+
# Note: PIL expects the inverse scale, so 1/scale_factor for example.
530+
transform_matrix = (1, phi, -matrix_offset,
531+
0, 1, 0)
532+
533+
image = image.transform((int(round(width + shift_in_pixels)), height),
534+
Image.AFFINE,
535+
transform_matrix,
536+
Image.BICUBIC)
537+
538+
image = image.crop((abs(shift_in_pixels), 0, width, height))
539+
540+
return image.resize((width, height), resample=Image.BICUBIC)
541+
542+
elif direction == "y":
543+
shift_in_pixels = phi * width
544+
545+
matrix_offset = shift_in_pixels
546+
if angle_to_shear <= 0:
547+
shift_in_pixels = abs(shift_in_pixels)
548+
matrix_offset = 0
549+
phi = abs(phi) * -1
550+
551+
transform_matrix = (1, 0, 0,
552+
phi, 1, -matrix_offset)
553+
554+
image = image.transform((width, int(round(height + shift_in_pixels))),
555+
Image.AFFINE,
556+
transform_matrix,
557+
Image.BICUBIC)
558+
559+
image = image.crop((0, abs(shift_in_pixels), width, height))
560+
561+
return image.resize((width, height), resample=Image.BICUBIC)
353562

354563

355564
class Scale(Operation):

0 commit comments

Comments
 (0)