Skip to content

Commit 55b4efa

Browse files
committed
pad in forward
1 parent e89a31b commit 55b4efa

File tree

3 files changed

+99
-14
lines changed

3 files changed

+99
-14
lines changed

torchvision/prototype/transforms/_geometry.py

Lines changed: 48 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import collections.abc
22
import math
33
import warnings
4-
from typing import Any, Dict, List, Union, Sequence, Tuple, cast
4+
from typing import Any, Dict, List, Union, Sequence, Tuple, cast, Literal
55

66
import PIL.Image
77
import torch
@@ -259,7 +259,14 @@ def apply_recursively(obj: Any) -> Any:
259259

260260

261261
class RandomCrop(Transform):
262-
def __init__(self, size, padding=None, pad_if_needed=False, fill=0, padding_mode="constant"):
262+
def __init__(
263+
self,
264+
size=Union[int, Sequence[int]],
265+
padding: Sequence[int] = None,
266+
pad_if_needed: bool = False,
267+
fill: Union[int, str, Sequence[int]] = 0,
268+
padding_mode: Union[str, Literal["constant", "edge", "reflect", "symmetric"]] = "constant",
269+
):
263270
super().__init__()
264271
self.size = _setup_size(size, error_msg="Please provide only two dimensions (h, w) for size.")
265272

@@ -269,12 +276,9 @@ def __init__(self, size, padding=None, pad_if_needed=False, fill=0, padding_mode
269276
self.padding_mode = padding_mode
270277

271278
def _get_params(self, sample: Any) -> Dict[str, Any]:
272-
273279
"""Get parameters for ``crop`` for a random crop.
274-
275280
Args:
276281
sample (PIL Image, Tensor or features.Image): Image to be cropped.
277-
278282
Returns:
279283
dict: Dict containing 'top', 'left', 'height', and 'width'
280284
"""
@@ -294,19 +298,51 @@ def _get_params(self, sample: Any) -> Dict[str, Any]:
294298
return dict(top=i, left=j, height=th, width=tw)
295299

296300
def _transform(self, input: Any, params: Dict[str, Any]) -> Any:
    """Crop ``input`` at the location sampled in ``params``.

    Image-like inputs (feature images, PIL images, plain tensors) are
    cropped; any other input is returned untouched.
    """
    if isinstance(input, features.Image):
        cropped = F.crop_image_tensor(input, **params)
        return features.Image.new_like(input, cropped)
    if isinstance(input, PIL.Image.Image):
        return F.crop_image_pil(input, **params)
    if is_simple_tensor(input):
        return F.crop_image_tensor(input, **params)
    # Non-image payloads (labels, metadata, ...) pass through unchanged.
    return input
307310

308311
def forward(self, *inputs: Any) -> Any:
    """Pad the sample (per ``padding`` / ``pad_if_needed``) before cropping.

    The padding step runs here, on the whole sample, so that the crop
    parameters sampled later by ``_get_params`` see the padded size.

    Raises:
        TypeError: if the sample contains bounding boxes or segmentation
            masks, which this transform does not support.
    """
    sample = inputs if len(inputs) > 1 else inputs[0]
    if has_any(sample, features.BoundingBox, features.SegmentationMask):
        raise TypeError(f"BoundingBox'es and SegmentationMask's are not supported by {type(self).__name__}()")

    if isinstance(sample, (features.Image, PIL.Image.Image)) or is_simple_tensor(sample):
        # All three image kinds take the exact same padding arguments;
        # build them once instead of repeating the call three times.
        pad_kwargs = dict(
            output_size=self.size,
            image_size=get_image_dimensions(sample),
            padding=self.padding,
            pad_if_needed=self.pad_if_needed,
            fill=self.fill,
            padding_mode=self.padding_mode,
        )
        if isinstance(sample, features.Image):
            output = F.random_pad_image_tensor(sample, **pad_kwargs)
            sample = features.Image.new_like(sample, output)
        elif isinstance(sample, PIL.Image.Image):
            sample = F.random_pad_image_pil(sample, **pad_kwargs)
        else:
            sample = F.random_pad_image_tensor(sample, **pad_kwargs)
    return super().forward(sample)

torchvision/prototype/transforms/functional/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,8 +48,8 @@
4848
center_crop_image_pil,
4949
resized_crop_image_tensor,
5050
resized_crop_image_pil,
51-
random_crop_image_tensor,
52-
random_crop_image_pil,
51+
random_pad_image_tensor,
52+
random_pad_image_pil,
5353
affine_image_tensor,
5454
affine_image_pil,
5555
rotate_image_tensor,

torchvision/prototype/transforms/functional/_geometry.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -451,3 +451,52 @@ def random_crop_image_pil(
451451
img = pad_image_pil(img, padding, fill, padding_mode)
452452

453453
return crop_image_pil(img, top, left, height, width)
454+
455+
456+
def random_pad_image_tensor(
    img: torch.Tensor,
    output_size: List[int],
    image_size: Tuple[int, int, int],
    padding: Optional[List[int]] = None,
    pad_if_needed: bool = False,
    fill: int = 0,
    padding_mode: str = "constant",
) -> torch.Tensor:
    """Pad ``img`` so that a subsequent ``output_size`` crop always fits.

    Args:
        img: image tensor of shape ``(..., H, W)``.
        output_size: target crop size as ``[height, width]``.
        image_size: ``(channels, height, width)`` of ``img`` *before* padding.
        padding: optional explicit padding applied first, in the torchvision
            ``pad`` convention (int, or a sequence of 1, 2, or 4 values).
        pad_if_needed: when True, additionally pad any dimension still
            smaller than ``output_size`` after the explicit padding.
        fill: fill value for ``"constant"`` padding.
        padding_mode: ``"constant"``, ``"edge"``, ``"reflect"`` or ``"symmetric"``.

    Returns:
        The (possibly) padded image tensor.
    """
    _, height, width = image_size

    if padding is not None:
        img = pad_image_tensor(img, padding, fill, padding_mode)
        # BUGFIX: the explicit padding changes the image size, so the
        # pad_if_needed checks below must compare against the *padded*
        # dimensions (stable torchvision recomputes the size after padding).
        pad = [padding] * 4 if isinstance(padding, int) else list(padding)
        if len(pad) == 1:
            pad = pad * 4
        elif len(pad) == 2:
            pad = [pad[0], pad[1], pad[0], pad[1]]
        left, top, right, bottom = pad
        width += left + right
        height += top + bottom
    # pad the width if needed (two-value padding pads left and right)
    if pad_if_needed and width < output_size[1]:
        padding = [output_size[1] - width, 0]
        img = pad_image_tensor(img, padding, fill, padding_mode)
    # pad the height if needed (two-value padding pads top and bottom)
    if pad_if_needed and height < output_size[0]:
        padding = [0, output_size[0] - height]
        img = pad_image_tensor(img, padding, fill, padding_mode)
    return img
478+
479+
480+
def random_pad_image_pil(
    img: "PIL.Image.Image",
    output_size: List[int],
    image_size: Tuple[int, int, int],
    padding: Optional[List[int]] = None,
    pad_if_needed: bool = False,
    fill: int = 0,
    padding_mode: Literal["constant", "edge", "reflect", "symmetric"] = "constant",
) -> "PIL.Image.Image":
    """Pad ``img`` so that a subsequent ``output_size`` crop always fits.

    Args:
        img: PIL image to pad.
        output_size: target crop size as ``[height, width]``.
        image_size: ``(channels, height, width)`` of ``img`` *before* padding.
        padding: optional explicit padding applied first, in the torchvision
            ``pad`` convention (int, or a sequence of 1, 2, or 4 values).
        pad_if_needed: when True, additionally pad any dimension still
            smaller than ``output_size`` after the explicit padding.
        fill: fill value for ``"constant"`` padding.
        padding_mode: ``"constant"``, ``"edge"``, ``"reflect"`` or ``"symmetric"``.

    Returns:
        The (possibly) padded PIL image.
    """
    _, height, width = image_size

    if padding is not None:
        img = pad_image_pil(img, padding, fill, padding_mode)
        # BUGFIX: the explicit padding changes the image size, so the
        # pad_if_needed checks below must compare against the *padded*
        # dimensions (stable torchvision recomputes the size after padding).
        pad = [padding] * 4 if isinstance(padding, int) else list(padding)
        if len(pad) == 1:
            pad = pad * 4
        elif len(pad) == 2:
            pad = [pad[0], pad[1], pad[0], pad[1]]
        left, top, right, bottom = pad
        width += left + right
        height += top + bottom
    # pad the width if needed (two-value padding pads left and right)
    if pad_if_needed and width < output_size[1]:
        padding = [output_size[1] - width, 0]
        img = pad_image_pil(img, padding, fill, padding_mode)
    # pad the height if needed (two-value padding pads top and bottom)
    if pad_if_needed and height < output_size[0]:
        padding = [0, output_size[0] - height]
        img = pad_image_pil(img, padding, fill, padding_mode)
    return img

0 commit comments

Comments
 (0)