BonsaiAI
diff --git a/onnx_tf/handlers/backend/resize.py b/onnx_tf/handlers/backend/resize.py
Lines changed: 79 additions & 126 deletions
@@ -120,46 +120,35 @@ def args_check(cls, node, **kwargs):
 
   @classmethod
   def version_10(cls, node, **kwargs):
+    # x and scales are both in NCHW format
     x = kwargs["tensor_dict"][node.inputs[0]]
     x_shape = tf_shape(x)
     scales = kwargs["tensor_dict"][node.inputs[1]]
 
-    n_in_scales_is_one = tf.equal(scales[0], 1)
-    c_in_scales_is_one = tf.logical_or(tf.equal(scales[1], 1),
-                                       tf.equal(scales[3], 1))
-    assert_n_c_in_scales_are_ones = tf.Assert(
-        tf.logical_and(n_in_scales_is_one, c_in_scales_is_one), [scales])
+    h_w_scale = scales[2:]
+    h_w_shape = x_shape[2:]
+    new_h_w_shape = tf.cast(h_w_scale * tf.cast(h_w_shape, scales.dtype),
+                            tf.int32)
 
-    with tf.control_dependencies([assert_n_c_in_scales_are_ones]):
-      x_in_NCHW_format = tf.equal(scales[1], 1)
-      h_w_scale = tf.where(x_in_NCHW_format, scales[2:], scales[1:3])
-      h_w_shape = tf.where(x_in_NCHW_format, x_shape[2:], x_shape[1:3])
-      new_h_w_shape = tf.cast(h_w_scale * tf.cast(h_w_shape, scales.dtype),
-                              tf.int32)
-
-      mode = node.attrs.get("mode", "nearest")
-      if mode.lower() == "linear":
-        mode = tf.image.ResizeMethod.BILINEAR
-      else:
-        mode = tf.image.ResizeMethod.NEAREST_NEIGHBOR
-
-      def process_NCHW_format(x):
-        x_t = tf.transpose(x, perm=[0, 2, 3, 1])
-        y = tf.image.resize(x_t, size=new_h_w_shape, method=mode)
-        y_t = tf.transpose(y, perm=[0, 3, 1, 2])
-        return y_t
-
-      def process_NHWC_format(x):
-        y = tf.image.resize(x, size=new_h_w_shape, method=mode)
-        return y
+    mode = node.attrs.get("mode", "nearest")
+    if mode.lower() == "linear":
+      mode = tf.image.ResizeMethod.BILINEAR
+    else:
+      mode = tf.image.ResizeMethod.NEAREST_NEIGHBOR
 
-      output = tf.cond(x_in_NCHW_format, lambda: process_NCHW_format(x),
-                       lambda: process_NHWC_format(x))
+    # The input image is in NCHW format, but tf.image.resize only
+    # supports channels-last data. Therefore transpose to NHWC
+    # format first, perform the resize, and then transpose the
+    # result back to NCHW format.
+    x_t = tf.transpose(x, perm=[0, 2, 3, 1])
+    y = tf.image.resize(x_t, size=new_h_w_shape, method=mode)
+    output = tf.transpose(y, perm=[0, 3, 1, 2])
 
-      return [output]
+    return [output]
 
   @classmethod
   def version_11(cls, node, **kwargs):
+    # x, roi, scales and sizes are all in NCHW format
     tensor_dict = kwargs["tensor_dict"]
     x = tensor_dict[node.inputs[0]]
     x_shape = tf_shape(x)
@@ -172,99 +161,63 @@ def version_11(cls, node, **kwargs):
     extrapolation_value = node.attrs.get("extrapolation_value", 0.0)
     mode = node.attrs.get("mode", "nearest")
 
-    param = scales if len(node.inputs) == 3 else sizes
-    n_in_param_is_one = tf.equal(param[0], 1)
-    c_in_param_is_one = tf.logical_or(tf.equal(param[1], 1),
-                                      tf.equal(param[3], 1))
-    assert_n_c_in_param_are_ones = tf.Assert(
-        tf.logical_and(n_in_param_is_one, c_in_param_is_one), [param])
-
-    with tf.control_dependencies([assert_n_c_in_param_are_ones]):
-      if mode.lower() == "linear":
-        mode = tf.image.ResizeMethod.BILINEAR
-        tf_resize = tf.compat.v1.image.resize_bilinear
-      elif mode.lower() == "cubic":
-        mode = tf.image.ResizeMethod.BICUBIC
-        tf_resize = tf.compat.v1.image.resize_bicubic
-      else:
-        mode = tf.image.ResizeMethod.NEAREST_NEIGHBOR
-        tf_resize = tf.compat.v1.image.resize_nearest_neighbor
-
-      x_in_NCHW_format = tf.equal(param[1], 1)
-
-      if len(node.inputs) == 3:  # only scales is defined
-        h_w_scale = tf.where(x_in_NCHW_format, scales[2:], scales[1:3])
-        h_w_shape = tf.where(x_in_NCHW_format, x_shape[2:], x_shape[1:3])
-        new_size = tf.cast(h_w_scale * tf.cast(h_w_shape, scales.dtype),
-                           tf.int32)
-      else:  # sizes is defined
-        # The number of elements of 'sizes' should be the same as the rank of input 'X'
-        sizes.set_shape(x_shape.shape)
-        new_size = tf.cast(tf.where(x_in_NCHW_format, sizes[2:], sizes[1:3]),
-                           tf.int32)
-      # Tensorflow require the shape of "size" in the "tf.image.resize" must be known at
-      # graph creation time. However in the dynamic shape situation, the shape of "new_size"
-      # will be "None", the actual shape can only be determine at runtime. But we know
-      # "new_size" should always contain [h, w], therefore the shape must be 2.
-      new_size.set_shape([2])
-
-      def get_NCHW_boxes():
-        indices = []
-        x_rank = len(x.get_shape())
-        for i in range(2, x_rank):
-          indices.insert(i - 2, i)
-          indices.insert(i, i + x_rank)
-        return tf.expand_dims(tf.gather(roi, indices, axis=0), 0)
-
-      def get_NHWC_boxes():
-        indices = []
-        x_rank = len(x.get_shape())
-        for i in range(1, x_rank - 1):
-          indices.insert(i - 1, i)
-          indices.insert(i + 1, i + x_rank)
-        return tf.expand_dims(tf.gather(roi, indices, axis=0), 0)
-
-      box_indices = tf.cast(tf.range(0, x_shape[0]), dtype=tf.int32)
-
-      def process_NCHW_format():
-        x_t = tf.transpose(x, perm=[0, 2, 3, 1])
-        if coordinate_transformation_mode == "tf_crop_and_resize":
-          boxes = get_NCHW_boxes()
-          y = tf.image.crop_and_resize(x_t, boxes, box_indices, new_size, mode,
-                                       extrapolation_value)
-        elif coordinate_transformation_mode == "align_corners":
-          y = tf_resize(x_t,
-                        size=new_size,
-                        align_corners=True,
-                        half_pixel_centers=False)
-        elif coordinate_transformation_mode == "asymmetric":
-          y = tf_resize(x_t,
-                        size=new_size,
-                        align_corners=False,
-                        half_pixel_centers=False)
-        else:  # half_pixel or tf_half_pixel_for_nn
-          y = tf.image.resize(x_t, size=new_size, method=mode)
-        return tf.transpose(y, perm=[0, 3, 1, 2])
-
-      def process_NHWC_format():
-        if coordinate_transformation_mode == "tf_crop_and_resize":
-          boxes = get_NHWC_boxes()
-          return tf.image.crop_and_resize(x, boxes, box_indices, new_size, mode,
-                                          extrapolation_value)
-        elif coordinate_transformation_mode == "align_corners":
-          return tf_resize(x,
-                           size=new_size,
-                           align_corners=True,
-                           half_pixel_centers=False)
-        elif coordinate_transformation_mode == "asymmetric":
-          return tf_resize(x,
-                           size=new_size,
-                           align_corners=False,
-                           half_pixel_centers=False)
-        else:  # half_pixel or tf_half_pixel_for_nn
-          return tf.image.resize(x, size=new_size, method=mode)
-
-      output = tf.cond(x_in_NCHW_format, process_NCHW_format,
-                       process_NHWC_format)
-
-      return [output]
+    if mode.lower() == "linear":
+      mode = tf.image.ResizeMethod.BILINEAR
+      tf_resize = tf.compat.v1.image.resize_bilinear
+    elif mode.lower() == "cubic":
+      mode = tf.image.ResizeMethod.BICUBIC
+      tf_resize = tf.compat.v1.image.resize_bicubic
+    else:
+      mode = tf.image.ResizeMethod.NEAREST_NEIGHBOR
+      tf_resize = tf.compat.v1.image.resize_nearest_neighbor
+
+    if len(node.inputs) == 3:  # only scales is defined
+      h_w_scale = scales[2:]
+      h_w_shape = x_shape[2:]
+      new_size = tf.cast(h_w_scale * tf.cast(h_w_shape, scales.dtype),
+                         tf.int32)
+    else:  # sizes is defined
+      # The number of elements of 'sizes' should be the same as the rank of input 'X'
+      sizes.set_shape(x_shape.shape)
+      new_size = tf.cast(sizes[2:], tf.int32)
+    # TensorFlow requires the shape of "size" passed to "tf.image.resize" to be known at
+    # graph creation time. However, with dynamic shapes the shape of "new_size" is "None"
+    # and the actual shape can only be determined at runtime. Since "new_size" always
+    # holds [h, w], its shape must be [2].
+    new_size.set_shape([2])
+
+    # get boxes for crop
+    indices = []
+    x_rank = len(x.get_shape())
+    for i in range(2, x_rank):
+      indices.insert(i - 2, i)
+      indices.insert(i, i + x_rank)
+    boxes = tf.expand_dims(tf.gather(roi, indices, axis=0), 0)
+
+    # get box_indices for crop
+    box_indices = tf.cast(tf.range(0, x_shape[0]), dtype=tf.int32)
+
+    # The input image is in NCHW format, but tf.image.crop_and_resize,
+    # tf.image.resize and tf.compat.v1.image.resize_xx only support
+    # channels-last data. Therefore transpose to NHWC format first,
+    # perform the resize, and then transpose the result back to
+    # NCHW format.
+    x_t = tf.transpose(x, perm=[0, 2, 3, 1])
+    if coordinate_transformation_mode == "tf_crop_and_resize":
+      y = tf.image.crop_and_resize(x_t, boxes, box_indices, new_size, mode,
+                                   extrapolation_value)
+    elif coordinate_transformation_mode == "align_corners":
+      y = tf_resize(x_t,
+                    size=new_size,
+                    align_corners=True,
+                    half_pixel_centers=False)
+    elif coordinate_transformation_mode == "asymmetric":
+      y = tf_resize(x_t,
+                    size=new_size,
+                    align_corners=False,
+                    half_pixel_centers=False)
+    else:  # half_pixel or tf_half_pixel_for_nn
+      y = tf.image.resize(x_t, size=new_size, method=mode)
+    output = tf.transpose(y, perm=[0, 3, 1, 2])
+
+    return [output]