Merge branch 'infer' of https://github.com/horcham/mindocr into new_pipe

horcham · horcham · commit 963d34c7059d · 2024-04-28T19:49:35.000+08:00
diff --git a/configs/layout/yolov8/yolov8n.yaml b/configs/layout/yolov8/yolov8n.yaml
@@ -159,7 +159,7 @@ predict:
   max_device_memory: 8GB
   amp_level: O0
   mode: 0
-  ckpt_load_path: /root/.mindspore/models/dbnet_resnet50-c3a4aa24.ckpt
+  ckpt_load_path: /root/.mindspore/models/yolov8n-4b9e8004.ckpt
   dataset_sink_mode: False
   dataset:
     type: PublayNetDataset
@@ -169,17 +169,12 @@ predict:
     transform_pipeline:
       - func_name: letterbox
         scaleup: False
-      - func_name: label_norm
-        xyxy2xywh_: True
-      - func_name: label_pad
-        padding_size: 160
-        padding_value: -1
       - func_name: image_norm
         scale: 255.
       - func_name: image_transpose
         bgr2rgb: True
         hwc2chw: True
-    batch_size: &refine_batch_size 13
+    batch_size: *refine_batch_size
     stride: 64
     output_columns: ['image', 'labels', 'image_ids', 'hw_ori', 'hw_scale', 'pad']
     net_input_column_index: [ 0 ]  # input indices for network forward func in output_columns
diff --git a/deploy/py_infer/src/core/model/model.py b/deploy/py_infer/src/core/model/model.py
@@ -106,8 +106,8 @@ def warmup(self):
             height, width = hw_list[0]
             warmup_shape = [(*other_shape, height, width)]  # Only single input
 
-        dummy_tensor = [np.random.randn(*shape).astype(dtype) for shape, dtype in zip(warmup_shape, self.input_dtype)]
-        self.model.infer(dummy_tensor)
+        # dummy_tensor = [np.random.randn(*shape).astype(dtype) for shape, dtype in zip(warmup_shape, self.input_dtype)]
+        # self.model.infer(dummy_tensor)
 
     def __del__(self):
         if hasattr(self, "model") and self.model:
diff --git a/deploy/py_infer/src/data_process/postprocess/builder.py b/deploy/py_infer/src/data_process/postprocess/builder.py
@@ -44,6 +44,7 @@ def get_device_status():
     def _get_status():
         nonlocal status
         try:
+            ms.set_context(max_device_memory="0.01GB")
             status = ms.Tensor([0])[0:].asnumpy()[0]
         except RuntimeError:
             status = 1
diff --git a/deploy/py_infer/src/parallel/module/detection/det_post_node.py b/deploy/py_infer/src/parallel/module/detection/det_post_node.py
@@ -28,10 +28,9 @@ def concat_crops(self, crops: list):
         Returns:
             numpy.ndarray: A horizontally concatenated image array.
         """
-        crops_sorted = sorted(crops, key=lambda points: (points[0][1], points[0][0]))
-        max_height = max(crop.shape[0] for crop in crops_sorted)
+        max_height = max(crop.shape[0] for crop in crops)
         resized_crops = []
-        for crop in crops_sorted:
+        for crop in crops:
             h, w, c = crop.shape
             new_h = max_height
             new_w = int((w / h) * new_h)
@@ -48,6 +47,8 @@ def process(self, input_data):
 
         data = input_data.data
         boxes = self.text_detector.postprocess(data["pred"], data["shape_list"])
+        if self.is_concat:
+            boxes = sorted(boxes, key=lambda points: (points[0][1], points[0][0]))
 
         infer_res_list = []
         for box in boxes:
@@ -65,7 +66,7 @@ def process(self, input_data):
                 sub_image = cv_utils.crop_box_from_image(image, np.array(box))
                 sub_image_list.append(sub_image)
             if self.is_concat:
-                sub_image_list = [self.concat_crops(sub_image_list)]
+                sub_image_list = len(sub_image_list) * [self.concat_crops(sub_image_list)]
             input_data.sub_image_list = sub_image_list
 
         input_data.data = None
diff --git a/mindocr/infer/classification/classification.py b/mindocr/infer/classification/classification.py
@@ -29,7 +29,6 @@ def __init__(self, args):
         self.transforms = create_transforms(self.yaml_cfg.predict.dataset.transform_pipeline)
     
     def __call__(self, img):
-        print(img)
         data = {"image": img}
         data = run_transforms(data, self.transforms[1:])
         return data
diff --git a/mindocr/infer/classification/cls_post_node.py b/mindocr/infer/classification/cls_post_node.py
@@ -49,7 +49,7 @@ def process(self, input_data):
         scores = np.array(output["scores"]).tolist()
 
         batch = input_data.sub_image_size
-        if self.task_type.value in (TaskType.DET_CLS_REC.value, TaskType.Layout_DET_CLS_REC.value):
+        if self.task_type.value in (TaskType.DET_CLS_REC.value, TaskType.LAYOUT_DET_CLS_REC.value):
             sub_images = input_data.sub_image_list
             for i in range(batch):
                 angle, score = angles[i], scores[i]
diff --git a/mindocr/infer/common/__init__.py b/mindocr/infer/common/__init__.py
@@ -1,3 +1,3 @@
-from .collect_node import CollectNode
+from .collect_node2 import CollectNode
 from .decode_node import DecodeNode
 from .handout_node import HandoutNode
diff --git a/mindocr/infer/detection/det_post_node.py b/mindocr/infer/detection/det_post_node.py
@@ -9,14 +9,14 @@
 
 from pipeline.framework.module_base import ModuleBase
 from pipeline.tasks import TaskType
-from .detection import DetPostProcess
+from .detection import DetPostprocess
 from tools.infer.text.utils import crop_text_region
 from pipeline.data_process.utils.cv_utils import crop_box_from_image
 
 class DetPostNode(ModuleBase):
     def __init__(self, args, msg_queue, tqdm_info):
         super(DetPostNode, self).__init__(args, msg_queue, tqdm_info)
-        self.det_postprocess = DetPostProcess(args)
+        self.det_postprocess = DetPostprocess(args)
         self.task_type = self.args.task_type
         self.is_concat = self.args.is_concat
 
@@ -51,17 +51,11 @@ def process(self, input_data):
             return
 
         pred = input_data.data["det_infer_res"]
-        # print("pred:", len(pred))
         pred = pred[0]
         data_dict = {"shape_list": input_data.data["det_pre_res"]["shape_list"]}
         boxes = self.det_postprocess(pred, data_dict)
 
-        
-
         boxes = boxes['polys'][0]
-
-        # TODO ZHQ 对齐 tools/infer/text/postprocess.py?
-        # print(boxes)
         
         if self.is_concat:
             boxes = sorted(boxes, key=lambda points: (points[0][1], points[0][0]))
@@ -72,11 +66,9 @@ def process(self, input_data):
 
         input_data.infer_result = infer_res_list
 
-        # ZHQ TODO
-
-        # input_data.sub_image_total = len(infer_res_list)
-        # input_data.sub_image_size = len(infer_res_list)
         if self.task_type.value in (TaskType.DET.value, TaskType.DET_REC.value, TaskType.DET_CLS_REC.value):
+            if len(input_data.frame) == 0:
+                return
             image = input_data.frame[0]  # bs=1 for det
         else:
             image = input_data.data["layout_images"][0]
@@ -88,9 +80,6 @@ def process(self, input_data):
             sub_image_list = len(sub_image_list) * [self.concat_crops(sub_image_list)]
         input_data.sub_image_list = sub_image_list
 
-        # if not (self.args.crop_save_dir or self.args.vis_det_save_dir or self.args.vis_pipeline_save_dir):
-            # input_data.frame = None
-
         if not infer_res_list:
             input_data.skip = True
 
diff --git a/mindocr/infer/detection/det_pre_node.py b/mindocr/infer/detection/det_pre_node.py
@@ -35,17 +35,14 @@ def process(self, input_data):
                 return
             image = input_data.data["layout_images"][0]  # bs = 1 for det
         data = self.det_preprocesser({"image": image})
-        # print(data)
         
         if len(data["image"].shape) == 3:
             data["image"] = np.expand_dims(data["image"], 0)
         data["shape_list"] = np.expand_dims(data["shape_list"], 0)
-        # print(data["image"].shape)
-        # time.sleep(1000)
-        if self.task_type.value == TaskType.DET.value and not (self.args.crop_save_dir or self.args.vis_det_save_dir):
-            input_data.frame = None
+        # if self.task_type.value == TaskType.DET.value and not (self.args.crop_save_dir or self.args.vis_det_save_dir):
+        #     input_data.frame = None
 
-        if self.task_type.value in (TaskType.LAYOUT_DET.value, TaskType.LAYOUT_DET_REC, TaskType.LAYOUT_DET_CLS_REC):
+        if self.task_type.value in (TaskType.LAYOUT_DET.value, TaskType.LAYOUT_DET_REC.value, TaskType.LAYOUT_DET_CLS_REC.value):
             input_data.data["det_pre_res"] = data
         else:
             input_data.data = {"det_pre_res": data}
diff --git a/mindocr/infer/detection/detection.py b/mindocr/infer/detection/detection.py
@@ -36,8 +36,7 @@ def __init__(self, args):
                 break
         self.transforms = create_transforms(self.yaml_cfg.predict.dataset.transform_pipeline)
     
-    def __call__(self, img):
-        data = {"image": img}
+    def __call__(self, data):
         data = run_transforms(data, self.transforms[1:])
         return data
 
diff --git a/mindocr/infer/layout/layout_pre_node.py b/mindocr/infer/layout/layout_pre_node.py
@@ -47,7 +47,6 @@ def process(self, input_data):
             "target_size": [800, 800],
         }
         data = self.layout_preprocesser(data)
-        # print(data)
 
         if len(data["image"].shape) == 3:
             data["image"] = np.expand_dims(data["image"], 0)
diff --git a/mindocr/infer/node_config.py b/mindocr/infer/node_config.py
@@ -17,7 +17,8 @@
 
 __all__ = ["MODEL_DICT_v2",
            "DET_DESC_v2", "CLS_DESC_v2", "REC_DESC_v2",
-           "DET_REC_DESC_v2", "DET_CLS_REC_DESC_v2"]
+           "DET_REC_DESC_v2", "DET_CLS_REC_DESC_v2",
+           "LAYOUT_DESC_v2", "LAYOUT_DET_REC_DESC_v2", "LAYOUT_DET_CLS_REC_DESC_v2"]
 
 DET_DESC_v2 = [
     (("HandoutNode", "0", 1), ("DecodeNode", "0", 1)),
diff --git a/mindocr/infer/recognition/rec_pre_node.py b/mindocr/infer/recognition/rec_pre_node.py
@@ -31,7 +31,7 @@ def process(self, input_data):
             image = input_data.frame[0]
             data = [self.rec_preprocesser(image)["image"]]
             input_data.sub_image_size = 1
-            input_data.data["rec_pre_res"] = data
+            input_data.data = {"rec_pre_res": data}
             self.send_to_next_module(input_data)
         else:
             sub_image_list = input_data.sub_image_list
diff --git a/mindocr/losses/det_loss.py b/mindocr/losses/det_loss.py
@@ -1,4 +1,5 @@
 import logging
+import os
 from math import pi
 from typing import Tuple, Union
 
@@ -10,6 +11,8 @@
 __all__ = ["DBLoss", "PSEDiceLoss", "EASTLoss", "FCELoss"]
 _logger = logging.getLogger(__name__)
 
+OFFLINE_MODE = os.getenv("OFFLINE_MODE", None)
+
 
 class DBLoss(nn.LossBase):
     """
@@ -165,7 +168,13 @@ def construct(self, pred: Tensor, gt: Tensor, mask: Tensor) -> Tensor:
         neg_loss = (loss * negative).view(loss.shape[0], -1)
 
         neg_vals, _ = ops.sort(neg_loss)
-        neg_index = ops.stack((mnp.arange(loss.shape[0]), neg_vals.shape[1] - neg_count), axis=1)
+
+        if OFFLINE_MODE is None:
+            neg_index = ops.stack((mnp.arange(loss.shape[0]), neg_vals.shape[1] - neg_count), axis=1)
+        else:
+            neg_index = ops.stack(
+                (ops.arange(loss.shape[0], dtype=neg_count.dtype), neg_vals.shape[1] - neg_count), axis=1
+            )
         min_neg_score = ops.expand_dims(ops.gather_nd(neg_vals, neg_index), axis=1)
 
         neg_loss_mask = (neg_loss >= min_neg_score).astype(ms.float32)  # filter values less than top k
diff --git a/mindocr/losses/rec_loss.py b/mindocr/losses/rec_loss.py
@@ -1,3 +1,5 @@
+import os
+
 import numpy as np
 
 import mindspore as ms
@@ -6,6 +8,8 @@
 
 __all__ = ["CTCLoss", "AttentionLoss", "VisionLANLoss"]
 
+OFFLINE_MODE = os.getenv("OFFLINE_MODE", None)
+
 
 class CTCLoss(LossBase):
     """
@@ -147,14 +151,21 @@ class AttentionLoss(LossBase):
     def __init__(self, reduction: str = "mean", ignore_index: int = 0) -> None:
         super().__init__()
         # ignore <GO> symbol, assume it is placed at 0th index
-        self.criterion = nn.CrossEntropyLoss(reduction=reduction, ignore_index=ignore_index)
+        if OFFLINE_MODE is None:
+            self.criterion = nn.CrossEntropyLoss(reduction=reduction, ignore_index=ignore_index)
+        else:
+            self.reduction = reduction
+            self.ignore_index = ignore_index
 
     def construct(self, logits: Tensor, labels: Tensor) -> Tensor:
         labels = labels[:, 1:]  # without <GO> symbol
         num_classes = logits.shape[-1]
         logits = ops.reshape(logits, (-1, num_classes))
         labels = ops.reshape(labels, (-1,))
-        return self.criterion(logits, labels)
+        if OFFLINE_MODE is None:
+            return self.criterion(logits, labels)
+        else:
+            return ops.cross_entropy(logits, labels, reduction=self.reduction, ignore_index=self.ignore_index)
 
 
 class SARLoss(LossBase):
diff --git a/mindocr/models/necks/fpn.py b/mindocr/models/necks/fpn.py
@@ -1,3 +1,4 @@
+import os
 from typing import List, Tuple
 
 from mindspore import Tensor, nn, ops
@@ -7,14 +8,20 @@
 from ..utils.attention_cells import SEModule
 from .asf import AdaptiveScaleFusion
 
+OFFLINE_MODE = os.getenv("OFFLINE_MODE", None)
 
-def _resize_nn(x: Tensor, scale: int = 0, shape: Tuple[int] = None):
-    if scale == 1 or shape == x.shape[2:]:
-        return x
 
-    if scale:
-        shape = (x.shape[2] * scale, x.shape[3] * scale)
-    return ops.ResizeNearestNeighbor(shape)(x)
+if OFFLINE_MODE is None:
+    def _resize_nn(x: Tensor, scale: int = 0, shape: Tuple[int] = None):
+        if scale == 1 or shape == x.shape[2:]:
+            return x
+
+        if scale:
+            shape = (x.shape[2] * scale, x.shape[3] * scale)
+        return ops.ResizeNearestNeighbor(shape)(x)
+else:
+    def _resize_nn(x: Tensor, shape: Tensor):
+        return ops.ResizeNearestNeighborV2()(x, shape)
 
 
 class FPN(nn.Cell):
@@ -64,11 +71,18 @@ def construct(self, features: List[Tensor]) -> Tensor:
         for i, uc_op in enumerate(self.unify_channels):
             features[i] = uc_op(features[i])
 
-        for i in range(2, -1, -1):
-            features[i] += _resize_nn(features[i + 1], shape=features[i].shape[2:])
+        if OFFLINE_MODE is None:
+            for i in range(2, -1, -1):
+                features[i] += _resize_nn(features[i + 1], shape=features[i].shape[2:])
+
+            for i, out in enumerate(self.out):
+                features[i] = _resize_nn(out(features[i]), shape=features[0].shape[2:])
+        else:
+            for i in range(2, -1, -1):
+                features[i] += _resize_nn(features[i + 1], shape=ops.dyn_shape(features[i])[2:])
 
-        for i, out in enumerate(self.out):
-            features[i] = _resize_nn(out(features[i]), shape=features[0].shape[2:])
+            for i, out in enumerate(self.out):
+                features[i] = _resize_nn(out(features[i]), shape=ops.dyn_shape(features[0])[2:])
 
         return self.fuse(features[::-1])  # matching the reverse order of the original work
 
diff --git a/mindocr/models/transforms/tps_spatial_transformer.py b/mindocr/models/transforms/tps_spatial_transformer.py
@@ -1,4 +1,5 @@
 import itertools
+import os
 from typing import Optional, Tuple
 
 import numpy as np
@@ -8,6 +9,8 @@
 import mindspore.ops as ops
 from mindspore import Tensor
 
+OFFLINE_MODE = os.getenv("OFFLINE_MODE", None)
+
 
 def grid_sample(input: Tensor, grid: Tensor, canvas: Optional[Tensor] = None) -> Tensor:
     out_type = input.dtype
@@ -112,15 +115,22 @@ def __init__(
         self.target_coordinate_repr = Tensor(target_coordinate_repr, dtype=ms.float32)
         self.target_control_points = Tensor(target_control_points, dtype=ms.float32)
 
+        if OFFLINE_MODE is not None:
+            self.matmul = ops.BatchMatMul()
+
     def construct(
         self, input: Tensor, source_control_points: Tensor
     ) -> Tuple[Tensor, Tensor]:
         batch_size = ops.shape(source_control_points)[0]
 
         padding_matrix = ops.tile(self.padding_matrix, (batch_size, 1, 1))
         Y = ops.concat([source_control_points, padding_matrix], axis=1)
-        mapping_matrix = ops.matmul(self.inverse_kernel, Y)
-        source_coordinate = ops.matmul(self.target_coordinate_repr, mapping_matrix)
+        if OFFLINE_MODE is None:
+            mapping_matrix = ops.matmul(self.inverse_kernel, Y)
+            source_coordinate = ops.matmul(self.target_coordinate_repr, mapping_matrix)
+        else:
+            mapping_matrix = self.matmul(self.inverse_kernel[None, ...], Y)
+            source_coordinate = self.matmul(self.target_coordinate_repr[None, ...], mapping_matrix)
         grid = ops.reshape(
             source_coordinate,
             (-1, self.target_height, self.target_width, 2),
diff --git a/mindocr/models/utils/attention_cells.py b/mindocr/models/utils/attention_cells.py
diff --git a/pipeline/framework/module_base.py b/pipeline/framework/module_base.py
diff --git a/pipeline/framework/pipeline_manager.py b/pipeline/framework/pipeline_manager.py
diff --git a/pipeline/infer_args.py b/pipeline/infer_args.py
diff --git a/tools/data_for_export_convert.py b/tools/data_for_export_convert.py

Original file line number	Diff line number	Diff line change
`@@ -47,7 +47,6 @@ def process(self, input_data):`
`47`	`47`	`"target_size": [800, 800],`
`48`	`48`	`}`
`49`	`49`	`data = self.layout_preprocesser(data)`
`50`		`- # print(data)`
`51`	`50`
`52`	`51`	`if len(data["image"].shape) == 3:`
`53`	`52`	`data["image"] = np.expand_dims(data["image"], 0)`