DeepLink-org · DoorKickers · Jul 19, 2024 · Jul 22, 2024 · Jul 22, 2024 · Jul 22, 2024
@@ -123,8 +123,10 @@ def process_one_iter(log_file, clear_log, model_info: dict) -> None:
             cmd_run_one_iter = f"srun --job-name={job_name} --partition={partition}  --gres={gpu_requests} --time=40 python {train_path}"
             cmd_cp_one_iter = ""
         else:
-            cmd_run_one_iter = f"srun --job-name={job_name} --partition={partition}  --gres={gpu_requests} --time=40 sh SMART/tools/one_iter_tool/run_one_iter.sh {train_path} {config_path} {work_dir} {opt_arg}"
-            cmd_cp_one_iter = f"srun --job-name={job_name} --partition={partition}  --gres={gpu_requests} --time=30 sh SMART/tools/one_iter_tool/compare_one_iter.sh {package_name} {atol} {rtol} {metric}"
+            cmd_run_one_iter = f"bash SMART/tools/one_iter_tool/run_one_iter.sh {train_path} {config_path} {work_dir} {opt_arg}"
+            cmd_cp_one_iter = f"bash SMART/tools/one_iter_tool/compare_one_iter.sh {package_name} {atol} {rtol} {metric}"
+            # cmd_run_one_iter = f"srun --job-name={job_name} --partition={partition}  --gres={gpu_requests} --time=40 bash SMART/tools/one_iter_tool/run_one_iter.sh {train_path} {config_path} {work_dir} {opt_arg}"
+            # cmd_cp_one_iter = f"srun --job-name={job_name} --partition={partition}  --gres={gpu_requests} --time=30 bash SMART/tools/one_iter_tool/compare_one_iter.sh {package_name} {atol} {rtol} {metric}"
     elif device == "ascend":
         if "infer" in p2 and "infer" in p3:
             cmd_run_one_iter = f"python {train_path}"

@@ -1,18 +1,18 @@
 camb:
     # # transformers
-    - model_cfg: "transformers examples/pytorch/question-answering/run_bert_qa.py workdirs_bert"
+    # - model_cfg: "transformers examples/pytorch/question-answering/run_bert_qa.py workdirs_bert"
 
     # # mmpretrain
     - model_cfg: "mmpretrain resnet/resnet50_8xb32_in1k.py workdirs_resnet"
-    - model_cfg: "mmpretrain swin_transformer/swin-base_16xb64_in1k.py workdirs_swin_transformer"
-    - model_cfg: "mmpretrain vision_transformer/vit-base-p16_32xb128-mae_in1k.py workdirs_vision_transformer"
+    # - model_cfg: "mmpretrain swin_transformer/swin-base_16xb64_in1k.py workdirs_swin_transformer"
+    # - model_cfg: "mmpretrain vision_transformer/vit-base-p16_32xb128-mae_in1k.py workdirs_vision_transformer"
     - model_cfg: "mmpretrain mobilenet_v2/mobilenet-v2_8xb32_in1k.py workdirs_mobilenetv2 --no-pin-memory"
       precision: {atol: 0.015, metric: 0.015, rtol: 0.01}
     - model_cfg: "mmpretrain mobilenet_v3/mobilenet-v3-large_8xb128_in1k.py workdirs_mobilenetv3"
     - model_cfg: "mmpretrain efficientnet/efficientnet-b2_8xb32_in1k.py workdirs_efficientnet"
-    - model_cfg: "mmpretrain convnext/convnext-small_32xb128_in1k.py workdirs_convnext"
-    - model_cfg: "mmpretrain shufflenet_v2/shufflenet-v2-1x_16xb64_in1k_256.py workdirs_shufflenetv2"
-      precision: {atol: 0.015, metric: 0.015, rtol: 0.01}
+    # - model_cfg: "mmpretrain convnext/convnext-small_32xb128_in1k.py workdirs_convnext"
+    # - model_cfg: "mmpretrain shufflenet_v2/shufflenet-v2-1x_16xb64_in1k_256.py workdirs_shufflenetv2"
+    #   precision: {atol: 0.015, metric: 0.015, rtol: 0.01}
 
     # # mmdetection
     - model_cfg: "mmdetection yolo/yolov3_d53_8xb8-320-273e_coco.py workdirs_yolov3"
@@ -97,13 +97,19 @@ cuda:
     # - model_cfg: "mmagic stable_diffusion/stable-diffusion_ddim_denoisingunet_infer.py workdirs_stable_diffusion"
 
 ascend:
-    # mmsegmentation
-    # - model_cfg: "mmsegmentation unet/unet-s5-d16_fcn_4xb4-160k_cityscapes-512x1024.py workdirs_unet"
-    #   fallback_op_list: "nll_loss2d_forward,nll_loss2d_backward,native_batch_norm,topk,convolution_overrideable,convolution_backward_overrideable,native_batch_norm_backward"
-    # - model_cfg: "mmdetection detr/detr_r50_8xb2-150e_coco.py workdirs_detr"
-    #   fallback_op_list: "fill_.Scalar,baddbmm.out,where.self,linear_backward,linear,uniform_,any.all_out,_foreach_addcdiv_.ScalarList,native_batch_norm_backward,convolution_overrideable"
     # mmpretrain
-    # - model_cfg: "mmpretrain resnet/resnet50_8xb32_in1k.py workdirs_resnet"
+    - model_cfg: "mmpretrain convnext/convnext-small_32xb128_in1k.py workdirs_convnext"
+    - model_cfg: "mmpretrain efficientnet/efficientnet-b2_8xb32_in1k.py workdirs_efficientnet"
+    - model_cfg: "mmpretrain mobilenet_v2/mobilenet-v2_8xb32_in1k.py workdirs_mobilenetv2"
+      precision: {atol: 0.015, metric: 0.015, rtol: 0.01}
+    - model_cfg: "mmpretrain mobilenet_v3/mobilenet-v3-large_8xb128_in1k.py workdirs_mobilenetv3"
+    - model_cfg: "mmpretrain resnet/resnet50_8xb32_in1k.py workdirs_resnet"
+    - model_cfg: "mmpretrain vision_transformer/vit-base-p16_32xb128-mae_in1k.py workdirs_vision_transformer"
+
+    # mmsegmentation
+    - model_cfg: "mmsegmentation deeplabv3/deeplabv3_r50-d8_4xb2-40k_cityscapes-512x1024.py workdirs_deeplabv3"
+    - model_cfg: "mmsegmentation deeplabv3plus/deeplabv3plus_r50-d8_4xb2-40k_cityscapes-512x1024.py workdirs_deeplabv3plus"
+    - model_cfg: "mmsegmentation unet/unet-s5-d16_fcn_4xb4-160k_cityscapes-512x1024.py workdirs_unet"
 
 kunlunxin:
     # mmpretrain