Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 79 additions & 0 deletions .github/workflows/_test_backend.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
# Reusable workflow: runs the backend test suites (models + operators) for a
# given backend on Linux and/or macOS. Callers select the platform(s) via the
# run-linux / run-macos boolean inputs and supply the list of flows as JSON.
name: Test Backend

on:
  workflow_call:
    inputs:
      backend:
        description: 'Backend to test (xnnpack, coreml, vulkan, qnn)'
        required: true
        type: string
      flows:
        # Passed as a JSON-encoded string because workflow_call inputs cannot
        # be arrays; expanded with fromJSON() in the job matrices below.
        description: 'JSON array of flows to test'
        required: true
        type: string
      ref:
        description: 'Git ref to checkout'
        required: false
        type: string
        # NOTE(review): confirm that expression defaults are supported for
        # workflow_call inputs; all current callers pass ref explicitly.
        default: ${{ github.sha }}
      timeout:
        description: 'Job timeout in minutes'
        required: false
        type: number
        default: 120
      run-linux:
        description: 'Whether to run Linux tests'
        required: false
        type: boolean
        default: false
      run-macos:
        description: 'Whether to run macOS tests'
        required: false
        type: boolean
        default: false

jobs:
  test-backend-linux:
    if: ${{ inputs.run-linux }}
    strategy:
      # Run every flow/suite combination to completion even if one fails,
      # so a single broken flow does not hide results from the others.
      fail-fast: false
      matrix:
        flow: ${{ fromJSON(inputs.flows) }}
        suite: [models, operators]
    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
    with:
      ref: ${{ inputs.ref }}
      runner: linux.4xlarge.memory
      docker-image: ci-image:executorch-ubuntu-22.04-clang12
      submodules: recursive
      timeout: ${{ inputs.timeout }}
      # One uniquely-named report artifact per matrix cell.
      upload-artifact: test-report-${{ matrix.flow }}-${{ matrix.suite }}
      script: |
        set -eux

        source .ci/scripts/test_backend_linux.sh "${{ matrix.suite }}" "${{ matrix.flow }}" "${RUNNER_ARTIFACT_DIR}"

  test-backend-macos:
    if: ${{ inputs.run-macos }}
    strategy:
      fail-fast: false
      matrix:
        flow: ${{ fromJSON(inputs.flows) }}
        suite: [models, operators]
    uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
    with:
      ref: ${{ inputs.ref }}
      runner: macos-m1-stable
      # Quoted so YAML does not read the version as the float 3.12.
      python-version: "3.12"
      submodules: recursive
      timeout: ${{ inputs.timeout }}
      upload-artifact: test-report-${{ matrix.flow }}-${{ matrix.suite }}
      script: |
        set -eux

        # This is needed to get the prebuilt PyTorch wheel from S3
        ${CONDA_RUN} --no-capture-output pip install awscli==1.37.21

        source .ci/scripts/test_backend_macos.sh "${{ matrix.suite }}" "${{ matrix.flow }}" "${RUNNER_ARTIFACT_DIR}"
48 changes: 0 additions & 48 deletions .github/workflows/nightly.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,51 +36,3 @@ jobs:
uses: ./.github/workflows/_link_check.yml
with:
ref: ${{ github.sha }}

backend-test-linux:
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
strategy:
fail-fast: false
matrix:
flow: [
qnn, qnn_16a16w, qnn_16a8w, qnn_16a4w, qnn_16a4w_block, qnn_8a8w,
vulkan, vulkan_static_int8_per_channel,
xnnpack, xnnpack_dynamic_int8_per_channel, xnnpack_static_int8_per_channel, xnnpack_static_int8_per_tensor
]
suite: [models, operators]
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
runner: linux.4xlarge.memory
docker-image: ci-image:executorch-ubuntu-22.04-clang12
submodules: recursive
timeout: 120
upload-artifact: test-report-${{ matrix.flow }}-${{ matrix.suite }}
script: |
set -eux

source .ci/scripts/test_backend_linux.sh "${{ matrix.suite }}" "${{ matrix.flow }}" "${RUNNER_ARTIFACT_DIR}"

backend-test-macos:
uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
permissions:
id-token: write
contents: read
strategy:
fail-fast: false
matrix:
flow: [coreml, coreml_static_int8]
suite: [models, operators]
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
runner: macos-m1-stable
python-version: 3.12
submodules: recursive
timeout: 120
upload-artifact: test-report-${{ matrix.flow }}-${{ matrix.suite }}
script: |
set -eux

# This is needed to get the prebuilt PyTorch wheel from S3
${CONDA_RUN} --no-capture-output pip install awscli==1.37.21

source .ci/scripts/test_backend_macos.sh "${{ matrix.suite }}" "${{ matrix.flow }}" "${RUNNER_ARTIFACT_DIR}"
27 changes: 27 additions & 0 deletions .github/workflows/test-backend-coreml.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Nightly + on-demand CoreML backend test entry point. Delegates the actual
# work to the reusable _test_backend.yml workflow (macOS only for CoreML).
name: Test CoreML Backend

on:
  schedule:
    # Nightly at 02:00 UTC. Quoted so the cron field is an explicit string.
    - cron: '0 2 * * *'
  push:
    tags:
      # Quoted so the glob is not mistaken for YAML syntax.
      - 'ciflow/nightly/*'
  pull_request:
    # Run on PRs only when this trigger or the shared workflow changes.
    paths:
      - .github/workflows/test-backend-coreml.yml
      - .github/workflows/_test_backend.yml
  workflow_dispatch:

concurrency:
  group: ${{ github.workflow }}--${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
  cancel-in-progress: true

jobs:
  test-coreml:
    uses: ./.github/workflows/_test_backend.yml
    with:
      backend: coreml
      # JSON-encoded list; the reusable workflow expands it with fromJSON().
      flows: '["coreml", "coreml_static_int8"]'
      # Use the PR head SHA on pull_request events, the pushed SHA otherwise.
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      timeout: 120
      run-macos: true
27 changes: 27 additions & 0 deletions .github/workflows/test-backend-qnn.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Nightly + on-demand QNN backend test entry point. Delegates the actual
# work to the reusable _test_backend.yml workflow (Linux only for QNN).
name: Test QNN Backend

on:
  schedule:
    # Nightly at 02:00 UTC. Quoted so the cron field is an explicit string.
    - cron: '0 2 * * *'
  push:
    tags:
      # Quoted so the glob is not mistaken for YAML syntax.
      - 'ciflow/nightly/*'
  pull_request:
    # Run on PRs only when this trigger or the shared workflow changes.
    paths:
      - .github/workflows/test-backend-qnn.yml
      - .github/workflows/_test_backend.yml
  workflow_dispatch:

concurrency:
  group: ${{ github.workflow }}--${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
  cancel-in-progress: true

jobs:
  test-qnn:
    uses: ./.github/workflows/_test_backend.yml
    with:
      backend: qnn
      # JSON-encoded list; the reusable workflow expands it with fromJSON().
      flows: '["qnn", "qnn_16a16w", "qnn_16a8w", "qnn_16a4w", "qnn_16a4w_block", "qnn_8a8w"]'
      # Use the PR head SHA on pull_request events, the pushed SHA otherwise.
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      timeout: 120
      run-linux: true
27 changes: 27 additions & 0 deletions .github/workflows/test-backend-vulkan.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Nightly + on-demand Vulkan backend test entry point. Delegates the actual
# work to the reusable _test_backend.yml workflow (Linux only for Vulkan).
name: Test Vulkan Backend

on:
  schedule:
    # Nightly at 02:00 UTC. Quoted so the cron field is an explicit string.
    - cron: '0 2 * * *'
  push:
    tags:
      # Quoted so the glob is not mistaken for YAML syntax.
      - 'ciflow/nightly/*'
  pull_request:
    # Run on PRs only when this trigger or the shared workflow changes.
    paths:
      - .github/workflows/test-backend-vulkan.yml
      - .github/workflows/_test_backend.yml
  workflow_dispatch:

concurrency:
  group: ${{ github.workflow }}--${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
  cancel-in-progress: true

jobs:
  test-vulkan:
    uses: ./.github/workflows/_test_backend.yml
    with:
      backend: vulkan
      # JSON-encoded list; the reusable workflow expands it with fromJSON().
      flows: '["vulkan", "vulkan_static_int8_per_channel"]'
      # Use the PR head SHA on pull_request events, the pushed SHA otherwise.
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      timeout: 120
      run-linux: true
27 changes: 27 additions & 0 deletions .github/workflows/test-backend-xnnpack.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Nightly + on-demand XNNPACK backend test entry point. Delegates the actual
# work to the reusable _test_backend.yml workflow (Linux only for XNNPACK).
name: Test XNNPACK Backend

on:
  schedule:
    # Nightly at 02:00 UTC. Quoted so the cron field is an explicit string.
    - cron: '0 2 * * *'
  push:
    tags:
      # Quoted so the glob is not mistaken for YAML syntax.
      - 'ciflow/nightly/*'
  pull_request:
    # Run on PRs only when this trigger or the shared workflow changes.
    paths:
      - .github/workflows/test-backend-xnnpack.yml
      - .github/workflows/_test_backend.yml
  workflow_dispatch:

concurrency:
  group: ${{ github.workflow }}--${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
  cancel-in-progress: true

jobs:
  test-xnnpack:
    uses: ./.github/workflows/_test_backend.yml
    with:
      backend: xnnpack
      # JSON-encoded list; the reusable workflow expands it with fromJSON().
      flows: '["xnnpack", "xnnpack_dynamic_int8_per_channel", "xnnpack_static_int8_per_channel", "xnnpack_static_int8_per_tensor"]'
      # Use the PR head SHA on pull_request events, the pushed SHA otherwise.
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      timeout: 120
      run-linux: true
8 changes: 7 additions & 1 deletion backends/test/suite/flow.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import logging

from dataclasses import dataclass
from dataclasses import dataclass, field
from typing import Callable

from executorch.backends.test.harness import Tester
Expand Down Expand Up @@ -35,6 +35,12 @@ class TestFlow:
is_delegated: bool = True
""" Indicates whether the flow is expected to generate CALL_DELEGATE nodes. """

skip_patterns: list[str] = field(default_factory=lambda: [])
""" Tests with names containing any substrings in this list are skipped. """

def should_skip_test(self, test_name: str) -> bool:
    """Return True if ``test_name`` contains any of this flow's skip patterns.

    Matching is plain substring containment against each entry in
    ``self.skip_patterns``; an empty pattern list skips nothing.
    """
    for pattern in self.skip_patterns:
        if pattern in test_name:
            return True
    return False


def all_flows() -> dict[str, TestFlow]:
flows = []
Expand Down
1 change: 1 addition & 0 deletions backends/test/suite/flows/coreml.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ def _create_coreml_flow(
CoreMLTester, minimum_deployment_target=minimum_deployment_target
),
quantize=quantize,
skip_patterns=["test_argmin", "test_argmax"],
)


Expand Down
1 change: 1 addition & 0 deletions backends/test/suite/flows/vulkan.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ def _create_vulkan_flow_base(
tester_factory=VulkanTester,
quantize=quantize_stage_factory is not None,
quantize_stage_factory=quantize_stage_factory,
skip_patterns=["float16", "float64"], # Not supported in swiftshader
)


Expand Down
24 changes: 22 additions & 2 deletions backends/test/suite/generate_markdown_summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,25 @@
#


def escape_for_markdown(text: str) -> str:
    """Prepare a string for safe display inside a markdown table cell.

    Newlines become ``<br />`` tags (so multi-line text stays in one cell),
    backslashes are doubled, and pipes are backslash-escaped so they do not
    terminate the cell. Empty/falsy input is returned unchanged.
    """
    if not text:
        return text

    # Order matters: backslashes must be doubled before pipes are escaped,
    # otherwise the escaping backslash added for '|' would itself be doubled.
    substitutions = (
        ("\n", "<br />"),
        ("\\", "\\\\"),
        ("|", "\\|"),
    )
    result = text
    for old, new in substitutions:
        result = result.replace(old, new)
    return result


def generate_markdown(csv_path: str, exit_code: int = 0): # noqa (C901)
# Print warning if exit code is non-zero
if exit_code != 0:
Expand Down Expand Up @@ -46,7 +65,7 @@ def generate_markdown(csv_path: str, exit_code: int = 0): # noqa (C901)

for row in data_rows:
# Make a copy of the row to avoid modifying the original
processed_row = row.copy()
processed_row = [escape_for_markdown(cell) for cell in row]

# Count results and collect failed tests
if result_column_index is not None and result_column_index < len(row):
Expand Down Expand Up @@ -96,7 +115,8 @@ def generate_markdown(csv_path: str, exit_code: int = 0): # noqa (C901)
# Generate Failed Tests section
print("# Failed Tests\n")
if failed_tests:
print("| " + " | ".join(header) + " |")
escaped_header = [escape_for_markdown(col) for col in header]
print("| " + " | ".join(escaped_header) + " |")
print("|" + "|".join(["---"] * len(header)) + "|")
for row in failed_tests:
print("| " + " | ".join(row) + " |")
Expand Down
5 changes: 5 additions & 0 deletions backends/test/suite/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,11 @@ def wrapped_test(self):
"use_dynamic_shapes": use_dynamic_shapes,
}
with TestContext(test_name, test_func.__name__, flow.name, params):
if flow.should_skip_test(test_name):
raise unittest.SkipTest(
f"Skipping test due to matching flow {flow.name} skip patterns"
)

test_func(self, flow, dtype, use_dynamic_shapes)

wrapped_test._name = test_func.__name__ # type: ignore
Expand Down
5 changes: 5 additions & 0 deletions backends/test/suite/operators/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,11 @@ def _make_wrapped_test(
):
def wrapped_test(self):
with TestContext(test_name, test_base_name, flow.name, params):
if flow.should_skip_test(test_name):
raise unittest.SkipTest(
f"Skipping test due to matching flow {flow.name} skip patterns"
)

test_kwargs = copy.copy(params) or {}
test_kwargs["flow"] = flow

Expand Down
12 changes: 12 additions & 0 deletions backends/test/suite/reporting.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@
]
)

CSV_FIELD_NAMES.append("Error")


# Operators that are excluded from the counts returned by count_ops. These are used to
# exclude operatations that are not logically relevant or delegatable to backends.
Expand Down Expand Up @@ -365,6 +367,15 @@ def write_csv_header(output: TextIO):
def write_csv_row(record: TestCaseSummary, output: TextIO):
writer = csv.DictWriter(output, CSV_FIELD_NAMES)

# Truncate error message if it's too long, keeping first and last 200 characters
error_message = ""
if record.error is not None:
error_str = str(record.error)
if len(error_str) > 400:
error_message = error_str[:200] + "..." + error_str[-200:]
else:
error_message = error_str

row = {
"Test ID": record.name,
"Test Case": record.base_name,
Expand All @@ -373,6 +384,7 @@ def write_csv_row(record: TestCaseSummary, output: TextIO):
"Params": _serialize_params(record.params),
"Result": record.result.to_short_str(),
"Result Detail": record.result.to_detail_str(),
"Error": error_message,
"Delegated": "True" if record.is_delegated() else "False",
"Quantize Time (s)": (
f"{record.quantize_time.total_seconds():.3f}"
Expand Down
Loading