Skip to content

Commit 052291d

Browse files
Add linear as a supported op for activation quantization. (#2577)
1 parent 6398eef commit 052291d

File tree

3 files changed

+69
-3
lines changed

3 files changed

+69
-3
lines changed

coremltools/converters/mil/mil/passes/defs/optimize_activation_quantization.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -159,8 +159,8 @@ def _try_match_and_transform_pattern(
159159
- (`quantize` ->) `dequantize` -> `conv` -> `relu` -> `quantize` -> `dequantize`
160160
"""
161161

162-
# Reject if 1st operation is not `conv`/`add`/`pool`.
163-
SUPPORTED_OP_TYPES = ["conv", "add", "avg_pool", "max_pool"]
162+
# Reject if 1st operation is not `conv`/`add`/`pool`/`linear`.
163+
SUPPORTED_OP_TYPES = ["conv", "add", "avg_pool", "max_pool", "linear"]
164164
if any(_check_child_op_type(dequantize_op, val) for val in SUPPORTED_OP_TYPES):
165165
pass
166166
else:

coremltools/optimize/coreml/_quantization_passes.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1640,7 +1640,7 @@ class insert_prefix_quantize_dequantize_pair(AbstractActCompressionPass):
16401640

16411641
_SUPPORTED_CONFIG_TYPE = OpLinearQuantizerConfig
16421642

1643-
SUPPORTED_UNARY_OP_TYPES = ["conv", "avg_pool", "max_pool"]
1643+
SUPPORTED_UNARY_OP_TYPES = ["conv", "avg_pool", "max_pool", "linear"]
16441644
SUPPORTED_BINARY_OP_TYPES = ["add"]
16451645
SUPPORTED_OP_TYPES = SUPPORTED_UNARY_OP_TYPES + SUPPORTED_BINARY_OP_TYPES
16461646

coremltools/test/optimize/coreml/test_passes.py

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -900,6 +900,21 @@ def prog(x):
900900

901901
return prog
902902

903+
@staticmethod
def _get_test_program_linear():
    """Build an iOS17 MIL program containing a single ``linear`` op.

    The program casts its fp32 input to fp16, applies a linear layer with
    a random fp32 weight, and casts the result back to fp32.
    """

    @mb.program(
        input_specs=[mb.TensorSpec(shape=(30, 10))], opset_version=ct.target.iOS17
    )
    def prog(x):
        # Weight is (30, 10): out_features x in_features for mb.linear.
        weight = np.random.rand(30, 10).astype(np.float32)
        casted = mb.cast(x=x, dtype="fp16")
        out = mb.linear(x=casted, weight=weight)
        return mb.cast(x=out, dtype="fp32")

    return prog
917+
903918
@staticmethod
904919
def _get_test_mlmodel_conv_concat():
905920
"""A mlmodel has a concat with 2 inputs and 1 output all surrounded by conv."""
@@ -3710,6 +3725,57 @@ def test_global_config_activation_quantizer_on_pattern_3(self, mode, dtype, weig
37103725
"cast",
37113726
]
37123727

3728+
@pytest.mark.parametrize(
    "mode, dtype, weight_threshold",
    itertools.product(
        ["LINEAR", "LINEAR_SYMMETRIC"],
        [np.int8, np.uint8, types.int8, types.uint8],
        [1000],
    ),
)
def test_global_config_activation_quantizer_on_pattern_4(self, mode, dtype, weight_threshold):
    """
    Global config would compress all operations with the same config
    Valid patterns:
    - linear
    """
    op_config = cto.coreml.OpLinearQuantizerConfig(
        mode=mode, dtype=dtype, weight_threshold=weight_threshold
    )
    config = cto.coreml.OptimizationConfig(global_config=op_config)

    # Test case: linear
    prog = self._get_test_program_linear()

    # Create activation_stats for all intermediate tensors.
    activation_stats = gen_activation_stats_for_program(prog)

    # Insert prefix quantize/dequantize pairs.
    graph_pass_1 = _insert_prefix_quantize_dequantize_pair(config)
    graph_pass_1.set_options([PassOption("activation_stats", activation_stats)])

    # Insert suffix quantize/dequantize pairs.
    graph_pass_2 = PASS_REGISTRY["compression::insert_suffix_quantize_dequantize_pair"]
    graph_pass_2.set_options(
        [PassOption("config", config), PassOption("activation_stats", activation_stats)]
    )

    apply_pass_and_basic_check(prog, graph_pass_1)
    apply_pass_and_basic_check(prog, graph_pass_2)

    # The linear op must end up framed by quantize/dequantize on both sides.
    assert get_op_types_in_program(prog) == [
        "cast",
        "quantize",
        "dequantize",
        "linear",
        "quantize",
        "dequantize",
        "cast",
    ]
3778+
37133779

37143780
class TestGetActivationStats(TestCompressionPasses):
37153781
def test_get_activation_calibration_stats_basic(self):

0 commit comments

Comments
 (0)