
Commit 501d118

Authored by jerryzh168 and committed by pytorchmergebot
[quant][pt2e] Add transform_for_annotation method in Quantizer (pytorch#113115)
Summary: Adds a transform_for_annotation method to Quantizer so that users can run transformations before annotation, making the graph easier to annotate.

Test Plan: python test/test_quantization.py TestQuantizePT2E.test_transform_for_annotation

Differential Revision: D51141080 (https://our.internmc.facebook.com/intern/diff/D51141080)

Pull Request resolved: pytorch#113115

Approved by: https://github.com/kimishpatel
1 parent e53da90 · commit 501d118
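Before the per-file diffs, a quick orientation: the commit adds an overridable hook on Quantizer that runs on the captured graph before annotate(). A minimal usage sketch, assuming the standard PT2E entry points; the quantizer class and model below are illustrative, not part of this commit:

import torch
from torch._export import capture_pre_autograd_graph
from torch.ao.quantization.quantize_pt2e import prepare_pt2e
from torch.ao.quantization.quantizer import Quantizer

class MyQuantizer(Quantizer):  # hypothetical subclass, for illustration only
    def transform_for_annotation(self, model: torch.fx.GraphModule) -> torch.fx.GraphModule:
        # rewrite the graph here so annotate() sees easier-to-match patterns
        return model

    def annotate(self, model: torch.fx.GraphModule) -> torch.fx.GraphModule:
        return model  # a real quantizer attaches quantization annotations here

    def validate(self, model: torch.fx.GraphModule) -> None:
        pass

class Toy(torch.nn.Module):
    def forward(self, x):
        return x + 3

example_inputs = (torch.randn(1, 2, 3, 3),)
m = capture_pre_autograd_graph(Toy().eval(), example_inputs)
m = prepare_pt2e(m, MyQuantizer())  # transform_for_annotation runs inside, before annotate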

9 files changed: +96 -3 lines

docs/source/conf.py (-1)

@@ -358,7 +358,6 @@
     # torch.ao.quantization.quantizer.embedding_quantizer
     "get_embedding_operators_config",
     # torch.ao.quantization.quantizer.xnnpack_quantizer_utils
-    "convert_scalars_to_attrs",
     "get_bias_qspec",
     "get_input_act_qspec",
     "get_output_act_qspec",

test/allowlist_for_publicAPI.json (-1)

@@ -1567,7 +1567,6 @@
         "propagate_annotation"
     ],
     "torch.ao.quantization.quantizer.xnnpack_quantizer_utils": [
-        "convert_scalars_to_attrs",
         "register_annotator"
     ],
     "torch.backends.xeon.run_cpu": [

test/quantization/pt2e/test_quantize_pt2e.py (+30)

@@ -1358,6 +1358,36 @@ def validate(self, model: torch.fx.GraphModule) -> None:
             ),
         )

+    def test_transform_for_annotation(self):
+        class TestQuantizer(Quantizer):
+            def transform_for_annotation(self, model: torch.fx.GraphModule) -> torch.fx.GraphModule:
+                for n in model.graph.nodes:
+                    if n.target == torch.ops.aten.add.Tensor:
+                        n.target = torch.ops.aten.mul.Tensor
+                return model
+
+            def annotate(self, model: torch.fx.GraphModule) -> torch.fx.GraphModule:
+                return model
+
+            def validate(self, model: torch.fx.GraphModule) -> None:
+                pass
+
+        class M(torch.nn.Module):
+            def forward(self, x):
+                return x + 3
+
+        m = M().eval()
+        quantizer = TestQuantizer()
+        example_inputs = (torch.randn(1, 2, 3, 3),)
+        m = capture_pre_autograd_graph(m, example_inputs)
+        m = prepare_pt2e(m, quantizer)
+        m(*example_inputs)
+        node_occurrence = {
+            ns.call_function(torch.ops.aten.add.Tensor): 0,
+            ns.call_function(torch.ops.aten.mul.Tensor): 1,
+        }
+        self.checkGraphModuleNodes(m, expected_node_occurrence=node_occurrence)
+
     def test_embedding_quantizer(self):
         m_eager = TestHelperModules.EmbeddingModule().eval()
         indices = torch.tensor(

test/quantization/pt2e/test_xnnpack_quantizer.py (+33)

@@ -665,6 +665,39 @@ def test_mul_and_inplace_mul(self):
             node_list,
         )

+    def test_add_mul_scalar(self):
+        quantizer = XNNPACKQuantizer()
+        quantization_config = get_symmetric_quantization_config(is_per_channel=True)
+        quantizer.set_global(quantization_config)
+        example_inputs = (torch.randn(1, 3, 5, 5),)
+        node_occurrence = {
+            # two inputs and one output for the first add, and an output for the second add
+            torch.ops.quantized_decomposed.quantize_per_tensor.default: 5,
+            torch.ops.quantized_decomposed.dequantize_per_tensor.default: 7,
+        }
+        node_list = [
+            torch.ops.quantized_decomposed.dequantize_per_tensor.default,
+            torch.ops.quantized_decomposed.dequantize_per_tensor.default,
+            torch.ops.aten.add.Tensor,
+            torch.ops.quantized_decomposed.quantize_per_tensor.default,
+            torch.ops.quantized_decomposed.dequantize_per_tensor.default,
+            torch.ops.aten.mul.Tensor,
+            torch.ops.quantized_decomposed.quantize_per_tensor.default,
+            torch.ops.quantized_decomposed.dequantize_per_tensor.default,
+            torch.ops.aten.add_.Tensor,
+            torch.ops.quantized_decomposed.quantize_per_tensor.default,
+            torch.ops.quantized_decomposed.dequantize_per_tensor.default,
+            torch.ops.aten.mul_.Tensor,
+            torch.ops.quantized_decomposed.quantize_per_tensor.default,
+        ]
+        self._test_quantizer(
+            TestHelperModules.AddMulScalar(),
+            example_inputs,
+            quantizer,
+            node_occurrence,
+            node_list,
+        )
+

 # TODO: express this using self._test_quantizer, add test for inception_v4
 class TestXNNPACKQuantizerModels(PT2EQuantizationTestCase):

torch/ao/quantization/quantize_pt2e.py (+2)

@@ -103,6 +103,7 @@ def calibrate(model, data_loader):
     # to be quantized before fusion
     # TODO: (maybe) rewrite this with subgraph_rewriter
     _fuse_conv_bn_(model)
+    quantizer.transform_for_annotation(model)
     quantizer.annotate(model)
     quantizer.validate(model)
     model = prepare(model, node_name_to_scope, is_qat=False)

@@ -170,6 +171,7 @@ def train_loop(model, train_data):
     torch._C._log_api_usage_once("quantization_api.quantize_pt2e.prepare_qat_pt2e")
     original_graph_meta = model.meta
     node_name_to_scope = _get_node_name_to_scope(model)
+    quantizer.transform_for_annotation(model)
     quantizer.annotate(model)
     quantizer.validate(model)
     # Perform fusion after annotate to avoid quantizing ops in the new
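The ordering is the point of this change: in both prepare_pt2e and prepare_qat_pt2e the new hook fires immediately before annotate(). A condensed sketch of the resulting call sequence (illustrative, not the verbatim function bodies):

# Hook ordering common to both entry points after this commit:
def _hook_order_sketch(model, quantizer):
    quantizer.transform_for_annotation(model)  # new: user-defined graph rewrites
    quantizer.annotate(model)                  # existing: attach annotations
    quantizer.validate(model)                  # existing: sanity-check annotations
    return model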

torch/ao/quantization/quantizer/quantizer.py (+14)

@@ -157,6 +157,20 @@ class QuantizationAnnotation:


 class Quantizer(ABC):
+    def transform_for_annotation(
+        self, model: torch.fx.GraphModule
+    ) -> torch.fx.GraphModule:
+        """Allows for user-defined transforms to run before annotating the graph.
+
+        This lets a quantizer quantize parts of the model that would otherwise
+        not be quantizable. For example, a quantizer can
+        a) decompose a compound operator like scaled dot product attention (SDPA)
+           into bmm and softmax, if it knows how to quantize bmm/softmax but not SDPA, or
+        b) transform scalars to tensors, to allow quantizing scalars.
+
+        Note: this is an optional method.
+        """
+        return model
+
     # annotate nodes in the graph with observer or fake quant constructors
     # to convey the desired way of quantization
     @abstractmethod
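In the spirit of the docstring's examples, a hedged sketch of a subclass that uses the hook to rewrite in-place adds into their functional form before annotation, so a simple annotator that only matches aten.add.Tensor still sees them; the class name and rewrite are illustrative only, not part of this commit:

import torch
from torch.ao.quantization.quantizer import Quantizer

class InplaceAddFixupQuantizer(Quantizer):  # hypothetical example
    def transform_for_annotation(self, model: torch.fx.GraphModule) -> torch.fx.GraphModule:
        for n in model.graph.nodes:
            if n.op == "call_function" and n.target == torch.ops.aten.add_.Tensor:
                n.target = torch.ops.aten.add.Tensor  # functionalize in-place add
        model.recompile()
        return model

    def annotate(self, model: torch.fx.GraphModule) -> torch.fx.GraphModule:
        return model  # annotation elided in this sketch

    def validate(self, model: torch.fx.GraphModule) -> None:
        pass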

torch/ao/quantization/quantizer/xnnpack_quantizer.py (+7)

@@ -23,6 +23,7 @@
 from torch.ao.quantization.quantizer import QuantizationSpec, Quantizer

 from torch.ao.quantization.quantizer.xnnpack_quantizer_utils import (
+    _convert_scalars_to_attrs,
     OP_TO_ANNOTATOR,
     OperatorConfig,
     OperatorPatternType,

@@ -341,6 +342,12 @@ def set_module_name(
         self.module_name_config[module_name] = quantization_config
         return self

+    def transform_for_annotation(
+        self, model: torch.fx.GraphModule
+    ) -> torch.fx.GraphModule:
+        """Transforms scalar values to tensor attributes"""
+        return _convert_scalars_to_attrs(model)
+
     def annotate(self, model: torch.fx.GraphModule) -> torch.fx.GraphModule:
         """just handling global spec for now"""
         # hacked for handling dynamic linear quant. will fix later.
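From the user's perspective nothing extra needs to be called: XNNPACKQuantizer users now get scalar handling automatically inside prepare_pt2e. A hedged usage sketch; the module and shapes are illustrative:

import torch
from torch._export import capture_pre_autograd_graph
from torch.ao.quantization.quantize_pt2e import prepare_pt2e
from torch.ao.quantization.quantizer.xnnpack_quantizer import (
    XNNPACKQuantizer,
    get_symmetric_quantization_config,
)

class M(torch.nn.Module):
    def forward(self, x):
        return x + 3  # scalar operand; converted to an attr before annotation

quantizer = XNNPACKQuantizer()
quantizer.set_global(get_symmetric_quantization_config(is_per_channel=True))
example_inputs = (torch.randn(1, 3, 5, 5),)
m = capture_pre_autograd_graph(M().eval(), example_inputs)
m = prepare_pt2e(m, quantizer)  # _convert_scalars_to_attrs runs via transform_for_annotation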

torch/ao/quantization/quantizer/xnnpack_quantizer_utils.py (+2 -1)

@@ -861,7 +861,8 @@ def propagate_annotation(model: torch.fx.GraphModule) -> None:
     )


-def convert_scalars_to_attrs(model: torch.fx.GraphModule) -> torch.fx.GraphModule:
+# TODO: make the list of ops customizable
+def _convert_scalars_to_attrs(model: torch.fx.GraphModule) -> torch.fx.GraphModule:
     for n in model.graph.nodes:
         if n.op != "call_function" or n.target not in [
             torch.ops.aten.add.Tensor,
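The helper's body is truncated in the diff above. As a rough illustration of the scalar-to-attribute idea it implements, here is a self-contained sketch; the function name, attribute naming scheme, and op list are assumptions for illustration, not the actual implementation:

import torch

def _scalars_to_attrs_sketch(model: torch.fx.GraphModule) -> torch.fx.GraphModule:
    # Lift Python-scalar operands of binary aten ops into tensor attributes
    # referenced by get_attr nodes, so observers can be attached to them.
    for i, n in enumerate(model.graph.nodes):
        if n.op != "call_function" or n.target not in [
            torch.ops.aten.add.Tensor,
            torch.ops.aten.mul.Tensor,
        ]:
            continue
        new_args = []
        for arg in n.args:
            if isinstance(arg, (int, float)):
                attr_name = f"_scalar_attr_{i}"  # hypothetical naming scheme
                model.register_buffer(attr_name, torch.tensor(float(arg)))
                with model.graph.inserting_before(n):
                    arg = model.graph.get_attr(attr_name)
            new_args.append(arg)
        n.args = tuple(new_args)
    model.recompile()
    return model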

torch/testing/_internal/common_quantization.py (+8)

@@ -2628,6 +2628,14 @@ def forward(self, x, y):
         x *= y
         return x

+class AddMulScalar(torch.nn.Module):
+    def forward(self, x):
+        x = x + 3
+        x = x * 3
+        x += 3
+        x *= 3
+        return x
+
 class ConvBnReLU2dAndLinearReLU(torch.nn.Module):
     def __init__(self):
         super().__init__()
