QNN Compilation command changes (#327)

shubhagr-quic · web-flow · commit fc89e8bdc032 · 2025-03-28T11:44:26.000+05:30
1. Changes in libraries used during Context binary generation.
2. Changed convertor spelling to converter to align with qairt-converter
string.

---------

Signed-off-by: Shubham Agrawal <quic_shubhagr@quicinc.com>
diff --git a/QEfficient/compile/qnn_compiler.py b/QEfficient/compile/qnn_compiler.py
@@ -102,8 +102,8 @@ def parse_qnn_config(self):
         self.qnn_config = {}
         # Copy key-value pairs to the class object
         for key, value in config_data.items():
-            if key == QnnConstants.CONVERTOR_ARGS_EXTENSION_STR:
-                self.check_extension_arg(key, value, QnnConstants.IMMUTABLE_CONVERTOR_ARGS)
+            if key == QnnConstants.CONVERTER_ARGS_EXTENSION_STR:
+                self.check_extension_arg(key, value, QnnConstants.IMMUTABLE_CONVERTER_ARGS)
             if key == QnnConstants.CONTEXT_BIN_ARGS_EXTENSION_STR:
                 self.check_extension_arg(key, value, QnnConstants.IMMUTABLE_CONTEXT_BIN_GEN_ARGS)
             self.qnn_config[key] = value
@@ -191,20 +191,20 @@ def create_qnn_compiler_config_json(self) -> str:
     def compile(self) -> str:
         """
         Compiles the given ``ONNX`` model during object creation using QNN compiler and saves the compiled ``qpc`` package at ``qnn_binary_dir``.
-            - Creates convertor command and convert onnx model to model.dlc using qairt-convertor
+            - Creates converter command and convert onnx model to model.dlc using qairt-converter
             - command line arguments and qnn_config.json (if provided) are used to create qnn_compiler_config.json for context-binary-generator
-            - model.dlc from convertor stage is passed into context-binary-generator command to create programqpc.bin.
+            - model.dlc from converter stage is passed into context-binary-generator command to create programqpc.bin.
 
         Returns:
             :str: Path to compiled ``qpc`` package.
         """
         if not (
             self.qnn_config
-            and (QnnConstants.SKIP_QNN_CONVERTOR_STEP_STR in self.qnn_config)
-            and self.qnn_config[QnnConstants.SKIP_QNN_CONVERTOR_STEP_STR]
+            and (QnnConstants.SKIP_QNN_CONVERTER_STEP_STR in self.qnn_config)
+            and self.qnn_config[QnnConstants.SKIP_QNN_CONVERTER_STEP_STR]
         ):
             converter_cmd = self.converter()
-            execute_command("convertor", converter_cmd, self.qpc_base_path)
+            execute_command("converter", converter_cmd, self.qpc_base_path)
 
         if not os.path.isfile(self.dlc_model_path):
             raise FileNotFoundError(
@@ -225,7 +225,7 @@ def compile(self) -> str:
 
     def converter(self) -> str:
         """
-        Creates QNN convertor command using provided options.
+        Creates QNN converter command using provided options.
 
         IMMUTABLE parameters which can not be overridden by the user using qnn_config.json:
             :input_network (str): Generated ``ONNX`` Model Path.
@@ -235,10 +235,10 @@ def converter(self) -> str:
             :float_bitwidth (int): Converts the graph to the specified float bitwidth, either 32 or 16(Default).
             :preserve_io_datatype(flag): Passed by default.
 
-        CONVERTOR_ARGS_EXTENSION passed in qnn_config.json is appended to the command created.
+        CONVERTER_ARGS_EXTENSION passed in qnn_config.json is appended to the command created.
 
         Returns:
-            :str: QNN Convertor command.
+            :str: QNN Converter command.
         """
         converter_tool = QnnConstants.QAIRT_CONVERTER.format(self.qnn_sdk_path, self.qnn_target)
 
@@ -250,10 +250,10 @@ def converter(self) -> str:
             f"--float_bitwidth {QnnConstants.FLOAT_BITWIDTH} "
         )
         # Add default arguments.
-        cmd += QnnConstants.CONVERTOR_DEFAULT_ARGS
+        cmd += QnnConstants.CONVERTER_DEFAULT_ARGS
 
-        if self.qnn_config and QnnConstants.CONVERTOR_ARGS_EXTENSION_STR in self.qnn_config:
-            cmd += self.qnn_config[QnnConstants.CONVERTOR_ARGS_EXTENSION_STR]
+        if self.qnn_config and QnnConstants.CONVERTER_ARGS_EXTENSION_STR in self.qnn_config:
+            cmd += self.qnn_config[QnnConstants.CONVERTER_ARGS_EXTENSION_STR]
 
         return cmd
 
@@ -266,7 +266,7 @@ def generate_context_binary(self) -> str:
             :backend_binary (str): Generated QPC binary file name, which is provided programqpc.bin
             :output_dir (str): Path to store generated Binaries (qnn_binary_dir).
             :model (str): Path to the <qnn_model_name.so> file containing a QNN network.
-            :dlc_path (str): Path to DLC file generated by QNN-Convertor.
+            :dlc_path (str): Path to DLC file generated by QNN-Converter.
             :config_file(str): Path to created qnn_compiler_config.json containing qnn_compile_backend.json & shared_library_path.
 
         Configurable parameters:
@@ -279,15 +279,13 @@ def generate_context_binary(self) -> str:
         """
         binary_gen_tool = QnnConstants.QNN_CONTEXT_BIN.format(self.qnn_sdk_path, self.qnn_target)
         backend_lib = QnnConstants.QNN_CONTEXT_LIB_BACKEND.format(self.qnn_sdk_path, self.qnn_target)
-        model_lib = QnnConstants.QNN_CONTEXT_LIB_MODEL.format(self.qnn_sdk_path, self.qnn_target)
         config_file_path = self.create_qnn_compiler_config_json()
 
         cmd = (
             f"{binary_gen_tool} --binary_file {QnnConstants.CONTEXT_BIN_NAME} "
             f"--backend_binary {QnnConstants.CONTEXT_BIN_QPC_NAME} "
             f"--output_dir {self.qnn_binary_dir} "
             f"--backend {backend_lib} "
-            f"--model {model_lib} "
             f"--dlc_path {self.dlc_model_path} "
             f"--config_file {config_file_path} "
         )
@@ -343,7 +341,7 @@ def compile(
 ) -> str:
     """
     Compiles the given ``ONNX`` model using QNN compiler and saves the compiled ``qpc`` package at ``qnn_binary_dir``.
-    Generates model.dlc during convertor stage, qnn_compile_backend.json for backend parameters of context-binary-generator.
+    Generates model.dlc during converter stage, qnn_compile_backend.json for backend parameters of context-binary-generator.
     Generates tensor-slicing configuration if multiple devices are passed in ``device_group``.
 
     ``Mandatory`` Args:
@@ -375,7 +373,7 @@ def compile(
 
     os.makedirs(qpc_base_path, exist_ok=True)
 
-    # Created custom_io_config.yaml file for QNN-Convertor stage.
+    # Created custom_io_config.yaml file for QNN-Converter stage.
     # TODO To make custom_io_config.yaml configurable as not all models need it.
     custom_io_file_path = os.path.join(qpc_base_path, "custom_io_config.yaml")
 
diff --git a/QEfficient/compile/qnn_config.json b/QEfficient/compile/qnn_config.json
@@ -1,5 +1,5 @@
 {
-    "convertor_args_extension": "",
+    "converter_args_extension": "",
     "context_binary_generator_args_extension": "--log_level debug",
     "qnn_compilation_backend":
     {
@@ -8,5 +8,5 @@
         "compiler_printPerfMetrics": false,
         "compiler_stat_level": 10
     },
-    "SKIP_QNN_CONVERTOR_STEP": false
+    "SKIP_QNN_CONVERTER_STEP": false
 }
diff --git a/QEfficient/utils/constants.py b/QEfficient/utils/constants.py
@@ -99,8 +99,7 @@ class QnnConstants:
     QNN_CONTEXT_BIN = "{}/bin/{}/qnn-context-binary-generator"
 
     # QNN Libraries required for compilation
-    QNN_CONTEXT_LIB_BACKEND = "{}/lib/{}/libQnnAicCC.so"
-    QNN_CONTEXT_LIB_MODEL = "{}/lib/{}/libQnnModelDlc.so"
+    QNN_CONTEXT_LIB_BACKEND = "{}/lib/{}/libQnnAic.so"
     QNN_CONTEXT_LIB_NET_RUN_EXTENSIONS = "{}/lib/{}/libQnnAicNetRunExtensions.so"
 
     # QNN Compilation target names
@@ -112,10 +111,10 @@ class QnnConstants:
     # TARGET System Architecture
     TARGET = "x86_64-linux-clang"  # TODO add support in infer to be override
 
-    # Convertor Arguments
+    # Converter Arguments
     FLOAT_BITWIDTH = 16
     FLOAT_BIAS_BITWIDTH = 32
-    CONVERTOR_DEFAULT_ARGS = "--preserve_io_datatype --onnx_skip_simplification "
+    CONVERTER_DEFAULT_ARGS = "--preserve_io_datatype --onnx_skip_simplification "
 
     # Context-Binary-Generator Arguments
     LOG_LEVEL = "error"
@@ -135,12 +134,12 @@ class QnnConstants:
     GRAPH_NAMES = [f"{MODEL_NAME}_configuration_1", f"{MODEL_NAME}_configuration_2"]
 
     # qnn_config JSON file supported Keys
-    CONVERTOR_ARGS_EXTENSION_STR = "convertor_args_extension"
+    CONVERTER_ARGS_EXTENSION_STR = "converter_args_extension"
     CONTEXT_BIN_ARGS_EXTENSION_STR = "context_binary_generator_args_extension"
     QNN_COMPILATION_BACKEND_STR = "qnn_compilation_backend"
-    SKIP_QNN_CONVERTOR_STEP_STR = "SKIP_QNN_CONVERTOR_STEP"
+    SKIP_QNN_CONVERTER_STEP_STR = "SKIP_QNN_CONVERTER_STEP"
 
-    IMMUTABLE_CONVERTOR_ARGS = [
+    IMMUTABLE_CONVERTER_ARGS = [
         "--input_network ",
         "--output_path ",
         "--config ",
diff --git a/QEfficient/utils/generate_qnn_network_specialization_config.py b/QEfficient/utils/generate_qnn_network_specialization_config.py
@@ -13,7 +13,7 @@
 from onnx import helper
 
 """
-    The network specilization file is generated by loading the onnx graph and fecthing the graph inputs and outputs.
+    The network specialization file is generated by loading the onnx graph and fetching the graph inputs and outputs.
 """
 
 
@@ -28,7 +28,7 @@ def fetch_nodes_info(
     kv_cache_batch_size: Optional[int] = None,
 ) -> None:
     """
-    Generates network specialization config custom IO file for convertor stage in QNN compilation.
+    Generates network specialization config custom IO file for converter stage in QNN compilation.
     Reads onnx graph and creates a custom IO configuration file according to the passed parameters and
     save it as a yaml file provided in file_path argument.
 
@@ -187,7 +187,7 @@ def generate_data_format_config(
 
     ``Optional`` Args:
         :data_format (str): Tensor format for KV nodes. ``Defaults to QNN_TENSOR_DATA_FORMAT_MX.``
-        :model_dlc_name (str): DLC Name generated by the convertor stage in QNN Compilation. ``Defaults to model.``
+        :model_dlc_name (str): DLC Name generated by the converter stage in QNN Compilation. ``Defaults to model.``
         :file_path (str): File path to save the generated data format config. ``Defaults to qnn_data_format_config.json.``
     """
 
diff --git a/tests/transformers/models/test_prefix_caching.py b/tests/transformers/models/test_prefix_caching.py
@@ -40,14 +40,14 @@ def test_simple_prefix_caching(model_name):
 def test_simple_prefix_caching_qnn(model_name):
     qeff_model = QEFFAutoModelForCausalLM.from_pretrained(model_name, continuous_batching=True)
     qnn_config = {
-        "convertor_args_extension": "",
+        "converter_args_extension": "",
         "context_binary_generator_args_extension": "--log_level debug",
         "qnn_compilation_backend": {
             "compiler_enable_depth_first": True,
             "compiler_printDDRStats": False,
             "compiler_printPerfMetrics": False,
         },
-        "SKIP_QNN_CONVERTOR_STEP": False,
+        "SKIP_QNN_CONVERTER_STEP": False,
     }
     qnn_config_json_path = os.path.join(os.getcwd(), "qnn_config.json")
     create_json(qnn_config_json_path, qnn_config)

Original file line number	Diff line number	Diff line change
`@@ -1,5 +1,5 @@`
`1`	`1`	`{`
`2`		`- "convertor_args_extension": "",`
	`2`	`+ "converter_args_extension": "",`
`3`	`3`	`"context_binary_generator_args_extension": "--log_level debug",`
`4`	`4`	`"qnn_compilation_backend":`
`5`	`5`	`{`
`@@ -8,5 +8,5 @@`
`8`	`8`	`"compiler_printPerfMetrics": false,`
`9`	`9`	`"compiler_stat_level": 10`
`10`	`10`	`},`
`11`		`- "SKIP_QNN_CONVERTOR_STEP": false`
	`11`	`+ "SKIP_QNN_CONVERTER_STEP": false`
`12`	`12`	`}`