quic
diff --git a/‎QEfficient/base/modeling_qeff.py
Lines changed: 8 additions & 3 deletions b/‎QEfficient/base/modeling_qeff.py
Lines changed: 8 additions & 3 deletions
diff --git a/‎QEfficient/peft/auto.py
Lines changed: 4 additions & 0 deletions b/‎QEfficient/peft/auto.py
Lines changed: 4 additions & 0 deletions
diff --git a/‎QEfficient/peft/lora/auto.py
Lines changed: 4 additions & 0 deletions b/‎QEfficient/peft/lora/auto.py
Lines changed: 4 additions & 0 deletions
diff --git a/‎QEfficient/transformers/models/modeling_auto.py
Lines changed: 38 additions & 2 deletions b/‎QEfficient/transformers/models/modeling_auto.py
Lines changed: 38 additions & 2 deletions
diff --git a/‎QEfficient/utils/__init__.py
Lines changed: 1 addition & 0 deletions b/‎QEfficient/utils/__init__.py
Lines changed: 1 addition & 0 deletions
diff --git a/‎QEfficient/utils/_utils.py
Lines changed: 113 additions & 1 deletion b/‎QEfficient/utils/_utils.py
Lines changed: 113 additions & 1 deletion
diff --git a/‎QEfficient/utils/constants.py
Lines changed: 2 additions & 0 deletions b/‎QEfficient/utils/constants.py
Lines changed: 2 additions & 0 deletions
diff --git a/‎tests/peft/lora/test_lora_model.py
Lines changed: 4 additions & 0 deletions b/‎tests/peft/lora/test_lora_model.py
Lines changed: 4 additions & 0 deletions
diff --git a/‎tests/peft/test_peft_model.py
Lines changed: 2 additions & 0 deletions b/‎tests/peft/test_peft_model.py
Lines changed: 2 additions & 0 deletions
@@ -19,11 +19,11 @@
 import onnx
 import torch
 
-from QEfficient.base.onnx_transforms import OnnxTransform
+from QEfficient.base.onnx_transforms import OnnxTransform, SplitTensorsTransform
 from QEfficient.base.pytorch_transforms import PytorchTransform
 from QEfficient.compile.qnn_compiler import compile as qnn_compile
 from QEfficient.generation.cloud_infer import QAICInferenceSession
-from QEfficient.utils import constants
+from QEfficient.utils import constants, dump_qconfig
 from QEfficient.utils._utils import load_json
 from QEfficient.utils.cache import QEFF_HOME, to_hashable
 
@@ -191,7 +191,8 @@ def _export(
                 transform_kwargs.update(onnx_transform_kwargs)
 
             for transform in self._onnx_transforms:
-                model, transformed = transform.apply(model, **transform_kwargs)
+                if not (self.enable_qnn and transform == SplitTensorsTransform):
+                    model, transformed = transform.apply(model, **transform_kwargs)
             model.metadata_props.append(
                 onnx.StringStringEntryProto(key="qeff_transforms", value=",".join(self._transform_names()))
             )
@@ -211,6 +212,7 @@ def _export(
         self.onnx_path = onnx_path
         return onnx_path
 
+    @dump_qconfig
     def _compile(
         self,
         onnx_path: Optional[str] = None,
@@ -336,8 +338,10 @@ def _compile(
             )
 
         self.qpc_path = qpc_path
+
         return qpc_path
 
+    @dump_qconfig
     def _qnn_compile(
         self,
         onnx_path: Optional[str] = None,
@@ -435,4 +439,5 @@ def _qnn_compile(
         )
 
         self.qpc_path = qpc_path
+
         return qpc_path
@@ -107,6 +107,10 @@ def model_hash(self) -> str:
         mhash = mhash.hexdigest()[:16]
         return mhash
 
+    @property
+    def get_model_config(self) -> dict:
+        """
+        Return the attribute dictionary of the config attached to ``self.model.get_base_model()``.
+
+        This is the config object's own ``__dict__``, not a copy.
+        """
+        return self.model.get_base_model().config.__dict__
+
     def load_adapter(self, model_id: str, adapter_name: str):
         """Loads a new adapter from huggingface hub or local path
 
 
@@ -90,6 +90,10 @@ def model_hash(self) -> str:
         mhash = mhash.hexdigest()[:16]
         return mhash
 
+    @property
+    def get_model_config(self) -> dict:
+        """
+        Return the attribute dictionary of ``self.model.model.config``.
+
+        This is the config object's own ``__dict__``, not a copy.
+        """
+        return self.model.model.config.__dict__
+
     def download_adapter(
         self,
         adapter_model_id: str,
 
@@ -229,6 +229,10 @@ def model_hash(self) -> str:
         mhash = mhash.hexdigest()[:16]
         return mhash
 
+    @property
+    def get_model_config(self) -> dict:
+        """
+        Return the attribute dictionary of ``self.model.config``.
+
+        This is the config object's own ``__dict__``, not a copy.
+        """
+        return self.model.config.__dict__
+
     def export(self, export_dir: Optional[str] = None) -> str:
         """
         Exports the model to ``ONNX`` format using ``torch.onnx.export``.
@@ -447,6 +451,10 @@ def model_name(self) -> str:
             mname = mname[4:]
         return mname
 
+    @property
+    def get_model_config(self) -> dict:
+        """
+        Return the attribute dictionary of the vision model's config (``self.model.model.vision_model.config``).
+
+        This is the config object's own ``__dict__``, not a copy.
+        """
+        return self.model.model.vision_model.config.__dict__
+
 
 class QEffCausalLMForTextImageToTextModel(QEFFBaseModel):
     _pytorch_transforms = [
@@ -506,6 +514,10 @@ def model_name(self) -> str:
             mname = mname[4:]
         return mname
 
+    @property
+    def get_model_config(self) -> dict:
+        """
+        Return the attribute dictionary of the language model's config (``self.model.language_model.config``).
+
+        This is the config object's own ``__dict__``, not a copy.
+        """
+        return self.model.language_model.config.__dict__
+
 
 class _QEffAutoModelForImageTextToTextDualQPC:
     _hf_auto_class = AutoModelForImageTextToText
@@ -1132,6 +1144,10 @@ def model_name(self) -> str:
             mname = mname[4:]
         return mname
 
+    @property
+    def get_model_config(self) -> dict:
+        """
+        Return the attribute dictionary of ``self.model.config``.
+
+        This is the config object's own ``__dict__``, not a copy.
+        """
+        return self.model.config.__dict__
+
 
 class QEFFAutoModelForImageTextToText:
     """
@@ -1187,6 +1203,7 @@ class QEFFAutoModelForCausalLM(QEFFBaseModel):
         :model (nn.Module):  PyTorch model
         :continuous_batching (bool): Weather this model will be used for continuous batching in future. If this is not set True here, the model can not be exported/compiled for continuous batching later.
         :is_tlm (bool): Whether this is a Speculative Decoding Target Language Model. If set to True, `num_logits_to_keep` input array will have to be fed to control the number of returned logits during prefill/decode.
+        :enable_qnn (bool): Enables QNN Compilation path for the model.
 
 
     .. code-block:: python
@@ -1217,6 +1234,7 @@ def __init__(
         model: nn.Module,
         continuous_batching: bool = False,
         is_tlm: bool = False,
+        enable_qnn: bool = False,
         **kwargs,
     ):
         model_class_name = model.__class__.__name__
@@ -1248,6 +1266,8 @@ def __init__(
             self.model, transformed = SpDTransform.apply(self.model)
         self.is_tlm = is_tlm
 
+        self.enable_qnn = enable_qnn
+
     @property
     def model_name(self) -> str:
         mname = self.model.__class__.__name__
@@ -1261,7 +1281,13 @@ def __repr__(self) -> str:
     @classmethod
     @with_replaced_quantizers
     def from_pretrained(
-        cls, pretrained_model_name_or_path, continuous_batching: bool = False, is_tlm: bool = False, *args, **kwargs
+        cls,
+        pretrained_model_name_or_path,
+        continuous_batching: bool = False,
+        is_tlm: bool = False,
+        enable_qnn: bool = False,
+        *args,
+        **kwargs,
     ):
         """
         This method serves as the easiest entry point into using QEfficient. The interface is designed to be similar to transformers.AutoModelForCausalLM.
@@ -1272,6 +1298,7 @@ def from_pretrained(
             :pretrained_name_or_path (str): Model card name from HuggingFace or local path to model directory.
             :continuous_batching (bool): Whether this model will be used for continuous batching in future. If this is not set True here, the model can not be exported/compiled for continuous batching later.
             :is_tlm (bool): Whether this is a Speculative Decoding Target Language Model. If set to True, `num_logits_to_keep` input array will have to be fed to control the number of returned logits during prefill/decode.
+            :enable_qnn (bool): Enables QNN Compilation path for the model.
             :args, kwargs: Additional arguments to pass to transformers.AutoModelForCausalLM.
 
         .. code-block:: python
@@ -1305,6 +1332,7 @@ def from_pretrained(
         kv_offload = kwargs.pop("kv_offload", None)
 
         kwargs.update({"attn_implementation": "eager", "low_cpu_mem_usage": False})
+
         model = cls._hf_auto_class.from_pretrained(pretrained_model_name_or_path, *args, **kwargs)
 
         # This is support models that should be classified to in a different auto class but transformers load them via this class
@@ -1314,7 +1342,7 @@ def from_pretrained(
                 model, kv_offload=kv_offload
             )
 
-        return cls(model, is_tlm=is_tlm, continuous_batching=continuous_batching)
+        return cls(model, is_tlm=is_tlm, continuous_batching=continuous_batching, enable_qnn=enable_qnn)
 
     @property
     def model_hash(self) -> str:
@@ -1327,6 +1355,10 @@ def model_hash(self) -> str:
         mhash = mhash.hexdigest()[:16]
         return mhash
 
+    @property
+    def get_model_config(self) -> dict:
+        """
+        Return the attribute dictionary of ``self.model.config``.
+
+        This is the config object's own ``__dict__``, not a copy.
+        """
+        return self.model.config.__dict__
+
     def export(self, export_dir: Optional[str] = None) -> str:
         """
         Exports the model to ``ONNX`` format using ``torch.onnx.export``.
@@ -1640,6 +1672,10 @@ def model_hash(self) -> str:
         mhash = mhash.hexdigest()[:16]
         return mhash
 
+    @property
+    def get_model_config(self) -> dict:
+        """
+        Return the attribute dictionary of ``self.model.config``.
+
+        This is the config object's own ``__dict__``, not a copy.
+        """
+        return self.model.config.__dict__
+
     def export(self, export_dir: Optional[str] = None) -> str:
         """
         Exports the model to ``ONNX`` format using ``torch.onnx.export``.
 
@@ -11,6 +11,7 @@
 )
 from QEfficient.utils._utils import (  # noqa: F401
     check_and_assign_cache_dir,
+    dump_qconfig,
     get_num_layers_from_config,
     get_onnx_dir_name,
     get_padding_shape_from_config,
 
@@ -8,16 +8,18 @@
 import json
 import os
 import subprocess
+import xml.etree.ElementTree as ET
 from dataclasses import dataclass
 from typing import Any, Dict, List, Optional, Tuple, Union
 
 import requests
 import torch
+import yaml
 from huggingface_hub import login, snapshot_download
 from requests.exceptions import HTTPError
 from transformers import AutoProcessor, AutoTokenizer, PreTrainedTokenizer, PreTrainedTokenizerFast
 
-from QEfficient.utils.constants import QEFF_MODELS_DIR, Constants
+from QEfficient.utils.constants import QEFF_MODELS_DIR, Constants, QnnConstants
 from QEfficient.utils.logging_utils import logger
 
 
@@ -442,3 +444,113 @@ class IOInfo:
 
     def __repr__(self):
         return f"input_name:{self.name}\tdatatype:{self.datatype}\tshape:{self.shape}"
+
+
+def dump_qconfig(func):
+    """
+    Decorator that dumps ``qconfig.json`` once the wrapped compile method has returned.
+
+    After ``func`` finishes, the instance's ``qpc_path``, ``onnx_path``, ``get_model_config`` and the
+    class names listed in ``_pytorch_transforms`` / ``_onnx_transforms`` are handed to
+    ``create_and_dump_qconfigs`` together with the keyword arguments of the call. If ``func`` raises,
+    the exception propagates and nothing is dumped.
+
+    Only keyword arguments are recorded; values passed positionally to ``func`` do not appear in
+    ``kwargs`` and are therefore missing from the dump. ``custom_io`` is dropped before forwarding.
+
+    Args:
+        :func (Callable): Method to wrap. ``self.qpc_path`` and ``self.onnx_path`` are read after it returns.
+
+    Returns:
+        :Callable: Wrapper that returns whatever ``func`` returned.
+    """
+    from functools import wraps
+
+    # Keys that are passed to create_and_dump_qconfigs explicitly, plus custom_io which is not forwarded.
+    handled_keys = ("specializations", "mdp_ts_num_devices", "num_speculative_tokens", "custom_io")
+
+    @wraps(func)  # keep the wrapped method's __name__ / __doc__ visible to introspection and docs
+    def wrapper(self, *args, **kwargs):
+        result = func(self, *args, **kwargs)
+        create_and_dump_qconfigs(
+            self.qpc_path,
+            self.onnx_path,
+            self.get_model_config,
+            [cls.__name__ for cls in self._pytorch_transforms],
+            [cls.__name__ for cls in self._onnx_transforms],
+            kwargs.get("specializations"),
+            kwargs.get("mdp_ts_num_devices", 1),
+            kwargs.get("num_speculative_tokens"),
+            **{k: v for k, v in kwargs.items() if k not in handled_keys},
+        )
+        return result
+
+    return wrapper
+
+
+def create_and_dump_qconfigs(
+    qpc_path,
+    onnx_path,
+    huggingface_config,
+    pytorch_transforms,
+    onnx_transforms,
+    specializations,
+    mdp_ts_num_devices,
+    num_speculative_tokens,
+    **compiler_options,
+):
+    """
+    This Method creates a JSON file which contains all the configs for a model.
+    Such as huggingface configs, QEff transforms, QAIC sdk version, QNN sdk, compilation dir, qpc dir and
+    many other compilation options.
+
+    The file is written as ``qconfig.json`` into the parent directory of ``qpc_path``. When ``qnn_config``
+    is present in ``compiler_options`` (even with value ``None``) the QNN section is written, otherwise
+    the AIC compiler section is written.
+
+    Args:
+        :qpc_path (str): Path of the compiled QPC; its parent directory receives ``qconfig.json``.
+        :onnx_path (str): Path of the exported ONNX model, stored as a string.
+        :huggingface_config (dict): Model config to record.
+        :pytorch_transforms (List[str]): Names of the applied PyTorch transforms.
+        :onnx_transforms (List[str]): Names of the applied ONNX transforms.
+        :specializations (List[dict]): Compile specializations, may be ``None``.
+        :mdp_ts_num_devices (int): Number of devices used for tensor slicing.
+        :num_speculative_tokens (int): Number of speculative tokens, may be ``None``.
+        :compiler_options: Remaining compile keyword arguments; recorded in the AIC section.
+    """
+    enable_qnn = "qnn_config" in compiler_options
+    compile_dir = os.path.dirname(qpc_path)
+    qconfig_file_path = os.path.join(compile_dir, "qconfig.json")
+
+    # Ensure all objects in the configs dictionary are JSON serializable
+    def make_serializable(obj):
+        if isinstance(obj, (int, float, str, bool, type(None))):
+            return obj
+        elif isinstance(obj, (list, tuple)):
+            return [make_serializable(item) for item in obj]
+        elif isinstance(obj, dict):
+            return {key: make_serializable(value) for key, value in obj.items()}
+        elif hasattr(obj, "__dict__"):
+            return make_serializable(vars(obj))
+        return str(obj)
+
+    qconfigs = {
+        "huggingface_config": make_serializable(huggingface_config),
+        "qpc_config": {
+            "QEff_config": {
+                "pytorch_transforms": make_serializable(pytorch_transforms),
+                "onnx_transforms": make_serializable(onnx_transforms),
+                "onnx_path": str(onnx_path),
+            },
+        },
+    }
+
+    # Put AIC or qnn details.
+    if enable_qnn:
+        qnn_config_path = compiler_options["qnn_config"]
+        if qnn_config_path is None:
+            qnn_config_path = "QEfficient/compile/qnn_config.json"
+        qnn_details = {
+            "enable_qnn": True,
+            "qnn_config_path": qnn_config_path,
+        }
+
+        # Extract QNN SDK details from YAML file if the environment variable is set.
+        # Only done on the QNN path, which is the only place the details are used.
+        qnn_sdk_path = os.getenv(QnnConstants.QNN_SDK_PATH_ENV_VAR_NAME)
+        if qnn_sdk_path:
+            qnn_sdk_yaml_path = os.path.join(qnn_sdk_path, QnnConstants.QNN_SDK_YAML)
+            with open(qnn_sdk_yaml_path, "r") as file:
+                qnn_sdk_details = yaml.safe_load(file)
+            if qnn_sdk_details:
+                qnn_details.update(qnn_sdk_details)
+
+        qconfigs["qpc_config"]["qnn_config"] = qnn_details
+    else:
+        # Extract QAIC SDK Apps Version from SDK XML file. The version is metadata only, so an
+        # unreadable file or a missing <base_version> node is recorded as None instead of
+        # failing a compile that has already succeeded.
+        qaic_version = None
+        try:
+            version_node = ET.parse(Constants.SDK_APPS_XML).getroot().find(".//base_version")
+        except (OSError, ET.ParseError) as err:
+            logger.warning(f"Unable to read Apps SDK version from {Constants.SDK_APPS_XML}: {err}")
+        else:
+            if version_node is not None:
+                qaic_version = version_node.text
+
+        # Later keys win: a compiler option with the same name overrides the entries listed before it.
+        qconfigs["qpc_config"]["aic_compiler_config"] = {
+            "apps_sdk_version": qaic_version,
+            "compile_dir": compile_dir,
+            "specializations_file_path": os.path.join(compile_dir, "specializations.json"),
+            "specializations": make_serializable(specializations),
+            "mdp_ts_num_devices": mdp_ts_num_devices,
+            "num_speculative_tokens": num_speculative_tokens,
+            **make_serializable(compiler_options),
+        }
+
+    create_json(qconfig_file_path, qconfigs)
@@ -75,12 +75,14 @@ class Constants:
     MAX_QPC_LIMIT = 30
     MAX_RETRIES = 5  # This constant will be used set the maximum number of retry attempts for downloading a model using huggingface_hub snapshot_download
     NUM_SPECULATIVE_TOKENS = 2
+    SDK_APPS_XML = "/opt/qti-aic/versions/apps.xml"  # This xml file is parsed to find out the SDK version.
 
 
 @dataclass
 class QnnConstants:
     # QNN PATH to be read from environment variable.
     QNN_SDK_PATH_ENV_VAR_NAME = "QNN_SDK_ROOT"
+    QNN_SDK_YAML = "sdk.yaml"
 
     # QNN Compilation tools
     QAIRT_CONVERTER = "{}/bin/{}/qairt-converter"
 
@@ -4,6 +4,8 @@
 # SPDX-License-Identifier: BSD-3-Clause
 #
 # -----------------------------------------------------------------------------
+
+import os
 from pathlib import Path
 from time import perf_counter
 
@@ -225,6 +227,7 @@ def test_auto_lora_model_for_causal_lm_noncb_export_compile_generate(
     # test compile
     qeff_model.compile(prefill_seq_len=32, ctx_len=64)
     assert Path(qeff_model.qpc_path).is_dir()
+    assert os.path.isfile(os.path.join(os.path.dirname(qeff_model.qpc_path), "qconfig.json"))
 
     # test generate
     prompts = ["hello!", "hi", "hello, my name is", "hey"]
@@ -249,6 +252,7 @@ def test_auto_lora_model_for_causal_lm_cb_compile_generate(base_model_name, adap
     # test compile
     qeff_model.compile(prefill_seq_len=32, ctx_len=64, full_batch_size=2)
     assert Path(qeff_model.qpc_path).is_dir()
+    assert os.path.isfile(os.path.join(os.path.dirname(qeff_model.qpc_path), "qconfig.json"))
 
     # test generate
     prompts = ["hello!", "hi", "hello, my name is", "hey"]
 
@@ -5,6 +5,7 @@
 #
 # -----------------------------------------------------------------------------
 
+import os
 from time import perf_counter
 
 import numpy as np
@@ -187,3 +188,4 @@ def test_auto_peft_model_for_causal_lm_compile_generate(base_config, adapter_con
     end = perf_counter()
     compile_time_1 = end - start
     assert compile_time_1 < 0.01 * compile_time_0
+    assert os.path.isfile(os.path.join(os.path.dirname(qeff_model.qpc_path), "qconfig.json"))
Original file line number	Diff line number	Diff line change
`@@ -11,6 +11,7 @@`
`11`	`11`	`)`
`12`	`12`	`from QEfficient.utils._utils import ( # noqa: F401`
`13`	`13`	`check_and_assign_cache_dir,`
	`14`	`+ dump_qconfig,`
`14`	`15`	`get_num_layers_from_config,`
`15`	`16`	`get_onnx_dir_name,`
`16`	`17`	`get_padding_shape_from_config,`