
Commit 7cfea38

Code for SDK configs Inclusion (#298)
This creates a config JSON file containing all the details about compilation and SDK versions. Currently, this code is hooked into `QEFFAutoModelForCausalLM.compile`. The config looks like the following:

```json
{
  "huggingface_config": {
    "vocab_size": 50257,
    "n_positions": 1024,
    "n_embd": 768,
    "n_layer": 12,
    "n_head": 12,
    "n_inner": null,
    "activation_function": "gelu_new",
    "resid_pdrop": 0.1,
    "embd_pdrop": 0.1,
    "attn_pdrop": 0.1,
    "layer_norm_epsilon": 1e-05,
    "initializer_range": 0.02,
    "summary_type": "cls_index",
    "summary_use_proj": true,
    "summary_activation": null,
    "summary_first_dropout": 0.1,
    "summary_proj_to_labels": true,
    "scale_attn_weights": true,
    "use_cache": true,
    "scale_attn_by_inverse_layer_idx": false,
    "reorder_and_upcast_attn": false,
    "bos_token_id": 50256,
    "eos_token_id": 50256,
    "return_dict": true,
    "output_hidden_states": false,
    "output_attentions": false,
    "torchscript": false,
    "torch_dtype": null,
    "use_bfloat16": false,
    "tf_legacy_loss": false,
    "pruned_heads": {},
    "tie_word_embeddings": true,
    "chunk_size_feed_forward": 0,
    "is_encoder_decoder": false,
    "is_decoder": false,
    "cross_attention_hidden_size": null,
    "add_cross_attention": false,
    "tie_encoder_decoder": false,
    "max_length": 20,
    "min_length": 0,
    "do_sample": false,
    "early_stopping": false,
    "num_beams": 1,
    "num_beam_groups": 1,
    "diversity_penalty": 0.0,
    "temperature": 1.0,
    "top_k": 50,
    "top_p": 1.0,
    "typical_p": 1.0,
    "repetition_penalty": 1.0,
    "length_penalty": 1.0,
    "no_repeat_ngram_size": 0,
    "encoder_no_repeat_ngram_size": 0,
    "bad_words_ids": null,
    "num_return_sequences": 1,
    "output_scores": false,
    "return_dict_in_generate": false,
    "forced_bos_token_id": null,
    "forced_eos_token_id": null,
    "remove_invalid_values": false,
    "exponential_decay_length_penalty": null,
    "suppress_tokens": null,
    "begin_suppress_tokens": null,
    "architectures": ["GPT2LMHeadModel"],
    "finetuning_task": null,
    "id2label": {"0": "LABEL_0", "1": "LABEL_1"},
    "label2id": {"LABEL_0": 0, "LABEL_1": 1},
    "tokenizer_class": null,
    "prefix": null,
    "pad_token_id": null,
    "sep_token_id": null,
    "decoder_start_token_id": null,
    "task_specific_params": {"text-generation": {"do_sample": true, "max_length": 50}},
    "problem_type": null,
    "_name_or_path": "gpt2",
    "_commit_hash": "607a30d783dfa663caf39e06633721c8d4cfcd7e",
    "_attn_implementation_internal": "eager",
    "transformers_version": null,
    "model_type": "gpt2",
    "n_ctx": 1024
  },
  "qpc_config": {
    "QEff_config": {
      "pytorch_transforms": [
        "AwqToMatmulNbitsTransform",
        "GPTQToMatmulNbitsTransform",
        "CustomOpsTransform",
        "KVCacheTransform"
      ],
      "onnx_transforms": ["FP16ClipTransform", "SplitTensorsTransform"],
      "onnx_path": "/root/.cache/qeff_models/GPT2LMHeadModel-36f0eca92731bb47/GPT2LMHeadModel.onnx"
    },
    "aic_compiler_config": {
      "apps_sdk_version": "1.20.0",
      "compile_dir": "/root/.cache/qeff_models/GPT2LMHeadModel-36f0eca92731bb47",
      "specializations_file_path": "/root/.cache/qeff_models/GPT2LMHeadModel-36f0eca92731bb47/specializations.json",
      "prefill_seq_len": 32,
      "ctx_len": 128,
      "batch_size": 1,
      "full_batch_size": null,
      "num_devices": 1,
      "num_cores": 16,
      "mxfp6_matmul": false,
      "mxint8_kv_cache": false,
      "num_speculative_tokens": null
    },
    "qnn_config": {
      "enable_qnn": true,
      "qnn_config_path": "QEfficient/compile/qnn_config.json",
      "product": "QAIRT",
      "os": {"Ubuntu": 22.04, "Windows": 11},
      "sdk_flavor": ["aic"],
      "version": "2.31.0",
      "build_id": "250109072054_3882",
      "qnn_backend_api_version": "2.18.0",
      "tensorflow": "2.10.1",
      "tflite": "2.3.0",
      "torch": "1.13.1",
      "onnx": "1.16.1",
      "onnxruntime": "1.17.1",
      "onnxsimplifier": "0.4.36",
      "android-ndk": "r26c",
      "platform": "AIC.1.20.0.14"
    }
  }
}
```

Note: The code structure may change.

---------

Signed-off-by: Abukhoyer Shaik <[email protected]>
Co-authored-by: Abukhoyer Shaik <[email protected]>
1 parent 0ea70b4 commit 7cfea38

18 files changed (+185, -6 lines)

QEfficient/base/modeling_qeff.py (+5, -1)

```diff
@@ -23,7 +23,7 @@
 from QEfficient.base.pytorch_transforms import PytorchTransform
 from QEfficient.compile.qnn_compiler import compile as qnn_compile
 from QEfficient.generation.cloud_infer import QAICInferenceSession
-from QEfficient.utils import constants
+from QEfficient.utils import constants, dump_qconfig
 from QEfficient.utils._utils import load_json
 from QEfficient.utils.cache import QEFF_HOME, to_hashable

@@ -212,6 +212,7 @@ def _export(
         self.onnx_path = onnx_path
         return onnx_path

+    @dump_qconfig
     def _compile(
         self,
         onnx_path: Optional[str] = None,
@@ -337,8 +338,10 @@ def _compile(
         )

         self.qpc_path = qpc_path
+
         return qpc_path

+    @dump_qconfig
     def _qnn_compile(
         self,
         onnx_path: Optional[str] = None,
@@ -436,4 +439,5 @@ def _qnn_compile(
         )

         self.qpc_path = qpc_path
+
         return qpc_path
```
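The decorator added here is a post-hook: it lets `_compile` and `_qnn_compile` run to completion (populating `self.qpc_path` and `self.onnx_path`), then serializes the configs. A simplified standalone sketch of the pattern (not the actual `dump_qconfig` from `_utils.py`, which appears in full below):

```python
import functools
import json
import os

def dump_qconfig_sketch(func):
    """Simplified post-hook: run the wrapped compile, then dump a qconfig."""
    @functools.wraps(func)
    def wrapper(self, *args, **kwargs):
        result = func(self, *args, **kwargs)  # compile runs first, sets self.qpc_path
        qconfig = {"onnx_path": str(self.onnx_path), "compile_options": kwargs}
        with open(os.path.join(os.path.dirname(self.qpc_path), "qconfig.json"), "w") as f:
            json.dump(qconfig, f, indent=4)
        return result
    return wrapper
```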

QEfficient/peft/auto.py (+4)

```diff
@@ -107,6 +107,10 @@ def model_hash(self) -> str:
         mhash = mhash.hexdigest()[:16]
         return mhash

+    @property
+    def get_model_config(self) -> dict:
+        return self.model.get_base_model().config.__dict__
+
     def load_adapter(self, model_id: str, adapter_name: str):
         """Loads a new adapter from huggingface hub or local path

```
QEfficient/peft/lora/auto.py (+4)

```diff
@@ -90,6 +90,10 @@ def model_hash(self) -> str:
         mhash = mhash.hexdigest()[:16]
         return mhash

+    @property
+    def get_model_config(self) -> dict:
+        return self.model.model.config.__dict__
+
     def download_adapter(
         self,
         adapter_model_id: str,
```

QEfficient/transformers/models/modeling_auto.py (+24)

```diff
@@ -229,6 +229,10 @@ def model_hash(self) -> str:
         mhash = mhash.hexdigest()[:16]
         return mhash

+    @property
+    def get_model_config(self) -> dict:
+        return self.model.config.__dict__
+
     def export(self, export_dir: Optional[str] = None) -> str:
         """
         Exports the model to ``ONNX`` format using ``torch.onnx.export``.
@@ -447,6 +451,10 @@ def model_name(self) -> str:
         mname = mname[4:]
         return mname

+    @property
+    def get_model_config(self) -> dict:
+        return self.model.model.vision_model.config.__dict__
+

 class QEffCausalLMForTextImageToTextModel(QEFFBaseModel):
     _pytorch_transforms = [
@@ -506,6 +514,10 @@ def model_name(self) -> str:
         mname = mname[4:]
         return mname

+    @property
+    def get_model_config(self) -> dict:
+        return self.model.language_model.config.__dict__
+

 class _QEffAutoModelForImageTextToTextDualQPC:
     _hf_auto_class = AutoModelForImageTextToText
@@ -1128,6 +1140,10 @@ def model_name(self) -> str:
         mname = mname[4:]
         return mname

+    @property
+    def get_model_config(self) -> dict:
+        return self.model.config.__dict__
+

 class QEFFAutoModelForImageTextToText:
     """
@@ -1332,6 +1348,10 @@ def model_hash(self) -> str:
         mhash = mhash.hexdigest()[:16]
         return mhash

+    @property
+    def get_model_config(self) -> dict:
+        return self.model.config.__dict__
+
     def export(self, export_dir: Optional[str] = None) -> str:
         """
         Exports the model to ``ONNX`` format using ``torch.onnx.export``.
@@ -1642,6 +1662,10 @@ def model_hash(self) -> str:
         mhash = mhash.hexdigest()[:16]
         return mhash

+    @property
+    def get_model_config(self) -> dict:
+        return self.model.config.__dict__
+
     def export(self, export_dir: Optional[str] = None) -> str:
         """
         Exports the model to ``ONNX`` format using ``torch.onnx.export``.
```
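Each wrapper class above exposes `get_model_config` so that `dump_qconfig` can pull the underlying HuggingFace config uniformly, regardless of how deeply the model is nested. A hypothetical illustration of the pattern (`WrapperSketch` is not a real QEfficient class):

```python
from transformers import AutoConfig

class WrapperSketch:
    """Hypothetical stand-in for a QEff wrapper exposing its HF config."""

    def __init__(self, model_name: str):
        self.config = AutoConfig.from_pretrained(model_name)

    @property
    def get_model_config(self) -> dict:
        # __dict__ gives a plain dict that make_serializable() can walk.
        return self.config.__dict__

print(WrapperSketch("gpt2").get_model_config["vocab_size"])  # 50257
```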

QEfficient/utils/__init__.py (+1)

```diff
@@ -11,6 +11,7 @@
 )
 from QEfficient.utils._utils import (  # noqa: F401
     check_and_assign_cache_dir,
+    dump_qconfig,
     get_num_layers_from_config,
     get_onnx_dir_name,
     get_padding_shape_from_config,
```
QEfficient/utils/_utils.py (+113, -1)

```diff
@@ -8,16 +8,18 @@
 import json
 import os
 import subprocess
+import xml.etree.ElementTree as ET
 from dataclasses import dataclass
 from typing import Any, Dict, List, Optional, Tuple, Union

 import requests
 import torch
+import yaml
 from huggingface_hub import login, snapshot_download
 from requests.exceptions import HTTPError
 from transformers import AutoProcessor, AutoTokenizer, PreTrainedTokenizer, PreTrainedTokenizerFast

-from QEfficient.utils.constants import QEFF_MODELS_DIR, Constants
+from QEfficient.utils.constants import QEFF_MODELS_DIR, Constants, QnnConstants
 from QEfficient.utils.logging_utils import logger


@@ -442,3 +444,113 @@ class IOInfo:

     def __repr__(self):
         return f"input_name:{self.name}\tdatatype:{self.datatype}\tshape:{self.shape}"
+
+
+def dump_qconfig(func):
+    def wrapper(self, *args, **kwargs):
+        result = func(self, *args, **kwargs)
+        create_and_dump_qconfigs(
+            self.qpc_path,
+            self.onnx_path,
+            self.get_model_config,
+            [cls.__name__ for cls in self._pytorch_transforms],
+            [cls.__name__ for cls in self._onnx_transforms],
+            kwargs.get("specializations"),
+            kwargs.get("mdp_ts_num_devices", 1),
+            kwargs.get("num_speculative_tokens"),
+            **{
+                k: v
+                for k, v in kwargs.items()
+                if k not in ["specializations", "mdp_ts_num_devices", "num_speculative_tokens", "custom_io"]
+            },
+        )
+        return result
+
+    return wrapper
+
+
+def create_and_dump_qconfigs(
+    qpc_path,
+    onnx_path,
+    huggingface_config,
+    pytorch_transforms,
+    onnx_transforms,
+    specializations,
+    mdp_ts_num_devices,
+    num_speculative_tokens,
+    **compiler_options,
+):
+    """
+    This Method creates a JSON file which contains all the configs for a model.
+    Such as huggingface configs, QEff transforms, QAIC sdk version, QNN sdk, compilation dir, qpc dir and
+    many other compilation options.
+    """
+    qnn_config = compiler_options["qnn_config"] if "qnn_config" in compiler_options else None
+    enable_qnn = True if "qnn_config" in compiler_options else None
+
+    qconfig_file_path = os.path.join(os.path.dirname(qpc_path), "qconfig.json")
+    onnx_path = str(onnx_path)
+    specializations_file_path = str(os.path.join(os.path.dirname(qpc_path), "specializations.json"))
+    compile_dir = str(os.path.dirname(qpc_path))
+    qnn_config_path = (
+        (qnn_config if qnn_config is not None else "QEfficient/compile/qnn_config.json") if enable_qnn else None
+    )
+
+    # Extract QAIC SDK Apps Version from SDK XML file
+    tree = ET.parse(Constants.SDK_APPS_XML)
+    root = tree.getroot()
+    qaic_version = root.find(".//base_version").text
+
+    # Extract QNN SDK details from YAML file if the environment variable is set
+    qnn_sdk_details = None
+    qnn_sdk_path = os.getenv(QnnConstants.QNN_SDK_PATH_ENV_VAR_NAME)
+    if qnn_sdk_path:
+        qnn_sdk_yaml_path = os.path.join(qnn_sdk_path, QnnConstants.QNN_SDK_YAML)
+        with open(qnn_sdk_yaml_path, "r") as file:
+            qnn_sdk_details = yaml.safe_load(file)
+
+    # Ensure all objects in the configs dictionary are JSON serializable
+    def make_serializable(obj):
+        if isinstance(obj, (int, float, str, bool, type(None))):
+            return obj
+        elif isinstance(obj, (list, tuple)):
+            return [make_serializable(item) for item in obj]
+        elif isinstance(obj, dict):
+            return {key: make_serializable(value) for key, value in obj.items()}
+        elif hasattr(obj, "__dict__"):
+            return make_serializable(vars(obj))
+        return str(obj)
+
+    qconfigs = {
+        "huggingface_config": make_serializable(huggingface_config),
+        "qpc_config": {
+            "QEff_config": {
+                "pytorch_transforms": make_serializable(pytorch_transforms),
+                "onnx_transforms": make_serializable(onnx_transforms),
+                "onnx_path": onnx_path,
+            },
+        },
+    }
+
+    aic_compiler_config = {
+        "apps_sdk_version": qaic_version,
+        "compile_dir": compile_dir,
+        "specializations_file_path": specializations_file_path,
+        "specializations": make_serializable(specializations),
+        "mdp_ts_num_devices": mdp_ts_num_devices,
+        "num_speculative_tokens": num_speculative_tokens,
+        **compiler_options,
+    }
+    qnn_config = {
+        "enable_qnn": enable_qnn,
+        "qnn_config_path": qnn_config_path,
+    }
+    # Put AIC or qnn details.
+    if enable_qnn:
+        qconfigs["qpc_config"]["qnn_config"] = qnn_config
+        if qnn_sdk_details:
+            qconfigs["qpc_config"]["qnn_config"].update(qnn_sdk_details)
+    else:
+        qconfigs["qpc_config"]["aic_compiler_config"] = aic_compiler_config
+
+    create_json(qconfig_file_path, qconfigs)
```
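Worth noting is the `make_serializable` helper above, which recursively converts arbitrary config objects into JSON-friendly values, falling back to `vars()` for objects and `str()` for everything else. A quick self-contained illustration (the `Dummy` class is hypothetical):

```python
from dataclasses import dataclass

def make_serializable(obj):
    # Same logic as in _utils.py above.
    if isinstance(obj, (int, float, str, bool, type(None))):
        return obj
    elif isinstance(obj, (list, tuple)):
        return [make_serializable(item) for item in obj]
    elif isinstance(obj, dict):
        return {key: make_serializable(value) for key, value in obj.items()}
    elif hasattr(obj, "__dict__"):
        return make_serializable(vars(obj))
    return str(obj)

@dataclass
class Dummy:
    """Hypothetical stand-in for a nested config object."""
    name: str = "gpt2"
    heads: int = 12

print(make_serializable({"cfg": Dummy(), "shape": (1, 32), "x": 3 + 4j}))
# {'cfg': {'name': 'gpt2', 'heads': 12}, 'shape': [1, 32], 'x': '(3+4j)'}
```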

QEfficient/utils/constants.py (+2)

```diff
@@ -75,12 +75,14 @@ class Constants:
     MAX_QPC_LIMIT = 30
     MAX_RETRIES = 5  # This constant will be used set the maximum number of retry attempts for downloading a model using huggingface_hub snapshot_download
     NUM_SPECULATIVE_TOKENS = 2
+    SDK_APPS_XML = "/opt/qti-aic/versions/apps.xml"  # This xml file is parsed to find out the SDK version.


 @dataclass
 class QnnConstants:
     # QNN PATH to be read from environment variable.
     QNN_SDK_PATH_ENV_VAR_NAME = "QNN_SDK_ROOT"
+    QNN_SDK_YAML = "sdk.yaml"

     # QNN Compilation tools
     QAIRT_CONVERTER = "{}/bin/{}/qairt-converter"
```
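`SDK_APPS_XML` points at the versions manifest installed with the QAIC apps SDK, and `create_and_dump_qconfigs` only assumes it contains a `base_version` element somewhere in the tree. A sketch of the lookup against a made-up XML body (the real file's layout may differ):

```python
import xml.etree.ElementTree as ET

# Made-up stand-in for /opt/qti-aic/versions/apps.xml; only the
# <base_version> element is relied upon by create_and_dump_qconfigs.
xml_text = """
<versions>
  <ci_build>
    <base_version>1.20.0</base_version>
  </ci_build>
</versions>
"""

root = ET.fromstring(xml_text)
print(root.find(".//base_version").text)  # 1.20.0
```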

tests/peft/lora/test_lora_model.py (+4)

```diff
@@ -4,6 +4,8 @@
 # SPDX-License-Identifier: BSD-3-Clause
 #
 # -----------------------------------------------------------------------------
+
+import os
 from pathlib import Path
 from time import perf_counter

@@ -225,6 +227,7 @@ def test_auto_lora_model_for_causal_lm_noncb_export_compile_generate(
     # test compile
     qeff_model.compile(prefill_seq_len=32, ctx_len=64)
     assert Path(qeff_model.qpc_path).is_dir()
+    assert os.path.isfile(os.path.join(os.path.dirname(qeff_model.qpc_path), "qconfig.json"))

     # test generate
     prompts = ["hello!", "hi", "hello, my name is", "hey"]
@@ -249,6 +252,7 @@ def test_auto_lora_model_for_causal_lm_cb_compile_generate(base_model_name, adap
     # test compile
     qeff_model.compile(prefill_seq_len=32, ctx_len=64, full_batch_size=2)
     assert Path(qeff_model.qpc_path).is_dir()
+    assert os.path.isfile(os.path.join(os.path.dirname(qeff_model.qpc_path), "qconfig.json"))

     # test generate
     prompts = ["hello!", "hi", "hello, my name is", "hey"]
```

tests/peft/test_peft_model.py (+2)

```diff
@@ -5,6 +5,7 @@
 #
 # -----------------------------------------------------------------------------

+import os
 from time import perf_counter

 import numpy as np
@@ -187,3 +188,4 @@ def test_auto_peft_model_for_causal_lm_compile_generate(base_config, adapter_con
     end = perf_counter()
     compile_time_1 = end - start
     assert compile_time_1 < 0.01 * compile_time_0
+    assert os.path.isfile(os.path.join(os.path.dirname(qeff_model.qpc_path), "qconfig.json"))
```

tests/qnn_tests/test_causal_lm_models_qnn.py (+6, -2)

```diff
@@ -5,6 +5,8 @@
 #
 # -----------------------------------------------------------------------------

+import os
+
 import numpy as np
 import pytest
 from transformers import AutoModelForCausalLM
@@ -98,14 +100,15 @@ def check_causal_lm_pytorch_vs_kv_vs_ort_vs_ai100(
     if not get_available_device_id():
         pytest.skip("No available devices to run model on Cloud AI 100")

-    _ = qeff_model.compile(
+    qpc_path = qeff_model.compile(
         prefill_seq_len=prompt_len,
         ctx_len=ctx_len,
         num_cores=14,
         mxfp6=False,
         aic_enable_depth_first=False,
         enable_qnn=True,
     )
+    assert os.path.isfile(os.path.join(os.path.dirname(qpc_path), "qconfig.json"))
     exec_info = qeff_model.generate(tokenizer, prompts=Constants.INPUT_STR)
     cloud_ai_100_tokens = exec_info.generated_ids[0]  # Because we always run for single input and single batch size
     gen_len = ort_tokens.shape[-1]
@@ -136,7 +139,7 @@ def check_causal_lm_pytorch_vs_kv_vs_ort_vs_ai100(
     if not get_available_device_id():
         pytest.skip("No available devices to run model on Cloud AI 100")

-    _ = qeff_model.compile(
+    qpc_path = qeff_model.compile(
         prefill_seq_len=prompt_len,
         ctx_len=ctx_len,
         num_cores=14,
@@ -145,6 +148,7 @@ def check_causal_lm_pytorch_vs_kv_vs_ort_vs_ai100(
         full_batch_size=full_batch_size,
         enable_qnn=True,
     )
+    assert os.path.isfile(os.path.join(os.path.dirname(qpc_path), "qconfig.json"))
     exec_info_fbs = qeff_model.generate(tokenizer, prompts=fbs_prompts)

     assert all(
```

tests/text_generation/test_text_generation.py (+3)

```diff
@@ -5,6 +5,8 @@
 #
 # -----------------------------------------------------------------------------

+import os
+
 import pytest
 from transformers import AutoModelForCausalLM

@@ -101,3 +103,4 @@ def test_generate_text_stream(
     assert cloud_ai_100_output == stream_tokens, (
         f"Deviation in output observed while comparing regular execution and streamed output: {cloud_ai_100_output} != {stream_tokens}"
     )
+    assert os.path.isfile(os.path.join(os.path.dirname(qpc_path), "qconfig.json"))
```
