Skip to content

Commit bc60d47

Browse files
authored
Merge branch 'Release-V1.19.3' into image_text_support
2 parents ad06845 + 7cfea38 commit bc60d47

18 files changed

+205
-13
lines changed

QEfficient/base/modeling_qeff.py

+8-3
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,11 @@
1919
import onnx
2020
import torch
2121

22-
from QEfficient.base.onnx_transforms import OnnxTransform
22+
from QEfficient.base.onnx_transforms import OnnxTransform, SplitTensorsTransform
2323
from QEfficient.base.pytorch_transforms import PytorchTransform
2424
from QEfficient.compile.qnn_compiler import compile as qnn_compile
2525
from QEfficient.generation.cloud_infer import QAICInferenceSession
26-
from QEfficient.utils import constants
26+
from QEfficient.utils import constants, dump_qconfig
2727
from QEfficient.utils._utils import load_json
2828
from QEfficient.utils.cache import QEFF_HOME, to_hashable
2929

@@ -191,7 +191,8 @@ def _export(
191191
transform_kwargs.update(onnx_transform_kwargs)
192192

193193
for transform in self._onnx_transforms:
194-
model, transformed = transform.apply(model, **transform_kwargs)
194+
if not (self.enable_qnn and transform == SplitTensorsTransform):
195+
model, transformed = transform.apply(model, **transform_kwargs)
195196
model.metadata_props.append(
196197
onnx.StringStringEntryProto(key="qeff_transforms", value=",".join(self._transform_names()))
197198
)
@@ -211,6 +212,7 @@ def _export(
211212
self.onnx_path = onnx_path
212213
return onnx_path
213214

215+
@dump_qconfig
214216
def _compile(
215217
self,
216218
onnx_path: Optional[str] = None,
@@ -336,8 +338,10 @@ def _compile(
336338
)
337339

338340
self.qpc_path = qpc_path
341+
339342
return qpc_path
340343

344+
@dump_qconfig
341345
def _qnn_compile(
342346
self,
343347
onnx_path: Optional[str] = None,
@@ -435,4 +439,5 @@ def _qnn_compile(
435439
)
436440

437441
self.qpc_path = qpc_path
442+
438443
return qpc_path

QEfficient/peft/auto.py

+4
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,10 @@ def model_hash(self) -> str:
107107
mhash = mhash.hexdigest()[:16]
108108
return mhash
109109

110+
@property
def get_model_config(self) -> dict:
    """Return the attribute dictionary of the base model's config.

    The base model is obtained through ``self.model.get_base_model()``.
    The value returned is the config object's live ``__dict__``, not a copy.
    """
    base_model = self.model.get_base_model()
    return base_model.config.__dict__
113+
110114
def load_adapter(self, model_id: str, adapter_name: str):
111115
"""Loads a new adapter from huggingface hub or local path
112116

QEfficient/peft/lora/auto.py

+4
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,10 @@ def model_hash(self) -> str:
9090
mhash = mhash.hexdigest()[:16]
9191
return mhash
9292

93+
@property
def get_model_config(self) -> dict:
    """Return the attribute dictionary of the config at ``self.model.model.config``.

    The value returned is the config object's live ``__dict__``, not a copy.
    """
    inner_model = self.model.model
    return inner_model.config.__dict__
96+
9397
def download_adapter(
9498
self,
9599
adapter_model_id: str,

QEfficient/transformers/models/modeling_auto.py

+38-2
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,10 @@ def model_hash(self) -> str:
229229
mhash = mhash.hexdigest()[:16]
230230
return mhash
231231

232+
@property
def get_model_config(self) -> dict:
    """Return the attribute dictionary of the config at ``self.model.config``.

    The value returned is the config object's live ``__dict__``, not a copy.
    """
    model_config = self.model.config
    return model_config.__dict__
235+
232236
def export(self, export_dir: Optional[str] = None) -> str:
233237
"""
234238
Exports the model to ``ONNX`` format using ``torch.onnx.export``.
@@ -447,6 +451,10 @@ def model_name(self) -> str:
447451
mname = mname[4:]
448452
return mname
449453

454+
@property
def get_model_config(self) -> dict:
    """Return the attribute dictionary of the vision model's config.

    The config is read from ``self.model.model.vision_model.config``; the value
    returned is that object's live ``__dict__``, not a copy.
    """
    vision_model = self.model.model.vision_model
    return vision_model.config.__dict__
457+
450458

451459
class QEffCausalLMForTextImageToTextModel(QEFFBaseModel):
452460
_pytorch_transforms = [
@@ -506,6 +514,10 @@ def model_name(self) -> str:
506514
mname = mname[4:]
507515
return mname
508516

517+
@property
def get_model_config(self) -> dict:
    """Return the attribute dictionary of the language model's config.

    The config is read from ``self.model.language_model.config``; the value
    returned is that object's live ``__dict__``, not a copy.
    """
    language_model = self.model.language_model
    return language_model.config.__dict__
520+
509521

510522
class _QEffAutoModelForImageTextToTextDualQPC:
511523
_hf_auto_class = AutoModelForImageTextToText
@@ -1132,6 +1144,10 @@ def model_name(self) -> str:
11321144
mname = mname[4:]
11331145
return mname
11341146

1147+
@property
def get_model_config(self) -> dict:
    """Return the attribute dictionary of the config at ``self.model.config``.

    The value returned is the config object's live ``__dict__``, not a copy.
    """
    model_config = self.model.config
    return model_config.__dict__
1150+
11351151

11361152
class QEFFAutoModelForImageTextToText:
11371153
"""
@@ -1187,6 +1203,7 @@ class QEFFAutoModelForCausalLM(QEFFBaseModel):
11871203
:model (nn.Module): PyTorch model
11881204
:continuous_batching (bool): Whether this model will be used for continuous batching in future. If this is not set True here, the model can not be exported/compiled for continuous batching later.
11891205
:is_tlm (bool): Whether this is a Speculative Decoding Target Language Model. If set to True, `num_logits_to_keep` input array will have to be fed to control the number of returned logits during prefill/decode.
1206+
:enable_qnn (bool): Enables QNN Compilation path for the model.
11901207
11911208
11921209
.. code-block:: python
@@ -1217,6 +1234,7 @@ def __init__(
12171234
model: nn.Module,
12181235
continuous_batching: bool = False,
12191236
is_tlm: bool = False,
1237+
enable_qnn: bool = False,
12201238
**kwargs,
12211239
):
12221240
model_class_name = model.__class__.__name__
@@ -1248,6 +1266,8 @@ def __init__(
12481266
self.model, transformed = SpDTransform.apply(self.model)
12491267
self.is_tlm = is_tlm
12501268

1269+
self.enable_qnn = enable_qnn
1270+
12511271
@property
12521272
def model_name(self) -> str:
12531273
mname = self.model.__class__.__name__
@@ -1261,7 +1281,13 @@ def __repr__(self) -> str:
12611281
@classmethod
12621282
@with_replaced_quantizers
12631283
def from_pretrained(
1264-
cls, pretrained_model_name_or_path, continuous_batching: bool = False, is_tlm: bool = False, *args, **kwargs
1284+
cls,
1285+
pretrained_model_name_or_path,
1286+
continuous_batching: bool = False,
1287+
is_tlm: bool = False,
1288+
enable_qnn: bool = False,
1289+
*args,
1290+
**kwargs,
12651291
):
12661292
"""
12671293
This method serves as the easiest entry point into using QEfficient. The interface is designed to be similar to transformers.AutoModelForCausalLM.
@@ -1272,6 +1298,7 @@ def from_pretrained(
12721298
:pretrained_name_or_path (str): Model card name from HuggingFace or local path to model directory.
12731299
:continuous_batching (bool): Whether this model will be used for continuous batching in future. If this is not set True here, the model can not be exported/compiled for continuous batching later.
12741300
:is_tlm (bool): Whether this is a Speculative Decoding Target Language Model. If set to True, `num_logits_to_keep` input array will have to be fed to control the number of returned logits during prefill/decode.
1301+
:enable_qnn (bool): Enables QNN Compilation path for the model.
12751302
:args, kwargs: Additional arguments to pass to transformers.AutoModelForCausalLM.
12761303
12771304
.. code-block:: python
@@ -1305,6 +1332,7 @@ def from_pretrained(
13051332
kv_offload = kwargs.pop("kv_offload", None)
13061333

13071334
kwargs.update({"attn_implementation": "eager", "low_cpu_mem_usage": False})
1335+
13081336
model = cls._hf_auto_class.from_pretrained(pretrained_model_name_or_path, *args, **kwargs)
13091337

13101338
# This is to support models that should be classified in a different auto class but that transformers loads via this class
@@ -1314,7 +1342,7 @@ def from_pretrained(
13141342
model, kv_offload=kv_offload
13151343
)
13161344

1317-
return cls(model, is_tlm=is_tlm, continuous_batching=continuous_batching)
1345+
return cls(model, is_tlm=is_tlm, continuous_batching=continuous_batching, enable_qnn=enable_qnn)
13181346

13191347
@property
13201348
def model_hash(self) -> str:
@@ -1327,6 +1355,10 @@ def model_hash(self) -> str:
13271355
mhash = mhash.hexdigest()[:16]
13281356
return mhash
13291357

1358+
@property
def get_model_config(self) -> dict:
    """Return the attribute dictionary of the config at ``self.model.config``.

    The value returned is the config object's live ``__dict__``, not a copy.
    """
    model_config = self.model.config
    return model_config.__dict__
1361+
13301362
def export(self, export_dir: Optional[str] = None) -> str:
13311363
"""
13321364
Exports the model to ``ONNX`` format using ``torch.onnx.export``.
@@ -1640,6 +1672,10 @@ def model_hash(self) -> str:
16401672
mhash = mhash.hexdigest()[:16]
16411673
return mhash
16421674

1675+
@property
def get_model_config(self) -> dict:
    """Return the attribute dictionary of the config at ``self.model.config``.

    The value returned is the config object's live ``__dict__``, not a copy.
    """
    model_config = self.model.config
    return model_config.__dict__
1678+
16431679
def export(self, export_dir: Optional[str] = None) -> str:
16441680
"""
16451681
Exports the model to ``ONNX`` format using ``torch.onnx.export``.

QEfficient/utils/__init__.py

+1
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
)
1212
from QEfficient.utils._utils import ( # noqa: F401
1313
check_and_assign_cache_dir,
14+
dump_qconfig,
1415
get_num_layers_from_config,
1516
get_onnx_dir_name,
1617
get_padding_shape_from_config,

QEfficient/utils/_utils.py

+113-1
Original file line numberDiff line numberDiff line change
@@ -8,16 +8,18 @@
88
import json
99
import os
1010
import subprocess
11+
import xml.etree.ElementTree as ET
1112
from dataclasses import dataclass
1213
from typing import Any, Dict, List, Optional, Tuple, Union
1314

1415
import requests
1516
import torch
17+
import yaml
1618
from huggingface_hub import login, snapshot_download
1719
from requests.exceptions import HTTPError
1820
from transformers import AutoProcessor, AutoTokenizer, PreTrainedTokenizer, PreTrainedTokenizerFast
1921

20-
from QEfficient.utils.constants import QEFF_MODELS_DIR, Constants
22+
from QEfficient.utils.constants import QEFF_MODELS_DIR, Constants, QnnConstants
2123
from QEfficient.utils.logging_utils import logger
2224

2325

@@ -442,3 +444,113 @@ class IOInfo:
442444

443445
def __repr__(self):
444446
return f"input_name:{self.name}\tdatatype:{self.datatype}\tshape:{self.shape}"
447+
448+
449+
def dump_qconfig(func):
    """Decorate a compile method so that its configuration is dumped after it runs.

    The wrapped method is called first and its return value is passed through
    unchanged. Afterwards ``create_and_dump_qconfigs`` is invoked with:

    * ``self.qpc_path`` and ``self.onnx_path`` as they are after the call,
    * ``self.get_model_config``,
    * the class names listed in ``self._pytorch_transforms`` and
      ``self._onnx_transforms``,
    * the ``specializations``, ``mdp_ts_num_devices`` (default ``1``) and
      ``num_speculative_tokens`` keyword arguments of the call,
    * every other keyword argument except ``custom_io``.

    Only keyword arguments are inspected; values passed positionally to the
    wrapped method are not recorded.

    Dumping is auxiliary to compilation: if it fails, the error is logged as a
    warning and the result of the (already finished) wrapped call is still
    returned.
    """
    import functools  # local import keeps this block self-contained

    # Keyword arguments forwarded by name, plus ``custom_io`` which is dropped
    # entirely. NOTE(review): presumably ``custom_io`` is omitted because it is
    # not meant to be serialised - confirm.
    excluded_keys = ("specializations", "mdp_ts_num_devices", "num_speculative_tokens", "custom_io")

    @functools.wraps(func)  # preserve __name__/__doc__ of the decorated method
    def wrapper(self, *args, **kwargs):
        result = func(self, *args, **kwargs)

        extra_options = {key: value for key, value in kwargs.items() if key not in excluded_keys}
        try:
            create_and_dump_qconfigs(
                self.qpc_path,
                self.onnx_path,
                self.get_model_config,
                [cls.__name__ for cls in self._pytorch_transforms],
                [cls.__name__ for cls in self._onnx_transforms],
                kwargs.get("specializations"),
                kwargs.get("mdp_ts_num_devices", 1),
                kwargs.get("num_speculative_tokens"),
                **extra_options,
            )
        except Exception as exc:  # boundary: never fail a finished compile over metadata
            logger.warning("Compilation succeeded but dumping the qconfig failed: %s", exc)
        return result

    return wrapper
470+
471+
472+
def _make_json_serializable(obj):
    """Recursively convert ``obj`` into a structure ``json`` can encode.

    Primitives are returned unchanged, lists/tuples become lists, dicts are
    rebuilt with converted values (and non-primitive keys stringified), objects
    with a ``__dict__`` are converted via ``vars``, and anything else is
    replaced by ``str(obj)``.
    """
    primitives = (int, float, str, bool, type(None))
    if isinstance(obj, primitives):
        return obj
    if isinstance(obj, (list, tuple)):
        return [_make_json_serializable(item) for item in obj]
    if isinstance(obj, dict):
        return {
            (key if isinstance(key, primitives) else str(key)): _make_json_serializable(value)
            for key, value in obj.items()
        }
    if hasattr(obj, "__dict__"):
        return _make_json_serializable(vars(obj))
    return str(obj)


def create_and_dump_qconfigs(
    qpc_path,
    onnx_path,
    huggingface_config,
    pytorch_transforms,
    onnx_transforms,
    specializations,
    mdp_ts_num_devices,
    num_speculative_tokens,
    **compiler_options,
):
    """
    Collect the configuration of a compiled model and hand it to ``create_json``.

    The target path is ``qconfig.json`` inside the parent directory of ``qpc_path``.
    The payload always contains the HuggingFace config, the QEff transform names
    and the ONNX path. Depending on the compile path it additionally contains:

    * QNN path (``qnn_config`` is present in ``compiler_options``): a ``qnn_config``
      section with the QNN config path (the repository default
      ``QEfficient/compile/qnn_config.json`` when the given value is ``None``) and,
      if the QNN SDK environment variable is set, the mapping read from the SDK YAML.
    * AIC path (otherwise): an ``aic_compiler_config`` section with the Apps SDK
      version read from ``Constants.SDK_APPS_XML`` (``None`` if the XML has no
      ``base_version`` element), the compile directory, specializations, device
      count, speculative-token count and all remaining ``compiler_options``.

    Every user-supplied value is passed through ``_make_json_serializable`` so that
    a non-JSON type in the options cannot abort the dump.
    """
    # The QNN flow is recognised solely by the presence of the ``qnn_config`` keyword.
    enable_qnn = "qnn_config" in compiler_options
    user_qnn_config_path = compiler_options.get("qnn_config")

    compile_dir = str(os.path.dirname(qpc_path))
    qconfig_file_path = os.path.join(compile_dir, "qconfig.json")

    qconfigs = {
        "huggingface_config": _make_json_serializable(huggingface_config),
        "qpc_config": {
            "QEff_config": {
                "pytorch_transforms": _make_json_serializable(pytorch_transforms),
                "onnx_transforms": _make_json_serializable(onnx_transforms),
                "onnx_path": str(onnx_path),
            },
        },
    }

    if enable_qnn:
        qnn_section = {
            "enable_qnn": True,
            "qnn_config_path": (
                str(user_qnn_config_path)
                if user_qnn_config_path is not None
                else "QEfficient/compile/qnn_config.json"
            ),
        }
        # QNN SDK details are only relevant (and only read) on the QNN path.
        qnn_sdk_path = os.getenv(QnnConstants.QNN_SDK_PATH_ENV_VAR_NAME)
        if qnn_sdk_path:
            qnn_sdk_yaml_path = os.path.join(qnn_sdk_path, QnnConstants.QNN_SDK_YAML)
            with open(qnn_sdk_yaml_path, "r", encoding="utf-8") as file:
                qnn_sdk_details = yaml.safe_load(file)
            # An empty or non-mapping YAML document cannot be merged; ignore it.
            if isinstance(qnn_sdk_details, dict):
                qnn_section.update(_make_json_serializable(qnn_sdk_details))
        qconfigs["qpc_config"]["qnn_config"] = qnn_section
    else:
        # The Apps SDK version is only recorded for the AIC compiler, so the XML
        # is parsed here rather than unconditionally.
        sdk_root = ET.parse(Constants.SDK_APPS_XML).getroot()
        qaic_version = sdk_root.findtext(".//base_version")  # None when the tag is absent
        qconfigs["qpc_config"]["aic_compiler_config"] = {
            "apps_sdk_version": qaic_version,
            "compile_dir": compile_dir,
            "specializations_file_path": str(os.path.join(compile_dir, "specializations.json")),
            "specializations": _make_json_serializable(specializations),
            "mdp_ts_num_devices": mdp_ts_num_devices,
            "num_speculative_tokens": num_speculative_tokens,
            **_make_json_serializable(compiler_options),
        }

    create_json(qconfig_file_path, qconfigs)

QEfficient/utils/constants.py

+2
Original file line numberDiff line numberDiff line change
@@ -75,12 +75,14 @@ class Constants:
7575
MAX_QPC_LIMIT = 30
7676
MAX_RETRIES = 5 # This constant will be used set the maximum number of retry attempts for downloading a model using huggingface_hub snapshot_download
7777
NUM_SPECULATIVE_TOKENS = 2
78+
SDK_APPS_XML = "/opt/qti-aic/versions/apps.xml" # This xml file is parsed to find out the SDK version.
7879

7980

8081
@dataclass
8182
class QnnConstants:
8283
# QNN PATH to be read from environment variable.
8384
QNN_SDK_PATH_ENV_VAR_NAME = "QNN_SDK_ROOT"
85+
QNN_SDK_YAML = "sdk.yaml"
8486

8587
# QNN Compilation tools
8688
QAIRT_CONVERTER = "{}/bin/{}/qairt-converter"

tests/peft/lora/test_lora_model.py

+4
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
# SPDX-License-Identifier: BSD-3-Clause
55
#
66
# -----------------------------------------------------------------------------
7+
8+
import os
79
from pathlib import Path
810
from time import perf_counter
911

@@ -225,6 +227,7 @@ def test_auto_lora_model_for_causal_lm_noncb_export_compile_generate(
225227
# test compile
226228
qeff_model.compile(prefill_seq_len=32, ctx_len=64)
227229
assert Path(qeff_model.qpc_path).is_dir()
230+
assert os.path.isfile(os.path.join(os.path.dirname(qeff_model.qpc_path), "qconfig.json"))
228231

229232
# test generate
230233
prompts = ["hello!", "hi", "hello, my name is", "hey"]
@@ -249,6 +252,7 @@ def test_auto_lora_model_for_causal_lm_cb_compile_generate(base_model_name, adap
249252
# test compile
250253
qeff_model.compile(prefill_seq_len=32, ctx_len=64, full_batch_size=2)
251254
assert Path(qeff_model.qpc_path).is_dir()
255+
assert os.path.isfile(os.path.join(os.path.dirname(qeff_model.qpc_path), "qconfig.json"))
252256

253257
# test generate
254258
prompts = ["hello!", "hi", "hello, my name is", "hey"]

tests/peft/test_peft_model.py

+2
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
#
66
# -----------------------------------------------------------------------------
77

8+
import os
89
from time import perf_counter
910

1011
import numpy as np
@@ -187,3 +188,4 @@ def test_auto_peft_model_for_causal_lm_compile_generate(base_config, adapter_con
187188
end = perf_counter()
188189
compile_time_1 = end - start
189190
assert compile_time_1 < 0.01 * compile_time_0
191+
assert os.path.isfile(os.path.join(os.path.dirname(qeff_model.qpc_path), "qconfig.json"))

0 commit comments

Comments
 (0)