Skip to content

Commit ee2faf1

Browse files
author
Shubham Agrawal
committed
Added enable_qnn as arg in _compile
Signed-off-by: Shubham Agrawal <[email protected]>
1 parent 2f352b6 commit ee2faf1

File tree

5 files changed

+35
-77
lines changed

5 files changed

+35
-77
lines changed

QEfficient/base/modeling_qeff.py

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,6 @@ def compile(self, *args, **kwargs) -> Path:
102102
Following flag can be passed in compiler_options to enable QNN Compilation path.
103103
:enable_qnn (bool): Enables QNN Compilation. ``Defaults to False. if not passed.``
104104
:qnn_config (str): Path of QNN Config parameters file. ``Defaults to None. if not passed``
105-
any other parameter passed will be ignored in QNN compilation path as we expect overriding or extra parameters for QNN via config file.
106105
for QAIC compilation path, any flag that is supported by ``qaic-exec`` can be passed. Params are converted to flags as below:
107106
- aic_num_cores=16 -> -aic-num-cores=16
108107
- convert_to_fp16=True -> -convert-to-fp16
@@ -244,12 +243,7 @@ def _compile(
244243
:num_speculative_tokens (int, optional): Number of speculative tokens to take as input for Speculative Decoding Target Language Model.
245244
:enable_qnn (bool): Enables QNN Compilation. ``Defaults to False.``
246245
:qnn_config (str): Path of QNN Config parameters file. ``Defaults to None.``
247-
:compiler_options: Pass any compiler option as input.
248-
Following flag can be passed in compiler_options to enable QNN Compilation path.
249-
:enable_qnn (bool): Enables QNN Compilation. ``Defaults to False. if not passed.``
250-
:qnn_config (str): Path of QNN Config parameters file. ``Defaults to None. if not passed``
251-
any other parameter passed will be ignored in QNN compilation path as we expect overriding or extra parameters for QNN via config file.
252-
for QAIC compilation path, any flag that is supported by ``qaic-exec`` can be passed. Params are converted to flags as below:
246+
:compiler_options: Pass any compiler option as input. Any flag that is supported by `qaic-exec` can be passed. Params are converted to flags as below:
253247
- aic_num_cores=16 -> -aic-num-cores=16
254248
- convert_to_fp16=True -> -convert-to-fp16
255249

QEfficient/utils/constants.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,7 @@ class QnnConstants:
161161
]
162162

163163
QNN_SAMPLE_CONFIG = {
164-
"converter_args_extension": "",
164+
"converter_args_extension": "--onnx_defer_loading",
165165
"context_binary_generator_args_extension": "--log_level debug",
166166
"qnn_compilation_backend": {
167167
"compiler_enable_depth_first": True,

tests/transformers/models/test_causal_lm_models.py

Lines changed: 21 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -130,27 +130,16 @@ def check_causal_lm_pytorch_vs_kv_vs_ort_vs_ai100(
130130
if not get_available_device_id():
131131
pytest.skip("No available devices to run model on Cloud AI 100")
132132

133-
if enable_qnn:
134-
qpc_path = qeff_model.compile(
135-
prefill_seq_len=prompt_len,
136-
ctx_len=ctx_len,
137-
num_cores=14,
138-
mxfp6=False,
139-
aic_enable_depth_first=False,
140-
num_speculative_tokens=num_speculative_tokens,
141-
enable_qnn=enable_qnn,
142-
qnn_config=qnn_config,
143-
)
144-
else:
145-
qpc_path = qeff_model.compile(
146-
prefill_seq_len=prompt_len,
147-
ctx_len=ctx_len,
148-
num_cores=14,
149-
mxfp6=False,
150-
aic_enable_depth_first=False,
151-
num_speculative_tokens=num_speculative_tokens,
152-
)
153-
133+
qpc_path = qeff_model.compile(
134+
prefill_seq_len=prompt_len,
135+
ctx_len=ctx_len,
136+
num_cores=14,
137+
mxfp6=False,
138+
aic_enable_depth_first=False,
139+
num_speculative_tokens=num_speculative_tokens,
140+
enable_qnn=enable_qnn,
141+
qnn_config=qnn_config,
142+
)
154143
exec_info = qeff_model.generate(tokenizer, prompts=Constants.INPUT_STR)
155144
cloud_ai_100_tokens = exec_info.generated_ids[0] # Because we always run for single input and single batch size
156145
gen_len = ort_tokens.shape[-1]
@@ -182,29 +171,17 @@ def check_causal_lm_pytorch_vs_kv_vs_ort_vs_ai100(
182171
if not get_available_device_id():
183172
pytest.skip("No available devices to run model on Cloud AI 100")
184173

185-
if enable_qnn:
186-
qpc_path = qeff_model.compile(
187-
prefill_seq_len=prompt_len,
188-
ctx_len=ctx_len,
189-
num_cores=14,
190-
mxfp6=False,
191-
aic_enable_depth_first=False,
192-
full_batch_size=full_batch_size,
193-
num_speculative_tokens=num_speculative_tokens,
194-
enable_qnn=enable_qnn,
195-
qnn_config=qnn_config,
196-
)
197-
else:
198-
qpc_path = qeff_model.compile(
199-
prefill_seq_len=prompt_len,
200-
ctx_len=ctx_len,
201-
num_cores=14,
202-
mxfp6=False,
203-
aic_enable_depth_first=False,
204-
full_batch_size=full_batch_size,
205-
num_speculative_tokens=num_speculative_tokens,
206-
)
207-
174+
qpc_path = qeff_model.compile(
175+
prefill_seq_len=prompt_len,
176+
ctx_len=ctx_len,
177+
num_cores=14,
178+
mxfp6=False,
179+
aic_enable_depth_first=False,
180+
full_batch_size=full_batch_size,
181+
num_speculative_tokens=num_speculative_tokens,
182+
enable_qnn=enable_qnn,
183+
qnn_config=qnn_config,
184+
)
208185
exec_info_fbs = qeff_model.generate(tokenizer, prompts=fbs_prompts)
209186

210187
assert all(

tests/transformers/models/test_embedding_models.py

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -73,17 +73,11 @@ def check_embed_pytorch_vs_ort_vs_ai100(
7373
print("Mad for onnx and PyTorch is ", mad)
7474
assert mad <= 10**-5, f"MAD is too high for onnx and Pytorch: {mad}"
7575

76-
if enable_qnn:
77-
qeff_model.compile(
78-
num_cores=14,
79-
enable_qnn=enable_qnn,
80-
qnn_config=qnn_config,
81-
)
82-
else:
83-
qeff_model.compile(
84-
num_cores=14,
85-
)
86-
76+
qeff_model.compile(
77+
num_cores=14,
78+
enable_qnn=enable_qnn,
79+
qnn_config=qnn_config,
80+
)
8781
ai100_output = qeff_model.generate(inputs=inputs)
8882

8983
# Compare ONNX and AI 100 outputs

tests/transformers/models/test_speech_seq2seq_models.py

Lines changed: 7 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -335,20 +335,13 @@ def check_seq2seq_pytorch_vs_kv_vs_ort_vs_ai100(
335335
if not get_available_device_id():
336336
pytest.skip("No available devices to run model on Cloud AI 100")
337337

338-
if enable_qnn:
339-
qeff_model.compile(
340-
ctx_len=ctx_len,
341-
num_cores=16,
342-
batch_size=batch_size,
343-
enable_qnn=enable_qnn,
344-
qnn_config=qnn_config,
345-
)
346-
else:
347-
qeff_model.compile(
348-
ctx_len=ctx_len,
349-
num_cores=16,
350-
batch_size=batch_size,
351-
)
338+
qeff_model.compile(
339+
ctx_len=ctx_len,
340+
num_cores=16,
341+
batch_size=batch_size,
342+
enable_qnn=enable_qnn,
343+
qnn_config=qnn_config,
344+
)
352345

353346
exec_info = qeff_model.generate(
354347
inputs=processor(data, sampling_rate=sample_rate, return_tensors="pt"), generation_len=ctx_len

0 commit comments

Comments (0)