Skip to content

Reading mxfp6_matmul for QNN Compilation path from compile API arguments #499

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion QEfficient/base/modeling_qeff.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,7 @@ def _compile(
custom_io: Optional[Dict[str, str]] = None,
mdp_ts_num_devices: int = 1,
num_speculative_tokens: Optional[int] = None,
mxfp6_matmul: bool = constants.DEFAULT_AIC_MXPF6_MATMUL,
enable_qnn: Optional[bool] = False,
qnn_config: Optional[str] = None,
**compiler_options,
Expand All @@ -239,6 +240,7 @@ def _compile(
:custom_io (dict): Custom IO to specify the input and outputs in different formats than default
:mdp_ts_num_devices (int): Number of devices to partition to use Multi-Device Partitioning with tensor-slicing.
:num_speculative_tokens (int, optional): Number of speculative tokens to take as input for Speculative Decoding Target Language Model.
:mxfp6_matmul (bool): Use MXFP6 to compress weights for MatMul nodes to run faster on device. ``Defaults to False.``
:enable_qnn (bool): Enables QNN Compilation. ``Defaults to False.``
:qnn_config (str): Path of QNN Config parameters file. Any extra parameters for QNN compilation can be passed via this file. ``Defaults to None.``
:compiler_options: Pass any compiler option as input.
Expand Down Expand Up @@ -269,7 +271,7 @@ def _compile(
custom_io=custom_io,
device_group=list(range(mdp_ts_num_devices)),
num_cores=compiler_options.get("aic_num_cores", constants.DEFAULT_AIC_NUM_CORES),
mxfp6=compiler_options.get("mxfp6_matmul", constants.DEFAULT_AIC_MXPF6_MATMUL),
mxfp6=mxfp6_matmul,
mxint8=mxint8_kv_cache,
qnn_config=qnn_config,
)
Expand All @@ -281,6 +283,9 @@ def _compile(
if mdp_ts_json_path := compiler_options.pop("mdp_load_partition_config", None):
command.append(f"-mdp-load-partition-config={mdp_ts_json_path}")

if mxfp6_matmul:
command.append("-mxfp6-matmul")

for key, value in compiler_options.items():
option = "-" + key.replace("_", "-")
if isinstance(value, bool):
Expand Down
11 changes: 10 additions & 1 deletion QEfficient/compile/qnn_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,8 +106,17 @@ def parse_qnn_config(self):
for key, value in config_data.items():
if key == QnnConstants.CONVERTER_ARGS_EXTENSION_STR:
self.check_extension_arg(key, value, QnnConstants.IMMUTABLE_CONVERTER_ARGS)
if key == QnnConstants.CONTEXT_BIN_ARGS_EXTENSION_STR:
elif key == QnnConstants.CONTEXT_BIN_ARGS_EXTENSION_STR:
self.check_extension_arg(key, value, QnnConstants.IMMUTABLE_CONTEXT_BIN_GEN_ARGS)
elif key == QnnConstants.QNN_COMPILATION_BACKEND_STR:
immutable_param = [
sub_key for sub_key in value.keys() if sub_key in QnnConstants.IMMUTABLE_COMPILATION_BACKEND_ARGS
]
if immutable_param:
raise AttributeError(
f"Immutable Parameters {immutable_param} found in {QnnConstants.QNN_COMPILATION_BACKEND_STR}. Please remove them from QNN Configuration file."
)

self.qnn_config[key] = value

def create_qnn_tensor_slicing_json(self) -> str:
Expand Down
4 changes: 4 additions & 0 deletions QEfficient/utils/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,10 @@ class QnnConstants:
"--config_file ",
]

IMMUTABLE_COMPILATION_BACKEND_ARGS = [
"compiler_mxfp6_matmul_weights",
]

QNN_SAMPLE_CONFIG = {
"converter_args_extension": "--onnx_defer_loading",
"context_binary_generator_args_extension": "--log_level debug",
Expand Down
Loading