
Commit 8d99a93
Addressing comments

Signed-off-by: Asmita Goswami <[email protected]>
1 parent: 463dea5

File tree: 6 files changed (+47, -48 lines)

QEfficient/base/common.py
+2 -9

@@ -14,18 +14,11 @@
 
 from typing import Any
 
-import transformers.models.auto.modeling_auto as mapping
+from QEfficient.transformers.modeling_utils import model_class_mapping
 from transformers import AutoConfig
 
 from QEfficient.base.modeling_qeff import QEFFBaseModel
 
-MODEL_CLASS_MAPPING = {}
-for architecture in mapping.MODEL_FOR_CAUSAL_LM_MAPPING_NAMES.values():
-    MODEL_CLASS_MAPPING[architecture] = "QEFFAutoModelForCausalLM"
-
-for architecture in mapping.MODEL_FOR_IMAGE_TEXT_TO_TEXT_MAPPING_NAMES.values():
-    MODEL_CLASS_MAPPING[architecture] = "QEFFAutoModelForImageTextToText"
-
 
 class QEFFCommonLoader:
     """
@@ -48,7 +41,7 @@ def from_pretrained(cls, pretrained_model_name_or_path: str, *args, **kwargs) ->
         config = AutoConfig.from_pretrained(pretrained_model_name_or_path)
         architecture = config.architectures[0] if config.architectures else None
 
-        class_name = MODEL_CLASS_MAPPING.get(architecture)
+        class_name = model_class_mapping.get(architecture)
         if class_name:
             module = __import__("QEfficient.transformers.models.modeling_auto")
             model_class = getattr(module, class_name)
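For context, QEFFCommonLoader.from_pretrained resolves the wrapper class purely by name: it looks the detected architecture up in the shared mapping and then imports the class dynamically. A minimal, self-contained sketch of that lookup-and-import pattern, using importlib and a stand-in mapping instead of the real QEfficient modules:

import importlib

# Stand-in for model_class_mapping; the real dict is built in
# QEfficient/transformers/modeling_utils.py from the transformers auto-mappings.
model_class_mapping = {"LlamaForCausalLM": "QEFFAutoModelForCausalLM"}

def resolve_wrapper(architecture: str):
    class_name = model_class_mapping.get(architecture)
    if class_name is None:
        raise NotImplementedError(f"No QEFF wrapper registered for {architecture}")  # error handling here is illustrative
    module = importlib.import_module("QEfficient.transformers.models.modeling_auto")
    return getattr(module, class_name)

Note that the diff's __import__("QEfficient.transformers.models.modeling_auto") returns the top-level QEfficient package, so its getattr lookup relies on the class being re-exported there; importlib.import_module returns the leaf module instead. Either works for this sketch.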

QEfficient/cloud/infer.py
+24 -33

@@ -16,7 +16,7 @@
 from transformers.models.auto.modeling_auto import MODEL_FOR_IMAGE_TEXT_TO_TEXT_MAPPING_NAMES
 
 from QEfficient.base.common import QEFFCommonLoader
-from QEfficient.utils import check_and_assign_cache_dir, load_hf_tokenizer
+from QEfficient.utils import check_and_assign_cache_dir, load_hf_tokenizer, constants
 from QEfficient.utils.logging_utils import logger
 
 
@@ -41,7 +41,6 @@ def main(
     allow_mxint8_mdp_io: bool = False,
     enable_qnn: Optional[bool] = False,
     qnn_config: Optional[str] = None,
-    img_size: Optional[int] = None,
     **kwargs,
 ) -> None:
     """
@@ -89,9 +88,6 @@ def main(
     if args.mxint8:
         logger.warning("mxint8 is going to be deprecated in a future release, use -mxint8_kv_cache instead.")
 
-    image_path = kwargs.pop("image_path", None)
-    image_url = kwargs.pop("image_url", None)
-
     qeff_model = QEFFCommonLoader.from_pretrained(
         pretrained_model_name_or_path=model_name,
         cache_dir=cache_dir,
@@ -100,6 +96,16 @@
         local_model_dir=local_model_dir,
     )
 
+    image_path = kwargs.pop("image_path", None)
+    image_url = kwargs.pop("image_url", None)
+
+    config = qeff_model.model.config
+    architecture = config.architectures[0] if config.architectures else None
+    if architecture not in MODEL_FOR_IMAGE_TEXT_TO_TEXT_MAPPING_NAMES.values():
+        img_size = kwargs.pop("img_size", None)
+        if img_size or image_path or image_url:
+            logger.warning(f"Skipping image arguments as they are not valid for {architecture}")
+
     #########
     # Compile
     #########
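The block added above decides up front whether the loaded model is an image-text-to-text model; for any other architecture the image-related arguments (img_size, image_path, image_url) are popped and dropped with a warning instead of being forwarded to compile(). A small illustrative check of the gate itself (the model name is only an example of a text-only checkpoint; the mapping import is the same one infer.py already uses):

from transformers import AutoConfig
from transformers.models.auto.modeling_auto import MODEL_FOR_IMAGE_TEXT_TO_TEXT_MAPPING_NAMES

config = AutoConfig.from_pretrained("gpt2")  # any text-only model
architecture = config.architectures[0] if config.architectures else None
print(architecture in MODEL_FOR_IMAGE_TEXT_TO_TEXT_MAPPING_NAMES.values())  # False, so image args would be skipped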
@@ -117,38 +123,21 @@ def main(
         allow_mxint8_mdp_io=allow_mxint8_mdp_io,
         enable_qnn=enable_qnn,
         qnn_config=qnn_config,
-        img_size=img_size,
         **kwargs,
     )
 
     #########
     # Execute
     #########
-    config = qeff_model.model.config
-    architecture = config.architectures[0] if config.architectures else None
-
     if architecture in MODEL_FOR_IMAGE_TEXT_TO_TEXT_MAPPING_NAMES.values():
         processor = AutoProcessor.from_pretrained(model_name, use_fast=False)
 
-        raw_image = None
-        if image_url is not None:
-            raw_image = Image.open(requests.get(image_url, stream=True).raw)
-        elif image_path is not None:
-            raw_image = Image.open(image_path)
-        else:
-            raise FileNotFoundError(
-                'Neither Image URL nor Image Path is found, either provide "image_url" or "image_path"'
-            )
+        if not (image_url or image_path):
+            raise ValueError('Neither Image URL nor Image Path is found, either provide "image_url" or "image_path"')
+        raw_image = Image.open(requests.get(image_url, stream=True).raw) if image_url else Image.open(image_path)
 
-        conversation = [
-            {
-                "role": "user",
-                "content": [
-                    {"type": "image"},
-                    {"type": "text", "text": prompt[0]},  # Currently accepting only 1 prompt
-                ],
-            },
-        ]
+        conversation = constants.Constants.conversation
+        conversation[0]["content"][1].update({"text": prompt[0]})  # Currently accepting only 1 prompt
 
         # Converts a list of dictionaries with `"role"` and `"content"` keys to a list of token ids.
         input_text = processor.apply_chat_template(conversation, add_generation_prompt=True, tokenize=False)
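With the chat template moved into constants.py, filling in the user prompt becomes a one-line update on the shared structure. A short sketch of what that produces (the list literal mirrors the new Constants.conversation; the prompt string is only an example):

conversation = [
    {
        "role": "user",
        "content": [
            {"type": "image"},
            {"type": "text"},
        ],
    }
]
conversation[0]["content"][1].update({"text": "Describe this image."})
# conversation[0]["content"][1] is now {"type": "text", "text": "Describe this image."}

Since Constants.conversation is a class-level list, the update() in infer.py mutates that shared object in place rather than a per-call copy.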
@@ -277,19 +266,21 @@
         "--enable_qnn",
         "--enable-qnn",
         action="store_true",
+        nargs="?",
+        const=True,
+        type=str,
         default=False,
         help="Enables QNN. Optionally, a configuration file can be provided with [--enable_qnn CONFIG_FILE].\
             If not provided, the default configuration will be used.\
             Sample Config: QEfficient/compile/qnn_config.json",
     )
-    parser.add_argument(
-        "--qnn_config",
-        nargs="?",
-        type=str,
-    )
-    parser.add_argument("--img-size", "--img_size", default=None, type=int, required=False, help="Size of Image")
 
     args, compiler_options = parser.parse_known_args()
+
+    if isinstance(args.enable_qnn, str):
+        args.qnn_config = args.enable_qnn
+        args.enable_qnn = True
+
     compiler_options_dict = {}
     for i in range(0, len(compiler_options)):
         if compiler_options[i].startswith("--"):
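With nargs="?" and const=True, a single --enable_qnn flag can now optionally carry a config path, and the isinstance check after parse_known_args splits it back into the boolean plus qnn_config. A self-contained sketch of just that parsing pattern (it omits the other infer.py arguments, and leaves out action="store_true" because argparse's store_true action does not accept nargs/const/type):

import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--enable_qnn", "--enable-qnn", nargs="?", const=True, type=str, default=False)

args = parser.parse_args(["--enable_qnn", "QEfficient/compile/qnn_config.json"])
qnn_config = None
if isinstance(args.enable_qnn, str):
    qnn_config = args.enable_qnn  # a config path was supplied on the command line
    args.enable_qnn = True

# "--enable_qnn"                  -> enable_qnn is True (const), qnn_config stays None
# "--enable_qnn path/to/cfg.json" -> enable_qnn is True, qnn_config is "path/to/cfg.json"
# flag omitted                    -> enable_qnn is False (default)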

QEfficient/transformers/modeling_utils.py
+11

@@ -8,6 +8,8 @@
 from collections import namedtuple
 from typing import Dict, Optional, Tuple, Type
 
+import transformers.models.auto.modeling_auto as mapping
+
 import torch
 import torch.nn as nn
 from transformers.models.codegen.modeling_codegen import (
@@ -272,6 +274,15 @@
 }
 
 
+model_class_mapping = {
+    **{architecture: "QEFFAutoModelForCausalLM" for architecture in mapping.MODEL_FOR_CAUSAL_LM_MAPPING_NAMES.values()},
+    **{
+        architecture: "QEFFAutoModelForImageTextToText"
+        for architecture in mapping.MODEL_FOR_IMAGE_TEXT_TO_TEXT_MAPPING_NAMES.values()
+    },
+}
+
+
 def _prepare_cross_attention_mask(
     cross_attention_mask: torch.Tensor,
     num_vision_tokens: int,
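The mapping is built once at import time by merging two dict comprehensions with ** unpacking: every causal-LM architecture maps to QEFFAutoModelForCausalLM and every image-text-to-text architecture to QEFFAutoModelForImageTextToText. A toy illustration of the same construction (the architecture names below are examples, not the full transformers mappings):

# Stand-ins for the .values() of the two transformers auto-mappings
causal_lm_archs = ["LlamaForCausalLM", "GPT2LMHeadModel"]
image_text_archs = ["MllamaForConditionalGeneration"]

model_class_mapping = {
    **{arch: "QEFFAutoModelForCausalLM" for arch in causal_lm_archs},
    **{arch: "QEFFAutoModelForImageTextToText" for arch in image_text_archs},
}

print(model_class_mapping["MllamaForConditionalGeneration"])  # QEFFAutoModelForImageTextToText
print(model_class_mapping.get("SomeUnknownArch"))             # None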

QEfficient/transformers/models/modeling_auto.py
-5

@@ -615,8 +615,6 @@ def compile(
         )
 
         output_names = self.model.get_output_names(kv_offload=True)
-        vision_onnx_path = compiler_options.get("vision_onnx_path", None)
-        lang_onnx_path = compiler_options.get("lang_onnx_path", None)
 
         specializations, compiler_options = self.model.get_specializations(
             batch_size=batch_size,
@@ -1567,9 +1565,6 @@ def compile(
         decode_specialization.update({"num_logits_to_keep": num_speculative_tokens + 1}) if self.is_tlm else ...
         specializations.append(decode_specialization)
 
-        if compiler_options.pop("img_size", None):
-            logger.warning(f"Skipping img_size as it is not a valid argument for {self.model.config.architectures[0]}.")
-
         if enable_qnn:
             if compiler_options:
                 logger.warning("Extra arguments to QNN compilation are supported via qnn_config.json only")

QEfficient/utils/_utils.py
+1 -1

@@ -504,7 +504,7 @@ def create_and_dump_qconfigs(
     # Extract QNN SDK details from YAML file if the environment variable is set
     qnn_sdk_details = None
    qnn_sdk_path = os.getenv(QnnConstants.QNN_SDK_PATH_ENV_VAR_NAME)
-    if qnn_sdk_path:
+    if enable_qnn and qnn_sdk_path:
         qnn_sdk_yaml_path = os.path.join(qnn_sdk_path, QnnConstants.QNN_SDK_YAML)
         with open(qnn_sdk_yaml_path, "r") as file:
             qnn_sdk_details = yaml.safe_load(file)
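The added enable_qnn condition means the QNN SDK YAML is only opened when a QNN compile was actually requested. A minimal sketch of the guarded lookup; "QNN_SDK_ROOT" and "sdk.yaml" are placeholders for QnnConstants.QNN_SDK_PATH_ENV_VAR_NAME and QnnConstants.QNN_SDK_YAML, whose actual values are not shown in this diff:

import os

import yaml

def read_qnn_sdk_details(enable_qnn: bool):
    qnn_sdk_details = None
    qnn_sdk_path = os.getenv("QNN_SDK_ROOT")  # placeholder env var name
    if enable_qnn and qnn_sdk_path:  # skip the file read entirely for non-QNN runs
        with open(os.path.join(qnn_sdk_path, "sdk.yaml"), "r") as file:  # placeholder file name
            qnn_sdk_details = yaml.safe_load(file)
    return qnn_sdk_details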

QEfficient/utils/constants.py
+9

@@ -76,6 +76,15 @@ class Constants:
     MAX_RETRIES = 5  # This constant will be used set the maximum number of retry attempts for downloading a model using huggingface_hub snapshot_download
     NUM_SPECULATIVE_TOKENS = 2
     SDK_APPS_XML = "/opt/qti-aic/versions/apps.xml"  # This xml file is parsed to find out the SDK version.
+    conversation = [
+        {
+            "role": "user",
+            "content": [
+                {"type": "image"},
+                {"type": "text"},
+            ],
+        }
+    ]
 
 
 @dataclass
