Commit 7b8ab2f

Resolved merge conflict in QEfficient/base/common.py

2 parents: 3b5466e + bdcd7e5
File tree: 11 files changed, +193 −244 lines

QEfficient/base/common.py

+4

@@ -12,12 +12,14 @@
 QEFFAutoModel provides a common interface for loading the HuggingFace models using either the HF card name or local path of downloaded model.
 """

+import os
 from typing import Any

 from transformers import AutoConfig

 from QEfficient.base.modeling_qeff import QEFFBaseModel
 from QEfficient.transformers.modeling_utils import MODEL_CLASS_MAPPING
+from QEfficient.utils import login_and_download_hf_lm


 class QEFFCommonLoader:
@@ -51,6 +53,8 @@ def from_pretrained(cls, pretrained_model_name_or_path: str, *args, **kwargs) ->
         )

         local_model_dir = kwargs.pop("local_model_dir", None)
+        if not os.path.isdir(pretrained_model_name_or_path) and local_model_dir is None:
+            pretrained_model_name_or_path = login_and_download_hf_lm(pretrained_model_name_or_path, *args, **kwargs)
         hf_token = kwargs.pop("hf_token", None)
         continuous_batching = True if kwargs.pop("full_batch_size", None) else False
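
In effect, from_pretrained now accepts either an HF model card name or a local directory path. A minimal usage sketch, assuming network access for the download path; the "gpt2" card name is illustrative and not part of this commit:

    from QEfficient.base.common import QEFFCommonLoader

    # HF card name: no local directory of that name exists, so the new branch
    # first fetches the weights via login_and_download_hf_lm.
    model = QEFFCommonLoader.from_pretrained("gpt2")

    # Local directory: os.path.isdir(...) is True, so the download step is
    # skipped and the path is used as-is.
    # model = QEFFCommonLoader.from_pretrained("/path/to/downloaded/model")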

QEfficient/base/modeling_qeff.py

+5 −5

@@ -245,8 +245,11 @@ def _compile(
         qpc_path = compile_dir / "qpc"
         if not onnx_path.is_file():
             raise FileNotFoundError(f"ONNX file not found at: {onnx_path}")
-
         command = constants.COMPILER + [f"-m={onnx_path}"]
+        if mdp_ts_json_path := compiler_options.pop("mdp_ts_json_path", None):
+            mdp_ts_num_devices = None
+            command.append(f"-mdp-load-partition-config={mdp_ts_json_path}")
+
         for key, value in compiler_options.items():
             option = "-" + key.replace("_", "-")
             if isinstance(value, bool):
@@ -262,9 +265,6 @@ def _compile(
         if custom_io is not None:
             compile_hash.update(to_hashable(custom_io))

-        if mdp_ts_num_devices > 1:
-            compile_hash.update(to_hashable({"mdp_ts_num_devices": mdp_ts_num_devices}))
-
         if num_speculative_tokens:
             compile_hash.update(to_hashable({"num_speculative_tokens": num_speculative_tokens}))

@@ -300,7 +300,7 @@ def _compile(
             command.append(f"-custom-IO-list-file={custom_io_yaml}")

         # Write mdp_config.json file
-        if mdp_ts_num_devices > 1:
+        if not mdp_ts_json_path and mdp_ts_num_devices > 1:
             num_cores = compiler_options.get("aic_num_cores", 16)
             mdp_ts_json = compile_dir / f"mdp_ts_{mdp_ts_num_devices}.json"
             with open(mdp_ts_json, "w") as fp:
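
Net effect: a caller-supplied partition config takes precedence over the auto-generated mdp_ts_<n>.json, since the key is popped before the generic option loop and mdp_ts_num_devices is cleared so the generation branch is skipped. A standalone sketch of that option flow; the compiler binary name and paths are assumptions for illustration, not taken from this commit:

    # Mirrors the walrus-pop pattern from the diff, with made-up inputs.
    compiler_options = {"aic_num_cores": 16, "mdp_ts_json_path": "/tmp/my_mdp.json"}
    command = ["qaic-exec", "-m=model.onnx"]  # assumed compiler invocation

    # pop() removes the key so the generic "-key=value" loop below never
    # sees it; clearing mdp_ts_num_devices disables auto-generation.
    if mdp_ts_json_path := compiler_options.pop("mdp_ts_json_path", None):
        mdp_ts_num_devices = None
        command.append(f"-mdp-load-partition-config={mdp_ts_json_path}")

    for key, value in compiler_options.items():
        command.append(f"-{key.replace('_', '-')}={value}")

    print(command)
    # ['qaic-exec', '-m=model.onnx',
    #  '-mdp-load-partition-config=/tmp/my_mdp.json', '-aic-num-cores=16']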

QEfficient/transformers/models/mllama/modeling_mllama.py

+1 −14

@@ -55,19 +55,8 @@ class QEffMllamaRotaryEmbedding(MllamaRotaryEmbedding):
     - Add static sin/cos computations.
     """

-    def __init__(
-        self,
-        dim=None,
-        max_position_embeddings=2048,
-        base=10000,
-        device=None,
-        scaling_factor=1.0,
-        rope_type="default",
-        config: Optional[MllamaConfig] = None,
-    ):
+    def __init__(self, config: MllamaConfig, device=None):
         super().__init__(config=config)
-        inv_freq, self.attention_scaling = self.rope_init_fn(self.config, device, **self.rope_kwargs)
-        self.register_buffer("inv_freq", inv_freq, persistent=False)

         # Build here to make `torch.jit.trace` work.
         self._set_cos_sin_cache(
@@ -868,7 +857,6 @@ def forward(
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
         cache_position: Optional[torch.LongTensor] = None,
-        num_logits_to_keep: int = 0,
     ) -> Union[Tuple, CausalLMOutputWithPast]:
         output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
         output_hidden_states = (
@@ -935,7 +923,6 @@ def forward(
             output_attentions=output_attentions,
             return_dict=return_dict,
             cache_position=cache_position,
-            num_logits_to_keep=num_logits_to_keep,
         )

         return outputs
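
The rotary-embedding constructor now defers inv_freq setup to the upstream MllamaRotaryEmbedding parent instead of recomputing and re-registering it. A hedged instantiation sketch, assuming a transformers release that ships Mllama; which sub-config the embedding expects may vary by version:

    from transformers import MllamaConfig
    from QEfficient.transformers.models.mllama.modeling_mllama import (
        QEffMllamaRotaryEmbedding,
    )

    # Default config values, purely for illustration.
    config = MllamaConfig()
    rope = QEffMllamaRotaryEmbedding(config=config.text_config, device="cpu")
    # inv_freq is registered by the parent __init__; the subclass then only
    # builds its static sin/cos caches via _set_cos_sin_cache.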
