Skip to content

Commit 205f1d7

Browse files
quic-meet and Meet Doshi authored
Gemma, Gemma2 and CodeGemma support (#123)
* Support for Gemma models Signed-off-by: quic-meet <[email protected]> * Updated modeling file: batch generation still buggy Signed-off-by: quic-meet <[email protected]> * lint fixes Signed-off-by: quic-meet <[email protected]> * Test case fix for sequence length mismatch of LM head outputs Signed-off-by: quic-meet <[email protected]> * Update rope calculations Signed-off-by: quic-meet <[email protected]> * Gemma, Gemma2, CodeGemma support Signed-off-by: quic-meet <[email protected]> * Gemma2 test case fix, RMS norm weight update Signed-off-by: quic-meet <[email protected]> * PR Fix Signed-off-by: Meet Doshi <[email protected]> * Fixed test bugs Signed-off-by: Meet Doshi <[email protected]> * Removed gated models from test Signed-off-by: Meet Doshi <[email protected]> * revert to opset13 Signed-off-by: Meet Doshi <[email protected]> * ruff format Signed-off-by: Meet Doshi <[email protected]> --------- Signed-off-by: quic-meet <[email protected]> Signed-off-by: Meet Doshi <[email protected]> Signed-off-by: Meet Doshi <[email protected]> Co-authored-by: Meet Doshi <[email protected]>
1 parent 4778d42 commit 205f1d7

File tree

14 files changed

+1372
-18
lines changed

14 files changed

+1372
-18
lines changed

QEfficient/customop/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,14 +12,15 @@
1212
CtxScatterFuncCB,
1313
CtxScatterFuncCB3D,
1414
)
15-
from QEfficient.customop.rms_norm import CustomRMSNormAIC
15+
from QEfficient.customop.rms_norm import CustomRMSNormAIC, GemmaCustomRMSNormAIC
1616

1717
__all__ = [
1818
"CtxGatherFunc",
1919
"CtxScatterFunc",
2020
"CtxGatherFunc3D",
2121
"CtxScatterFunc3D",
2222
"CustomRMSNormAIC",
23+
"GemmaCustomRMSNormAIC",
2324
"CtxGatherFuncCB",
2425
"CtxScatterFuncCB",
2526
"CtxGatherFuncCB3D",

QEfficient/customop/rms_norm.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,20 @@ class CustomRMSNormAIC(nn.Module):
4545
def __init__(self, hidden_size, eps=1e-05):
4646
super(CustomRMSNormAIC, self).__init__()
4747
self.variance_epsilon = eps
48+
self.eps = eps # Added to support GemmaRMSNorm
4849
self.weight = torch.nn.Parameter(torch.ones(hidden_size))
4950

5051
def forward(self, hidden_states):
51-
return CustomRMSNormFunc.apply(hidden_states, self.weight, self.variance_epsilon)
52+
return CustomRMSNormFunc.apply(
53+
hidden_states, self.weight, self.variance_epsilon if hasattr(self, "variance_epsilon") else self.eps
54+
)
55+
56+
57+
class GemmaCustomRMSNormAIC(CustomRMSNormAIC):
58+
"""
59+
Modify the init function to add +1 to the weights
60+
"""
61+
62+
def __qeff_init__(self):
63+
with torch.no_grad():
64+
self.weight.copy_(self.weight + 1.0)

QEfficient/exporter/export_utils.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ def export_onnx(
103103
except Exception as e:
104104
raise RuntimeError("Exporting to ONNX failed. {}".format(e))
105105

106-
onnx.checker.check_model(f"{gen_models_path}_tmp/{model_base_name}.onnx")
106+
onnx.checker.check_model(f"{gen_models_path}_tmp/{model_base_name}.onnx", full_check=True)
107107
loaded_model = onnx.load(f"{gen_models_path}_tmp/{model_base_name}.onnx")
108108
shutil.rmtree(f"{gen_models_path}_tmp")
109109
os.makedirs(f"{gen_models_path}", exist_ok=True)
@@ -123,7 +123,7 @@ def export_onnx(
123123
size_threshold=1024,
124124
convert_attribute=False,
125125
)
126-
onnx.checker.check_model(os.path.join(gen_models_path, f"{model_base_name}.onnx"))
126+
onnx.checker.check_model(os.path.join(gen_models_path, f"{model_base_name}.onnx"), full_check=True)
127127

128128
# Run shape inference in intial model itself
129129
onnx.shape_inference.infer_shapes_path(

QEfficient/transformers/modeling_utils.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,20 @@
2020
FalconForCausalLM,
2121
FalconModel,
2222
)
23+
from transformers.models.gemma.modeling_gemma import (
24+
GemmaAttention,
25+
GemmaDecoderLayer,
26+
GemmaForCausalLM,
27+
GemmaModel,
28+
GemmaRMSNorm,
29+
)
30+
from transformers.models.gemma2.modeling_gemma2 import (
31+
Gemma2Attention,
32+
Gemma2DecoderLayer,
33+
Gemma2ForCausalLM,
34+
Gemma2Model,
35+
Gemma2RMSNorm,
36+
)
2337
from transformers.models.gpt2.modeling_gpt2 import GPT2Attention, GPT2Block, GPT2LMHeadModel, GPT2Model
2438
from transformers.models.gpt_bigcode.modeling_gpt_bigcode import (
2539
GPTBigCodeAttention,
@@ -74,6 +88,13 @@
7488
QEffFalconForCausalLM,
7589
QEffFalconModel,
7690
)
91+
from .models.gemma.modeling_gemma import QEffGemmaAttention, QEffGemmaDecoderLayer, QEffGemmaForCausalLM, QEffGemmaModel
92+
from .models.gemma2.modeling_gemma2 import (
93+
QEffGemma2Attention,
94+
QEffGemma2DecoderLayer,
95+
QEffGemma2ForCausalLM,
96+
QEffGemma2Model,
97+
)
7798
from .models.gpt2.modeling_gpt2 import QEffGPT2Attention, QEffGPT2Block, QEffGPT2LMHeadModel, QEffGPT2Model
7899
from .models.gpt_bigcode.modeling_gpt_bigcode import (
79100
QEffGPTBigCodeAttention,
@@ -119,6 +140,8 @@
119140
get_lists_of_cb_qeff_models = ModelArchitectures(
120141
[
121142
LlamaForCausalLM.__name__,
143+
GemmaForCausalLM.__name__,
144+
Gemma2ForCausalLM.__name__,
122145
MistralForCausalLM.__name__,
123146
MixtralForCausalLM.__name__,
124147
Starcoder2ForCausalLM.__name__,
@@ -141,6 +164,8 @@
141164
MptForCausalLM.__name__,
142165
CodeGenForCausalLM.__name__,
143166
LlamaForCausalLM.__name__,
167+
GemmaForCausalLM.__name__,
168+
Gemma2ForCausalLM.__name__,
144169
MistralForCausalLM.__name__,
145170
MixtralForCausalLM.__name__,
146171
Phi3ForCausalLM.__name__,
@@ -170,6 +195,18 @@
170195
LlamaForCausalLM: QEffLlamaForCausalLM,
171196
LlamaDecoderLayer: QEffLlamaDecoderLayer,
172197
LlamaRMSNorm: CustomRMSNormAIC,
198+
# Gemma model layers
199+
GemmaModel: QEffGemmaModel,
200+
GemmaAttention: QEffGemmaAttention,
201+
GemmaForCausalLM: QEffGemmaForCausalLM,
202+
GemmaDecoderLayer: QEffGemmaDecoderLayer,
203+
GemmaRMSNorm: CustomRMSNormAIC,
204+
# Gemma2 model layers
205+
Gemma2Model: QEffGemma2Model,
206+
Gemma2Attention: QEffGemma2Attention,
207+
Gemma2ForCausalLM: QEffGemma2ForCausalLM,
208+
Gemma2DecoderLayer: QEffGemma2DecoderLayer,
209+
Gemma2RMSNorm: CustomRMSNormAIC,
173210
# MPT model layers
174211
MptAttention: QEffMptAttention,
175212
MptBlock: QEffMptBlock,
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# -----------------------------------------------------------------------------
2+
#
3+
# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
4+
# SPDX-License-Identifier: BSD-3-Clause
5+
#
6+
# -----------------------------------------------------------------------------

0 commit comments

Comments (0)