Merged

24 commits
4a074a7
Added all the changes for enabling subfunction for VLMs
abhishek-singh591 Jan 5, 2026
7e4299e
Merge branch 'quic:main' into subfunction_for_VLMs
abhishek-singh591 Jan 9, 2026
4458154
Fixed rope method for batch size > 1
abhishek-singh591 Jan 11, 2026
d2a81ad
Added test file for subfunction with VLM
abhishek-singh591 Jan 12, 2026
ddd471d
Merge branch 'main' into subfunction_for_VLMs
abhishek-singh591 Jan 12, 2026
441e2ba
Made minor fixes
abhishek-singh591 Jan 13, 2026
df8862b
Merge branch 'quic:main' into subfunction_for_VLMs
abhishek-singh591 Jan 13, 2026
9d06a55
Merge branch 'quic:main' into subfunction_for_VLMs
abhishek-singh591 Jan 14, 2026
fc71b96
Update modeling_codegen.py
abhishek-singh591 Jan 14, 2026
1b28002
Resolved lint error
abhishek-singh591 Jan 14, 2026
2b1f09c
Made Minor Fixes
abhishek-singh591 Jan 16, 2026
c806c93
Merge branch 'quic:main' into subfunction_for_VLMs
abhishek-singh591 Jan 18, 2026
f06028a
Fixed test file for subfunction
abhishek-singh591 Jan 18, 2026
5fd672d
Changed test file for subfunction with VLMs
abhishek-singh591 Jan 19, 2026
dca8322
Made Minor Fixes
abhishek-singh591 Jan 19, 2026
1407f61
Added support of subfunction to mllama
abhishek-singh591 Jan 19, 2026
5773b8e
Merge branch 'quic:main' into subfunction_for_VLMs
abhishek-singh591 Jan 20, 2026
f227e04
Merge branch 'quic:main' into subfunction_for_VLMs
abhishek-singh591 Jan 20, 2026
6f59466
Merge branch 'quic:main' into subfunction_for_VLMs
abhishek-singh591 Jan 20, 2026
6e66dcb
Merge branch 'quic:main' into subfunction_for_VLMs
abhishek-singh591 Jan 20, 2026
6324496
Merge branch 'quic:main' into subfunction_for_VLMs
abhishek-singh591 Jan 21, 2026
b21eb20
Merge branch 'main' into subfunction_for_VLMs
abhishek-singh591 Jan 21, 2026
129be5b
Update torch_patches.py
abhishek-singh591 Jan 21, 2026
4db605b
Merge branch 'main' into subfunction_for_VLMs
abhishek-singh591 Jan 22, 2026
11 changes: 10 additions & 1 deletion QEfficient/transformers/models/codegen/modeling_codegen.py
@@ -7,7 +7,7 @@

"""PyTorch Codegen model."""

from typing import Optional, Tuple, Union
from typing import Optional, Set, Tuple, Type, Union

import torch
from torch import nn
@@ -296,6 +296,15 @@ class QEffCodeGenForCausalLM(CodeGenForCausalLM):
- update the hidden_states, and fix for onnx model
"""

def get_submodules_for_export(self) -> Set[Type[nn.Module]]:
"""
Return the set of classes used as the repeated layers across the model for subfunction extraction.
Notes:
This method should return *class objects* (not instances).
Downstream code can use this to find/build subfunctions for repeated blocks.
"""
return {QEffCodeGenBlock}

def forward(
self,
input_ids: Optional[torch.LongTensor] = None,
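Each model touched by this PR advertises its repeated transformer block through get_submodules_for_export. As a minimal sketch of how such a set can drive subfunction extraction: torch.onnx.export accepts a collection of nn.Module subclasses via its export_modules_as_functions parameter and emits one ONNX local function per class. The wiring below (model, example_inputs, onnx_path, and the opset choice) is illustrative only; the actual QEfficient export path may differ.

import torch

def export_with_subfunctions(model, example_inputs, onnx_path):
    # Repeated-layer classes advertised by the model, e.g. {QEffCodeGenBlock}.
    submodule_classes = model.get_submodules_for_export()
    torch.onnx.export(
        model,
        example_inputs,
        onnx_path,
        opset_version=17,  # export_modules_as_functions requires opset >= 15
        # Each listed class becomes one ONNX function, so the exported graph
        # carries a single shared body instead of one copy per decoder layer.
        export_modules_as_functions=submodule_classes,
    )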
12 changes: 11 additions & 1 deletion QEfficient/transformers/models/falcon/modeling_falcon.py
@@ -8,9 +8,10 @@
"""PyTorch Falcon model."""

import math
from typing import Optional, Tuple, Union
from typing import Optional, Set, Tuple, Type, Union

import torch
import torch.nn as nn
import torch.utils.checkpoint
from torch.nn import functional as F
from transformers.cache_utils import Cache
@@ -353,6 +354,15 @@ class QEffFalconForCausalLM(FalconForCausalLM):
- update the hidden_states, and fix for onnx model
"""

def get_submodules_for_export(self) -> Set[Type[nn.Module]]:
"""
Return the set of classes used as the repeated layers across the model for subfunction extraction.
Notes:
This method should return *class objects* (not instances).
Downstream code can use this to find/build subfunctions for repeated blocks.
"""
return {QEffFalconDecoderLayer}

def forward(
self,
input_ids: torch.LongTensor = None,
11 changes: 10 additions & 1 deletion QEfficient/transformers/models/gemma/modeling_gemma.py
@@ -5,7 +5,7 @@
#
# -----------------------------------------------------------------------------

from typing import List, Optional, Tuple, Union
from typing import List, Optional, Set, Tuple, Type, Union

import torch
from torch import nn
@@ -336,6 +336,15 @@ class QEffGemmaForCausalLM(GemmaForCausalLM):
- add new args cache idx for the kv retention
"""

def get_submodules_for_export(self) -> Set[Type[nn.Module]]:
"""
Return the set of classes used as the repeated layers across the model for subfunction extraction.
Notes:
This method should return *class objects* (not instances).
Downstream code can use this to find/build subfunctions for repeated blocks.
"""
return {QEffGemmaDecoderLayer}

def forward(
self,
input_ids: torch.LongTensor = None,
11 changes: 10 additions & 1 deletion QEfficient/transformers/models/gemma2/modeling_gemma2.py
@@ -5,7 +5,7 @@
#
# -----------------------------------------------------------------------------

from typing import Callable, List, Optional, Tuple, Union
from typing import Callable, List, Optional, Set, Tuple, Type, Union

import torch
from torch import nn
@@ -388,6 +388,15 @@ class QEffGemma2ForCausalLM(Gemma2ForCausalLM, GenerationMixin):
- add new args cache idx for the kv retention
"""

def get_submodules_for_export(self) -> Set[Type[nn.Module]]:
"""
Return the set of classes used as the repeated layers across the model for subfunction extraction.
Notes:
This method should return *class objects* (not instances).
Downstream code can use this to find/build subfunctions for repeated blocks.
"""
return {QEffGemma2DecoderLayer}

def forward(
self,
input_ids: torch.LongTensor = None,
20 changes: 19 additions & 1 deletion QEfficient/transformers/models/gemma3/modeling_gemma3.py
@@ -6,7 +6,7 @@
# -----------------------------------------------------------------------------

import copy
from typing import List, Optional, Tuple, Union
from typing import List, Optional, Set, Tuple, Type, Union

import torch
from torch import nn
@@ -589,6 +589,15 @@ def __init__(self, model):
self.model = model
self.model.vision_model = self.model.vision_tower

def get_submodules_for_export(self) -> Set[Type[nn.Module]]:
"""
Return the set of classes used as the repeated layers across the model for subfunction extraction.
Notes:
This method should return *class objects* (not instances).
Downstream code can use this to find/build subfunctions for repeated blocks.
"""
return {self.model.vision_tower.vision_model.encoder.layers[0].__class__}

def forward(self, pixel_values):
image_features = self.model.get_image_features(pixel_values=pixel_values)
return image_features
@@ -602,6 +611,15 @@ def __init__(self, model):
self.config = self.model.config
self.lm_head = self.model.lm_head

def get_submodules_for_export(self) -> Set[Type[nn.Module]]:
"""
Return the set of classes used as the repeated layers across the model for subfunction extraction.
Notes:
This method should return *class objects* (not instances).
Downstream code can use this to find/build subfunctions for repeated blocks.
"""
return {QEffGemma3DecoderLayer}

def forward(
self,
input_ids,
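Two return styles appear across these files: models whose repeated block is defined in QEfficient return that class directly (e.g. {QEffGemma3DecoderLayer}), while wrappers around a stock transformers vision tower resolve the layer class from the first live instance, because that class is never re-declared locally. A small illustrative check (attribute names assumed to follow the Gemma3 wrapper above) of why layers[0] is a safe representative:

# All encoder layers are built from one class, so the runtime lookup via
# layers[0] names the type that every repeated block is an instance of.
layer_cls = model.vision_tower.vision_model.encoder.layers[0].__class__
assert all(isinstance(layer, layer_cls)
           for layer in model.vision_tower.vision_model.encoder.layers)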
11 changes: 10 additions & 1 deletion QEfficient/transformers/models/gpt2/modeling_gpt2.py
@@ -5,7 +5,7 @@
#
# -----------------------------------------------------------------------------

from typing import Callable, Optional, Tuple, Union
from typing import Callable, Optional, Set, Tuple, Type, Union

import torch
from torch import nn
@@ -397,6 +397,15 @@ class QEffGPT2LMHeadModel(GPT2LMHeadModel):
- add new args position idx for the cache_kwargs for kv retention
"""

def get_submodules_for_export(self) -> Set[Type[nn.Module]]:
"""
Return the set of classes used as the repeated layers across the model for subfunction extraction.
Notes:
This method should return *class objects* (not instances).
Downstream code can use this to find/build subfunctions for repeated blocks.
"""
return {QEffGPT2Block}

def forward(
self,
input_ids: Optional[torch.LongTensor] = None,
QEfficient/transformers/models/gpt_bigcode/modeling_gpt_bigcode.py
@@ -7,7 +7,7 @@

"""PyTorch GPTBigCode model."""

from typing import Optional, Tuple, Union
from typing import Optional, Set, Tuple, Type, Union

import torch
import torch.utils.checkpoint
@@ -378,6 +378,15 @@ def forward(


class QEffGPTBigCodeForCausalLM(GPTBigCodeForCausalLM):
def get_submodules_for_export(self) -> Set[Type[nn.Module]]:
"""
Return the set of classes used as the repeated layers across the model for subfunction extraction.
Notes:
This method should return *class objects* (not instances).
Downstream code can use this to find/build subfunctions for repeated blocks.
"""
return {QEffGPTBigCodeBlock}

def forward(
self,
input_ids: Optional[torch.Tensor] = None,
12 changes: 11 additions & 1 deletion QEfficient/transformers/models/gpt_oss/modeling_gpt_oss.py
@@ -6,7 +6,7 @@
# -----------------------------------------------------------------------------
import math
import os
from typing import Callable, Optional, Union
from typing import Callable, Optional, Set, Type, Union

import torch
from torch import nn
@@ -1205,6 +1205,16 @@ def forward(


class QEffGptOssForCausalLM(GptOssForCausalLM):
def get_submodules_for_export(self) -> Set[Type[nn.Module]]:
"""
Return the set of classes used as the repeated layers across the model for subfunction extraction.
Notes:
This method should return *class objects* (not instances).
Downstream code can use this to find/build subfunctions for repeated blocks.
"""
return {QEffGptOssDecoderLayer}

def forward(
self,
input_ids: Optional[torch.LongTensor] = None,
11 changes: 10 additions & 1 deletion QEfficient/transformers/models/gptj/modeling_gptj.py
@@ -7,7 +7,7 @@

"""PyTorch GPT-J model."""

from typing import Optional, Tuple, Union
from typing import Optional, Set, Tuple, Type, Union

import torch
from torch import nn
@@ -318,6 +318,15 @@ class QEffGPTJForCausalLM(GPTJForCausalLM):
- update the hidden_states, and fix for onnx model
"""

def get_submodules_for_export(self) -> Set[Type[nn.Module]]:
"""
Return the set of classes used as the repeated layers across the model for subfunction extraction.
Notes:
This method should return *class objects* (not instances).
Downstream code can use this to find/build subfunctions for repeated blocks.
"""
return {QEffGPTJBlock}

def forward(
self,
input_ids: Optional[torch.LongTensor] = None,
11 changes: 10 additions & 1 deletion QEfficient/transformers/models/granite/modeling_granite.py
@@ -5,7 +5,7 @@
#
# -----------------------------------------------------------------------------

from typing import Callable, List, Optional, Tuple, Union
from typing import Callable, List, Optional, Set, Tuple, Type, Union

import torch
from torch import nn
@@ -347,6 +347,15 @@ class QEffGraniteForCausalLM(GraniteForCausalLM):
Copied from GraniteForCausalLM: https://github.com/huggingface/transformers/blob/main/src/transformers/models/granite/modeling_granite.py
"""

def get_submodules_for_export(self) -> Set[Type[nn.Module]]:
"""
Return the set of classes used as the repeated layers across the model for subfunction extraction.
Notes:
This method should return *class objects* (not instances).
Downstream code can use this to find/build subfunctions for repeated blocks.
"""
return {QEffGraniteDecoderLayer}

def forward(
self,
input_ids: torch.LongTensor = None,
QEfficient/transformers/models/granitemoe/modeling_granitemoe.py
@@ -5,7 +5,7 @@
#
# -----------------------------------------------------------------------------

from typing import List, Optional, Tuple, Union
from typing import List, Optional, Set, Tuple, Type, Union

import torch
import torch.nn.functional as F
@@ -493,6 +493,15 @@ class QEffGraniteMoeForCausalLM(GraniteMoeForCausalLM):
Copied from GraniteMoeForCausalLM: https://github.com/huggingface/transformers/blob/main/src/transformers/models/granitemoe/modeling_granitemoe.py
"""

def get_submodules_for_export(self) -> Set[Type[nn.Module]]:
"""
Return the set of classes used as the repeated layers across the model for subfunction extraction.
Notes:
This method should return *class objects* (not instances).
Downstream code can use this to find/build subfunctions for repeated blocks.
"""
return {self.model.layers[0].__class__}

def forward(
self,
input_ids: torch.LongTensor = None,
11 changes: 10 additions & 1 deletion QEfficient/transformers/models/grok_1/modeling_grok1.py
@@ -5,7 +5,7 @@
#
# ----------------------------------------------------------------------------

from typing import List, Optional, Tuple, Union
from typing import List, Optional, Set, Tuple, Type, Union

import torch
import torch.nn as nn
@@ -397,6 +397,15 @@ class QEffGrok1ModelForCausalLM(nn.Module):
Grok model for causal language modeling.
"""

def get_submodules_for_export(self) -> Set[Type[nn.Module]]:
"""
Return the set of classes used as the repeated layers across the model for subfunction extraction.
Notes:
This method should return *class objects* (not instances).
Downstream code can use this to find/build subfunctions for repeated blocks.
"""
return {QEffGrok1DecoderLayer}

def forward(
self,
input_ids: torch.LongTensor = None,
20 changes: 19 additions & 1 deletion QEfficient/transformers/models/internvl/modeling_internvl.py
@@ -5,7 +5,7 @@
#
# -----------------------------------------------------------------------------

from typing import List, Optional
from typing import List, Optional, Set, Type

import torch
import torch.nn as nn
@@ -21,6 +21,15 @@ def __init__(self, model):
super().__init__()
self.model = model

def get_submodules_for_export(self) -> Set[Type[nn.Module]]:
"""
Return the set of classes used as the repeated layers across the model for subfunction extraction.
Notes:
This method should return *class objects* (not instances).
Downstream code can use this to find/build subfunctions for repeated blocks.
"""
return {self.model.vision_model.encoder.layers[0].__class__}

def forward(self, pixel_values):
vision_embeds = self.model.extract_feature(pixel_values)
# Reshape from [num_patches, 256, hidden_dim] -> [1, num_patches*256, head_dim]
@@ -36,6 +45,15 @@ def __init__(self, model):
self.config = self.model.language_model.config
self.language_model = self.model.language_model

def get_submodules_for_export(self) -> Set[Type[nn.Module]]:
"""
Return the set of classes used as the repeated layers across the model for subfunction extraction.
Notes:
This method should return *class objects* (not instances).
Downstream code can use this to find/build subfunctions for repeated blocks.
"""
return {self.model.language_model.model.layers[0].__class__}

def forward(
self,
input_ids,
11 changes: 10 additions & 1 deletion QEfficient/transformers/models/llama/modeling_llama.py
@@ -5,7 +5,7 @@
#
# -----------------------------------------------------------------------------

from typing import Any, Dict, List, Optional, Tuple, Union
from typing import Any, Dict, List, Optional, Set, Tuple, Type, Union

import torch
from torch import nn
@@ -404,6 +404,15 @@ class QEffLlamaForCausalLM(LlamaForCausalLM):
Copied from LlamaForCausalLM: https://github.com/huggingface/transformers/blob/main/src/transformers/models/llama/modeling_llama.py
"""

def get_submodules_for_export(self) -> Set[Type[nn.Module]]:
"""
Return the set of classes used as the repeated layers across the model for subfunction extraction.
Notes:
This method should return *class objects* (not instances).
Downstream code can use this to find/build subfunctions for repeated blocks.
"""
return {QEffLlamaDecoderLayer}

def forward(
self,
input_ids: torch.LongTensor = None,
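For subfunction extraction to cover the whole decoder stack, every repeated block must be an instance of one of the advertised classes. A sketch of a coverage check along those lines (the helper name is hypothetical; config.num_hidden_layers follows the LlamaForCausalLM layout shown above):

def check_subfunction_coverage(model):
    classes = tuple(model.get_submodules_for_export())
    covered = sum(1 for m in model.modules() if isinstance(m, classes))
    # Llama-style models build one decoder block per hidden layer.
    assert covered == model.config.num_hidden_layers, (
        f"expected {model.config.num_hidden_layers} blocks, found {covered}"
    )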