Talmaj · gcunhase · Jul 17, 2025 · Jul 17, 2025 · Jul 17, 2025
diff --git a/onnx2pytorch/convert/attribute.py b/onnx2pytorch/convert/attribute.py
@@ -79,6 +79,8 @@ def extract_attributes(node):
                 )
         elif attr.name == "axis" and node.op_type == "Flatten":
             kwargs["start_dim"] = extract_attr_values(attr)
+        elif attr.name == "axis" and node.op_type == "LayerNormalization":
+            continue
         elif attr.name == "axis" or attr.name == "axes":
             v = extract_attr_values(attr)
             if isinstance(v, (tuple, list)) and len(v) == 1:

diff --git a/onnx2pytorch/convert/operations.py b/onnx2pytorch/convert/operations.py
@@ -137,6 +137,8 @@ def convert_operations(onnx_graph, opset_version, batch_dim=0, enable_pruning=Tr
             op = nn.Identity()
         elif node.op_type == "InstanceNormalization":
             op = convert_instance_norm_layer(node, params=params)
+        elif node.op_type == "LayerNormalization":
+            op = LayerNorm(list(params[0].dims), **extract_attributes(node))
         elif node.op_type == "LeakyRelu":
             op = nn.LeakyReLU(**extract_attributes(node), inplace=True)
         elif node.op_type == "Less":

diff --git a/onnx2pytorch/operations/__init__.py b/onnx2pytorch/operations/__init__.py
@@ -13,6 +13,7 @@
 from .globalaveragepool import GlobalAveragePool
 from .hardsigmoid import Hardsigmoid
 from .instancenorm import InstanceNormWrapper
+from .layernorm import LayerNorm
 from .loop import Loop
 from .lstm import LSTMWrapper
 from .matmul import MatMul
@@ -55,6 +56,7 @@
     "GatherND",
     "GlobalAveragePool",
     "InstanceNormWrapper",
+    "LayerNorm",
     "Loop",
     "LSTMWrapper",
     "MatMul",

diff --git a/onnx2pytorch/operations/layernorm.py b/onnx2pytorch/operations/layernorm.py
@@ -0,0 +1,24 @@
+import torch
+from torch import nn
+from typing import Optional
+
+
+class LayerNorm(nn.Module):
+    """Functional wrapper for the ONNX ``LayerNormalization`` operator.
+
+    The module stores only the normalized shape and epsilon; ``scale`` and
+    ``bias`` are passed to :meth:`forward` at call time and handed straight to
+    ``torch.nn.functional.layer_norm``.
+
+    Args:
+        normalized_shape: Shape of the trailing dimensions to normalize over,
+            forwarded unchanged to ``layer_norm``.
+        eps: Value added to the variance for numerical stability. Defaults to
+            ``1e-5``, the ONNX default for the ``epsilon`` attribute, so a node
+            that omits the attribute can still be converted.
+    """
+
+    def __init__(self, normalized_shape: list, eps: float = 1e-5):
+        super().__init__()
+        self.normalized_shape = normalized_shape
+        self.eps = eps
+
+    def forward(
+        self,
+        inputs: torch.Tensor,
+        scale: Optional[torch.Tensor] = None,
+        bias: Optional[torch.Tensor] = None,
+    ) -> torch.Tensor:
+        """Apply layer normalization to ``inputs``.
+
+        Args:
+            inputs: Tensor to normalize.
+            scale: Optional multiplicative weight, passed as ``weight``.
+            bias: Optional additive bias.
+
+        Returns:
+            The result of ``torch.nn.functional.layer_norm``.
+        """
+        return nn.functional.layer_norm(
+            input=inputs,
+            normalized_shape=self.normalized_shape,
+            weight=scale,
+            bias=bias,
+            eps=self.eps,
+        )