1 change: 1 addition & 0 deletions nemo_automodel/components/loss/linear_ce.py
@@ -74,6 +74,7 @@

    HAVE_CUT_CROSS_ENTROPY = True
except ImportError:  # pragma: no cover
    linear_cross_entropy = None  # pragma: no cover
    HAVE_CUT_CROSS_ENTROPY = False  # pragma: no cover


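This hunk completes the optional-import guard: binding `linear_cross_entropy` to `None` in the `except` branch keeps the module importable without `cut_cross_entropy` and turns a later misuse into an explicit check rather than a `NameError`. A minimal sketch of the pattern, assuming the package exposes `linear_cross_entropy` at its top level (the import line itself sits above the visible hunk, and the caller below is hypothetical):

```python
# Minimal sketch of the optional-dependency guard used in linear_ce.py
# (assumed import path; caller is illustrative, not the module's real API).
try:
    from cut_cross_entropy import linear_cross_entropy

    HAVE_CUT_CROSS_ENTROPY = True
except ImportError:  # pragma: no cover
    linear_cross_entropy = None  # pragma: no cover
    HAVE_CUT_CROSS_ENTROPY = False  # pragma: no cover


def fused_linear_ce(hidden, lm_head_weight, labels):
    """Hypothetical caller: fail with a clear message instead of calling None."""
    if not HAVE_CUT_CROSS_ENTROPY:
        raise RuntimeError("cut_cross_entropy is required for fused linear cross-entropy")
    return linear_cross_entropy(hidden, lm_head_weight, labels)
```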
6 changes: 3 additions & 3 deletions nemo_automodel/components/models/common/utils.py
@@ -113,13 +113,13 @@ def build_recipe(self):
        it is returned directly. String values ``"current"`` and ``"block"`` are
        mapped to the corresponding TE recipe class.
        """
        if not HAVE_TE:
            return None

        # Pass through pre-built recipe objects directly
        if not isinstance(self.recipe, str):
            return self.recipe

        if not HAVE_TE:
            return None

        from transformer_engine.common.recipe import Float8BlockScaling, Float8CurrentScaling

        if self.recipe == "block":
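The docstring above spells out the intended resolution order: pre-built recipe objects pass straight through, the strings "current" and "block" map to TE recipe classes, and string inputs degrade to `None` when Transformer Engine is unavailable. A sketch of that flow as a standalone function, under stated assumptions: it is written free-standing rather than as the method the diff touches, the unknown-string error at the end is illustrative, and since the rendered diff does not mark which copy of the `HAVE_TE` guard is the added one, the ordering shown is just one consistent reading:

```python
def build_recipe(recipe, have_te):
    """Hypothetical free-function version of the resolution logic touched above."""
    # Pass through pre-built recipe objects directly
    if not isinstance(recipe, str):
        return recipe

    # Without Transformer Engine there is nothing to build for string values
    if not have_te:
        return None

    from transformer_engine.common.recipe import Float8BlockScaling, Float8CurrentScaling

    if recipe == "block":
        return Float8BlockScaling()
    if recipe == "current":
        return Float8CurrentScaling()
    raise ValueError(f"Unsupported FP8 recipe: {recipe!r}")  # assumed error handling
```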
1 change: 1 addition & 0 deletions nemo_automodel/components/models/kimi_k25_vl/model.py
@@ -158,6 +158,7 @@ def to_dict(self) -> Dict[str, Any]:

    FLASH_ATTN_AVAILABLE = True
except ImportError:
    flash_attn_varlen_func = None
    FLASH_ATTN_AVAILABLE = False


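Same pattern as the loss-module change: with `flash_attn_varlen_func` bound to `None` on `ImportError`, call sites can gate on `FLASH_ATTN_AVAILABLE` and raise a clear error instead of hitting a `NameError`. A hypothetical caller-side sketch (function name and error text are illustrative, not taken from the model code):

```python
def varlen_attention(q, k, v, cu_seqlens, max_seqlen):
    """Illustrative guard around the optional flash-attn varlen kernel."""
    if not FLASH_ATTN_AVAILABLE:
        raise RuntimeError("flash-attn is not installed; varlen attention is unavailable")
    # flash_attn_varlen_func expects packed (total_tokens, n_heads, head_dim) tensors
    return flash_attn_varlen_func(
        q, k, v,
        cu_seqlens_q=cu_seqlens,
        cu_seqlens_k=cu_seqlens,
        max_seqlen_q=max_seqlen,
        max_seqlen_k=max_seqlen,
    )
```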
9 changes: 8 additions & 1 deletion tests/unit_tests/models/llama/test_llama_custom_model.py
@@ -20,6 +20,7 @@

from nemo_automodel import NeMoAutoModelForCausalLM
from nemo_automodel.components.models.common import BackendConfig
from nemo_automodel.components.models.common.utils import HAVE_TE
from nemo_automodel.components.models.llama.state_dict_adapter import LlamaStateDictAdapter

set_seed(42)
@@ -97,7 +98,13 @@ def _tiny_checkpoint(self, tmp_path_factory):
        )

    @pytest.mark.parametrize("rope_type", ["default", "llama3"])
    @pytest.mark.parametrize("rms_norm", ["torch_fp32", "te"])
    @pytest.mark.parametrize(
        "rms_norm",
        [
            "torch_fp32",
            pytest.param("te", marks=pytest.mark.skipif(not HAVE_TE, reason="transformer_engine not installed")),
        ],
    )
    def test_model_matches_hf_with_adapter_bidirectional(self, rope_type, rms_norm, tmp_path):
        """Test bidirectional conversion between HF and custom models produces identical outputs.

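The parametrization change above is the standard way to skip a single parameter rather than the whole test: plain values and `pytest.param(...)` entries can be mixed in one list, and marks attached through `pytest.param` apply only to that case. A small self-contained illustration of the behaviour (generic test, not part of this suite):

```python
import pytest

from nemo_automodel.components.models.common.utils import HAVE_TE


@pytest.mark.parametrize(
    "rms_norm",
    [
        "torch_fp32",  # always collected and run
        pytest.param("te", marks=pytest.mark.skipif(not HAVE_TE, reason="transformer_engine not installed")),
    ],
)
def test_rms_norm_backend_is_known(rms_norm):
    # Without transformer_engine only the "te" case reports SKIPPED;
    # the "torch_fp32" case still runs.
    assert rms_norm in {"torch_fp32", "te"}
```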
9 changes: 8 additions & 1 deletion tests/unit_tests/models/qwen2/test_qwen2_custom_model.py
@@ -20,6 +20,7 @@

from nemo_automodel import NeMoAutoModelForCausalLM
from nemo_automodel.components.models.common import BackendConfig
from nemo_automodel.components.models.common.utils import HAVE_TE
from nemo_automodel.components.models.qwen2.state_dict_adapter import Qwen2StateDictAdapter

set_seed(42)
@@ -70,7 +71,13 @@ def _tiny_checkpoint(self, tmp_path_factory):
            TINY_DEFAULT_QWEN2_CONFIG, tmp_path_factory.mktemp("qwen2_ckpt")
        )

    @pytest.mark.parametrize("rms_norm", ["torch_fp32", "te"])
    @pytest.mark.parametrize(
        "rms_norm",
        [
            "torch_fp32",
            pytest.param("te", marks=pytest.mark.skipif(not HAVE_TE, reason="transformer_engine not installed")),
        ],
    )
    def test_model_matches_hf_with_adapter_bidirectional(self, rms_norm, tmp_path):
        """Test bidirectional conversion between HF and custom models produces identical outputs.

4 changes: 4 additions & 0 deletions tests/unit_tests/recipes/test_finetune_vlm_helpers.py
@@ -20,6 +20,7 @@
from contextlib import nullcontext

from nemo_automodel.components.loggers.metric_logger import MetricsSample
from nemo_automodel.components.loss.linear_ce import HAVE_CUT_CROSS_ENTROPY
from nemo_automodel.recipes.vlm.finetune import (
    FinetuneRecipeForVLM,
    _get_model_name,
@@ -978,6 +979,7 @@ def test_calculate_loss_with_masked_ce(self):
        assert loss.dim() == 0  # scalar

    @pytest.mark.skipif(not torch.cuda.is_available(), reason="FusedLinearCE requires CUDA")
    @pytest.mark.skipif(not HAVE_CUT_CROSS_ENTROPY, reason="cut_cross_entropy not installed")
    def test_calculate_loss_with_fused_linear_ce(self):
        """Test calculate_loss with FusedLinearCrossEntropy."""
        from nemo_automodel.components.loss.linear_ce import FusedLinearCrossEntropy
@@ -1004,6 +1006,7 @@ def test_calculate_loss_with_fused_linear_ce(self):
        assert loss.dim() == 0

    @pytest.mark.skipif(not torch.cuda.is_available(), reason="FusedLinearCE requires CUDA")
    @pytest.mark.skipif(not HAVE_CUT_CROSS_ENTROPY, reason="cut_cross_entropy not installed")
    def test_calculate_loss_fused_ce_finds_lm_head_by_name(self):
        """Test that FusedLinearCE can find lm_head via named_parameters when model has no get_output_embeddings."""
        from nemo_automodel.components.loss.linear_ce import FusedLinearCrossEntropy
@@ -1521,6 +1524,7 @@ class TestForwardBackwardStepNonPP:
"""Tests for _forward_backward_step without pipeline parallelism."""

@pytest.mark.skipif(not torch.cuda.is_available(), reason="FusedLinearCE requires CUDA")
@pytest.mark.skipif(not HAVE_CUT_CROSS_ENTROPY, reason="cut_cross_entropy not installed")
def test_non_pp_with_fused_linear_ce(self, monkeypatch):
"""Test non-PP path with FusedLinearCrossEntropy."""
from nemo_automodel.components.loss.linear_ce import FusedLinearCrossEntropy
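These tests now carry two independent `skipif` markers; pytest skips the test if either condition holds, so a CUDA machine without `cut_cross_entropy` (or the reverse) reports a skip instead of an import-time failure. A minimal standalone sketch of the stacking (hypothetical test name and body):

```python
import pytest
import torch

from nemo_automodel.components.loss.linear_ce import HAVE_CUT_CROSS_ENTROPY


@pytest.mark.skipif(not torch.cuda.is_available(), reason="FusedLinearCE requires CUDA")
@pytest.mark.skipif(not HAVE_CUT_CROSS_ENTROPY, reason="cut_cross_entropy not installed")
def test_fused_linear_ce_smoke():
    # Reached only when both CUDA and cut_cross_entropy are available.
    from nemo_automodel.components.loss.linear_ce import FusedLinearCrossEntropy

    assert FusedLinearCrossEntropy is not None
```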