1 change: 1 addition & 0 deletions nemo_automodel/components/loss/linear_ce.py
@@ -74,6 +74,7 @@

    HAVE_CUT_CROSS_ENTROPY = True
except ImportError:  # pragma: no cover
    linear_cross_entropy = None  # pragma: no cover
    HAVE_CUT_CROSS_ENTROPY = False  # pragma: no cover


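This hunk completes the optional-import guard: binding `linear_cross_entropy` to `None` in the `except` branch keeps the module importable without `cut_cross_entropy` and turns a later misuse into an explicit check rather than a `NameError`. A minimal sketch of the pattern, assuming the package exposes `linear_cross_entropy` at its top level (the import line itself sits above the visible hunk, and the caller below is hypothetical):

```python
# Minimal sketch of the optional-dependency guard used in linear_ce.py
# (assumed import path; caller is illustrative, not the module's real API).
try:
    from cut_cross_entropy import linear_cross_entropy

    HAVE_CUT_CROSS_ENTROPY = True
except ImportError:  # pragma: no cover
    linear_cross_entropy = None  # pragma: no cover
    HAVE_CUT_CROSS_ENTROPY = False  # pragma: no cover


def fused_linear_ce(hidden, lm_head_weight, labels):
    """Hypothetical caller: fail with a clear message instead of calling None."""
    if not HAVE_CUT_CROSS_ENTROPY:
        raise RuntimeError("cut_cross_entropy is required for fused linear cross-entropy")
    return linear_cross_entropy(hidden, lm_head_weight, labels)
```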
6 changes: 3 additions & 3 deletions nemo_automodel/components/models/common/utils.py
@@ -113,13 +113,13 @@ def build_recipe(self):
        it is returned directly. String values ``"current"`` and ``"block"`` are
        mapped to the corresponding TE recipe class.
        """
        if not HAVE_TE:
            return None

        # Pass through pre-built recipe objects directly
        if not isinstance(self.recipe, str):
            return self.recipe

        if not HAVE_TE:
            return None

        from transformer_engine.common.recipe import Float8BlockScaling, Float8CurrentScaling

        if self.recipe == "block":
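The docstring above spells out the intended resolution order: pre-built recipe objects pass straight through, the strings "current" and "block" map to TE recipe classes, and string inputs degrade to `None` when Transformer Engine is unavailable. A sketch of that flow as a standalone function, under stated assumptions: it is written free-standing rather than as the method the diff touches, the unknown-string error at the end is illustrative, and since the rendered diff does not mark which copy of the `HAVE_TE` guard is the added one, the ordering shown is just one consistent reading:

```python
def build_recipe(recipe, have_te):
    """Hypothetical free-function version of the resolution logic touched above."""
    # Pass through pre-built recipe objects directly
    if not isinstance(recipe, str):
        return recipe

    # Without Transformer Engine there is nothing to build for string values
    if not have_te:
        return None

    from transformer_engine.common.recipe import Float8BlockScaling, Float8CurrentScaling

    if recipe == "block":
        return Float8BlockScaling()
    if recipe == "current":
        return Float8CurrentScaling()
    raise ValueError(f"Unsupported FP8 recipe: {recipe!r}")  # assumed error handling
```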
1 change: 1 addition & 0 deletions nemo_automodel/components/models/kimi_k25_vl/model.py
@@ -158,6 +158,7 @@ def to_dict(self) -> Dict[str, Any]:

    FLASH_ATTN_AVAILABLE = True
except ImportError:
    flash_attn_varlen_func = None
    FLASH_ATTN_AVAILABLE = False


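Same pattern as the loss-module change: with `flash_attn_varlen_func` bound to `None` on `ImportError`, call sites can gate on `FLASH_ATTN_AVAILABLE` and raise a clear error instead of hitting a `NameError`. A hypothetical caller-side sketch (function name and error text are illustrative, not taken from the model code):

```python
def varlen_attention(q, k, v, cu_seqlens, max_seqlen):
    """Illustrative guard around the optional flash-attn varlen kernel."""
    if not FLASH_ATTN_AVAILABLE:
        raise RuntimeError("flash-attn is not installed; varlen attention is unavailable")
    # flash_attn_varlen_func expects packed (total_tokens, n_heads, head_dim) tensors
    return flash_attn_varlen_func(
        q, k, v,
        cu_seqlens_q=cu_seqlens,
        cu_seqlens_k=cu_seqlens,
        max_seqlen_q=max_seqlen,
        max_seqlen_k=max_seqlen,
    )
```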
9 changes: 8 additions & 1 deletion tests/unit_tests/models/llama/test_llama_custom_model.py
@@ -20,6 +20,7 @@

from nemo_automodel import NeMoAutoModelForCausalLM
from nemo_automodel.components.models.common import BackendConfig
from nemo_automodel.components.models.common.utils import HAVE_TE
from nemo_automodel.components.models.llama.state_dict_adapter import LlamaStateDictAdapter

set_seed(42)
@@ -97,7 +98,13 @@ def _tiny_checkpoint(self, tmp_path_factory):
        )

    @pytest.mark.parametrize("rope_type", ["default", "llama3"])
    @pytest.mark.parametrize("rms_norm", ["torch_fp32", "te"])
    @pytest.mark.parametrize(
        "rms_norm",
        [
            "torch_fp32",
            pytest.param("te", marks=pytest.mark.skipif(not HAVE_TE, reason="transformer_engine not installed")),
        ],
    )
    def test_model_matches_hf_with_adapter_bidirectional(self, rope_type, rms_norm, tmp_path):
        """Test bidirectional conversion between HF and custom models produces identical outputs.

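The parametrization change above is the standard way to skip a single parameter rather than the whole test: plain values and `pytest.param(...)` entries can be mixed in one list, and marks attached through `pytest.param` apply only to that case. A small self-contained illustration of the behaviour (generic test, not part of this suite):

```python
import pytest

from nemo_automodel.components.models.common.utils import HAVE_TE


@pytest.mark.parametrize(
    "rms_norm",
    [
        "torch_fp32",  # always collected and run
        pytest.param("te", marks=pytest.mark.skipif(not HAVE_TE, reason="transformer_engine not installed")),
    ],
)
def test_rms_norm_backend_is_known(rms_norm):
    # Without transformer_engine only the "te" case reports SKIPPED;
    # the "torch_fp32" case still runs.
    assert rms_norm in {"torch_fp32", "te"}
```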
9 changes: 8 additions & 1 deletion tests/unit_tests/models/qwen2/test_qwen2_custom_model.py
@@ -20,6 +20,7 @@

from nemo_automodel import NeMoAutoModelForCausalLM
from nemo_automodel.components.models.common import BackendConfig
from nemo_automodel.components.models.common.utils import HAVE_TE
from nemo_automodel.components.models.qwen2.state_dict_adapter import Qwen2StateDictAdapter

set_seed(42)
@@ -70,7 +71,13 @@ def _tiny_checkpoint(self, tmp_path_factory):
            TINY_DEFAULT_QWEN2_CONFIG, tmp_path_factory.mktemp("qwen2_ckpt")
        )

    @pytest.mark.parametrize("rms_norm", ["torch_fp32", "te"])
    @pytest.mark.parametrize(
        "rms_norm",
        [
            "torch_fp32",
            pytest.param("te", marks=pytest.mark.skipif(not HAVE_TE, reason="transformer_engine not installed")),
        ],
    )
    def test_model_matches_hf_with_adapter_bidirectional(self, rms_norm, tmp_path):
        """Test bidirectional conversion between HF and custom models produces identical outputs.

4 changes: 4 additions & 0 deletions tests/unit_tests/recipes/test_finetune_vlm_helpers.py
@@ -20,6 +20,7 @@
from contextlib import nullcontext

from nemo_automodel.components.loggers.metric_logger import MetricsSample
from nemo_automodel.components.loss.linear_ce import HAVE_CUT_CROSS_ENTROPY
from nemo_automodel.recipes.vlm.finetune import (
    FinetuneRecipeForVLM,
    _get_model_name,
@@ -978,6 +979,7 @@ def test_calculate_loss_with_masked_ce(self):
        assert loss.dim() == 0  # scalar

    @pytest.mark.skipif(not torch.cuda.is_available(), reason="FusedLinearCE requires CUDA")
    @pytest.mark.skipif(not HAVE_CUT_CROSS_ENTROPY, reason="cut_cross_entropy not installed")
    def test_calculate_loss_with_fused_linear_ce(self):
        """Test calculate_loss with FusedLinearCrossEntropy."""
        from nemo_automodel.components.loss.linear_ce import FusedLinearCrossEntropy
@@ -1004,6 +1006,7 @@ def test_calculate_loss_with_fused_linear_ce(self):
        assert loss.dim() == 0

    @pytest.mark.skipif(not torch.cuda.is_available(), reason="FusedLinearCE requires CUDA")
    @pytest.mark.skipif(not HAVE_CUT_CROSS_ENTROPY, reason="cut_cross_entropy not installed")
    def test_calculate_loss_fused_ce_finds_lm_head_by_name(self):
        """Test that FusedLinearCE can find lm_head via named_parameters when model has no get_output_embeddings."""
        from nemo_automodel.components.loss.linear_ce import FusedLinearCrossEntropy
@@ -1521,6 +1524,7 @@ class TestForwardBackwardStepNonPP:
"""Tests for _forward_backward_step without pipeline parallelism."""

@pytest.mark.skipif(not torch.cuda.is_available(), reason="FusedLinearCE requires CUDA")
@pytest.mark.skipif(not HAVE_CUT_CROSS_ENTROPY, reason="cut_cross_entropy not installed")
def test_non_pp_with_fused_linear_ce(self, monkeypatch):
"""Test non-PP path with FusedLinearCrossEntropy."""
from nemo_automodel.components.loss.linear_ce import FusedLinearCrossEntropy
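These tests now carry two independent `skipif` markers; pytest skips the test if either condition holds, so a CUDA machine without `cut_cross_entropy` (or the reverse) reports a skip instead of an import-time failure. A minimal standalone sketch of the stacking (hypothetical test name and body):

```python
import pytest
import torch

from nemo_automodel.components.loss.linear_ce import HAVE_CUT_CROSS_ENTROPY


@pytest.mark.skipif(not torch.cuda.is_available(), reason="FusedLinearCE requires CUDA")
@pytest.mark.skipif(not HAVE_CUT_CROSS_ENTROPY, reason="cut_cross_entropy not installed")
def test_fused_linear_ce_smoke():
    # Reached only when both CUDA and cut_cross_entropy are available.
    from nemo_automodel.components.loss.linear_ce import FusedLinearCrossEntropy

    assert FusedLinearCrossEntropy is not None
```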