
Commit

resolve merge conflicts
xrsrke committed Jan 13, 2025
1 parent 9a99ab6 commit c0cb423
Showing 2 changed files with 2 additions and 4 deletions.
4 changes: 1 addition & 3 deletions src/nanotron/sanity_checks.py
@@ -11,7 +11,6 @@
 from nanotron.logging import get_logger, log_rank
 from nanotron.models import NanotronModel
 from nanotron.optim.gradient_accumulator import GradientAccumulator
-from nanotron.optim.optimizer_from_gradient_accumulator import OptimizerFromGradientAccumulator
 from nanotron.parallel import ParallelContext
 from nanotron.parallel.tied_parameters import get_tied_id_to_param

@@ -170,14 +169,13 @@ def before_optim_step_sanity_checks(
     config: Config,
     parallel_context: ParallelContext,
     unwrapped_model: NanotronModel,
-    optim: OptimizerFromGradientAccumulator,
     grad_accumulator: GradientAccumulator,
     optimizer: optim.BaseOptimizer,
 ) -> None:
 
     # NOTE: sanity check that non-fp8 parameters's gradients have
     # the same datatype of the residual stream's dtype
-    for pg in optim.param_groups:
+    for pg in optimizer.param_groups:
         for p in pg["params"]:
             assert p.grad is not None
             if isinstance(p.data, FP8Tensor):
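Net effect of this hunk: the duplicate "optim: OptimizerFromGradientAccumulator" parameter left over from the merge is dropped, and the dtype sanity check now iterates over the remaining "optimizer" argument. A minimal sketch of how the signature reads after this commit, reconstructed from the diff context above; the body is elided, and the Config and optim imports are assumptions about what sits earlier in the file's existing import block.

# Sketch reconstructed from the diff context above, not the full file.
# NanotronModel, GradientAccumulator and ParallelContext imports are visible in
# the first hunk; the Config and optim imports below are assumptions.
from nanotron import optim
from nanotron.config import Config
from nanotron.models import NanotronModel
from nanotron.optim.gradient_accumulator import GradientAccumulator
from nanotron.parallel import ParallelContext

def before_optim_step_sanity_checks(
    config: Config,
    parallel_context: ParallelContext,
    unwrapped_model: NanotronModel,
    grad_accumulator: GradientAccumulator,
    optimizer: optim.BaseOptimizer,
) -> None:
    ...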
2 changes: 1 addition & 1 deletion src/nanotron/trainer.py
@@ -603,7 +603,7 @@ def training_step(
         )
 
         before_optim_step_sanity_checks(
-            self.config, self.parallel_context, self.unwrapped_model, self.optimizer, self.grad_accumulator
+            self.config, self.parallel_context, self.unwrapped_model, self.grad_accumulator, self.optimizer
         )
 
         # Compute DP average loss and overlap with optimizer step
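The call-site change mirrors the new parameter order: grad_accumulator now precedes optimizer. A hypothetical keyword-argument form of the same call, shown only to make the reordering explicit; the commit itself keeps positional arguments.

# Illustration only; argument names are taken from the updated signature in
# src/nanotron/sanity_checks.py. Not part of this commit.
before_optim_step_sanity_checks(
    config=self.config,
    parallel_context=self.parallel_context,
    unwrapped_model=self.unwrapped_model,
    grad_accumulator=self.grad_accumulator,
    optimizer=self.optimizer,
)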
