pr

stanford-crfm · Feb 10, 2025 · eda11af · eda11af
1 parent 459c526
commit eda11af
Showing 1 changed file with 6 additions and 0 deletions.
diff --git a/src/levanter/eval.py b/src/levanter/eval.py
@@ -196,6 +196,9 @@ def cb_tagged_lm_evaluate(
         EvalBatch, tagged_eval_sets, tokenizer, device_mesh, axis_mapping, max_examples_per_dataset, mp=mp
     )
 
+    if not eval_current and not eval_ema:
+        raise ValueError("At least one of eval_current or eval_ema should be True")
+
     def eval_callback(step: StepInfo):
         if eval_current:
             with levanter.tracker.capture_time() as time_fn:
@@ -205,6 +208,9 @@ def eval_callback(step: StepInfo):
 
             levanter.tracker.log(log_dict, step=step.step)
 
+        if not eval_current and step.state.model_averaging is None:
+            raise ValueError("Cannot evaluate EMA model without model averaging, but you only want to evaluate EMA")
+
         if eval_ema and step.state.model_averaging is not None:
             with levanter.tracker.capture_time() as time_fn:
                 result = evaluator.evaluate(step.eval_model)