Skip to content

Commit acade4d

Browse files
committed
Moved the eval all-reduce code so that it runs after the eval loss and metric are appended to the JSON lists.
Signed-off-by: meetkuma <[email protected]>
1 parent 0e0f3bc commit acade4d

File tree

1 file changed

+11
-10
lines changed

1 file changed

+11
-10
lines changed

QEfficient/finetune/utils/train_utils.py

Lines changed: 11 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -334,10 +334,7 @@ def train(
334334
eval_loss, eval_metric, step_loss, step_metric = evaluation_helper(
335335
model, train_config, eval_dataloader, device
336336
)
337-
# Print evaluation metrics
338-
logger.log_rank_zero(
339-
f"Epoch {epoch + 1}: Eval Loss: {eval_loss.detach().cpu():.4f}, Eval metric: {eval_metric.detach().cpu():.4f}"
340-
)
337+
341338
if eval_loss < best_val_loss:
342339
best_val_loss = eval_loss
343340
logger.log_rank_zero(f"Best eval loss on epoch {epoch + 1} is {best_val_loss:.4f}")
@@ -350,6 +347,16 @@ def train(
350347
val_loss.append(float(eval_loss))
351348
val_metric.append(float(eval_metric))
352349

350+
if train_config.enable_ddp:
351+
dist.all_reduce(eval_loss, op=dist.ReduceOp.SUM)
352+
eval_loss /= get_num_ddp_devices()
353+
dist.all_reduce(eval_metric, op=dist.ReduceOp.SUM)
354+
eval_metric /= get_num_ddp_devices()
355+
356+
logger.log_rank_zero(
357+
f"Epoch {epoch + 1}: Eval Loss: {eval_loss.detach().cpu():.4f}, Eval metric: {eval_metric.detach().cpu():.4f}"
358+
)
359+
353360
# saving the adapters after completion of each epoch
354361
if train_config.save_model:
355362
if train_config.enable_ddp:
@@ -469,12 +476,6 @@ def evaluation_helper(model, train_config, eval_dataloader, device):
469476
else:
470477
eval_metric = torch.exp(eval_loss)
471478

472-
if train_config.enable_ddp:
473-
dist.all_reduce(eval_loss, op=dist.ReduceOp.SUM)
474-
eval_loss /= get_num_ddp_devices()
475-
dist.all_reduce(eval_metric, op=dist.ReduceOp.SUM)
476-
eval_metric /= get_num_ddp_devices()
477-
478479
return eval_loss, eval_metric, val_step_loss, val_step_metric
479480

480481

0 commit comments

Comments
 (0)