Skip to content

Commit

Permalink
fix recording trace
Browse files Browse the repository at this point in the history
  • Loading branch information
3outeille committed Jun 24, 2024
1 parent 72ccd4c commit 1f5a1dd
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 2 deletions.
10 changes: 9 additions & 1 deletion src/nanotron/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -481,7 +481,15 @@ def get_profiler(config: Config):
on_trace_ready = None
prof = profile(
activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA],
schedule=torch.profiler.schedule(wait=1, warmup=1, active=1, repeat=1, skip_first=3),
# schedule=torch.profiler.schedule(wait=1, warmup=1, active=1, repeat=1, skip_first=3),
# With skip_first=2, wait=1, warmup=1, active=3, repeat=1 the profiler
# ignores the first two steps, then idles for one more step,
# warms up on the step after that, and records
# the following three steps,
# after which the trace becomes available
# and on_trace_ready (when set) is called;
# because repeat=1, only this single cycle is recorded
schedule=torch.profiler.schedule(wait=1, warmup=1, active=3, repeat=1, skip_first=2),
on_trace_ready=on_trace_ready,
# record_shapes=True,
# profile_memory=True,
Expand Down
2 changes: 1 addition & 1 deletion src/nanotron/trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -417,7 +417,7 @@ def train(
torch.cuda.empty_cache()
with prof:
for self.iteration_step in range(self.metadata.last_train_step + 1, self.config.tokens.train_steps + 1):
if isinstance(prof, torch.profiler.profile) and self.iteration_step < 6: # Only profile the first 6 steps
if isinstance(prof, torch.profiler.profile):
prof.step()

self.iteration_start_time = time.time()
Expand Down

0 comments on commit 1f5a1dd

Please sign in to comment.