File tree Expand file tree Collapse file tree 1 file changed +2
-15
lines changed Expand file tree Collapse file tree 1 file changed +2
-15
lines changed Original file line number Diff line number Diff line change 1111import triton .language as tl
1212import iris
1313from iris ._mpi_helpers import mpi_allgather
14- # from examples.common.utils import read_realtime
15-
16- @triton .jit
17- def read_realtime ():
18- tmp = tl .inline_asm_elementwise (
19- asm = "mov.u64 $0, %globaltimer;" ,
20- constraints = ("=l" ),
21- args = [],
22- dtype = tl .int64 ,
23- is_pure = False ,
24- pack = 1 ,
25- )
26- return tmp
14+ from examples .common .utils import read_realtime
2715
2816
2917@triton .jit ()
@@ -270,11 +258,10 @@ def print_run_settings(
270258 mm_begin_cpu = mm_begin_timestamp .cpu ().numpy ()
271259 mm_end_cpu = mm_end_timestamp .cpu ().numpy ()
272260
273- gpu_freq = iris .hip .get_wall_clock_rate (cur_rank ) * 1e-3
274261 for destination_rank in range (num_ranks ):
275262 delta = mm_end_cpu [destination_rank , :] - mm_begin_cpu [destination_rank , :]
276263 avg_cc = float (delta .sum () / max (1 , delta .size ) / max (1 , niter ))
277- local_latency [destination_rank ] = avg_cc / gpu_freq
264+ local_latency [destination_rank ] = avg_cc * 10 # convert ticks to ns: the counter advances every 10 ns (clock frequency is 100 MHz on MI300)
278265
279266 latency_matrix = mpi_allgather (local_latency .cpu ())
280267
You can’t perform that action at this time.
0 commit comments