We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 1072557 commit 9910619Copy full SHA for 9910619
benchmarks/bench_load_latency.py
@@ -13,6 +13,7 @@
13
from iris._mpi_helpers import mpi_allgather
14
# from examples.common.utils import read_realtime
15
16
+
17
@triton.jit
18
def read_realtime():
19
tmp = tl.inline_asm_elementwise(
@@ -270,7 +271,7 @@ def print_run_settings(
270
271
mm_begin_cpu = mm_begin_timestamp.cpu().numpy()
272
mm_end_cpu = mm_end_timestamp.cpu().numpy()
273
- gpu_freq = iris.hip.get_wall_clock_rate(cur_rank)
274
+ gpu_freq = iris.hip.get_wall_clock_rate(cur_rank)
275
276
for destination_rank in range(num_ranks):
277
delta = mm_end_cpu[destination_rank, :] - mm_begin_cpu[destination_rank, :]
0 commit comments