File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change 1111import triton .language as tl
1212import iris
1313from iris ._mpi_helpers import mpi_allgather
14- # from examples.common.utils import read_realtime
15-
16- @triton .jit
17- def read_realtime ():
18- tmp = tl .inline_asm_elementwise (
19- asm = "mov.u64 $0, %globaltimer;" ,
20- constraints = ("=l" ),
21- args = [],
22- dtype = tl .int64 ,
23- is_pure = False ,
24- pack = 1 ,
25- )
26- return tmp
14+ from examples .common .utils import read_realtime
2715
2816
2917@triton .jit ()
@@ -270,11 +258,10 @@ def print_run_settings(
270258 mm_begin_cpu = mm_begin_timestamp .cpu ().numpy ()
271259 mm_end_cpu = mm_end_timestamp .cpu ().numpy ()
272260
273- gpu_freq = iris .hip .get_wall_clock_rate (cur_rank ) * 1e-3
274261 for destination_rank in range (num_ranks ):
275262 delta = mm_end_cpu [destination_rank , :] - mm_begin_cpu [destination_rank , :]
276263 avg_cc = float (delta .sum () / max (1 , delta .size ) / max (1 , niter ))
277- local_latency [destination_rank ] = avg_cc / gpu_freq
264+ local_latency [destination_rank ] = avg_cc * 10 # since the value is updated every 10ns (cc freq is 100MHZ in MI300)
278265
279266 latency_matrix = mpi_allgather (local_latency .cpu ())
280267
You can’t perform that action at this time.
0 commit comments