Skip to content

Commit b782e12

Browse files
committed
Consolidate asymmetric nounroll schedule into parameterized asymmetric schedule
The no-unroll path needs a different kernel interleaving strategy than the unrolled path. It uses 2-group interleaving (shared A loads interleaved with MMA) with B loads and G2S prefetches in a separate third cluster, rather than the 4-group interleaving that folds B loads and G2S prefetches directly into the two MMA clusters. The 4-group pattern was designed for the unrolled kernel, where the larger loop body can absorb the extra live values; with unroll_factor=1 the tighter loop needs the third cluster to keep VGPR pressure in check. Instead of importing a separate get_mxfp4_asymmetric_nounroll_schedule, callers now pass unroll_factor and unroll_kernel to get_mxfp4_asymmetric_schedule.
1 parent 809d2cb commit b782e12

4 files changed

Lines changed: 124 additions & 434 deletions

File tree

examples/python/7.1_schedule.py

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@
2727
get_mxfp4_dbuf_pingpong_schedule,
2828
get_mxfp4_dbuf_mixed_pingpong_schedule,
2929
get_mxfp4_asymmetric_schedule,
30-
get_mxfp4_asymmetric_nounroll_schedule,
3130
get_mxfp4_dbuf_mixed_pingpong_shuffle_schedule,
3231
get_mxfp4_dbuf_pingpong_schedule_Bshuffled,
3332
get_mxfp4_dbuf_pingpong_schedule_Bshuffled_lds,
@@ -387,14 +386,12 @@ def test_dbuf_4wave_mxfp_preshuffle_b_gemm_cpp(
387386
options.wave_runtime = True
388387
options.dump_intermediates = "build/intermediates"
389388
options.eliminate_epilogue = eliminate_epilogue
390-
if no_unroll:
391-
schedule = get_mxfp4_asymmetric_nounroll_schedule(
392-
eliminate_epilogue=eliminate_epilogue, is_bscale_shuffled=True
393-
)
394-
else:
395-
schedule = get_mxfp4_asymmetric_schedule(
396-
eliminate_epilogue=eliminate_epilogue, is_bscale_shuffled=True
397-
)
389+
schedule = get_mxfp4_asymmetric_schedule(
390+
eliminate_epilogue=eliminate_epilogue,
391+
is_bscale_shuffled=True,
392+
unroll_factor=1 if no_unroll else 2,
393+
unroll_kernel=not no_unroll,
394+
)
398395
options.print_ir_after = "all" if is_debug else []
399396
options = set_default_run_config(options)
400397
gemm = wave_compile(options, gemm, schedule)

tests/kernel/wave/asm/test_waveasm_e2e.py

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1169,7 +1169,6 @@ def _dbuf_mxfp4_helper(
11691169
from wave_lang.kernel.wave.schedules import (
11701170
get_mxfp4_dbuf_schedule,
11711171
get_mxfp4_asymmetric_schedule,
1172-
get_mxfp4_asymmetric_nounroll_schedule,
11731172
)
11741173
from wave_lang.kernel.wave.scheduling.schedule_enums import SchedulingType
11751174
from wave_lang.kernel.wave.utils.run_utils import set_default_run_config
@@ -1202,14 +1201,12 @@ def _dbuf_mxfp4_helper(
12021201
)
12031202
options.eliminate_epilogue = eliminate_epilogue
12041203
if use_schedule:
1205-
if no_unroll:
1206-
schedule = get_mxfp4_asymmetric_nounroll_schedule(
1207-
eliminate_epilogue=eliminate_epilogue, is_bscale_shuffled=True
1208-
)
1209-
else:
1210-
schedule = get_mxfp4_asymmetric_schedule(
1211-
eliminate_epilogue=eliminate_epilogue, is_bscale_shuffled=True
1212-
)
1204+
schedule = get_mxfp4_asymmetric_schedule(
1205+
eliminate_epilogue=eliminate_epilogue,
1206+
is_bscale_shuffled=True,
1207+
unroll_factor=1 if no_unroll else 2,
1208+
unroll_kernel=not no_unroll,
1209+
)
12131210
else:
12141211
schedule = None
12151212
options.schedule = SchedulingType.NONE

wave_lang/kernel/wave/schedules/__init__.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
get_mxfp4_dbuf_mixed_pingpong_schedule,
1919
get_mxfp4_dbuf_mixed_pingpong_shuffle_schedule,
2020
get_mxfp4_asymmetric_schedule,
21-
get_mxfp4_asymmetric_nounroll_schedule,
2221
get_mxfp4_dbuf_pingpong_schedule_Bshuffled,
2322
get_mxfp4_dbuf_pingpong_schedule_Bshuffled_lds,
2423
)
@@ -35,7 +34,6 @@
3534
"get_mxfp4_dbuf_pingpong_schedule_Bshuffled",
3635
"get_mxfp4_dbuf_pingpong_schedule_Bshuffled_lds",
3736
"get_mxfp4_asymmetric_schedule",
38-
"get_mxfp4_asymmetric_nounroll_schedule",
3937
"get_mxfp4_dbuf_mixed_pingpong_shuffle_schedule",
4038
"get_attention_prefetch_schedule",
4139
]

0 commit comments

Comments
 (0)