44// Verifies scf.for -> waveasm.loop and scf.if -> waveasm.if with correct
55// SSA threading, iter_args, and condition patterns.
66//
7- // Note: scf_if_to_wave_if currently produces a vreg condition for waveasm.if
8- // (instead of scc), so the module dumps in generic form after verification .
7+ // With SALU promotion, arith.cmpi on scalar operands emits s_cmp (SCC result),
8+ // so waveasm.if gets an SCC condition and the output is in custom form.
99
1010module {
1111 gpu.module @test_scf_translation {
1212
1313 // --- scf.for(0, 16, 1) -> waveasm.loop with SGPR induction variable ---
14- // CHECK-LABEL: sym_name = " scf_for_to_loop"
14+ // CHECK-LABEL: waveasm.program @scf_for_to_loop
1515 gpu.func @scf_for_to_loop () kernel {
1616 %c0 = arith.constant 0 : index
1717 %c1 = arith.constant 1 : index
1818 %c16 = arith.constant 16 : index
1919
2020 // Init materialised via s_mov_b32, loop carries single sreg
21- // CHECK: " waveasm.s_mov_b32"
22- // CHECK: " waveasm.loop"
21+ // CHECK: waveasm.s_mov_b32
22+ // CHECK: waveasm.loop
2323 scf.for %i = %c0 to %c16 step %c1 {
2424 %i_i32 = arith.index_cast %i : index to i32
2525 }
2626 // Induction variable incremented, compared, condition terminates
27- // CHECK: %[[NEXT:.*]], %{{.*}} = waveasm.s_add_u32 %[[IV]] , %{{.*}} : !waveasm.sreg, !waveasm.imm<1> -> !waveasm.sreg, !waveasm.scc
27+ // CHECK: %[[NEXT:.*]], %{{.*}} = waveasm.s_add_u32 %{{.*}}, %{{.*}} : !waveasm.sreg, !waveasm.imm<1> -> !waveasm.sreg, !waveasm.scc
2828 // CHECK-NEXT: %[[CMP:.*]] = waveasm.s_cmp_lt_u32 %[[NEXT]], %{{.*}} : !waveasm.sreg, !waveasm.imm<16> -> !waveasm.scc
2929 // CHECK-NEXT: waveasm.condition %[[CMP]] : !waveasm.scc iter_args(%[[NEXT]]) : !waveasm.sreg
3030
31- // CHECK: " waveasm.s_endpgm"
31+ // CHECK: waveasm.s_endpgm
3232 gpu.return
3333 }
3434
3535 // --- scf.for with iter_args -> waveasm.loop with two iter_args ---
36- // CHECK-LABEL: sym_name = " scf_for_with_iter_args"
36+ // CHECK-LABEL: waveasm.program @scf_for_with_iter_args
3737 gpu.func @scf_for_with_iter_args () kernel {
3838 %c0 = arith.constant 0 : index
3939 %c1 = arith.constant 1 : index
4040 %c16 = arith.constant 16 : index
4141 %init = arith.constant 0 : i32
4242
4343 // Two inits: sreg counter + vreg accumulator
44- // CHECK: " waveasm.s_mov_b32"
45- // CHECK: " waveasm.v_mov_b32"
46- // CHECK: " waveasm.loop"
44+ // CHECK: waveasm.s_mov_b32
45+ // CHECK: waveasm.v_mov_b32
46+ // CHECK: waveasm.loop
4747 %result = scf.for %i = %c0 to %c16 step %c1
4848 iter_args (%acc = %init ) -> (i32 ) {
4949 %i_i32 = arith.index_cast %i : index to i32
5050 %new_acc = arith.addi %acc , %i_i32 : i32
5151 scf.yield %new_acc : i32
5252 }
5353 // Body accumulates: vreg + sreg
54- // CHECK: " waveasm.v_add_u32"
54+ // CHECK: waveasm.v_add_u32
5555 // Induction variable incremented, compared, condition with both iter_args
56- // CHECK: %[[NEXT:.*]], %{{.*}} = waveasm.s_add_u32 %[[IV]] , %{{.*}} : !waveasm.sreg, !waveasm.imm<1> -> !waveasm.sreg, !waveasm.scc
56+ // CHECK: %[[NEXT:.*]], %{{.*}} = waveasm.s_add_u32 %{{.*}}, %{{.*}} : !waveasm.sreg, !waveasm.imm<1> -> !waveasm.sreg, !waveasm.scc
5757 // CHECK-NEXT: %[[CMP:.*]] = waveasm.s_cmp_lt_u32 %[[NEXT]], %{{.*}} : !waveasm.sreg, !waveasm.imm<16> -> !waveasm.scc
58- // CHECK-NEXT: waveasm.condition %[[CMP]] : !waveasm.scc iter_args(%[[NEXT]], %[[NEWACC]] ) : !waveasm.sreg, !waveasm.vreg
58+ // CHECK-NEXT: waveasm.condition %[[CMP]] : !waveasm.scc iter_args(%[[NEXT]], %{{.*}}) : !waveasm.sreg, !waveasm.vreg
5959
60- // CHECK: " waveasm.s_endpgm"
60+ // CHECK: waveasm.s_endpgm
6161 gpu.return
6262 }
6363
6464 // --- scf.if -> waveasm.if with then/else branches ---
65- // CHECK-LABEL: sym_name = " scf_if_to_wave_if"
65+ // CHECK-LABEL: waveasm.program @scf_if_to_wave_if
6666 gpu.func @scf_if_to_wave_if () kernel {
6767 %arg0 = arith.constant 5 : i32
6868 %arg1 = arith.constant 3 : i32
6969 %c10 = arith.constant 10 : i32
7070 %cond_i32 = arith.cmpi slt , %arg0 , %c10 : i32
7171 %cond_ext = arith.extui %cond_i32 : i1 to i32
7272
73+ // SALU promotion: scalar cmpi produces SCC directly
74+ // CHECK: waveasm.s_cmp_lt_i32
7375 // CHECK: %{{.*}} = waveasm.if %{{.*}} : !waveasm.scc -> !waveasm.vreg {
7476 %result = scf.if %cond_i32 -> i32 {
75- // CHECK: " waveasm.v_add_u32"
77+ // CHECK: waveasm.v_add_u32
7678 %sum = arith.addi %arg0 , %arg1 : i32
77- // CHECK: " waveasm.yield"
79+ // CHECK: waveasm.yield
7880 scf.yield %sum : i32
7981 } else {
80- // CHECK: " waveasm.v_sub_u32"
82+ // CHECK: waveasm.v_sub_u32
8183 %diff = arith.subi %arg0 , %arg1 : i32
82- // CHECK: " waveasm.yield"
84+ // CHECK: waveasm.yield
8385 scf.yield %diff : i32
8486 }
8587
86- // CHECK: " waveasm.s_endpgm"
88+ // CHECK: waveasm.s_endpgm
8789 gpu.return
8890 }
8991
9092 // --- Nested scf.for -> nested waveasm.loop ---
91- // CHECK-LABEL: sym_name = " nested_scf_loops"
93+ // CHECK-LABEL: waveasm.program @nested_scf_loops
9294 gpu.func @nested_scf_loops () kernel {
9395 %c0 = arith.constant 0 : index
9496 %c1 = arith.constant 1 : index
9597 %c4 = arith.constant 4 : index
9698 %c8 = arith.constant 8 : index
9799
98100 // Outer loop: sreg counter
99- // CHECK: " waveasm.loop"
101+ // CHECK: waveasm.loop
100102 scf.for %i = %c0 to %c4 step %c1 {
101103 // Inner loop: sreg counter
102- // CHECK: " waveasm.loop"
104+ // CHECK: waveasm.loop
103105 scf.for %j = %c0 to %c8 step %c1 {
104106 // Body uses both outer and inner IVs
105- // CHECK: " waveasm.s_add_u32"
107+ // CHECK: waveasm.s_add_u32
106108 %sum = arith.addi %i , %j : index
107109 }
108110 // Inner condition
@@ -111,7 +113,7 @@ module {
111113 // Outer condition
112114 // CHECK: waveasm.condition %{{.*}} : !waveasm.scc iter_args(%{{.*}}) : !waveasm.sreg
113115
114- // CHECK: " waveasm.s_endpgm"
116+ // CHECK: waveasm.s_endpgm
115117 gpu.return
116118 }
117119 }
0 commit comments