Skip to content

Commit dd3b163

Browse files
committed
Update existing lit tests and add new tests for SALU promotion and emitter changes
Fix 5 test regressions from cherry-picked SALU promotion commits:
- region-based-translation.mlir: output now in custom form (SCC condition)
- vadd-commute.mlir: v_add_u32 is VOP3-only on GFX9+, needs scratch VGPR
- buffer-ops-srd-adjust.mlir: match WaveASM IR form for SRD construction
- scf-if-agpr-else-coercion.mlir: output now in custom form (SCC condition)
- swizzle-srd-num-records.mlir: match WaveASM IR form for swizzle SRD

Add 4 new lit tests:
- salu-promotion-arith.mlir: SGPR muli/addi/cmpi use SALU instructions
- salu-select-fusion.mlir: scalar cmpi + select fuses to s_cmp + s_cselect
- agpr-inline-constant.mlir: inline constants written directly to AGPRs
- vop2-commutative-swap.mlir: VOP2 literal swapped from src1 to src0

Made-with: Cursor
Signed-off-by: Sanket Pandit <sanket.pandit@amd.com>
1 parent 807ea90 commit dd3b163

9 files changed

Lines changed: 196 additions & 58 deletions
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
// RUN: waveasm-translate --waveasm-linear-scan --emit-assembly %s | FileCheck %s
2+
//
3+
// Test: Inline AGPR constants. When writing an inline constant ([-16, 64])
4+
// to an AGPR, emit v_accvgpr_write_b32 directly without a scratch VGPR.
5+
// Non-inline literals still require v_mov_b32 to scratch VGPR first.
6+
7+
// CHECK-LABEL: agpr_inline_test:
8+
9+
waveasm.program @agpr_inline_test target = #waveasm.target<#waveasm.gfx942, 5> abi = #waveasm.abi<> attributes {vgprs = 32 : i64, sgprs = 16 : i64} {
10+
11+
// Inline constant 0 -> direct v_accvgpr_write_b32, no scratch VGPR
12+
%c0 = waveasm.constant 0 : !waveasm.imm<0>
13+
// CHECK: v_accvgpr_write_b32 a{{[0-9]+}}, 0
14+
%a0 = waveasm.v_mov_b32 %c0 : !waveasm.imm<0> -> !waveasm.areg
15+
16+
// Inline constant 42 -> direct v_accvgpr_write_b32
17+
%c42 = waveasm.constant 42 : !waveasm.imm<42>
18+
// CHECK-NEXT: v_accvgpr_write_b32 a{{[0-9]+}}, 42
19+
%a1 = waveasm.v_mov_b32 %c42 : !waveasm.imm<42> -> !waveasm.areg
20+
21+
// Non-inline literal 999 -> must use scratch VGPR
22+
%c999 = waveasm.constant 999 : !waveasm.imm<999>
23+
// CHECK-NEXT: v_mov_b32 v15, 999
24+
// CHECK-NEXT: v_accvgpr_write_b32 a{{[0-9]+}}, v15
25+
%a2 = waveasm.v_mov_b32 %c999 : !waveasm.imm<999> -> !waveasm.areg
26+
27+
// CHECK: s_endpgm
28+
waveasm.s_endpgm
29+
}

waveasm/test/Transforms/region-based-translation.mlir

Lines changed: 28 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -4,105 +4,107 @@
44
// Verifies scf.for -> waveasm.loop and scf.if -> waveasm.if with correct
55
// SSA threading, iter_args, and condition patterns.
66
//
7-
// Note: scf_if_to_wave_if currently produces a vreg condition for waveasm.if
8-
// (instead of scc), so the module dumps in generic form after verification.
7+
// With SALU promotion, arith.cmpi on scalar operands emits s_cmp (SCC result),
8+
// so waveasm.if gets an SCC condition and the output is in custom form.
99

1010
module {
1111
gpu.module @test_scf_translation {
1212

1313
// --- scf.for(0, 16, 1) -> waveasm.loop with SGPR induction variable ---
14-
// CHECK-LABEL: sym_name = "scf_for_to_loop"
14+
// CHECK-LABEL: waveasm.program @scf_for_to_loop
1515
gpu.func @scf_for_to_loop() kernel {
1616
%c0 = arith.constant 0 : index
1717
%c1 = arith.constant 1 : index
1818
%c16 = arith.constant 16 : index
1919

2020
// Init materialised via s_mov_b32, loop carries single sreg
21-
// CHECK: "waveasm.s_mov_b32"
22-
// CHECK: "waveasm.loop"
21+
// CHECK: waveasm.s_mov_b32
22+
// CHECK: waveasm.loop
2323
scf.for %i = %c0 to %c16 step %c1 {
2424
%i_i32 = arith.index_cast %i : index to i32
2525
}
2626
// Induction variable incremented, compared, condition terminates
27-
// CHECK: %[[NEXT:.*]], %{{.*}} = waveasm.s_add_u32 %[[IV]], %{{.*}} : !waveasm.sreg, !waveasm.imm<1> -> !waveasm.sreg, !waveasm.scc
27+
// CHECK: %[[NEXT:.*]], %{{.*}} = waveasm.s_add_u32 %{{.*}}, %{{.*}} : !waveasm.sreg, !waveasm.imm<1> -> !waveasm.sreg, !waveasm.scc
2828
// CHECK-NEXT: %[[CMP:.*]] = waveasm.s_cmp_lt_u32 %[[NEXT]], %{{.*}} : !waveasm.sreg, !waveasm.imm<16> -> !waveasm.scc
2929
// CHECK-NEXT: waveasm.condition %[[CMP]] : !waveasm.scc iter_args(%[[NEXT]]) : !waveasm.sreg
3030

31-
// CHECK: "waveasm.s_endpgm"
31+
// CHECK: waveasm.s_endpgm
3232
gpu.return
3333
}
3434

3535
// --- scf.for with iter_args -> waveasm.loop with two iter_args ---
36-
// CHECK-LABEL: sym_name = "scf_for_with_iter_args"
36+
// CHECK-LABEL: waveasm.program @scf_for_with_iter_args
3737
gpu.func @scf_for_with_iter_args() kernel {
3838
%c0 = arith.constant 0 : index
3939
%c1 = arith.constant 1 : index
4040
%c16 = arith.constant 16 : index
4141
%init = arith.constant 0 : i32
4242

4343
// Two inits: sreg counter + vreg accumulator
44-
// CHECK: "waveasm.s_mov_b32"
45-
// CHECK: "waveasm.v_mov_b32"
46-
// CHECK: "waveasm.loop"
44+
// CHECK: waveasm.s_mov_b32
45+
// CHECK: waveasm.v_mov_b32
46+
// CHECK: waveasm.loop
4747
%result = scf.for %i = %c0 to %c16 step %c1
4848
iter_args(%acc = %init) -> (i32) {
4949
%i_i32 = arith.index_cast %i : index to i32
5050
%new_acc = arith.addi %acc, %i_i32 : i32
5151
scf.yield %new_acc : i32
5252
}
5353
// Body accumulates: vreg + sreg
54-
// CHECK: "waveasm.v_add_u32"
54+
// CHECK: waveasm.v_add_u32
5555
// Induction variable incremented, compared, condition with both iter_args
56-
// CHECK: %[[NEXT:.*]], %{{.*}} = waveasm.s_add_u32 %[[IV]], %{{.*}} : !waveasm.sreg, !waveasm.imm<1> -> !waveasm.sreg, !waveasm.scc
56+
// CHECK: %[[NEXT:.*]], %{{.*}} = waveasm.s_add_u32 %{{.*}}, %{{.*}} : !waveasm.sreg, !waveasm.imm<1> -> !waveasm.sreg, !waveasm.scc
5757
// CHECK-NEXT: %[[CMP:.*]] = waveasm.s_cmp_lt_u32 %[[NEXT]], %{{.*}} : !waveasm.sreg, !waveasm.imm<16> -> !waveasm.scc
58-
// CHECK-NEXT: waveasm.condition %[[CMP]] : !waveasm.scc iter_args(%[[NEXT]], %[[NEWACC]]) : !waveasm.sreg, !waveasm.vreg
58+
// CHECK-NEXT: waveasm.condition %[[CMP]] : !waveasm.scc iter_args(%[[NEXT]], %{{.*}}) : !waveasm.sreg, !waveasm.vreg
5959

60-
// CHECK: "waveasm.s_endpgm"
60+
// CHECK: waveasm.s_endpgm
6161
gpu.return
6262
}
6363

6464
// --- scf.if -> waveasm.if with then/else branches ---
65-
// CHECK-LABEL: sym_name = "scf_if_to_wave_if"
65+
// CHECK-LABEL: waveasm.program @scf_if_to_wave_if
6666
gpu.func @scf_if_to_wave_if() kernel {
6767
%arg0 = arith.constant 5 : i32
6868
%arg1 = arith.constant 3 : i32
6969
%c10 = arith.constant 10 : i32
7070
%cond_i32 = arith.cmpi slt, %arg0, %c10 : i32
7171
%cond_ext = arith.extui %cond_i32 : i1 to i32
7272

73+
// SALU promotion: scalar cmpi produces SCC directly
74+
// CHECK: waveasm.s_cmp_lt_i32
7375
// CHECK: %{{.*}} = waveasm.if %{{.*}} : !waveasm.scc -> !waveasm.vreg {
7476
%result = scf.if %cond_i32 -> i32 {
75-
// CHECK: "waveasm.v_add_u32"
77+
// CHECK: waveasm.v_add_u32
7678
%sum = arith.addi %arg0, %arg1 : i32
77-
// CHECK: "waveasm.yield"
79+
// CHECK: waveasm.yield
7880
scf.yield %sum : i32
7981
} else {
80-
// CHECK: "waveasm.v_sub_u32"
82+
// CHECK: waveasm.v_sub_u32
8183
%diff = arith.subi %arg0, %arg1 : i32
82-
// CHECK: "waveasm.yield"
84+
// CHECK: waveasm.yield
8385
scf.yield %diff : i32
8486
}
8587

86-
// CHECK: "waveasm.s_endpgm"
88+
// CHECK: waveasm.s_endpgm
8789
gpu.return
8890
}
8991

9092
// --- Nested scf.for -> nested waveasm.loop ---
91-
// CHECK-LABEL: sym_name = "nested_scf_loops"
93+
// CHECK-LABEL: waveasm.program @nested_scf_loops
9294
gpu.func @nested_scf_loops() kernel {
9395
%c0 = arith.constant 0 : index
9496
%c1 = arith.constant 1 : index
9597
%c4 = arith.constant 4 : index
9698
%c8 = arith.constant 8 : index
9799

98100
// Outer loop: sreg counter
99-
// CHECK: "waveasm.loop"
101+
// CHECK: waveasm.loop
100102
scf.for %i = %c0 to %c4 step %c1 {
101103
// Inner loop: sreg counter
102-
// CHECK: "waveasm.loop"
104+
// CHECK: waveasm.loop
103105
scf.for %j = %c0 to %c8 step %c1 {
104106
// Body uses both outer and inner IVs
105-
// CHECK: "waveasm.s_add_u32"
107+
// CHECK: waveasm.s_add_u32
106108
%sum = arith.addi %i, %j : index
107109
}
108110
// Inner condition
@@ -111,7 +113,7 @@ module {
111113
// Outer condition
112114
// CHECK: waveasm.condition %{{.*}} : !waveasm.scc iter_args(%{{.*}}) : !waveasm.sreg
113115

114-
// CHECK: "waveasm.s_endpgm"
116+
// CHECK: waveasm.s_endpgm
115117
gpu.return
116118
}
117119
}

waveasm/test/Transforms/vadd-commute.mlir

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,21 @@
11
// RUN: waveasm-translate --waveasm-linear-scan --emit-assembly %s | FileCheck %s
22
//
3-
// Test: v_add_u32 commutes non-inline literal from src1 to src0
3+
// Test: v_add_u32 is VOP3-only on GFX9+, so non-inline literals must be
4+
// materialized into a scratch VGPR (no VOP2 commutation available).
5+
// Inline constants work directly.
46

57
// CHECK-LABEL: vadd_commute_test:
68

79
waveasm.program @vadd_commute_test target = #waveasm.target<#waveasm.gfx942, 5> abi = #waveasm.abi<> {
810
%v0 = waveasm.precolored.vreg 0 : !waveasm.pvreg<0>
911

10-
// Non-inline literal in src1 should be commuted to src0
12+
// Non-inline literal 256: materialized into scratch VGPR (v_add_u32 is VOP3)
1113
%c256 = waveasm.constant 256 : !waveasm.imm<256>
12-
// CHECK-NOT: v_mov_b32
13-
// CHECK: v_add_u32 v{{[0-9]+}}, 256, v0
14+
// CHECK: v_mov_b32 v15, 256
15+
// CHECK: v_add_u32 v{{[0-9]+}}, v0, v15
1416
%r1 = waveasm.v_add_u32 %v0, %c256 : !waveasm.pvreg<0>, !waveasm.imm<256> -> !waveasm.vreg
1517

16-
// Inline constant should work without commutation
18+
// Inline constant should work without materialization
1719
%c1 = waveasm.constant 1 : !waveasm.imm<1>
1820
// CHECK: v_add_u32 v{{[0-9]+}}, v{{[0-9]+}}, 1
1921
%r2 = waveasm.v_add_u32 %r1, %c1 : !waveasm.vreg, !waveasm.imm<1> -> !waveasm.vreg
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
// RUN: waveasm-translate --waveasm-linear-scan --emit-assembly %s | FileCheck %s
2+
//
3+
// Test: VOP2 commutative literal swap. For VOP2 instructions (v_and_b32,
4+
// v_or_b32, v_xor_b32), when a non-inline literal appears in src1, the
5+
// emitter swaps operands to place it in src0, avoiding scratch VGPR
6+
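// materialization. Non-commutative ops with a literal in src1 still need materialization (not exercised below); a literal already in src0 (v_lshlrev_b32) is emitted as-is.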
7+
8+
// CHECK-LABEL: vop2_commute_swap_test:
9+
10+
waveasm.program @vop2_commute_swap_test target = #waveasm.target<#waveasm.gfx942, 5> abi = #waveasm.abi<> {
11+
%v0 = waveasm.precolored.vreg 0 : !waveasm.pvreg<0>
12+
13+
// v_and_b32 with literal in src1: swap to src0 (commutative)
14+
%c4096 = waveasm.constant 4096 : !waveasm.imm<4096>
15+
// CHECK-NOT: v_mov_b32
16+
// CHECK: v_and_b32 v{{[0-9]+}}, 4096, v0
17+
%r1 = waveasm.v_and_b32 %v0, %c4096 : !waveasm.pvreg<0>, !waveasm.imm<4096> -> !waveasm.vreg
18+
19+
// v_or_b32 with literal in src1: swap to src0 (commutative)
20+
%c256 = waveasm.constant 256 : !waveasm.imm<256>
21+
// CHECK-NOT: v_mov_b32
22+
// CHECK: v_or_b32 v{{[0-9]+}}, 256, v0
23+
%r2 = waveasm.v_or_b32 %v0, %c256 : !waveasm.pvreg<0>, !waveasm.imm<256> -> !waveasm.vreg
24+
25+
// v_xor_b32 with literal in src1: swap to src0 (commutative)
26+
%c128 = waveasm.constant 128 : !waveasm.imm<128>
27+
// CHECK-NOT: v_mov_b32
28+
// CHECK: v_xor_b32 v{{[0-9]+}}, 128, v0
29+
%r3 = waveasm.v_xor_b32 %v0, %c128 : !waveasm.pvreg<0>, !waveasm.imm<128> -> !waveasm.vreg
30+
31+
// v_lshlrev_b32 with literal in src0: literal already in correct position
32+
%c200 = waveasm.constant 200 : !waveasm.imm<200>
33+
// CHECK-NOT: v_mov_b32
34+
// CHECK: v_lshlrev_b32 v{{[0-9]+}}, 200, v0
35+
%r4 = waveasm.v_lshlrev_b32 %c200, %v0 : !waveasm.imm<200>, !waveasm.pvreg<0> -> !waveasm.vreg
36+
37+
// CHECK: s_endpgm
38+
waveasm.s_endpgm
39+
}

waveasm/test/Translate/buffer-ops-srd-adjust.mlir

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -38,17 +38,14 @@ func.func @buffer_ops_test(%arg0: memref<f16>, %arg1: memref<f32>) {
3838
to memref<?xf16, #amdgpu.address_space<fat_raw_buffer>>
3939

4040
// The load SRD should be adjusted with the workgroup offset via SALU:
41-
// s_mov_b64 (copy base), s_mov_b32 (wg offset already in SGPR),
4241
// s_mul_hi_i32 + s_mul_i32 (signed 64-bit byte offset),
4342
// s_add_u32 + s_addc_u32 (adjust base),
44-
// s_mov_b32 (num_records, element-aligned sentinel-safe max).
45-
// CHECK: s_mov_b64 s[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}]
43+
// s_mov_b32 (num_records), s_mov_b32 (stride/swizzle flags). The checks below match the WaveASM IR form, ending at the waveasm.pack before the buffer load.
4644
// CHECK: waveasm.s_mul_hi_i32
4745
// CHECK: waveasm.s_mul_i32
4846
// CHECK: waveasm.s_add_u32
4947
// CHECK: waveasm.s_addc_u32
50-
// CHECK: s_mov_b32 s{{[0-9]+}}, 0x7FFFFFF
51-
// CHECK: s_mov_b32 s{{[0-9]+}}, 0x20000
48+
// CHECK: waveasm.pack
5249
// CHECK: waveasm.buffer_load_dwordx2
5350
%loaded = vector.load %buf0[%th_offset]
5451
: memref<?xf16, #amdgpu.address_space<fat_raw_buffer>>, vector<4xf16>
@@ -72,13 +69,11 @@ func.func @buffer_ops_test(%arg0: memref<f16>, %arg1: memref<f32>) {
7269
%ext = arith.extf %elem : vector<1xf16> to vector<1xf32>
7370

7471
// The store SRD should also be adjusted, with sentinel-safe max num_records (the num_records constant itself is not matched here; the checks end at waveasm.pack).
75-
// CHECK: s_mov_b64 s[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}]
7672
// CHECK: waveasm.s_mul_hi_i32
7773
// CHECK: waveasm.s_mul_i32
7874
// CHECK: waveasm.s_add_u32
7975
// CHECK: waveasm.s_addc_u32
80-
// CHECK: s_mov_b32 s{{[0-9]+}}, 0x7FFFFFF
81-
// CHECK: s_mov_b32 s{{[0-9]+}}, 0x20000
76+
// CHECK: waveasm.pack
8277
// CHECK: waveasm.buffer_store_dword
8378
vector.store %ext, %buf1[%thread_id]
8479
: memref<?xf32, #amdgpu.address_space<fat_raw_buffer>>, vector<1xf32>
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
// RUN: waveasm-translate %s 2>&1 | FileCheck %s
2+
//
3+
// Test: SALU promotion of scalar arithmetic. When both operands of an arith op
4+
// are scalar (SGPRs such as workgroup_id, or immediates), the auto-select emit helpers route through
5+
// SALU instructions instead of VALU.
6+
7+
module {
8+
gpu.module @test_salu_promotion {
9+
10+
// CHECK-LABEL: waveasm.program @salu_mul_add
11+
gpu.func @salu_mul_add() kernel {
12+
%wg_x = gpu.block_id x upper_bound 4
13+
%wg_y = gpu.block_id y upper_bound 4
14+
%c128 = arith.constant 128 : index
15+
16+
// Scalar (SGPR) * immediate -> s_mul_i32
17+
// CHECK: waveasm.s_mul_i32 %{{.*}}, %{{.*}} : !waveasm.sreg, !waveasm.imm<128> -> !waveasm.sreg
18+
%prod = arith.muli %wg_x, %c128 : index
19+
20+
// Scalar (SGPR) + scalar (SGPR) -> s_add_u32
21+
// CHECK: waveasm.s_add_u32 %{{.*}}, %{{.*}} : !waveasm.sreg, !waveasm.sreg -> !waveasm.sreg, !waveasm.scc
22+
%sum = arith.addi %prod, %wg_y : index
23+
24+
// CHECK: waveasm.s_endpgm
25+
gpu.return
26+
}
27+
28+
// CHECK-LABEL: waveasm.program @salu_cmpi
29+
gpu.func @salu_cmpi() kernel {
30+
%wg_x = gpu.block_id x upper_bound 16
31+
%c10 = arith.constant 10 : index
32+
33+
// Scalar cmpi -> s_cmp (SCC result). The immediate is first moved to SGPR.
34+
// CHECK: waveasm.s_mov_b32 %{{.*}} : !waveasm.imm<10> -> !waveasm.sreg
35+
// CHECK: waveasm.s_cmp_lt_i32 %{{.*}}, %{{.*}} : !waveasm.sreg, !waveasm.sreg -> !waveasm.scc
36+
%cmp = arith.cmpi slt, %wg_x, %c10 : index
37+
38+
// CHECK: waveasm.s_endpgm
39+
gpu.return
40+
}
41+
}
42+
}
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
// RUN: waveasm-translate %s 2>&1 | FileCheck %s
2+
//
3+
// Test: Scalar cmpi + scalar select fusion into s_cmp + s_cselect_b32.
4+
// When both comparison operands are scalar and the select's true/false values
5+
// are also scalar, the backend fuses the pair into a single s_cmp + s_cselect
6+
// sequence, avoiding the VALU v_cmp + v_cndmask path.
7+
8+
module {
9+
gpu.module @test_select_fusion {
10+
11+
// CHECK-LABEL: waveasm.program @cmpi_select_scalar_fusion
12+
gpu.func @cmpi_select_scalar_fusion() kernel {
13+
%wg_x = gpu.block_id x upper_bound 16
14+
%c10 = arith.constant 10 : index
15+
%c100 = arith.constant 100 : index
16+
%c200 = arith.constant 200 : index
17+
18+
// Scalar cmpi + scalar select -> s_cmp_lt_i32 + s_cselect_b32
19+
// CHECK: waveasm.s_cmp_lt_i32
20+
// CHECK: waveasm.s_cselect_b32
21+
// CHECK-NOT: waveasm.v_cmp
22+
// CHECK-NOT: waveasm.v_cndmask
23+
%cmp = arith.cmpi slt, %wg_x, %c10 : index
24+
%sel = arith.select %cmp, %c100, %c200 : index
25+
26+
// CHECK: waveasm.s_endpgm
27+
gpu.return
28+
}
29+
}
30+
}

waveasm/test/Translate/scf-if-agpr-else-coercion.mlir

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,13 @@
99
// values (immediates). The backend must coerce the else-yield immediates
1010
// into register types so that both branches yield type-compatible values.
1111
//
12-
// Note: the translator currently produces a vreg condition for waveasm.if
13-
// (instead of scc), so the output is dumped in generic form after
14-
// verification. The CHECK patterns below match generic form.
12+
// With SALU promotion, scalar cmpi produces SCC directly, so waveasm.if
13+
// gets an SCC condition and the output is in custom form.
1514

1615
module {
1716
gpu.module @test_if_else_coercion {
1817

19-
// CHECK-LABEL: sym_name = "if_else_coercion"
18+
// CHECK-LABEL: waveasm.program @if_else_coercion
2019
gpu.func @if_else_coercion() kernel {
2120
%c0 = arith.constant 0 : index
2221
%c1 = arith.constant 1 : index
@@ -28,20 +27,21 @@ module {
2827
// Then branch: compute a VGPR value (addi -> v_add_u32)
2928
// Else branch: yield a constant zero (-> immediate coerced to vreg)
3029
//
31-
// CHECK: "waveasm.if"
32-
// CHECK: "waveasm.v_add_u32"
33-
// CHECK: "waveasm.yield"
34-
// CHECK: }, {
35-
// CHECK: "waveasm.v_mov_b32"
36-
// CHECK: "waveasm.yield"
30+
// CHECK: waveasm.s_cmp_lt_i32
31+
// CHECK: waveasm.if %{{.*}} : !waveasm.scc -> !waveasm.vreg {
32+
// CHECK: waveasm.v_add_u32
33+
// CHECK: waveasm.yield
34+
// CHECK: } else {
35+
// CHECK: waveasm.v_mov_b32
36+
// CHECK: waveasm.yield
3737
%result = scf.if %cond_i1 -> i32 {
3838
%val = arith.addi %zero_i32, %one_i32 : i32
3939
scf.yield %val : i32
4040
} else {
4141
scf.yield %zero_i32 : i32
4242
}
4343

44-
// CHECK: "waveasm.s_endpgm"
44+
// CHECK: waveasm.s_endpgm
4545
gpu.return
4646
}
4747
}

0 commit comments

Comments
 (0)