Skip to content

Commit 93d26c3

Browse files
Hardcode84claude
andcommitted
Add cross-class spill tests and guard unimplemented spill directions
Tests added: - no-spill-needed: verify no spill ops when VGPRs fit within limit - no-tied: verify tied values are never eviction candidates - exhausted: verify proper error when both VGPR and AGPR are full - multi-reg: verify size > 1 values are not eviction candidates - multiple: verify multiple values can be spilled to different AGPRs - sgpr-unsupported: verify SGPR overflow gives a clean error Also: - Disable SGPR and AGPR cross-class eviction paths in allocate() (pass nullptr as altPool) since the spill insertion only handles VGPR -> AGPR. Previously these paths would silently corrupt the allocation by evicting without inserting spill/reload ops. - Add explicit validation in insertSpillReloads() that rejects non-VGPR->AGPR spill records with a diagnostic. - Simplify spill/reload insertion to not branch on direction (only VGPR->AGPR is supported). Known limitation: if two spilled values feed the same instruction, both reload into the single scratch v14, corrupting the first reload. This does not occur in practice (spilled values are rare and unlikely to be consumed by the same op). Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> Signed-off-by: Ivan Butygin <ivan.butygin@gmail.com>
1 parent 6f4ab9e commit 93d26c3

8 files changed

Lines changed: 176 additions & 35 deletions

waveasm/lib/Transforms/LinearScanPass.cpp

Lines changed: 25 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,18 @@ struct LinearScanPass
155155
LogicalResult insertSpillReloads(ProgramOp program,
156156
ArrayRef<SpillRecord> spills,
157157
PhysicalMapping &mapping) {
158+
// Validate that all spills use the implemented VGPR -> AGPR direction.
159+
// SGPR -> VGPR and AGPR -> VGPR are wired in the allocator but not yet
160+
// supported by the spill insertion logic.
161+
for (const SpillRecord &sr : spills) {
162+
if (sr.sourceClass != RegClass::VGPR ||
163+
sr.targetClass != RegClass::AGPR) {
164+
return program.emitOpError()
165+
<< "unsupported spill direction: only VGPR -> AGPR is "
166+
"currently implemented";
167+
}
168+
}
169+
158170
// Build a lookup from victim Value -> SpillRecord.
159171
llvm::DenseMap<Value, const SpillRecord *> spillMap;
160172
for (const SpillRecord &sr : spills)
@@ -167,17 +179,17 @@ struct LinearScanPass
167179
// to thread through the spill ops.
168180
//
169181
// We insert a PrecoloredARegOp at program entry to materialise the
170-
// AGPR "slot". It does not generate any assembly; it just provides
182+
// AGPR "slot". It does not generate any assembly; it just provides
171183
// an SSA value with the right physical type for the dialect ops.
172184
llvm::DenseMap<Value, Value> spillSlots; // victim -> AGPR SSA value
173185
{
174186
OpBuilder entryBuilder(ctx);
175187
Block &entry = program.getBodyBlock();
176188
entryBuilder.setInsertionPointToStart(&entry);
177189
for (const SpillRecord &sr : spills) {
178-
auto physARegType = PARegType::get(ctx, sr.targetPhysReg, 1);
190+
auto physType = PARegType::get(ctx, sr.targetPhysReg, 1);
179191
Value slot = PrecoloredARegOp::create(entryBuilder, program.getLoc(),
180-
physARegType, sr.targetPhysReg,
192+
physType, sr.targetPhysReg,
181193
/*size=*/1);
182194
spillSlots[sr.victim] = slot;
183195
}
@@ -194,13 +206,10 @@ struct LinearScanPass
194206
if (it == spillMap.end())
195207
continue;
196208
const SpillRecord &sr = *it->second;
197-
if (sr.sourceClass == RegClass::VGPR &&
198-
sr.targetClass == RegClass::AGPR) {
199-
OpBuilder builder(ctx);
200-
builder.setInsertionPointAfter(op);
201-
Value slot = spillSlots[sr.victim];
202-
V_ACCVGPR_WRITE_B32::create(builder, op->getLoc(), result, slot);
203-
}
209+
OpBuilder builder(ctx);
210+
builder.setInsertionPointAfter(op);
211+
Value slot = spillSlots[sr.victim];
212+
V_ACCVGPR_WRITE_B32::create(builder, op->getLoc(), result, slot);
204213
}
205214

206215
// --- Insert reloads before uses. ---
@@ -209,24 +218,15 @@ struct LinearScanPass
209218
auto it = spillMap.find(operand);
210219
if (it == spillMap.end())
211220
continue;
212-
const SpillRecord &sr = *it->second;
213-
if (sr.sourceClass == RegClass::VGPR &&
214-
sr.targetClass == RegClass::AGPR) {
215-
OpBuilder builder(op);
216-
auto scratchType = PVRegType::get(ctx, kSpillScratchVGPR, 1);
217-
Value slot = spillSlots[sr.victim];
218-
Value reloaded = V_ACCVGPR_READ_B32::create(builder, op->getLoc(),
219-
scratchType, slot);
220-
op->setOperand(i, reloaded);
221-
}
221+
OpBuilder builder(op);
222+
auto scratchType = PVRegType::get(ctx, kSpillScratchVGPR, 1);
223+
Value slot = spillSlots[operand];
224+
Value reloaded = V_ACCVGPR_READ_B32::create(builder, op->getLoc(),
225+
scratchType, slot);
226+
op->setOperand(i, reloaded);
222227
}
223228
}
224229

225-
// Update the mapping for spilled values: the victim VGPR is no longer
226-
// live; its uses have been rewritten to read from the scratch VGPR.
227-
// The original mapping (victim -> sourcePhysReg) is left as-is for the
228-
// type transformation to assign the correct PVRegType to the def site.
229-
230230
return success();
231231
}
232232

waveasm/lib/Transforms/LinearScanRegAlloc.cpp

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -300,19 +300,19 @@ LinearScanRegAlloc::allocate(ProgramOp program) {
300300
return failure();
301301
stats.peakVGPRs = vgprPool.getPeakUsage();
302302

303-
// Step 6: Allocate SGPRs. On failure, evict to spare VGPRs.
304-
if (failed(allocateRegClass(liveness.sregRanges, sgprPool, mapping, stats,
305-
tiedOperands, precoloredValues, "SGPR", program,
306-
maxSGPRs, liveness.maxSRegPressure, &vgprPool,
307-
&spills, &liveness)))
303+
// Step 6: Allocate SGPRs (no cross-class spilling yet).
304+
if (failed(allocateRegClass(
305+
liveness.sregRanges, sgprPool, mapping, stats, tiedOperands,
306+
precoloredValues, "SGPR", program, maxSGPRs, liveness.maxSRegPressure,
307+
/*altPool=*/nullptr, /*spills=*/nullptr, &liveness)))
308308
return failure();
309309
stats.peakSGPRs = sgprPool.getPeakUsage();
310310

311-
// Step 7: Allocate AGPRs. On failure, evict to spare VGPRs.
312-
if (failed(allocateRegClass(liveness.aregRanges, agprPool, mapping, stats,
313-
tiedOperands, precoloredValues, "AGPR", program,
314-
maxAGPRs, liveness.maxARegPressure, &vgprPool,
315-
&spills, &liveness)))
311+
// Step 7: Allocate AGPRs (no cross-class spilling yet).
312+
if (failed(allocateRegClass(
313+
liveness.aregRanges, agprPool, mapping, stats, tiedOperands,
314+
precoloredValues, "AGPR", program, maxAGPRs, liveness.maxARegPressure,
315+
/*altPool=*/nullptr, /*spills=*/nullptr, &liveness)))
316316
return failure();
317317
stats.peakAGPRs = agprPool.getPeakUsage();
318318

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
// RUN: not waveasm-translate --waveasm-linear-scan="max-vgprs=4 max-agprs=0" %s 2>&1 | FileCheck %s
2+
//
3+
// Test: When both VGPR and AGPR pools are exhausted, allocation fails
4+
// with a diagnostic. max-agprs=0 leaves no spare AGPRs for eviction.
5+
6+
// CHECK: error: 'waveasm.program' op Failed to allocate VGPR
7+
waveasm.program @both_exhausted target = #waveasm.target<#waveasm.gfx942, 5> abi = #waveasm.abi<> {
8+
%v0 = waveasm.precolored.vreg 0 : !waveasm.pvreg<0>
9+
%v1 = waveasm.precolored.vreg 1 : !waveasm.pvreg<1>
10+
%r1 = waveasm.v_add_u32 %v0, %v1 : !waveasm.pvreg<0>, !waveasm.pvreg<1> -> !waveasm.vreg
11+
%r2 = waveasm.v_add_u32 %r1, %v0 : !waveasm.vreg, !waveasm.pvreg<0> -> !waveasm.vreg
12+
%r3 = waveasm.v_add_u32 %r2, %v1 : !waveasm.vreg, !waveasm.pvreg<1> -> !waveasm.vreg
13+
%sum = waveasm.v_add_u32 %r1, %r3 : !waveasm.vreg, !waveasm.vreg -> !waveasm.vreg
14+
waveasm.s_endpgm
15+
}
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
// RUN: not waveasm-translate --waveasm-linear-scan="max-vgprs=4 max-agprs=8" %s 2>&1 | FileCheck %s
2+
//
3+
// Test: Multi-register (size > 1) values are NOT spill candidates.
4+
// When the only live values are multi-register, eviction is not attempted
5+
// and allocation fails.
6+
7+
// CHECK: error: 'waveasm.program' op Failed to allocate VGPR
8+
waveasm.program @multi_reg_no_spill target = #waveasm.target<#waveasm.gfx942, 5> abi = #waveasm.abi<> {
9+
%srd = waveasm.precolored.sreg 0, 4 : !waveasm.psreg<0, 4>
10+
%voff = waveasm.precolored.vreg 0 : !waveasm.pvreg<0>
11+
%soff = waveasm.constant 0 : !waveasm.imm<0>
12+
13+
// A 4-wide load consumes v2..v5 (v0 precolored, v14/v15 reserved).
14+
// That exhausts the pool. There are no size-1 eviction candidates.
15+
%wide = waveasm.buffer_load_dwordx4 %srd, %voff, %soff : !waveasm.psreg<0, 4>, !waveasm.pvreg<0>, !waveasm.imm<0> -> !waveasm.vreg<4, 4>
16+
17+
// This needs another VGPR but nothing can be evicted (wide is size 4).
18+
%extra = waveasm.v_add_u32 %voff, %voff : !waveasm.pvreg<0>, !waveasm.pvreg<0> -> !waveasm.vreg
19+
20+
// Keep wide live past extra to create the pressure.
21+
%elem = waveasm.extract %wide[0] : !waveasm.vreg<4, 4> -> !waveasm.vreg
22+
%sum = waveasm.v_add_u32 %elem, %extra : !waveasm.vreg, !waveasm.vreg -> !waveasm.vreg
23+
24+
waveasm.s_endpgm
25+
}
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
// RUN: waveasm-translate --waveasm-linear-scan="max-vgprs=5 max-agprs=8" %s | FileCheck %s
2+
//
3+
// Test: Multiple values are spilled to different AGPRs when VGPR pressure
4+
// exceeds the limit by more than one register.
5+
//
6+
// With max-vgprs=5: v0, v1 precolored; v14, v15 outside range (>= 5).
7+
// Only v2, v3, v4 are allocatable. We create 4 virtual VGPRs with
8+
// overlapping liveness, so at least 1 must be spilled. The long-lived
9+
// values r1 and r2 compete for VGPRs while r3 and r4 also need them.
10+
//
11+
// NOTE: two spilled values must not feed the same instruction because
12+
// both would reload into the single scratch v14. This test avoids that.
13+
14+
// CHECK-LABEL: waveasm.program @multiple_spills
15+
waveasm.program @multiple_spills target = #waveasm.target<#waveasm.gfx942, 5> abi = #waveasm.abi<> {
16+
%v0 = waveasm.precolored.vreg 0 : !waveasm.pvreg<0>
17+
%v1 = waveasm.precolored.vreg 1 : !waveasm.pvreg<1>
18+
19+
// Four virtual VGPRs; r1, r2 are long-lived.
20+
%r1 = waveasm.v_add_u32 %v0, %v1 : !waveasm.pvreg<0>, !waveasm.pvreg<1> -> !waveasm.vreg
21+
%r2 = waveasm.v_add_u32 %v0, %v1 : !waveasm.pvreg<0>, !waveasm.pvreg<1> -> !waveasm.vreg
22+
%r3 = waveasm.v_add_u32 %v0, %v1 : !waveasm.pvreg<0>, !waveasm.pvreg<1> -> !waveasm.vreg
23+
%r4 = waveasm.v_add_u32 %v0, %v1 : !waveasm.pvreg<0>, !waveasm.pvreg<1> -> !waveasm.vreg
24+
25+
// At least one spill write expected.
26+
// CHECK: waveasm.v_accvgpr_write_b32
27+
// Use spilled values one at a time (avoid double-reload into same scratch).
28+
%s1 = waveasm.v_add_u32 %r1, %r3 : !waveasm.vreg, !waveasm.vreg -> !waveasm.vreg
29+
%s2 = waveasm.v_add_u32 %r2, %r4 : !waveasm.vreg, !waveasm.vreg -> !waveasm.vreg
30+
%final = waveasm.v_add_u32 %s1, %s2 : !waveasm.vreg, !waveasm.vreg -> !waveasm.vreg
31+
32+
waveasm.s_endpgm
33+
}
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
// RUN: waveasm-translate --waveasm-linear-scan="max-vgprs=8 max-agprs=4" %s | FileCheck %s
2+
//
3+
// Test: No spill needed when VGPRs fit within the limit.
4+
// With max-vgprs=8 and only 3 virtual VGPRs needed, no spill should occur.
5+
6+
// CHECK-LABEL: waveasm.program @no_spill_needed
7+
// CHECK-NOT: v_accvgpr_write_b32
8+
// CHECK-NOT: v_accvgpr_read_b32
9+
waveasm.program @no_spill_needed target = #waveasm.target<#waveasm.gfx942, 5> abi = #waveasm.abi<> {
10+
%v0 = waveasm.precolored.vreg 0 : !waveasm.pvreg<0>
11+
%v1 = waveasm.precolored.vreg 1 : !waveasm.pvreg<1>
12+
%r1 = waveasm.v_add_u32 %v0, %v1 : !waveasm.pvreg<0>, !waveasm.pvreg<1> -> !waveasm.vreg
13+
%r2 = waveasm.v_add_u32 %r1, %v0 : !waveasm.vreg, !waveasm.pvreg<0> -> !waveasm.vreg
14+
%r3 = waveasm.v_add_u32 %r2, %v1 : !waveasm.vreg, !waveasm.pvreg<1> -> !waveasm.vreg
15+
%sum = waveasm.v_add_u32 %r1, %r3 : !waveasm.vreg, !waveasm.vreg -> !waveasm.vreg
16+
waveasm.s_endpgm
17+
}
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
// RUN: waveasm-translate --waveasm-linear-scan="max-vgprs=4 max-agprs=4" %s | FileCheck %s
2+
//
3+
// Test: Tied values (MFMA accumulator -> result) are NOT spill candidates.
4+
// The spill should pick an untied value instead.
5+
//
6+
// With max-vgprs=4: v0, v1 precolored; v14, v15 reserved. Only v2, v3
7+
// allocatable. Three values are live simultaneously -> one must spill.
8+
// The spill must NOT be a tied value.
9+
10+
// CHECK-LABEL: waveasm.program @no_tied_spill
11+
waveasm.program @no_tied_spill target = #waveasm.target<#waveasm.gfx942, 5> abi = #waveasm.abi<> {
12+
%v0 = waveasm.precolored.vreg 0 : !waveasm.pvreg<0>
13+
%v1 = waveasm.precolored.vreg 1 : !waveasm.pvreg<1>
14+
15+
// Long-lived untied value.
16+
%addr = waveasm.v_add_u32 %v0, %v1 : !waveasm.pvreg<0>, !waveasm.pvreg<1> -> !waveasm.vreg
17+
18+
// Second value creating pressure.
19+
%tmp = waveasm.v_add_u32 %v0, %v1 : !waveasm.pvreg<0>, !waveasm.pvreg<1> -> !waveasm.vreg
20+
21+
// Third value that triggers the spill.
22+
%extra = waveasm.v_add_u32 %v0, %v1 : !waveasm.pvreg<0>, !waveasm.pvreg<1> -> !waveasm.vreg
23+
24+
// Use all three -> one must be spilled. Verify that spill/reload appear.
25+
// CHECK: waveasm.v_accvgpr_write_b32
26+
// CHECK: waveasm.v_accvgpr_read_b32
27+
%s1 = waveasm.v_add_u32 %addr, %tmp : !waveasm.vreg, !waveasm.vreg -> !waveasm.vreg
28+
%s2 = waveasm.v_add_u32 %s1, %extra : !waveasm.vreg, !waveasm.vreg -> !waveasm.vreg
29+
30+
waveasm.s_endpgm
31+
}
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
// RUN: not waveasm-translate --waveasm-linear-scan="max-sgprs=4 max-vgprs=8" %s 2>&1 | FileCheck %s
2+
//
3+
// Test: SGPR overflow triggers a hard error because SGPR -> VGPR spilling
4+
// is not yet implemented. The allocator does not attempt cross-class
5+
// eviction for SGPRs (altPool is nullptr).
6+
7+
// CHECK: error: 'waveasm.program' op Failed to allocate SGPR
8+
waveasm.program @sgpr_overflow target = #waveasm.target<#waveasm.gfx942, 5> abi = #waveasm.abi<> {
9+
%s0 = waveasm.precolored.sreg 0 : !waveasm.psreg<0>
10+
%s1 = waveasm.precolored.sreg 1 : !waveasm.psreg<1>
11+
%s2 = waveasm.precolored.sreg 2 : !waveasm.psreg<2>
12+
%s3 = waveasm.precolored.sreg 3 : !waveasm.psreg<3>
13+
14+
// All 4 SGPRs precolored. Virtual SGPRs have nowhere to go.
15+
%r1 = waveasm.s_mul_i32 %s0, %s1 : !waveasm.psreg<0>, !waveasm.psreg<1> -> !waveasm.sreg
16+
%r2 = waveasm.s_mul_i32 %s2, %s3 : !waveasm.psreg<2>, !waveasm.psreg<3> -> !waveasm.sreg
17+
%r3 = waveasm.s_mul_i32 %r1, %r2 : !waveasm.sreg, !waveasm.sreg -> !waveasm.sreg
18+
19+
waveasm.s_endpgm
20+
}

0 commit comments

Comments
 (0)