Skip to content

Commit d773a6f

Browse files
committed
[stinkytofu] Implement legalizeImplicitSpecialRegisters function
for handling implicit special registers (SCC, VCC, EXEC) in instructions, and use it in both ToStinkyTofuUtils and StinkyBuildImplicitDependencyPass
1 parent 243760c commit d773a6f

6 files changed

Lines changed: 102 additions & 34 deletions

File tree

shared/stinkytofu/include/stinkytofu/ir/asm/RegisterKey.hpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,14 @@ inline RegKey toRegKey(const StinkyRegister& reg, unsigned offset = 0) {
5656
return {reg.reg.type, reg.reg.idx + offset};
5757
}
5858

59+
/// Check if two registers are the same, i.e. have the same RegType and base index (register width is not compared).
60+
/// @param reg1 The first register.
61+
/// @param reg2 The second register.
62+
/// @return True if the two registers are the same, false otherwise.
63+
inline bool isSameRegister(const StinkyRegister& reg1, const StinkyRegister& reg2) {
64+
return toRegKey(reg1) == toRegKey(reg2);
65+
}
66+
5967
/// Invoke fn(RegKey) for each DWORD in a register operand.
6068
/// Skips non-register operands (literals, immediates).
6169
template <typename Fn>

shared/stinkytofu/include/stinkytofu/transforms/asm/LegalizationUtils.hpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,18 @@ STINKYTOFU_EXPORT Legalized legalizeDSStoreB192(StinkyInstruction* inst, AsmIRBu
116116
STINKYTOFU_EXPORT Legalized legalizeDSStoreB256(StinkyInstruction* inst, AsmIRBuilder& irBuilder,
117117
GfxArchID archId, bool hasVgprMsb);
118118

119+
// Legalize implicit special registers (SCC, VCC, EXEC) on an instruction.
120+
//
121+
// HW flags (Flags.def: IF_ImplicitRead/WriteSCC, IF_ImplicitReadVCC,
122+
// IF_ImplicitRead/WriteEXEC) declare implicit reads/writes that are not
123+
// encoded as explicit operands. This function inspects those flags and adds
124+
// the corresponding singleton register (sized by `wavefrontSize` for
125+
// VCC/EXEC) to the instruction's src/dest list — but only if the register
126+
// is not already present. The check matches by RegType and idx, which is
127+
// sufficient for SCC/VCC/EXEC since they are singletons.
128+
STINKYTOFU_EXPORT void legalizeImplicitSpecialRegisters(StinkyInstruction* inst,
129+
uint32_t wavefrontSize);
130+
119131
} // namespace stinkytofu
120132

121133
#endif // STINKYTOFU_LEGALIZATION_UTILS_HPP

shared/stinkytofu/include/stinkytofu/transforms/asm/StinkyBuildImplicitDependencyPass.hpp

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -31,16 +31,21 @@ namespace stinkytofu {
3131
class Pass;
3232

3333
/**
34-
* @brief Attaches LDS pseudo-registers to instructions for implicit dependency tracking.
34+
* @brief Attaches implicit registers to instructions for dependency tracking.
3535
*
36-
* Adds RegType::LDS pseudo-registers (keyed by MemTokenData token IDs) to LDS-related
37-
* instructions so that the def-use chain enforces barrier ordering:
36+
* Two kinds of implicit dependencies are materialized as registers so that the
37+
* def-use chain builder can see them:
3838
*
39-
* LDS writers (tensor_load, ds_write) — token to dest (defines)
40-
* LDS readers (ds_read) — token to src (uses)
41-
* Barriers — token to both src and dest
39+
* 1) Special registers (SCC, VCC, EXEC) declared via HW flags
40+
* (Flags.def: IF_ImplicitRead/WriteSCC, IF_ImplicitReadVCC,
41+
* IF_ImplicitRead/WriteEXEC). The corresponding singleton register is
42+
* added to src/dest if not already present.
4243
*
43-
* This creates the dependency chain: writer → barrier → reader.
44+
* 2) RegType::LDS pseudo-registers keyed by MemTokenData token IDs:
45+
* LDS writers (tensor_load, ds_write) — token to dest (defines)
46+
* LDS readers (ds_read) — token to src (uses)
47+
* Barriers — token to both src and dest
48+
* This creates the dependency chain: writer → barrier → reader.
4449
*/
4550
STINKYTOFU_EXPORT std::unique_ptr<Pass> createStinkyBuildImplicitDependencyPass();
4651

shared/stinkytofu/src/conversion/rocisa/ToStinkyTofuUtils.cpp

Lines changed: 4 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,10 @@ Legalized legalizeInstruction(StinkyInstruction* inst, rocisa::Instruction* roci
210210
AsmIRBuilder& irBuilder, GfxArchID archId,
211211
const std::map<std::string, int>& asmCaps,
212212
const std::map<std::string, int>& archCaps, bool hasVgprMsb) {
213+
// Attach implicit special registers (SCC/VCC/EXEC) declared by HW flags
214+
// (Flags.def) to the instruction.
215+
legalizeImplicitSpecialRegisters(inst, getWaveFrontSize(archId));
216+
213217
if (isBranch(*inst)) {
214218
// Handle branch instructions
215219
rocisa::BranchInstruction* branchInst =
@@ -317,19 +321,6 @@ void addRegistersToInstruction(StinkyInstruction* stinkyInst, const rocisa::Inst
317321
}
318322
}
319323

320-
// Add implicit special registers driven by HW flags (Flags.def).
321-
if (stinkyInst->is(IF_ImplicitReadSCC)) stinkyInst->addSrcReg(StinkyRegister::getSCCRegister());
322-
if (stinkyInst->is(IF_ImplicitWriteSCC))
323-
stinkyInst->addDestReg(StinkyRegister::getSCCRegister());
324-
325-
uint32_t wfs = getWaveFrontSize(archId);
326-
if (stinkyInst->is(IF_ImplicitReadVCC))
327-
stinkyInst->addSrcReg(StinkyRegister::getVCCRegister(wfs));
328-
if (stinkyInst->is(IF_ImplicitReadEXEC))
329-
stinkyInst->addSrcReg(StinkyRegister::getEXECRegister(wfs));
330-
if (stinkyInst->is(IF_ImplicitWriteEXEC))
331-
stinkyInst->addDestReg(StinkyRegister::getEXECRegister(wfs));
332-
333324
#ifndef NDEBUG
334325
// Verify: read-write operands must exist in both destRegs and srcRegs.
335326
{

shared/stinkytofu/src/transforms/asm/LegalizationUtils.cpp

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828

2929
#include "stinkytofu/hardware/ArchHelper.hpp"
3030
#include "stinkytofu/hardware/GfxIsa.hpp"
31+
#include "stinkytofu/ir/asm/RegisterKey.hpp"
3132
#include "stinkytofu/ir/asm/StinkyAsmIR.hpp"
3233
#include "stinkytofu/ir/asm/StinkyModifiers.hpp"
3334

@@ -595,4 +596,36 @@ Legalized legalizeDSStoreB256(StinkyInstruction* inst, AsmIRBuilder& irBuilder,
595596
return {store1, store2};
596597
}
597598

599+
namespace {
600+
// Add `reg` to the dest list of `inst`, unless a register with the same
601+
// RegType/idx is already present. SCC/VCC/EXEC are singletons, so RegType+idx
602+
// is sufficient to detect duplicates introduced by an upstream stage or by an
603+
// instruction that already encodes the register as an explicit operand.
604+
void addUniqueSpecialDest(StinkyInstruction* inst, const StinkyRegister& reg) {
605+
for (const StinkyRegister& d : inst->getDestRegs())
606+
if (isSameRegister(d, reg)) return;
607+
inst->addDestReg(reg);
608+
}
609+
610+
void addUniqueSpecialSrc(StinkyInstruction* inst, const StinkyRegister& reg) {
611+
for (const StinkyRegister& s : inst->getSrcRegs())
612+
if (isSameRegister(s, reg)) return;
613+
inst->addSrcReg(reg);
614+
}
615+
} // namespace
616+
617+
void legalizeImplicitSpecialRegisters(StinkyInstruction* inst, uint32_t wavefrontSize) {
618+
if (inst == nullptr) return;
619+
620+
if (inst->is(IF_ImplicitReadSCC)) addUniqueSpecialSrc(inst, StinkyRegister::getSCCRegister());
621+
if (inst->is(IF_ImplicitWriteSCC)) addUniqueSpecialDest(inst, StinkyRegister::getSCCRegister());
622+
623+
if (inst->is(IF_ImplicitReadVCC))
624+
addUniqueSpecialSrc(inst, StinkyRegister::getVCCRegister(wavefrontSize));
625+
if (inst->is(IF_ImplicitReadEXEC))
626+
addUniqueSpecialSrc(inst, StinkyRegister::getEXECRegister(wavefrontSize));
627+
if (inst->is(IF_ImplicitWriteEXEC))
628+
addUniqueSpecialDest(inst, StinkyRegister::getEXECRegister(wavefrontSize));
629+
}
630+
598631
} // namespace stinkytofu

shared/stinkytofu/src/transforms/asm/StinkyBuildImplicitDependencyPass.cpp

Lines changed: 33 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -32,21 +32,31 @@
3232
#include "stinkytofu/core/PassManager.hpp"
3333
#include "stinkytofu/ir/asm/StinkyAsmIR.hpp"
3434
#include "stinkytofu/support/ErrorHandling.hpp"
35+
#include "stinkytofu/transforms/asm/LegalizationUtils.hpp"
3536

3637
#define DEBUG_TYPE "StinkyBuildImplicitDependencyPass"
3738

38-
// Implicit LDS dependency pass
39-
// ============================
40-
// Attaches RegType::LDS pseudo-registers to instructions based on their MemTokenData
41-
// token IDs. The instruction type determines src vs dest placement:
39+
// Implicit dependency pass
40+
// ========================
41+
// Attaches implicit registers to instructions so that the def-use chain builder
42+
// can see dependencies that are not encoded as explicit operands. Two kinds of
43+
// implicit dependencies are handled:
4244
//
43-
// tensor_load / ds_write → LDS token to dest (LDS producer)
44-
// ds_read → LDS token to src (LDS consumer)
45-
// barrier / signal / wait → LDS token to both (synchronization point)
45+
// 1) Implicit special registers (SCC, VCC, EXEC) driven by HW flags
46+
// (Flags.def: IF_ImplicitRead/WriteSCC, IF_ImplicitReadVCC,
47+
// IF_ImplicitRead/WriteEXEC). The corresponding special register is added
48+
// to src/dest if not already present.
4649
//
47-
// The def-use chain builder then sees:
48-
// producer(def LDS[t]) → barrier(use+def LDS[t]) → consumer(use LDS[t])
49-
// which forces the scheduler to respect: producers → barrier → consumers.
50+
// 2) RegType::LDS pseudo-registers (keyed by MemTokenData token IDs). The
51+
// instruction type determines src vs dest placement:
52+
//
53+
// tensor_load / ds_write → LDS token to dest (LDS producer)
54+
// ds_read → LDS token to src (LDS consumer)
55+
// barrier / signal / wait → LDS token to both (synchronization point)
56+
//
57+
// The def-use chain builder then sees:
58+
// producer(def LDS[t]) → barrier(use+def LDS[t]) → consumer(use LDS[t])
59+
// which forces the scheduler to respect: producers → barrier → consumers.
5060

5161
namespace {
5262
using namespace stinkytofu;
@@ -186,18 +196,27 @@ static void checkConsistentMemTokens(const BasicBlock& bb) {
186196

187197
void setPseudoRegistersInBlock(BasicBlock& bb, PassContext& passCtx,
188198
const std::unordered_set<const BasicBlock*>& checkBlocks) {
199+
bool doLdsTokenHandling = true;
189200
if (!passCtx.getPassFeatureConfig().barrierConfig.unrollMovableBarrier) {
190-
PASS_DEBUG(std::cerr << "[BuildImplicitDep] skip BB label=\"" << bb.getLabel()
191-
<< "\" (unrollMovableBarrier=false)\n");
192-
return;
201+
PASS_DEBUG(std::cerr << "[BuildImplicitDep] skip LDS-token handling BB label=\""
202+
<< bb.getLabel() << "\" (unrollMovableBarrier=false)\n");
203+
doLdsTokenHandling = false;
193204
}
194205

195-
checkConsistentMemTokens(bb);
206+
if (doLdsTokenHandling) {
207+
checkConsistentMemTokens(bb);
208+
}
196209

210+
const uint32_t wavefrontSize = passCtx.getWavefrontSize();
197211
for (auto it = bb.begin(); it != bb.end(); ++it) {
198212
auto* inst = dyn_cast<StinkyInstruction>(it.getNodePtr());
199213
if (!inst) continue;
200214

215+
// Always attach implicit special registers (SCC/VCC/EXEC) declared by HW flags, even when LDS-token handling is skipped.
216+
legalizeImplicitSpecialRegisters(inst, wavefrontSize);
217+
218+
if (!doLdsTokenHandling) continue;
219+
201220
const MemTokenData* mt = inst->getModifier<MemTokenData>();
202221
if (!mt) continue;
203222
assert(!mt->tokens.empty() && "MemTokenData with empty tokens");

0 commit comments

Comments
 (0)