Skip to content

Commit da663a1

Browse files
authored
[AMDGPU][GlobalISel] Add regbank support for cvt_scalef32_sr_pk_f6_f16/32 intrinsics (#192745)
This patch adds register bank legalization rules for the cvt_scalef32_sr_pk_f6_f16/32 intrinsics (the cvt_scalef32_sr_pk32_{bf6,fp6}_{f16,bf16,f32} family) in the AMDGPU GlobalISel pipeline.
1 parent 5cda8d9 commit da663a1

4 files changed

Lines changed: 218 additions & 328 deletions

File tree

llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1450,6 +1450,12 @@ LLT RegBankLegalizeHelper::getTyFromID(RegBankLLTMappingApplyID ID) {
14501450
case VgprV2S64:
14511451
case UniInVgprV2S64:
14521452
return LLT::fixed_vector(2, 64);
1453+
case VgprV6S32:
1454+
return LLT::fixed_vector(6, 32);
1455+
case VgprV32S16:
1456+
return LLT::fixed_vector(32, 16);
1457+
case VgprV32S32:
1458+
return LLT::fixed_vector(32, 32);
14531459
default:
14541460
return LLT();
14551461
}
@@ -1611,7 +1617,9 @@ RegBankLegalizeHelper::getRegBankFromID(RegBankLLTMappingApplyID ID) {
16111617
case VgprV3S32:
16121618
case VgprV4S16:
16131619
case VgprV4S32:
1620+
case VgprV6S32:
16141621
case VgprV8S32:
1622+
case VgprV32S16:
16151623
case VgprB32:
16161624
case VgprB64:
16171625
case VgprB96:
@@ -1675,7 +1683,9 @@ bool RegBankLegalizeHelper::applyMappingDst(
16751683
case VgprV3S32:
16761684
case VgprV4S16:
16771685
case VgprV4S32:
1678-
case VgprV8S32: {
1686+
case VgprV6S32:
1687+
case VgprV8S32:
1688+
case VgprV32S16: {
16791689
assert(Ty == getTyFromID(MethodIDs[OpIdx]));
16801690
assert(RB == getRegBankFromID(MethodIDs[OpIdx]));
16811691
break;
@@ -1868,7 +1878,10 @@ bool RegBankLegalizeHelper::applyMappingSrc(
18681878
case VgprV3S32:
18691879
case VgprV4S16:
18701880
case VgprV4S32:
1871-
case VgprV8S32: {
1881+
case VgprV6S32:
1882+
case VgprV8S32:
1883+
case VgprV32S16:
1884+
case VgprV32S32: {
18721885
assert(Ty == getTyFromID(MethodIDs[i]));
18731886
if (RB != VgprRB) {
18741887
auto CopyToVgpr = B.buildCopy({VgprRB, Ty}, Reg);

llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -193,6 +193,8 @@ bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID,
193193
return MRI.getType(Reg) == LLT::fixed_vector(3, 32) && MUI.isDivergent(Reg);
194194
case DivV4S16:
195195
return MRI.getType(Reg) == LLT::fixed_vector(4, 16) && MUI.isDivergent(Reg);
196+
case DivV6S32:
197+
return MRI.getType(Reg) == LLT::fixed_vector(6, 32) && MUI.isDivergent(Reg);
196198
case DivB32:
197199
return MRI.getType(Reg).getSizeInBits() == 32 && MUI.isDivergent(Reg);
198200
case DivB64:
@@ -1714,6 +1716,18 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
17141716
.Uni(V2S16, {{UniInVgprV2S16}, {IntrId, Vgpr32, Vgpr32}})
17151717
.Div(V2S16, {{VgprV2S16}, {IntrId, Vgpr32, Vgpr32}});
17161718

1719+
addRulesForIOpcs({amdgcn_cvt_scalef32_sr_pk32_bf6_f16,
1720+
amdgcn_cvt_scalef32_sr_pk32_fp6_f16,
1721+
amdgcn_cvt_scalef32_sr_pk32_bf6_bf16,
1722+
amdgcn_cvt_scalef32_sr_pk32_fp6_bf16},
1723+
Standard)
1724+
.Any({{DivV6S32}, {{VgprV6S32}, {IntrId, VgprV32S16, Vgpr32, Vgpr32}}});
1725+
1726+
addRulesForIOpcs({amdgcn_cvt_scalef32_sr_pk32_bf6_f32,
1727+
amdgcn_cvt_scalef32_sr_pk32_fp6_f32},
1728+
Standard)
1729+
.Any({{DivV6S32}, {{VgprV6S32}, {IntrId, VgprV32S32, Vgpr32, Vgpr32}}});
1730+
17171731
addRulesForIOpcs({amdgcn_global_load_tr_b64})
17181732
.Any({{DivB64}, {{VgprB64}, {IntrId, SgprP1}}})
17191733
.Any({{DivB32}, {{VgprB32}, {IntrId, SgprP1}}});

llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,7 @@ enum UniformityLLTOpPredicateID {
107107
DivV2S64,
108108
DivV3S32,
109109
DivV4S16,
110+
DivV6S32,
110111

111112
// B types
112113
B32,
@@ -255,6 +256,10 @@ enum RegBankLLTMappingApplyID {
255256
Vgpr32AExt,
256257
Vgpr32SExt,
257258
Vgpr32ZExt,
259+
260+
VgprV6S32,
261+
VgprV32S16,
262+
VgprV32S32,
258263
};
259264

260265
// Instruction needs to be replaced with sequence of instructions. Lowering was

0 commit comments

Comments
 (0)