Skip to content

Commit 703501e

Browse files
authored
[AMDGPU] Select flat GVS loads on gfx1250 (#149183)
1 parent 9912ccb commit 703501e

File tree

3 files changed

+2481
-38
lines changed

3 files changed

+2481
-38
lines changed

llvm/lib/Target/AMDGPU/FLATInstructions.td

Lines changed: 74 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -1250,6 +1250,16 @@ class GlobalLoadSaddrPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueTyp
12501250
(inst $saddr, $voffset, $offset, 0, $in)
12511251
>;
12521252

1253+
class FlatLoadSaddrPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
1254+
(vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset), vt:$in)),
1255+
(inst $saddr, $voffset, $offset, (i32 0), $in)
1256+
>;
1257+
1258+
class FlatLoadSaddrPat_D16_t16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
1259+
(vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset))),
1260+
(inst $saddr, $voffset, $offset, (i32 0))
1261+
>;
1262+
12531263
class GlobalLoadSaddrPat_D16_t16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
12541264
(vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset))),
12551265
(inst $saddr, $voffset, $offset, (i32 0))
@@ -1260,7 +1270,7 @@ class FlatLoadSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
12601270
(inst $vaddr, $offset)
12611271
>;
12621272

1263-
class GlobalLoadSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
1273+
class FlatLoadSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
12641274
(vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset))),
12651275
(inst $saddr, $voffset, $offset, 0)
12661276
>;
@@ -1444,7 +1454,7 @@ multiclass GlobalFLATLoadPats<FLAT_Pseudo inst, SDPatternOperator node, ValueTyp
14441454
let AddedComplexity = 10;
14451455
}
14461456

1447-
def : GlobalLoadSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> {
1457+
def : FlatLoadSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> {
14481458
let AddedComplexity = 11;
14491459
}
14501460
}
@@ -1454,7 +1464,7 @@ multiclass GlobalFLATLoadPats_D16<FLAT_Pseudo inst, SDPatternOperator node, Valu
14541464
let AddedComplexity = 10;
14551465
}
14561466

1457-
def : GlobalLoadSaddrPat_D16<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> {
1467+
def : FlatLoadSaddrPat_D16<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> {
14581468
let AddedComplexity = 11;
14591469
}
14601470
}
@@ -1618,61 +1628,89 @@ multiclass ScratchFLATLoadPats_D16_t16<string inst, SDPatternOperator node, Valu
16181628
}
16191629
}
16201630

1631+
multiclass FlatLoadPats<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> {
1632+
def : FlatLoadPat <inst, node, vt>;
1633+
1634+
def : FlatLoadSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> {
1635+
let AddedComplexity = 9;
1636+
let SubtargetPredicate = HasFlatGVSMode;
1637+
}
1638+
}
1639+
1640+
multiclass FlatLoadPats_D16<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> {
1641+
def : FlatLoadPat_D16 <inst, node, vt>;
1642+
1643+
def : FlatLoadSaddrPat_D16<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> {
1644+
let AddedComplexity = 9;
1645+
let SubtargetPredicate = HasFlatGVSMode;
1646+
}
1647+
}
1648+
1649+
multiclass FlatLoadPats_D16_t16<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> {
1650+
def : FlatLoadPat_D16_t16 <inst, node, vt>;
1651+
1652+
def : FlatLoadSaddrPat_D16_t16<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> {
1653+
let AddedComplexity = 9;
1654+
let SubtargetPredicate = HasFlatGVSMode;
1655+
}
1656+
}
1657+
16211658
let OtherPredicates = [HasFlatAddressSpace] in {
16221659

1623-
def : FlatLoadPat <FLAT_LOAD_UBYTE, atomic_load_aext_8_flat, i32>;
1624-
def : FlatLoadPat <FLAT_LOAD_UBYTE, atomic_load_zext_8_flat, i32>;
1625-
def : FlatLoadPat <FLAT_LOAD_USHORT, atomic_load_aext_16_flat, i32>;
1626-
def : FlatLoadPat <FLAT_LOAD_USHORT, atomic_load_zext_16_flat, i32>;
1627-
def : FlatLoadPat <FLAT_LOAD_UBYTE, extloadi8_flat, i32>;
1628-
def : FlatLoadPat <FLAT_LOAD_UBYTE, zextloadi8_flat, i32>;
1629-
def : FlatLoadPat <FLAT_LOAD_SBYTE, sextloadi8_flat, i32>;
1630-
def : FlatLoadPat <FLAT_LOAD_SBYTE, atomic_load_sext_8_flat, i32>;
1631-
def : FlatLoadPat <FLAT_LOAD_USHORT, extloadi16_flat, i32>;
1632-
def : FlatLoadPat <FLAT_LOAD_USHORT, zextloadi16_flat, i32>;
1633-
def : FlatLoadPat <FLAT_LOAD_SSHORT, sextloadi16_flat, i32>;
1634-
def : FlatLoadPat <FLAT_LOAD_SSHORT, atomic_load_sext_16_flat, i32>;
1635-
def : FlatLoadPat <FLAT_LOAD_DWORDX3, load_flat, v3i32>;
1660+
defm : FlatLoadPats <FLAT_LOAD_UBYTE, atomic_load_aext_8_flat, i32>;
1661+
defm : FlatLoadPats <FLAT_LOAD_UBYTE, atomic_load_zext_8_flat, i32>;
1662+
defm : FlatLoadPats <FLAT_LOAD_USHORT, atomic_load_aext_16_flat, i32>;
1663+
defm : FlatLoadPats <FLAT_LOAD_USHORT, atomic_load_zext_16_flat, i32>;
1664+
defm : FlatLoadPats <FLAT_LOAD_USHORT, atomic_load_zext_16_flat, i16>;
1665+
defm : FlatLoadPats <FLAT_LOAD_UBYTE, extloadi8_flat, i32>;
1666+
defm : FlatLoadPats <FLAT_LOAD_UBYTE, zextloadi8_flat, i32>;
1667+
defm : FlatLoadPats <FLAT_LOAD_SBYTE, sextloadi8_flat, i32>;
1668+
defm : FlatLoadPats <FLAT_LOAD_SBYTE, atomic_load_sext_8_flat, i32>;
1669+
defm : FlatLoadPats <FLAT_LOAD_USHORT, extloadi16_flat, i32>;
1670+
defm : FlatLoadPats <FLAT_LOAD_USHORT, zextloadi16_flat, i32>;
1671+
defm : FlatLoadPats <FLAT_LOAD_SSHORT, sextloadi16_flat, i32>;
1672+
defm : FlatLoadPats <FLAT_LOAD_SSHORT, atomic_load_sext_16_flat, i32>;
1673+
defm : FlatLoadPats <FLAT_LOAD_DWORDX3, load_flat, v3i32>;
16361674

16371675
foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
16381676
let True16Predicate = p in {
1639-
def : FlatLoadPat <FLAT_LOAD_UBYTE, extloadi8_flat, i16>;
1640-
def : FlatLoadPat <FLAT_LOAD_UBYTE, zextloadi8_flat, i16>;
1641-
def : FlatLoadPat <FLAT_LOAD_SBYTE, sextloadi8_flat, i16>;
1642-
def : FlatLoadPat <FLAT_LOAD_USHORT, load_flat, i16>;
1643-
def : FlatLoadPat <FLAT_LOAD_UBYTE, atomic_load_aext_8_flat, i16>;
1644-
def : FlatLoadPat <FLAT_LOAD_UBYTE, atomic_load_zext_8_flat, i16>;
1645-
def : FlatLoadPat <FLAT_LOAD_USHORT, atomic_load_nonext_16_flat, i16>;
1646-
def : FlatLoadPat <FLAT_LOAD_SBYTE, atomic_load_sext_8_flat, i16>;
1677+
defm : FlatLoadPats <FLAT_LOAD_UBYTE, extloadi8_flat, i16>;
1678+
defm : FlatLoadPats <FLAT_LOAD_UBYTE, zextloadi8_flat, i16>;
1679+
defm : FlatLoadPats <FLAT_LOAD_SBYTE, sextloadi8_flat, i16>;
1680+
defm : FlatLoadPats <FLAT_LOAD_USHORT, load_flat, i16>;
1681+
defm : FlatLoadPats <FLAT_LOAD_UBYTE, atomic_load_aext_8_flat, i16>;
1682+
defm : FlatLoadPats <FLAT_LOAD_UBYTE, atomic_load_zext_8_flat, i16>;
1683+
defm : FlatLoadPats <FLAT_LOAD_USHORT, atomic_load_nonext_16_flat, i16>;
1684+
defm : FlatLoadPats <FLAT_LOAD_SBYTE, atomic_load_sext_8_flat, i16>;
16471685
def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_flat, i16>;
16481686
def : FlatStorePat <FLAT_STORE_SHORT, store_flat, i16>;
16491687
def : FlatStorePat <FLAT_STORE_BYTE, atomic_store_8_flat, i16>;
16501688
def : FlatStorePat <FLAT_STORE_SHORT, atomic_store_16_flat, i16>;
16511689
}
16521690

16531691
let OtherPredicates = [D16PreservesUnusedBits, HasFlatAddressSpace], True16Predicate = UseRealTrue16Insts in {
1654-
def : FlatLoadPat_D16_t16<FLAT_LOAD_UBYTE_D16_t16, extloadi8_flat, i16>;
1655-
def : FlatLoadPat_D16_t16<FLAT_LOAD_UBYTE_D16_t16, zextloadi8_flat, i16>;
1656-
def : FlatLoadPat_D16_t16<FLAT_LOAD_SBYTE_D16_t16, sextloadi8_flat, i16>;
1657-
def : FlatLoadPat_D16_t16<FLAT_LOAD_SHORT_D16_t16, load_flat, i16>;
1658-
def : FlatLoadPat_D16_t16<FLAT_LOAD_UBYTE_D16_t16, atomic_load_aext_8_flat, i16>;
1659-
def : FlatLoadPat_D16_t16<FLAT_LOAD_UBYTE_D16_t16, atomic_load_zext_8_flat, i16>;
1660-
def : FlatLoadPat_D16_t16<FLAT_LOAD_SHORT_D16_t16, atomic_load_nonext_16_flat, i16>;
1661-
def : FlatLoadPat_D16_t16<FLAT_LOAD_SBYTE_D16_t16, atomic_load_sext_8_flat, i16>;
1692+
defm : FlatLoadPats_D16_t16<FLAT_LOAD_UBYTE_D16_t16, extloadi8_flat, i16>;
1693+
defm : FlatLoadPats_D16_t16<FLAT_LOAD_UBYTE_D16_t16, zextloadi8_flat, i16>;
1694+
defm : FlatLoadPats_D16_t16<FLAT_LOAD_SBYTE_D16_t16, sextloadi8_flat, i16>;
1695+
defm : FlatLoadPats_D16_t16<FLAT_LOAD_SHORT_D16_t16, load_flat, i16>;
1696+
defm : FlatLoadPats_D16_t16<FLAT_LOAD_UBYTE_D16_t16, atomic_load_aext_8_flat, i16>;
1697+
defm : FlatLoadPats_D16_t16<FLAT_LOAD_UBYTE_D16_t16, atomic_load_zext_8_flat, i16>;
1698+
defm : FlatLoadPats_D16_t16<FLAT_LOAD_SHORT_D16_t16, atomic_load_nonext_16_flat, i16>;
1699+
defm : FlatLoadPats_D16_t16<FLAT_LOAD_SBYTE_D16_t16, atomic_load_sext_8_flat, i16>;
16621700
def : FlatStorePat <FLAT_STORE_BYTE_t16, truncstorei8_flat, i16>;
16631701
def : FlatStorePat <FLAT_STORE_SHORT_t16, store_flat, i16>;
16641702
def : FlatStorePat <FLAT_STORE_BYTE_t16, atomic_store_8_flat, i16>;
16651703
def : FlatStorePat <FLAT_STORE_SHORT_t16, atomic_store_16_flat, i16>;
16661704
} // End let OtherPredicates = [D16PreservesUnusedBits, HasFlatAddressSpace], True16Predicate = UseRealTrue16Insts
16671705

1668-
def : FlatLoadPat <FLAT_LOAD_DWORD, atomic_load_nonext_32_flat, i32>;
1669-
def : FlatLoadPat <FLAT_LOAD_DWORDX2, atomic_load_nonext_64_flat, i64>;
1706+
defm : FlatLoadPats <FLAT_LOAD_DWORD, atomic_load_nonext_32_flat, i32>;
1707+
defm : FlatLoadPats <FLAT_LOAD_DWORDX2, atomic_load_nonext_64_flat, i64>;
16701708

16711709
def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_flat, i32>;
16721710
def : FlatStorePat <FLAT_STORE_SHORT, truncstorei16_flat, i32>;
16731711

16741712
foreach vt = Reg32Types.types in {
1675-
def : FlatLoadPat <FLAT_LOAD_DWORD, load_flat, vt>;
1713+
defm : FlatLoadPats <FLAT_LOAD_DWORD, load_flat, vt>;
16761714
def : FlatStorePat <FLAT_STORE_DWORD, store_flat, vt>;
16771715
}
16781716

@@ -1684,7 +1722,7 @@ def : FlatLoadPat <FLAT_LOAD_DWORDX2, load_flat, vt>;
16841722
def : FlatStorePat <FLAT_STORE_DWORDX3, store_flat, v3i32>;
16851723

16861724
foreach vt = VReg_128.RegTypes in {
1687-
def : FlatLoadPat <FLAT_LOAD_DWORDX4, load_flat, vt>;
1725+
defm : FlatLoadPats <FLAT_LOAD_DWORDX4, load_flat, vt>;
16881726
def : FlatStorePat <FLAT_STORE_DWORDX4, store_flat, vt>;
16891727
}
16901728

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -6460,7 +6460,7 @@ bool SIInstrInfo::moveFlatAddrToVGPR(MachineInstr &Inst) const {
64606460
if (OldSAddrIdx < 0)
64616461
return false;
64626462

6463-
assert(isSegmentSpecificFLAT(Inst));
6463+
assert(isSegmentSpecificFLAT(Inst) || (isFLAT(Inst) && ST.hasFlatGVSMode()));
64646464

64656465
int NewOpc = AMDGPU::getGlobalVaddrOp(Opc);
64666466
if (NewOpc < 0)
@@ -6537,7 +6537,7 @@ bool SIInstrInfo::moveFlatAddrToVGPR(MachineInstr &Inst) const {
65376537
// FIXME: Remove this when SelectionDAG is obsoleted.
65386538
void SIInstrInfo::legalizeOperandsFLAT(MachineRegisterInfo &MRI,
65396539
MachineInstr &MI) const {
6540-
if (!isSegmentSpecificFLAT(MI))
6540+
if (!isSegmentSpecificFLAT(MI) && !ST.hasFlatGVSMode())
65416541
return;
65426542

65436543
// Fixup SGPR operands in VGPRs. We only select these when the DAG divergence

0 commit comments

Comments
 (0)