Skip to content

Commit e53e75a

Browse files
committed
[AMDGPU] Add BFX Formation Combines to RegBankCombiner
I believe these combines are relatively safe to use there. The only new registers they may create are the constants for the BFX (bitfield extract) instruction; for those, borrow the register bank from the source register. Fixes #140040
1 parent 9927a43 commit e53e75a

File tree

9 files changed

+1332
-1438
lines changed

9 files changed

+1332
-1438
lines changed

llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4636,10 +4636,17 @@ bool CombinerHelper::matchBitfieldExtractFromSExtInReg(
46364636
if (ShiftImm < 0 || ShiftImm + Width > Ty.getScalarSizeInBits())
46374637
return false;
46384638

4639+
const RegisterBank *RB = getRegBank(ShiftSrc);
4640+
46394641
MatchInfo = [=](MachineIRBuilder &B) {
46404642
auto Cst1 = B.buildConstant(ExtractTy, ShiftImm);
46414643
auto Cst2 = B.buildConstant(ExtractTy, Width);
46424644
B.buildSbfx(Dst, ShiftSrc, Cst1, Cst2);
4645+
4646+
if (RB) {
4647+
MRI.setRegBank(Cst1.getReg(0), *RB);
4648+
MRI.setRegBank(Cst2.getReg(0), *RB);
4649+
}
46434650
};
46444651
return true;
46454652
}
@@ -4674,10 +4681,18 @@ bool CombinerHelper::matchBitfieldExtractFromAnd(MachineInstr &MI,
46744681
return false;
46754682

46764683
uint64_t Width = APInt(Size, AndImm).countr_one();
4684+
4685+
const RegisterBank *RB = getRegBank(ShiftSrc);
4686+
46774687
MatchInfo = [=](MachineIRBuilder &B) {
46784688
auto WidthCst = B.buildConstant(ExtractTy, Width);
46794689
auto LSBCst = B.buildConstant(ExtractTy, LSBImm);
46804690
B.buildInstr(TargetOpcode::G_UBFX, {Dst}, {ShiftSrc, LSBCst, WidthCst});
4691+
4692+
if (RB) {
4693+
MRI.setRegBank(WidthCst.getReg(0), *RB);
4694+
MRI.setRegBank(LSBCst.getReg(0), *RB);
4695+
}
46814696
};
46824697
return true;
46834698
}
@@ -4724,10 +4739,17 @@ bool CombinerHelper::matchBitfieldExtractFromShr(
47244739
const int64_t Pos = ShrAmt - ShlAmt;
47254740
const int64_t Width = Size - ShrAmt;
47264741

4742+
const RegisterBank *RB = getRegBank(ShlSrc);
4743+
47274744
MatchInfo = [=](MachineIRBuilder &B) {
47284745
auto WidthCst = B.buildConstant(ExtractTy, Width);
47294746
auto PosCst = B.buildConstant(ExtractTy, Pos);
47304747
B.buildInstr(ExtrOpcode, {Dst}, {ShlSrc, PosCst, WidthCst});
4748+
4749+
if (RB) {
4750+
MRI.setRegBank(WidthCst.getReg(0), *RB);
4751+
MRI.setRegBank(PosCst.getReg(0), *RB);
4752+
}
47314753
};
47324754
return true;
47334755
}
@@ -4782,10 +4804,17 @@ bool CombinerHelper::matchBitfieldExtractFromShrAnd(
47824804
if (Opcode == TargetOpcode::G_ASHR && Width + ShrAmt == Size)
47834805
return false;
47844806

4807+
const RegisterBank *RB = getRegBank(AndSrc);
4808+
47854809
MatchInfo = [=](MachineIRBuilder &B) {
47864810
auto WidthCst = B.buildConstant(ExtractTy, Width);
47874811
auto PosCst = B.buildConstant(ExtractTy, Pos);
47884812
B.buildInstr(TargetOpcode::G_UBFX, {Dst}, {AndSrc, PosCst, WidthCst});
4813+
4814+
if (RB) {
4815+
MRI.setRegBank(WidthCst.getReg(0), *RB);
4816+
MRI.setRegBank(PosCst.getReg(0), *RB);
4817+
}
47894818
};
47904819
return true;
47914820
}

llvm/lib/Target/AMDGPU/AMDGPUCombine.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -210,5 +210,5 @@ def AMDGPURegBankCombiner : GICombiner<
210210
fp_minmax_to_clamp, fp_minmax_to_med3, fmed3_intrinsic_to_clamp,
211211
identity_combines, redundant_and, constant_fold_cast_op,
212212
cast_of_cast_combines, sext_trunc, zext_of_shift_amount_combines,
213-
lower_uniform_sbfx, lower_uniform_ubfx]> {
213+
lower_uniform_sbfx, lower_uniform_ubfx, form_bitfield_extract]> {
214214
}

llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll

Lines changed: 56 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -811,16 +811,15 @@ define amdgpu_ps i32 @s_ashr_v2i16(<2 x i16> inreg %value, <2 x i16> inreg %amou
811811
;
812812
; GFX8-LABEL: s_ashr_v2i16:
813813
; GFX8: ; %bb.0:
814-
; GFX8-NEXT: s_lshr_b32 s2, s0, 16
815-
; GFX8-NEXT: s_sext_i32_i16 s0, s0
816-
; GFX8-NEXT: s_lshr_b32 s3, s1, 16
817-
; GFX8-NEXT: s_ashr_i32 s0, s0, s1
818-
; GFX8-NEXT: s_sext_i32_i16 s1, s2
819-
; GFX8-NEXT: s_ashr_i32 s1, s1, s3
820-
; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
814+
; GFX8-NEXT: s_lshr_b32 s2, s1, 16
815+
; GFX8-NEXT: s_sext_i32_i16 s3, s0
816+
; GFX8-NEXT: s_bfe_i32 s0, s0, 0x100010
817+
; GFX8-NEXT: s_ashr_i32 s0, s0, s2
818+
; GFX8-NEXT: s_ashr_i32 s1, s3, s1
821819
; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
822-
; GFX8-NEXT: s_lshl_b32 s1, s1, 16
823-
; GFX8-NEXT: s_or_b32 s0, s0, s1
820+
; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
821+
; GFX8-NEXT: s_lshl_b32 s0, s0, 16
822+
; GFX8-NEXT: s_or_b32 s0, s1, s0
824823
; GFX8-NEXT: ; return to shader part epilog
825824
;
826825
; GFX9-LABEL: s_ashr_v2i16:
@@ -1014,26 +1013,24 @@ define amdgpu_ps <2 x i32> @s_ashr_v4i16(<4 x i16> inreg %value, <4 x i16> inreg
10141013
;
10151014
; GFX8-LABEL: s_ashr_v4i16:
10161015
; GFX8: ; %bb.0:
1017-
; GFX8-NEXT: s_lshr_b32 s4, s0, 16
1018-
; GFX8-NEXT: s_sext_i32_i16 s0, s0
1019-
; GFX8-NEXT: s_lshr_b32 s6, s2, 16
1020-
; GFX8-NEXT: s_ashr_i32 s0, s0, s2
1021-
; GFX8-NEXT: s_sext_i32_i16 s2, s4
1022-
; GFX8-NEXT: s_lshr_b32 s5, s1, 16
1023-
; GFX8-NEXT: s_ashr_i32 s2, s2, s6
1024-
; GFX8-NEXT: s_sext_i32_i16 s1, s1
1025-
; GFX8-NEXT: s_lshr_b32 s7, s3, 16
1026-
; GFX8-NEXT: s_ashr_i32 s1, s1, s3
1027-
; GFX8-NEXT: s_sext_i32_i16 s3, s5
1028-
; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
1029-
; GFX8-NEXT: s_ashr_i32 s3, s3, s7
1016+
; GFX8-NEXT: s_lshr_b32 s4, s2, 16
1017+
; GFX8-NEXT: s_sext_i32_i16 s6, s0
1018+
; GFX8-NEXT: s_bfe_i32 s0, s0, 0x100010
1019+
; GFX8-NEXT: s_lshr_b32 s5, s3, 16
1020+
; GFX8-NEXT: s_ashr_i32 s0, s0, s4
1021+
; GFX8-NEXT: s_sext_i32_i16 s4, s1
1022+
; GFX8-NEXT: s_bfe_i32 s1, s1, 0x100010
1023+
; GFX8-NEXT: s_ashr_i32 s2, s6, s2
1024+
; GFX8-NEXT: s_ashr_i32 s1, s1, s5
10301025
; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
1031-
; GFX8-NEXT: s_lshl_b32 s2, s2, 16
1032-
; GFX8-NEXT: s_or_b32 s0, s0, s2
1033-
; GFX8-NEXT: s_and_b32 s2, 0xffff, s3
1026+
; GFX8-NEXT: s_ashr_i32 s3, s4, s3
1027+
; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
1028+
; GFX8-NEXT: s_lshl_b32 s0, s0, 16
10341029
; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
1035-
; GFX8-NEXT: s_lshl_b32 s2, s2, 16
1036-
; GFX8-NEXT: s_or_b32 s1, s1, s2
1030+
; GFX8-NEXT: s_or_b32 s0, s2, s0
1031+
; GFX8-NEXT: s_and_b32 s2, 0xffff, s3
1032+
; GFX8-NEXT: s_lshl_b32 s1, s1, 16
1033+
; GFX8-NEXT: s_or_b32 s1, s2, s1
10371034
; GFX8-NEXT: ; return to shader part epilog
10381035
;
10391036
; GFX9-LABEL: s_ashr_v4i16:
@@ -1223,46 +1220,42 @@ define amdgpu_ps <4 x i32> @s_ashr_v8i16(<8 x i16> inreg %value, <8 x i16> inreg
12231220
;
12241221
; GFX8-LABEL: s_ashr_v8i16:
12251222
; GFX8: ; %bb.0:
1226-
; GFX8-NEXT: s_lshr_b32 s8, s0, 16
1227-
; GFX8-NEXT: s_sext_i32_i16 s0, s0
1228-
; GFX8-NEXT: s_lshr_b32 s12, s4, 16
1229-
; GFX8-NEXT: s_ashr_i32 s0, s0, s4
1230-
; GFX8-NEXT: s_sext_i32_i16 s4, s8
1231-
; GFX8-NEXT: s_lshr_b32 s9, s1, 16
1232-
; GFX8-NEXT: s_ashr_i32 s4, s4, s12
1233-
; GFX8-NEXT: s_sext_i32_i16 s1, s1
1234-
; GFX8-NEXT: s_lshr_b32 s13, s5, 16
1235-
; GFX8-NEXT: s_ashr_i32 s1, s1, s5
1236-
; GFX8-NEXT: s_sext_i32_i16 s5, s9
1237-
; GFX8-NEXT: s_and_b32 s4, 0xffff, s4
1238-
; GFX8-NEXT: s_lshr_b32 s10, s2, 16
1239-
; GFX8-NEXT: s_ashr_i32 s5, s5, s13
1240-
; GFX8-NEXT: s_sext_i32_i16 s2, s2
1223+
; GFX8-NEXT: s_lshr_b32 s8, s4, 16
1224+
; GFX8-NEXT: s_sext_i32_i16 s12, s0
1225+
; GFX8-NEXT: s_bfe_i32 s0, s0, 0x100010
1226+
; GFX8-NEXT: s_lshr_b32 s9, s5, 16
1227+
; GFX8-NEXT: s_ashr_i32 s0, s0, s8
1228+
; GFX8-NEXT: s_sext_i32_i16 s8, s1
1229+
; GFX8-NEXT: s_bfe_i32 s1, s1, 0x100010
1230+
; GFX8-NEXT: s_lshr_b32 s10, s6, 16
1231+
; GFX8-NEXT: s_ashr_i32 s4, s12, s4
1232+
; GFX8-NEXT: s_ashr_i32 s5, s8, s5
1233+
; GFX8-NEXT: s_ashr_i32 s1, s1, s9
1234+
; GFX8-NEXT: s_sext_i32_i16 s8, s2
1235+
; GFX8-NEXT: s_bfe_i32 s2, s2, 0x100010
12411236
; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
1242-
; GFX8-NEXT: s_lshl_b32 s4, s4, 16
1243-
; GFX8-NEXT: s_lshr_b32 s14, s6, 16
1244-
; GFX8-NEXT: s_ashr_i32 s2, s2, s6
1245-
; GFX8-NEXT: s_sext_i32_i16 s6, s10
1246-
; GFX8-NEXT: s_or_b32 s0, s0, s4
1247-
; GFX8-NEXT: s_and_b32 s4, 0xffff, s5
1248-
; GFX8-NEXT: s_lshr_b32 s11, s3, 16
1249-
; GFX8-NEXT: s_ashr_i32 s6, s6, s14
1250-
; GFX8-NEXT: s_sext_i32_i16 s3, s3
1237+
; GFX8-NEXT: s_lshr_b32 s11, s7, 16
1238+
; GFX8-NEXT: s_ashr_i32 s6, s8, s6
1239+
; GFX8-NEXT: s_ashr_i32 s2, s2, s10
1240+
; GFX8-NEXT: s_sext_i32_i16 s8, s3
1241+
; GFX8-NEXT: s_bfe_i32 s3, s3, 0x100010
1242+
; GFX8-NEXT: s_and_b32 s4, 0xffff, s4
1243+
; GFX8-NEXT: s_lshl_b32 s0, s0, 16
12511244
; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
1252-
; GFX8-NEXT: s_lshl_b32 s4, s4, 16
1253-
; GFX8-NEXT: s_lshr_b32 s15, s7, 16
1254-
; GFX8-NEXT: s_ashr_i32 s3, s3, s7
1255-
; GFX8-NEXT: s_sext_i32_i16 s7, s11
1256-
; GFX8-NEXT: s_or_b32 s1, s1, s4
1257-
; GFX8-NEXT: s_and_b32 s4, 0xffff, s6
1258-
; GFX8-NEXT: s_ashr_i32 s7, s7, s15
1245+
; GFX8-NEXT: s_ashr_i32 s3, s3, s11
1246+
; GFX8-NEXT: s_or_b32 s0, s4, s0
1247+
; GFX8-NEXT: s_and_b32 s4, 0xffff, s5
1248+
; GFX8-NEXT: s_lshl_b32 s1, s1, 16
12591249
; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
1260-
; GFX8-NEXT: s_lshl_b32 s4, s4, 16
1261-
; GFX8-NEXT: s_or_b32 s2, s2, s4
1262-
; GFX8-NEXT: s_and_b32 s4, 0xffff, s7
1250+
; GFX8-NEXT: s_ashr_i32 s7, s8, s7
1251+
; GFX8-NEXT: s_or_b32 s1, s4, s1
1252+
; GFX8-NEXT: s_and_b32 s4, 0xffff, s6
1253+
; GFX8-NEXT: s_lshl_b32 s2, s2, 16
12631254
; GFX8-NEXT: s_and_b32 s3, 0xffff, s3
1264-
; GFX8-NEXT: s_lshl_b32 s4, s4, 16
1265-
; GFX8-NEXT: s_or_b32 s3, s3, s4
1255+
; GFX8-NEXT: s_or_b32 s2, s4, s2
1256+
; GFX8-NEXT: s_and_b32 s4, 0xffff, s7
1257+
; GFX8-NEXT: s_lshl_b32 s3, s3, 16
1258+
; GFX8-NEXT: s_or_b32 s3, s4, s3
12661259
; GFX8-NEXT: ; return to shader part epilog
12671260
;
12681261
; GFX9-LABEL: s_ashr_v8i16:

0 commit comments

Comments
 (0)