Skip to content

Commit f65e3bb

Browse files
committed
[GlobalISel] fdiv to fmul transform
This is a port of the SDAG DAGCombiner::combineRepeatedFPDivisors combine that looks for multiple fdiv operations with the same divisor and converts them to a single reciprocal fdiv and multiple fmuls. It is currently a fairly faithful port, with some additions to make sure that the newly created fdiv dominates all new uses. Compared to the SDAG version it also drops some logic about splat uses which assumes no vector fdivs and some logic about x/sqrt(x) which does not yet apply to GISel.
1 parent 7efc861 commit f65e3bb

File tree

4 files changed

+129
-84
lines changed

4 files changed

+129
-84
lines changed

llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -809,6 +809,10 @@ class CombinerHelper {
809809

810810
bool matchCombineFMinMaxNaN(MachineInstr &MI, unsigned &Info) const;
811811

812+
/// Match a G_FDIV \p MI whose divisor is also used by other G_FDIVs and
/// collect those instructions in \p MatchInfo so that they can be rewritten
/// as multiplications by a single reciprocal.
bool matchRepeatedFPDivisor(MachineInstr &MI,
813+
SmallVector<MachineInstr *> &MatchInfo) const;
814+
/// Replace the G_FDIVs in \p MatchInfo with multiplications by one shared
/// reciprocal of their common divisor.
void applyRepeatedFPDivisor(SmallVector<MachineInstr *> &MatchInfo) const;
815+
812816
/// Transform G_ADD(x, G_SUB(y, x)) to y.
813817
/// Transform G_ADD(G_SUB(y, x), x) to y.
814818
bool matchAddSubSameReg(MachineInstr &MI, Register &Src) const;

llvm/include/llvm/Target/GlobalISel/Combine.td

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,7 @@ def constantfp_matchinfo : GIDefMatchData<"ConstantFP*">;
211211
def build_fn_matchinfo :
212212
GIDefMatchData<"std::function<void(MachineIRBuilder &)>">;
213213
def unsigned_matchinfo: GIDefMatchData<"unsigned">;
214+
def mi_vector_matchinfo : GIDefMatchData<"SmallVector<MachineInstr *>">;
214215

215216
def copy_prop : GICombineRule<
216217
(defs root:$d),
@@ -1327,6 +1328,14 @@ def combine_minmax_nan: GICombineRule<
13271328
[{ return Helper.matchCombineFMinMaxNaN(*${root}, ${info}); }]),
13281329
(apply [{ Helper.replaceSingleDefInstWithOperand(*${root}, ${info}); }])>;
13291330

1331+
// Combine multiple FDIVs with the same divisor into multiple FMULs by the
1332+
// reciprocal.
// $matchinfo carries the list of G_FDIV instructions collected by the match
// step over to the apply step.
// NOTE(review): "divison" in the rule name looks like a misspelling of
// "division"; the name is also referenced in all_combines, so any rename must
// update both places together.
1333+
def fdiv_repeated_divison: GICombineRule<
1334+
(defs root:$root, mi_vector_matchinfo:$matchinfo),
1335+
(match (G_FDIV $dst, $src1, $src2):$root,
1336+
[{ return Helper.matchRepeatedFPDivisor(*${root}, ${matchinfo}); }]),
1337+
(apply [{ Helper.applyRepeatedFPDivisor(${matchinfo}); }])>;
1338+
13301339
// Transform (add x, (sub y, x)) -> y
13311340
// Transform (add (sub y, x), x) -> y
13321341
def add_sub_reg_frags : GICombinePatFrag<
@@ -2051,7 +2060,7 @@ def all_combines : GICombineGroup<[integer_reassoc_combines, trivial_combines,
20512060
constant_fold_cast_op, fabs_fneg_fold,
20522061
intdiv_combines, mulh_combines, redundant_neg_operands,
20532062
and_or_disjoint_mask, fma_combines, fold_binop_into_select,
2054-
sub_add_reg, select_to_minmax,
2063+
sub_add_reg, select_to_minmax, fdiv_repeated_divison,
20552064
fsub_to_fneg, commute_constant_to_rhs, match_ands, match_ors,
20562065
simplify_neg_minmax, combine_concat_vector,
20572066
sext_trunc, zext_trunc, prefer_sign_combines, shuffle_combines,

llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6389,6 +6389,79 @@ bool CombinerHelper::matchCombineFMinMaxNaN(MachineInstr &MI,
63896389
return MatchNaN(1) || MatchNaN(2);
63906390
}
63916391

6392+
// Combine multiple FDIVs with the same divisor into multiple FMULs by the
6393+
// reciprocal.
6394+
// E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
//
// MI is the root G_FDIV. MatchInfo receives MI plus every other eligible
// G_FDIV in MI's basic block that divides by the same register, with the
// division that dominates the others kept at index 0. Returns true only when
// the number of collected divisions reaches the minimum reported by the
// target lowering.
// NOTE(review): MatchInfo is appended to (not cleared) and is populated even
// when this returns false; presumably callers pass an empty vector and ignore
// it on failure -- confirm.
6395+
bool CombinerHelper::matchRepeatedFPDivisor(
6396+
MachineInstr &MI, SmallVector<MachineInstr *> &MatchInfo) const {
6397+
assert(MI.getOpcode() == TargetOpcode::G_FDIV);
6398+
auto *MF = MI.getMF();
6399+
const TargetOptions &Options = MF->getTarget().Options;
6400+
6401+
// Operands of the root division: Dst = X / Y.
Register Dst = MI.getOperand(0).getReg();
6402+
Register X = MI.getOperand(1).getReg();
6403+
Register Y = MI.getOperand(2).getReg();
6404+
// NOTE(review): Type is not referenced again in this function.
LLT Type = MRI.getType(Dst);
6405+
6406+
// The root must allow reciprocal formation, either through the global
// unsafe-math option or through its own arcp flag.
bool UnsafeMath = Options.UnsafeFPMath;
6407+
if (!UnsafeMath && !MI.getFlag(MachineInstr::MIFlag::FmArcp))
6408+
return false;
6409+
6410+
// Skip if this division is already a reciprocal or negated reciprocal, i.e.
// its dividend X is the constant (or constant splat) 1.0 or -1.0.
6411+
auto N0CFP = isConstantOrConstantSplatVectorFP(*MRI.getVRegDef(X), MRI);
6412+
if (N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0)))
6413+
return false;
6414+
6415+
// Exit early if the target does not want this transform or if there can't
6416+
// possibly be enough uses of the divisor to make the transform worthwhile.
6417+
unsigned MinUses = getTargetLowering().combineRepeatedFPDivisors();
6418+
6419+
// A minimum of zero means the target has disabled the transform.
if (!MinUses)
6420+
return false;
6421+
6422+
// Find all FDIV users of the same divisor.
6423+
// NOTE(review): this comment used to say "use a set because duplicates may be present in the user list", but the users are collected into the MatchInfo vector with no de-duplication -- confirm that use_nodbg_instructions cannot yield the same instruction twice (e.g. for Y / Y).
6424+
// For the moment we limit all instructions to a single BB and use the first
6425+
// Instr as the dominating position.
6426+
MatchInfo.push_back(&MI);
6427+
for (auto &U : MRI.use_nodbg_instructions(Y)) {
6428+
// Ignore the root itself and any user outside the root's basic block.
if (&U == &MI || U.getParent() != MI.getParent())
6429+
continue;
6430+
// Only divisions that use Y as the divisor (operand 2) qualify.
if (U.getOpcode() == TargetOpcode::G_FDIV && U.getOperand(2).getReg() == Y) {
6431+
// This division is eligible for optimization only if global unsafe math
6432+
// is enabled or if this division allows reciprocal formation.
6433+
if (UnsafeMath || U.getFlag(MachineInstr::MIFlag::FmArcp)) {
6434+
MatchInfo.push_back(&U);
6435+
// Keep the dominating division at index 0, because the apply step inserts the reciprocal in front of MatchInfo[0]. Is there a better way to handle this?
6436+
if (dominates(U, *MatchInfo[0]))
6437+
std::swap(MatchInfo[0], MatchInfo.back());
6438+
}
6439+
}
6440+
}
6441+
6442+
// Now that we have the actual number of divisor uses, make sure it meets
6443+
// the minimum threshold specified by the target.
6444+
return MatchInfo.size() >= MinUses;
6445+
}
6446+
6447+
// Rewrite the divisions collected in MatchInfo: build one reciprocal
// (1.0 / divisor) in front of MatchInfo[0], then replace every division in
// the list, including MatchInfo[0] itself, with a multiplication of its
// dividend by that reciprocal. Each original division is erased.
void CombinerHelper::applyRepeatedFPDivisor(
6448+
SmallVector<MachineInstr *> &MatchInfo) const {
6449+
// Generate the new div at the position of the first instruction, that we have
6450+
// ensured will dominate all other instructions.
6451+
Builder.setInsertPt(*MatchInfo[0]->getParent(), MatchInfo[0]);
6452+
// The reciprocal has the same type as the result of the first division and
// uses that division's divisor (operand 2).
LLT Ty = MRI.getType(MatchInfo[0]->getOperand(0).getReg());
6453+
// NOTE(review): no flags argument is passed to buildFDiv/buildFMul here --
// confirm whether the fast-math flags of the original divisions should be
// propagated to the new instructions.
auto Div = Builder.buildFDiv(Ty, Builder.buildFConstant(Ty, 1.0),
6454+
MatchInfo[0]->getOperand(2).getReg());
6455+
6456+
// Replace all found div's with fmul instructions.
6457+
for (MachineInstr *MI : MatchInfo) {
6458+
// Build the multiply in place of the division, writing to the division's
// original destination register so existing users are unaffected.
Builder.setInsertPt(*MI->getParent(), MI);
6459+
Builder.buildFMul(MI->getOperand(0).getReg(), MI->getOperand(1).getReg(),
6460+
Div->getOperand(0).getReg());
6461+
MI->eraseFromParent();
6462+
}
6463+
}
6464+
63926465
bool CombinerHelper::matchAddSubSameReg(MachineInstr &MI, Register &Src) const {
63936466
assert(MI.getOpcode() == TargetOpcode::G_ADD && "Expected a G_ADD");
63946467
Register LHS = MI.getOperand(1).getReg();

llvm/test/CodeGen/AArch64/fdiv-combine.ll

Lines changed: 42 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -12,22 +12,14 @@
1212
; =>
1313
; recip = 1.0 / D; a * recip; b * recip; c * recip;
1414
define void @three_fdiv_float(float %D, float %a, float %b, float %c) #0 {
15-
; CHECK-SD-LABEL: three_fdiv_float:
16-
; CHECK-SD: // %bb.0:
17-
; CHECK-SD-NEXT: fmov s4, #1.00000000
18-
; CHECK-SD-NEXT: fdiv s4, s4, s0
19-
; CHECK-SD-NEXT: fmul s0, s1, s4
20-
; CHECK-SD-NEXT: fmul s1, s2, s4
21-
; CHECK-SD-NEXT: fmul s2, s3, s4
22-
; CHECK-SD-NEXT: b foo_3f
23-
;
24-
; CHECK-GI-LABEL: three_fdiv_float:
25-
; CHECK-GI: // %bb.0:
26-
; CHECK-GI-NEXT: fdiv s4, s1, s0
27-
; CHECK-GI-NEXT: fdiv s1, s2, s0
28-
; CHECK-GI-NEXT: fdiv s2, s3, s0
29-
; CHECK-GI-NEXT: fmov s0, s4
30-
; CHECK-GI-NEXT: b foo_3f
15+
; CHECK-LABEL: three_fdiv_float:
16+
; CHECK: // %bb.0:
17+
; CHECK-NEXT: fmov s4, #1.00000000
18+
; CHECK-NEXT: fdiv s4, s4, s0
19+
; CHECK-NEXT: fmul s0, s1, s4
20+
; CHECK-NEXT: fmul s1, s2, s4
21+
; CHECK-NEXT: fmul s2, s3, s4
22+
; CHECK-NEXT: b foo_3f
3123
%div = fdiv float %a, %D
3224
%div1 = fdiv float %b, %D
3325
%div2 = fdiv float %c, %D
@@ -36,22 +28,14 @@ define void @three_fdiv_float(float %D, float %a, float %b, float %c) #0 {
3628
}
3729

3830
define void @three_fdiv_double(double %D, double %a, double %b, double %c) #0 {
39-
; CHECK-SD-LABEL: three_fdiv_double:
40-
; CHECK-SD: // %bb.0:
41-
; CHECK-SD-NEXT: fmov d4, #1.00000000
42-
; CHECK-SD-NEXT: fdiv d4, d4, d0
43-
; CHECK-SD-NEXT: fmul d0, d1, d4
44-
; CHECK-SD-NEXT: fmul d1, d2, d4
45-
; CHECK-SD-NEXT: fmul d2, d3, d4
46-
; CHECK-SD-NEXT: b foo_3d
47-
;
48-
; CHECK-GI-LABEL: three_fdiv_double:
49-
; CHECK-GI: // %bb.0:
50-
; CHECK-GI-NEXT: fdiv d4, d1, d0
51-
; CHECK-GI-NEXT: fdiv d1, d2, d0
52-
; CHECK-GI-NEXT: fdiv d2, d3, d0
53-
; CHECK-GI-NEXT: fmov d0, d4
54-
; CHECK-GI-NEXT: b foo_3d
31+
; CHECK-LABEL: three_fdiv_double:
32+
; CHECK: // %bb.0:
33+
; CHECK-NEXT: fmov d4, #1.00000000
34+
; CHECK-NEXT: fdiv d4, d4, d0
35+
; CHECK-NEXT: fmul d0, d1, d4
36+
; CHECK-NEXT: fmul d1, d2, d4
37+
; CHECK-NEXT: fmul d2, d3, d4
38+
; CHECK-NEXT: b foo_3d
5539
%div = fdiv double %a, %D
5640
%div1 = fdiv double %b, %D
5741
%div2 = fdiv double %c, %D
@@ -60,22 +44,14 @@ define void @three_fdiv_double(double %D, double %a, double %b, double %c) #0 {
6044
}
6145

6246
define void @three_fdiv_4xfloat(<4 x float> %D, <4 x float> %a, <4 x float> %b, <4 x float> %c) #0 {
63-
; CHECK-SD-LABEL: three_fdiv_4xfloat:
64-
; CHECK-SD: // %bb.0:
65-
; CHECK-SD-NEXT: fmov v4.4s, #1.00000000
66-
; CHECK-SD-NEXT: fdiv v4.4s, v4.4s, v0.4s
67-
; CHECK-SD-NEXT: fmul v0.4s, v1.4s, v4.4s
68-
; CHECK-SD-NEXT: fmul v1.4s, v2.4s, v4.4s
69-
; CHECK-SD-NEXT: fmul v2.4s, v3.4s, v4.4s
70-
; CHECK-SD-NEXT: b foo_3_4xf
71-
;
72-
; CHECK-GI-LABEL: three_fdiv_4xfloat:
73-
; CHECK-GI: // %bb.0:
74-
; CHECK-GI-NEXT: fdiv v4.4s, v1.4s, v0.4s
75-
; CHECK-GI-NEXT: fdiv v1.4s, v2.4s, v0.4s
76-
; CHECK-GI-NEXT: fdiv v2.4s, v3.4s, v0.4s
77-
; CHECK-GI-NEXT: mov v0.16b, v4.16b
78-
; CHECK-GI-NEXT: b foo_3_4xf
47+
; CHECK-LABEL: three_fdiv_4xfloat:
48+
; CHECK: // %bb.0:
49+
; CHECK-NEXT: fmov v4.4s, #1.00000000
50+
; CHECK-NEXT: fdiv v4.4s, v4.4s, v0.4s
51+
; CHECK-NEXT: fmul v0.4s, v1.4s, v4.4s
52+
; CHECK-NEXT: fmul v1.4s, v2.4s, v4.4s
53+
; CHECK-NEXT: fmul v2.4s, v3.4s, v4.4s
54+
; CHECK-NEXT: b foo_3_4xf
7955
%div = fdiv <4 x float> %a, %D
8056
%div1 = fdiv <4 x float> %b, %D
8157
%div2 = fdiv <4 x float> %c, %D
@@ -84,22 +60,14 @@ define void @three_fdiv_4xfloat(<4 x float> %D, <4 x float> %a, <4 x float> %b,
8460
}
8561

8662
define void @three_fdiv_2xdouble(<2 x double> %D, <2 x double> %a, <2 x double> %b, <2 x double> %c) #0 {
87-
; CHECK-SD-LABEL: three_fdiv_2xdouble:
88-
; CHECK-SD: // %bb.0:
89-
; CHECK-SD-NEXT: fmov v4.2d, #1.00000000
90-
; CHECK-SD-NEXT: fdiv v4.2d, v4.2d, v0.2d
91-
; CHECK-SD-NEXT: fmul v0.2d, v1.2d, v4.2d
92-
; CHECK-SD-NEXT: fmul v1.2d, v2.2d, v4.2d
93-
; CHECK-SD-NEXT: fmul v2.2d, v3.2d, v4.2d
94-
; CHECK-SD-NEXT: b foo_3_2xd
95-
;
96-
; CHECK-GI-LABEL: three_fdiv_2xdouble:
97-
; CHECK-GI: // %bb.0:
98-
; CHECK-GI-NEXT: fdiv v4.2d, v1.2d, v0.2d
99-
; CHECK-GI-NEXT: fdiv v1.2d, v2.2d, v0.2d
100-
; CHECK-GI-NEXT: fdiv v2.2d, v3.2d, v0.2d
101-
; CHECK-GI-NEXT: mov v0.16b, v4.16b
102-
; CHECK-GI-NEXT: b foo_3_2xd
63+
; CHECK-LABEL: three_fdiv_2xdouble:
64+
; CHECK: // %bb.0:
65+
; CHECK-NEXT: fmov v4.2d, #1.00000000
66+
; CHECK-NEXT: fdiv v4.2d, v4.2d, v0.2d
67+
; CHECK-NEXT: fmul v0.2d, v1.2d, v4.2d
68+
; CHECK-NEXT: fmul v1.2d, v2.2d, v4.2d
69+
; CHECK-NEXT: fmul v2.2d, v3.2d, v4.2d
70+
; CHECK-NEXT: b foo_3_2xd
10371
%div = fdiv <2 x double> %a, %D
10472
%div1 = fdiv <2 x double> %b, %D
10573
%div2 = fdiv <2 x double> %c, %D
@@ -136,25 +104,16 @@ define void @two_fdiv_double(double %D, double %a, double %b) #0 {
136104
}
137105

138106
define void @splat_three_fdiv_4xfloat(float %D, <4 x float> %a, <4 x float> %b, <4 x float> %c) #0 {
139-
; CHECK-SD-LABEL: splat_three_fdiv_4xfloat:
140-
; CHECK-SD: // %bb.0:
141-
; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0
142-
; CHECK-SD-NEXT: fmov v4.4s, #1.00000000
143-
; CHECK-SD-NEXT: dup v0.4s, v0.s[0]
144-
; CHECK-SD-NEXT: fdiv v4.4s, v4.4s, v0.4s
145-
; CHECK-SD-NEXT: fmul v0.4s, v1.4s, v4.4s
146-
; CHECK-SD-NEXT: fmul v1.4s, v2.4s, v4.4s
147-
; CHECK-SD-NEXT: fmul v2.4s, v3.4s, v4.4s
148-
; CHECK-SD-NEXT: b foo_3_4xf
149-
;
150-
; CHECK-GI-LABEL: splat_three_fdiv_4xfloat:
151-
; CHECK-GI: // %bb.0:
152-
; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0
153-
; CHECK-GI-NEXT: dup v4.4s, v0.s[0]
154-
; CHECK-GI-NEXT: fdiv v0.4s, v1.4s, v4.4s
155-
; CHECK-GI-NEXT: fdiv v1.4s, v2.4s, v4.4s
156-
; CHECK-GI-NEXT: fdiv v2.4s, v3.4s, v4.4s
157-
; CHECK-GI-NEXT: b foo_3_4xf
107+
; CHECK-LABEL: splat_three_fdiv_4xfloat:
108+
; CHECK: // %bb.0:
109+
; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0
110+
; CHECK-NEXT: fmov v4.4s, #1.00000000
111+
; CHECK-NEXT: dup v0.4s, v0.s[0]
112+
; CHECK-NEXT: fdiv v4.4s, v4.4s, v0.4s
113+
; CHECK-NEXT: fmul v0.4s, v1.4s, v4.4s
114+
; CHECK-NEXT: fmul v1.4s, v2.4s, v4.4s
115+
; CHECK-NEXT: fmul v2.4s, v3.4s, v4.4s
116+
; CHECK-NEXT: b foo_3_4xf
158117
%D.ins = insertelement <4 x float> poison, float %D, i64 0
159118
%splat = shufflevector <4 x float> %D.ins, <4 x float> poison, <4 x i32> zeroinitializer
160119
%div = fdiv <4 x float> %a, %splat

0 commit comments

Comments
 (0)