Skip to content

Commit 2b404d0

Browse files
author
Jessica Paquette
committed
[GlobalISel][AArch64] Teach GISel to handle shifts in load addressing modes
When we select the XRO variants of loads, we can pull in very specific shifts (of the size of an element), e.g. `ldr x1, [x2, x3, lsl #3]`. This teaches GISel to handle these when they're coming from shifts specifically. This adds a new addressing mode function, `selectAddrModeShiftedExtendXReg`, which recognizes this pattern. It also packs this up with `selectAddrModeRegisterOffset` into `selectAddrModeXRO`, which is intended to be equivalent to `selectAddrModeXRO` in AArch64ISelDAGToDAG. Also update load-addressing-modes to show that all of the cases here work. Differential Revision: https://reviews.llvm.org/D65119 llvm-svn: 366819
1 parent 123f6ff commit 2b404d0

File tree

2 files changed

+366
-7
lines changed

2 files changed

+366
-7
lines changed

llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp

+124-7
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,15 @@ class AArch64InstructionSelector : public InstructionSelector {
187187
ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
188188
return selectAddrModeIndexed(Root, Width / 8);
189189
}
190+
191+
bool isWorthFoldingIntoExtendedReg(MachineInstr &MI,
192+
const MachineRegisterInfo &MRI) const;
193+
ComplexRendererFns
194+
selectAddrModeShiftedExtendXReg(MachineOperand &Root,
195+
unsigned SizeInBytes) const;
190196
ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
197+
ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
198+
unsigned SizeInBytes) const;
191199

192200
void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI) const;
193201

@@ -1238,8 +1246,8 @@ bool AArch64InstructionSelector::earlySelectLoad(
12381246
if (DstSize != 64)
12391247
return false;
12401248

1241-
// Check if we can do any folding from GEPs etc. into the load.
1242-
auto ImmFn = selectAddrModeRegisterOffset(I.getOperand(1));
1249+
// Check if we can do any folding from GEPs/shifts etc. into the load.
1250+
auto ImmFn = selectAddrModeXRO(I.getOperand(1), MemBytes);
12431251
if (!ImmFn)
12441252
return false;
12451253

@@ -3995,6 +4003,98 @@ AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
39954003
}};
39964004
}
39974005

4006+
/// Return true if it is worth folding MI into an extended register. That is,
4007+
/// if it's safe to pull it into the addressing mode of a load or store as a
4008+
/// shift.
4009+
bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
4010+
MachineInstr &MI, const MachineRegisterInfo &MRI) const {
4011+
// Always fold if there is one use, or if we're optimizing for size.
4012+
Register DefReg = MI.getOperand(0).getReg();
4013+
if (MRI.hasOneUse(DefReg) ||
4014+
MI.getParent()->getParent()->getFunction().hasMinSize())
4015+
return true;
4016+
4017+
// It's better to avoid folding and recomputing shifts when we don't have a
4018+
// fastpath.
4019+
if (!STI.hasLSLFast())
4020+
return false;
4021+
4022+
// We have a fastpath, so folding a shift in and potentially computing it
4023+
// many times may be beneficial. Check if this is only used in memory ops.
4024+
// If it is, then we should fold.
4025+
return all_of(MRI.use_instructions(DefReg),
4026+
[](MachineInstr &Use) { return Use.mayLoadOrStore(); });
4027+
}
4028+
4029+
/// This is used for computing addresses like this:
4030+
///
4031+
/// ldr x1, [x2, x3, lsl #3]
4032+
///
4033+
/// Where x2 is the base register, and x3 is an offset register. The shift-left
4034+
/// is a constant value specific to this load instruction. That is, we'll never
4035+
/// see anything other than a 3 here (which corresponds to the size of the
4036+
/// element being loaded.)
4037+
InstructionSelector::ComplexRendererFns
4038+
AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
4039+
MachineOperand &Root, unsigned SizeInBytes) const {
4040+
if (!Root.isReg())
4041+
return None;
4042+
MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
4043+
4044+
// Make sure that the memory op is a valid size.
4045+
int64_t LegalShiftVal = Log2_32(SizeInBytes);
4046+
if (LegalShiftVal == 0)
4047+
return None;
4048+
4049+
// We want to find something like this:
4050+
//
4051+
// val = G_CONSTANT LegalShiftVal
4052+
// shift = G_SHL off_reg val
4053+
// ptr = G_GEP base_reg shift
4054+
// x = G_LOAD ptr
4055+
//
4056+
// And fold it into this addressing mode:
4057+
//
4058+
// ldr x, [base_reg, off_reg, lsl #LegalShiftVal]
4059+
4060+
// Check if we can find the G_GEP.
4061+
MachineInstr *Gep = getOpcodeDef(TargetOpcode::G_GEP, Root.getReg(), MRI);
4062+
if (!Gep || !isWorthFoldingIntoExtendedReg(*Gep, MRI))
4063+
return None;
4064+
4065+
// Now try to match the G_SHL.
4066+
MachineInstr *Shl =
4067+
getOpcodeDef(TargetOpcode::G_SHL, Gep->getOperand(2).getReg(), MRI);
4068+
if (!Shl || !isWorthFoldingIntoExtendedReg(*Shl, MRI))
4069+
return None;
4070+
4071+
// Now, try to find the specific G_CONSTANT.
4072+
auto ValAndVReg =
4073+
getConstantVRegValWithLookThrough(Shl->getOperand(2).getReg(), MRI);
4074+
if (!ValAndVReg)
4075+
return None;
4076+
4077+
// The value must fit into 3 bits, and must be positive. Make sure that is
4078+
// true.
4079+
int64_t ImmVal = ValAndVReg->Value;
4080+
if ((ImmVal & 0x7) != ImmVal)
4081+
return None;
4082+
4083+
// We are only allowed to shift by LegalShiftVal. This shift value is built
4084+
// into the instruction, so we can't just use whatever we want.
4085+
if (ImmVal != LegalShiftVal)
4086+
return None;
4087+
4088+
// We can use the LHS of the GEP as the base, and the LHS of the shift as an
4089+
// offset. Signify that we are shifting by setting the shift flag to 1.
4090+
return {{
4091+
[=](MachineInstrBuilder &MIB) { MIB.add(Gep->getOperand(1)); },
4092+
[=](MachineInstrBuilder &MIB) { MIB.add(Shl->getOperand(1)); },
4093+
[=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
4094+
[=](MachineInstrBuilder &MIB) { MIB.addImm(1); },
4095+
}};
4096+
}
4097+
39984098
/// This is used for computing addresses like this:
39994099
///
40004100
/// ldr x1, [x2, x3]
@@ -4008,11 +4108,6 @@ AArch64InstructionSelector::selectAddrModeRegisterOffset(
40084108
MachineOperand &Root) const {
40094109
MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
40104110

4011-
// If we have a constant offset, then we probably don't want to match a
4012-
// register offset.
4013-
if (isBaseWithConstantOffset(Root, MRI))
4014-
return None;
4015-
40164111
// We need a GEP.
40174112
MachineInstr *Gep = MRI.getVRegDef(Root.getReg());
40184113
if (!Gep || Gep->getOpcode() != TargetOpcode::G_GEP)
@@ -4033,6 +4128,28 @@ AArch64InstructionSelector::selectAddrModeRegisterOffset(
40334128
}};
40344129
}
40354130

4131+
/// This is intended to be equivalent to selectAddrModeXRO in
4132+
/// AArch64ISelDAGtoDAG. It's used for selecting X register offset loads.
4133+
InstructionSelector::ComplexRendererFns
4134+
AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
4135+
unsigned SizeInBytes) const {
4136+
MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
4137+
4138+
// If we have a constant offset, then we probably don't want to match a
4139+
// register offset.
4140+
if (isBaseWithConstantOffset(Root, MRI))
4141+
return None;
4142+
4143+
// Try to fold shifts into the addressing mode.
4144+
auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes);
4145+
if (AddrModeFns)
4146+
return AddrModeFns;
4147+
4148+
// If that doesn't work, see if it's possible to fold in registers from
4149+
// a GEP.
4150+
return selectAddrModeRegisterOffset(Root);
4151+
}
4152+
40364153
/// Select a "register plus unscaled signed 9-bit immediate" address. This
40374154
/// should only match when there is an offset that is not valid for a scaled
40384155
/// immediate addressing mode. The "Size" argument is the size in bytes of the

0 commit comments

Comments
 (0)