Skip to content

Commit ce77653

Browse files
committed
[VPlan] Introduces explicit broadcast for live-in constants.
This patch focuses on explicitly showing the broadcast of live-in constants. This can help the VPlan-based cost model account for the broadcast cost and track the register pressure of the broadcast value in the future. Live-in constants usually have only a single user, so the `broadcast` is inserted right before that user to reduce the live range of the broadcast value and to avoid changes to the generated vector IR.
1 parent 7b8dea2 commit ce77653

File tree

8 files changed

+457
-284
lines changed

8 files changed

+457
-284
lines changed

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1399,6 +1399,17 @@ class LLVM_ABI_FOR_TEST VPWidenRecipe : public VPRecipeWithIRFlags,
13991399
void print(raw_ostream &O, const Twine &Indent,
14001400
VPSlotTracker &SlotTracker) const override;
14011401
#endif
1402+
1403+
/// Returns true if the recipe only uses the first lane of operand \p Op.
/// \p Op must be one of this recipe's operands (checked by the assert).
bool onlyFirstLaneUsed(const VPValue *Op) const override {
1404+
assert(is_contained(operands(), Op) &&
1405+
"Op must be an operand of the recipe");
1406+
switch (Opcode) {
1407+
default:
1408+
return false;
1409+
case Instruction::ExtractValue:
1410+
// ExtractValue is the only opcode handled here that reports a
// first-lane-only operand, and only for its operand at index 1.
return Op == getOperand(1);
1411+
}
1412+
}
14021413
};
14031414

14041415
/// VPWidenCastRecipe is a recipe to create vector cast instructions.
@@ -1594,6 +1605,14 @@ class LLVM_ABI_FOR_TEST VPWidenCallRecipe : public VPRecipeWithIRFlags,
15941605
void print(raw_ostream &O, const Twine &Indent,
15951606
VPSlotTracker &SlotTracker) const override;
15961607
#endif
1608+
1609+
/// Returns true if the recipe only uses the first lane of operand \p Op.
/// \p Op must be one of this recipe's operands (checked by the assert).
1610+
bool onlyFirstLaneUsed(const VPValue *Op) const override {
1611+
assert(is_contained(operands(), Op) &&
1612+
"Op must be an operand of the recipe");
1613+
// The scalar called function cannot be vectorized, so only the last
// operand is reported as first-lane-only.
// NOTE(review): this presumes the callee is stored as the last operand --
// confirm against the recipe's operand layout.
1614+
return Op == getOperand(getNumOperands() - 1);
1615+
}
15971616
};
15981617

15991618
/// A recipe representing a sequence of load -> update -> store as part of

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1108,6 +1108,7 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
11081108
default:
11091109
return false;
11101110
case Instruction::ExtractElement:
1111+
case Instruction::ExtractValue:
11111112
return Op == getOperand(1);
11121113
case Instruction::PHI:
11131114
return true;
@@ -1132,8 +1133,9 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
11321133
case VPInstruction::WidePtrAdd:
11331134
return Op == getOperand(0);
11341135
case VPInstruction::ComputeAnyOfResult:
1135-
case VPInstruction::ComputeFindIVResult:
11361136
return Op == getOperand(1);
1137+
case VPInstruction::ComputeFindIVResult:
1138+
return Op == getOperand(1) || Op == getOperand(2);
11371139
case VPInstruction::ExtractLane:
11381140
return Op == getOperand(0);
11391141
};

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3193,10 +3193,7 @@ void VPlanTransforms::materializeBroadcasts(VPlan &Plan) {
31933193

31943194
auto *VectorPreheader = Plan.getVectorPreheader();
31953195
for (VPValue *VPV : VPValues) {
3196-
if (all_of(VPV->users(),
3197-
[VPV](VPUser *U) { return U->usesScalars(VPV); }) ||
3198-
(VPV->isLiveIn() && VPV->getLiveInIRValue() &&
3199-
isa<Constant>(VPV->getLiveInIRValue())))
3196+
if (all_of(VPV->users(), [VPV](VPUser *U) { return U->usesScalars(VPV); }))
32003197
continue;
32013198

32023199
// Add explicit broadcast at the insert point that dominates all users.
@@ -3213,8 +3210,25 @@ void VPlanTransforms::materializeBroadcasts(VPlan &Plan) {
32133210
"All users must be in the vector preheader or dominated by it");
32143211
}
32153212

3216-
VPBuilder Builder(cast<VPBasicBlock>(HoistBlock), HoistPoint);
3217-
auto *Broadcast = Builder.createNaryOp(VPInstruction::Broadcast, {VPV});
3213+
VPInstruction *Broadcast;
3214+
if (VPV->isLiveIn() && isa_and_nonnull<Constant>(VPV->getLiveInIRValue())) {
3215+
// We cannot replace constant live-ins used by PHIs with a broadcast in
3216+
// the same VPBB because that would break the PHI. We also cannot replace
3217+
// operands of a VPWidenGEPRecipe, since it broadcasts the generated
3218+
// pointer instead of its operands.
3219+
if (auto *R = dyn_cast_if_present<VPRecipeBase>(*(VPV->users().begin()));
3220+
R && !isa<VPHeaderPHIRecipe, VPWidenPHIRecipe, VPWidenGEPRecipe>(R) &&
3221+
!VPV->hasMoreThanOneUniqueUser()) {
3222+
Broadcast = new VPInstruction(VPInstruction::Broadcast, {VPV});
3223+
// Insert just before the user to reduce register pressure.
3224+
Broadcast->insertBefore(R);
3225+
} else {
3226+
continue;
3227+
}
3228+
} else {
3229+
VPBuilder Builder(cast<VPBasicBlock>(HoistBlock), HoistPoint);
3230+
Broadcast = Builder.createNaryOp(VPInstruction::Broadcast, {VPV});
3231+
}
32183232
VPV->replaceUsesWithIf(Broadcast,
32193233
[VPV, Broadcast](VPUser &U, unsigned Idx) {
32203234
return Broadcast != &U && !U.usesScalars(VPV);

llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-cost.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -383,15 +383,15 @@ define void @single_fmul_used_by_each_member(ptr noalias %A, ptr noalias %B, ptr
383383
; CHECK: [[VEC_EPILOG_VECTOR_BODY]]:
384384
; CHECK-NEXT: [[INDEX24:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT25:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
385385
; CHECK-NEXT: [[TMP45:%.*]] = getelementptr double, ptr [[A]], i64 [[INDEX24]]
386-
; CHECK-NEXT: [[TMP47:%.*]] = load double, ptr [[TMP45]], align 8
387-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP47]], i64 0
388-
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer
386+
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = load <2 x double>, ptr [[TMP45]], align 8
389387
; CHECK-NEXT: [[TMP48:%.*]] = fmul <2 x double> [[BROADCAST_SPLAT]], splat (double 5.000000e+00)
390388
; CHECK-NEXT: [[TMP49:%.*]] = getelementptr { double, double }, ptr [[B]], i64 [[INDEX24]]
391-
; CHECK-NEXT: store <2 x double> [[TMP48]], ptr [[TMP49]], align 8
389+
; CHECK-NEXT: [[TMP52:%.*]] = shufflevector <2 x double> [[TMP48]], <2 x double> [[TMP48]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
390+
; CHECK-NEXT: [[INTERLEAVED_VEC26:%.*]] = shufflevector <4 x double> [[TMP52]], <4 x double> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
391+
; CHECK-NEXT: store <4 x double> [[INTERLEAVED_VEC26]], ptr [[TMP49]], align 8
392392
; CHECK-NEXT: [[TMP50:%.*]] = getelementptr { double, double }, ptr [[C]], i64 [[INDEX24]]
393-
; CHECK-NEXT: store <2 x double> [[TMP48]], ptr [[TMP50]], align 8
394-
; CHECK-NEXT: [[INDEX_NEXT25]] = add nuw i64 [[INDEX24]], 1
393+
; CHECK-NEXT: store <4 x double> [[INTERLEAVED_VEC26]], ptr [[TMP50]], align 8
394+
; CHECK-NEXT: [[INDEX_NEXT25]] = add nuw i64 [[INDEX24]], 2
395395
; CHECK-NEXT: [[TMP51:%.*]] = icmp eq i64 [[INDEX_NEXT25]], [[N_VEC23]]
396396
; CHECK-NEXT: br i1 [[TMP51]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
397397
; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK]]:

0 commit comments

Comments
 (0)