-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[LV] Use VPReductionRecipe for partial reductions #144908
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
d5caa62
7c436bf
f99cbd4
59545b9
d85eb07
2a4d170
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -994,11 +994,22 @@ InstructionCost TargetTransformInfo::getShuffleCost( | |
} | ||
|
||
TargetTransformInfo::PartialReductionExtendKind | ||
TargetTransformInfo::getPartialReductionExtendKind(Instruction *I) { | ||
if (isa<SExtInst>(I)) | ||
return PR_SignExtend; | ||
if (isa<ZExtInst>(I)) | ||
TargetTransformInfo::getPartialReductionExtendKind( | ||
Instruction::CastOps CastOpc) { | ||
switch (CastOpc) { | ||
case Instruction::CastOps::ZExt: | ||
return PR_ZeroExtend; | ||
case Instruction::CastOps::SExt: | ||
return PR_SignExtend; | ||
default: | ||
return PR_None; | ||
} | ||
} | ||
|
||
TargetTransformInfo::PartialReductionExtendKind | ||
TargetTransformInfo::getPartialReductionExtendKind(Instruction *I) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This function can be removed. There's still one use in Can you pull out that change into a separate NFC commit? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The call to |
||
if (auto *Cast = dyn_cast<CastInst>(I)) | ||
return getPartialReductionExtendKind(Cast->getOpcode()); | ||
return PR_None; | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change | ||||||||
---|---|---|---|---|---|---|---|---|---|---|
|
@@ -7050,7 +7050,8 @@ static bool planContainsAdditionalSimplifications(VPlan &Plan, | |||||||||
} | ||||||||||
// The VPlan-based cost model is more accurate for partial reduction and | ||||||||||
// comparing against the legacy cost isn't desirable. | ||||||||||
if (isa<VPPartialReductionRecipe>(&R)) | ||||||||||
if (auto *VPR = dyn_cast<VPReductionRecipe>(&R); | ||||||||||
VPR && VPR->isPartialReduction()) | ||||||||||
Comment on lines
+7053
to
+7054
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: I know it's a style thing, so feel free to ignore, but below you wrote it like this:
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Are you talking about here? That's different because it's checking if it's either a |
||||||||||
return true; | ||||||||||
|
||||||||||
/// If a VPlan transform folded a recipe to one producing a single-scalar, | ||||||||||
|
@@ -8278,11 +8279,14 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(VPSingleDefRecipe *R, | |||||||||
Phi->getIncomingValueForBlock(OrigLoop->getLoopPreheader())); | ||||||||||
|
||||||||||
// If the PHI is used by a partial reduction, set the scale factor. | ||||||||||
unsigned ScaleFactor = | ||||||||||
getScalingForReduction(RdxDesc.getLoopExitInstr()).value_or(1); | ||||||||||
PhiRecipe = new VPReductionPHIRecipe( | ||||||||||
Phi, RdxDesc, *StartV, CM.isInLoopReduction(Phi), | ||||||||||
CM.useOrderedReductions(RdxDesc), ScaleFactor); | ||||||||||
bool UseInLoopReduction = CM.isInLoopReduction(Phi); | ||||||||||
bool UseOrderedReductions = CM.useOrderedReductions(RdxDesc); | ||||||||||
auto ScaleFactor = | ||||||||||
(UseOrderedReductions || UseInLoopReduction) | ||||||||||
? 0 | ||||||||||
: getScalingForReduction(RdxDesc.getLoopExitInstr()).value_or(1); | ||||||||||
PhiRecipe = new VPReductionPHIRecipe(Phi, RdxDesc, *StartV, | ||||||||||
UseOrderedReductions, ScaleFactor); | ||||||||||
} else { | ||||||||||
// TODO: Currently fixed-order recurrences are modeled as chains of | ||||||||||
// first-order recurrences. If there are no users of the intermediate | ||||||||||
|
@@ -8346,7 +8350,8 @@ VPRecipeBuilder::tryToCreatePartialReduction(Instruction *Reduction, | |||||||||
VPValue *Accumulator = Operands[1]; | ||||||||||
VPRecipeBase *BinOpRecipe = BinOp->getDefiningRecipe(); | ||||||||||
if (isa<VPReductionPHIRecipe>(BinOpRecipe) || | ||||||||||
isa<VPPartialReductionRecipe>(BinOpRecipe)) | ||||||||||
(isa<VPReductionRecipe>(BinOpRecipe) && | ||||||||||
cast<VPReductionRecipe>(BinOpRecipe)->isPartialReduction())) | ||||||||||
std::swap(BinOp, Accumulator); | ||||||||||
|
||||||||||
unsigned ReductionOpcode = Reduction->getOpcode(); | ||||||||||
|
@@ -8367,12 +8372,10 @@ VPRecipeBuilder::tryToCreatePartialReduction(Instruction *Reduction, | |||||||||
"Expected an ADD or SUB operation for predicated partial " | ||||||||||
"reductions (because the neutral element in the mask is zero)!"); | ||||||||||
Cond = getBlockInMask(Builder.getInsertBlock()); | ||||||||||
VPValue *Zero = | ||||||||||
Plan.getOrAddLiveIn(ConstantInt::get(Reduction->getType(), 0)); | ||||||||||
BinOp = Builder.createSelect(Cond, BinOp, Zero, Reduction->getDebugLoc()); | ||||||||||
} | ||||||||||
return new VPPartialReductionRecipe(ReductionOpcode, Accumulator, BinOp, Cond, | ||||||||||
ScaleFactor, Reduction); | ||||||||||
|
||||||||||
return new VPReductionRecipe(RecurKind::Add, FastMathFlags(), Reduction, | ||||||||||
Accumulator, BinOp, Cond, false, ScaleFactor); | ||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just sharing an observation in case other reviewers notice changes in CodeGen in Passing in the condition to the VPReductionRecipe rather than creating a select on the input and passing that into the VPReductionRecipe results in different, but better, code for |
||||||||||
} | ||||||||||
|
||||||||||
void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF, | ||||||||||
|
@@ -9139,9 +9142,11 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( | |||||||||
FastMathFlags FMFs = isa<FPMathOperator>(CurrentLinkI) | ||||||||||
? RdxDesc.getFastMathFlags() | ||||||||||
: FastMathFlags(); | ||||||||||
bool UseOrderedReductions = CM.useOrderedReductions(RdxDesc); | ||||||||||
unsigned VFScaleFactor = !UseOrderedReductions; | ||||||||||
auto *RedRecipe = new VPReductionRecipe( | ||||||||||
Kind, FMFs, CurrentLinkI, PreviousLink, VecOp, CondOp, | ||||||||||
CM.useOrderedReductions(RdxDesc), CurrentLinkI->getDebugLoc()); | ||||||||||
UseOrderedReductions, VFScaleFactor, CurrentLinkI->getDebugLoc()); | ||||||||||
// Append the recipe to the end of the VPBasicBlock because we need to | ||||||||||
// ensure that it comes after all of it's inputs, including CondOp. | ||||||||||
// Delete CurrentLink as it will be invalid if its operand is replaced | ||||||||||
|
@@ -9175,8 +9180,9 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( | |||||||||
// Don't output selects for partial reductions because they have an output | ||||||||||
// with fewer lanes than the VF. So the operands of the select would have | ||||||||||
// different numbers of lanes. Partial reductions mask the input instead. | ||||||||||
auto *RR = dyn_cast<VPReductionRecipe>(OrigExitingVPV->getDefiningRecipe()); | ||||||||||
if (!PhiR->isInLoop() && CM.foldTailByMasking() && | ||||||||||
!isa<VPPartialReductionRecipe>(OrigExitingVPV->getDefiningRecipe())) { | ||||||||||
(!RR || !RR->isPartialReduction())) { | ||||||||||
VPValue *Cond = RecipeBuilder.getBlockInMask(PhiR->getParent()); | ||||||||||
std::optional<FastMathFlags> FMFs = | ||||||||||
PhiTy->isFloatingPointTy() | ||||||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This change looks like it could be split off to reduce the diff a bit?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This could be an NFC, but I don't know if it's worth it since it wouldn't meaningfully reduce the diff and committing something unused doesn't seem like a good idea, in case it ends up being unneeded in this PR for whatever reason.