@@ -503,11 +503,8 @@ class InnerLoopVectorizer {
503
503
// / is generated around the vectorized (and scalar epilogue) loops consisting
504
504
// / of various checks and bypasses. Return the pre-header block of the new
505
505
// / loop. In the case of epilogue vectorization, this function is overridden to
506
- // / handle the more complex control flow around the loops. \p ExpandedSCEVs is
507
- // / used to look up SCEV expansions for expressions needed during skeleton
508
- // / creation.
509
- virtual BasicBlock *
510
- createVectorizedLoopSkeleton (const SCEV2ValueTy &ExpandedSCEVs);
506
+ // / handle the more complex control flow around the loops.
507
+ virtual BasicBlock *createVectorizedLoopSkeleton ();
511
508
512
509
// / Fix the vectorized code, taking care of header phi's, and more.
513
510
void fixVectorizedLoop (VPTransformState &State);
@@ -535,12 +532,6 @@ class InnerLoopVectorizer {
535
532
// / count of the original loop for both main loop and epilogue vectorization.
536
533
void setTripCount (Value *TC) { TripCount = TC; }
537
534
538
- // Retrieve the additional bypass value associated with an original
539
- // / induction header phi.
540
- Value *getInductionAdditionalBypassValue (PHINode *OrigPhi) const {
541
- return Induction2AdditionalBypassValue.at (OrigPhi);
542
- }
543
-
544
535
// / Return the additional bypass block which targets the scalar loop by
545
536
// / skipping the epilogue loop after completing the main loop.
546
537
BasicBlock *getAdditionalBypassBlock () const {
@@ -577,11 +568,6 @@ class InnerLoopVectorizer {
577
568
// / vector loop preheader, middle block and scalar preheader.
578
569
void createVectorLoopSkeleton (StringRef Prefix);
579
570
580
- // / Create and record the values for induction variables to resume coming from
581
- // / the additional bypass block.
582
- void createInductionAdditionalBypassValues (const SCEV2ValueTy &ExpandedSCEVs,
583
- Value *MainVectorTripCount);
584
-
585
571
// / Allow subclasses to override and print debug traces before/after vplan
586
572
// / execution, when trace information is requested.
587
573
virtual void printDebugTracesAtStart () {}
@@ -671,11 +657,6 @@ class InnerLoopVectorizer {
671
657
// / for cleaning the checks, if vectorization turns out unprofitable.
672
658
GeneratedRTChecks &RTChecks;
673
659
674
- // / Mapping of induction phis to their additional bypass values. They
675
- // / need to be added as operands to phi nodes in the scalar loop preheader
676
- // / after the epilogue skeleton has been created.
677
- DenseMap<PHINode *, Value *> Induction2AdditionalBypassValue;
678
-
679
660
// / The additional bypass block which conditionally skips over the epilogue
680
661
// / loop after executing the main loop. Needed to resume inductions and
681
662
// / reductions during epilogue vectorization.
@@ -738,16 +719,14 @@ class InnerLoopAndEpilogueVectorizer : public InnerLoopVectorizer {
738
719
739
720
// Override this function to handle the more complex control flow around the
740
721
// three loops.
741
- BasicBlock *
742
- createVectorizedLoopSkeleton (const SCEV2ValueTy &ExpandedSCEVs) final {
743
- return createEpilogueVectorizedLoopSkeleton (ExpandedSCEVs);
722
+ BasicBlock *createVectorizedLoopSkeleton () final {
723
+ return createEpilogueVectorizedLoopSkeleton ();
744
724
}
745
725
746
726
// / The interface for creating a vectorized skeleton using one of two
747
727
// / different strategies, each corresponding to one execution of the vplan
748
728
// / as described above.
749
- virtual BasicBlock *
750
- createEpilogueVectorizedLoopSkeleton (const SCEV2ValueTy &ExpandedSCEVs) = 0;
729
+ virtual BasicBlock *createEpilogueVectorizedLoopSkeleton () = 0;
751
730
752
731
// / Holds and updates state information required to vectorize the main loop
753
732
// / and its epilogue in two separate passes. This setup helps us avoid
@@ -775,8 +754,7 @@ class EpilogueVectorizerMainLoop : public InnerLoopAndEpilogueVectorizer {
775
754
EPI, LVL, CM, BFI, PSI, Check, Plan) {}
776
755
// / Implements the interface for creating a vectorized skeleton using the
777
756
// / *main loop* strategy (i.e. the first pass of vplan execution).
778
- BasicBlock *
779
- createEpilogueVectorizedLoopSkeleton (const SCEV2ValueTy &ExpandedSCEVs) final ;
757
+ BasicBlock *createEpilogueVectorizedLoopSkeleton () final ;
780
758
781
759
protected:
782
760
// / Emits an iteration count bypass check once for the main loop (when \p
@@ -806,8 +784,7 @@ class EpilogueVectorizerEpilogueLoop : public InnerLoopAndEpilogueVectorizer {
806
784
}
807
785
// / Implements the interface for creating a vectorized skeleton using the
808
786
// / *epilogue loop* strategy (i.e. the second pass of vplan execution).
809
- BasicBlock *
810
- createEpilogueVectorizedLoopSkeleton (const SCEV2ValueTy &ExpandedSCEVs) final ;
787
+ BasicBlock *createEpilogueVectorizedLoopSkeleton () final ;
811
788
812
789
protected:
813
790
// / Emits an iteration count bypass check after the main vector loop has
@@ -2722,44 +2699,7 @@ static void addFullyUnrolledInstructionsToIgnore(
2722
2699
}
2723
2700
}
2724
2701
2725
- void InnerLoopVectorizer::createInductionAdditionalBypassValues (
2726
- const SCEV2ValueTy &ExpandedSCEVs, Value *MainVectorTripCount) {
2727
- assert (MainVectorTripCount && " Must have bypass information" );
2728
-
2729
- Instruction *OldInduction = Legal->getPrimaryInduction ();
2730
- IRBuilder<> BypassBuilder (getAdditionalBypassBlock (),
2731
- getAdditionalBypassBlock ()->getFirstInsertionPt ());
2732
- for (const auto &InductionEntry : Legal->getInductionVars ()) {
2733
- PHINode *OrigPhi = InductionEntry.first ;
2734
- const InductionDescriptor &II = InductionEntry.second ;
2735
- Value *Step = getExpandedStep (II, ExpandedSCEVs);
2736
- // For the primary induction the additional bypass end value is known.
2737
- // Otherwise it is computed.
2738
- Value *EndValueFromAdditionalBypass = MainVectorTripCount;
2739
- if (OrigPhi != OldInduction) {
2740
- auto *BinOp = II.getInductionBinOp ();
2741
- // Fast-math-flags propagate from the original induction instruction.
2742
- if (isa_and_nonnull<FPMathOperator>(BinOp))
2743
- BypassBuilder.setFastMathFlags (BinOp->getFastMathFlags ());
2744
-
2745
- // Compute the end value for the additional bypass.
2746
- EndValueFromAdditionalBypass =
2747
- emitTransformedIndex (BypassBuilder, MainVectorTripCount,
2748
- II.getStartValue (), Step, II.getKind (), BinOp);
2749
- EndValueFromAdditionalBypass->setName (" ind.end" );
2750
- }
2751
-
2752
- // Store the bypass value here, as it needs to be added as operand to its
2753
- // scalar preheader phi node after the epilogue skeleton has been created.
2754
- // TODO: Directly add as extra operand to the VPResumePHI recipe.
2755
- assert (!Induction2AdditionalBypassValue.contains (OrigPhi) &&
2756
- " entry for OrigPhi already exits" );
2757
- Induction2AdditionalBypassValue[OrigPhi] = EndValueFromAdditionalBypass;
2758
- }
2759
- }
2760
-
2761
- BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton (
2762
- const SCEV2ValueTy &ExpandedSCEVs) {
2702
+ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton () {
2763
2703
/*
2764
2704
In this function we generate a new loop. The new loop will contain
2765
2705
the vectorized instructions while the old loop will continue to run the
@@ -7726,16 +7666,11 @@ static void fixReductionScalarResumeWhenVectorizingEpilog(
7726
7666
7727
7667
DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan (
7728
7668
ElementCount BestVF, unsigned BestUF, VPlan &BestVPlan,
7729
- InnerLoopVectorizer &ILV, DominatorTree *DT, bool VectorizingEpilogue,
7730
- const DenseMap<const SCEV *, Value *> *ExpandedSCEVs) {
7669
+ InnerLoopVectorizer &ILV, DominatorTree *DT, bool VectorizingEpilogue) {
7731
7670
assert (BestVPlan.hasVF (BestVF) &&
7732
7671
" Trying to execute plan with unsupported VF" );
7733
7672
assert (BestVPlan.hasUF (BestUF) &&
7734
7673
" Trying to execute plan with unsupported UF" );
7735
- assert (
7736
- ((VectorizingEpilogue && ExpandedSCEVs) ||
7737
- (!VectorizingEpilogue && !ExpandedSCEVs)) &&
7738
- " expanded SCEVs to reuse can only be used during epilogue vectorization" );
7739
7674
// TODO: Move to VPlan transform stage once the transition to the VPlan-based
7740
7675
// cost model is complete for better cost estimates.
7741
7676
VPlanTransforms::runPass (VPlanTransforms::unrollByUF, BestVPlan, BestUF,
@@ -7773,8 +7708,8 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
7773
7708
// middle block. The vector loop is created during VPlan execution.
7774
7709
VPBasicBlock *VectorPH =
7775
7710
cast<VPBasicBlock>(BestVPlan.getEntry ()->getSingleSuccessor ());
7776
- State. CFG . PrevBB = ILV. createVectorizedLoopSkeleton (
7777
- ExpandedSCEVs ? *ExpandedSCEVs : State. ExpandedSCEVs );
7711
+
7712
+ State. CFG . PrevBB = ILV. createVectorizedLoopSkeleton ( );
7778
7713
if (VectorizingEpilogue)
7779
7714
VPlanTransforms::removeDeadRecipes (BestVPlan);
7780
7715
@@ -7815,8 +7750,8 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
7815
7750
BestVPlan.execute (&State);
7816
7751
7817
7752
auto *MiddleVPBB = BestVPlan.getMiddleBlock ();
7818
- // 2.5 When vectorizing the epilogue, fix reduction and induction resume
7819
- // values from the additional bypass block.
7753
+ // 2.5 When vectorizing the epilogue, fix reduction resume values from the
7754
+ // additional bypass block.
7820
7755
if (VectorizingEpilogue) {
7821
7756
assert (!ILV.Legal ->hasUncountableEarlyExit () &&
7822
7757
" Epilogue vectorisation not yet supported with early exits" );
@@ -7834,11 +7769,6 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
7834
7769
fixReductionScalarResumeWhenVectorizingEpilog (
7835
7770
&R, State, State.CFG .VPBB2IRBB [MiddleVPBB], BypassBlock);
7836
7771
}
7837
- for (const auto &[IVPhi, _] : Legal->getInductionVars ()) {
7838
- auto *Inc = cast<PHINode>(IVPhi->getIncomingValueForBlock (PH));
7839
- Value *V = ILV.getInductionAdditionalBypassValue (IVPhi);
7840
- Inc->setIncomingValueForBlock (BypassBlock, V);
7841
- }
7842
7772
}
7843
7773
7844
7774
// 2.6. Maintain Loop Hints
@@ -7900,8 +7830,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
7900
7830
7901
7831
// / This function is partially responsible for generating the control flow
7902
7832
// / depicted in https://llvm.org/docs/Vectorizers.html#epilogue-vectorization.
7903
- BasicBlock *EpilogueVectorizerMainLoop::createEpilogueVectorizedLoopSkeleton (
7904
- const SCEV2ValueTy &ExpandedSCEVs) {
7833
+ BasicBlock *EpilogueVectorizerMainLoop::createEpilogueVectorizedLoopSkeleton () {
7905
7834
createVectorLoopSkeleton (" " );
7906
7835
7907
7836
// Generate the code to check the minimum iteration count of the vector
@@ -8011,8 +7940,7 @@ EpilogueVectorizerMainLoop::emitIterationCountCheck(BasicBlock *Bypass,
8011
7940
// / This function is partially responsible for generating the control flow
8012
7941
// / depicted in https://llvm.org/docs/Vectorizers.html#epilogue-vectorization.
8013
7942
BasicBlock *
8014
- EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton (
8015
- const SCEV2ValueTy &ExpandedSCEVs) {
7943
+ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton () {
8016
7944
createVectorLoopSkeleton (" vec.epilog." );
8017
7945
8018
7946
// Now, compare the remaining count and if there aren't enough iterations to
@@ -8080,11 +8008,6 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton(
8080
8008
Phi->removeIncomingValue (EPI.MemSafetyCheck );
8081
8009
}
8082
8010
8083
- // Generate bypass values from the additional bypass block. Note that when the
8084
- // vectorized epilogue is skipped due to iteration count check, then the
8085
- // resume value for the induction variable comes from the trip count of the
8086
- // main vector loop, passed as the second argument.
8087
- createInductionAdditionalBypassValues (ExpandedSCEVs, EPI.VectorTripCount );
8088
8011
return LoopVectorPreHeader;
8089
8012
}
8090
8013
@@ -10529,6 +10452,33 @@ preparePlanForEpilogueVectorLoop(VPlan &Plan, Loop *L,
10529
10452
}
10530
10453
}
10531
10454
10455
+ // Compute the resume value an induction takes when the scalar loop is entered
10456
+ // from the additional bypass block, i.e. when the vectorized epilogue is
10457
+ // skipped by the iteration count check. The value is derived from the trip
10458
+ // count of the main vector loop, passed as \p MainVectorTripCount.
10459
+ static Value *createInductionAdditionalBypassValues (
10460
+ PHINode *OrigPhi, const InductionDescriptor &II, IRBuilder<> &BypassBuilder,
10461
+ const SCEV2ValueTy &ExpandedSCEVs, Value *MainVectorTripCount,
10462
+ Instruction *OldInduction) {
10463
+ Value *Step = getExpandedStep (II, ExpandedSCEVs);
10464
+ // The primary induction resumes directly at the main vector trip count;
10465
+ // every other induction needs its end value computed from it below.
10466
+ Value *EndValueFromAdditionalBypass = MainVectorTripCount;
10467
+ if (OrigPhi != OldInduction) {
10468
+ auto *BinOp = II.getInductionBinOp ();
10469
+ // Propagate fast-math flags from the original FP induction update. NOTE(review): they persist on the caller's builder afterwards - confirm intended.
10470
+ if (isa_and_nonnull<FPMathOperator>(BinOp))
10471
+ BypassBuilder.setFastMathFlags (BinOp->getFastMathFlags ());
10472
+
10473
+ // Transform the main vector trip count into this induction's end value.
10474
+ EndValueFromAdditionalBypass =
10475
+ emitTransformedIndex (BypassBuilder, MainVectorTripCount,
10476
+ II.getStartValue (), Step, II.getKind (), BinOp);
10477
+ EndValueFromAdditionalBypass->setName (" ind.end" );
10478
+ }
10479
+ return EndValueFromAdditionalBypass;
10480
+ }
10481
+
10532
10482
bool LoopVectorizePass::processLoop (Loop *L) {
10533
10483
assert ((EnableVPlanNativePath || L->isInnermost ()) &&
10534
10484
" VPlan-native path is not enabled. Only process inner loops." );
@@ -10912,7 +10862,21 @@ bool LoopVectorizePass::processLoop(Loop *L) {
10912
10862
preparePlanForEpilogueVectorLoop (BestEpiPlan, L, ExpandedSCEVs, EPI);
10913
10863
10914
10864
LVP.executePlan (EPI.EpilogueVF , EPI.EpilogueUF , BestEpiPlan, EpilogILV,
10915
- DT, true , &ExpandedSCEVs);
10865
+ DT, true );
10866
+
10867
+ // Fix induction resume values from the additional bypass block.
10868
+ BasicBlock *BypassBlock = EpilogILV.getAdditionalBypassBlock ();
10869
+ IRBuilder<> BypassBuilder (BypassBlock,
10870
+ BypassBlock->getFirstInsertionPt ());
10871
+ BasicBlock *PH = L->getLoopPreheader ();
10872
+ for (const auto &[IVPhi, II] : LVL.getInductionVars ()) {
10873
+ auto *Inc = cast<PHINode>(IVPhi->getIncomingValueForBlock (PH));
10874
+ Value *V = createInductionAdditionalBypassValues (
10875
+ IVPhi, II, BypassBuilder, ExpandedSCEVs, EPI.VectorTripCount ,
10876
+ LVL.getPrimaryInduction ());
10877
+ // TODO: Directly add as extra operand to the VPResumePHI recipe.
10878
+ Inc->setIncomingValueForBlock (BypassBlock, V);
10879
+ }
10916
10880
++LoopsEpilogueVectorized;
10917
10881
10918
10882
if (!MainILV.areSafetyChecksAdded ())
0 commit comments