[LAA] Support assumptions in evaluatePtrAddRecAtMaxBTCWillNotWrap #147047

Status: Open. This pull request wants to merge 7 commits into main; the diff below shows changes from all commits.
32 changes: 25 additions & 7 deletions llvm/include/llvm/Analysis/LoopAccessAnalysis.h
@@ -180,10 +180,12 @@ class MemoryDepChecker {
const SmallVectorImpl<Instruction *> &Instrs) const;
};

-  MemoryDepChecker(PredicatedScalarEvolution &PSE, const Loop *L,
+  MemoryDepChecker(PredicatedScalarEvolution &PSE, AssumptionCache *AC,
+                   DominatorTree *DT, const Loop *L,
const DenseMap<Value *, const SCEV *> &SymbolicStrides,
unsigned MaxTargetVectorWidthInBits)
-      : PSE(PSE), InnermostLoop(L), SymbolicStrides(SymbolicStrides),
+      : PSE(PSE), AC(AC), DT(DT), InnermostLoop(L),
+        SymbolicStrides(SymbolicStrides),
MaxTargetVectorWidthInBits(MaxTargetVectorWidthInBits) {}

/// Register the location (instructions are given increasing numbers)
@@ -288,6 +290,15 @@ class MemoryDepChecker {
return PointerBounds;
}

+  DominatorTree *getDT() const {
+    assert(DT && "requested DT, but it is not available");
+    return DT;
+  }
+  AssumptionCache *getAC() const {
+    assert(AC && "requested AC, but it is not available");
+    return AC;
+  }

private:
/// A wrapper around ScalarEvolution, used to add runtime SCEV checks, and
/// applies dynamic knowledge to simplify SCEV expressions and convert them
@@ -296,6 +307,10 @@
/// example we might assume a unit stride for a pointer in order to prove
/// that a memory access is strided and doesn't wrap.
PredicatedScalarEvolution &PSE;

+  AssumptionCache *AC;
+  DominatorTree *DT;
Comment on lines +311 to +312 (Contributor Author):
An alternative would be to retrieve them directly from ScalarEvolution, which already holds them, but it is not accessible at the moment. I'm not sure whether we should expose them for more convenient use here in this patch.
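For reference, a minimal sketch of that alternative, assuming hypothetical getters on ScalarEvolution (it already owns both analyses as private members but exposes neither at the time of this PR):

// Hypothetical accessors; these are NOT part of ScalarEvolution's public
// API in this PR. ScalarEvolution already stores both analyses, so exposing
// them would let MemoryDepChecker drop its own AC/DT members.
class ScalarEvolution {
  AssumptionCache &AC;
  DominatorTree &DT;
  // ...
public:
  AssumptionCache &getAssumptionCache() const { return AC; }
  DominatorTree &getDominatorTree() const { return DT; }
};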


const Loop *InnermostLoop;

/// Reference to map of pointer values to
@@ -670,7 +685,7 @@ class LoopAccessInfo {
LLVM_ABI LoopAccessInfo(Loop *L, ScalarEvolution *SE,
const TargetTransformInfo *TTI,
const TargetLibraryInfo *TLI, AAResults *AA,
-                          DominatorTree *DT, LoopInfo *LI,
+                          DominatorTree *DT, LoopInfo *LI, AssumptionCache *AC,
Comment on lines -673 to +688 (Contributor):
Can we have = nullptr defaults for DT, LI, and AC?

Reply (Contributor Author):
Sure, done, thanks.
bool AllowPartial = false);

/// Return true if we can analyze the memory accesses in the loop and there are
@@ -922,7 +937,8 @@ LLVM_ABI std::pair<const SCEV *, const SCEV *> getStartAndEndForAccess(
const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *BTC,
const SCEV *MaxBTC, ScalarEvolution *SE,
DenseMap<std::pair<const SCEV *, Type *>,
-                std::pair<const SCEV *, const SCEV *>> *PointerBounds);
+                std::pair<const SCEV *, const SCEV *>> *PointerBounds,
+    DominatorTree *DT, AssumptionCache *AC);

class LoopAccessInfoManager {
/// The cache.
@@ -935,12 +951,14 @@ class LoopAccessInfoManager {
LoopInfo &LI;
TargetTransformInfo *TTI;
const TargetLibraryInfo *TLI = nullptr;
+  AssumptionCache *AC;

public:
LoopAccessInfoManager(ScalarEvolution &SE, AAResults &AA, DominatorTree &DT,
-                        LoopInfo &LI, TargetTransformInfo *TTI,
-                        const TargetLibraryInfo *TLI)
-      : SE(SE), AA(AA), DT(DT), LI(LI), TTI(TTI), TLI(TLI) {}
+                        LoopInfo &LI, TargetTransformInfo *TTI = nullptr,
+                        const TargetLibraryInfo *TLI = nullptr,
+                        AssumptionCache *AC = nullptr)
+      : SE(SE), AA(AA), DT(DT), LI(LI), TTI(TTI), TLI(TLI), AC(AC) {}

LLVM_ABI const LoopAccessInfo &getInfo(Loop &L, bool AllowPartial = false);
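With the defaulted parameters above, callers that lack some of the analyses can stay terse. A small usage sketch (surrounding pass boilerplate elided; variable names assumed):

// All analyses available, as in the LoopAccessAnalysis pass below:
LoopAccessInfoManager Full(SE, AA, DT, LI, &TTI, &TLI, &AC);
// Minimal caller; TTI, TLI, and AC default to nullptr:
LoopAccessInfoManager Minimal(SE, AA, DT, LI);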

10 changes: 1 addition & 9 deletions llvm/lib/Analysis/Loads.cpp
@@ -333,16 +333,8 @@ bool llvm::isDereferenceableAndAlignedInLoop(
if (isa<SCEVCouldNotCompute>(MaxBECount))
return false;

-  if (isa<SCEVCouldNotCompute>(BECount)) {
-    // TODO: Support symbolic max backedge taken counts for loops without
-    // computable backedge taken counts.
-    MaxBECount =
-        Predicates
-            ? SE.getPredicatedConstantMaxBackedgeTakenCount(L, *Predicates)
-            : SE.getConstantMaxBackedgeTakenCount(L);
-  }
   const auto &[AccessStart, AccessEnd] = getStartAndEndForAccess(
-      L, PtrScev, LI->getType(), BECount, MaxBECount, &SE, nullptr);
+      L, PtrScev, LI->getType(), BECount, MaxBECount, &SE, nullptr, &DT, AC);
if (isa<SCEVCouldNotCompute>(AccessStart) ||
isa<SCEVCouldNotCompute>(AccessEnd))
return false;
76 changes: 49 additions & 27 deletions llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -23,6 +23,8 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/AssumeBundleQueries.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
@@ -208,28 +210,46 @@ static const SCEV *mulSCEVOverflow(const SCEV *A, const SCEV *B,

/// Return true, if evaluating \p AR at \p MaxBTC cannot wrap, because \p AR at
/// \p MaxBTC is guaranteed inbounds of the accessed object.
-static bool evaluatePtrAddRecAtMaxBTCWillNotWrap(const SCEVAddRecExpr *AR,
-                                                 const SCEV *MaxBTC,
-                                                 const SCEV *EltSize,
-                                                 ScalarEvolution &SE,
-                                                 const DataLayout &DL) {
+static bool
+evaluatePtrAddRecAtMaxBTCWillNotWrap(const SCEVAddRecExpr *AR,
+                                     const SCEV *MaxBTC, const SCEV *EltSize,
+                                     ScalarEvolution &SE, const DataLayout &DL,
+                                     DominatorTree *DT, AssumptionCache *AC) {
auto *PointerBase = SE.getPointerBase(AR->getStart());
auto *StartPtr = dyn_cast<SCEVUnknown>(PointerBase);
if (!StartPtr)
return false;
const Loop *L = AR->getLoop();
bool CheckForNonNull, CheckForFreed;
-  uint64_t DerefBytes = StartPtr->getValue()->getPointerDereferenceableBytes(
+  Value *StartPtrV = StartPtr->getValue();
+  uint64_t DerefBytes = StartPtrV->getPointerDereferenceableBytes(
DL, CheckForNonNull, CheckForFreed);

-  if (CheckForNonNull || CheckForFreed)
+  if (DerefBytes && (CheckForNonNull || CheckForFreed))
return false;

const SCEV *Step = AR->getStepRecurrence(SE);
+  Type *WiderTy = SE.getWiderType(MaxBTC->getType(), Step->getType());
+  const SCEV *DerefBytesSCEV = SE.getConstant(WiderTy, DerefBytes);

+  // Check if we have a suitable dereferenceable assumption we can use.
+  if (!StartPtrV->canBeFreed()) {
+    RetainedKnowledge DerefRK = getKnowledgeValidInContext(
+        StartPtrV, {Attribute::Dereferenceable}, *AC,
+        L->getLoopPredecessor()->getTerminator(), DT);
+    if (DerefRK) {
+      DerefBytesSCEV = SE.getUMaxExpr(
+          DerefBytesSCEV, SE.getConstant(WiderTy, DerefRK.ArgValue));
+    }
+  }

Comment on this hunk (Contributor):
There is a lot of similarity between this and isDereferenceableAndAlignedPointerViaAssumption. In the latter API, we check canBeFreed but use getKnowledgeForValue with the context instruction passed through the lambda isValidAssumeForContext(Assume, CtxI, DT). Note that we bail out there in the absence of a context instruction, but we could technically use the loop predecessor's terminator (since the pointer is not freed through the entire function).

Can we common this logic across both patches (#128436)? The difference is that we check alignment assumptions as well in that API.

Reply (Contributor Author):
Sounds good, I'll land #128436 first and then update this PR.

Reply (Contributor, annamthomas, Jul 17, 2025):
There's a bigger difference: here we only handle the case where DerefRK is a constant value. That won't be enough for non-constant deref values in assumption bundles. We need to get the IRArgValue for DerefRK, just as done in LoopAccessAnalysis, and update the SCEV APIs below to use non-constant values.

@fhahn I think the commoning can be done after this patch has landed? I have some improvements locally that were built on top of this patch (even the non-constant deref can be done as a follow-up).
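As a concrete starting point for the commoning discussed above, one possible shape for a shared helper. The name and signature are hypothetical, and it only folds together the two sources of constant dereferenceable bytes used in this hunk:

#include <algorithm>

// Hypothetical shared helper (illustrative only): combine deref bytes implied
// by the pointer's attributes with deref bytes retained from a matching
// llvm.assume operand bundle, valid at the loop predecessor's terminator.
static uint64_t getKnownDereferenceableBytes(Value *Ptr, const Loop *L,
                                             AssumptionCache *AC,
                                             DominatorTree *DT,
                                             const DataLayout &DL) {
  bool CheckForNonNull, CheckForFreed;
  uint64_t Bytes =
      Ptr->getPointerDereferenceableBytes(DL, CheckForNonNull, CheckForFreed);
  if (Bytes && (CheckForNonNull || CheckForFreed))
    return 0; // Conditional dereferenceability is not usable here.
  // Assumption bundles stay valid only while the object cannot be freed.
  if (AC && !Ptr->canBeFreed()) {
    RetainedKnowledge RK = getKnowledgeValidInContext(
        Ptr, {Attribute::Dereferenceable}, *AC,
        L->getLoopPredecessor()->getTerminator(), DT);
    if (RK)
      Bytes = std::max(Bytes, RK.ArgValue);
  }
  return Bytes;
}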

Comment on this hunk (Contributor):
Nit: might be worth bailing out early if DerefBytesSCEV is zero, which I think will be quite common. I suspect in most cases the pointer won't be marked as dereferenceable or have an assumption, in which case getPointerDereferenceableBytes returns 0.

Reply (Contributor Author):
Sounds good, updated, thanks!

+  if (DerefBytesSCEV->isZero())
+    return false;

bool IsKnownNonNegative = SE.isKnownNonNegative(Step);
if (!IsKnownNonNegative && !SE.isKnownNegative(Step))
return false;

-  Type *WiderTy = SE.getWiderType(MaxBTC->getType(), Step->getType());
Step = SE.getNoopOrSignExtend(Step, WiderTy);
MaxBTC = SE.getNoopOrZeroExtend(MaxBTC, WiderTy);

@@ -256,24 +276,23 @@ static bool evaluatePtrAddRecAtMaxBTCWillNotWrap(const SCEVAddRecExpr *AR,
const SCEV *EndBytes = addSCEVNoOverflow(StartOffset, OffsetEndBytes, SE);
if (!EndBytes)
return false;
-  return SE.isKnownPredicate(CmpInst::ICMP_ULE, EndBytes,
-                             SE.getConstant(WiderTy, DerefBytes));
+  return SE.isKnownPredicate(CmpInst::ICMP_ULE, EndBytes, DerefBytesSCEV);
}

// For negative steps check if
// * StartOffset >= (MaxBTC * Step + EltSize)
// * StartOffset <= DerefBytes.
assert(SE.isKnownNegative(Step) && "must be known negative");
return SE.isKnownPredicate(CmpInst::ICMP_SGE, StartOffset, OffsetEndBytes) &&
-         SE.isKnownPredicate(CmpInst::ICMP_ULE, StartOffset,
-                             SE.getConstant(WiderTy, DerefBytes));
+         SE.isKnownPredicate(CmpInst::ICMP_ULE, StartOffset, DerefBytesSCEV);
}
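To sanity-check the positive-step condition above with concrete numbers, matching the dereferenceable-assumption test later in this PR (all values below are illustrative):

#include <cstdint>

// {%B,+,4} accessed as i32 for at most 500 iterations (MaxBTC = 499); the
// first byte past the last access is StartOffset + MaxBTC * Step + EltSize.
constexpr uint64_t StartOffset = 0; // the access starts at the base %B
constexpr uint64_t Step = 4, MaxBTC = 499, EltSize = 4;
constexpr uint64_t EndBytes = StartOffset + MaxBTC * Step + EltSize; // 2000
// With "dereferenceable"(ptr %B, i64 2000) assumed, EndBytes <= DerefBytes
// holds, so ScEnd can be the exact (2000 + %B) instead of unsigned max.
static_assert(EndBytes <= 2000, "last access stays within the object");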

std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess(
const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *BTC,
const SCEV *MaxBTC, ScalarEvolution *SE,
DenseMap<std::pair<const SCEV *, Type *>,
-                std::pair<const SCEV *, const SCEV *>> *PointerBounds) {
+                std::pair<const SCEV *, const SCEV *>> *PointerBounds,
+    DominatorTree *DT, AssumptionCache *AC) {
std::pair<const SCEV *, const SCEV *> *PtrBoundsPair;
if (PointerBounds) {
auto [Iter, Ins] = PointerBounds->insert(
@@ -308,8 +327,8 @@ std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess(
// sets ScEnd to the maximum unsigned value for the type. Note that LAA
// separately checks that accesses cannot wrap, so unsigned max
// represents an upper bound.
-    if (evaluatePtrAddRecAtMaxBTCWillNotWrap(AR, MaxBTC, EltSizeSCEV, *SE,
-                                             DL)) {
+    if (evaluatePtrAddRecAtMaxBTCWillNotWrap(AR, MaxBTC, EltSizeSCEV, *SE, DL,
+                                             DT, AC)) {
ScEnd = AR->evaluateAtIteration(MaxBTC, *SE);
} else {
ScEnd = SE->getAddExpr(
@@ -356,9 +375,9 @@ void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, const SCEV *PtrExpr,
bool NeedsFreeze) {
const SCEV *SymbolicMaxBTC = PSE.getSymbolicMaxBackedgeTakenCount();
const SCEV *BTC = PSE.getBackedgeTakenCount();
-  const auto &[ScStart, ScEnd] =
-      getStartAndEndForAccess(Lp, PtrExpr, AccessTy, BTC, SymbolicMaxBTC,
-                              PSE.getSE(), &DC.getPointerBounds());
+  const auto &[ScStart, ScEnd] = getStartAndEndForAccess(
+      Lp, PtrExpr, AccessTy, BTC, SymbolicMaxBTC, PSE.getSE(),
+      &DC.getPointerBounds(), DC.getDT(), DC.getAC());
assert(!isa<SCEVCouldNotCompute>(ScStart) &&
!isa<SCEVCouldNotCompute>(ScEnd) &&
"must be able to compute both start and end expressions");
@@ -1961,13 +1980,15 @@ bool MemoryDepChecker::areAccessesCompletelyBeforeOrAfter(const SCEV *Src,
const SCEV *BTC = PSE.getBackedgeTakenCount();
const SCEV *SymbolicMaxBTC = PSE.getSymbolicMaxBackedgeTakenCount();
ScalarEvolution &SE = *PSE.getSE();
-  const auto &[SrcStart_, SrcEnd_] = getStartAndEndForAccess(
-      InnermostLoop, Src, SrcTy, BTC, SymbolicMaxBTC, &SE, &PointerBounds);
+  const auto &[SrcStart_, SrcEnd_] =
+      getStartAndEndForAccess(InnermostLoop, Src, SrcTy, BTC, SymbolicMaxBTC,
+                              &SE, &PointerBounds, DT, AC);
if (isa<SCEVCouldNotCompute>(SrcStart_) || isa<SCEVCouldNotCompute>(SrcEnd_))
return false;

-  const auto &[SinkStart_, SinkEnd_] = getStartAndEndForAccess(
-      InnermostLoop, Sink, SinkTy, BTC, SymbolicMaxBTC, &SE, &PointerBounds);
+  const auto &[SinkStart_, SinkEnd_] =
+      getStartAndEndForAccess(InnermostLoop, Sink, SinkTy, BTC, SymbolicMaxBTC,
+                              &SE, &PointerBounds, DT, AC);
if (isa<SCEVCouldNotCompute>(SinkStart_) ||
isa<SCEVCouldNotCompute>(SinkEnd_))
return false;
@@ -3003,7 +3024,7 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
const TargetTransformInfo *TTI,
const TargetLibraryInfo *TLI, AAResults *AA,
DominatorTree *DT, LoopInfo *LI,
-                               bool AllowPartial)
+                               AssumptionCache *AC, bool AllowPartial)
: PSE(std::make_unique<PredicatedScalarEvolution>(*SE, *L)),
PtrRtChecking(nullptr), TheLoop(L), AllowPartial(AllowPartial) {
unsigned MaxTargetVectorWidthInBits = std::numeric_limits<unsigned>::max();
Expand All @@ -3013,8 +3034,8 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
MaxTargetVectorWidthInBits =
TTI->getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector) * 2;

-  DepChecker = std::make_unique<MemoryDepChecker>(*PSE, L, SymbolicStrides,
-                                                  MaxTargetVectorWidthInBits);
+  DepChecker = std::make_unique<MemoryDepChecker>(
+      *PSE, AC, DT, L, SymbolicStrides, MaxTargetVectorWidthInBits);
PtrRtChecking = std::make_unique<RuntimePointerChecking>(*DepChecker, SE);
if (canAnalyzeLoop())
CanVecMem = analyzeLoop(AA, LI, TLI, DT);
@@ -3083,7 +3104,7 @@ const LoopAccessInfo &LoopAccessInfoManager::getInfo(Loop &L,
// or if it was created with a different value of AllowPartial.
if (Inserted || It->second->hasAllowPartial() != AllowPartial)
It->second = std::make_unique<LoopAccessInfo>(&L, &SE, TTI, TLI, &AA, &DT,
-                                                  &LI, AllowPartial);
+                                                  &LI, AC, AllowPartial);

return *It->second;
}
@@ -3126,7 +3147,8 @@ LoopAccessInfoManager LoopAccessAnalysis::run(Function &F,
auto &LI = FAM.getResult<LoopAnalysis>(F);
auto &TTI = FAM.getResult<TargetIRAnalysis>(F);
auto &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
-  return LoopAccessInfoManager(SE, AA, DT, LI, &TTI, &TLI);
+  auto &AC = FAM.getResult<AssumptionAnalysis>(F);
+  return LoopAccessInfoManager(SE, AA, DT, LI, &TTI, &TLI, &AC);
}

AnalysisKey LoopAccessAnalysis::Key;
2 changes: 1 addition & 1 deletion llvm/lib/Transforms/Scalar/LoopFlatten.cpp
@@ -1009,7 +1009,7 @@ PreservedAnalyses LoopFlattenPass::run(LoopNest &LN, LoopAnalysisManager &LAM,
// in simplified form, and also needs LCSSA. Running
// this pass will simplify all loops that contain inner loops,
// regardless of whether anything ends up being flattened.
-  LoopAccessInfoManager LAIM(AR.SE, AR.AA, AR.DT, AR.LI, &AR.TTI, nullptr);
+  LoopAccessInfoManager LAIM(AR.SE, AR.AA, AR.DT, AR.LI, &AR.TTI);
for (Loop *InnerLoop : LN.getLoops()) {
auto *OuterLoop = InnerLoop->getParentLoop();
if (!OuterLoop)
2 changes: 1 addition & 1 deletion llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp
@@ -551,7 +551,7 @@ PreservedAnalyses LoopVersioningLICMPass::run(Loop &L, LoopAnalysisManager &AM,
const Function *F = L.getHeader()->getParent();
OptimizationRemarkEmitter ORE(F);

-  LoopAccessInfoManager LAIs(*SE, *AA, *DT, LAR.LI, nullptr, nullptr);
+  LoopAccessInfoManager LAIs(*SE, *AA, *DT, LAR.LI, nullptr, nullptr, &LAR.AC);
if (!LoopVersioningLICM(AA, SE, &ORE, LAIs, LAR.LI, &L).run(DT))
return PreservedAnalyses::all();
return getLoopPassPreservedAnalyses();
Changes to a LoopAccessAnalysis test file (file name not captured in this view):
@@ -505,7 +505,7 @@ e.1:
ret i32 1
}

-define void @all_exits_dominate_latch_countable_exits_at_most_500_iterations_known_deref_via_assumption(ptr %A, ptr %B) {
+define void @all_exits_dominate_latch_countable_exits_at_most_500_iterations_known_deref_via_assumption(ptr %A, ptr %B) nosync nofree {
; CHECK-LABEL: 'all_exits_dominate_latch_countable_exits_at_most_500_iterations_known_deref_via_assumption'
; CHECK-NEXT: loop.header:
; CHECK-NEXT: Memory dependences are safe with run-time checks
@@ -518,10 +518,10 @@ define void @all_exits_dominate_latch_countable_exits_at_most_500_iterations_kno
; CHECK-NEXT: %gep.A = getelementptr inbounds i32, ptr %A, i64 %iv
; CHECK-NEXT: Grouped accesses:
; CHECK-NEXT: Group GRP0:
-; CHECK-NEXT:       (Low: %B High: inttoptr (i64 -1 to ptr))
+; CHECK-NEXT:       (Low: %B High: (2000 + %B))
; CHECK-NEXT: Member: {%B,+,4}<nuw><%loop.header>
; CHECK-NEXT: Group GRP1:
-; CHECK-NEXT:       (Low: %A High: inttoptr (i64 -1 to ptr))
+; CHECK-NEXT:       (Low: %A High: (2000 + %A))
; CHECK-NEXT: Member: {%A,+,4}<nuw><%loop.header>
; CHECK-EMPTY:
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
@@ -565,7 +565,7 @@ e.2:
ret void
}

-define void @all_exits_dominate_latch_countable_exits_at_most_500_iterations_deref_via_assumption_too_small(ptr %A, ptr %B) {
+define void @all_exits_dominate_latch_countable_exits_at_most_500_iterations_deref_via_assumption_too_small(ptr %A, ptr %B) nosync nofree {
; CHECK-LABEL: 'all_exits_dominate_latch_countable_exits_at_most_500_iterations_deref_via_assumption_too_small'
; CHECK-NEXT: loop.header:
; CHECK-NEXT: Memory dependences are safe with run-time checks
Changes to a LoopVectorize test file (file name not captured in this view):
@@ -7,21 +7,48 @@ define i64 @early_exit_alignment_and_deref_known_via_assumption_with_constant_si
; CHECK-NEXT: entry:
; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[P1]], i64 4), "dereferenceable"(ptr [[P1]], i64 1024) ]
; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[P2]], i64 4), "dereferenceable"(ptr [[P2]], i64 1024) ]
+; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK:       vector.ph:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX1]]
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 0
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0
+; CHECK-NEXT:    [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp ne <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]]
+; CHECK-NEXT:    [[INDEX_NEXT3]] = add nuw i64 [[INDEX1]], 4
+; CHECK-NEXT:    [[TMP5:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP4]])
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 1024
+; CHECK-NEXT:    [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]]
+; CHECK-NEXT:    br i1 [[TMP7]], label [[MIDDLE_SPLIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:       middle.split:
+; CHECK-NEXT:    br i1 [[TMP5]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
+; CHECK:       middle.block:
+; CHECK-NEXT:    br i1 true, label [[LOOP_END:%.*]], label [[SCALAR_PH]]
+; CHECK:       vector.early.exit:
+; CHECK-NEXT:    [[TMP8:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP4]], i1 true)
+; CHECK-NEXT:    [[TMP9:%.*]] = add i64 [[INDEX1]], [[TMP8]]
+; CHECK-NEXT:    br label [[LOOP_END]]
+; CHECK:       scalar.ph:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    br label [[LOOP1:%.*]]
; CHECK: loop:
-; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
-; CHECK-NEXT:    br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
+; CHECK-NEXT:    br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END]]
; CHECK: loop.inc:
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 1024
-; CHECK-NEXT:    br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[LOOP1]], label [[LOOP_END]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: loop.end:
-; CHECK-NEXT:    [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ -1, [[LOOP_INC]] ]
+; CHECK-NEXT:    [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP1]] ], [ -1, [[LOOP_INC]] ], [ -1, [[MIDDLE_BLOCK]] ], [ [[TMP9]], [[VECTOR_EARLY_EXIT]] ]
; CHECK-NEXT: ret i64 [[RETVAL]]
;
entry:
3 changes: 2 additions & 1 deletion llvm/unittests/Transforms/Vectorize/VPlanSlpTest.cpp
@@ -41,7 +41,8 @@ class VPlanSlpTest : public VPlanTestIRBase {
AARes.reset(new AAResults(*TLI));
AARes->addAAResult(*BasicAA);
PSE.reset(new PredicatedScalarEvolution(*SE, *L));
-    LAI.reset(new LoopAccessInfo(L, &*SE, nullptr, &*TLI, &*AARes, &*DT, &*LI));
+    LAI.reset(
+        new LoopAccessInfo(L, &*SE, nullptr, &*TLI, &*AARes, &*DT, &*LI, &*AC));
IAI.reset(new InterleavedAccessInfo(*PSE, L, &*DT, &*LI, &*LAI));
IAI->analyzeInterleaving(false);
return {Plan, *IAI};