-
Notifications
You must be signed in to change notification settings - Fork 14.5k
[LAA] Support assumptions in evaluatePtrAddRecAtMaxBTCWillNotWrap #147047
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
1512049
8db1162
e755778
d00836a
55b9f7e
626a4fb
9d9179e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -180,10 +180,12 @@ class MemoryDepChecker { | |
const SmallVectorImpl<Instruction *> &Instrs) const; | ||
}; | ||
|
||
MemoryDepChecker(PredicatedScalarEvolution &PSE, const Loop *L, | ||
MemoryDepChecker(PredicatedScalarEvolution &PSE, AssumptionCache *AC, | ||
DominatorTree *DT, const Loop *L, | ||
const DenseMap<Value *, const SCEV *> &SymbolicStrides, | ||
unsigned MaxTargetVectorWidthInBits) | ||
: PSE(PSE), InnermostLoop(L), SymbolicStrides(SymbolicStrides), | ||
: PSE(PSE), AC(AC), DT(DT), InnermostLoop(L), | ||
SymbolicStrides(SymbolicStrides), | ||
MaxTargetVectorWidthInBits(MaxTargetVectorWidthInBits) {} | ||
|
||
/// Register the location (instructions are given increasing numbers) | ||
|
@@ -288,6 +290,15 @@ class MemoryDepChecker { | |
return PointerBounds; | ||
} | ||
|
||
/// Return the DominatorTree pointer this checker was constructed with. | ||
/// Asserts that the pointer is non-null, so callers never receive nullptr | ||
/// when assertions are enabled. | ||
DominatorTree *getDT() const { | ||
assert(DT && "requested DT, but it is not available"); | ||
return DT; | ||
} | ||
/// Return the AssumptionCache pointer this checker was constructed with. | ||
/// Asserts that the pointer is non-null, so callers never receive nullptr | ||
/// when assertions are enabled. | ||
AssumptionCache *getAC() const { | ||
assert(AC && "requested AC, but it is not available"); | ||
return AC; | ||
} | ||
|
||
private: | ||
/// A wrapper around ScalarEvolution, used to add runtime SCEV checks, and | ||
/// applies dynamic knowledge to simplify SCEV expressions and convert them | ||
|
@@ -296,6 +307,10 @@ class MemoryDepChecker { | |
/// example we might assume a unit stride for a pointer in order to prove | ||
/// that a memory access is strided and doesn't wrap. | ||
PredicatedScalarEvolution &PSE; | ||
|
||
AssumptionCache *AC; | ||
DominatorTree *DT; | ||
|
||
const Loop *InnermostLoop; | ||
|
||
/// Reference to map of pointer values to | ||
|
@@ -670,7 +685,7 @@ class LoopAccessInfo { | |
LLVM_ABI LoopAccessInfo(Loop *L, ScalarEvolution *SE, | ||
const TargetTransformInfo *TTI, | ||
const TargetLibraryInfo *TLI, AAResults *AA, | ||
DominatorTree *DT, LoopInfo *LI, | ||
DominatorTree *DT, LoopInfo *LI, AssumptionCache *AC, | ||
Comment on lines
-673
to
+688
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can this have a default? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Sure, done, thanks! |
||
bool AllowPartial = false); | ||
|
||
/// Return true if we can analyze the memory accesses in the loop and there are | ||
|
@@ -922,7 +937,8 @@ LLVM_ABI std::pair<const SCEV *, const SCEV *> getStartAndEndForAccess( | |
const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *BTC, | ||
const SCEV *MaxBTC, ScalarEvolution *SE, | ||
DenseMap<std::pair<const SCEV *, Type *>, | ||
std::pair<const SCEV *, const SCEV *>> *PointerBounds); | ||
std::pair<const SCEV *, const SCEV *>> *PointerBounds, | ||
DominatorTree *DT, AssumptionCache *AC); | ||
|
||
class LoopAccessInfoManager { | ||
/// The cache. | ||
|
@@ -935,12 +951,14 @@ class LoopAccessInfoManager { | |
LoopInfo &LI; | ||
TargetTransformInfo *TTI; | ||
const TargetLibraryInfo *TLI = nullptr; | ||
AssumptionCache *AC; | ||
|
||
public: | ||
LoopAccessInfoManager(ScalarEvolution &SE, AAResults &AA, DominatorTree &DT, | ||
LoopInfo &LI, TargetTransformInfo *TTI, | ||
const TargetLibraryInfo *TLI) | ||
: SE(SE), AA(AA), DT(DT), LI(LI), TTI(TTI), TLI(TLI) {} | ||
LoopInfo &LI, TargetTransformInfo *TTI = nullptr, | ||
const TargetLibraryInfo *TLI = nullptr, | ||
AssumptionCache *AC = nullptr) | ||
: SE(SE), AA(AA), DT(DT), LI(LI), TTI(TTI), TLI(TLI), AC(AC) {} | ||
|
||
LLVM_ABI const LoopAccessInfo &getInfo(Loop &L, bool AllowPartial = false); | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -23,6 +23,8 @@ | |
#include "llvm/ADT/SmallVector.h" | ||
#include "llvm/Analysis/AliasAnalysis.h" | ||
#include "llvm/Analysis/AliasSetTracker.h" | ||
#include "llvm/Analysis/AssumeBundleQueries.h" | ||
#include "llvm/Analysis/AssumptionCache.h" | ||
#include "llvm/Analysis/LoopAnalysisManager.h" | ||
#include "llvm/Analysis/LoopInfo.h" | ||
#include "llvm/Analysis/LoopIterator.h" | ||
|
@@ -208,28 +210,46 @@ static const SCEV *mulSCEVOverflow(const SCEV *A, const SCEV *B, | |
|
||
/// Return true, if evaluating \p AR at \p MaxBTC cannot wrap, because \p AR at | ||
/// \p MaxBTC is guaranteed inbounds of the accessed object. | ||
static bool evaluatePtrAddRecAtMaxBTCWillNotWrap(const SCEVAddRecExpr *AR, | ||
const SCEV *MaxBTC, | ||
const SCEV *EltSize, | ||
ScalarEvolution &SE, | ||
const DataLayout &DL) { | ||
static bool | ||
evaluatePtrAddRecAtMaxBTCWillNotWrap(const SCEVAddRecExpr *AR, | ||
const SCEV *MaxBTC, const SCEV *EltSize, | ||
ScalarEvolution &SE, const DataLayout &DL, | ||
DominatorTree *DT, AssumptionCache *AC) { | ||
auto *PointerBase = SE.getPointerBase(AR->getStart()); | ||
auto *StartPtr = dyn_cast<SCEVUnknown>(PointerBase); | ||
if (!StartPtr) | ||
return false; | ||
const Loop *L = AR->getLoop(); | ||
bool CheckForNonNull, CheckForFreed; | ||
uint64_t DerefBytes = StartPtr->getValue()->getPointerDereferenceableBytes( | ||
Value *StartPtrV = StartPtr->getValue(); | ||
uint64_t DerefBytes = StartPtrV->getPointerDereferenceableBytes( | ||
DL, CheckForNonNull, CheckForFreed); | ||
|
||
if (CheckForNonNull || CheckForFreed) | ||
if (DerefBytes && (CheckForNonNull || CheckForFreed)) | ||
return false; | ||
|
||
const SCEV *Step = AR->getStepRecurrence(SE); | ||
Type *WiderTy = SE.getWiderType(MaxBTC->getType(), Step->getType()); | ||
const SCEV *DerefBytesSCEV = SE.getConstant(WiderTy, DerefBytes); | ||
|
||
// Check if we have a suitable dereferenceable assumption we can use. | ||
if (!StartPtrV->canBeFreed()) { | ||
RetainedKnowledge DerefRK = getKnowledgeValidInContext( | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. There is a lot of similarity between this and the corresponding logic in #128436. Can we common this logic across both patches? The difference is that we check alignment assumptions as well in that API. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Sounds good, I'll first land #128436 and then update this PR. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. There's a bigger difference - here, we only handle a constant dereferenceable size. @fhahn I think the common-ing can be done after this patch has landed? I have some improvements locally that were built upon this patch (even the non-constant deref can be done as a follow-up). |
||
StartPtrV, {Attribute::Dereferenceable}, *AC, | ||
L->getLoopPredecessor()->getTerminator(), DT); | ||
if (DerefRK) { | ||
DerefBytesSCEV = SE.getUMaxExpr( | ||
DerefBytesSCEV, SE.getConstant(WiderTy, DerefRK.ArgValue)); | ||
} | ||
} | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. nit: Might be worth bailing out early if DerefBytesSCEV is zero, which will be quite common I think. I suspect in most cases the pointer won't be marked as dereferenceable or have an assumption, in which case getPointerDereferenceableBytes returns 0. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Sounds good, updated, thanks! |
||
if (DerefBytesSCEV->isZero()) | ||
return false; | ||
|
||
bool IsKnownNonNegative = SE.isKnownNonNegative(Step); | ||
if (!IsKnownNonNegative && !SE.isKnownNegative(Step)) | ||
return false; | ||
|
||
Type *WiderTy = SE.getWiderType(MaxBTC->getType(), Step->getType()); | ||
Step = SE.getNoopOrSignExtend(Step, WiderTy); | ||
MaxBTC = SE.getNoopOrZeroExtend(MaxBTC, WiderTy); | ||
|
||
|
@@ -256,24 +276,23 @@ static bool evaluatePtrAddRecAtMaxBTCWillNotWrap(const SCEVAddRecExpr *AR, | |
const SCEV *EndBytes = addSCEVNoOverflow(StartOffset, OffsetEndBytes, SE); | ||
if (!EndBytes) | ||
return false; | ||
return SE.isKnownPredicate(CmpInst::ICMP_ULE, EndBytes, | ||
SE.getConstant(WiderTy, DerefBytes)); | ||
return SE.isKnownPredicate(CmpInst::ICMP_ULE, EndBytes, DerefBytesSCEV); | ||
} | ||
|
||
// For negative steps check if | ||
// * StartOffset >= (MaxBTC * Step + EltSize) | ||
// * StartOffset <= DerefBytes. | ||
assert(SE.isKnownNegative(Step) && "must be known negative"); | ||
return SE.isKnownPredicate(CmpInst::ICMP_SGE, StartOffset, OffsetEndBytes) && | ||
SE.isKnownPredicate(CmpInst::ICMP_ULE, StartOffset, | ||
SE.getConstant(WiderTy, DerefBytes)); | ||
SE.isKnownPredicate(CmpInst::ICMP_ULE, StartOffset, DerefBytesSCEV); | ||
} | ||
|
||
std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess( | ||
const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *BTC, | ||
const SCEV *MaxBTC, ScalarEvolution *SE, | ||
DenseMap<std::pair<const SCEV *, Type *>, | ||
std::pair<const SCEV *, const SCEV *>> *PointerBounds) { | ||
std::pair<const SCEV *, const SCEV *>> *PointerBounds, | ||
DominatorTree *DT, AssumptionCache *AC) { | ||
std::pair<const SCEV *, const SCEV *> *PtrBoundsPair; | ||
if (PointerBounds) { | ||
auto [Iter, Ins] = PointerBounds->insert( | ||
|
@@ -308,8 +327,8 @@ std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess( | |
// sets ScEnd to the maximum unsigned value for the type. Note that LAA | ||
// separately checks that accesses cannot wrap, so unsigned max | ||
// represents an upper bound. | ||
if (evaluatePtrAddRecAtMaxBTCWillNotWrap(AR, MaxBTC, EltSizeSCEV, *SE, | ||
DL)) { | ||
if (evaluatePtrAddRecAtMaxBTCWillNotWrap(AR, MaxBTC, EltSizeSCEV, *SE, DL, | ||
DT, AC)) { | ||
ScEnd = AR->evaluateAtIteration(MaxBTC, *SE); | ||
} else { | ||
ScEnd = SE->getAddExpr( | ||
|
@@ -356,9 +375,9 @@ void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, const SCEV *PtrExpr, | |
bool NeedsFreeze) { | ||
const SCEV *SymbolicMaxBTC = PSE.getSymbolicMaxBackedgeTakenCount(); | ||
const SCEV *BTC = PSE.getBackedgeTakenCount(); | ||
const auto &[ScStart, ScEnd] = | ||
getStartAndEndForAccess(Lp, PtrExpr, AccessTy, BTC, SymbolicMaxBTC, | ||
PSE.getSE(), &DC.getPointerBounds()); | ||
const auto &[ScStart, ScEnd] = getStartAndEndForAccess( | ||
Lp, PtrExpr, AccessTy, BTC, SymbolicMaxBTC, PSE.getSE(), | ||
&DC.getPointerBounds(), DC.getDT(), DC.getAC()); | ||
assert(!isa<SCEVCouldNotCompute>(ScStart) && | ||
!isa<SCEVCouldNotCompute>(ScEnd) && | ||
"must be able to compute both start and end expressions"); | ||
|
@@ -1961,13 +1980,15 @@ bool MemoryDepChecker::areAccessesCompletelyBeforeOrAfter(const SCEV *Src, | |
const SCEV *BTC = PSE.getBackedgeTakenCount(); | ||
const SCEV *SymbolicMaxBTC = PSE.getSymbolicMaxBackedgeTakenCount(); | ||
ScalarEvolution &SE = *PSE.getSE(); | ||
const auto &[SrcStart_, SrcEnd_] = getStartAndEndForAccess( | ||
InnermostLoop, Src, SrcTy, BTC, SymbolicMaxBTC, &SE, &PointerBounds); | ||
const auto &[SrcStart_, SrcEnd_] = | ||
getStartAndEndForAccess(InnermostLoop, Src, SrcTy, BTC, SymbolicMaxBTC, | ||
&SE, &PointerBounds, DT, AC); | ||
if (isa<SCEVCouldNotCompute>(SrcStart_) || isa<SCEVCouldNotCompute>(SrcEnd_)) | ||
return false; | ||
|
||
const auto &[SinkStart_, SinkEnd_] = getStartAndEndForAccess( | ||
InnermostLoop, Sink, SinkTy, BTC, SymbolicMaxBTC, &SE, &PointerBounds); | ||
const auto &[SinkStart_, SinkEnd_] = | ||
getStartAndEndForAccess(InnermostLoop, Sink, SinkTy, BTC, SymbolicMaxBTC, | ||
&SE, &PointerBounds, DT, AC); | ||
if (isa<SCEVCouldNotCompute>(SinkStart_) || | ||
isa<SCEVCouldNotCompute>(SinkEnd_)) | ||
return false; | ||
|
@@ -3003,7 +3024,7 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE, | |
const TargetTransformInfo *TTI, | ||
const TargetLibraryInfo *TLI, AAResults *AA, | ||
DominatorTree *DT, LoopInfo *LI, | ||
bool AllowPartial) | ||
AssumptionCache *AC, bool AllowPartial) | ||
: PSE(std::make_unique<PredicatedScalarEvolution>(*SE, *L)), | ||
PtrRtChecking(nullptr), TheLoop(L), AllowPartial(AllowPartial) { | ||
unsigned MaxTargetVectorWidthInBits = std::numeric_limits<unsigned>::max(); | ||
|
@@ -3013,8 +3034,8 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE, | |
MaxTargetVectorWidthInBits = | ||
TTI->getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector) * 2; | ||
|
||
DepChecker = std::make_unique<MemoryDepChecker>(*PSE, L, SymbolicStrides, | ||
MaxTargetVectorWidthInBits); | ||
DepChecker = std::make_unique<MemoryDepChecker>( | ||
*PSE, AC, DT, L, SymbolicStrides, MaxTargetVectorWidthInBits); | ||
PtrRtChecking = std::make_unique<RuntimePointerChecking>(*DepChecker, SE); | ||
if (canAnalyzeLoop()) | ||
CanVecMem = analyzeLoop(AA, LI, TLI, DT); | ||
|
@@ -3083,7 +3104,7 @@ const LoopAccessInfo &LoopAccessInfoManager::getInfo(Loop &L, | |
// or if it was created with a different value of AllowPartial. | ||
if (Inserted || It->second->hasAllowPartial() != AllowPartial) | ||
It->second = std::make_unique<LoopAccessInfo>(&L, &SE, TTI, TLI, &AA, &DT, | ||
&LI, AllowPartial); | ||
&LI, AC, AllowPartial); | ||
|
||
return *It->second; | ||
} | ||
|
@@ -3126,7 +3147,8 @@ LoopAccessInfoManager LoopAccessAnalysis::run(Function &F, | |
auto &LI = FAM.getResult<LoopAnalysis>(F); | ||
auto &TTI = FAM.getResult<TargetIRAnalysis>(F); | ||
auto &TLI = FAM.getResult<TargetLibraryAnalysis>(F); | ||
return LoopAccessInfoManager(SE, AA, DT, LI, &TTI, &TLI); | ||
auto &AC = FAM.getResult<AssumptionAnalysis>(F); | ||
return LoopAccessInfoManager(SE, AA, DT, LI, &TTI, &TLI, &AC); | ||
} | ||
|
||
AnalysisKey LoopAccessAnalysis::Key; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
An alternative would be to retrieve DT and AC directly from ScalarEvolution, which already holds them, but they are not accessible from it at the moment. Not sure if we should expose them so they can be used more conveniently here in the patch.