Skip to content

Commit 79804ee

Browse files
Lower Bounds Inference (#718)
This pull request extends array bounds inference to support inferring lower bounds for array pointers and inserting them using Checked C range bounds. For example: char simple_lower_bound(int *a, int l) { int *b = a; while (b - a < l && *b != 42) b++; return b - a < l; } 3C can now infer bounds for b even though a standard count bound would be invalidated by the increment b++. char simple_lower_bound(_Array_ptr<int> a : count(l), int l) { _Array_ptr<int> b : bounds(a, a + l) = a; while (b - a < l && *b != 42) b++; return b - a < l; } The inference is also able to automatically fatten pointers by generating lower bounds where none exists in the source code. Co-authored-by: Matt McCutchen (Correct Computation) <[email protected]>
1 parent abd7a00 commit 79804ee

File tree

105 files changed

+2006
-553
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

105 files changed

+2006
-553
lines changed

clang/include/clang/3C/ABounds.h

Lines changed: 58 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -32,17 +32,26 @@ class ABounds {
3232
CountPlusOneBoundKind,
3333
// Bounds that represent number of bytes.
3434
ByteBoundKind,
35-
// Bounds that represent range.
36-
RangeBoundKind,
3735
};
3836
BoundsKind getKind() const { return Kind; }
3937

4038
protected:
39+
ABounds(BoundsKind K, BoundsKey L, BoundsKey B) : Kind(K), LengthKey(L),
40+
LowerBoundKey(B) {}
41+
ABounds(BoundsKind K, BoundsKey L) : ABounds(K, L, 0) {}
42+
4143
BoundsKind Kind;
4244

43-
protected:
44-
ABounds(BoundsKind K) : Kind(K) {}
45-
void addBoundsUsedKey(BoundsKey);
45+
// Bounds key representing the length of the bounds from the base pointer of
46+
// the range. The exact interpretation of this field varies by subclass.
47+
BoundsKey LengthKey;
48+
49+
// The base pointer representing the start of the range of the bounds. If this
50+
// is not equal to 0, then this ABounds has a specific lower bound that should
51+
// be used when emitting array pointer bounds. Otherwise, if it is 0, then the
52+
// lower bound should implicitly be the pointer the bound is applied to.
53+
BoundsKey LowerBoundKey;
54+
4655
// Get the variable name of the given bounds key that corresponds
4756
// to the given declaration.
4857
static std::string getBoundsKeyStr(BoundsKey, AVarBoundsInfo *,
@@ -51,50 +60,69 @@ class ABounds {
5160
public:
5261
virtual ~ABounds() {}
5362

54-
virtual std::string mkString(AVarBoundsInfo *, clang::Decl *D = nullptr) = 0;
63+
// Make a string representation of this array bound. If the array has a
64+
// defined lower bound pointer that is not the same as the pointer for which
65+
// the bound string is being generated (passed as parameter BK), then a range
66+
// bound is generated using that lower bound. Otherwise, a standard count
67+
// bound is generated.
68+
std::string
69+
mkString(AVarBoundsInfo *ABI, clang::Decl *D = nullptr, BoundsKey BK = 0);
70+
71+
// Make a string representation of this array bound always generating explicit
72+
// lower bounds in range bounds expressions.
73+
virtual std::string
74+
mkStringWithLowerBound(AVarBoundsInfo *ABI, clang::Decl *D) = 0;
75+
76+
// Make a string representation of this array bound ignoring any lower bound
77+
// information. A standard count bound is always generated.
78+
virtual std::string
79+
mkStringWithoutLowerBound(AVarBoundsInfo *ABI, clang::Decl *D) = 0;
80+
5581
virtual bool areSame(ABounds *, AVarBoundsInfo *) = 0;
56-
virtual BoundsKey getBKey() = 0;
5782
virtual ABounds *makeCopy(BoundsKey NK) = 0;
5883

59-
// Set that maintains all the bound keys that are used inin
60-
// TODO: Is this still needed?
61-
static std::set<BoundsKey> KeysUsedInBounds;
62-
static bool isKeyUsedInBounds(BoundsKey ToCheck);
84+
BoundsKey getLengthKey() const { return LengthKey; }
85+
BoundsKey getLowerBoundKey() const { return LowerBoundKey; }
86+
void setLowerBoundKey(BoundsKey LB) { LowerBoundKey = LB; }
6387

6488
static ABounds *getBoundsInfo(AVarBoundsInfo *AVBInfo, BoundsExpr *BExpr,
6589
const ASTContext &C);
6690
};
6791

6892
class CountBound : public ABounds {
6993
public:
70-
CountBound(BoundsKey Var) : ABounds(CountBoundKind), CountVar(Var) {
71-
addBoundsUsedKey(Var);
72-
}
94+
CountBound(BoundsKey L, BoundsKey B) : ABounds(CountBoundKind, L, B) {}
95+
CountBound(BoundsKey L) : ABounds(CountBoundKind, L) {}
7396

74-
~CountBound() override {}
97+
std::string
98+
mkStringWithLowerBound(AVarBoundsInfo *ABI, clang::Decl *D) override;
99+
std::string
100+
mkStringWithoutLowerBound(AVarBoundsInfo *ABI, clang::Decl *D) override;
75101

76-
std::string mkString(AVarBoundsInfo *ABI, clang::Decl *D = nullptr) override;
77102
bool areSame(ABounds *O, AVarBoundsInfo *ABI) override;
78-
BoundsKey getBKey() override;
103+
79104
ABounds *makeCopy(BoundsKey NK) override;
80105

81106
static bool classof(const ABounds *S) {
82107
return S->getKind() == CountBoundKind;
83108
}
84-
85-
BoundsKey getCountVar() { return CountVar; }
86-
87-
protected:
88-
BoundsKey CountVar;
89109
};
90110

91111
class CountPlusOneBound : public CountBound {
92112
public:
93-
CountPlusOneBound(BoundsKey Var) : CountBound(Var) {
113+
CountPlusOneBound(BoundsKey L, BoundsKey B) : CountBound(L, B) {
94114
this->Kind = CountPlusOneBoundKind;
95115
}
96116

97-
std::string mkString(AVarBoundsInfo *ABI, clang::Decl *D = nullptr) override;
117+
CountPlusOneBound(BoundsKey L) : CountBound(L) {
118+
this->Kind = CountPlusOneBoundKind;
119+
}
120+
121+
std::string
122+
mkStringWithLowerBound(AVarBoundsInfo *ABI, clang::Decl *D) override;
123+
std::string
124+
mkStringWithoutLowerBound(AVarBoundsInfo *ABI, clang::Decl *D) override;
125+
98126
bool areSame(ABounds *O, AVarBoundsInfo *ABI) override;
99127

100128
static bool classof(const ABounds *S) {
@@ -104,54 +132,20 @@ class CountPlusOneBound : public CountBound {
104132

105133
class ByteBound : public ABounds {
106134
public:
107-
ByteBound(BoundsKey Var) : ABounds(ByteBoundKind), ByteVar(Var) {
108-
addBoundsUsedKey(Var);
109-
}
135+
ByteBound(BoundsKey L, BoundsKey B) : ABounds(ByteBoundKind, L, B) {}
136+
ByteBound(BoundsKey L) : ABounds(ByteBoundKind, L) {}
110137

111-
~ByteBound() override {}
138+
std::string
139+
mkStringWithLowerBound(AVarBoundsInfo *ABI, clang::Decl *D) override;
140+
std::string
141+
mkStringWithoutLowerBound(AVarBoundsInfo *ABI, clang::Decl *D) override;
112142

113-
std::string mkString(AVarBoundsInfo *ABI, clang::Decl *D = nullptr) override;
114143
bool areSame(ABounds *O, AVarBoundsInfo *ABI) override;
115-
BoundsKey getBKey() override;
116144
ABounds *makeCopy(BoundsKey NK) override;
117145

118146
static bool classof(const ABounds *S) {
119147
return S->getKind() == ByteBoundKind;
120148
}
121-
BoundsKey getByteVar() { return ByteVar; }
122-
123-
private:
124-
BoundsKey ByteVar;
125149
};
126150

127-
class RangeBound : public ABounds {
128-
public:
129-
RangeBound(BoundsKey L, BoundsKey R) : ABounds(RangeBoundKind), LB(L), UB(R) {
130-
addBoundsUsedKey(L);
131-
addBoundsUsedKey(R);
132-
}
133-
134-
~RangeBound() override {}
135-
136-
std::string mkString(AVarBoundsInfo *ABI, clang::Decl *D = nullptr) override;
137-
bool areSame(ABounds *O, AVarBoundsInfo *ABI) override;
138-
139-
BoundsKey getBKey() override {
140-
assert(false && "Not implemented.");
141-
return 0;
142-
}
143-
144-
ABounds *makeCopy(BoundsKey NK) override {
145-
assert(false && "Not Implemented");
146-
return nullptr;
147-
}
148-
149-
static bool classof(const ABounds *S) {
150-
return S->getKind() == RangeBoundKind;
151-
}
152-
153-
private:
154-
BoundsKey LB;
155-
BoundsKey UB;
156-
};
157151
#endif // LLVM_CLANG_3C_ABOUNDS_H

clang/include/clang/3C/AVarBoundsInfo.h

Lines changed: 107 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,7 @@ class AvarBoundsInference {
148148
// BoundsKey that failed the flow inference.
149149
std::set<BoundsKey> BKsFailedFlowInference;
150150

151-
static ABounds *getPreferredBound(const BndsKindMap &BKindMap);
151+
ABounds *getPreferredBound(BoundsKey BK);
152152
};
153153

154154
// Class that maintains information about potential bounds for
@@ -181,7 +181,8 @@ class AVarBoundsInfo {
181181
public:
182182
AVarBoundsInfo()
183183
: ProgVarGraph(this), CtxSensProgVarGraph(this),
184-
RevCtxSensProgVarGraph(this), CSBKeyHandler(this) {
184+
RevCtxSensProgVarGraph(this), CSBKeyHandler(this),
185+
LowerBoundGraph(this) {
185186
BCount = 1;
186187
PVarInfo.clear();
187188
InProgramArrPtrBoundsKeys.clear();
@@ -231,11 +232,9 @@ class AVarBoundsInfo {
231232

232233
// Add Assignments between variables. These methods will add edges between
233234
// corresponding BoundsKeys
234-
bool addAssignment(clang::Decl *L, clang::Decl *R);
235-
bool addAssignment(clang::DeclRefExpr *L, clang::DeclRefExpr *R);
236235
bool addAssignment(BoundsKey L, BoundsKey R);
237-
bool handlePointerAssignment(clang::Stmt *St, clang::Expr *L, clang::Expr *R,
238-
ASTContext *C, ConstraintResolver *CR);
236+
bool handlePointerAssignment(clang::Expr *L, clang::Expr *R, ASTContext *C,
237+
ConstraintResolver *CR);
239238
bool handleAssignment(clang::Expr *L, const CVarSet &LCVars,
240239
const std::set<BoundsKey> &CSLKeys, clang::Expr *R,
241240
const CVarSet &RCVars,
@@ -252,11 +251,40 @@ class AVarBoundsInfo {
252251
// for pointers that have pointer arithmetic performed on them.
253252
void recordArithmeticOperation(clang::Expr *E, ConstraintResolver *CR);
254253

255-
// Check if the given bounds key has a pointer arithmetic done on it.
256-
bool hasPointerArithmetic(BoundsKey BK);
254+
// Check if the given bounds key will need to be duplicated during rewriting
255+
// to generate a fresh lower bound. This happens when a pointer is not a valid
256+
// lower bound due to pointer arithmetic, and lower bounds inference fails to
257+
// find a consistent lower bound among existing pointers in the source code.
258+
bool needsFreshLowerBound(BoundsKey BK);
259+
bool needsFreshLowerBound(ConstraintVariable *CV);
260+
261+
// Return true when a lower bound could be inferred for the array pointer
262+
// corresponding to `BK`. This is the case either when `BK` was not
263+
// invalidated as a lower bound by pointer arithmetic, meaning it is its own
264+
// lower bound, or when `BK` was invalidated, but a valid lower bound could be
265+
// inferred.
266+
bool hasLowerBound(BoundsKey BK);
267+
268+
// Record that a pointer cannot be rewritten to use range bounds. This might
269+
// be due to 3C rewriting limitations (assignments appearing inside macros),
270+
// or it might be a Checked C limitation (the current style of range bounds
271+
// can't properly initialized on global variables without error).
272+
void markIneligibleForFreshLowerBound(BoundsKey BK);
257273

258274
// Get the ProgramVar for the provided VarKey.
259-
ProgramVar *getProgramVar(BoundsKey VK);
275+
// This method can return `nullptr` if there is no corresponding ProgramVar.
276+
// It's not obvious when a BoundsKey can be expected to have a ProgramVar, so
277+
// callers should typically check for null.
278+
ProgramVar *getProgramVar(BoundsKey VK) const;
279+
280+
// Get the Scope of the provided BoundsKey.
281+
// This method returns nullptr if `getProgramVar(BK)` would return nullptr.
282+
const ProgramVarScope *getProgramVarScope(BoundsKey BK) const;
283+
284+
// Return true when BoundsKey `To` can be accessed from the scope of `From`.
285+
// Note that this returns false if either BoundsKey cannot be mapped to a
286+
// ProgramVar (and therefore can't be mapped to a scope).
287+
bool isInAccessibleScope(BoundsKey From, BoundsKey To);
260288

261289
// Propagate the array bounds information for all array ptrs.
262290
void performFlowAnalysis(ProgramInfo *PI);
@@ -294,6 +322,13 @@ class AVarBoundsInfo {
294322

295323
void addConstantArrayBounds(ProgramInfo &I);
296324

325+
// This is the main entry point to start lower bound inference. It populates
326+
// the map LowerBounds and set NeedFreshLowerBounds with the result of the
327+
// analysis. LowerBounds is accessed during the rest of bounds inference, so
328+
// this method must be executed before performFlowAnalysis which handles the
329+
// majority of the work for length inference.
330+
void inferLowerBounds(ProgramInfo *PI);
331+
297332
private:
298333
friend class AvarBoundsInference;
299334
friend class CtxSensitiveBoundsKeyHandler;
@@ -315,8 +350,17 @@ class AVarBoundsInfo {
315350
// Set that contains BoundsKeys of variables which have invalid bounds.
316351
std::set<BoundsKey> InvalidBounds;
317352
// These are the bounds keys of the pointers that have arithmetic operations
318-
// performed on them.
353+
// performed on them. These pointers cannot have the standard `count(n)`
354+
// bounds and instead must use range bounds with an explicit lower bound
355+
// e.g., `bounds(p, p + n)`.
319356
std::set<BoundsKey> ArrPointersWithArithmetic;
357+
358+
// Some pointers, however, cannot be automatically given range bounds. This
359+
// includes global variables and structure fields. If a pointer is in both the
360+
// above pointer arithmetic set and this set, then it cannot be assigned any
361+
// bound.
362+
std::set<BoundsKey> IneligibleForFreshLowerBound;
363+
320364
// Set of BoundsKeys that correspond to pointers.
321365
std::set<BoundsKey> PointerBoundsKey;
322366
// Set of BoundsKey that correspond to array pointers.
@@ -342,6 +386,9 @@ class AVarBoundsInfo {
342386
// BiMap of function keys and BoundsKey for function return values.
343387
BiMap<std::tuple<std::string, std::string, bool>, BoundsKey> FuncDeclVarMap;
344388

389+
PVConstraint *
390+
getConstraintVariable(const ProgramInfo *PI, BoundsKey BK) const;
391+
345392
// Graph of all program variables.
346393
AVarGraph ProgVarGraph;
347394
// Graph that contains only edges from normal BoundsKey to
@@ -356,6 +403,31 @@ class AVarBoundsInfo {
356403
// Context-sensitive bounds key handler
357404
CtxSensitiveBoundsKeyHandler CSBKeyHandler;
358405

406+
// This graph is used for determining which pointers are valid lower
407+
// bounds, and so are eligible for use as their own lower bound (implicitly as
408+
// a count bound) or as the lower bound for another pointer in a range bound.
409+
// It is also used to infer lower bounds for the pointers that are not
410+
// eligible to be their own lower bound.
411+
AVarGraph LowerBoundGraph;
412+
// In the LowerBoundGraph the constant 0 is used to represent the global
413+
// singleton invalid pointer.
414+
const BoundsKey InvalidLowerBoundKey = 0;
415+
416+
// BoundsKeys that cannot be used as a lower bound. These are used in an
417+
// update such as `a = a + 1`, or are transitively assigned from such a
418+
// pointer.
419+
std::set<BoundsKey> InvalidLowerBounds;
420+
421+
// Mapping from pointers to their inferred lower bounds. A pointer maps to
422+
// itself if it can use a simple count bound. Missing pointers have no valid
423+
// lower bound, so no length should be inferred during bounds inference.
424+
std::map<BoundsKey, BoundsKey> LowerBounds;
425+
426+
// Some variables have no valid lower bound in the original source code, but
427+
// we are able to insert a temporary pointer variable to be the lower bound.
428+
// Keep track of these for special handling during rewriting.
429+
std::set<BoundsKey> NeedFreshLowerBounds;
430+
359431
// BoundsKey helper function: These functions help in getting bounds key from
360432
// various artifacts.
361433
bool hasVarKey(PersistentSourceLoc &PSL);
@@ -389,6 +461,31 @@ class AVarBoundsInfo {
389461
void insertParamKey(ParamDeclType ParamDecl, BoundsKey NK);
390462

391463
void dumpBounds();
464+
465+
// Compute which array pointers are not valid lower bounds. This includes any
466+
// pointers directly updated in pointer arithmetic expression, as well as any
467+
// pointers transitively assigned to from these pointers. This is computed
468+
// using essentially the same algorithm as is used for solving the checked
469+
// type constraint graph.
470+
void computeInvalidLowerBounds(ProgramInfo *PI);
471+
472+
// During lower bound inference it may be necessary to generate temporary
473+
// pointers to act as lower bounds for arrays that otherwise don't have a
474+
// consistent lower bound. This method takes a bounds key for an array pointer
475+
// and returns a fresh bounds key that can be used as the lower bound for the
476+
// array bounds of that pointer.
477+
BoundsKey getFreshLowerBound(BoundsKey Arr);
478+
479+
// Return true if the scope of the BoundsKey is one in which lower bounds
480+
// can be inserted. BoundsKeys in context sensitive scope should not get lower
481+
// bounds. The corresponding non-context-sensitive BoundsKey should instead.
482+
bool scopeCanHaveLowerBound(BoundsKey BK);
483+
484+
// Check if a fresh lower bound can be inserted by 3C for the pointer
485+
// corresponding to the bounds key. When a pointer needs a fresh lower bound,
486+
// it is possible that 3C will not support inserting the new declaration.
487+
// No array bounds can be inferred for such pointers.
488+
bool isEligibleForFreshLowerBound(BoundsKey BK);
392489
};
393490

394491
#endif // LLVM_CLANG_3C_AVARBOUNDSINFO_H

0 commit comments

Comments
 (0)