Skip to content

Commit 1a90b38

Browse files
committed
Merge remote-tracking branch 'origin/main' into nickrolfe/regex_injection
2 parents e5f4730 + 83d204d commit 1a90b38

File tree

91 files changed

+1918
-945
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

91 files changed

+1918
-945
lines changed

cpp/ql/lib/semmle/code/cpp/ir/dataflow/DefaultTaintTracking.qll

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -484,8 +484,9 @@ module TaintedWithPath {
484484
/** Gets the element that `pathNode` wraps, if any. */
485485
Element getElementFromPathNode(PathNode pathNode) {
486486
exists(DataFlow::Node node | node = pathNode.(WrapPathNode).inner().getNode() |
487-
result = node.asExpr() or
488-
result = node.asParameter()
487+
result = node.asInstruction().getAST()
488+
or
489+
result = node.asOperand().getDef().getAST()
489490
)
490491
or
491492
result = pathNode.(EndpointPathNode).inner()

cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll

Lines changed: 12 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -806,7 +806,7 @@ predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) {
806806
simpleOperandLocalFlowStep(nodeFrom.asInstruction(), nodeTo.asOperand())
807807
or
808808
// Flow into, through, and out of store nodes
809-
StoreNodeFlow::flowInto(nodeFrom, nodeTo)
809+
StoreNodeFlow::flowInto(nodeFrom.asInstruction(), nodeTo)
810810
or
811811
StoreNodeFlow::flowThrough(nodeFrom, nodeTo)
812812
or
@@ -831,18 +831,11 @@ private predicate adjacentDefUseFlow(Node nodeFrom, Node nodeTo) {
831831
//Def-use flow
832832
Ssa::ssaFlow(nodeFrom, nodeTo)
833833
or
834-
exists(Instruction loadAddress | loadAddress = Ssa::getSourceAddressFromNode(nodeFrom) |
835-
// Use-use flow through reads
836-
exists(Node address |
837-
Ssa::addressFlowTC(address.asInstruction(), loadAddress) and
838-
Ssa::ssaFlow(address, nodeTo)
839-
)
840-
or
841-
// Use-use flow through stores.
842-
exists(Node store |
843-
Ssa::explicitWrite(_, store.asInstruction(), loadAddress) and
844-
Ssa::ssaFlow(store, nodeTo)
845-
)
834+
// Use-use flow through stores.
835+
exists(Instruction loadAddress, Node store |
836+
loadAddress = Ssa::getSourceAddressFromNode(nodeFrom) and
837+
Ssa::explicitWrite(_, store.asInstruction(), loadAddress) and
838+
Ssa::ssaFlow(store, nodeTo)
846839
)
847840
)
848841
}
@@ -906,10 +899,13 @@ private module ReadNodeFlow {
906899
}
907900
}
908901

909-
private module StoreNodeFlow {
902+
/**
903+
* INTERNAL: Do not use.
904+
*/
905+
module StoreNodeFlow {
910906
/** Holds if the store node `nodeTo` should receive flow from `nodeFrom`. */
911-
predicate flowInto(Node nodeFrom, StoreNode nodeTo) {
912-
nodeTo.flowInto(Ssa::getDestinationAddress(nodeFrom.asInstruction()))
907+
predicate flowInto(Instruction instrFrom, StoreNode nodeTo) {
908+
nodeTo.flowInto(Ssa::getDestinationAddress(instrFrom))
913909
}
914910

915911
/** Holds if the store node `nodeTo` should receive flow from `nodeFom`. */

cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/SsaImplCommon.qll

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -645,17 +645,18 @@ module Consistency {
645645

646646
query predicate nonUniqueDef(RelevantDefinition def, SourceVariable v, BasicBlock bb, int i) {
647647
ssaDefReachesRead(v, def, bb, i) and
648-
not exists(unique(Definition def0 | ssaDefReachesRead(_, def0, bb, i)))
648+
not exists(unique(Definition def0 | ssaDefReachesRead(v, def0, bb, i)))
649649
}
650650

651651
query predicate readWithoutDef(SourceVariable v, BasicBlock bb, int i) {
652652
variableRead(bb, i, v, _) and
653-
not ssaDefReachesRead(_, _, bb, i)
653+
not ssaDefReachesRead(v, _, bb, i)
654654
}
655655

656656
query predicate deadDef(RelevantDefinition def, SourceVariable v) {
657657
v = def.getSourceVariable() and
658658
not ssaDefReachesRead(_, def, _, _) and
659-
not phiHasInputFromBlock(_, def, _)
659+
not phiHasInputFromBlock(_, def, _) and
660+
not uncertainWriteDefinitionInput(_, def)
660661
}
661662
}

cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/SsaInternals.qll

Lines changed: 59 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -244,17 +244,6 @@ Instruction getDestinationAddress(Instruction instr) {
244244
]
245245
}
246246

247-
class ReferenceToInstruction extends CopyValueInstruction {
248-
ReferenceToInstruction() {
249-
this.getResultType() instanceof Cpp::ReferenceType and
250-
not this.getUnary().getResultType() instanceof Cpp::ReferenceType
251-
}
252-
253-
Instruction getSourceAddress() { result = getSourceAddressOperand().getDef() }
254-
255-
Operand getSourceAddressOperand() { result = this.getUnaryOperand() }
256-
}
257-
258247
/** Gets the source address of `instr` if it is an instruction that behaves like a `LoadInstruction`. */
259248
Instruction getSourceAddress(Instruction instr) { result = getSourceAddressOperand(instr).getDef() }
260249

@@ -266,11 +255,7 @@ Operand getSourceAddressOperand(Instruction instr) {
266255
result =
267256
[
268257
instr.(LoadInstruction).getSourceAddressOperand(),
269-
instr.(ReadSideEffectInstruction).getArgumentOperand(),
270-
// `ReferenceToInstruction` is really more of an address-of operation,
271-
// but by including it in this list we break out of `flowOutOfAddressStep` at an
272-
// instruction that, at the source level, looks like a use of a variable.
273-
instr.(ReferenceToInstruction).getSourceAddressOperand()
258+
instr.(ReadSideEffectInstruction).getArgumentOperand()
274259
]
275260
}
276261

@@ -295,10 +280,6 @@ Operand getSourceValueOperand(Instruction instr) {
295280
result = instr.(LoadInstruction).getSourceValueOperand()
296281
or
297282
result = instr.(ReadSideEffectInstruction).getSideEffectOperand()
298-
or
299-
// See the comment on the `ReferenceToInstruction` disjunct in `getSourceAddressOperand` for why
300-
// this case is included.
301-
result = instr.(ReferenceToInstruction).getSourceValueOperand()
302283
}
303284

304285
/**
@@ -513,6 +494,64 @@ private module Cached {
513494
explicitWrite(false, storeNode.getStoreInstruction(), def)
514495
)
515496
or
497+
// The destination of a store operation has undergone lvalue-to-rvalue conversion and is now a
498+
// right-hand-side of a store operation.
499+
// Find the next use of the variable in that store operation, and recursively find the load of that
500+
// pointer. For example, consider this case:
501+
//
502+
// ```cpp
503+
// int x = source();
504+
// int* p = &x;
505+
// sink(*p);
506+
// ```
507+
//
508+
// if we want to find the load of the address of `x`, we see that the pointer is stored into `p`,
509+
// and we then need to recursively look for the load of `p`.
510+
exists(
511+
Def def, StoreInstruction store, IRBlock block1, int rnk1, Use use, IRBlock block2, int rnk2
512+
|
513+
store = def.getInstruction() and
514+
store.getSourceValueOperand() = operand and
515+
def.hasRankInBlock(block1, rnk1) and
516+
use.hasRankInBlock(block2, rnk2) and
517+
adjacentDefRead(_, block1, rnk1, block2, rnk2)
518+
|
519+
// The shared SSA library has determined that `use` is the next use of the operand
520+
// so we find the next load of that use (but only if there is no `PostUpdateNode`) we
521+
// need to flow into first.
522+
not StoreNodeFlow::flowInto(store, _) and
523+
flowOutOfAddressStep(use.getOperand(), nodeTo)
524+
or
525+
// It may also be the case that `store` gives rise to another store step. So let's make sure that
526+
// we also take those into account.
527+
StoreNodeFlow::flowInto(store, nodeTo)
528+
)
529+
or
530+
// As we find the next load of an address, we might come across another use of the same variable.
531+
// In that case, we recursively find the next use of _that_ operand, and continue searching for
532+
// the next load of that operand. For example, consider this case:
533+
//
534+
// ```cpp
535+
// int x = source();
536+
// use(&x);
537+
// int* p = &x;
538+
// sink(*p);
539+
// ```
540+
//
541+
// The next use of `x` after its definition is `use(&x)`, but there is a later load of the address
542+
// of `x` that we want to flow to. So we use the shared SSA library to find the next load.
543+
not operand = getSourceAddressOperand(_) and
544+
exists(Use use1, Use use2, IRBlock block1, int rnk1, IRBlock block2, int rnk2 |
545+
use1.getOperand() = operand and
546+
use1.hasRankInBlock(block1, rnk1) and
547+
// Don't flow to the next use if this use is part of a store operation that totally
548+
// overrides a variable.
549+
not explicitWrite(true, _, use1.getOperand().getDef()) and
550+
adjacentDefRead(_, block1, rnk1, block2, rnk2) and
551+
use2.hasRankInBlock(block2, rnk2) and
552+
flowOutOfAddressStep(use2.getOperand(), nodeTo)
553+
)
554+
or
516555
operand = getSourceAddressOperand(nodeTo.asInstruction())
517556
or
518557
exists(ReturnIndirectionInstruction ret |

cpp/ql/test/library-tests/dataflow/DefaultTaintTracking/annotate_path_to_sink/dispatch.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@ using SinkFunction = void (*)(int);
44

55
void notSink(int notSinkParam);
66

7-
void callsSink(int sinkParam) { // $ ir-path=31:28 ir-path=32:31 ir-path=34:22
8-
sink(sinkParam); // $ ir-sink=31:28 ir-sink=32:31 ir-sink=34:22 ast=31:28 ast=32:31 ast=34:22 MISSING: ast,ir=28
7+
void callsSink(int sinkParam) { // $ ir-path=31:23 ir-path=32:26 ir-path=34:17
8+
sink(sinkParam); // $ ast=31:28 ast=32:31 ast=34:22 ir-sink
99
}
1010

1111
struct {
@@ -25,7 +25,7 @@ void assignGlobals() {
2525
};
2626

2727
void testStruct() {
28-
globalStruct.sinkPtr(atoi(getenv("TAINTED"))); // $ ir MISSING: ast
28+
globalStruct.sinkPtr(atoi(getenv("TAINTED"))); // $ MISSING: ir-path,ast
2929
globalStruct.notSinkPtr(atoi(getenv("TAINTED"))); // clean
3030

3131
globalUnion.sinkPtr(atoi(getenv("TAINTED"))); // $ ast ir-path
@@ -48,8 +48,8 @@ class D2 : public D1 {
4848

4949
class D3 : public D2 {
5050
public:
51-
void f(const char* p) override { // $ ir-path=58:10 ir-path=60:17 ir-path=61:28 ir-path=62:29 ir-path=63:33 ir-path=73:30
52-
sink(p); // $ ir-sink=58:10 ir-sink=60:17 ir-sink=61:28 ir-sink=62:29 ir-sink=63:33 ast=58:10 ast=60:17 ast=61:28 ast=62:29 ast=63:33 SPURIOUS: ast=73:30 ir-sink=73:30
51+
void f(const char* p) override { // $ ir-path=58:10 ir-path=60:17 ir-path=61:28 ir-path=62:29 ir-path=63:33 SPURIOUS: ir-path=73:30
52+
sink(p); // $ ast=58:10 ast=60:17 ast=61:28 ast=62:29 ast=63:33 ir-sink SPURIOUS: ast=73:30
5353
}
5454
};
5555

cpp/ql/test/library-tests/dataflow/DefaultTaintTracking/annotate_path_to_sink/tainted.ql

Lines changed: 21 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -23,19 +23,18 @@ class SourceConfiguration extends TaintedWithPath::TaintTrackingConfiguration {
2323
override predicate isSink(Element e) { isSinkArgument(e) }
2424
}
2525

26-
predicate irTaint(Element source, Element sink, string tag) {
27-
exists(TaintedWithPath::PathNode sinkNode, TaintedWithPath::PathNode predNode |
26+
predicate irTaint(Element source, TaintedWithPath::PathNode predNode, string tag) {
27+
exists(TaintedWithPath::PathNode sinkNode |
2828
TaintedWithPath::taintedWithPath(source, _, _, sinkNode) and
2929
predNode = getAPredecessor*(sinkNode) and
30-
sink = getElementFromPathNode(predNode) and
3130
// Make sure the path is actually reachable from this predecessor.
3231
// Otherwise, we could pick `predNode` to be b when `source` is
3332
// `source1` in this dataflow graph:
3433
// source1 ---> a ---> c ---> sinkNode
3534
// ^
3635
// source2 ---> b --/
3736
source = getElementFromPathNode(getAPredecessor*(predNode)) and
38-
if sinkNode = predNode then tag = "ir-sink" else tag = "ir-path"
37+
if predNode = sinkNode then tag = "ir-sink" else tag = "ir-path"
3938
)
4039
}
4140

@@ -45,21 +44,25 @@ class IRDefaultTaintTrackingTest extends InlineExpectationsTest {
4544
override string getARelevantTag() { result = ["ir-path", "ir-sink"] }
4645

4746
override predicate hasActualResult(Location location, string element, string tag, string value) {
48-
exists(Element source, Element tainted, int n |
49-
irTaint(source, tainted, tag) and
50-
n = strictcount(Element otherSource | irTaint(otherSource, tainted, _)) and
51-
(
52-
n = 1 and value = ""
53-
or
54-
// If there is more than one source for this sink
55-
// we specify the source location explicitly.
56-
n > 1 and
47+
exists(Element source, Element elem, TaintedWithPath::PathNode node, int n |
48+
irTaint(source, node, tag) and
49+
elem = getElementFromPathNode(node) and
50+
n = count(int startline | getAPredecessor(node).hasLocationInfo(_, startline, _, _, _)) and
51+
location = elem.getLocation() and
52+
element = elem.toString()
53+
|
54+
// Zero predecessors means it's a source, and 1 predecessor means it has a unique predecessor.
55+
// In either of these cases we leave out the location.
56+
n = [0, 1] and value = ""
57+
or
58+
// If there is more than one predecessor for this node
59+
// we specify the source location explicitly.
60+
n > 1 and
61+
exists(TaintedWithPath::PathNode pred | pred = getAPredecessor(node) |
5762
value =
58-
source.getLocation().getStartLine().toString() + ":" +
59-
source.getLocation().getStartColumn()
60-
) and
61-
location = tainted.getLocation() and
62-
element = tainted.toString()
63+
getElementFromPathNode(pred).getLocation().getStartLine().toString() + ":" +
64+
getElementFromPathNode(pred).getLocation().getStartColumn()
65+
)
6366
)
6467
}
6568
}

cpp/ql/test/library-tests/dataflow/DefaultTaintTracking/annotate_path_to_sink/test_diff.cpp

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,8 @@ struct S {
1313
}
1414
};
1515

16-
void calls_sink_with_argv(const char* a) { // $ ir-path=96:26 ir-path=98:18
17-
sink(a); // $ ast=96:26 ast=98:18 ir-sink=96:26 ir-sink=98:18
16+
void calls_sink_with_argv(const char* a) { // $ ir-path=96:26 ir-path=102:26
17+
sink(a); // $ ast=96:26 ast=98:18 ir-sink
1818
}
1919

2020
extern int i;
@@ -27,7 +27,7 @@ class BaseWithPureVirtual {
2727
class DerivedCallsSink : public BaseWithPureVirtual {
2828
public:
2929
void f(const char* p) override { // $ ir-path
30-
sink(p); // $ ir-sink ast=108:10 SPURIOUS: ast=111:10
30+
sink(p); // $ ast=108:10 ir-sink SPURIOUS: ast=111:10
3131
}
3232
};
3333

@@ -49,16 +49,16 @@ class DerivedDoesNotCallSinkDiamond2 : virtual public BaseWithPureVirtual {
4949
};
5050

5151
class DerivesMultiple : public DerivedCallsSinkDiamond1, public DerivedDoesNotCallSinkDiamond2 {
52-
void f(const char* p) override { // $ ir-path
53-
DerivedCallsSinkDiamond1::f(p);
52+
void f(const char* p) override { // $ ir-path=53:37 ir-path=115:11
53+
DerivedCallsSinkDiamond1::f(p); // $ ir-path
5454
}
5555
};
5656

5757
template<typename T>
5858
class CRTP {
5959
public:
6060
void f(const char* p) { // $ ir-path
61-
static_cast<T*>(this)->g(p);
61+
static_cast<T*>(this)->g(p); // $ ir-path
6262
}
6363
};
6464

@@ -79,7 +79,7 @@ class Derived2 : public Derived1 {
7979
class Derived3 : public Derived2 {
8080
public:
8181
void f(const char* p) override { // $ ir-path=124:19 ir-path=126:43 ir-path=128:44
82-
sink(p); // $ ast,ir-sink=124:19 ast,ir-sink=126:43 ast,ir-sink=128:44
82+
sink(p); // $ ast=124:19 ast=126:43 ast=128:44 ir-sink
8383
}
8484
};
8585

@@ -97,11 +97,11 @@ int main(int argc, char *argv[]) {
9797

9898
char*** p = &argv; // $ ast,ir-path
9999

100-
sink(*p[0]); // $ ast,ir-sink
100+
sink(*p[0]); // $ ast ir-sink=96:26 ir-sink=98:18
101101

102-
calls_sink_with_argv(*p[i]); // $ MISSING: ast,ir-path
102+
calls_sink_with_argv(*p[i]); // $ ir-path=96:26 ir-path=98:18 MISSING:ast
103103

104-
sink(*(argv + 1)); // $ ast,ir-path ir-sink
104+
sink(*(argv + 1)); // $ ast ir-path ir-sink
105105

106106
BaseWithPureVirtual* b = new DerivedCallsSink;
107107

cpp/ql/test/library-tests/dataflow/DefaultTaintTracking/annotate_sinks_only/defaulttainttracking.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -190,9 +190,9 @@ void test_pointers1()
190190
sink(ptr1); // $ ast MISSING: ir
191191
sink(ptr2); // $ SPURIOUS: ast
192192
sink(*ptr2); // $ ast MISSING: ir
193-
sink(ptr3); // $ ast MISSING: ir
194-
sink(ptr4); // $ SPURIOUS: ast
195-
sink(*ptr4); // $ ast MISSING: ir
193+
sink(ptr3); // $ ast,ir
194+
sink(ptr4); // $ SPURIOUS: ast,ir
195+
sink(*ptr4); // $ ast,ir
196196
}
197197

198198
void test_pointers2()

0 commit comments

Comments
 (0)