Skip to content

Commit 2947f17

Browse files
committed
Docs: Update data flow documentation to the new API.
1 parent a0e9659 commit 2947f17

18 files changed

+352
-431
lines changed

docs/codeql/codeql-language-guides/analyzing-data-flow-in-cpp-new.rst

Lines changed: 47 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -168,74 +168,61 @@ Global data flow tracks data flow throughout the entire program, and is therefor
168168
Using global data flow
169169
~~~~~~~~~~~~~~~~~~~~~~
170170

171-
The global data flow library is used by extending the class ``DataFlow::Configuration`` as follows:
171+
The global data flow library is used by implementing the signature ``DataFlow::ConfigSig`` and applying the module ``DataFlow::Global<ConfigSig>`` as follows:
172172

173173
.. code-block:: ql
174174
175175
import semmle.code.cpp.dataflow.new.DataFlow
176176
177-
class MyDataFlowConfiguration extends DataFlow::Configuration {
178-
MyDataFlowConfiguration() { this = "MyDataFlowConfiguration" }
179-
180-
override predicate isSource(DataFlow::Node source) {
177+
module MyFlowConfiguration implements DataFlow::ConfigSig {
178+
predicate isSource(DataFlow::Node source) {
181179
...
182180
}
183181
184-
override predicate isSink(DataFlow::Node sink) {
182+
predicate isSink(DataFlow::Node sink) {
185183
...
186184
}
187185
}
188186
187+
module MyFlow = DataFlow::Global<MyFlowConfiguration>;
188+
189189
The following predicates are defined in the configuration:
190190

191191
- ``isSource``—defines where data may flow from
192192
- ``isSink``—defines where data may flow to
193193
- ``isBarrier``—optional, restricts the data flow
194-
- ``isBarrierGuard``—optional, restricts the data flow
195194
- ``isAdditionalFlowStep``—optional, adds additional flow steps
196195

197-
The characteristic predicate ``MyDataFlowConfiguration()`` defines the name of the configuration, so ``"MyDataFlowConfiguration"`` should be replaced by the name of your class.
198-
199-
The data flow analysis is performed using the predicate ``hasFlow(DataFlow::Node source, DataFlow::Node sink)``:
196+
The data flow analysis is performed using the predicate ``flow(DataFlow::Node source, DataFlow::Node sink)`` of the resulting module ``MyFlow``:
200197

201198
.. code-block:: ql
202199
203-
from MyDataFlowConfiguration dataflow, DataFlow::Node source, DataFlow::Node sink
204-
where dataflow.hasFlow(source, sink)
200+
from DataFlow::Node source, DataFlow::Node sink
201+
where MyFlow::flow(source, sink)
205202
select source, "Data flow to $@.", sink, sink.toString()
206203
207204
Using global taint tracking
208205
~~~~~~~~~~~~~~~~~~~~~~~~~~~
209206

210-
Global taint tracking is to global data flow as local taint tracking is to local data flow. That is, global taint tracking extends global data flow with additional non-value-preserving steps. The global taint tracking library is used by extending the class ``TaintTracking::Configuration`` as follows:
207+
Global taint tracking is to global data flow as local taint tracking is to local data flow. That is, global taint tracking extends global data flow with additional non-value-preserving steps. The global taint tracking library is used by applying the module ``TaintTracking::Global<ConfigSig>`` to your configuration instead of ``DataFlow::Global<ConfigSig>`` as follows:
211208

212209
.. code-block:: ql
213210
214211
import semmle.code.cpp.dataflow.new.TaintTracking
215212
216-
class MyTaintTrackingConfiguration extends TaintTracking::Configuration {
217-
MyTaintTrackingConfiguration() { this = "MyTaintTrackingConfiguration" }
218-
219-
override predicate isSource(DataFlow::Node source) {
213+
module MyFlowConfiguration implements DataFlow::ConfigSig {
214+
predicate isSource(DataFlow::Node source) {
220215
...
221216
}
222217
223-
override predicate isSink(DataFlow::Node sink) {
218+
predicate isSink(DataFlow::Node sink) {
224219
...
225220
}
226221
}
227222
228-
The following predicates are defined in the configuration:
229-
230-
- ``isSource``—defines where taint may flow from
231-
- ``isSink``—defines where taint may flow to
232-
- ``isSanitizer``—optional, restricts the taint flow
233-
- ``isSanitizerGuard``—optional, restricts the taint flow
234-
- ``isAdditionalTaintStep``—optional, adds additional taint steps
223+
module MyFlow = TaintTracking::Global<MyFlowConfiguration>;
235224
236-
Similar to global data flow, the characteristic predicate ``MyTaintTrackingConfiguration()`` defines the unique name of the configuration, so ``"MyTaintTrackingConfiguration"`` should be replaced by the name of your class.
237-
238-
The taint tracking analysis is performed using the predicate ``hasFlow(DataFlow::Node source, DataFlow::Node sink)``.
225+
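The resulting module has an identical signature to the one obtained from ``DataFlow::Global<ConfigSig>``, so the taint tracking analysis is likewise performed using its ``flow(DataFlow::Node source, DataFlow::Node sink)`` predicate.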
239226

240227
Examples
241228
~~~~~~~~
@@ -247,53 +234,50 @@ The following data flow configuration tracks data flow from environment variable
247234
import cpp
248235
import semmle.code.cpp.dataflow.new.DataFlow
249236
250-
class EnvironmentToFileConfiguration extends DataFlow::Configuration {
251-
EnvironmentToFileConfiguration() { this = "EnvironmentToFileConfiguration" }
252-
253-
override predicate isSource(DataFlow::Node source) {
237+
module EnvironmentToFileConfiguration implements DataFlow::ConfigSig {
238+
predicate isSource(DataFlow::Node source) {
254239
exists(Function getenv |
255240
source.asIndirectExpr(1).(FunctionCall).getTarget() = getenv and
256241
getenv.hasGlobalName("getenv")
257242
)
258243
}
259244
260-
override predicate isSink(DataFlow::Node sink) {
245+
predicate isSink(DataFlow::Node sink) {
261246
exists(FunctionCall fc |
262247
sink.asIndirectExpr(1) = fc.getArgument(0) and
263248
fc.getTarget().hasGlobalName("fopen")
264249
)
265250
}
266251
}
267252
253+
module EnvironmentToFileFlow = DataFlow::Global<EnvironmentToFileConfiguration>;
254+
268255
from
269-
Expr getenv, Expr fopen, EnvironmentToFileConfiguration config, DataFlow::Node source,
270-
DataFlow::Node sink
256+
Expr getenv, Expr fopen, DataFlow::Node source, DataFlow::Node sink
271257
where
272258
source.asIndirectExpr(1) = getenv and
273259
sink.asIndirectExpr(1) = fopen and
274-
config.hasFlow(source, sink)
260+
EnvironmentToFileFlow::flow(source, sink)
275261
select fopen, "This 'fopen' uses data from $@.", getenv, "call to 'getenv'"
276262
277-
The following taint-tracking configuration tracks data from a call to ``ntohl`` to an array index operation. It uses the ``Guards`` library to recognize expressions that have been bounds-checked, and defines ``isSanitizer`` to prevent taint from propagating through them. It also uses ``isAdditionalTaintStep`` to add flow from loop bounds to loop indexes.
263+
The following taint-tracking configuration tracks data from a call to ``ntohl`` to an array index operation. It uses the ``Guards`` library to recognize expressions that have been bounds-checked, and defines ``isBarrier`` to prevent taint from propagating through them. It also uses ``isAdditionalFlowStep`` to add flow from loop bounds to loop indexes.
278264

279265
.. code-block:: ql
280266
281267
import cpp
282268
import semmle.code.cpp.controlflow.Guards
283269
import semmle.code.cpp.dataflow.new.TaintTracking
284270
285-
class NetworkToBufferSizeConfiguration extends TaintTracking::Configuration {
286-
NetworkToBufferSizeConfiguration() { this = "NetworkToBufferSizeConfiguration" }
287-
288-
override predicate isSource(DataFlow::Node node) {
271+
module NetworkToBufferSizeConfiguration implements DataFlow::ConfigSig {
272+
predicate isSource(DataFlow::Node node) {
289273
node.asExpr().(FunctionCall).getTarget().hasGlobalName("ntohl")
290274
}
291275
292-
override predicate isSink(DataFlow::Node node) {
276+
predicate isSink(DataFlow::Node node) {
293277
exists(ArrayExpr ae | node.asExpr() = ae.getArrayOffset())
294278
}
295279
296-
override predicate isAdditionalTaintStep(DataFlow::Node pred, DataFlow::Node succ) {
280+
predicate isAdditionalFlowStep(DataFlow::Node pred, DataFlow::Node succ) {
297281
exists(Loop loop, LoopCounter lc |
298282
loop = lc.getALoop() and
299283
loop.getControllingExpr().(RelationalOperation).getGreaterOperand() = pred.asExpr()
@@ -302,7 +286,7 @@ The following taint-tracking configuration tracks data from a call to ``ntohl``
302286
)
303287
}
304288
305-
override predicate isSanitizer(DataFlow::Node node) {
289+
predicate isBarrier(DataFlow::Node node) {
306290
exists(GuardCondition gc, Variable v |
307291
gc.getAChild*() = v.getAnAccess() and
308292
node.asExpr() = v.getAnAccess() and
@@ -312,8 +296,10 @@ The following taint-tracking configuration tracks data from a call to ``ntohl``
312296
}
313297
}
314298
315-
from DataFlow::Node ntohl, DataFlow::Node offset, NetworkToBufferSizeConfiguration conf
316-
where conf.hasFlow(ntohl, offset)
299+
module NetworkToBufferSizeFlow = TaintTracking::Global<NetworkToBufferSizeConfiguration>;
300+
301+
from DataFlow::Node ntohl, DataFlow::Node offset
302+
where NetworkToBufferSizeFlow::flow(ntohl, offset)
317303
select offset, "This array offset may be influenced by $@.", ntohl,
318304
"converted data from the network"
319305
@@ -353,28 +339,27 @@ Exercise 2
353339
import cpp
354340
import semmle.code.cpp.dataflow.new.DataFlow
355341
356-
class LiteralToGethostbynameConfiguration extends DataFlow::Configuration {
357-
LiteralToGethostbynameConfiguration() { this = "LiteralToGethostbynameConfiguration" }
358-
359-
override predicate isSource(DataFlow::Node source) {
342+
module LiteralToGethostbynameConfiguration implements DataFlow::ConfigSig {
343+
predicate isSource(DataFlow::Node source) {
360344
source.asIndirectExpr(1) instanceof StringLiteral
361345
}
362346
363-
override predicate isSink(DataFlow::Node sink) {
347+
predicate isSink(DataFlow::Node sink) {
364348
exists(FunctionCall fc |
365349
sink.asIndirectExpr(1) = fc.getArgument(0) and
366350
fc.getTarget().hasName("gethostbyname")
367351
)
368352
}
369353
}
370354
355+
module LiteralToGethostbynameFlow = DataFlow::Global<LiteralToGethostbynameConfiguration>;
356+
371357
from
372-
StringLiteral sl, FunctionCall fc, LiteralToGethostbynameConfiguration cfg, DataFlow::Node source,
373-
DataFlow::Node sink
358+
StringLiteral sl, FunctionCall fc, DataFlow::Node source, DataFlow::Node sink
374359
where
375360
source.asIndirectExpr(1) = sl and
376361
sink.asIndirectExpr(1) = fc.getArgument(0) and
377-
cfg.hasFlow(source, sink)
362+
LiteralToGethostbynameFlow::flow(source, sink)
378363
select sl, fc
379364
380365
Exercise 3
@@ -401,26 +386,25 @@ Exercise 4
401386
GetenvSource() { this.asIndirectExpr(1).(FunctionCall).getTarget().hasGlobalName("getenv") }
402387
}
403388
404-
class GetenvToGethostbynameConfiguration extends DataFlow::Configuration {
405-
GetenvToGethostbynameConfiguration() { this = "GetenvToGethostbynameConfiguration" }
406-
407-
override predicate isSource(DataFlow::Node source) { source instanceof GetenvSource }
389+
module GetenvToGethostbynameConfiguration implements DataFlow::ConfigSig {
390+
predicate isSource(DataFlow::Node source) { source instanceof GetenvSource }
408391
409-
override predicate isSink(DataFlow::Node sink) {
392+
predicate isSink(DataFlow::Node sink) {
410393
exists(FunctionCall fc |
411394
sink.asIndirectExpr(1) = fc.getArgument(0) and
412395
fc.getTarget().hasName("gethostbyname")
413396
)
414397
}
415398
}
416399
400+
module GetenvToGethostbynameFlow = DataFlow::Global<GetenvToGethostbynameConfiguration>;
401+
417402
from
418-
Expr getenv, FunctionCall fc, GetenvToGethostbynameConfiguration cfg, DataFlow::Node source,
419-
DataFlow::Node sink
403+
Expr getenv, FunctionCall fc, DataFlow::Node source, DataFlow::Node sink
420404
where
421405
source.asIndirectExpr(1) = getenv and
422406
sink.asIndirectExpr(1) = fc.getArgument(0) and
423-
cfg.hasFlow(source, sink)
407+
GetenvToGethostbynameFlow::flow(source, sink)
424408
select getenv, fc
425409
426410
Further reading
@@ -430,4 +414,4 @@ Further reading
430414

431415

432416
.. include:: ../reusables/cpp-further-reading.rst
433-
.. include:: ../reusables/codeql-ref-tools-further-reading.rst
417+
.. include:: ../reusables/codeql-ref-tools-further-reading.rst

0 commit comments

Comments
 (0)