Skip to content

Commit 5419285

Browse files
authored
Merge pull request #19544 from Napalys/js/quality/stream_pipe
JS: new `Quality` query - Unhandled errors in `.pipe()` chain
2 parents 2e6794e + 8521c53 commit 5419285

18 files changed

+866
-0
lines changed

javascript/ql/integration-tests/query-suite/javascript-code-quality.qls.expected

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,5 @@ ql/javascript/ql/src/Declarations/IneffectiveParameterType.ql
22
ql/javascript/ql/src/Expressions/ExprHasNoEffect.ql
33
ql/javascript/ql/src/Expressions/MissingAwait.ql
44
ql/javascript/ql/src/LanguageFeatures/SpuriousArguments.ql
5+
ql/javascript/ql/src/Quality/UnhandledErrorInStreamPipeline.ql
56
ql/javascript/ql/src/RegExp/RegExpAlwaysMatches.ql
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
<!DOCTYPE qhelp PUBLIC
2+
"-//Semmle//qhelp//EN"
3+
"qhelp.dtd">
4+
<qhelp>
5+
6+
<overview>
7+
<p>
8+
In Node.js, calling the <code>pipe()</code> method on a stream without proper error handling can lead to unexplained failures, where errors are dropped and not propagated downstream. This can result in unwanted behavior and make debugging difficult. To reliably handle all errors, every stream in the pipeline must have an error handler registered.
9+
</p>
10+
</overview>
11+
12+
<recommendation>
13+
<p>
14+
Instead of using <code>pipe()</code> with manual error handling, prefer using the <code>pipeline</code> function from the Node.js <code>stream</code> module. The <code>pipeline</code> function automatically handles errors and ensures proper cleanup of resources. This approach is more robust and eliminates the risk of forgetting to handle errors.
15+
</p>
16+
<p>
17+
If you must use <code>pipe()</code>, always attach an error handler to the source stream using methods like <code>on('error', handler)</code> to ensure that any errors emitted by the input stream are properly handled. When multiple <code>pipe()</code> calls are chained, an error handler should be attached before each step of the pipeline.
18+
</p>
19+
</recommendation>
20+
21+
<example>
22+
<p>
23+
The following code snippet demonstrates a problematic usage of the <code>pipe()</code> method without error handling:
24+
</p>
25+
26+
<sample src="examples/UnhandledStreamPipe.js" />
27+
28+
<p>
29+
A better approach is to use the <code>pipeline</code> function, which automatically handles errors:
30+
</p>
31+
32+
<sample src="examples/UnhandledStreamPipeGood.js" />
33+
34+
<p>
35+
Alternatively, if you need to use <code>pipe()</code>, make sure to add error handling:
36+
</p>
37+
38+
<sample src="examples/UnhandledStreamPipeManualError.js" />
39+
</example>
40+
41+
<references>
42+
<li>Node.js Documentation: <a href="https://nodejs.org/api/stream.html#streampipelinestreams-callback">stream.pipeline()</a>.</li>
43+
</references>
44+
</qhelp>
Lines changed: 303 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,303 @@
1+
/**
2+
* @id js/unhandled-error-in-stream-pipeline
3+
* @name Unhandled error in stream pipeline
4+
* @description Calling `pipe()` on a stream without error handling will drop errors coming from the input stream
5+
* @kind problem
6+
* @problem.severity warning
7+
* @precision high
8+
* @tags quality
9+
* maintainability
10+
* error-handling
11+
* frameworks/nodejs
12+
*/
13+
14+
import javascript
15+
import semmle.javascript.filters.ClassifyFiles
16+
17+
/**
18+
* A call to the `pipe` method on a Node.js stream.
19+
*/
20+
class PipeCall extends DataFlow::MethodCallNode {
21+
PipeCall() {
22+
this.getMethodName() = "pipe" and
23+
this.getNumArgument() = [1, 2] and
24+
not this.getArgument([0, 1]).asExpr() instanceof Function and
25+
not this.getArgument(0).asExpr() instanceof ObjectExpr and
26+
not this.getArgument(0).getALocalSource() = getNonNodeJsStreamType()
27+
}
28+
29+
/** Gets the source stream (receiver of the pipe call). */
30+
DataFlow::Node getSourceStream() { result = this.getReceiver() }
31+
32+
/** Gets the destination stream (argument of the pipe call). */
33+
DataFlow::Node getDestinationStream() { result = this.getArgument(0) }
34+
}
35+
36+
/**
37+
* Gets a reference to a value that is known to not be a Node.js stream.
38+
* This is used to exclude pipe calls on non-stream objects from analysis.
39+
*/
40+
private DataFlow::Node getNonNodeJsStreamType() {
41+
result = getNonStreamApi().getAValueReachableFromSource()
42+
}
43+
44+
/**
45+
* Gets API nodes from modules that are known to not provide Node.js streams.
46+
* This includes reactive programming libraries, frontend frameworks, and other non-stream APIs.
47+
*/
48+
private API::Node getNonStreamApi() {
49+
exists(string moduleName |
50+
moduleName
51+
.regexpMatch([
52+
"rxjs(|/.*)", "@strapi(|/.*)", "highland(|/.*)", "execa(|/.*)", "arktype(|/.*)",
53+
"@ngrx(|/.*)", "@datorama(|/.*)", "@angular(|/.*)", "react.*", "@langchain(|/.*)",
54+
]) and
55+
result = API::moduleImport(moduleName)
56+
)
57+
or
58+
result = getNonStreamApi().getAMember()
59+
or
60+
result = getNonStreamApi().getAParameter().getAParameter()
61+
or
62+
result = getNonStreamApi().getReturn()
63+
or
64+
result = getNonStreamApi().getPromised()
65+
}
66+
67+
/**
68+
* Gets the method names used to register event handlers on Node.js streams.
69+
* These methods are used to attach handlers for events like `error`.
70+
*/
71+
private string getEventHandlerMethodName() { result = ["on", "once", "addListener"] }
72+
73+
/**
74+
* Gets the method names that are chainable on Node.js streams.
75+
*/
76+
private string getChainableStreamMethodName() {
77+
result =
78+
[
79+
"setEncoding", "pause", "resume", "unpipe", "destroy", "cork", "uncork", "setDefaultEncoding",
80+
"off", "removeListener", getEventHandlerMethodName()
81+
]
82+
}
83+
84+
/**
85+
* Gets the method names that are not chainable on Node.js streams.
86+
*/
87+
private string getNonchainableStreamMethodName() {
88+
result = ["read", "write", "end", "pipe", "unshift", "push", "isPaused", "wrap", "emit"]
89+
}
90+
91+
/**
92+
* Gets the property names commonly found on Node.js streams.
93+
*/
94+
private string getStreamPropertyName() {
95+
result =
96+
[
97+
"readable", "writable", "destroyed", "closed", "readableHighWaterMark", "readableLength",
98+
"readableObjectMode", "readableEncoding", "readableFlowing", "readableEnded", "flowing",
99+
"writableHighWaterMark", "writableLength", "writableObjectMode", "writableFinished",
100+
"writableCorked", "writableEnded", "defaultEncoding", "allowHalfOpen", "objectMode",
101+
"errored", "pending", "autoDestroy", "encoding", "path", "fd", "bytesRead", "bytesWritten",
102+
"_readableState", "_writableState"
103+
]
104+
}
105+
106+
/**
107+
* Gets all method names commonly found on Node.js streams.
108+
*/
109+
private string getStreamMethodName() {
110+
result = [getChainableStreamMethodName(), getNonchainableStreamMethodName()]
111+
}
112+
113+
/**
114+
* A call to register an event handler on a Node.js stream.
115+
* This includes methods like `on`, `once`, and `addListener`.
116+
*/
117+
class ErrorHandlerRegistration extends DataFlow::MethodCallNode {
118+
ErrorHandlerRegistration() {
119+
this.getMethodName() = getEventHandlerMethodName() and
120+
this.getArgument(0).getStringValue() = "error"
121+
}
122+
}
123+
124+
/**
125+
* Holds if the stream in `node1` will propagate to `node2`.
126+
*/
127+
private predicate streamFlowStep(DataFlow::Node node1, DataFlow::Node node2) {
128+
exists(PipeCall pipe |
129+
node1 = pipe.getDestinationStream() and
130+
node2 = pipe
131+
)
132+
or
133+
exists(DataFlow::MethodCallNode chainable |
134+
chainable.getMethodName() = getChainableStreamMethodName() and
135+
node1 = chainable.getReceiver() and
136+
node2 = chainable
137+
)
138+
}
139+
140+
/**
141+
* Tracks the result of a pipe call as it flows through the program.
142+
*/
143+
private DataFlow::SourceNode destinationStreamRef(DataFlow::TypeTracker t, PipeCall pipe) {
144+
t.start() and
145+
(result = pipe or result = pipe.getDestinationStream().getALocalSource())
146+
or
147+
exists(DataFlow::SourceNode prev |
148+
prev = destinationStreamRef(t.continue(), pipe) and
149+
streamFlowStep(prev, result)
150+
)
151+
or
152+
exists(DataFlow::TypeTracker t2 | result = destinationStreamRef(t2, pipe).track(t2, t))
153+
}
154+
155+
/**
156+
* Gets a reference to the result of a pipe call.
157+
*/
158+
private DataFlow::SourceNode destinationStreamRef(PipeCall pipe) {
159+
result = destinationStreamRef(DataFlow::TypeTracker::end(), pipe)
160+
}
161+
162+
/**
163+
* Holds if the pipe call result is used to call a non-stream method.
164+
* Since pipe() returns the destination stream, this finds cases where
165+
* the destination stream is used with methods not typical of streams.
166+
*/
167+
private predicate isPipeFollowedByNonStreamMethod(PipeCall pipeCall) {
168+
exists(DataFlow::MethodCallNode call |
169+
call = destinationStreamRef(pipeCall).getAMethodCall() and
170+
not call.getMethodName() = getStreamMethodName()
171+
)
172+
}
173+
174+
/**
175+
* Holds if the pipe call result is used to access a property that is not typical of streams.
176+
*/
177+
private predicate isPipeFollowedByNonStreamProperty(PipeCall pipeCall) {
178+
exists(DataFlow::PropRef propRef |
179+
propRef = destinationStreamRef(pipeCall).getAPropertyRead() and
180+
not propRef.getPropertyName() = [getStreamPropertyName(), getStreamMethodName()]
181+
)
182+
}
183+
184+
/**
185+
* Holds if the pipe call result is used in a non-stream-like way,
186+
* either by calling non-stream methods or accessing non-stream properties.
187+
*/
188+
private predicate isPipeFollowedByNonStreamAccess(PipeCall pipeCall) {
189+
isPipeFollowedByNonStreamMethod(pipeCall) or
190+
isPipeFollowedByNonStreamProperty(pipeCall)
191+
}
192+
193+
/**
194+
* Gets a reference to a stream that may be the source of the given pipe call.
195+
* Uses type back-tracking to trace stream references in the data flow.
196+
*/
197+
private DataFlow::SourceNode sourceStreamRef(DataFlow::TypeBackTracker t, PipeCall pipeCall) {
198+
t.start() and
199+
result = pipeCall.getSourceStream().getALocalSource()
200+
or
201+
exists(DataFlow::SourceNode prev |
202+
prev = sourceStreamRef(t.continue(), pipeCall) and
203+
streamFlowStep(result.getALocalUse(), prev)
204+
)
205+
or
206+
exists(DataFlow::TypeBackTracker t2 | result = sourceStreamRef(t2, pipeCall).backtrack(t2, t))
207+
}
208+
209+
/**
210+
* Gets a reference to a stream that may be the source of the given pipe call.
211+
*/
212+
private DataFlow::SourceNode sourceStreamRef(PipeCall pipeCall) {
213+
result = sourceStreamRef(DataFlow::TypeBackTracker::end(), pipeCall)
214+
}
215+
216+
/**
217+
* Holds if the source stream of the given pipe call has an `error` handler registered.
218+
*/
219+
private predicate hasErrorHandlerRegistered(PipeCall pipeCall) {
220+
exists(DataFlow::Node stream |
221+
stream = sourceStreamRef(pipeCall).getALocalUse() and
222+
(
223+
stream.(DataFlow::SourceNode).getAMethodCall(_) instanceof ErrorHandlerRegistration
224+
or
225+
exists(DataFlow::SourceNode base, string propName |
226+
stream = base.getAPropertyRead(propName) and
227+
base.getAPropertyRead(propName).getAMethodCall(_) instanceof ErrorHandlerRegistration
228+
)
229+
or
230+
exists(DataFlow::PropWrite propWrite, DataFlow::SourceNode instance |
231+
propWrite.getRhs().getALocalSource() = stream and
232+
instance = propWrite.getBase().getALocalSource() and
233+
instance.getAPropertyRead(propWrite.getPropertyName()).getAMethodCall(_) instanceof
234+
ErrorHandlerRegistration
235+
)
236+
)
237+
)
238+
or
239+
hasPlumber(pipeCall)
240+
}
241+
242+
/**
243+
* Holds if the pipe call uses `gulp-plumber`, which automatically handles stream errors.
244+
* `gulp-plumber` returns a stream that uses monkey-patching to ensure all subsequent streams in the pipeline propagate their errors.
245+
*/
246+
private predicate hasPlumber(PipeCall pipeCall) {
247+
pipeCall.getDestinationStream().getALocalSource() = API::moduleImport("gulp-plumber").getACall()
248+
or
249+
sourceStreamRef+(pipeCall) = API::moduleImport("gulp-plumber").getACall()
250+
}
251+
252+
/**
253+
* Holds if the source or destination of the given pipe call is identified as a non-Node.js stream.
254+
*/
255+
private predicate hasNonNodeJsStreamSource(PipeCall pipeCall) {
256+
sourceStreamRef(pipeCall) = getNonNodeJsStreamType() or
257+
destinationStreamRef(pipeCall) = getNonNodeJsStreamType()
258+
}
259+
260+
/**
261+
* Holds if the source stream of the given pipe call is used in a non-stream-like way.
262+
*/
263+
private predicate hasNonStreamSourceLikeUsage(PipeCall pipeCall) {
264+
exists(DataFlow::MethodCallNode call, string name |
265+
call.getReceiver().getALocalSource() = sourceStreamRef(pipeCall) and
266+
name = call.getMethodName() and
267+
not name = getStreamMethodName()
268+
)
269+
or
270+
exists(DataFlow::PropRef propRef, string propName |
271+
propRef.getBase().getALocalSource() = sourceStreamRef(pipeCall) and
272+
propName = propRef.getPropertyName() and
273+
not propName = [getStreamPropertyName(), getStreamMethodName()]
274+
)
275+
}
276+
277+
/**
278+
* Holds if the pipe call destination stream has an error handler registered.
279+
*/
280+
private predicate hasErrorHandlerDownstream(PipeCall pipeCall) {
281+
exists(DataFlow::SourceNode stream |
282+
stream = destinationStreamRef(pipeCall) and
283+
(
284+
exists(ErrorHandlerRegistration handler | handler.getReceiver().getALocalSource() = stream)
285+
or
286+
exists(DataFlow::SourceNode base, string propName |
287+
stream = base.getAPropertyRead(propName) and
288+
base.getAPropertyRead(propName).getAMethodCall(_) instanceof ErrorHandlerRegistration
289+
)
290+
)
291+
)
292+
}
293+
294+
from PipeCall pipeCall
295+
where
296+
not hasErrorHandlerRegistered(pipeCall) and
297+
hasErrorHandlerDownstream(pipeCall) and
298+
not isPipeFollowedByNonStreamAccess(pipeCall) and
299+
not hasNonStreamSourceLikeUsage(pipeCall) and
300+
not hasNonNodeJsStreamSource(pipeCall) and
301+
not isTestFile(pipeCall.getFile())
302+
select pipeCall,
303+
"Stream pipe without error handling on the source stream. Errors won't propagate downstream and may be silently dropped."
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
const fs = require('fs');
2+
const source = fs.createReadStream('source.txt');
3+
const destination = fs.createWriteStream('destination.txt');
4+
5+
// Bad: Only destination has error handling, source errors are unhandled
6+
source.pipe(destination).on('error', (err) => {
7+
console.error('Destination error:', err);
8+
});
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
const { pipeline } = require('stream');
2+
const fs = require('fs');
3+
const source = fs.createReadStream('source.txt');
4+
const destination = fs.createWriteStream('destination.txt');
5+
6+
// Good: Using pipeline for automatic error handling
7+
pipeline(
8+
source,
9+
destination,
10+
(err) => {
11+
if (err) {
12+
console.error('Pipeline failed:', err);
13+
} else {
14+
console.log('Pipeline succeeded');
15+
}
16+
}
17+
);
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
const fs = require('fs');
2+
const source = fs.createReadStream('source.txt');
3+
const destination = fs.createWriteStream('destination.txt');
4+
5+
// Alternative Good: Manual error handling with pipe()
6+
source.on('error', (err) => {
7+
console.error('Source stream error:', err);
8+
destination.destroy(err);
9+
});
10+
11+
destination.on('error', (err) => {
12+
console.error('Destination stream error:', err);
13+
source.destroy(err);
14+
});
15+
16+
source.pipe(destination);

0 commit comments

Comments
 (0)