Skip to content

Commit c8d50c2

Browse files
committed
netstacklat: Add sanity check for out-of-order sequence
The logic for excluding samples from TCP reads that may have been delayed by HOL blocking relies on reading a number of fields from the TCP socket outside of the socket lock. This may be prone to errors due to the socket state being updated at another place in the kernel while our eBPF program is running. To reduce the risk that a data race causes the filter to fail, add a sanity check for the maximum out of order sequence used to exclude future TCP reads from monitoring. The most problematic of the read fields in the tcp_sock is ooo_last_skb, as that is a pointer to another SKB rather than a direct value. This pointer is only valid as long as the out_of_order_queue is non-empty. Due to a data race, we may check that the ooo-queue is non-empty while there are still SKBs in it, then have the kernel clear out the ooo-queue, and finally attempt to read the ooo_last_skb pointer later when it is no longer valid (and may now point to a freed/recycled SKB). This may result in incorrect values being used for the sequence limit used to exclude future reads of ooo-segments. The faulty sequence limit may both cause reads of HOL-blocked segments to be included or the exclusion of an unnecessarily large amount of future reads (up to 2 GB). To reduce the risk that the garbage data from an invalid SKB is used, introduce two sanity checks for end_seq in the ooo_last_skb. First check if the sequence number is zero, if so assume it is invalid (even though it can be a valid sequence number). Even though we will get an error code if reading the data from this SKB fails altogether, we may still succeed reading from a no longer valid SKB, in which case there is a high risk the data will have been zeroed. If it's non-zero, also check that it is within the current receive window (if not, clamp it to the receive window). Signed-off-by: Simon Sundberg <[email protected]>
1 parent dff6bc5 commit c8d50c2

File tree

1 file changed

+56
-7
lines changed

1 file changed

+56
-7
lines changed

netstacklat/netstacklat.bpf.c

Lines changed: 56 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -330,36 +330,85 @@ static bool filter_min_sockqueue_len(struct sock *sk)
330330
return false;
331331
}
332332

333+
/* Get the current receive window end sequence for tp
334+
* In the kernel receive window checks are done against
335+
* tp->rcv_nxt + tcp_receive_window(tp). This function should give a compareable
336+
* result, i.e. rcv_wup + rcv_wnd or rcv_nxt, whichever is higher
337+
*/
338+
static int get_current_rcv_wnd_seq(struct tcp_sock *tp, u32 rcv_nxt, u32 *seq)
339+
{
340+
u32 rcv_wup, rcv_wnd, window = 0;
341+
int err;
342+
343+
err = bpf_core_read(&rcv_wup, sizeof(rcv_wup), &tp->rcv_wup);
344+
if (err) {
345+
bpf_printk("failed to read tcp_sock->rcv_wup, err=%d", err);
346+
goto exit;
347+
}
348+
349+
err = bpf_core_read(&rcv_wnd, sizeof(rcv_wnd), &tp->rcv_wnd);
350+
if (err) {
351+
bpf_printk("failed to read tcp_sock->rcv_wnd, err=%d", err);
352+
goto exit;
353+
}
354+
355+
window = rcv_wup + rcv_wnd;
356+
if (u32_lt(window, rcv_nxt))
357+
window = rcv_nxt;
358+
359+
exit:
360+
*seq = window;
361+
return err;
362+
}
363+
333364
static int current_max_possible_ooo_seq(struct tcp_sock *tp, u32 *seq)
334365
{
366+
u32 rcv_nxt, cur_rcv_window, max_seq = 0;
335367
struct tcp_skb_cb cb;
336-
u32 max_seq = 0;
337368
int err = 0;
338369

370+
err = bpf_core_read(&rcv_nxt, sizeof(rcv_nxt), &tp->rcv_nxt);
371+
if (err) {
372+
bpf_printk("failed reading tcp_sock->rcv_nxt, err=%d", err);
373+
goto exit;
374+
}
375+
339376
if (BPF_CORE_READ(tp, out_of_order_queue.rb_node) == NULL) {
340377
/* No ooo-segments currently in ooo-queue
341378
* Any ooo-segments must already have been merged to the
342379
* receive queue. Current rcv_nxt must therefore be ahead
343380
* of all ooo-segments that have arrived until now.
344381
*/
345-
err = bpf_core_read(&max_seq, sizeof(max_seq), &tp->rcv_nxt);
346-
if (err)
347-
bpf_printk("failed to read tcp_sock->rcv_nxt, err=%d",
348-
err);
382+
max_seq = rcv_nxt;
349383
} else {
350384
/*
351385
* Some ooo-segments currently in ooo-queue
352386
* Max out-of-order seq is given by the seq_end of the tail
353387
* skb in the ooo-queue.
354388
*/
355389
err = BPF_CORE_READ_INTO(&cb, tp, ooo_last_skb, cb);
356-
if (err)
390+
if (err) {
357391
bpf_printk(
358392
"failed to read tcp_sock->ooo_last_skb->cb, err=%d",
359393
err);
360-
max_seq = cb.end_seq;
394+
goto exit;
395+
}
396+
397+
// Sanity check - ooo_last_skb->cb.end_seq within the receive window?
398+
err = get_current_rcv_wnd_seq(tp, rcv_nxt, &cur_rcv_window);
399+
if (err)
400+
goto exit;
401+
402+
/* While seq 0 can be a valid seq, consider it more likely to
403+
* be the result of reading from an invalid SKB pointer
404+
*/
405+
if (cb.end_seq == 0 || u32_lt(cur_rcv_window, cb.end_seq))
406+
max_seq = cur_rcv_window;
407+
else
408+
max_seq = cb.end_seq;
361409
}
362410

411+
exit:
363412
*seq = max_seq;
364413
return err;
365414
}

0 commit comments

Comments
 (0)