
Commit e4af020

bpf: udp: Avoid socket skips and repeats during iteration
Replace the offset-based approach for tracking progress through a bucket in the UDP table with one based on socket cookies. Remember the cookies of unprocessed sockets from the last batch and use this list to pick up where we left off or, in the case that the next socket disappears between reads, find the first socket after that point that still exists in the bucket and resume from there.

Signed-off-by: Jordan Rife <[email protected]>
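
For background on the mechanism the patch keys on: every socket carries a stable 64-bit cookie, generated lazily by sock_gen_cookie() in the kernel and readable from userspace through the SO_COOKIE socket option (available since Linux 4.12). The standalone snippet below only illustrates that cookie; it is not part of this patch.

    /* Illustration only, not part of the patch: read a UDP socket's 64-bit
     * cookie from userspace via SO_COOKIE. The kernel assigns the value
     * lazily with sock_gen_cookie(), the same helper the iterator now uses
     * to remember unprocessed sockets between reads.
     * Requires kernel/glibc headers that expose SO_COOKIE (Linux 4.12+).
     */
    #include <stdio.h>
    #include <stdint.h>
    #include <unistd.h>
    #include <sys/socket.h>

    int main(void)
    {
            uint64_t cookie = 0;
            socklen_t len = sizeof(cookie);
            int fd = socket(AF_INET, SOCK_DGRAM, 0);

            if (fd < 0)
                    return 1;
            if (getsockopt(fd, SOL_SOCKET, SO_COOKIE, &cookie, &len) == 0)
                    printf("socket cookie: %llu\n", (unsigned long long)cookie);
            close(fd);
            return 0;
    }
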
1 parent 18d58e4 commit e4af020

File tree: 1 file changed, +51 -17 lines changed


net/ipv4/udp.c

@@ -93,6 +93,7 @@
 #include <linux/inet.h>
 #include <linux/netdevice.h>
 #include <linux/slab.h>
+#include <linux/sock_diag.h>
 #include <net/tcp_states.h>
 #include <linux/skbuff.h>
 #include <linux/proc_fs.h>
@@ -3393,34 +3394,51 @@ struct bpf_iter__udp {
 
 union bpf_udp_iter_batch_item {
         struct sock *sock;
+        __u64 cookie;
 };
 
 struct bpf_udp_iter_state {
         struct udp_iter_state state;
         unsigned int cur_sk;
         unsigned int end_sk;
         unsigned int max_sk;
-        int offset;
         union bpf_udp_iter_batch_item *batch;
         bool st_bucket_done;
 };
 
 static int bpf_iter_udp_realloc_batch(struct bpf_udp_iter_state *iter,
                                       unsigned int new_batch_sz, int flags);
+static struct sock *bpf_iter_udp_resume(struct sock *first_sk,
+                                        union bpf_udp_iter_batch_item *cookies,
+                                        int n_cookies)
+{
+        struct sock *sk = NULL;
+        int i = 0;
+
+        for (; i < n_cookies; i++) {
+                sk = first_sk;
+                udp_portaddr_for_each_entry_from(sk)
+                        if (cookies[i].cookie == atomic64_read(&sk->sk_cookie))
+                                goto done;
+        }
+done:
+        return sk;
+}
+
 static struct sock *bpf_iter_udp_batch(struct seq_file *seq)
 {
         struct bpf_udp_iter_state *iter = seq->private;
         struct udp_iter_state *state = &iter->state;
+        unsigned int find_cookie, end_cookie = 0;
         struct net *net = seq_file_net(seq);
-        int resume_bucket, resume_offset;
         struct udp_table *udptable;
         unsigned int batch_sks = 0;
+        int resume_bucket;
         struct sock *sk;
         int resizes = 0;
         int err = 0;
 
         resume_bucket = state->bucket;
-        resume_offset = iter->offset;
 
         /* The current batch is done, so advance the bucket. */
         if (iter->st_bucket_done)
@@ -3436,6 +3454,8 @@ static struct sock *bpf_iter_udp_batch(struct seq_file *seq)
          * before releasing the bucket lock. This allows BPF programs that are
          * called in seq_show to acquire the bucket lock if needed.
          */
+        find_cookie = iter->cur_sk;
+        end_cookie = iter->end_sk;
         iter->cur_sk = 0;
         iter->end_sk = 0;
         iter->st_bucket_done = false;
@@ -3447,21 +3467,26 @@ static struct sock *bpf_iter_udp_batch(struct seq_file *seq)
                 if (hlist_empty(&hslot2->head))
                         goto next_bucket;
 
-                iter->offset = 0;
                 spin_lock_bh(&hslot2->lock);
                 sk = hlist_entry_safe(hslot2->head.first, struct sock,
                                       __sk_common.skc_portaddr_node);
+                /* Resume from the first (in iteration order) unseen socket from
+                 * the last batch that still exists in resume_bucket. Most of
+                 * the time this will just be where the last iteration left off
+                 * in resume_bucket unless that socket disappeared between
+                 * reads.
+                 *
+                 * Skip this if end_cookie isn't set; this is the first
+                 * batch, we're on bucket zero, and we want to start from the
+                 * beginning.
+                 */
+                if (state->bucket == resume_bucket && end_cookie)
+                        sk = bpf_iter_udp_resume(sk,
                                                  &iter->batch[find_cookie],
                                                  end_cookie - find_cookie);
 fill_batch:
                 udp_portaddr_for_each_entry_from(sk) {
                         if (seq_sk_match(seq, sk)) {
-                                /* Resume from the last iterated socket at the
-                                 * offset in the bucket before iterator was stopped.
-                                 */
-                                if (state->bucket == resume_bucket &&
-                                    iter->offset < resume_offset) {
-                                        ++iter->offset;
-                                        continue;
-                                }
                                 if (iter->end_sk < iter->max_sk) {
                                         sock_hold(sk);
                                         iter->batch[iter->end_sk++].sock = sk;
@@ -3530,10 +3555,8 @@ static void *bpf_iter_udp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
         /* Whenever seq_next() is called, the iter->cur_sk is
          * done with seq_show(), so unref the iter->cur_sk.
          */
-        if (iter->cur_sk < iter->end_sk) {
+        if (iter->cur_sk < iter->end_sk)
                 sock_put(iter->batch[iter->cur_sk++].sock);
-                ++iter->offset;
-        }
 
         /* After updating iter->cur_sk, check if there are more sockets
          * available in the current bucket batch.
@@ -3603,8 +3626,19 @@ static int bpf_iter_udp_seq_show(struct seq_file *seq, void *v)
 
 static void bpf_iter_udp_put_batch(struct bpf_udp_iter_state *iter)
 {
-        while (iter->cur_sk < iter->end_sk)
-                sock_put(iter->batch[iter->cur_sk++].sock);
+        union bpf_udp_iter_batch_item *item;
+        unsigned int cur_sk = iter->cur_sk;
+        __u64 cookie;
+
+        /* Remember the cookies of the sockets we haven't seen yet, so we can
+         * pick up where we left off next time around.
+         */
+        while (cur_sk < iter->end_sk) {
+                item = &iter->batch[cur_sk++];
+                cookie = sock_gen_cookie(item->sock);
+                sock_put(item->sock);
+                item->cookie = cookie;
+        }
 }
 
 static void bpf_iter_udp_seq_stop(struct seq_file *seq, void *v)
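
The resume path matters when a pinned BPF UDP iterator is consumed across several read() calls: the seq_file can be stopped mid-bucket between reads, and sockets from the unfinished batch may be closed before the next read arrives. The sketch below is a hypothetical consumer (the pin path and buffer size are invented for illustration) showing that access pattern; it assumes an iter/udp program has already been pinned at the given path.

    /* Illustration only: read a pinned BPF UDP socket iterator in small chunks.
     * Each read() can stop the iterator mid-bucket; with this patch the kernel
     * remembers the cookies of not-yet-shown sockets, so the next read() resumes
     * at the right place even if some of those sockets were closed in between.
     */
    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
            char buf[256]; /* small on purpose, so iteration spans many reads */
            ssize_t n;
            int fd = open("/sys/fs/bpf/udp_iter", O_RDONLY); /* assumed pin path */

            if (fd < 0)
                    return 1;
            while ((n = read(fd, buf, sizeof(buf))) > 0)
                    fwrite(buf, 1, n, stdout);
            close(fd);
            return 0;
    }
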
