Commit fdbbaf8

bpf: udp: Make sure iter->batch always contains a full bucket snapshot
Require that iter->batch always contains a full bucket snapshot. This
invariant is important to avoid skipping or repeating sockets during
iteration when combined with the next few patches. Before, there were two
cases where a call to bpf_iter_udp_batch might capture only part of a
bucket:

1. When bpf_iter_udp_realloc_batch() returns -ENOMEM [1].
2. When more sockets are added to the bucket while calling
   bpf_iter_udp_realloc_batch(), making the updated batch size
   insufficient [2].

In cases where the batch size only covers part of a bucket, it is possible
to forget which sockets were already visited, especially if we have to
process a bucket in more than two batches. This forces us to choose between
repeating or skipping sockets, so don't allow this:

1. Stop iteration and propagate -ENOMEM up to userspace if reallocation
   fails instead of continuing with a partial batch.
2. Retry bpf_iter_udp_realloc_batch() two times without holding onto the
   bucket lock (hslot2->lock) so that we can use GFP_USER and maximize the
   chances that memory allocation succeeds. On the third attempt, if we
   still haven't been able to capture a full bucket snapshot, hold onto the
   bucket lock through bpf_iter_udp_realloc_batch() to guarantee that the
   bucket size doesn't change while we allocate more memory and fill the
   batch. On the last pass, we must use GFP_ATOMIC since we hold onto the
   spin lock.

Introduce the udp_portaddr_for_each_entry_from macro and use it instead of
udp_portaddr_for_each_entry to make it possible to continue iteration from
an arbitrary socket. This is required both by this patch in the GFP_ATOMIC
case, to allow us to fill the rest of a batch starting from the middle of a
bucket, and by a later patch which skips sockets that were already seen.

Testing all scenarios directly is a bit difficult, but I did some manual
testing to exercise the code paths where GFP_ATOMIC is used and where
ERR_PTR(err) is returned. I used the realloc test case included later in
this series to trigger a scenario where a realloc happens inside
bpf_iter_udp_batch and made a small code tweak to force the first two
realloc attempts to allocate a too-small buffer, thus requiring another
attempt until the GFP_ATOMIC case is hit. Some printks showed three
reallocs with the tests passing:

Apr 16 00:08:32 crow kernel: go again (mem_flags=GFP_USER)
Apr 16 00:08:32 crow kernel: go again (mem_flags=GFP_USER)
Apr 16 00:08:32 crow kernel: go again (mem_flags=GFP_ATOMIC)

With this setup, I also forced bpf_iter_udp_realloc_batch to return -ENOMEM
on one of the retries to ensure that iteration ends and that the read() in
userspace fails, and incremented batch_sks to hit the WARN_ON_ONCE
condition.

[1]: https://lore.kernel.org/bpf/CABi4-ogUtMrH8-NVB6W8Xg_F_KDLq=yy-yu-tKr2udXE2Mu1Lg@mail.gmail.com/
[2]: https://lore.kernel.org/bpf/[email protected]/

Signed-off-by: Jordan Rife <[email protected]>
Suggested-by: Martin KaFai Lau <[email protected]>
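
The allocation policy in point 2 above is easiest to see in isolation: grow
the snapshot buffer with the lock dropped so a blocking allocation is
harmless, and only on the final attempt grow it while the lock is held so
the structure cannot change size before the refill. Below is a minimal
userspace analogy of that policy, not the kernel code; struct list, fill(),
and snapshot() are invented for illustration, and the real implementation
is in the diff that follows.

        #include <pthread.h>
        #include <stdlib.h>
        #include <sys/types.h>

        #define MAX_REALLOC_ATTEMPTS 2

        struct node {
                int val;
                struct node *next;
        };

        struct list {
                pthread_mutex_t lock;
                struct node *head;
        };

        /* Copy up to max values into batch; return the total number of nodes
         * seen. Caller must hold l->lock.
         */
        static size_t fill(struct list *l, int *batch, size_t max)
        {
                size_t seen = 0;

                for (struct node *p = l->head; p; p = p->next, seen++)
                        if (seen < max)
                                batch[seen] = p->val;
                return seen;
        }

        /* Snapshot the whole list into *out; return its length or -1 on ENOMEM. */
        static ssize_t snapshot(struct list *l, int **out)
        {
                size_t cap = 16, total;
                int attempts = 0;
                int *batch, *bigger;

                batch = malloc(cap * sizeof(*batch));
                if (!batch)
                        return -1;

                for (;;) {
                        pthread_mutex_lock(&l->lock);
                        total = fill(l, batch, cap);

                        if (total <= cap)
                                break;          /* full snapshot captured */

                        if (attempts == MAX_REALLOC_ATTEMPTS) {
                                /* Final attempt: grow while still holding the
                                 * lock so the list cannot grow again before the
                                 * refill. (The kernel patch must use GFP_ATOMIC
                                 * here for the same reason: the spin lock is
                                 * still held.)
                                 */
                                bigger = realloc(batch, total * sizeof(*batch));
                                if (!bigger) {
                                        pthread_mutex_unlock(&l->lock);
                                        free(batch);
                                        return -1;      /* like ERR_PTR(-ENOMEM) */
                                }
                                batch = bigger;
                                cap = total;
                                total = fill(l, batch, cap);
                                break;
                        }

                        /* Early attempts: drop the lock so a blocking allocation
                         * is fine, grow with some headroom, and rescan.
                         */
                        pthread_mutex_unlock(&l->lock);
                        cap = total * 3 / 2;
                        bigger = realloc(batch, cap * sizeof(*batch));
                        if (!bigger) {
                                free(batch);
                                return -1;
                        }
                        batch = bigger;
                        attempts++;
                }

                pthread_mutex_unlock(&l->lock);
                *out = batch;
                return total;
        }
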
1 parent ac48bca commit fdbbaf8

2 files changed (+50, -15 lines)

include/linux/udp.h: 3 additions & 0 deletions

@@ -209,6 +209,9 @@ static inline void udp_allow_gso(struct sock *sk)
 #define udp_portaddr_for_each_entry(__sk, list) \
         hlist_for_each_entry(__sk, list, __sk_common.skc_portaddr_node)
 
+#define udp_portaddr_for_each_entry_from(__sk) \
+        hlist_for_each_entry_from(__sk, __sk_common.skc_portaddr_node)
+
 #define udp_portaddr_for_each_entry_rcu(__sk, list) \
         hlist_for_each_entry_rcu(__sk, list, __sk_common.skc_portaddr_node)
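
The _from variant only changes where the walk starts: it visits __sk itself
and every entry after it, instead of starting from the head of the list. A
minimal sketch of how a caller might pick the starting point (hslot2 and
resume are illustrative locals, not part of this hunk):

        struct sock *sk;

        if (!resume)
                /* no resume point: start from the head of the bucket */
                sk = hlist_entry_safe(hslot2->head.first, struct sock,
                                      __sk_common.skc_portaddr_node);
        else
                /* continue from a previously remembered socket */
                sk = resume;

        udp_portaddr_for_each_entry_from(sk) {
                /* visits sk and every later entry in the bucket */
        }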

net/ipv4/udp.c: 47 additions & 15 deletions

@@ -3383,6 +3383,7 @@ int udp4_seq_show(struct seq_file *seq, void *v)
 }
 
 #ifdef CONFIG_BPF_SYSCALL
+#define MAX_REALLOC_ATTEMPTS 2
 struct bpf_iter__udp {
         __bpf_md_ptr(struct bpf_iter_meta *, meta);
         __bpf_md_ptr(struct udp_sock *, udp_sk);
@@ -3410,8 +3411,9 @@ static struct sock *bpf_iter_udp_batch(struct seq_file *seq)
         int resume_bucket, resume_offset;
         struct udp_table *udptable;
         unsigned int batch_sks = 0;
-        bool resized = false;
         struct sock *sk;
+        int resizes = 0;
+        int err = 0;
 
         resume_bucket = state->bucket;
         resume_offset = iter->offset;
@@ -3439,11 +3441,14 @@ static struct sock *bpf_iter_udp_batch(struct seq_file *seq)
                 struct udp_hslot *hslot2 = &udptable->hash2[state->bucket].hslot;
 
                 if (hlist_empty(&hslot2->head))
-                        continue;
+                        goto next_bucket;
 
                 iter->offset = 0;
                 spin_lock_bh(&hslot2->lock);
-                udp_portaddr_for_each_entry(sk, &hslot2->head) {
+                sk = hlist_entry_safe(hslot2->head.first, struct sock,
+                                      __sk_common.skc_portaddr_node);
+fill_batch:
+                udp_portaddr_for_each_entry_from(sk) {
                         if (seq_sk_match(seq, sk)) {
                                 /* Resume from the last iterated socket at the
                                  * offset in the bucket before iterator was stopped.
@@ -3460,10 +3465,34 @@ static struct sock *bpf_iter_udp_batch(struct seq_file *seq)
                                 batch_sks++;
                         }
                 }
+
+                if (unlikely(resizes == MAX_REALLOC_ATTEMPTS) && iter->end_sk &&
+                    iter->end_sk != batch_sks) {
+                        /* This is the last realloc attempt, so keep holding the
+                         * lock to ensure that the bucket does not change.
+                         */
+                        err = bpf_iter_udp_realloc_batch(iter, batch_sks,
+                                                         GFP_ATOMIC);
+                        if (err) {
+                                spin_unlock_bh(&hslot2->lock);
+                                return ERR_PTR(err);
+                        }
+
+                        sk = iter->batch[iter->end_sk - 1];
+                        sk = hlist_entry_safe(sk->__sk_common.skc_portaddr_node.next,
+                                              struct sock,
+                                              __sk_common.skc_portaddr_node);
+                        batch_sks = iter->end_sk;
+                        resizes++;
+                        goto fill_batch;
+                }
+
                 spin_unlock_bh(&hslot2->lock);
 
                 if (iter->end_sk)
                         break;
+next_bucket:
+                resizes = 0;
         }
 
         /* All done: no batch made. */
@@ -3475,18 +3504,18 @@ static struct sock *bpf_iter_udp_batch(struct seq_file *seq)
                  * socket to be iterated from the batch.
                  */
                 iter->st_bucket_done = true;
-                goto done;
+                return iter->batch[0];
         }
-        if (!resized && !bpf_iter_udp_realloc_batch(iter, batch_sks * 3 / 2,
-                                                    GFP_USER)) {
-                resized = true;
-                /* After allocating a larger batch, retry one more time to grab
-                 * the whole bucket.
-                 */
-                goto again;
-        }
-done:
-        return iter->batch[0];
+
+        if (WARN_ON_ONCE(resizes >= MAX_REALLOC_ATTEMPTS))
+                return iter->batch[0];
+
+        err = bpf_iter_udp_realloc_batch(iter, batch_sks * 3 / 2, GFP_USER);
+        if (err)
+                return ERR_PTR(err);
+
+        resizes++;
+        goto again;
 }
 
 static void *bpf_iter_udp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
@@ -3841,7 +3870,10 @@ static int bpf_iter_udp_realloc_batch(struct bpf_udp_iter_state *iter,
         if (!new_batch)
                 return -ENOMEM;
 
-        bpf_iter_udp_put_batch(iter);
+        if (flags != GFP_ATOMIC)
+                bpf_iter_udp_put_batch(iter);
+
+        memcpy(new_batch, iter->batch, sizeof(*iter->batch) * iter->end_sk);
         kvfree(iter->batch);
         iter->batch = new_batch;
         iter->max_sk = new_batch_sz;
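
The flags check above is what allows the final, lock-held realloc to keep
its partial batch: with GFP_ATOMIC the already-batched sockets are not
released, and the memcpy carries them into the larger array so the caller
can resume filling after iter->batch[iter->end_sk - 1]. A condensed
restatement with comments (not verbatim kernel code):

        if (flags != GFP_ATOMIC)
                /* Called with the bucket lock already dropped: release the old
                 * batch; the caller rescans the bucket from the start on retry.
                 */
                bpf_iter_udp_put_batch(iter);

        /* Preserve whatever is already batched. In the GFP_ATOMIC case these
         * are still-referenced sockets that the caller continues from; in the
         * GFP_USER case the batch is rebuilt from scratch on the retry, so the
         * copied entries are never relied upon.
         */
        memcpy(new_batch, iter->batch, sizeof(*iter->batch) * iter->end_sk);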
