Trigger CI #8831

Open · wants to merge 7 commits into bpf-next_base
3 changes: 3 additions & 0 deletions include/linux/udp.h
@@ -209,6 +209,9 @@ static inline void udp_allow_gso(struct sock *sk)
#define udp_portaddr_for_each_entry(__sk, list) \
hlist_for_each_entry(__sk, list, __sk_common.skc_portaddr_node)

#define udp_portaddr_for_each_entry_from(__sk) \
hlist_for_each_entry_from(__sk, __sk_common.skc_portaddr_node)

#define udp_portaddr_for_each_entry_rcu(__sk, list) \
hlist_for_each_entry_rcu(__sk, list, __sk_common.skc_portaddr_node)

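For context, a minimal sketch (not part of this patch) of how the new macro is meant to be used: unlike udp_portaddr_for_each_entry(), it continues walking a hash2 bucket from a given socket rather than restarting at the list head. The helper name below is hypothetical and the caller is assumed to hold the bucket lock.

static unsigned int count_sockets_from(struct sock *sk)
{
	unsigned int n = 0;

	/* Walk the remainder of the bucket, starting at (and including) sk. */
	udp_portaddr_for_each_entry_from(sk)
		n++;

	return n;
}
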
173 changes: 117 additions & 56 deletions net/ipv4/udp.c
@@ -93,6 +93,7 @@
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/slab.h>
#include <linux/sock_diag.h>
#include <net/tcp_states.h>
#include <linux/skbuff.h>
#include <linux/proc_fs.h>
@@ -3390,34 +3391,55 @@ struct bpf_iter__udp {
int bucket __aligned(8);
};

union bpf_udp_iter_batch_item {
struct sock *sock;
__u64 cookie;
};

struct bpf_udp_iter_state {
struct udp_iter_state state;
unsigned int cur_sk;
unsigned int end_sk;
unsigned int max_sk;
int offset;
struct sock **batch;
bool st_bucket_done;
union bpf_udp_iter_batch_item *batch;
};

static int bpf_iter_udp_realloc_batch(struct bpf_udp_iter_state *iter,
unsigned int new_batch_sz);
unsigned int new_batch_sz, int flags);
static struct sock *bpf_iter_udp_resume(struct sock *first_sk,
union bpf_udp_iter_batch_item *cookies,
int n_cookies)
{
struct sock *sk = NULL;
int i = 0;

for (; i < n_cookies; i++) {
sk = first_sk;
udp_portaddr_for_each_entry_from(sk)
if (cookies[i].cookie == atomic64_read(&sk->sk_cookie))
goto done;
}
done:
return sk;
}

static struct sock *bpf_iter_udp_batch(struct seq_file *seq)
{
struct bpf_udp_iter_state *iter = seq->private;
struct udp_iter_state *state = &iter->state;
unsigned int find_cookie, end_cookie = 0;
struct net *net = seq_file_net(seq);
int resume_bucket, resume_offset;
struct udp_table *udptable;
unsigned int batch_sks = 0;
bool resized = false;
int resume_bucket;
int resizes = 0;
struct sock *sk;
int err = 0;

resume_bucket = state->bucket;
resume_offset = iter->offset;

/* The current batch is done, so advance the bucket. */
if (iter->st_bucket_done)
if (iter->cur_sk == iter->end_sk)
state->bucket++;

udptable = udp_get_table_seq(seq, net);
@@ -3430,62 +3452,89 @@ static struct sock *bpf_iter_udp_batch(struct seq_file *seq)
* before releasing the bucket lock. This allows BPF programs that are
* called in seq_show to acquire the bucket lock if needed.
*/
find_cookie = iter->cur_sk;
end_cookie = iter->end_sk;
iter->cur_sk = 0;
iter->end_sk = 0;
iter->st_bucket_done = false;
batch_sks = 0;

for (; state->bucket <= udptable->mask; state->bucket++) {
struct udp_hslot *hslot2 = &udptable->hash2[state->bucket].hslot;

if (hlist_empty(&hslot2->head))
continue;
goto next_bucket;

iter->offset = 0;
spin_lock_bh(&hslot2->lock);
udp_portaddr_for_each_entry(sk, &hslot2->head) {
sk = hlist_entry_safe(hslot2->head.first, struct sock,
__sk_common.skc_portaddr_node);
/* Resume from the first (in iteration order) unseen socket from
* the last batch that still exists in resume_bucket. Most of
* the time this will just be where the last iteration left off
* in resume_bucket unless that socket disappeared between
* reads.
*/
if (state->bucket == resume_bucket)
sk = bpf_iter_udp_resume(sk, &iter->batch[find_cookie],
end_cookie - find_cookie);
fill_batch:
udp_portaddr_for_each_entry_from(sk) {
if (seq_sk_match(seq, sk)) {
/* Resume from the last iterated socket at the
* offset in the bucket before iterator was stopped.
*/
if (state->bucket == resume_bucket &&
iter->offset < resume_offset) {
++iter->offset;
continue;
}
if (iter->end_sk < iter->max_sk) {
sock_hold(sk);
iter->batch[iter->end_sk++] = sk;
iter->batch[iter->end_sk++].sock = sk;
}
batch_sks++;
}
}

/* Allocate a larger batch and try again. */
if (unlikely(resizes <= 1 && iter->end_sk &&
iter->end_sk != batch_sks)) {
resizes++;

/* First, try with GFP_USER to maximize the chances of
* grabbing more memory.
*/
if (resizes == 1) {
spin_unlock_bh(&hslot2->lock);
err = bpf_iter_udp_realloc_batch(iter,
batch_sks * 3 / 2,
GFP_USER);
if (err)
return ERR_PTR(err);
/* Start over. */
goto again;
}

/* Next, hold onto the lock, so the bucket doesn't
* change while we get the rest of the sockets.
*/
err = bpf_iter_udp_realloc_batch(iter, batch_sks,
GFP_NOWAIT);
if (err) {
spin_unlock_bh(&hslot2->lock);
return ERR_PTR(err);
}

/* Pick up where we left off. */
sk = iter->batch[iter->end_sk - 1].sock;
sk = hlist_entry_safe(sk->__sk_common.skc_portaddr_node.next,
struct sock,
__sk_common.skc_portaddr_node);
batch_sks = iter->end_sk;
goto fill_batch;
}

spin_unlock_bh(&hslot2->lock);

if (iter->end_sk)
break;
next_bucket:
resizes = 0;
}

/* All done: no batch made. */
if (!iter->end_sk)
return NULL;

if (iter->end_sk == batch_sks) {
/* Batching is done for the current bucket; return the first
* socket to be iterated from the batch.
*/
iter->st_bucket_done = true;
goto done;
}
if (!resized && !bpf_iter_udp_realloc_batch(iter, batch_sks * 3 / 2)) {
resized = true;
/* After allocating a larger batch, retry one more time to grab
* the whole bucket.
*/
goto again;
}
done:
return iter->batch[0];
WARN_ON_ONCE(iter->end_sk != batch_sks);
return iter->end_sk ? iter->batch[0].sock : NULL;
}

static void *bpf_iter_udp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
@@ -3496,16 +3545,14 @@ static void *bpf_iter_udp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
/* Whenever seq_next() is called, the iter->cur_sk is
* done with seq_show(), so unref the iter->cur_sk.
*/
if (iter->cur_sk < iter->end_sk) {
sock_put(iter->batch[iter->cur_sk++]);
++iter->offset;
}
if (iter->cur_sk < iter->end_sk)
sock_put(iter->batch[iter->cur_sk++].sock);

/* After updating iter->cur_sk, check if there are more sockets
* available in the current bucket batch.
*/
if (iter->cur_sk < iter->end_sk)
sk = iter->batch[iter->cur_sk];
sk = iter->batch[iter->cur_sk].sock;
else
/* Prepare a new batch. */
sk = bpf_iter_udp_batch(seq);
@@ -3569,8 +3616,19 @@ static int bpf_iter_udp_seq_show(struct seq_file *seq, void *v)

static void bpf_iter_udp_put_batch(struct bpf_udp_iter_state *iter)
{
while (iter->cur_sk < iter->end_sk)
sock_put(iter->batch[iter->cur_sk++]);
union bpf_udp_iter_batch_item *item;
unsigned int cur_sk = iter->cur_sk;
__u64 cookie;

/* Remember the cookies of the sockets we haven't seen yet, so we can
* pick up where we left off next time around.
*/
while (cur_sk < iter->end_sk) {
item = &iter->batch[cur_sk++];
cookie = sock_gen_cookie(item->sock);
sock_put(item->sock);
item->cookie = cookie;
}
}

static void bpf_iter_udp_seq_stop(struct seq_file *seq, void *v)
@@ -3586,10 +3644,8 @@ static void bpf_iter_udp_seq_stop(struct seq_file *seq, void *v)
(void)udp_prog_seq_show(prog, &meta, v, 0, 0);
}

if (iter->cur_sk < iter->end_sk) {
if (iter->cur_sk < iter->end_sk)
bpf_iter_udp_put_batch(iter);
iter->st_bucket_done = false;
}
}

static const struct seq_operations bpf_iter_udp_seq_ops = {
@@ -3831,16 +3887,19 @@ DEFINE_BPF_ITER_FUNC(udp, struct bpf_iter_meta *meta,
struct udp_sock *udp_sk, uid_t uid, int bucket)

static int bpf_iter_udp_realloc_batch(struct bpf_udp_iter_state *iter,
unsigned int new_batch_sz)
unsigned int new_batch_sz, int flags)
{
struct sock **new_batch;
union bpf_udp_iter_batch_item *new_batch;

new_batch = kvmalloc_array(new_batch_sz, sizeof(*new_batch),
GFP_USER | __GFP_NOWARN);
flags | __GFP_NOWARN);
if (!new_batch)
return -ENOMEM;

bpf_iter_udp_put_batch(iter);
if (flags != GFP_NOWAIT)
bpf_iter_udp_put_batch(iter);

memcpy(new_batch, iter->batch, sizeof(*iter->batch) * iter->end_sk);
kvfree(iter->batch);
iter->batch = new_batch;
iter->max_sk = new_batch_sz;
@@ -3859,10 +3918,12 @@ static int bpf_iter_init_udp(void *priv_data, struct bpf_iter_aux_info *aux)
if (ret)
return ret;

ret = bpf_iter_udp_realloc_batch(iter, INIT_BATCH_SZ);
ret = bpf_iter_udp_realloc_batch(iter, INIT_BATCH_SZ, GFP_USER);
if (ret)
bpf_iter_fini_seq_net(priv_data);

iter->state.bucket = -1;

return ret;
}

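For reference, a minimal user-space sketch (not part of this patch; standard libbpf calls assumed, helper name hypothetical) of the scenario the resume-by-cookie logic above addresses: reading a UDP socket iterator with a small buffer forces the kernel to stop and later resume the seq_file part-way through a bucket, which previously could skip or repeat sockets.

#include <errno.h>
#include <stdio.h>
#include <unistd.h>
#include <bpf/libbpf.h>

/* prog is an already-loaded BPF program attached to the udp iterator. */
static int dump_udp_sockets(struct bpf_program *prog)
{
	struct bpf_link *link;
	char buf[64];	/* deliberately small to force stop/resume */
	int iter_fd, n, err = 0;

	link = bpf_program__attach_iter(prog, NULL);
	if (!link)
		return -errno;

	iter_fd = bpf_iter_create(bpf_link__fd(link));
	if (iter_fd < 0) {
		err = iter_fd;
		goto out;
	}

	while ((n = read(iter_fd, buf, sizeof(buf))) > 0)
		fwrite(buf, 1, n, stdout);
	if (n < 0)
		err = -errno;

	close(iter_fd);
out:
	bpf_link__destroy(link);
	return err;
}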