Skip to content

Commit 39e8111

Browse files
author
Martin KaFai Lau
committed
Merge branch 'add TCP_BPF_SOCK_OPS_CB_FLAGS to bpf_*sockopt()'
Alan Maguire says: ==================== As previously discussed here [1], long-lived sockets can miss a chance to set additional callbacks if a sock ops program was not attached early in their lifetime. Adding support to bpf_setsockopt() to set callback flags (and bpf_getsockopt() to retrieve them) provides other opportunities to enable callbacks, either directly via a cgroup/setsockopt intercepted setsockopt() or via a socket iterator. Patch 1 adds bpf_[get|set]sockopt() support; patch 2 adds testing for it via a sockops program, along with verification via a cgroup/getsockopt program. Changes since v1 [2]: - Removed unneeded READ_ONCE() (Martin, patch 1) - Reworked sockopt test to leave existing tests undisturbed while adding test_nonstandard_opt() test to cover the TCP_BPF_SOCK_OPS_CB_FLAGS case; test verifies that value set via bpf_setsockopt() is what we expect via a call to getsockopt() which is caught by a cgroup/getsockopt program to provide the flags value (Martin, patch 2) - Removed unneeded iterator test (Martin) [1] https://lore.kernel.org/bpf/[email protected]/ [2] https://lore.kernel.org/bpf/[email protected]/ ==================== Signed-off-by: Martin KaFai Lau <[email protected]>
2 parents 91d516d + d530509 commit 39e8111

File tree

5 files changed

+90
-5
lines changed

5 files changed

+90
-5
lines changed

include/uapi/linux/bpf.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2851,7 +2851,7 @@ union bpf_attr {
28512851
* **TCP_SYNCNT**, **TCP_USER_TIMEOUT**, **TCP_NOTSENT_LOWAT**,
28522852
* **TCP_NODELAY**, **TCP_MAXSEG**, **TCP_WINDOW_CLAMP**,
28532853
* **TCP_THIN_LINEAR_TIMEOUTS**, **TCP_BPF_DELACK_MAX**,
2854-
* **TCP_BPF_RTO_MIN**.
2854+
* **TCP_BPF_RTO_MIN**, **TCP_BPF_SOCK_OPS_CB_FLAGS**.
28552855
* * **IPPROTO_IP**, which supports *optname* **IP_TOS**.
28562856
* * **IPPROTO_IPV6**, which supports the following *optname*\ s:
28572857
* **IPV6_TCLASS**, **IPV6_AUTOFLOWLABEL**.
@@ -7080,6 +7080,7 @@ enum {
70807080
TCP_BPF_SYN = 1005, /* Copy the TCP header */
70817081
TCP_BPF_SYN_IP = 1006, /* Copy the IP[46] and TCP header */
70827082
TCP_BPF_SYN_MAC = 1007, /* Copy the MAC, IP[46], and TCP header */
7083+
TCP_BPF_SOCK_OPS_CB_FLAGS = 1008, /* Get or Set TCP sock ops flags */
70837084
};
70847085

70857086
enum {

net/core/filter.c

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5278,6 +5278,11 @@ static int bpf_sol_tcp_setsockopt(struct sock *sk, int optname,
52785278
return -EINVAL;
52795279
inet_csk(sk)->icsk_rto_min = timeout;
52805280
break;
5281+
case TCP_BPF_SOCK_OPS_CB_FLAGS:
5282+
if (val & ~(BPF_SOCK_OPS_ALL_CB_FLAGS))
5283+
return -EINVAL;
5284+
tp->bpf_sock_ops_cb_flags = val;
5285+
break;
52815286
default:
52825287
return -EINVAL;
52835288
}
@@ -5366,6 +5371,17 @@ static int sol_tcp_sockopt(struct sock *sk, int optname,
53665371
if (*optlen < 1)
53675372
return -EINVAL;
53685373
break;
5374+
case TCP_BPF_SOCK_OPS_CB_FLAGS:
5375+
if (*optlen != sizeof(int))
5376+
return -EINVAL;
5377+
if (getopt) {
5378+
struct tcp_sock *tp = tcp_sk(sk);
5379+
int cb_flags = tp->bpf_sock_ops_cb_flags;
5380+
5381+
memcpy(optval, &cb_flags, *optlen);
5382+
return 0;
5383+
}
5384+
return bpf_sol_tcp_setsockopt(sk, optname, optval, *optlen);
53695385
default:
53705386
if (getopt)
53715387
return -EINVAL;

tools/include/uapi/linux/bpf.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2851,7 +2851,7 @@ union bpf_attr {
28512851
* **TCP_SYNCNT**, **TCP_USER_TIMEOUT**, **TCP_NOTSENT_LOWAT**,
28522852
* **TCP_NODELAY**, **TCP_MAXSEG**, **TCP_WINDOW_CLAMP**,
28532853
* **TCP_THIN_LINEAR_TIMEOUTS**, **TCP_BPF_DELACK_MAX**,
2854-
* **TCP_BPF_RTO_MIN**.
2854+
* **TCP_BPF_RTO_MIN**, **TCP_BPF_SOCK_OPS_CB_FLAGS**.
28552855
* * **IPPROTO_IP**, which supports *optname* **IP_TOS**.
28562856
* * **IPPROTO_IPV6**, which supports the following *optname*\ s:
28572857
* **IPV6_TCLASS**, **IPV6_AUTOFLOWLABEL**.
@@ -7080,6 +7080,7 @@ enum {
70807080
TCP_BPF_SYN = 1005, /* Copy the TCP header */
70817081
TCP_BPF_SYN_IP = 1006, /* Copy the IP[46] and TCP header */
70827082
TCP_BPF_SYN_MAC = 1007, /* Copy the MAC, IP[46], and TCP header */
7083+
TCP_BPF_SOCK_OPS_CB_FLAGS = 1008, /* Get or Set TCP sock ops flags */
70837084
};
70847085

70857086
enum {

tools/testing/selftests/bpf/prog_tests/setget_sockopt.c

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,51 @@ static void test_ktls(int family)
154154
close(sfd);
155155
}
156156

157+
/* Check that TCP_BPF_SOCK_OPS_CB_FLAGS can be read back through
 * getsockopt() on an accept()ed socket of the given address family.
 *
 * The test starts a stream server, connects a client to it, attaches the
 * skeleton's _getsockopt program to cg_fd, accept()s the connection and
 * then asserts that the value returned by
 * getsockopt(SOL_TCP, TCP_BPF_SOCK_OPS_CB_FLAGS) has
 * BPF_SOCK_OPS_STATE_CB_FLAG set.
 *
 * family: AF_INET6 selects addr6_str as the server address; any other
 *         value selects addr4_str.
 */
static void test_nonstandard_opt(int family)
158+
{
159+
struct setget_sockopt__bss *bss = skel->bss;
160+
struct bpf_link *getsockopt_link = NULL;
161+
int sfd = -1, fd = -1, cfd = -1, flags;
162+
socklen_t flagslen = sizeof(flags);
163+
164+
/* Zero the skeleton's bss data before running this test case. */
memset(bss, 0, sizeof(*bss));
165+
166+
sfd = start_server(family, SOCK_STREAM,
167+
family == AF_INET6 ? addr6_str : addr4_str, 0, 0);
168+
/* Nothing has been opened yet, so a plain return is enough here. */
if (!ASSERT_GE(sfd, 0, "start_server"))
169+
return;
170+
171+
fd = connect_to_fd(sfd, 0);
172+
if (!ASSERT_GE(fd, 0, "connect_to_fd_server"))
173+
goto err_out;
174+
175+
/* cgroup/getsockopt prog will intercept getsockopt() below and
176+
* retrieve the tcp socket bpf_sock_ops_cb_flags value for the
177+
* accept()ed socket; this was set earlier in the passive established
178+
* callback for the accept()ed socket via bpf_setsockopt().
179+
*/
180+
getsockopt_link = bpf_program__attach_cgroup(skel->progs._getsockopt, cg_fd);
181+
if (!ASSERT_OK_PTR(getsockopt_link, "getsockopt prog"))
182+
goto err_out;
183+
184+
cfd = accept(sfd, NULL, 0);
185+
if (!ASSERT_GE(cfd, 0, "accept"))
186+
goto err_out;
187+
188+
if (!ASSERT_OK(getsockopt(cfd, SOL_TCP, TCP_BPF_SOCK_OPS_CB_FLAGS, &flags, &flagslen),
189+
"getsockopt_flags"))
190+
goto err_out;
191+
/* Only the STATE_CB bit is checked; other bits in flags are ignored. */
ASSERT_EQ(flags & BPF_SOCK_OPS_STATE_CB_FLAG, BPF_SOCK_OPS_STATE_CB_FLAG,
192+
"cb_flags_set");
193+
/* Common cleanup: sfd is always valid when this label is reached, while
 * fd and cfd are closed only if they were actually opened (still -1
 * otherwise). getsockopt_link may still be NULL at this point.
 */
err_out:
194+
close(sfd);
195+
if (fd != -1)
196+
close(fd);
197+
if (cfd != -1)
198+
close(cfd);
199+
bpf_link__destroy(getsockopt_link);
200+
}
201+
157202
void test_setget_sockopt(void)
158203
{
159204
cg_fd = test__join_cgroup(CG_NAME);
@@ -191,6 +236,8 @@ void test_setget_sockopt(void)
191236
test_udp(AF_INET);
192237
test_ktls(AF_INET6);
193238
test_ktls(AF_INET);
239+
test_nonstandard_opt(AF_INET);
240+
test_nonstandard_opt(AF_INET6);
194241

195242
done:
196243
setget_sockopt__destroy(skel);

tools/testing/selftests/bpf/progs/setget_sockopt.c

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,8 @@ static const struct sockopt_test sol_tcp_tests[] = {
5959
{ .opt = TCP_THIN_LINEAR_TIMEOUTS, .flip = 1, },
6060
{ .opt = TCP_USER_TIMEOUT, .new = 123400, .expected = 123400, },
6161
{ .opt = TCP_NOTSENT_LOWAT, .new = 1314, .expected = 1314, },
62+
{ .opt = TCP_BPF_SOCK_OPS_CB_FLAGS, .new = BPF_SOCK_OPS_ALL_CB_FLAGS,
63+
.expected = BPF_SOCK_OPS_ALL_CB_FLAGS, },
6264
{ .opt = 0, },
6365
};
6466

@@ -353,11 +355,30 @@ int BPF_PROG(socket_post_create, struct socket *sock, int family,
353355
return 1;
354356
}
355357

358+
/* cgroup/getsockopt program that answers TCP_BPF_SOCK_OPS_CB_FLAGS queries.
 *
 * For a request with level SOL_TCP and optname TCP_BPF_SOCK_OPS_CB_FLAGS on
 * a context that has a socket, the socket's bpf_sock_ops_cb_flags value is
 * written into the option buffer and ctx->retval is set to 0. Any other
 * request is left untouched.
 *
 * Returns 1 on every path.
 */
SEC("cgroup/getsockopt")
359+
int _getsockopt(struct bpf_sockopt *ctx)
360+
{
361+
struct bpf_sock *sk = ctx->sk;
362+
int *optval = ctx->optval;
363+
struct tcp_sock *tp;
364+
365+
/* Ignore anything that is not the option under test. */
if (!sk || ctx->level != SOL_TCP || ctx->optname != TCP_BPF_SOCK_OPS_CB_FLAGS)
366+
return 1;
367+
368+
tp = bpf_core_cast(sk, struct tcp_sock);
369+
/* Write the flags only when an int fits between optval and optval_end;
 * otherwise the buffer and retval are left unchanged.
 */
if (ctx->optval + sizeof(int) <= ctx->optval_end) {
370+
*optval = tp->bpf_sock_ops_cb_flags;
371+
ctx->retval = 0;
372+
}
373+
return 1;
374+
}
375+
356376
SEC("sockops")
357377
int skops_sockopt(struct bpf_sock_ops *skops)
358378
{
359379
struct bpf_sock *bpf_sk = skops->sk;
360380
struct sock *sk;
381+
int flags;
361382

362383
if (!bpf_sk)
363384
return 1;
@@ -384,9 +405,8 @@ int skops_sockopt(struct bpf_sock_ops *skops)
384405
nr_passive += !(bpf_test_sockopt(skops, sk) ||
385406
test_tcp_maxseg(skops, sk) ||
386407
test_tcp_saved_syn(skops, sk));
387-
bpf_sock_ops_cb_flags_set(skops,
388-
skops->bpf_sock_ops_cb_flags |
389-
BPF_SOCK_OPS_STATE_CB_FLAG);
408+
flags = skops->bpf_sock_ops_cb_flags | BPF_SOCK_OPS_STATE_CB_FLAG;
409+
bpf_setsockopt(skops, SOL_TCP, TCP_BPF_SOCK_OPS_CB_FLAGS, &flags, sizeof(flags));
390410
break;
391411
case BPF_SOCK_OPS_STATE_CB:
392412
if (skops->args[1] == BPF_TCP_CLOSE_WAIT)

0 commit comments

Comments
 (0)