Skip to content

Commit d3cb5df

Browse files
committed
Merge: net: introduce per netns packet chains
MR: https://gitlab.com/redhat/centos-stream/src/kernel/centos-stream-9/-/merge_requests/6814
JIRA: https://issues.redhat.com/browse/RHEL-88921
Tested: vs issue reproducer

Per-netns packet chains prevent unrelated workloads in different netns from interfering with one another; such interference causes an incremental slow-down with each new tap.

Signed-off-by: Paolo Abeni <[email protected]>
Approved-by: Antoine Tenart <[email protected]>
Approved-by: Sabrina Dubroca <[email protected]>
Approved-by: CKI KWF Bot <[email protected]>
Merged-by: Augusto Caringi <[email protected]>
2 parents acf1036 + 95ec819 commit d3cb5df

File tree

5 files changed

+80
-21
lines changed

5 files changed

+80
-21
lines changed

include/linux/netdevice.h

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4191,7 +4191,17 @@ static __always_inline int ____dev_forward_skb(struct net_device *dev,
41914191
return 0;
41924192
}
41934193

4194-
bool dev_nit_active(struct net_device *dev);
4194+
bool dev_nit_active_rcu(const struct net_device *dev);
4195+
static inline bool dev_nit_active(const struct net_device *dev)
4196+
{
4197+
bool ret;
4198+
4199+
rcu_read_lock();
4200+
ret = dev_nit_active_rcu(dev);
4201+
rcu_read_unlock();
4202+
return ret;
4203+
}
4204+
41954205
void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev);
41964206

41974207
static inline void __dev_put(struct net_device *dev)

include/net/net_namespace.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,9 @@ struct net {
8383
struct llist_node defer_free_list;
8484
struct llist_node cleanup_list; /* namespaces on death row */
8585

86+
struct list_head ptype_all;
87+
struct list_head ptype_specific;
88+
8689
#ifdef CONFIG_KEYS
8790
struct key_tag *key_domain; /* Key domain of operation tag */
8891
#endif

net/core/dev.c

Lines changed: 42 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,6 @@
164164

165165
static DEFINE_SPINLOCK(ptype_lock);
166166
struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
167-
struct list_head ptype_all __read_mostly; /* Taps */
168167

169168
static int netif_rx_internal(struct sk_buff *skb);
170169
static int call_netdevice_notifiers_extack(unsigned long val,
@@ -569,10 +568,18 @@ static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
569568

570569
static inline struct list_head *ptype_head(const struct packet_type *pt)
571570
{
572-
if (pt->type == htons(ETH_P_ALL))
573-
return pt->dev ? &pt->dev->ptype_all : &ptype_all;
574-
else
575-
return pt->dev ? &pt->dev->ptype_specific :
571+
if (pt->type == htons(ETH_P_ALL)) {
572+
if (!pt->af_packet_net && !pt->dev)
573+
return NULL;
574+
575+
return pt->dev ? &pt->dev->ptype_all :
576+
&pt->af_packet_net->ptype_all;
577+
}
578+
579+
if (pt->dev)
580+
return &pt->dev->ptype_specific;
581+
582+
return pt->af_packet_net ? &pt->af_packet_net->ptype_specific :
576583
&ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
577584
}
578585

@@ -593,6 +600,9 @@ void dev_add_pack(struct packet_type *pt)
593600
{
594601
struct list_head *head = ptype_head(pt);
595602

603+
if (WARN_ON_ONCE(!head))
604+
return;
605+
596606
spin_lock(&ptype_lock);
597607
list_add_rcu(&pt->list, head);
598608
spin_unlock(&ptype_lock);
@@ -617,6 +627,9 @@ void __dev_remove_pack(struct packet_type *pt)
617627
struct list_head *head = ptype_head(pt);
618628
struct packet_type *pt1;
619629

630+
if (!head)
631+
return;
632+
620633
spin_lock(&ptype_lock);
621634

622635
list_for_each_entry(pt1, head, list) {
@@ -2360,15 +2373,21 @@ static inline bool skb_loop_sk(struct packet_type *ptype, struct sk_buff *skb)
23602373
}
23612374

23622375
/**
2363-
* dev_nit_active - return true if any network interface taps are in use
2376+
* dev_nit_active_rcu - return true if any network interface taps are in use
2377+
*
2378+
* The caller must hold the RCU lock
23642379
*
23652380
* @dev: network device to check for the presence of taps
23662381
*/
2367-
bool dev_nit_active(struct net_device *dev)
2382+
bool dev_nit_active_rcu(const struct net_device *dev)
23682383
{
2369-
return !list_empty(&ptype_all) || !list_empty(&dev->ptype_all);
2384+
/* Callers may hold either RCU or RCU BH lock */
2385+
WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
2386+
2387+
return !list_empty(&dev_net(dev)->ptype_all) ||
2388+
!list_empty(&dev->ptype_all);
23702389
}
2371-
EXPORT_SYMBOL_GPL(dev_nit_active);
2390+
EXPORT_SYMBOL_GPL(dev_nit_active_rcu);
23722391

23732392
/*
23742393
* Support routine. Sends outgoing frames to any network
@@ -2380,9 +2399,10 @@ void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
23802399
struct packet_type *ptype;
23812400
struct sk_buff *skb2 = NULL;
23822401
struct packet_type *pt_prev = NULL;
2383-
struct list_head *ptype_list = &ptype_all;
2402+
struct list_head *ptype_list;
23842403

23852404
rcu_read_lock();
2405+
ptype_list = &dev_net_rcu(dev)->ptype_all;
23862406
again:
23872407
list_for_each_entry_rcu(ptype, ptype_list, list) {
23882408
if (READ_ONCE(ptype->ignore_outgoing))
@@ -2426,7 +2446,7 @@ void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
24262446
pt_prev = ptype;
24272447
}
24282448

2429-
if (ptype_list == &ptype_all) {
2449+
if (ptype_list != &dev->ptype_all) {
24302450
ptype_list = &dev->ptype_all;
24312451
goto again;
24322452
}
@@ -3640,7 +3660,7 @@ static int xmit_one(struct sk_buff *skb, struct net_device *dev,
36403660
unsigned int len;
36413661
int rc;
36423662

3643-
if (dev_nit_active(dev))
3663+
if (dev_nit_active_rcu(dev))
36443664
dev_queue_xmit_nit(skb, dev);
36453665

36463666
len = skb->len;
@@ -5504,7 +5524,8 @@ static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc,
55045524
if (pfmemalloc)
55055525
goto skip_taps;
55065526

5507-
list_for_each_entry_rcu(ptype, &ptype_all, list) {
5527+
list_for_each_entry_rcu(ptype, &dev_net_rcu(skb->dev)->ptype_all,
5528+
list) {
55085529
if (pt_prev)
55095530
ret = deliver_skb(skb, pt_prev, orig_dev);
55105531
pt_prev = ptype;
@@ -5616,6 +5637,14 @@ static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc,
56165637
deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type,
56175638
&ptype_base[ntohs(type) &
56185639
PTYPE_HASH_MASK]);
5640+
5641+
/* orig_dev and skb->dev could belong to different netns;
5642+
* Even in such case we need to traverse only the list
5643+
* coming from skb->dev, as the ptype owner (packet socket)
5644+
* will use dev_net(skb->dev) to do namespace filtering.
5645+
*/
5646+
deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type,
5647+
&dev_net_rcu(skb->dev)->ptype_specific);
56195648
}
56205649

56215650
deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type,
@@ -12042,7 +12071,6 @@ static int __init net_dev_init(void)
1204212071
if (netdev_kobject_init())
1204312072
goto out;
1204412073

12045-
INIT_LIST_HEAD(&ptype_all);
1204612074
for (i = 0; i < PTYPE_HASH_SIZE; i++)
1204712075
INIT_LIST_HEAD(&ptype_base[i]);
1204812076

net/core/net-procfs.c

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -175,12 +175,18 @@ static void *ptype_get_idx(struct seq_file *seq, loff_t pos)
175175
}
176176
}
177177

178-
list_for_each_entry_rcu(pt, &ptype_all, list) {
178+
list_for_each_entry_rcu(pt, &seq_file_net(seq)->ptype_all, list) {
179179
if (i == pos)
180180
return pt;
181181
++i;
182182
}
183183

184+
list_for_each_entry_rcu(pt, &seq_file_net(seq)->ptype_specific, list) {
185+
if (i == pos)
186+
return pt;
187+
++i;
188+
}
189+
184190
for (t = 0; t < PTYPE_HASH_SIZE; t++) {
185191
list_for_each_entry_rcu(pt, &ptype_base[t], list) {
186192
if (i == pos)
@@ -200,6 +206,7 @@ static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
200206

201207
static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
202208
{
209+
struct net *net = seq_file_net(seq);
203210
struct net_device *dev;
204211
struct packet_type *pt;
205212
struct list_head *nxt;
@@ -223,14 +230,22 @@ static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
223230
}
224231
}
225232

226-
nxt = ptype_all.next;
227-
goto ptype_all;
233+
nxt = net->ptype_all.next;
234+
goto net_ptype_all;
228235
}
229236

230-
if (pt->type == htons(ETH_P_ALL)) {
231-
ptype_all:
232-
if (nxt != &ptype_all)
237+
if (pt->af_packet_net) {
238+
net_ptype_all:
239+
if (nxt != &net->ptype_all && nxt != &net->ptype_specific)
233240
goto found;
241+
242+
if (nxt == &net->ptype_all) {
243+
/* continue with ->ptype_specific if it's not empty */
244+
nxt = net->ptype_specific.next;
245+
if (nxt != &net->ptype_specific)
246+
goto found;
247+
}
248+
234249
hash = 0;
235250
nxt = ptype_base[0].next;
236251
} else

net/core/net_namespace.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -311,6 +311,9 @@ EXPORT_SYMBOL_GPL(get_net_ns_by_id);
311311
static __net_init void preinit_net(struct net *net)
312312
{
313313
ref_tracker_dir_init(&net->notrefcnt_tracker, 128, "net notrefcnt");
314+
315+
INIT_LIST_HEAD(&net->ptype_all);
316+
INIT_LIST_HEAD(&net->ptype_specific);
314317
}
315318

316319
/*

0 commit comments

Comments
 (0)