Skip to content

Commit 9ac5742

Browse files
committed
netfilter: ipset: Fix race between namespace cleanup and gc in the list:set type
jira LE-3201 cve CVE-2024-39503 Rebuild_History Non-Buildable kernel-rt-4.18.0-553.27.1.rt7.368.el8_10 commit-author Jozsef Kadlecsik <[email protected]> commit 4e7aaa6 Lion Ackermann reported that there is a race condition between namespace cleanup in ipset and the garbage collection of the list:set type. The namespace cleanup can destroy the list:set type of sets while the gc of the set type is waiting to run in rcu cleanup. The latter uses data from the destroyed set which thus leads use after free. The patch contains the following parts: - When destroying all sets, first remove the garbage collectors, then wait if needed and then destroy the sets. - Fix the badly ordered "wait then remove gc" for the destroy a single set case. - Fix the missing rcu locking in the list:set type in the userspace test case. - Use proper RCU list handlings in the list:set type. The patch depends on c1193d9 (netfilter: ipset: Add list flush to cancel_gc). Fixes: 97f7cf1 (netfilter: ipset: fix performance regression in swap operation) Reported-by: Lion Ackermann <[email protected]> Tested-by: Lion Ackermann <[email protected]> Signed-off-by: Jozsef Kadlecsik <[email protected]> Signed-off-by: Pablo Neira Ayuso <[email protected]> (cherry picked from commit 4e7aaa6) Signed-off-by: Jonathan Maple <[email protected]>
1 parent 99292e6 commit 9ac5742

File tree

2 files changed

+60
-51
lines changed

2 files changed

+60
-51
lines changed

net/netfilter/ipset/ip_set_core.c

Lines changed: 46 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1025,23 +1025,50 @@ ip_set_setname_policy[IPSET_ATTR_CMD_MAX + 1] = {
10251025
.len = IPSET_MAXNAMELEN - 1 },
10261026
};
10271027

1028+
/* In order to return quickly when destroying a single set, it is split
1029+
* into two stages:
1030+
* - Cancel garbage collector
1031+
* - Destroy the set itself via call_rcu()
1032+
*/
1033+
10281034
static void
1029-
ip_set_destroy_set(struct ip_set *set)
1035+
ip_set_destroy_set_rcu(struct rcu_head *head)
10301036
{
1031-
pr_debug("set: %s\n", set->name);
1037+
struct ip_set *set = container_of(head, struct ip_set, rcu);
10321038

1033-
/* Must call it without holding any lock */
10341039
set->variant->destroy(set);
10351040
module_put(set->type->me);
10361041
kfree(set);
10371042
}
10381043

10391044
static void
1040-
ip_set_destroy_set_rcu(struct rcu_head *head)
1045+
_destroy_all_sets(struct ip_set_net *inst)
10411046
{
1042-
struct ip_set *set = container_of(head, struct ip_set, rcu);
1047+
struct ip_set *set;
1048+
ip_set_id_t i;
1049+
bool need_wait = false;
10431050

1044-
ip_set_destroy_set(set);
1051+
/* First cancel gc's: set:list sets are flushed as well */
1052+
for (i = 0; i < inst->ip_set_max; i++) {
1053+
set = ip_set(inst, i);
1054+
if (set) {
1055+
set->variant->cancel_gc(set);
1056+
if (set->type->features & IPSET_TYPE_NAME)
1057+
need_wait = true;
1058+
}
1059+
}
1060+
/* Must wait for flush to be really finished */
1061+
if (need_wait)
1062+
rcu_barrier();
1063+
for (i = 0; i < inst->ip_set_max; i++) {
1064+
set = ip_set(inst, i);
1065+
if (set) {
1066+
ip_set(inst, i) = NULL;
1067+
set->variant->destroy(set);
1068+
module_put(set->type->me);
1069+
kfree(set);
1070+
}
1071+
}
10451072
}
10461073

10471074
static int ip_set_destroy(struct net *net, struct sock *ctnl,
@@ -1057,20 +1084,17 @@ static int ip_set_destroy(struct net *net, struct sock *ctnl,
10571084
if (unlikely(protocol_min_failed(attr)))
10581085
return -IPSET_ERR_PROTOCOL;
10591086

1060-
10611087
/* Commands are serialized and references are
10621088
* protected by the ip_set_ref_lock.
10631089
* External systems (i.e. xt_set) must call
1064-
* ip_set_put|get_nfnl_* functions, that way we
1090+
* ip_set_nfnl_get_* functions, that way we
10651091
* can safely check references here.
10661092
*
10671093
* list:set timer can only decrement the reference
10681094
* counter, so if it's already zero, we can proceed
10691095
* without holding the lock.
10701096
*/
10711097
if (!attr[IPSET_ATTR_SETNAME]) {
1072-
/* Must wait for flush to be really finished in list:set */
1073-
rcu_barrier();
10741098
read_lock_bh(&ip_set_ref_lock);
10751099
for (i = 0; i < inst->ip_set_max; i++) {
10761100
s = ip_set(inst, i);
@@ -1081,15 +1105,7 @@ static int ip_set_destroy(struct net *net, struct sock *ctnl,
10811105
}
10821106
inst->is_destroyed = true;
10831107
read_unlock_bh(&ip_set_ref_lock);
1084-
for (i = 0; i < inst->ip_set_max; i++) {
1085-
s = ip_set(inst, i);
1086-
if (s) {
1087-
ip_set(inst, i) = NULL;
1088-
/* Must cancel garbage collectors */
1089-
s->variant->cancel_gc(s);
1090-
ip_set_destroy_set(s);
1091-
}
1092-
}
1108+
_destroy_all_sets(inst);
10931109
/* Modified by ip_set_destroy() only, which is serialized */
10941110
inst->is_destroyed = false;
10951111
} else {
@@ -1108,12 +1124,12 @@ static int ip_set_destroy(struct net *net, struct sock *ctnl,
11081124
features = s->type->features;
11091125
ip_set(inst, i) = NULL;
11101126
read_unlock_bh(&ip_set_ref_lock);
1127+
/* Must cancel garbage collectors */
1128+
s->variant->cancel_gc(s);
11111129
if (features & IPSET_TYPE_NAME) {
11121130
/* Must wait for flush to be really finished */
11131131
rcu_barrier();
11141132
}
1115-
/* Must cancel garbage collectors */
1116-
s->variant->cancel_gc(s);
11171133
call_rcu(&s->rcu, ip_set_destroy_set_rcu);
11181134
}
11191135
return 0;
@@ -2238,30 +2254,25 @@ ip_set_net_init(struct net *net)
22382254
}
22392255

22402256
static void __net_exit
2241-
ip_set_net_exit(struct net *net)
2257+
ip_set_net_pre_exit(struct net *net)
22422258
{
22432259
struct ip_set_net *inst = ip_set_pernet(net);
22442260

2245-
struct ip_set *set = NULL;
2246-
ip_set_id_t i;
2247-
22482261
inst->is_deleted = true; /* flag for ip_set_nfnl_put */
2262+
}
22492263

2250-
nfnl_lock(NFNL_SUBSYS_IPSET);
2251-
for (i = 0; i < inst->ip_set_max; i++) {
2252-
set = ip_set(inst, i);
2253-
if (set) {
2254-
ip_set(inst, i) = NULL;
2255-
set->variant->cancel_gc(set);
2256-
ip_set_destroy_set(set);
2257-
}
2258-
}
2259-
nfnl_unlock(NFNL_SUBSYS_IPSET);
2264+
static void __net_exit
2265+
ip_set_net_exit(struct net *net)
2266+
{
2267+
struct ip_set_net *inst = ip_set_pernet(net);
2268+
2269+
_destroy_all_sets(inst);
22602270
kvfree(rcu_dereference_protected(inst->ip_set_list, 1));
22612271
}
22622272

22632273
static struct pernet_operations ip_set_net_ops = {
22642274
.init = ip_set_net_init,
2275+
.pre_exit = ip_set_net_pre_exit,
22652276
.exit = ip_set_net_exit,
22662277
.id = &ip_set_net_id,
22672278
.size = sizeof(struct ip_set_net),

net/netfilter/ipset/ip_set_list_set.c

Lines changed: 14 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ list_set_kadd(struct ip_set *set, const struct sk_buff *skb,
8383
struct set_elem *e;
8484
int ret;
8585

86-
list_for_each_entry(e, &map->members, list) {
86+
list_for_each_entry_rcu(e, &map->members, list) {
8787
if (SET_WITH_TIMEOUT(set) &&
8888
ip_set_timeout_expired(ext_timeout(e, set)))
8989
continue;
@@ -103,7 +103,7 @@ list_set_kdel(struct ip_set *set, const struct sk_buff *skb,
103103
struct set_elem *e;
104104
int ret;
105105

106-
list_for_each_entry(e, &map->members, list) {
106+
list_for_each_entry_rcu(e, &map->members, list) {
107107
if (SET_WITH_TIMEOUT(set) &&
108108
ip_set_timeout_expired(ext_timeout(e, set)))
109109
continue;
@@ -192,9 +192,10 @@ list_set_utest(struct ip_set *set, void *value, const struct ip_set_ext *ext,
192192
struct list_set *map = set->data;
193193
struct set_adt_elem *d = value;
194194
struct set_elem *e, *next, *prev = NULL;
195-
int ret;
195+
int ret = 0;
196196

197-
list_for_each_entry(e, &map->members, list) {
197+
rcu_read_lock();
198+
list_for_each_entry_rcu(e, &map->members, list) {
198199
if (SET_WITH_TIMEOUT(set) &&
199200
ip_set_timeout_expired(ext_timeout(e, set)))
200201
continue;
@@ -205,16 +206,19 @@ list_set_utest(struct ip_set *set, void *value, const struct ip_set_ext *ext,
205206

206207
if (d->before == 0) {
207208
ret = 1;
209+
goto out;
208210
} else if (d->before > 0) {
209211
next = list_next_entry(e, list);
210212
ret = !list_is_last(&e->list, &map->members) &&
211213
next->id == d->refid;
212214
} else {
213215
ret = prev && prev->id == d->refid;
214216
}
215-
return ret;
217+
goto out;
216218
}
217-
return 0;
219+
out:
220+
rcu_read_unlock();
221+
return ret;
218222
}
219223

220224
static void
@@ -243,7 +247,7 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext,
243247

244248
/* Find where to add the new entry */
245249
n = prev = next = NULL;
246-
list_for_each_entry(e, &map->members, list) {
250+
list_for_each_entry_rcu(e, &map->members, list) {
247251
if (SET_WITH_TIMEOUT(set) &&
248252
ip_set_timeout_expired(ext_timeout(e, set)))
249253
continue;
@@ -320,9 +324,9 @@ list_set_udel(struct ip_set *set, void *value, const struct ip_set_ext *ext,
320324
{
321325
struct list_set *map = set->data;
322326
struct set_adt_elem *d = value;
323-
struct set_elem *e, *next, *prev = NULL;
327+
struct set_elem *e, *n, *next, *prev = NULL;
324328

325-
list_for_each_entry(e, &map->members, list) {
329+
list_for_each_entry_safe(e, n, &map->members, list) {
326330
if (SET_WITH_TIMEOUT(set) &&
327331
ip_set_timeout_expired(ext_timeout(e, set)))
328332
continue;
@@ -428,14 +432,8 @@ static void
428432
list_set_destroy(struct ip_set *set)
429433
{
430434
struct list_set *map = set->data;
431-
struct set_elem *e, *n;
432435

433-
list_for_each_entry_safe(e, n, &map->members, list) {
434-
list_del(&e->list);
435-
ip_set_put_byindex(map->net, e->id);
436-
ip_set_ext_destroy(set, e);
437-
kfree(e);
438-
}
436+
WARN_ON_ONCE(!list_empty(&map->members));
439437
kfree(map);
440438

441439
set->data = NULL;

0 commit comments

Comments
 (0)