Skip to content

Commit 5292eb5

Browse files
committed
netdev-offload-tc: Only install recirc flows if the parent is present.
When flows are added through TC, only the match and actions are verified to determine if they can be handled by TC. If they can, the TC flow is installed. However, when the flow is a continuation of a previously recirculated flow, it can happen that the flow performing the recirculation is installed in the kernel. This may occur, for example, if it includes an action that cannot be handled by TC. If the kernel module has the first flow but not the second one (missing because it is programmed in TC), the flow is sent to userspace via an upcall. This patch tracks which recirculation goto actions are handled by TC. A matching TC rule is installed only if the corresponding recirculation ID is confirmed to be handled by TC. Acked-by: Simon Horman <[email protected]> Signed-off-by: Eelco Chaudron <[email protected]>
1 parent 0fb370b commit 5292eb5

File tree

2 files changed

+171
-2
lines changed

2 files changed

+171
-2
lines changed

lib/netdev-offload-tc.c

+52-2
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include <errno.h>
2020
#include <linux/if_ether.h>
2121

22+
#include "ccmap.h"
2223
#include "dpif.h"
2324
#include "hash.h"
2425
#include "id-pool.h"
@@ -78,6 +79,10 @@ struct policer_node {
7879
uint32_t police_idx;
7980
};
8081

82+
/* ccmap and protective mutex for counting recirculation id (chain) usage. */
83+
static struct ovs_mutex used_chains_mutex = OVS_MUTEX_INITIALIZER;
84+
static struct ccmap used_chains OVS_GUARDED;
85+
8186
/* Protects below meter police ids pool. */
8287
static struct ovs_mutex meter_police_ids_mutex = OVS_MUTEX_INITIALIZER;
8388
static struct id_pool *meter_police_ids OVS_GUARDED_BY(meter_police_ids_mutex);
@@ -204,6 +209,10 @@ static struct ovs_mutex ufid_lock = OVS_MUTEX_INITIALIZER;
204209
* @adjust_stats: When flow gets updated with new actions, we need to adjust
205210
* the reported stats to include previous values as the hardware
206211
* rule is removed and re-added. This stats copy is used for it.
212+
* @chain_goto: If a TC jump action exists for the flow, the target chain it
213+
* jumps to is stored here. Only a single goto action is stored,
214+
* as TC supports only one goto action per flow (there is no
215+
* return mechanism).
207216
*/
208217
struct ufid_tc_data {
209218
struct hmap_node ufid_to_tc_node;
@@ -212,6 +221,7 @@ struct ufid_tc_data {
212221
struct tcf_id id;
213222
struct netdev *netdev;
214223
struct dpif_flow_stats adjust_stats;
224+
uint32_t chain_goto;
215225
};
216226

217227
static void
@@ -233,6 +243,13 @@ del_ufid_tc_mapping_unlocked(const ovs_u128 *ufid)
233243
hmap_remove(&ufid_to_tc, &data->ufid_to_tc_node);
234244
hmap_remove(&tc_to_ufid, &data->tc_to_ufid_node);
235245
netdev_close(data->netdev);
246+
247+
if (data->chain_goto) {
248+
ovs_mutex_lock(&used_chains_mutex);
249+
ccmap_dec(&used_chains, data->chain_goto);
250+
ovs_mutex_unlock(&used_chains_mutex);
251+
}
252+
236253
free(data);
237254
}
238255

@@ -288,7 +305,8 @@ del_filter_and_ufid_mapping(struct tcf_id *id, const ovs_u128 *ufid,
288305
/* Add ufid entry to ufid_to_tc hashmap. */
289306
static void
290307
add_ufid_tc_mapping(struct netdev *netdev, const ovs_u128 *ufid,
291-
struct tcf_id *id, struct dpif_flow_stats *stats)
308+
struct tcf_id *id, struct dpif_flow_stats *stats,
309+
uint32_t chain_goto)
292310
{
293311
struct ufid_tc_data *new_data = xzalloc(sizeof *new_data);
294312
size_t ufid_hash = hash_bytes(ufid, sizeof *ufid, 0);
@@ -300,6 +318,8 @@ add_ufid_tc_mapping(struct netdev *netdev, const ovs_u128 *ufid,
300318
new_data->ufid = *ufid;
301319
new_data->id = *id;
302320
new_data->netdev = netdev_ref(netdev);
321+
new_data->chain_goto = chain_goto;
322+
303323
if (stats) {
304324
new_data->adjust_stats = *stats;
305325
}
@@ -2315,6 +2335,17 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match,
23152335
chain = key->recirc_id;
23162336
mask->recirc_id = 0;
23172337

2338+
if (chain) {
2339+
/* If we match on a recirculation ID, we must ensure the previous
2340+
* flow is also in the TC datapath; otherwise, the entry is useless,
2341+
* as the related packets will be handled by upcalls. */
2342+
if (!ccmap_find(&used_chains, chain)) {
2343+
VLOG_DBG_RL(&rl, "match for chain %u failed due to non-existing "
2344+
"goto chain action", chain);
2345+
return EOPNOTSUPP;
2346+
}
2347+
}
2348+
23182349
if (flow_tnl_dst_is_set(&key->tunnel) ||
23192350
flow_tnl_src_is_set(&key->tunnel)) {
23202351
VLOG_DBG_RL(&rl,
@@ -2657,11 +2688,28 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match,
26572688
id = tc_make_tcf_id_chain(ifindex, block_id, chain, prio, hook);
26582689
err = tc_replace_flower(&id, &flower);
26592690
if (!err) {
2691+
uint32_t chain_goto = 0;
2692+
26602693
if (stats) {
26612694
memset(stats, 0, sizeof *stats);
26622695
netdev_tc_adjust_stats(stats, &adjust_stats);
26632696
}
2664-
add_ufid_tc_mapping(netdev, ufid, &id, &adjust_stats);
2697+
2698+
if (recirc_act) {
2699+
struct tc_action *action = flower.actions;
2700+
2701+
for (int i = 0; i < flower.action_count; i++, action++) {
2702+
if (action->type == TC_ACT_GOTO && action->chain) {
2703+
chain_goto = action->chain;
2704+
ovs_mutex_lock(&used_chains_mutex);
2705+
ccmap_inc(&used_chains, chain_goto);
2706+
ovs_mutex_unlock(&used_chains_mutex);
2707+
break;
2708+
}
2709+
}
2710+
}
2711+
2712+
add_ufid_tc_mapping(netdev, ufid, &id, &adjust_stats, chain_goto);
26652713
}
26662714

26672715
return err;
@@ -3138,6 +3186,8 @@ netdev_tc_init_flow_api(struct netdev *netdev)
31383186
tc_add_del_qdisc(ifindex, false, 0, hook);
31393187

31403188
if (ovsthread_once_start(&once)) {
3189+
ccmap_init(&used_chains);
3190+
31413191
probe_tc_block_support(ifindex);
31423192
/* Need to re-fetch block id as it depends on feature availability. */
31433193
block_id = get_block_id_from_netdev(netdev);

tests/system-offloads-traffic.at

+119
Original file line numberDiff line numberDiff line change
@@ -1047,3 +1047,122 @@ AT_CHECK([ovs-appctl dpctl/dump-flows type=ovs | grep "eth_type(0x0800)" | DUMP_
10471047

10481048
OVS_TRAFFIC_VSWITCHD_STOP
10491049
AT_CLEANUP
1050+
1051+
AT_SETUP([offloads - split kernel vs offload datapath rules])
1052+
OVS_TRAFFIC_VSWITCHD_START([], [], [-- set Open_vSwitch . other_config:hw-offload=true])
1053+
1054+
ADD_NAMESPACES(at_ns0, at_ns1)
1055+
1056+
ADD_VETH(p1, at_ns0, br0, "10.0.0.1/24")
1057+
ADD_VETH(p2, at_ns1, br0, "10.0.0.2/24")
1058+
1059+
AT_CHECK([ovs-vsctl -- set interface ovs-p1 ofport_request=1 \
1060+
-- set interface ovs-p2 ofport_request=2])
1061+
1062+
AT_DATA([groups.txt], [dnl
1063+
group_id=1,type=select,selection_method=dp_hash,bucket=bucket_id:0,weight:100,actions=ct(commit,table=2)
1064+
])
1065+
AT_DATA([flows.txt], [dnl
1066+
table=0,arp,actions=NORMAL
1067+
table=0,priority=100,cookie=0x12345678,in_port=ovs-p1,ip,nw_dst=10.0.0.2,actions=resubmit(,1)
1068+
table=0,priority=100,cookie=0xabcedf,in_port=ovs-p2,ip,nw_dst=10.0.0.1,actions=ct(table=3)
1069+
table=1,priority=200,ip,actions=group:1
1070+
table=2,ip,actions=ovs-p2
1071+
table=3,ip,actions=ovs-p1
1072+
])
1073+
AT_CHECK([ovs-ofctl add-groups br0 groups.txt])
1074+
AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
1075+
1076+
NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.1 -W 2 10.0.0.2 | FORMAT_PING], [0], [dnl
1077+
3 packets transmitted, 3 received, 0% packet loss, time 0ms
1078+
])
1079+
1080+
AT_CHECK([ovs-appctl revalidator/wait])
1081+
1082+
dnl In this test we should not have the first recirculation(s) in the kernel
1083+
dnl datapath, and the final flow in TC. They should all be in the kernel
1084+
dnl datapath, as the dp_hash() action is currently not supported by TC.
1085+
dnl The command below ensures they are all handled in the kernel datapath.
1086+
AT_CHECK([ovs-appctl dpctl/dump-flows --names type=ovs filter='in_port(ovs-p1),ipv4' | \
1087+
strip_recirc | strip_dp_hash | DUMP_CLEAN_SORTED], [0], [dnl
1088+
recirc_id(<recirc>),in_port(ovs-p1),eth(),eth_type(0x0800),ipv4(dst=10.0.0.2,frag=no), packets:2, bytes:196, used:0.001s, actions:hash(l4(0)),recirc(<recirc>)
1089+
recirc_id(<recirc>),in_port(ovs-p1),eth(),eth_type(0x0800),ipv4(frag=no), packets:2, bytes:196, used:0.001s, actions:ct(commit),recirc(<recirc>)
1090+
recirc_id(<recirc>),in_port(ovs-p1),eth(),eth_type(0x0800),ipv4(frag=no), packets:2, bytes:196, used:0.001s, actions:ovs-p2
1091+
])
1092+
AT_CHECK([ovs-appctl dpctl/dump-flows --names type=tc filter='in_port(ovs-p1),ipv4' | \
1093+
strip_recirc | strip_dp_hash | DUMP_CLEAN_SORTED], [0], [dnl
1094+
])
1095+
1096+
dnl The next test will install two DP flows that are both accepted in the
1097+
dnl TC datapath. But then the first one is moved to the kernel datapath,
1098+
dnl we have to make sure both flows are moved to kernel (and no dual rules
1099+
dnl exist).
1100+
1101+
AT_DATA([flows.txt], [dnl
1102+
table=0,arp,actions=NORMAL
1103+
table=0,priority=100,in_port=ovs-p1,ip,nw_dst=10.0.0.2,actions=ct(table=1)
1104+
table=0,priority=100,in_port=ovs-p2,ip,nw_dst=10.0.0.1,actions=ovs-p1
1105+
table=1,ip,actions=ovs-p2
1106+
])
1107+
1108+
AT_CHECK([ovs-vsctl set Open_vSwitch . other_config:max-idle=2000])
1109+
AT_CHECK([ovs-ofctl del-flows br0])
1110+
AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
1111+
AT_CHECK([ovs-appctl revalidator/purge])
1112+
1113+
NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.1 -W 2 10.0.0.2 | FORMAT_PING], [0], [dnl
1114+
3 packets transmitted, 3 received, 0% packet loss, time 0ms
1115+
])
1116+
1117+
AT_CHECK([ovs-appctl revalidator/wait])
1118+
AT_CHECK([ovs-appctl dpctl/dump-flows --names type=tc filter='in_port(ovs-p1),ipv4' | \
1119+
strip_recirc | strip_dp_hash | DUMP_CLEAN_SORTED], [0], [dnl
1120+
recirc_id(<recirc>),in_port(ovs-p1),eth(),eth_type(0x0800),ipv4(dst=10.0.0.2,frag=no), packets:2, bytes:168, used:0.001s, actions:ct,recirc(<recirc>)
1121+
recirc_id(<recirc>),in_port(ovs-p1),eth(),eth_type(0x0800),ipv4(frag=no), packets:2, bytes:168, used:0.001s, actions:ovs-p2
1122+
])
1123+
AT_CHECK([ovs-appctl dpctl/dump-flows --names type=ovs filter='in_port(ovs-p1),ipv4' | \
1124+
strip_recirc | strip_dp_hash | DUMP_CLEAN_SORTED], [0], [dnl
1125+
])
1126+
1127+
AT_DATA([flows.txt], [dnl
1128+
table=0,priority=100,in_port=ovs-p1,ip,nw_dst=10.0.0.2,actions=controller(),ct(table=1)
1129+
])
1130+
1131+
AT_CHECK([ovs-ofctl del-flows br0 table=0,in_port=ovs-p1,ip,nw_dst=10.0.0.2])
1132+
AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
1133+
AT_CHECK([ovs-appctl revalidator/wait])
1134+
1135+
NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.1 -W 2 10.0.0.2 | FORMAT_PING], [0], [dnl
1136+
3 packets transmitted, 3 received, 0% packet loss, time 0ms
1137+
])
1138+
1139+
AT_CHECK([ovs-appctl revalidator/wait])
1140+
1141+
dnl Unfortunately, until the unreachable TC rule times out, packets will be
1142+
dnl handled by upcalls. So we first check for this case and then verify that
1143+
dnl the rule finally moved.
1144+
AT_CHECK([ovs-appctl dpctl/dump-flows --names type=tc filter='in_port(ovs-p1),ipv4' | \
1145+
strip_recirc | strip_dp_hash | DUMP_CLEAN_SORTED], [0], [dnl
1146+
recirc_id(<recirc>),in_port(ovs-p1),eth(),eth_type(0x0800),ipv4(frag=no), packets:2, bytes:168, used:0.001s, actions:ovs-p2
1147+
])
1148+
AT_CHECK([ovs-appctl dpctl/dump-flows --names type=ovs filter='in_port(ovs-p1),ipv4' | \
1149+
strip_recirc | strip_dp_hash | DUMP_CLEAN_SORTED], [0], [dnl
1150+
recirc_id(<recirc>),in_port(ovs-p1),eth(),eth_type(0x0800),ipv4(dst=10.0.0.2,frag=no), packets:3, bytes:294, used:0.001s, actions:userspace(pid=4294967295,controller(reason=1,dont_send=0,continuation=0,recirc_id=<recirc>,rule_cookie=0,controller_id=0,max_len=65535)),ct,recirc(<recirc>)
1151+
])
1152+
1153+
check_rules_and_ping() {
1154+
NS_CHECK_EXEC([at_ns0], [ping -q -c 1 -i 0.1 -W 2 10.0.0.2], [0], [ignore])
1155+
AT_CHECK([ovs-appctl revalidator/wait])
1156+
AT_CHECK([ovs-appctl dpctl/dump-flows -m filter='in_port(ovs-p1),ipv4'],
1157+
[0], [stdout])
1158+
1159+
[[ "$(wc -l < stdout)" -ne 2 ]] && return 0
1160+
grep -q "dp:tc" stdout
1161+
}
1162+
1163+
OVS_WAIT_WHILE(
1164+
[check_rules_and_ping],
1165+
[ovs-appctl dpctl/dump-flows -m --names filter='in_port(ovs-p1),ipv4'])
1166+
1167+
OVS_TRAFFIC_VSWITCHD_STOP
1168+
AT_CLEANUP

0 commit comments

Comments
 (0)