diff --git a/NEWS b/NEWS index 9c86be89798..6973f6b45eb 100644 --- a/NEWS +++ b/NEWS @@ -17,6 +17,7 @@ v2.9.0 - xx xxx xxxx - NSH implementation now conforms to latest draft (draft-ietf-sfc-nsh-28). * Add ttl field. * Add a new action dec_nsh_ttl. + * Enable NSH support in kernel datapath. - OVSDB: * New high-level documentation in ovsdb(7). * New file format documentation for developers in ovsdb(5). diff --git a/acinclude.m4 b/acinclude.m4 index c04c2c65bcd..bc8be462a42 100644 --- a/acinclude.m4 +++ b/acinclude.m4 @@ -543,6 +543,7 @@ AC_DEFUN([OVS_CHECK_LINUX_COMPAT], [ OVS_GREP_IFELSE([$KSRC/include/linux/netdevice.h], [dev_get_by_index_rcu]) OVS_GREP_IFELSE([$KSRC/include/linux/netdevice.h], [dev_recursion_level]) OVS_GREP_IFELSE([$KSRC/include/linux/netdevice.h], [__skb_gso_segment]) + OVS_GREP_IFELSE([$KSRC/include/linux/netdevice.h], [skb_gso_error_unwind]) OVS_GREP_IFELSE([$KSRC/include/linux/netdevice.h], [can_checksum_protocol]) OVS_GREP_IFELSE([$KSRC/include/linux/netdevice.h], [ndo_get_iflink]) OVS_GREP_IFELSE([$KSRC/include/linux/netdevice.h], [ndo_features_check], diff --git a/datapath/actions.c b/datapath/actions.c index 1840fe556ba..eab147617c8 100644 --- a/datapath/actions.c +++ b/datapath/actions.c @@ -42,6 +42,7 @@ #include "conntrack.h" #include "gso.h" #include "vport.h" +#include "flow_netlink.h" static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, struct sw_flow_key *key, @@ -385,6 +386,38 @@ static int push_eth(struct sk_buff *skb, struct sw_flow_key *key, return 0; } +static int push_nsh(struct sk_buff *skb, struct sw_flow_key *key, + const struct nshhdr *nh) +{ + int err; + + err = ovs_nsh_push(skb, nh); + if (err) + return err; + + /* safe right before invalidate_flow_key */ + key->mac_proto = MAC_PROTO_NONE; + invalidate_flow_key(key); + return 0; +} + +static int pop_nsh(struct sk_buff *skb, struct sw_flow_key *key) +{ + int err; + + err = ovs_nsh_pop(skb); + if (err) + return err; + + /* safe right before invalidate_flow_key */ + if (skb->protocol == htons(ETH_P_TEB)) + key->mac_proto = MAC_PROTO_ETHERNET; + else + key->mac_proto = MAC_PROTO_NONE; + invalidate_flow_key(key); + return 0; +} + static void update_ip_l4_checksum(struct sk_buff *skb, struct iphdr *nh, __be32 addr, __be32 new_addr) { @@ -608,6 +641,69 @@ static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *flow_key, return 0; } +static int set_nsh(struct sk_buff *skb, struct sw_flow_key *flow_key, + const struct nlattr *a) +{ + struct nshhdr *nh; + size_t length; + int err; + u8 flags; + u8 ttl; + int i; + + struct ovs_key_nsh key; + struct ovs_key_nsh mask; + + err = nsh_key_from_nlattr(a, &key, &mask); + if (err) + return err; + + /* Make sure the NSH base header is there */ + if (!pskb_may_pull(skb, skb_network_offset(skb) + NSH_BASE_HDR_LEN)) + return -ENOMEM; + + nh = nsh_hdr(skb); + length = nsh_hdr_len(nh); + + /* Make sure the whole NSH header is there */ + err = skb_ensure_writable(skb, skb_network_offset(skb) + + length); + if (unlikely(err)) + return err; + + nh = nsh_hdr(skb); + skb_postpull_rcsum(skb, nh, length); + flags = nsh_get_flags(nh); + flags = OVS_MASKED(flags, key.base.flags, mask.base.flags); + flow_key->nsh.base.flags = flags; + ttl = nsh_get_ttl(nh); + ttl = OVS_MASKED(ttl, key.base.ttl, mask.base.ttl); + flow_key->nsh.base.ttl = ttl; + nsh_set_flags_and_ttl(nh, flags, ttl); + nh->path_hdr = OVS_MASKED(nh->path_hdr, key.base.path_hdr, + mask.base.path_hdr); + flow_key->nsh.base.path_hdr = nh->path_hdr; + switch (nh->mdtype) { + case NSH_M_TYPE1: + for (i = 0; i < NSH_MD1_CONTEXT_SIZE; i++) { + nh->md1.context[i] = + OVS_MASKED(nh->md1.context[i], key.context[i], + mask.context[i]); + } + memcpy(flow_key->nsh.context, nh->md1.context, + sizeof(nh->md1.context)); + break; + case NSH_M_TYPE2: + memset(flow_key->nsh.context, 0, + sizeof(flow_key->nsh.context)); + break; + default: + return -EINVAL; + } + skb_postpush_rcsum(skb, nh, length); + return 0; +} + /* Must follow skb_ensure_writable() since that can move the skb data. */ static void set_tp_port(struct sk_buff *skb, __be16 *port, __be16 new_port, __sum16 *check) @@ -1035,6 +1131,10 @@ static int execute_masked_set_action(struct sk_buff *skb, get_mask(a, struct ovs_key_ethernet *)); break; + case OVS_KEY_ATTR_NSH: + err = set_nsh(skb, flow_key, a); + break; + case OVS_KEY_ATTR_IPV4: err = set_ipv4(skb, flow_key, nla_data(a), get_mask(a, struct ovs_key_ipv4 *)); @@ -1225,6 +1325,22 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, case OVS_ACTION_ATTR_POP_ETH: err = pop_eth(skb, key); break; + + case OVS_ACTION_ATTR_PUSH_NSH: { + u8 buffer[NSH_HDR_MAX_LEN]; + struct nshhdr *nh = (struct nshhdr *)buffer; + + err = nsh_hdr_from_nlattr(nla_data(a), nh, + NSH_HDR_MAX_LEN); + if (unlikely(err)) + break; + err = push_nsh(skb, key, nh); + break; + } + + case OVS_ACTION_ATTR_POP_NSH: + err = pop_nsh(skb, key); + break; } if (unlikely(err)) { diff --git a/datapath/flow.c b/datapath/flow.c index 5da7e3ecbe1..9da388e1f0b 100644 --- a/datapath/flow.c +++ b/datapath/flow.c @@ -45,6 +45,7 @@ #include #include #include +#include #include "datapath.h" #include "conntrack.h" @@ -494,6 +495,52 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key, return 0; } +static int parse_nsh(struct sk_buff *skb, struct sw_flow_key *key) +{ + struct nshhdr *nh; + unsigned int nh_ofs = skb_network_offset(skb); + u8 version, length; + int err; + + err = check_header(skb, nh_ofs + NSH_BASE_HDR_LEN); + if (unlikely(err)) + return err; + + nh = nsh_hdr(skb); + version = nsh_get_ver(nh); + length = nsh_hdr_len(nh); + + if (version != 0) + return -EINVAL; + + err = check_header(skb, nh_ofs + length); + if (unlikely(err)) + return err; + + nh = nsh_hdr(skb); + key->nsh.base.flags = nsh_get_flags(nh); + key->nsh.base.ttl = nsh_get_ttl(nh); + key->nsh.base.mdtype = nh->mdtype; + key->nsh.base.np = nh->np; + key->nsh.base.path_hdr = nh->path_hdr; + switch (key->nsh.base.mdtype) { + case NSH_M_TYPE1: + if (length != NSH_M_TYPE1_LEN) + return -EINVAL; + memcpy(key->nsh.context, nh->md1.context, + sizeof(nh->md1)); + break; + case NSH_M_TYPE2: + memset(key->nsh.context, 0, + sizeof(nh->md1)); + break; + default: + return -EINVAL; + } + + return 0; +} + /** * key_extract - extracts a flow key from an Ethernet frame. * @skb: sk_buff that contains the frame, with skb->data pointing to the @@ -751,6 +798,10 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key) memset(&key->tp, 0, sizeof(key->tp)); } } + } else if (key->eth.type == htons(ETH_P_NSH)) { + error = parse_nsh(skb, key); + if (error) + return error; } return 0; } diff --git a/datapath/flow.h b/datapath/flow.h index 0796b09472b..c63e5869af4 100644 --- a/datapath/flow.h +++ b/datapath/flow.h @@ -35,6 +35,7 @@ #include #include #include +#include struct sk_buff; @@ -66,6 +67,11 @@ struct vlan_head { (offsetof(struct sw_flow_key, recirc_id) + \ FIELD_SIZEOF(struct sw_flow_key, recirc_id)) +struct ovs_key_nsh { + struct ovs_nsh_key_base base; + __be32 context[NSH_MD1_CONTEXT_SIZE]; +}; + struct sw_flow_key { u8 tun_opts[255]; u8 tun_opts_len; @@ -143,6 +149,7 @@ struct sw_flow_key { } nd; }; } ipv6; + struct ovs_key_nsh nsh; /* network service header */ }; struct { /* Connection tracking fields not packed above. */ diff --git a/datapath/flow_netlink.c b/datapath/flow_netlink.c index 303337c4e99..f49848ff163 100644 --- a/datapath/flow_netlink.c +++ b/datapath/flow_netlink.c @@ -46,6 +46,7 @@ #include #include #include +#include #include "datapath.h" #include "conntrack.h" @@ -81,9 +82,11 @@ static bool actions_may_change_flow(const struct nlattr *actions) case OVS_ACTION_ATTR_HASH: case OVS_ACTION_ATTR_POP_ETH: case OVS_ACTION_ATTR_POP_MPLS: + case OVS_ACTION_ATTR_POP_NSH: case OVS_ACTION_ATTR_POP_VLAN: case OVS_ACTION_ATTR_PUSH_ETH: case OVS_ACTION_ATTR_PUSH_MPLS: + case OVS_ACTION_ATTR_PUSH_NSH: case OVS_ACTION_ATTR_PUSH_VLAN: case OVS_ACTION_ATTR_SAMPLE: case OVS_ACTION_ATTR_SET: @@ -176,7 +179,8 @@ static bool match_validate(const struct sw_flow_match *match, | (1ULL << OVS_KEY_ATTR_ICMPV6) | (1ULL << OVS_KEY_ATTR_ARP) | (1ULL << OVS_KEY_ATTR_ND) - | (1ULL << OVS_KEY_ATTR_MPLS)); + | (1ULL << OVS_KEY_ATTR_MPLS) + | (1ULL << OVS_KEY_ATTR_NSH)); /* Always allowed mask fields. */ mask_allowed |= ((1ULL << OVS_KEY_ATTR_TUNNEL) @@ -285,6 +289,14 @@ static bool match_validate(const struct sw_flow_match *match, } } + if (match->key->eth.type == htons(ETH_P_NSH)) { + key_expected |= 1 << OVS_KEY_ATTR_NSH; + if (match->mask && + match->mask->key.eth.type == htons(0xffff)) { + mask_allowed |= 1 << OVS_KEY_ATTR_NSH; + } + } + if ((key_attrs & key_expected) != key_expected) { /* Key attributes check failed. */ OVS_NLERR(log, "Missing key (keys=%llx, expected=%llx)", @@ -325,12 +337,25 @@ size_t ovs_tun_key_attr_size(void) + nla_total_size(2); /* OVS_TUNNEL_KEY_ATTR_TP_DST */ } +static size_t ovs_nsh_key_attr_size(void) +{ + /* Whenever adding new OVS_NSH_KEY_ FIELDS, we should consider + * updating this function. + */ + return nla_total_size(NSH_BASE_HDR_LEN) /* OVS_NSH_KEY_ATTR_BASE */ + /* OVS_NSH_KEY_ATTR_MD1 and OVS_NSH_KEY_ATTR_MD2 are + * mutually exclusive, so the bigger one can cover + * the small one. + */ + + nla_total_size(NSH_CTX_HDRS_MAX_LEN); +} + size_t ovs_key_attr_size(void) { /* Whenever adding new OVS_KEY_ FIELDS, we should consider * updating this function. */ - BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 28); + BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 29); return nla_total_size(4) /* OVS_KEY_ATTR_PRIORITY */ + nla_total_size(0) /* OVS_KEY_ATTR_TUNNEL */ @@ -344,6 +369,8 @@ size_t ovs_key_attr_size(void) + nla_total_size(4) /* OVS_KEY_ATTR_CT_MARK */ + nla_total_size(16) /* OVS_KEY_ATTR_CT_LABELS */ + nla_total_size(40) /* OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6 */ + + nla_total_size(0) /* OVS_KEY_ATTR_NSH */ + + ovs_nsh_key_attr_size() + nla_total_size(12) /* OVS_KEY_ATTR_ETHERNET */ + nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */ + nla_total_size(4) /* OVS_KEY_ATTR_VLAN */ @@ -376,6 +403,13 @@ static const struct ovs_len_tbl ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] [OVS_TUNNEL_KEY_ATTR_IPV6_DST] = { .len = sizeof(struct in6_addr) }, }; +static const struct ovs_len_tbl +ovs_nsh_key_attr_lens[OVS_NSH_KEY_ATTR_MAX + 1] = { + [OVS_NSH_KEY_ATTR_BASE] = { .len = sizeof(struct ovs_nsh_key_base) }, + [OVS_NSH_KEY_ATTR_MD1] = { .len = sizeof(struct ovs_nsh_key_md1) }, + [OVS_NSH_KEY_ATTR_MD2] = { .len = OVS_ATTR_VARIABLE }, +}; + /* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. */ static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { [OVS_KEY_ATTR_ENCAP] = { .len = OVS_ATTR_NESTED }, @@ -408,6 +442,8 @@ static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { .len = sizeof(struct ovs_key_ct_tuple_ipv4) }, [OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6] = { .len = sizeof(struct ovs_key_ct_tuple_ipv6) }, + [OVS_KEY_ATTR_NSH] = { .len = OVS_ATTR_NESTED, + .next = ovs_nsh_key_attr_lens, }, }; static bool check_attr_len(unsigned int attr_len, unsigned int expected_len) @@ -1182,6 +1218,221 @@ static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match, return 0; } +int nsh_hdr_from_nlattr(const struct nlattr *attr, + struct nshhdr *nh, size_t size) +{ + struct nlattr *a; + int rem; + u8 flags = 0; + u8 ttl = 0; + int mdlen = 0; + + /* validate_nsh has check this, so we needn't do duplicate check here + */ + if (size < NSH_BASE_HDR_LEN) + return -ENOBUFS; + + nla_for_each_nested(a, attr, rem) { + int type = nla_type(a); + + switch (type) { + case OVS_NSH_KEY_ATTR_BASE: { + const struct ovs_nsh_key_base *base = nla_data(a); + + flags = base->flags; + ttl = base->ttl; + nh->np = base->np; + nh->mdtype = base->mdtype; + nh->path_hdr = base->path_hdr; + break; + } + case OVS_NSH_KEY_ATTR_MD1: + mdlen = nla_len(a); + if (mdlen > size - NSH_BASE_HDR_LEN) + return -ENOBUFS; + memcpy(&nh->md1, nla_data(a), mdlen); + break; + + case OVS_NSH_KEY_ATTR_MD2: + mdlen = nla_len(a); + if (mdlen > size - NSH_BASE_HDR_LEN) + return -ENOBUFS; + memcpy(&nh->md2, nla_data(a), mdlen); + break; + + default: + return -EINVAL; + } + } + + /* nsh header length = NSH_BASE_HDR_LEN + mdlen */ + nh->ver_flags_ttl_len = 0; + nsh_set_flags_ttl_len(nh, flags, ttl, NSH_BASE_HDR_LEN + mdlen); + + return 0; +} + +int nsh_key_from_nlattr(const struct nlattr *attr, + struct ovs_key_nsh *nsh, struct ovs_key_nsh *nsh_mask) +{ + struct nlattr *a; + int rem; + + /* validate_nsh has check this, so we needn't do duplicate check here + */ + nla_for_each_nested(a, attr, rem) { + int type = nla_type(a); + + switch (type) { + case OVS_NSH_KEY_ATTR_BASE: { + const struct ovs_nsh_key_base *base = nla_data(a); + const struct ovs_nsh_key_base *base_mask = base + 1; + + nsh->base = *base; + nsh_mask->base = *base_mask; + break; + } + case OVS_NSH_KEY_ATTR_MD1: { + const struct ovs_nsh_key_md1 *md1 = nla_data(a); + const struct ovs_nsh_key_md1 *md1_mask = md1 + 1; + + memcpy(nsh->context, md1->context, sizeof(*md1)); + memcpy(nsh_mask->context, md1_mask->context, + sizeof(*md1_mask)); + break; + } + case OVS_NSH_KEY_ATTR_MD2: + /* Not supported yet */ + return -ENOTSUPP; + default: + return -EINVAL; + } + } + + return 0; +} + +static int nsh_key_put_from_nlattr(const struct nlattr *attr, + struct sw_flow_match *match, bool is_mask, + bool is_push_nsh, bool log) +{ + struct nlattr *a; + int rem; + bool has_base = false; + bool has_md1 = false; + bool has_md2 = false; + u8 mdtype = 0; + int mdlen = 0; + + if (WARN_ON(is_push_nsh && is_mask)) + return -EINVAL; + + nla_for_each_nested(a, attr, rem) { + int type = nla_type(a); + int i; + + if (type > OVS_NSH_KEY_ATTR_MAX) { + OVS_NLERR(log, "nsh attr %d is out of range max %d", + type, OVS_NSH_KEY_ATTR_MAX); + return -EINVAL; + } + + if (!check_attr_len(nla_len(a), + ovs_nsh_key_attr_lens[type].len)) { + OVS_NLERR( + log, + "nsh attr %d has unexpected len %d expected %d", + type, + nla_len(a), + ovs_nsh_key_attr_lens[type].len + ); + return -EINVAL; + } + + switch (type) { + case OVS_NSH_KEY_ATTR_BASE: { + const struct ovs_nsh_key_base *base = nla_data(a); + + has_base = true; + mdtype = base->mdtype; + SW_FLOW_KEY_PUT(match, nsh.base.flags, + base->flags, is_mask); + SW_FLOW_KEY_PUT(match, nsh.base.ttl, + base->ttl, is_mask); + SW_FLOW_KEY_PUT(match, nsh.base.mdtype, + base->mdtype, is_mask); + SW_FLOW_KEY_PUT(match, nsh.base.np, + base->np, is_mask); + SW_FLOW_KEY_PUT(match, nsh.base.path_hdr, + base->path_hdr, is_mask); + break; + } + case OVS_NSH_KEY_ATTR_MD1: { + const struct ovs_nsh_key_md1 *md1 = nla_data(a); + + has_md1 = true; + for (i = 0; i < NSH_MD1_CONTEXT_SIZE; i++) + SW_FLOW_KEY_PUT(match, nsh.context[i], + md1->context[i], is_mask); + break; + } + case OVS_NSH_KEY_ATTR_MD2: + if (!is_push_nsh) /* Not supported MD type 2 yet */ + return -ENOTSUPP; + + has_md2 = true; + mdlen = nla_len(a); + if (mdlen > NSH_CTX_HDRS_MAX_LEN || mdlen <= 0) { + OVS_NLERR( + log, + "Invalid MD length %d for MD type %d", + mdlen, + mdtype + ); + return -EINVAL; + } + break; + default: + OVS_NLERR(log, "Unknown nsh attribute %d", + type); + return -EINVAL; + } + } + + if (rem > 0) { + OVS_NLERR(log, "nsh attribute has %d unknown bytes.", rem); + return -EINVAL; + } + + if (has_md1 && has_md2) { + OVS_NLERR( + 1, + "invalid nsh attribute: md1 and md2 are exclusive." + ); + return -EINVAL; + } + + if (!is_mask) { + if ((has_md1 && mdtype != NSH_M_TYPE1) || + (has_md2 && mdtype != NSH_M_TYPE2)) { + OVS_NLERR(1, "nsh attribute has unmatched MD type %d.", + mdtype); + return -EINVAL; + } + + if (is_push_nsh && + (!has_base || (!has_md1 && !has_md2))) { + OVS_NLERR( + 1, + "push_nsh: missing base or metadata attributes" + ); + return -EINVAL; + } + } + + return 0; +} + static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match, u64 attrs, const struct nlattr **a, bool is_mask, bool log) @@ -1309,6 +1560,13 @@ static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match, attrs &= ~(1ULL << OVS_KEY_ATTR_ARP); } + if (attrs & (1 << OVS_KEY_ATTR_NSH)) { + if (nsh_key_put_from_nlattr(a[OVS_KEY_ATTR_NSH], match, + is_mask, false, log) < 0) + return -EINVAL; + attrs &= ~(1 << OVS_KEY_ATTR_NSH); + } + if (attrs & (1ULL << OVS_KEY_ATTR_MPLS)) { const struct ovs_key_mpls *mpls_key; @@ -1624,6 +1882,34 @@ static int ovs_nla_put_vlan(struct sk_buff *skb, const struct vlan_head *vh, return 0; } +static int nsh_key_to_nlattr(const struct ovs_key_nsh *nsh, bool is_mask, + struct sk_buff *skb) +{ + struct nlattr *start; + + start = nla_nest_start(skb, OVS_KEY_ATTR_NSH); + if (!start) + return -EMSGSIZE; + + if (nla_put(skb, OVS_NSH_KEY_ATTR_BASE, sizeof(nsh->base), &nsh->base)) + goto nla_put_failure; + + if (is_mask || nsh->base.mdtype == NSH_M_TYPE1) { + if (nla_put(skb, OVS_NSH_KEY_ATTR_MD1, + sizeof(nsh->context), nsh->context)) + goto nla_put_failure; + } + + /* Don't support MD type 2 yet */ + + nla_nest_end(skb, start); + + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + static int __ovs_nla_put_key(const struct sw_flow_key *swkey, const struct sw_flow_key *output, bool is_mask, struct sk_buff *skb) @@ -1752,6 +2038,9 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey, ipv6_key->ipv6_tclass = output->ip.tos; ipv6_key->ipv6_hlimit = output->ip.ttl; ipv6_key->ipv6_frag = output->ip.frag; + } else if (swkey->eth.type == htons(ETH_P_NSH)) { + if (nsh_key_to_nlattr(&output->nsh, is_mask, skb)) + goto nla_put_failure; } else if (swkey->eth.type == htons(ETH_P_ARP) || swkey->eth.type == htons(ETH_P_RARP)) { struct ovs_key_arp *arp_key; @@ -2247,6 +2536,19 @@ static int validate_and_copy_set_tun(const struct nlattr *attr, return err; } +static bool validate_nsh(const struct nlattr *attr, bool is_mask, + bool is_push_nsh, bool log) +{ + struct sw_flow_match match; + struct sw_flow_key key; + int ret = 0; + + ovs_match_init(&match, &key, true, NULL); + ret = nsh_key_put_from_nlattr(attr, &match, is_mask, + is_push_nsh, log); + return !ret; +} + /* Return false if there are any non-masked bits set. * Mask follows data immediately, before any netlink padding. */ @@ -2393,6 +2695,13 @@ static int validate_set(const struct nlattr *a, break; + case OVS_KEY_ATTR_NSH: + if (eth_type != htons(ETH_P_NSH)) + return -EINVAL; + if (!validate_nsh(nla_data(a), masked, false, log)) + return -EINVAL; + break; + default: return -EINVAL; } @@ -2492,6 +2801,8 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, [OVS_ACTION_ATTR_TRUNC] = sizeof(struct ovs_action_trunc), [OVS_ACTION_ATTR_PUSH_ETH] = sizeof(struct ovs_action_push_eth), [OVS_ACTION_ATTR_POP_ETH] = 0, + [OVS_ACTION_ATTR_PUSH_NSH] = (u32)-1, + [OVS_ACTION_ATTR_POP_NSH] = 0, }; const struct ovs_action_push_vlan *vlan; int type = nla_type(a); @@ -2649,6 +2960,34 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, mac_proto = MAC_PROTO_ETHERNET; break; + case OVS_ACTION_ATTR_PUSH_NSH: + if (mac_proto != MAC_PROTO_ETHERNET) { + u8 next_proto; + + next_proto = tun_p_from_eth_p(eth_type); + if (!next_proto) + return -EINVAL; + } + mac_proto = MAC_PROTO_NONE; + if (!validate_nsh(nla_data(a), false, true, true)) + return -EINVAL; + break; + + case OVS_ACTION_ATTR_POP_NSH: { + __be16 inner_proto; + + if (eth_type != htons(ETH_P_NSH)) + return -EINVAL; + inner_proto = tun_p_to_eth_p(key->nsh.base.np); + if (!inner_proto) + return -EINVAL; + if (key->nsh.base.np == TUN_P_ETHERNET) + mac_proto = MAC_PROTO_ETHERNET; + else + mac_proto = MAC_PROTO_NONE; + break; + } + default: OVS_NLERR(log, "Unknown Action type %d", type); return -EINVAL; diff --git a/datapath/flow_netlink.h b/datapath/flow_netlink.h index f9a4d397998..97153f3282b 100644 --- a/datapath/flow_netlink.h +++ b/datapath/flow_netlink.h @@ -78,4 +78,9 @@ int ovs_nla_put_actions(const struct nlattr *attr, void ovs_nla_free_flow_actions(struct sw_flow_actions *); void ovs_nla_free_flow_actions_rcu(struct sw_flow_actions *); +int nsh_key_from_nlattr(const struct nlattr *attr, struct ovs_key_nsh *nsh, + struct ovs_key_nsh *nsh_mask); +int nsh_hdr_from_nlattr(const struct nlattr *attr, struct nshhdr *nh, + size_t size); + #endif /* flow_netlink.h */ diff --git a/datapath/linux/compat/include/linux/netdevice.h b/datapath/linux/compat/include/linux/netdevice.h index 3c3cf429fa8..3ce02b5aa79 100644 --- a/datapath/linux/compat/include/linux/netdevice.h +++ b/datapath/linux/compat/include/linux/netdevice.h @@ -284,4 +284,18 @@ static inline int skb_csum_hwoffload_help(struct sk_buff *skb, } #endif +#ifndef HAVE_SKB_GSO_ERROR_UNWIND +static inline void skb_gso_error_unwind(struct sk_buff *skb, __be16 protocol, + int pulled_hlen, u16 mac_offset, + int mac_len) +{ + skb->protocol = protocol; + skb->encapsulation = 1; + skb_push(skb, pulled_hlen); + skb_reset_transport_header(skb); + skb->mac_header = mac_offset; + skb->network_header = skb->mac_header + mac_len; + skb->mac_len = mac_len; +} +#endif #endif /* __LINUX_NETDEVICE_WRAPPER_H */ diff --git a/datapath/linux/compat/include/linux/openvswitch.h b/datapath/linux/compat/include/linux/openvswitch.h index 3deaba6f98a..588bb5993d0 100644 --- a/datapath/linux/compat/include/linux/openvswitch.h +++ b/datapath/linux/compat/include/linux/openvswitch.h @@ -360,6 +360,7 @@ enum ovs_key_attr { OVS_KEY_ATTR_CT_LABELS, /* 16-octet connection tracking labels */ OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4, /* struct ovs_key_ct_tuple_ipv4 */ OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6, /* struct ovs_key_ct_tuple_ipv6 */ + OVS_KEY_ATTR_NSH, /* Nested set of ovs_nsh_key_* */ #ifdef __KERNEL__ /* Only used within kernel data path. */ @@ -369,7 +370,6 @@ enum ovs_key_attr { #ifndef __KERNEL__ /* Only used within userspace data path. */ OVS_KEY_ATTR_PACKET_TYPE, /* be32 packet type */ - OVS_KEY_ATTR_NSH, /* Nested set of ovs_nsh_key_* */ #endif __OVS_KEY_ATTR_MAX @@ -922,14 +922,14 @@ enum ovs_action_attr { OVS_ACTION_ATTR_PUSH_ETH, /* struct ovs_action_push_eth. */ OVS_ACTION_ATTR_POP_ETH, /* No argument. */ OVS_ACTION_ATTR_CT_CLEAR, /* No argument. */ + OVS_ACTION_ATTR_PUSH_NSH, /* Nested OVS_NSH_KEY_ATTR_*. */ + OVS_ACTION_ATTR_POP_NSH, /* No argument. */ #ifndef __KERNEL__ OVS_ACTION_ATTR_TUNNEL_PUSH, /* struct ovs_action_push_tnl*/ OVS_ACTION_ATTR_TUNNEL_POP, /* u32 port number. */ OVS_ACTION_ATTR_CLONE, /* Nested OVS_CLONE_ATTR_*. */ OVS_ACTION_ATTR_METER, /* u32 meter number. */ - OVS_ACTION_ATTR_PUSH_NSH, /* Nested OVS_NSH_KEY_ATTR_*. */ - OVS_ACTION_ATTR_POP_NSH, /* No argument. */ #endif __OVS_ACTION_ATTR_MAX, /* Nothing past this will be accepted * from userspace. */ diff --git a/datapath/linux/compat/include/net/nsh.h b/datapath/linux/compat/include/net/nsh.h index c9c30e09c4c..ff3733d9032 100644 --- a/datapath/linux/compat/include/net/nsh.h +++ b/datapath/linux/compat/include/net/nsh.h @@ -307,4 +307,7 @@ static inline void nsh_set_flags_ttl_len(struct nshhdr *nsh, u8 flags, int ovs_nsh_init(void); void ovs_nsh_cleanup(void); +int ovs_nsh_push(struct sk_buff *skb, const struct nshhdr *pushed_nh); +int ovs_nsh_pop(struct sk_buff *skb); + #endif /* __NET_NSH_H */ diff --git a/datapath/nsh.c b/datapath/nsh.c new file mode 100644 index 00000000000..9e583edbee8 --- /dev/null +++ b/datapath/nsh.c @@ -0,0 +1,142 @@ +/* + * Network Service Header + * + * Copyright (c) 2017 Red Hat, Inc. -- Jiri Benc + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include + +int ovs_nsh_push(struct sk_buff *skb, const struct nshhdr *pushed_nh) +{ + struct nshhdr *nh; + size_t length = nsh_hdr_len(pushed_nh); + u8 next_proto; + + if (skb->mac_len) { + next_proto = TUN_P_ETHERNET; + } else { + next_proto = tun_p_from_eth_p(skb->protocol); + if (!next_proto) + return -EAFNOSUPPORT; + } + + /* Add the NSH header */ + if (skb_cow_head(skb, length) < 0) + return -ENOMEM; + + skb_push(skb, length); + nh = (struct nshhdr *)(skb->data); + memcpy(nh, pushed_nh, length); + nh->np = next_proto; + skb_postpush_rcsum(skb, nh, length); + + skb->protocol = htons(ETH_P_NSH); + skb_reset_mac_header(skb); + skb_reset_network_header(skb); + skb_reset_mac_len(skb); + + return 0; +} +EXPORT_SYMBOL_GPL(ovs_nsh_push); + +int ovs_nsh_pop(struct sk_buff *skb) +{ + struct nshhdr *nh; + size_t length; + __be16 inner_proto; + + if (!pskb_may_pull(skb, NSH_BASE_HDR_LEN)) + return -ENOMEM; + nh = (struct nshhdr *)(skb->data); + length = nsh_hdr_len(nh); + inner_proto = tun_p_to_eth_p(nh->np); + if (!pskb_may_pull(skb, length)) + return -ENOMEM; + + if (!inner_proto) + return -EAFNOSUPPORT; + + skb_pull_rcsum(skb, length); + skb_reset_mac_header(skb); + skb_reset_network_header(skb); + skb_reset_mac_len(skb); + skb->protocol = inner_proto; + + return 0; +} +EXPORT_SYMBOL_GPL(ovs_nsh_pop); + +static struct sk_buff *nsh_gso_segment(struct sk_buff *skb, + netdev_features_t features) +{ + struct sk_buff *segs = ERR_PTR(-EINVAL); + unsigned int nsh_len, mac_len; + __be16 proto; + int nhoff; + + skb_reset_network_header(skb); + + nhoff = skb->network_header - skb->mac_header; + mac_len = skb->mac_len; + + if (unlikely(!pskb_may_pull(skb, NSH_BASE_HDR_LEN))) + goto out; + nsh_len = nsh_hdr_len(nsh_hdr(skb)); + if (unlikely(!pskb_may_pull(skb, nsh_len))) + goto out; + + proto = tun_p_to_eth_p(nsh_hdr(skb)->np); + if (!proto) + goto out; + + __skb_pull(skb, nsh_len); + + skb_reset_mac_header(skb); + skb_reset_mac_len(skb); + skb->protocol = proto; + + features &= NETIF_F_SG; + segs = skb_mac_gso_segment(skb, features); + if (IS_ERR_OR_NULL(segs)) { + skb_gso_error_unwind(skb, htons(ETH_P_NSH), nsh_len, + skb->network_header - nhoff, + mac_len); + goto out; + } + + for (skb = segs; skb; skb = skb->next) { + skb->protocol = htons(ETH_P_NSH); + __skb_push(skb, nsh_len); + skb_set_mac_header(skb, -nhoff); + skb->network_header = skb->mac_header + mac_len; + skb->mac_len = mac_len; + } + +out: + return segs; +} + +static struct packet_offload nsh_packet_offload __read_mostly = { + .type = htons(ETH_P_NSH), + .callbacks = { + .gso_segment = nsh_gso_segment, + }, +}; + +int ovs_nsh_init(void) +{ + dev_add_offload(&nsh_packet_offload); + return 0; +} + +void ovs_nsh_cleanup(void) +{ + dev_remove_offload(&nsh_packet_offload); +}