Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adds the draft of the XDP scheduler testing tool #40

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions xdq-tester/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)

# Name of the program listed as the user target of this directory.
USER_TARGETS := xdq-tester
# One target per *.bpf.c file found in this directory, with the trailing
# ".c" stripped (e.g. foo.bpf.c -> foo.bpf).
BPF_TARGETS := $(patsubst %.c,%,$(wildcard *.bpf.c))

# Libraries passed as -l flags: Lua, libdl and libm.
USER_LIBS = -llua -ldl -lm

# Directory holding the shared build rules.
LIB_DIR = ../lib

# NOTE(review): common.mk presumably consumes the variables set above and
# provides the actual build rules -- confirm against ../lib/common.mk.
include $(LIB_DIR)/common.mk
116 changes: 116 additions & 0 deletions xdq-tester/bpf_local_helpers.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)

#ifndef BPF_LOCAL_HELPERS_H_
#define BPF_LOCAL_HELPERS_H_

#include "bpf_shared_data.h"

/* Local definition of the errno value compared against the result of
* bpf_map_update_elem().
* NOTE(review): presumably defined here because <errno.h> is not included
* when building BPF objects -- confirm.
*/
#define EEXIST 17 /* File exists */

/* NOTE(review): these map type numbers presumably have to match the
* PIFO-enabled kernel this tool targets -- confirm against its uapi bpf.h.
*/
#define BPF_MAP_TYPE_PIFO_GENERIC 31
#define BPF_MAP_TYPE_PIFO_XDP 32

/*
 * bpf_packet_dequeue (helper ID 208)
 *
 *	Dequeue the packet at the head of the PIFO in *map*.
 *
 *	Returns
 *		On success, a pointer to the packet, or NULL if the PIFO is
 *		empty. The packet pointer must be released, either by passing
 *		it to *bpf_packet_drop()* or by returning it from the program.
 *		On success *rank* is set to the rank of the dequeued packet;
 *		on error it is set to a negative error code.
 *
 *	NOTE(review): the prototype below declares the return type as long
 *	although the value is documented as a packet pointer -- confirm
 *	against the kernel helper definition.
 *
 * bpf_packet_drop (helper ID 209)
 *
 *	Release a packet previously obtained from *bpf_packet_dequeue()*.
 */
static long (*bpf_packet_dequeue)(void *ctx, void *map, __u64 flags, __u64 *rank) = (void *) 208;
static long (*bpf_packet_drop)(void *ctx, void *pkt) = (void *) 209;

/* Packet bounds and parse position handed to parse_packet(). */
struct parsing_context {
void *data; // Start of eth hdr
void *data_end; // End of the safely accessible area
void *meta; // Meta data
struct hdr_cursor nh; // Position to parse next
__u32 pkt_len; // Full packet length (headers+data)
};

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-function"
/*
 * Look up *key* in *map*; if it is missing, try to insert *init* and look
 * it up again.
 *
 * Returns the map value, or NULL if the insert failed for any reason other
 * than the key already existing, or if the second lookup finds nothing.
 */
static __always_inline void *
bpf_map_lookup_or_try_init(void *map, const void *key, const void *init)
{
	void *entry = bpf_map_lookup_elem(map, key);

	if (!entry) {
		long rc = bpf_map_update_elem(map, key, init, BPF_NOEXIST);

		/* -EEXIST means the key is present after all, so the
		 * follow-up lookup is still worth doing.
		 */
		if (rc == 0 || rc == -EEXIST)
			entry = bpf_map_lookup_elem(map, key);
	}

	return entry;
}

/*
 * Return the larger of two unsigned 64-bit values.
 *
 * The result is returned as __u64 so that values above INT_MAX are not
 * truncated on the way out.
 */
static __always_inline __u64 bpf_max(__u64 left, __u64 right)
{
	return right > left ? right : left;
}


/*
 * Write the IPv4-mapped IPv6 form of *ipv4* (RFC 4291, section 2.5.5.2)
 * into *ipv6*: ten zero bytes, two 0xff bytes, then the IPv4 address
 * stored unchanged in the last 32-bit word.
 */
static void map_ipv4_to_ipv6(struct in6_addr *ipv6, __be32 ipv4)
{
	__u8 *bytes = ipv6->in6_u.u6_addr8;

	__builtin_memset(bytes, 0x00, 10);
	__builtin_memset(bytes + 10, 0xff, 2);
	ipv6->in6_u.u6_addr32[3] = ipv4;
}

/*
* Five-tuple helpers
*/

/* This function currently only supports UDP packets */
static __always_inline int parse_packet(struct parsing_context *pctx, struct packet_info *p_info)
{
/* Parse Ethernet and IP/IPv6 headers */
p_info->eth_type = parse_ethhdr(&pctx->nh, pctx->data_end, &p_info->eth);
if (p_info->eth_type == bpf_htons(ETH_P_IP)) {
p_info->ip_type = parse_iphdr(&pctx->nh, pctx->data_end, &p_info->iph);
if (p_info->ip_type < 0)
goto err;
p_info->nt.ipv = 4;
map_ipv4_to_ipv6(&p_info->nt.saddr.ip, p_info->iph->saddr);
map_ipv4_to_ipv6(&p_info->nt.daddr.ip, p_info->iph->daddr);
} else if (p_info->eth_type == bpf_htons(ETH_P_IPV6)) {
p_info->ip_type = parse_ip6hdr(&pctx->nh, pctx->data_end, &p_info->ip6h);
if (p_info->ip_type < 0)
goto err;
p_info->nt.ipv = 6;
p_info->nt.saddr.ip = p_info->ip6h->saddr;
p_info->nt.daddr.ip = p_info->ip6h->daddr;
} else {
goto err;
}

/* Parse UDP header */
if (p_info->ip_type != IPPROTO_UDP)
goto err;
if (parse_udphdr(&pctx->nh, pctx->data_end, &p_info->udph) < 0)
goto err;

p_info->nt.proto = IPPROTO_UDP;
p_info->nt.saddr.port = p_info->udph->source;
p_info->nt.daddr.port = p_info->udph->dest;

return 0;
err:
bpf_printk("Failed to parse UDP packet");
return -1;
}

#pragma GCC diagnostic pop

#endif // BPF_LOCAL_HELPERS_H_
52 changes: 52 additions & 0 deletions xdq-tester/bpf_shared_data.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
#ifndef BPF_SHARED_DATA_H_
#define BPF_SHARED_DATA_H_

#include "codel_impl.h"

/* One endpoint (address + port) of a flow. */
struct flow_address {
struct in6_addr ip; /* IPv6 address; parse_packet() stores IPv4 addresses in IPv4-mapped form */
__u16 port; /* Copied unchanged from the UDP header by parse_packet() */
__u16 reserved; /* Not written by any code visible here */
};

/* Flow identifier: source/destination endpoints plus protocol and IP version. */
struct network_tuple {
struct flow_address saddr; /* Source endpoint */
struct flow_address daddr; /* Destination endpoint */
__u16 proto; /* Transport protocol; parse_packet() only ever sets IPPROTO_UDP */
__u8 ipv; /* IP version: 4 or 6 as set by parse_packet() */
__u8 reserved; /* Not written by any code visible here */
};

/* Per-flow scheduler state.
* NOTE(review): the meaning of the individual fields is defined by the
* scheduler programs that use this struct, which are not visible in this
* header -- confirm there before relying on any of them.
*/
struct flow_state {
__u32 pkts;
__u32 root_finish_bytes;
__u32 finish_bytes;
__u16 root_weight;
__u16 weight;
__u32 persistent;
__u64 root_priority;
};

/* Per-flow state that embeds a CoDel state (see codel_impl.h).
* NOTE(review): the meaning of the counters is defined by the program
* using this struct, which is not visible in this header -- confirm.
*/
struct fq_codel_flow_state {
__u32 pkts;
__u32 finish_bytes;
__u32 total_bytes;
__u32 grace_period;
struct codel_state codel; /* State consumed by codel_drop() */
};

/* Result of parse_packet(): pointers to the parsed headers plus the
* network tuple derived from them.
*/
struct packet_info {
struct ethhdr *eth; /* Ethernet header */
union {
struct iphdr *iph; /* Set when the packet is IPv4 */
struct ipv6hdr *ip6h; /* Set when the packet is IPv6 */
};
union {
struct udphdr *udph; /* UDP header */
};
struct network_tuple nt; /* Flow identifier filled in by parse_packet() */
int eth_type; /* Return value of parse_ethhdr(); compared against bpf_htons(ETH_P_*) */
int ip_type; /* Return value of parse_iphdr()/parse_ip6hdr(); negative is treated as failure */
};

#endif // BPF_SHARED_DATA_H_
155 changes: 155 additions & 0 deletions xdq-tester/codel_impl.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
#include <stdbool.h>

#ifndef __CODEL_IMPL_H
#define __CODEL_IMPL_H

/* Threshold compared against t_queue_sz in codel_should_drop(); below it
* nothing is dropped. Can be overridden by defining it before inclusion.
*/
#ifndef CODEL_TARGET
#define CODEL_TARGET (10 * 1000 * 1000ULL) /* 10 ms in nanosec */
#endif

/* Interval used both as the grace period before dropping may start and as
* the base of the drop spacing. Can be overridden before inclusion.
*/
#ifndef CODEL_EXCEED_INTERVAL
#define CODEL_EXCEED_INTERVAL (100 * 1000 * 1000ULL) /* 100 ms in ns */
#endif

/* Codel like dropping scheme, inspired by:
* - Paper: https://queue.acm.org/detail.cfm?id=2209336
* - Code: https://queue.acm.org/appendices/codel.html
* - Kernel: include/net/codel_impl.h
*/
struct codel_state {
/* codel like dropping scheme */
__u64 first_above_time; /* Earliest time dropping is allowed: set to now + interval when first above target (0 if below) */
__u64 drop_next; /* Time to drop next packet */
__u32 count; /* Drop count fed to the control law; incremented on each drop in drop state */
__u32 dropping; /* Non-zero while in drop state */
};

/*
 * Return sqrt(cnt) as a 16-bit fixed-point number, i.e. 65536 * sqrt(cnt),
 * looked up from a table covering cnt = 1..36.
 *
 * Any other count (including 0) yields 65536 * sqrt(50), which corresponds
 * to a drop spacing of 100 ms / sqrt(50) = 14.14 ms.
 */
static __always_inline __u32 get_sqrt_sh16(__u64 cnt)
{
	/* Entry i holds the value for cnt == i + 1. */
	static const __u32 sqrt_sh16[] = {
		 65536,  92682, 113512, 131072, 146543, 160530, /*  1 ..  6 */
		173392, 185364, 196608, 207243, 217358, 227023, /*  7 .. 12 */
		236293, 245213, 253820, 262144, 270212, 278046, /* 13 .. 18 */
		285664, 293086, 300324, 307391, 314300, 321060, /* 19 .. 24 */
		327680, 334169, 340535, 346784, 352922, 358955, /* 25 .. 30 */
		364889, 370728, 376476, 382137, 387716, 393216, /* 31 .. 36 */
	};
	const __u64 entries = sizeof(sqrt_sh16) / sizeof(sqrt_sh16[0]);

	if (cnt < 1 || cnt > entries)
		return 463410; /* 65536*sqrt(50) => 100/sqrt(50) = 14.14 ms */

	return sqrt_sh16[cnt - 1];
}

/*
 * Return CODEL_EXCEED_INTERVAL / sqrt(cnt), computed in 16-bit fixed point
 * using the table in get_sqrt_sh16().
 */
static __always_inline __u64 get_next_interval_sqrt(__u64 cnt)
{
	const __u64 interval_sh16 = (__u64)CODEL_EXCEED_INTERVAL << 16;

	return interval_sh16 / get_sqrt_sh16(cnt);
}

/*
 * Control law: return the time of the next drop, i.e. *t* advanced by
 * interval / sqrt(cnt).
 */
static __always_inline __u64
codel_control_law(__u64 t, __u64 cnt)
{
	__u64 step = get_next_interval_sqrt(cnt);

	return t + step;
}

/*
 * Decide whether dropping is currently permitted.
 *
 * Below CODEL_TARGET the deadline in first_above_time is cleared and false
 * is returned. On the first call at or above the target the deadline is
 * set to now + CODEL_EXCEED_INTERVAL and false is returned. On later calls
 * the result is true once *now* has reached that deadline.
 */
static __always_inline
bool codel_should_drop(struct codel_state *codel, __u64 t_queue_sz, __u64 now)
{
	const __u64 grace = CODEL_EXCEED_INTERVAL;

	if (t_queue_sz < CODEL_TARGET) {
		/* Back below target: forget the deadline. */
		codel->first_above_time = 0;
		return false;
	}

	if (codel->first_above_time != 0)
		return now >= codel->first_above_time;

	/* Just went above target: drops become acceptable only if we are
	 * still above it once the grace interval has passed.
	 */
	codel->first_above_time = now + grace;
	return false;
}

/*
 * CoDel drop decision for one packet. Updates *codel* and returns true
 * when the current packet should be dropped.
 *
 * In drop state: leave the state as soon as codel_should_drop() says no;
 * otherwise drop whenever *now* has reached drop_next and schedule the
 * following drop from the previous drop_next.
 *
 * Outside drop state: enter it (and drop) when codel_should_drop() says
 * yes and either the last drop cycle was less than one interval ago or
 * *now* is at least one interval past first_above_time.
 */
static __always_inline
bool codel_drop(struct codel_state *codel, __u64 t_queue_sz, __u64 now)
{
	const __u64 interval = CODEL_EXCEED_INTERVAL;
	const bool ok_to_drop = codel_should_drop(codel, t_queue_sz, now);
	bool recent_cycle;

	if (codel->dropping) {
		if (!ok_to_drop) {
			/* time below target - leave dropping state */
			codel->dropping = false;
			return false;
		}

		if (now < codel->drop_next)
			return false;

		/* Time for the next drop: count it and schedule the one
		 * after it relative to the previous drop time.
		 */
		codel->count += 1;
		codel->drop_next =
			codel_control_law(codel->drop_next, codel->count);
		return true;
	}

	if (!ok_to_drop)
		return false;

	/* Unsigned arithmetic: a drop_next in the future wraps to a huge
	 * difference and therefore does not count as a recent cycle.
	 */
	recent_cycle = now - codel->drop_next < interval;
	if (!recent_cycle && now - codel->first_above_time < interval)
		return false;

	/* Enter dropping state. */
	codel->dropping = true;

	/* When the previous drop cycle ended recently, the rate that
	 * controlled the queue then is a good starting point now.
	 */
	if (recent_cycle && codel->count > 2)
		codel->count -= 2;
	else
		codel->count = 1;

	codel->drop_next = codel_control_law(now, codel->count);
	return true;
}

#endif /* __CODEL_IMPL_H */
Loading