From f305651e79343552c2468a7b810e585ff3443e6a Mon Sep 17 00:00:00 2001 From: Frey Alfredsson Date: Wed, 27 Apr 2022 13:52:34 +0200 Subject: [PATCH] Adds the draft of the XDP scheduler testing tool This commit contains the XDP scheduling framework. It consists of a testing program called xdq-tester used to test schedulers using the XDP and DEQUEUE hooks. It uses trace files written in Lua that the xdq-tester program uses to check the XDP schedulers for correctness. The FIFO, SPRIO, and WFQ are fully functional in this commit. The SPRIO and WFQ have an API to set the weights from the Lua scripts. This commit's FQ-CoDel contains sparse flow handling and testing. The CoDel part has a bug in how it adds the metadata section in this version. Signed-off-by: Frey Alfredsson --- xdq-tester/Makefile | 10 + xdq-tester/bpf_local_helpers.h | 116 ++++ xdq-tester/bpf_shared_data.h | 52 ++ xdq-tester/codel_impl.h | 155 +++++ xdq-tester/fifo.lua | 25 + xdq-tester/fq_codel.lua | 179 ++++++ xdq-tester/hpfq.lua | 42 ++ xdq-tester/lib.lua | 343 +++++++++++ xdq-tester/sched_fifo.bpf.c | 46 ++ xdq-tester/sched_fq_codel.bpf.c | 226 +++++++ xdq-tester/sched_hpfq.bpf.c | 228 +++++++ xdq-tester/sched_sprio.bpf.c | 127 ++++ xdq-tester/sched_wfq.bpf.c | 135 ++++ xdq-tester/sprio.lua | 27 + xdq-tester/wfq.lua | 148 +++++ xdq-tester/xdq-tester.c | 1021 +++++++++++++++++++++++++++++++ xdq-tester/xdq-tester.h | 46 ++ 17 files changed, 2926 insertions(+) create mode 100644 xdq-tester/Makefile create mode 100644 xdq-tester/bpf_local_helpers.h create mode 100644 xdq-tester/bpf_shared_data.h create mode 100644 xdq-tester/codel_impl.h create mode 100644 xdq-tester/fifo.lua create mode 100644 xdq-tester/fq_codel.lua create mode 100644 xdq-tester/hpfq.lua create mode 100644 xdq-tester/lib.lua create mode 100644 xdq-tester/sched_fifo.bpf.c create mode 100644 xdq-tester/sched_fq_codel.bpf.c create mode 100644 xdq-tester/sched_hpfq.bpf.c create mode 100644 xdq-tester/sched_sprio.bpf.c create mode 100644 xdq-tester/sched_wfq.bpf.c create mode 100644 xdq-tester/sprio.lua create mode 100644 xdq-tester/wfq.lua create mode 100644 xdq-tester/xdq-tester.c create mode 100644 xdq-tester/xdq-tester.h diff --git a/xdq-tester/Makefile b/xdq-tester/Makefile new file mode 100644 index 00000000..01f8fa44 --- /dev/null +++ b/xdq-tester/Makefile @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) + +USER_TARGETS := xdq-tester +BPF_TARGETS := $(patsubst %.c,%,$(wildcard *.bpf.c)) + +USER_LIBS = -llua -ldl -lm + +LIB_DIR = ../lib + +include $(LIB_DIR)/common.mk diff --git a/xdq-tester/bpf_local_helpers.h b/xdq-tester/bpf_local_helpers.h new file mode 100644 index 00000000..4f875e66 --- /dev/null +++ b/xdq-tester/bpf_local_helpers.h @@ -0,0 +1,116 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) + +#ifndef BPF_LOCAL_HELPERS_H_ +#define BPF_LOCAL_HELPERS_H_ + +#include "bpf_shared_data.h" + +#define EEXIST 17 /* File exists */ + +#define BPF_MAP_TYPE_PIFO_GENERIC 31 +#define BPF_MAP_TYPE_PIFO_XDP 32 + +/* + * bpf_packet_dequeue + * + * Dequeue the packet at the head of the PIFO in *map* and return a pointer + * to the packet (or NULL if the PIFO is empty). + * + * Returns + * On success, a pointer to the packet, or NULL if the PIFO is empty. The + * packet pointer must be freed using *bpf_packet_drop()* or returning + * the packet pointer. The *rank* pointer will be set to the rank of + * the dequeued packet on success, or a negative error code on error. + */ +static long (*bpf_packet_dequeue)(void *ctx, void *map, __u64 flags, __u64 *rank) = (void *) 208;; +static long (*bpf_packet_drop)(void *ctx, void *pkt) = (void *) 209; + +struct parsing_context { + void *data; // Start of eth hdr + void *data_end; // End of safe acessible area + void *meta; // Meta data + struct hdr_cursor nh; // Position to parse next + __u32 pkt_len; // Full packet length (headers+data) +}; + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-function" +static __always_inline void * +bpf_map_lookup_or_try_init(void *map, const void *key, const void *init) +{ + void *val; + long err; + + val = bpf_map_lookup_elem(map, key); + if (val) + return val; + + err = bpf_map_update_elem(map, key, init, BPF_NOEXIST); + if (err && err != -EEXIST) + return NULL; + + return bpf_map_lookup_elem(map, key); +} + +static __always_inline int bpf_max(__u64 left, __u64 right) +{ + return right > left ? right : left; +} + + +/* + * Maps an IPv4 address into an IPv6 address according to RFC 4291 sec 2.5.5.2 + */ +static void map_ipv4_to_ipv6(struct in6_addr *ipv6, __be32 ipv4) +{ + __builtin_memset(&ipv6->in6_u.u6_addr8[0], 0x00, 10); + __builtin_memset(&ipv6->in6_u.u6_addr8[10], 0xff, 2); + ipv6->in6_u.u6_addr32[3] = ipv4; +} + +/* + * Five-tuple helpers + */ + +/* This function currently only supports UDP packets */ +static __always_inline int parse_packet(struct parsing_context *pctx, struct packet_info *p_info) +{ + /* Parse Ethernet and IP/IPv6 headers */ + p_info->eth_type = parse_ethhdr(&pctx->nh, pctx->data_end, &p_info->eth); + if (p_info->eth_type == bpf_htons(ETH_P_IP)) { + p_info->ip_type = parse_iphdr(&pctx->nh, pctx->data_end, &p_info->iph); + if (p_info->ip_type < 0) + goto err; + p_info->nt.ipv = 4; + map_ipv4_to_ipv6(&p_info->nt.saddr.ip, p_info->iph->saddr); + map_ipv4_to_ipv6(&p_info->nt.daddr.ip, p_info->iph->daddr); + } else if (p_info->eth_type == bpf_htons(ETH_P_IPV6)) { + p_info->ip_type = parse_ip6hdr(&pctx->nh, pctx->data_end, &p_info->ip6h); + if (p_info->ip_type < 0) + goto err; + p_info->nt.ipv = 6; + p_info->nt.saddr.ip = p_info->ip6h->saddr; + p_info->nt.daddr.ip = p_info->ip6h->daddr; + } else { + goto err; + } + + /* Parse UDP header */ + if (p_info->ip_type != IPPROTO_UDP) + goto err; + if (parse_udphdr(&pctx->nh, pctx->data_end, &p_info->udph) < 0) + goto err; + + p_info->nt.proto = IPPROTO_UDP; + p_info->nt.saddr.port = p_info->udph->source; + p_info->nt.daddr.port = p_info->udph->dest; + + return 0; +err: + bpf_printk("Failed to parse UDP packet"); + return -1; +} + +#pragma GCC diagnostic pop + +#endif // BPF_LOCAL_HELPERS_H_ diff --git a/xdq-tester/bpf_shared_data.h b/xdq-tester/bpf_shared_data.h new file mode 100644 index 00000000..f0211384 --- /dev/null +++ b/xdq-tester/bpf_shared_data.h @@ -0,0 +1,52 @@ +#ifndef BPF_SHARED_DATA_H_ +#define BPF_SHARED_DATA_H_ + +#include "codel_impl.h" + +struct flow_address { + struct in6_addr ip; + __u16 port; + __u16 reserved; +}; + +struct network_tuple { + struct flow_address saddr; + struct flow_address daddr; + __u16 proto; + __u8 ipv; + __u8 reserved; +}; + +struct flow_state { + __u32 pkts; + __u32 root_finish_bytes; + __u32 finish_bytes; + __u16 root_weight; + __u16 weight; + __u32 persistent; + __u64 root_priority; +}; + +struct fq_codel_flow_state { + __u32 pkts; + __u32 finish_bytes; + __u32 total_bytes; + __u32 grace_period; + struct codel_state codel; +}; + +struct packet_info { + struct ethhdr *eth; + union { + struct iphdr *iph; + struct ipv6hdr *ip6h; + }; + union { + struct udphdr *udph; + }; + struct network_tuple nt; + int eth_type; + int ip_type; +}; + +#endif // BPF_SHARED_DATA_H_ diff --git a/xdq-tester/codel_impl.h b/xdq-tester/codel_impl.h new file mode 100644 index 00000000..ae6c70b0 --- /dev/null +++ b/xdq-tester/codel_impl.h @@ -0,0 +1,155 @@ +#include + +#ifndef __CODEL_IMPL_H +#define __CODEL_IMPL_H + +#ifndef CODEL_TARGET +#define CODEL_TARGET (10 * 1000 * 1000ULL) /* 10 ms in nanosec */ +#endif + +#ifndef CODEL_EXCEED_INTERVAL +#define CODEL_EXCEED_INTERVAL (100 * 1000 * 1000ULL) /* 100 ms in ns*/ +#endif + +/* Codel like dropping scheme, inspired by: + * - RFC: https://queue.acm.org/detail.cfm?id=2209336 + * - Code: https://queue.acm.org/appendices/codel.html + * - Kernel: include/net/codel_impl.h + */ +struct codel_state { + /* codel like dropping scheme */ + __u64 first_above_time; /* Time when above target (0 if below)*/ + __u64 drop_next; /* Time to drop next packet */ + __u32 count; /* Packets dropped since going into drop state */ + __u32 dropping; /* Equal to 1 if in drop state */ +}; + +/* Table lookup for square-root shifted 16 bit */ +static __always_inline __u32 get_sqrt_sh16(__u64 cnt) +{ + switch (cnt) { + case 1: return 65536; /* 65536 * sqrt(1) */ + case 2: return 92682; /* 65536 * sqrt(2) */ + case 3: return 113512; /* 65536 * sqrt(3) */ + case 4: return 131072; /* 65536 * sqrt(4) */ + case 5: return 146543; /* 65536 * sqrt(5) */ + case 6: return 160530; /* 65536 * sqrt(6) */ + case 7: return 173392; + case 8: return 185364; + case 9: return 196608; + case 10: return 207243; + case 11: return 217358; + case 12: return 227023; + case 13: return 236293; + case 14: return 245213; + case 15: return 253820; + case 16: return 262144; /* 100 ms / sqrt(16) = 25 ms */ + case 17: return 270212; + case 18: return 278046; + case 19: return 285664; + case 20: return 293086; + case 21: return 300324; + case 22: return 307391; + case 23: return 314300; + case 24: return 321060; + case 25: return 327680; /* 100 ms / sqrt(25) = 20 ms */ + case 26: return 334169; + case 27: return 340535; + case 28: return 346784; + case 29: return 352922; + case 30: return 358955; + case 31: return 364889; + case 32: return 370728; + case 33: return 376476; + case 34: return 382137; + case 35: return 387716; + case 36: return 393216; /* 100 / sqrt(36) = 16.66 ms */ + default: + return 463410; /* 65536*sqrt(50) => 100/sqrt(50) = 14.14 ms */ + } +} + +static __always_inline __u64 get_next_interval_sqrt(__u64 cnt) +{ + __u64 val = ((__u64)CODEL_EXCEED_INTERVAL << 16) / get_sqrt_sh16(cnt); + return val; +} + +static __always_inline __u64 +codel_control_law(__u64 t, __u64 cnt) +{ + return t + get_next_interval_sqrt(cnt); +} + +static __always_inline +bool codel_should_drop(struct codel_state *codel, __u64 t_queue_sz, __u64 now) +{ + __u64 interval = CODEL_EXCEED_INTERVAL; + + if (t_queue_sz < CODEL_TARGET) { + /* went below so we'll stay below for at least interval */ + codel->first_above_time = 0; + return false; + } + + if (codel->first_above_time == 0) { + /* just went above from below. If we stay above + * for at least interval we'll say it's ok to drop + */ + codel->first_above_time = now + interval; + return false; + } else if (now >= codel->first_above_time) { + return true; + } + return false; +} + +static __always_inline +bool codel_drop(struct codel_state *codel, __u64 t_queue_sz, __u64 now) +{ + __u64 interval = CODEL_EXCEED_INTERVAL; + + /* If horizon have been exceed for a while, inc drop intensity*/ + bool drop = codel_should_drop(codel, t_queue_sz, now); + + if (codel->dropping) { /* In dropping state */ + if (!drop) { + /* time below target - leave dropping state */ + codel->dropping = false; + return false; + } else if (now >= codel->drop_next) { + /* It's time for the next drop. Drop the current + * packet. Schedule the next drop + */ + codel->count += 1; + // schedule the next drop. + codel->drop_next = + codel_control_law(codel->drop_next, codel->count); + return true; + } + } else if (drop && + ((now - codel->drop_next < interval) || + (now - codel->first_above_time >= interval))) { + /* If we get here, then we're not in dropping state. + * Decide whether it's time to enter dropping state. + */ + __u32 count = codel->count; + + codel->dropping = true; + + /* If we're in a drop cycle, drop rate that controlled queue + * on the last cycle is a good starting point to control it now. + */ + if (now - codel->drop_next < interval) + count = count > 2 ? (count - 2) : 1; + else + count = 1; + + codel->count = count; + codel->drop_next = codel_control_law(now, count); + return true; + } + return false; +} + +#endif /* __CODEL_IMPL_H */ diff --git a/xdq-tester/fifo.lua b/xdq-tester/fifo.lua new file mode 100644 index 00000000..f537e2ca --- /dev/null +++ b/xdq-tester/fifo.lua @@ -0,0 +1,25 @@ +-- SPDX-License-Identifier: GPL-2.0 +-- Copyright (c) 2022 Freysteinn Alfredsson + +-- FIFO scheduler +config.bpf.file = "./sched_fifo.bpf.o" + +-- Setup flows +packet_flow1 = Udp:new() +packet_flow1.udp.dest = 8080 + +packet_flow2 = Udp:new() +packet_flow2.udp.dest = 8081 + +packet_flow3 = Udp:new() +packet_flow3.udp.dest = 8082 + + +-- Test scheduler +enqueue(packet_flow1) +enqueue(packet_flow2) +enqueue(packet_flow3) + +dequeue_cmp(packet_flow1) +dequeue_cmp(packet_flow2) +dequeue_cmp(packet_flow3) diff --git a/xdq-tester/fq_codel.lua b/xdq-tester/fq_codel.lua new file mode 100644 index 00000000..90928fc0 --- /dev/null +++ b/xdq-tester/fq_codel.lua @@ -0,0 +1,179 @@ +-- SPDX-License-Identifier: GPL-2.0 +-- Copyright (c) 2022 Freysteinn Alfredsson + +-- Fair Queuing with Controlled Delay (FQ_CoDel) +config.bpf.file = "./sched_fq_codel.bpf.o" + + +-- Setup flows +-- We use this flow to test sparse flow handling +packet_sparse_flow_tester = Udp:new() +packet_sparse_flow_tester.udp.dest = 8000 + +-- The background stream flow increments the time bytes +-- so that we can test our sparse flow tester when time has passed +packet_flow_background_stream = Udp:new() +packet_flow_background_stream.udp.dest = 8001 +-- Make the packet the size of full a quantom (1522 - 62) +packet_flow_background_stream.udp.payload = create_payload(1460) + +set_time_ns(1000) + +-- Test scheduler + +-- +-- 1. Sparse flow tests +-- +-- In our implementation of FQ-CoDel, the time_bytes variable is the only thing +-- that connects sparse flows. Therefore, we can test all possible scenarios +-- using only two flows. One background flow that we only use to advance time. +-- And the flow that we use for testing. + +function make_sparse(flow) + -- The background flow needs two packets to be a stream: + -- * The first packet will be sparse. + -- * The second packet exceeds the sparse quantom. + flow.udp.payload = create_payload(1460) + enqueue(flow) -- Sparse + enqueue(flow) -- Stream + dequeue_cmp(flow) -- Dequeue sparse + dequeue_cmp(flow) -- Dequeue sparse + -- Note that the type_bytes has not advanced at this point but will after the + -- next dequeued packet. +end + +-- 1.1 Test when a sparse flow ends while sparse +function fq_codel_sparse_test1() + -- This test does the following: + -- 1. Creates a sparse flow with a couple of packets. + -- 2. Advance time_bytes and expire the sparse flow. + -- 3. Creates a new sparse flow with a couple of packets. + -- 4. Advance time_bytes and expire the new sparse flow. + -- In steps two and four the test confirms that the sparse flows + -- were still sparse. + make_sparse(packet_flow_background_stream) + + -- Prime the background stream so it can update the time_bytes variable later. + enqueue(packet_flow_background_stream) -- Prime for updating time_bytes + enqueue(packet_flow_background_stream) -- Make sure the flow is not recycled after update + + -- Make the packet the size of half a quantom (1522/2 - 62) + -- The flow will cease being a sparse flow after two packets. + packet_sparse_flow_tester.udp.payload = create_payload(699) + + -- The sparse flow gets a full quantom of packets. + enqueue(packet_sparse_flow_tester) -- Sparse 1 + enqueue(packet_sparse_flow_tester) -- Sparse 2 + + -- Remove all sparse packets. + dequeue_cmp(packet_sparse_flow_tester) -- Dequeue sparse + dequeue_cmp(packet_sparse_flow_tester) -- Dequeue sparse + + -- Advance time_bytes + dequeue_cmp(packet_flow_background_stream) -- Advances time_bytes one quantom + -- Our FQ-CoDel algorithm should have expired the sparse_flow_tester + -- flow at this point, but not the background stream. + + -- Test that the sparse_flow_tester is indeed expired. + enqueue(packet_sparse_flow_tester) -- Add sparse packet with a higher priority + dequeue_cmp(packet_sparse_flow_tester) -- Dequeue the sparse packet + dequeue_cmp(packet_flow_background_stream) -- Advances time_bytes one quantom + -- Our FQ-CoDel algorithm should have expired both the sparse_flow_tester + -- flow and the background stream at this point. +end + +-- 1.2 Test a sparse flow when the time_bytes advances while the flow is sparse +function fq_codel_sparse_test2() + -- This test does the following: + -- 1. Creates a sparse flow with a couple of packets. + -- 2. Advances time_bytes by a half a quantom + -- 3. Adds a couple of packets to the sparse flow. + -- In steps one and three the test confirms that the sparse flow + -- is still sparse. + make_sparse(packet_flow_background_stream) + + -- Make the packet the size of half a quantom (1522/2 - 62) + packet_flow_background_stream.udp.payload = create_payload(699) + + -- Make each packet 50 bytes for our sparse flow + packet_sparse_flow_tester.udp.payload = create_payload(38) + + -- Keep in mind that the last background packet ends at a full quantom. Therefore, + -- if we want to update the time_bytes by a half a quantom, we will need to enqueue + -- and deqeueu a half a quantom packet. + enqueue(packet_flow_background_stream) -- Used to advance time_bytes by half a quantom + enqueue(packet_flow_background_stream) -- Used to advance time_bytes by half a quantom + enqueue(packet_flow_background_stream) -- Make sure the flow is not recycled after update + dequeue_cmp(packet_flow_background_stream) -- Advances time_bytes by a half a quantom + + -- Confirm that the sparse flow has a higher priority than the background stream. + enqueue(packet_sparse_flow_tester) -- Add a sparse packet + enqueue(packet_sparse_flow_tester) -- Add a sparse packet + dequeue_cmp(packet_sparse_flow_tester) -- Dequeue the sparse packet + dequeue_cmp(packet_sparse_flow_tester) -- Dequeue the sparse packet + + dequeue_cmp(packet_flow_background_stream) -- Advances time_bytes by a half a quantom + + -- Confirm that the sparse flow has a higher priority than the stream. + enqueue(packet_sparse_flow_tester) -- Add a sparse packet + enqueue(packet_sparse_flow_tester) -- Add a sparse packet + dequeue_cmp(packet_sparse_flow_tester) -- Dequeue the sparse packet + dequeue_cmp(packet_sparse_flow_tester) -- Dequeue the sparse packet + + -- Recycle both flows. + dequeue_cmp(packet_flow_background_stream) -- Recycle both flows +end + +-- 1.3 Test a flow that becomes a stream. +function fq_codel_sparse_test3() + -- This test does the following: + -- 1. Creates a sparse flow and adds a full quantom to it. + -- 2. Adds packets to the flow to make it a stream. + -- 3. Advances time_bytes by a half a quantom. + -- 4. Adds packets to the stream + -- In steps two and four the test confirms that the flow is a stream. + make_sparse(packet_flow_background_stream) + + -- Make the packet the size of half a quantom (1522/2 - 62) + packet_sparse_flow_tester.udp.payload = create_payload(699) + + -- Make the packet the size of half a quantom (1522/2 - 62) + packet_flow_background_stream.udp.payload = create_payload(699) + + -- Keep in mind that the last background packet ends at a full quantom. Therefore, + -- if we want to update the time_bytes by a half a quantom, we will need to enqueue + -- and deqeueu a half a quantom packet. + enqueue(packet_flow_background_stream) -- Used to advance time_bytes by half a quantom + enqueue(packet_flow_background_stream) -- Used to advance time_bytes by half a quantom + enqueue(packet_flow_background_stream) -- Make sure the flow is not recycled after update + dequeue_cmp(packet_flow_background_stream) -- Advances time_bytes by a half a quantom + + -- Make the sparse_flow_tester flow a stream. + enqueue(packet_sparse_flow_tester) -- Add sparse packet + enqueue(packet_sparse_flow_tester) -- Add sparse packet + enqueue(packet_sparse_flow_tester) -- Make the flow a stream + enqueue(packet_sparse_flow_tester) -- Add stream packet + + -- Dequeue the sparse flow packets. + dequeue_cmp(packet_sparse_flow_tester) + dequeue_cmp(packet_sparse_flow_tester) + + -- Confirm that both flows are streams with equal priority. + dequeue_cmp(packet_sparse_flow_tester) + dequeue_cmp(packet_flow_background_stream) + dequeue_cmp(packet_sparse_flow_tester) + dequeue_cmp(packet_flow_background_stream) +end + +-- +function fq_codel_codel_test1() + -- Not inplemented +end + + +-- Run tests +fq_codel_sparse_test1() +--fq_codel_sparse_test2() +--fq_codel_sparse_test3() +-- +--fq_codel_codel_test1() diff --git a/xdq-tester/hpfq.lua b/xdq-tester/hpfq.lua new file mode 100644 index 00000000..3f78c036 --- /dev/null +++ b/xdq-tester/hpfq.lua @@ -0,0 +1,42 @@ +-- SPDX-License-Identifier: GPL-2.0 +-- Copyright (c) 2022 Freysteinn Alfredsson + +-- Hierarchical Packet Fair Queueing (HPFQ) +config.bpf.file = "./sched_hpfq.bpf.o" + +-- Create flows +packet_flow1 = Udp:new() +packet_flow1.udp.dest = 4000 + +packet_flow2 = Udp:new() +packet_flow2.udp.dest = 8001 + +packet_flow3 = Udp:new() +packet_flow3.udp.dest = 8002 + + +function hpfq_test1() + enqueue(packet_flow1) + enqueue(packet_flow2) + enqueue(packet_flow3) + + dequeue_cmp(packet_flow3) + dequeue_cmp(packet_flow2) + dequeue_cmp(packet_flow1) +end + +function hpfq_debug() + enqueue(packet_flow1) + enqueue(packet_flow1) + dequeue_cmp(packet_flow1) + dequeue_cmp(packet_flow1) + + enqueue(packet_flow1) + enqueue(packet_flow1) + dequeue_cmp(packet_flow1) + dequeue_cmp(packet_flow1) +end + +-- hpfq_test1() + +hpfq_debug() diff --git a/xdq-tester/lib.lua b/xdq-tester/lib.lua new file mode 100644 index 00000000..21af228a --- /dev/null +++ b/xdq-tester/lib.lua @@ -0,0 +1,343 @@ +-- SPDX-License-Identifier: GPL-2.0 +-- Copyright (c) 2022 Freysteinn Alfredsson + +IPPROTO_UDP = 17 +ETH_P_IPV6 = 0x86dd + +XDP_ABORTED = 0 +XDP_DROP = 1 +XDP_PASS = 2 +XDP_TX = 3 +XDP_REDIRECT = 4 + +config = { + bpf = { + file = "./sched_fifo.bpf.o", + xdp_func = "enqueue_prog", + dequeue_func = "dequeue_prog", + }, + + defaultUdp = { + eth = { + proto = ETH_P_IPV6, + source = "01:00:00:00:00:01", + dest = "01:00:00:00:00:02", + }, + + ip = { + priority = 0, + version = 6, + flow_lbl = { 0, 0, 0 }, + -- payload_len = , + nexthdr = IPPROTO_UDP, + hop_limit = 1, + saddr = "fe80::1", + daddr = "fe80::2", + }, + + udp = { + source = 1, + dest = 1, + -- len = , + -- check = , + payload = "" + } + } +} + +xdq = { + total_queued = 0, + total_dequeued = 0, + currently_queued = 0, +} + +-- Monitor config.bpf for changes +local _config_bpf = config.bpf +config.bpf = {} -- create proxy table +local config_bpf_mt = { + __index = function (t,k) + if k == "file" then + load_xdq_file(_config_bpf[k]) + end + return _config_bpf[k] + end, + + __newindex = function (t,k,v) + if k == "file" then + load_xdq_file(v) + end + _config_bpf[k] = v + end +} +setmetatable(config.bpf, config_bpf_mt) + +function table_has_key(table,key) + return table[key] ~= nil +end + +function compare_eth(cmp_eth, eth) + if type(cmp_eth.proto) ~= "number" then + fail("comparison eth.proto must be a number") + end + if type(eth.proto) ~= "number" then + fail("dequeued eth.proto must be a number") + end + if cmp_eth.proto ~= eth.proto then + fail(string.format("expected eth.proto: 0x%x, but found 0x%x", cmp_eth.proto, eth.proto)); + end + + if type(cmp_eth.source) ~= "string" then + fail("comparison eth.source must be a string") + end + if type(eth.source) ~= "string" then + fail("dequeued eth.source must be a string") + end + if cmp_eth.source ~= eth.source then + fail(string.format("expected eth.source: %s, but found %s", cmp_eth.source, eth.source)); + end + + if type(cmp_eth.dest) ~= "string" then + fail("comparison eth.dest must be a string") + end + if type(eth.dest) ~= "string" then + fail("dequeued eth.dest must be a string") + end + if cmp_eth.dest ~= eth.dest then + fail(string.format("expected eth.dest: %s, but found %s", cmp_eth.dest, eth.dest)); + end +end + +function compare_ip(cmp_ip, ip) + local cmp_ip_saddr = nil + local ip_saddr = nil + local cmp_ip_daddr = nil + local ip_daddr = nil + + if type(cmp_ip.priority) ~= "number" then + fail("comparison ip.priority must be a number") + end + if type(ip.priority) ~= "number" then + fail("dequeued ip.priority must be a number") + end + if cmp_ip.priority ~= ip.priority then + fail(string.format("expected ip.priority: %d, but found %d", cmp_ip.priority, ip.priority)); + end + + if type(cmp_ip.version) ~= "number" then + fail("comparison ip.version must be a number") + end + if type(ip.version) ~= "number" then + fail("dequeued ip.version must be a number") + end + if cmp_ip.version ~= ip.version then + fail(string.format("expected ip.version: %d, but found %d", cmp_ip.version, ip.version)); + end + + if type(cmp_ip.flow_lbl) ~= "table" then + fail("comparison ip.flow_lbl not a table") + end + if type(ip.flow_lbl) ~= "table" then + fail("dequeue ip.flow_lbl not a table") + end + for i = 1, 3, 1 do + if type(cmp_ip.flow_lbl[i]) ~= "number" then + fail(string.format("comparison ip.flow_lbl[%d] must be a number", i)) + end + if type(ip.flow_lbl[i]) ~= "number" then + fail(string.format("dequeued ip.flow_lbl[%d] must be a number", i)) + end + if cmp_ip.flow_lbl[i] ~= ip.flow_lbl[i] then + fail(string.format("expected ip.flow_lbl[%d]: %d, but found %d", i, cmp_ip.flow_lbl[i], ip.flow_lbl[i])); + end + end + + -- TODO: Add function that calculates the payload_len in lua + -- if type(cmp_ip.payload_len) ~= "number" then + -- fail("comparison ip.payload_len must be a number") + -- end + -- if type(ip.payload_len) ~= "number" then + -- fail("dequeued ip.payload_len must be a number") + -- end + -- if cmp_ip.payload_len ~= ip.payload_len then + -- fail(string.format("expected ip.payload_len: %s, but found %s", cmp_ip.payload_len, ip.payload_len)); + -- end + + if type(cmp_ip.nexthdr) ~= "number" then + fail("comparison ip.nexthdr must be a number") + end + if type(ip.nexthdr) ~= "number" then + fail("dequeued ip.nexthdr must be a number") + end + if cmp_ip.nexthdr ~= ip.nexthdr then + fail(string.format("expected ip.nexthdr: %d, but found %d", cmp_ip.nexthdr, ip.nexthdr)); + end + + if type(cmp_ip.hop_limit) ~= "number" then + fail("comparison ip.hop_limit must be a number") + end + if type(ip.hop_limit) ~= "number" then + fail("dequeued ip.hop_limit must be a number") + end + if cmp_ip.hop_limit ~= ip.hop_limit then + fail(string.format("expected ip.hop_limit: %d, but found %d", cmp_ip.hop_limit, ip.hop_limit)); + end + + if type(cmp_ip.saddr) ~= "string" then + fail("comparison ip.saddr must be a string") + end + if type(ip.saddr) ~= "string" then + fail("dequeued ip.saddr must be a string") + end + cmp_ip_saddr = normalize_ipv6_address(cmp_ip.saddr) + ip_saddr = normalize_ipv6_address(ip.saddr) + if cmp_ip_saddr ~= ip_saddr then + fail(string.format("expected ip.saddr: %s, but found %s", cmp_ip_saddr, ip_saddr)); + end + + if type(cmp_ip.daddr) ~= "string" then + fail("comparison ip.daddr must be a string") + end + if type(ip.daddr) ~= "string" then + fail("dequeued ip.daddr must be a string") + end + cmp_ip_daddr = normalize_ipv6_address(cmp_ip.daddr) + ip_daddr = normalize_ipv6_address(ip.daddr) + if cmp_ip_daddr ~= ip_daddr then + fail(string.format("expected ip.daddr: %s, but found %s", cmp_ip_daddr, ip_daddr)); + end +end + +function compare_udp(cmp_udp, udp) + if type(cmp_udp.source) ~= "number" then + fail("comparison udp.source must be a number") + end + if type(udp.source) ~= "number" then + fail("dequeued udp.source must be a number") + end + if cmp_udp.source ~= udp.source then + fail(string.format("expected udp.source: %d, but found %d", cmp_udp.source, udp.source)); + end + + if type(cmp_udp.dest) ~= "number" then + fail("comparison udp.dest must be a number") + end + if type(udp.dest) ~= "number" then + fail("dequeued udp.dest must be a number") + end + if cmp_udp.dest ~= udp.dest then + fail(string.format("expected udp.dest: %d, but found %d", cmp_udp.dest, udp.dest)); + end + + -- TODO: Add len when missing + -- if type(cmp_udp.len) ~= "number" then + -- fail("comparison udp.len must be a number") + -- end + -- if type(udp.len) ~= "number" then + -- fail("dequeued udp.len must be a number") + -- end + -- if cmp_udp.len ~= udp.len then + -- fail(string.format("expected udp.len: %d, but found %d", cmp_udp.sourc, udp.len)); + + -- TODO: Add lua function that creates UDP checksum + -- if type(cmp_udp.check) ~= "number" then + -- fail("comparison udp.check must be a number") + -- end + -- if type(udp.check) ~= "number" then + -- fail("dequeued udp.check must be a number") + -- end + -- if cmp_udp.check ~= udp.check then + -- fail(string.format("expected udp.check: %d, but found %d", cmp_udp.sourc, udp.check)); + + if type(cmp_udp.payload) ~= "string" then + fail("comparison udp.payload must be a string") + end + if type(udp.payload) ~= "string" then + fail("dequeued udp.payload must be a string") + end + if cmp_udp.payload ~= udp.payload then + fail(string.format("expected udp.payload: %s, but found %s", cmp_udp.payload, udp.payload)); + end +end + +function dequeue_cmp(cmp) + local packet, retval = dequeue() + local protocol = nil + + if type(cmp) ~= "table" then + fail("parameter not a table") + end + if type(packet) ~= "table" then + fail("dequeue failed") + end + + if type(packet.eth) ~= "table" then + fail("comparision packet missing eth table") + end + if type(packet.eth) ~= "table" then + fail("dequeued packet missing eth table") + end + compare_eth(cmp.eth, packet.eth) + if cmp.eth.proto == ETH_P_IPV6 then + if type(cmp.ip) ~= "table" then + fail("comparision packet missing ip table") + end + if type(packet.ip) ~= "table" then + fail("dequeued packet missing ip table") + end + compare_ip(cmp.ip, packet.ip) + protocol = cmp.ip.nexthdr + end + if protocol == IPPROTO_UDP then + if type(cmp.udp) ~= "table" then + fail("comparision packet missing udp table") + end + if type(packet.udp) ~= "table" then + fail("dequeued packet missing udp table") + end + compare_udp(cmp.udp, packet.udp) + end + return packet, retval +end + +function create_payload(len) + if type(len) ~= 'number' then + fail("parameter must be a number") + end + if len < 0 then + fail("length parameter can't be a negative value") + end + return string.rep("A", len) +end + +function dump(o) + if type(o) == 'table' then + local s = '{\n' + for k,v in pairs(o) do + if type(k) ~= 'number' then k = '"'..k..'"' end + s = s .. '\t['..k..'] = ' .. dump(v) .. ',\n' + end + return s .. '}\n' + else + return tostring(o) + end +end + +function copy(obj) + if type(obj) ~= 'table' then return obj end + local res = {} + for k, v in pairs(obj) do res[copy(k)] = copy(v) end + return res +end + +Udp = { +} + +function Udp:new() + -- meta = {} + -- meta.__index = function (table, key) + -- return config.defaultUdp[key] + -- end + obj = copy(config.defaultUdp) + -- setmetatable(obj, meta) + return obj +end diff --git a/xdq-tester/sched_fifo.bpf.c b/xdq-tester/sched_fifo.bpf.c new file mode 100644 index 00000000..5beadc7c --- /dev/null +++ b/xdq-tester/sched_fifo.bpf.c @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Freysteinn Alfredsson */ + +#include +#include +#include +#include +#include + +#include "bpf_local_helpers.h" + +struct { + __uint(type, BPF_MAP_TYPE_PIFO_XDP); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u32)); + __uint(max_entries, 4096); + __uint(map_extra, 4096); /* range */ +} pifo_map SEC(".maps"); + + +/* Simple FIFO */ +SEC("xdp") +int enqueue_prog(struct xdp_md *xdp) +{ + void *data = (void *)(long)xdp->data; + void *data_end = (void *)(long)xdp->data_end; + struct ethhdr *eth = data; + + if (eth + 1 > data_end) + return XDP_DROP; + + return bpf_redirect_map(&pifo_map, 0, 0); +} + +SEC("dequeue") +void *dequeue_prog(struct dequeue_ctx *ctx) +{ + __u64 prio = 0; + void *pkt = (void *) bpf_packet_dequeue(ctx, &pifo_map, 0, &prio); + if (!pkt) + return 0; + + return pkt; +} + +char _license[] SEC("license") = "GPL"; diff --git a/xdq-tester/sched_fq_codel.bpf.c b/xdq-tester/sched_fq_codel.bpf.c new file mode 100644 index 00000000..d0622a54 --- /dev/null +++ b/xdq-tester/sched_fq_codel.bpf.c @@ -0,0 +1,226 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Freysteinn Alfredsson */ + +#include +#include +#include +#include +#include +#include + +#include "bpf_local_helpers.h" + +struct { + __uint(type, BPF_MAP_TYPE_PIFO_XDP); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u32)); + __uint(max_entries, 4096); + __uint(map_extra, 8388608); /* range - 1024×4098×2 */ +} pifo_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, struct network_tuple); + __type(value, struct fq_codel_flow_state); + __uint(max_entries, 16384); +} flow_states SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, __u32); + __type(value, __u64); + __uint(max_entries, 1); +} xdq_time_ns SEC(".maps"); + +const __u32 quantom = 1522; +__u64 time_bytes = quantom; + +struct xdq_meta { + __u64 time_ns; + __u32 btf_id; +} __attribute__((aligned(4))) __attribute__((packed)); + +static __always_inline __u64 get_time_ns() +{ +#ifdef XDQ_LIVE + return bpf_ktime_get_boot_ns(); +#else + __u32 key = 0; + __u64 *val = bpf_map_lookup_elem(&xdq_time_ns, &key); + if (!val) { + return 0; + } + return *val; +#endif +} + +static __always_inline int xdq_meta_add(struct xdp_md *ctx) +{ + struct xdq_meta *meta; + void *data; + int err; + + /* Reserve space in-front of data pointer for our meta info. + * (Notice drivers not supporting data_meta will fail here!) + */ + err = bpf_xdp_adjust_meta(ctx, -(int)sizeof(*meta)); + if (err) { + bpf_printk("Frey: Failed to add meta data section"); + return -1; + } + + /* Notice: Kernel-side verifier requires that loading of + * ctx->data MUST happen _after_ helper bpf_xdp_adjust_meta(), + * as pkt-data pointers are invalidated. Helpers that require + * this are determined/marked by bpf_helper_changes_pkt_data() + */ + data = (void *)(unsigned long)ctx->data; + + meta = (void *)(unsigned long)ctx->data_meta; + if (meta + 1 > data) /* Verify meta area is accessible */ + return -2; + + meta->time_ns = get_time_ns(); + /* Userspace can identify struct used by BTF id */ + meta->btf_id = bpf_core_type_id_local(struct xdq_meta); + return 0; +} + +static __always_inline int schedule_packet(struct parsing_context *pctx) +{ + struct packet_info p_info = {}; + struct network_tuple nt = {0}; + struct fq_codel_flow_state *flow; + struct fq_codel_flow_state new_flow = {0}; + __u32 flow_time_bytes = time_bytes; // Used to offset sparse flows + __u32 packet_start_time_bytes; + __u32 prio = 0; + + char flow_type = 'd'; + + /* Get flow */ + if (parse_packet(pctx, &p_info) < 0) + goto err; + + nt = p_info.nt; + + flow = bpf_map_lookup_or_try_init(&flow_states, &nt, &new_flow); + if (!flow) + goto err; + + /* Handle Sparse flows */ + if (flow->pkts == 0 && flow_time_bytes >= flow->grace_period) { // New flow + flow_type = 'S'; + flow->pkts = 0; + flow->total_bytes = (pctx->data_end - pctx->data); + flow->finish_bytes = 0; + flow->grace_period = flow_time_bytes + quantom; + + flow_time_bytes -= quantom; // Give sparse flows a negative quantom priority; + } else if (flow->total_bytes < quantom) { + flow_type = 's'; + flow->total_bytes += (pctx->data_end - pctx->data); + flow_time_bytes -= quantom; // Give sparse flows a negative quantom priority; + } + flow->pkts++; + + /* Calculate scheduling priority */ + packet_start_time_bytes = bpf_max(flow_time_bytes, flow->finish_bytes); + flow->finish_bytes = packet_start_time_bytes + pctx->pkt_len; + prio = packet_start_time_bytes; + + if (bpf_map_update_elem(&flow_states, &nt, flow, BPF_ANY)) + goto err; + + bpf_printk("ENQUEUE: port: %d -> prio: %6d -> time_bytes: %6d -> pkt: %6d -> end: %6d type: %c ", (int) bpf_ntohs(nt.daddr.port), prio, time_bytes, flow->pkts, flow->finish_bytes, flow_type); + return bpf_redirect_map(&pifo_map, prio, 0); +err: + bpf_printk("XDP DROP"); + return XDP_DROP; +} + +/* Weighted fair queueing (WFQ) */ +SEC("xdp") +int enqueue_prog(struct xdp_md *xdp) +{ + if (xdq_meta_add(xdp) < 0) { + return XDP_ABORTED; + } + + struct parsing_context pctx = { + .data = (void *)(long)xdp->data, + .data_end = (void *)(long)xdp->data_end, + .meta = (void *)(long)xdp->data_meta, + .pkt_len = (xdp->data_end - xdp->data) & 0xffff, + .nh = { .pos = (void *)(long)xdp->data }, + }; + return schedule_packet(&pctx); +} + + +SEC("dequeue") +void *dequeue_prog(struct dequeue_ctx *ctx) +{ + struct parsing_context pctx; + struct packet_info p_info = {0}; + struct network_tuple nt; + struct fq_codel_flow_state *flow; + struct xdq_meta *meta; + __u64 now; + __u64 sojourn_time; + __u64 prio = 0; + + struct xdp_md *pkt = NULL; + + pkt = (void *) bpf_packet_dequeue(ctx, &pifo_map, 0, &prio); + if (!pkt) { + bpf_printk("Frey: No packet in PIFO"); + goto err; + } + + pctx.data = (void *)(long) pkt->data; + pctx.data_end = (void *)(long) pkt->data_end; + pctx.nh.pos = (void *)(long) pkt->data; + pctx.meta = (void *)(long) pkt->data_meta; + + /* Get flow */ + if (parse_packet(&pctx, &p_info) < 0) { + bpf_printk("Frey: Parse failed"); + goto err; + } + + nt = p_info.nt; + + flow = bpf_map_lookup_elem(&flow_states, &nt); + if (!flow) { + bpf_printk("Frey: Failed to lookup flow"); + goto err; + } + + flow->pkts--; + + if (prio > time_bytes) + time_bytes = prio; + + meta = (struct xdq_meta *) pctx.meta; + if (meta + 1 > pctx.data) { /* Verify meta area is accessible */ + bpf_printk("Frey: Failed to lookup metadata"); + goto err; + } + now = get_time_ns(); + sojourn_time = now - meta->time_ns; + if (codel_drop(&flow->codel, sojourn_time, now)) { + bpf_printk("Frey: Codel dropped packet!"); + goto err; + } + + bpf_printk("DEQUEUE: port: %d <- prio: %6d <- time_bytes: %6d <- pkt: %6d <- tot: %6d", (int) bpf_ntohs(nt.daddr.port), prio, time_bytes, flow->pkts, flow->total_bytes); + return pkt; +err: + if (pkt) + bpf_packet_drop(ctx, pkt); + bpf_printk("DEQUEUE packet failed"); + return NULL; +} + +char _license[] SEC("license") = "GPL"; diff --git a/xdq-tester/sched_hpfq.bpf.c b/xdq-tester/sched_hpfq.bpf.c new file mode 100644 index 00000000..7f984765 --- /dev/null +++ b/xdq-tester/sched_hpfq.bpf.c @@ -0,0 +1,228 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Freysteinn Alfredsson */ + +#include +#include +#include +#include +#include + +#include "bpf_local_helpers.h" + +/* This code determines root WFQ scheduling using UDP ports. It would be nicer + in the future to use VLANs instead. + All UDP ports up to 4000 go to the left PIFO, and the other ports go to the + right PIFO. */ + +enum leaf_pifo { + NO_PIFO = 0, + LEFT_PIFO, + RIGHT_PIFO +}; + +struct { + __uint(type, BPF_MAP_TYPE_PIFO_GENERIC); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u32)); + __uint(max_entries, 4096); + __uint(map_extra, 8388608); /* range - 1024×4098×2 */ +} root_pifo_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PIFO_XDP); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u32)); + __uint(max_entries, 4096); + __uint(map_extra, 8388608); /* range - 1024×4098×2 */ +} left_pifo_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PIFO_XDP); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u32)); + __uint(max_entries, 4096); + __uint(map_extra, 8388608); /* range - 1024×4098×2 */ +} right_pifo_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, struct network_tuple); + __type(value, struct flow_state); + __uint(max_entries, 16384); +} flow_states SEC(".maps"); + +__u64 root_time_bytes = 0; +__u64 left_time_bytes = 0; +__u64 right_time_bytes = 0; + +__u16 default_root_weight = 256; +__u16 default_weight = 256; + +static __always_inline int set_root_flow_priority(struct parsing_context *pctx, + struct flow_state *flow) +{ + __u64 root_start_time_bytes = bpf_max(root_time_bytes, flow->root_finish_bytes); + flow->root_finish_bytes = root_start_time_bytes + (pctx->pkt_len * flow->root_weight >> 8); + return root_start_time_bytes & ((1UL << 60) - 1); // Priority only defined in the lower 60 bits +} + +static __always_inline int set_leaf_flow_priority(struct parsing_context *pctx, + struct flow_state *flow, + __u64 leaf_time_bytes) +{ + __u64 leaf_start_time_bytes = bpf_max(leaf_time_bytes, flow->finish_bytes); + flow->finish_bytes = leaf_start_time_bytes + (pctx->pkt_len * flow->weight >> 8); + return leaf_start_time_bytes; +} + +static __always_inline int schedule_packet(struct parsing_context *pctx) +{ + struct packet_info p_info = {}; + + struct network_tuple nt = {0}; + __u32 leaf_id; + + struct flow_state new_flow = {0}; + struct flow_state *flow; + + __u64 root_prio; + __u32 left_prio; + __u32 right_prio; + + new_flow.root_weight = default_root_weight; + new_flow.weight = default_weight; + + /* Get flow */ + if (parse_packet(pctx, &p_info) < 0) + goto err; + + nt = p_info.nt; + + leaf_id = (bpf_ntohs(p_info.udph->dest) <= 4000) ? LEFT_PIFO : RIGHT_PIFO; + + flow = bpf_map_lookup_or_try_init(&flow_states, &nt, &new_flow); + if (!flow) + goto err; + flow->pkts++; + + /* Calculate scheduling priority */ + // Root WFQ + root_prio = set_root_flow_priority(pctx, flow); + if (bpf_map_push_elem(&root_pifo_map, &leaf_id, root_prio)) + goto err; + flow->root_priority = root_prio; + + // Leaf WFQ + if (leaf_id == LEFT_PIFO) { + left_prio = set_leaf_flow_priority(pctx, flow, left_time_bytes); + + if (bpf_map_update_elem(&flow_states, &nt, flow, BPF_ANY)) + goto err; + + bpf_printk("XDP HPFQ scheduled with priority, root:%d left:%d", root_prio, left_prio); + return bpf_redirect_map(&left_pifo_map, left_prio, 0); + } else if (leaf_id == RIGHT_PIFO) { + right_prio = set_leaf_flow_priority(pctx, flow, right_time_bytes); + + if (bpf_map_update_elem(&flow_states, &nt, flow, BPF_ANY)) + goto err; + + bpf_printk("XDP HPFQ scheduled with priority, root:%d right:%d", root_prio, right_prio); + return bpf_redirect_map(&right_pifo_map, right_prio, 0); + } +err: + bpf_printk("XDP DROP"); + return XDP_DROP; +} + +/* Hierarchical Packet Fair Queueing (HPFQ) */ +SEC("xdp") +int enqueue_prog(struct xdp_md *xdp) +{ + struct parsing_context pctx = { + .data = (void *)(long)xdp->data, + .data_end = (void *)(long)xdp->data_end, + .pkt_len = (xdp->data_end - xdp->data) & 0xffff, + .nh = { .pos = (void *)(long)xdp->data }, + }; + return schedule_packet(&pctx); +} + + +SEC("dequeue") +void *dequeue_prog(struct dequeue_ctx *ctx) +{ + struct parsing_context pctx; + struct packet_info p_info = {0}; + + struct network_tuple nt; + __u32 leaf_id; + + struct flow_state *flow; + + __u64 root_prio = 0; + __u64 leaf_prio = 0; + + struct xdp_md *pkt = NULL; + + + if (bpf_map_pop_elem(&root_pifo_map, &leaf_id)) + goto err; + + if (leaf_id == LEFT_PIFO) + pkt = (void *) bpf_packet_dequeue(ctx, &left_pifo_map, 0, &leaf_prio); + else if (leaf_id == RIGHT_PIFO) + pkt = (void *) bpf_packet_dequeue(ctx, &right_pifo_map, 0, &leaf_prio); + + if (!pkt) + goto err; + + pctx.data = (void *)(long) pkt->data; + pctx.data_end = (void *)(long) pkt->data_end; + pctx.nh.pos = (void *)(long) pkt->data; + + /* Get flows */ + if (parse_packet(&pctx, &p_info) < 0) + goto err; + + nt = p_info.nt; + + // Handle flow + flow = bpf_map_lookup_elem(&flow_states, &nt); + if (!flow) + goto err; + root_prio = flow->root_priority; + + flow->pkts--; + if (flow->pkts <= 0) { + if (!flow->persistent) { + bpf_map_delete_elem(&flow_states, &nt); + } else { + flow->root_finish_bytes = 0; + flow->finish_bytes = 0; + } + } + + // Handle virtual time in bytes + root_time_bytes = root_prio; + if (leaf_id == LEFT_PIFO) + left_time_bytes = leaf_prio; + else + right_time_bytes = leaf_prio; + + bpf_printk("Frey: left_time_bytes:%d right_time_bytes:%d", left_time_bytes, right_time_bytes); + bpf_printk("flow: %hd - root_weight:%d leaf_weight:%d", nt.daddr.port, flow->root_weight, flow->weight); + if (leaf_id == LEFT_PIFO) + bpf_printk("DEQUEUE HPFQ with priority, root:%d left:%d", root_prio, leaf_prio); + else + bpf_printk("DEQUEUE HPFQ with priority, root:%d right:%d", root_prio, leaf_prio); + + return pkt; +err: + if (pkt) + bpf_packet_drop(ctx, pkt); + bpf_printk("DEQUEUE packet failed"); + return NULL; +} + +char _license[] SEC("license") = "GPL"; diff --git a/xdq-tester/sched_sprio.bpf.c b/xdq-tester/sched_sprio.bpf.c new file mode 100644 index 00000000..c38547a7 --- /dev/null +++ b/xdq-tester/sched_sprio.bpf.c @@ -0,0 +1,127 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Freysteinn Alfredsson */ + +#include +#include +#include +#include +#include + +#include "bpf_local_helpers.h" + +struct { + __uint(type, BPF_MAP_TYPE_PIFO_XDP); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u32)); + __uint(max_entries, 4096); + __uint(map_extra, 4096); /* range */ +} pifo_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, struct network_tuple); + __type(value, struct flow_state); + __uint(max_entries, 16384); +} flow_states SEC(".maps"); + +__u32 default_weight = 256; + +static __always_inline int schedule_packet(struct parsing_context *pctx) +{ + struct packet_info p_info = {}; + struct network_tuple nt = {0}; + struct flow_state *flow; + struct flow_state new_flow = {0}; + __u32 prio = 0; + + new_flow.pkts = 0; + new_flow.finish_bytes = 0; + new_flow.weight = default_weight; + new_flow.persistent = 0; + + /* Get flow */ + if (parse_packet(pctx, &p_info) < 0) + goto err; + + nt = p_info.nt; + + flow = bpf_map_lookup_or_try_init(&flow_states, &nt, &new_flow); + if (!flow) + goto err; + + flow->pkts++; + + /* Calculate scheduling priority */ + prio = flow->weight; + + if (bpf_map_update_elem(&flow_states, &nt, flow, BPF_ANY)) + goto err; + + bpf_printk("XDP SPRIO scheduled with priority %d", prio); + return bpf_redirect_map(&pifo_map, prio, 0); +err: + bpf_printk("XDP DROP"); + return XDP_DROP; +} + +/* Simple strict priority */ +SEC("xdp") +int enqueue_prog(struct xdp_md *xdp) +{ + struct parsing_context pctx = { + .data = (void *)(long)xdp->data, + .data_end = (void *)(long)xdp->data_end, + .pkt_len = (xdp->data_end - xdp->data) & 0xffff, + .nh = { .pos = (void *)(long)xdp->data }, + }; + return schedule_packet(&pctx); +} + + +SEC("dequeue") +void *dequeue_prog(struct dequeue_ctx *ctx) +{ + struct parsing_context pctx; + struct packet_info p_info = {0}; + struct network_tuple nt; + struct flow_state *flow; + __u64 prio = 0; + + struct xdp_md *pkt = NULL; + + pkt = (void *)bpf_packet_dequeue(ctx, &pifo_map, 0, &prio); + if (!pkt) + goto err; + + pctx.data = (void *)(long) pkt->data; + pctx.data_end = (void *)(long) pkt->data_end; + pctx.nh.pos = (void *)(long) pkt->data; + + /* Get flow */ + if (parse_packet(&pctx, &p_info) < 0) + goto err; + + nt = p_info.nt; + + flow = bpf_map_lookup_elem(&flow_states, &nt); + if (!flow) + goto err; + + flow->pkts--; + if (flow->pkts <= 0) { + if (!flow->persistent) + bpf_map_delete_elem(&flow_states, &nt); + else + flow->finish_bytes = 0; + } + + bpf_printk("DEQUEUE SPRIO with priority %d", prio); + return pkt; +err: + if (pkt) + bpf_packet_drop(ctx, pkt); + bpf_printk("DEQUEUE packet failed"); + return NULL; +} + +char _license[] SEC("license") = "GPL"; diff --git a/xdq-tester/sched_wfq.bpf.c b/xdq-tester/sched_wfq.bpf.c new file mode 100644 index 00000000..eba07d7e --- /dev/null +++ b/xdq-tester/sched_wfq.bpf.c @@ -0,0 +1,135 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Freysteinn Alfredsson */ + +#include +#include +#include +#include +#include + +#include "bpf_local_helpers.h" + +struct { + __uint(type, BPF_MAP_TYPE_PIFO_XDP); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u32)); + __uint(max_entries, 4096); + __uint(map_extra, 8388608); /* range - 1024×4098×2 */ +} pifo_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, struct network_tuple); + __type(value, struct flow_state); + __uint(max_entries, 16384); +} flow_states SEC(".maps"); + +__u64 time_bytes = 0; + +__u16 default_weight = 256; + +static __always_inline int schedule_packet(struct parsing_context *pctx) +{ + struct packet_info p_info = {}; + struct network_tuple nt = {0}; + struct flow_state *flow; + struct flow_state new_flow = {0}; + __u32 start_time_bytes; + __u32 prio = 0; + + new_flow.weight = default_weight; + + /* Get flow */ + if (parse_packet(pctx, &p_info) < 0) + goto err; + + nt = p_info.nt; + + flow = bpf_map_lookup_or_try_init(&flow_states, &nt, &new_flow); + if (!flow) + goto err; + + flow->pkts++; + + /* Calculate scheduling priority */ + start_time_bytes = bpf_max(time_bytes, flow->finish_bytes); + flow->finish_bytes = start_time_bytes + (pctx->pkt_len * flow->weight >> 8); + prio = start_time_bytes; + + if (bpf_map_update_elem(&flow_states, &nt, flow, BPF_ANY)) + goto err; + + bpf_printk("Frey: port: %d weight: %d pkt: %d end: %d", (int) bpf_ntohs(nt.daddr.port), flow->weight, flow->pkts, flow->finish_bytes); + bpf_printk("XDP WFQ scheduled with priority, %d", prio); + return bpf_redirect_map(&pifo_map, prio, 0); +err: + bpf_printk("XDP DROP"); + return XDP_DROP; +} + +/* Weighted fair queueing (WFQ) */ +SEC("xdp") +int enqueue_prog(struct xdp_md *xdp) +{ + struct parsing_context pctx = { + .data = (void *)(long)xdp->data, + .data_end = (void *)(long)xdp->data_end, + .pkt_len = (xdp->data_end - xdp->data) & 0xffff, + .nh = { .pos = (void *)(long)xdp->data }, + }; + return schedule_packet(&pctx); +} + + +SEC("dequeue") +void *dequeue_prog(struct dequeue_ctx *ctx) +{ + struct parsing_context pctx; + struct packet_info p_info = {0}; + struct network_tuple nt; + struct flow_state *flow; + __u64 prio = 0; + + struct xdp_md *pkt = NULL; + + pkt = (void *) bpf_packet_dequeue(ctx, &pifo_map, 0, &prio); + if (!pkt) { + bpf_printk("Frey: No packet in PIFO"); + goto err; + } + + pctx.data = (void *)(long) pkt->data; + pctx.data_end = (void *)(long) pkt->data_end; + pctx.nh.pos = (void *)(long) pkt->data; + + /* Get flow */ + if (parse_packet(&pctx, &p_info) < 0) + goto err; + + nt = p_info.nt; + + flow = bpf_map_lookup_elem(&flow_states, &nt); + if (!flow) + goto err; + + flow->pkts--; + if (flow->pkts <= 0) { + if (!flow->persistent) + bpf_map_delete_elem(&flow_states, &nt); + else + flow->finish_bytes = 0; + } + + time_bytes = prio; + + bpf_printk("port: %d - weight: %d", bpf_ntohs(nt.daddr.port), flow->weight); + bpf_printk("DEQUEUE WFQ with priority %d", prio); + return pkt; +err: + if (pkt) + bpf_packet_drop(ctx, pkt); + bpf_printk("DEQUEUE packet failed"); + return NULL; +} + +char _license[] SEC("license") = "GPL"; diff --git a/xdq-tester/sprio.lua b/xdq-tester/sprio.lua new file mode 100644 index 00000000..721e00d6 --- /dev/null +++ b/xdq-tester/sprio.lua @@ -0,0 +1,27 @@ +-- SPDX-License-Identifier: GPL-2.0 +-- Copyright (c) 2022 Freysteinn Alfredsson + +-- Strict Priority scheduler (SPRIO) +config.bpf.file = "./sched_sprio.bpf.o" + +-- Create flows +packet_flow1 = Udp:new() +packet_flow1.udp.dest = 8080 +set_flow_weight(packet_flow1, 2) + +packet_flow2 = Udp:new() +packet_flow2.udp.dest = 8081 +set_flow_weight(packet_flow2, 1) + +packet_flow3 = Udp:new() +packet_flow3.udp.dest = 8082 +set_flow_weight(packet_flow3, 0) + +-- Test scheduler +enqueue(packet_flow1) +enqueue(packet_flow2) +enqueue(packet_flow3) + +dequeue_cmp(packet_flow3) +dequeue_cmp(packet_flow2) +dequeue_cmp(packet_flow1) diff --git a/xdq-tester/wfq.lua b/xdq-tester/wfq.lua new file mode 100644 index 00000000..77cd40d0 --- /dev/null +++ b/xdq-tester/wfq.lua @@ -0,0 +1,148 @@ +-- SPDX-License-Identifier: GPL-2.0 +-- Copyright (c) 2022 Freysteinn Alfredsson + +-- Weighted Fair Queueing (WFQ) +config.bpf.file = "./sched_wfq.bpf.o" + + +-- Setup flows +packet_flow1 = Udp:new() +packet_flow1.udp.dest = 8000 +packet_flow1.udp.payload = create_payload(38) + +packet_flow2 = Udp:new() +packet_flow2.udp.dest = 8001 +packet_flow2.udp.payload = create_payload(138) + +packet_flow3 = Udp:new() +packet_flow3.udp.dest = 8002 +packet_flow3.udp.payload = create_payload(38) + +-- Test scheduler + +-- 1. Enqueue two packets using the same flow. +-- Tests that no flows remain after the PIFO is empty. +function wfq_test1() + enqueue(packet_flow1) + dequeue_cmp(packet_flow1) +end + + +-- 2. Enqueue two flows +function wfq_test2() + enqueue(packet_flow1) + enqueue(packet_flow1) + enqueue(packet_flow3) + + dequeue_cmp(packet_flow1) + dequeue_cmp(packet_flow3) + dequeue_cmp(packet_flow1) +end + + +-- 3. Enqueue three flows where one flow has a larger packet size. +function wfq_test3() + -- priority: flow(packet_number, flow_end_byte), flow(...), ... + enqueue(packet_flow1) + -- 0: *f1(1, 100) + + enqueue(packet_flow2) + -- 0: *f2(1, 200), f1(1, 100) + + enqueue(packet_flow1) + -- 0: f2(1, 200), f1(1, 100) + -- 100: *f1(2, 100) + + enqueue(packet_flow2) + -- 0: f2(1, 200), f1(1, 100) + -- 100: f1(2, 100) + -- 200: *f2(2, 400) + + dequeue_cmp(packet_flow1) + -- 0: f2(1, 200) ---> *f1(1, 100) + -- 100: f1(2, 100) + -- 200: f2(2, 400) + + enqueue(packet_flow1) + -- 0: f2(1, 200) + -- 100: f1(2, 100) + -- 200: *f1(3, 300), f2(2, 400) + + dequeue_cmp(packet_flow2) + -- 0: ---> *f2(1, 200) + -- 100: f1(2, 100) + -- 200: f1(3, 300), f2(2, 400) + + dequeue_cmp(packet_flow1) + -- 100: ---> *f1(2, 100) + -- 200: f1(3, 300), f2(2, 400) + + enqueue(packet_flow3) + -- 100: *f3(1, 200) + -- 200: f1(3, 300), f2(2, 400) + + enqueue(packet_flow3) + -- 100: f3(1, 200) + -- 200: *f3(2, 300), f1(3, 300), f2(2, 400) + + dequeue_cmp(packet_flow3) + -- 100: ---> *f3(1, 200) + -- 200: f3(2, 300), f1(3, 300), f2(2, 400) + + dequeue_cmp(packet_flow2) + -- 200: f3(2, 300), f1(3, 300) ---> *f2(2, 400) + + dequeue_cmp(packet_flow1) + -- 200: f3(2, 300) ---> *f1(3, 300) + + dequeue_cmp(packet_flow3) + -- 200: ---> *f3(2, 300) +end + + +-- 4. Enqueue multiple packets +function wfq_test4() + for i = 0, 4095, 1 + do + enqueue(packet_flow1) + end + for i = 0, 4095, 1 + do + dequeue_cmp(packet_flow1) + end +end + + +-- 5. Enqueue packets with weights +function wfq_test5() + set_flow_weight(packet_flow1, 1024) + enqueue(packet_flow1) + enqueue(packet_flow1) + enqueue(packet_flow2) + enqueue(packet_flow2) + + dequeue_cmp(packet_flow1) + dequeue_cmp(packet_flow2) + dequeue_cmp(packet_flow2) + dequeue_cmp(packet_flow1) + + set_flow_weight(packet_flow1, 256) + set_flow_weight(packet_flow2, 32) + enqueue(packet_flow1) + enqueue(packet_flow1) + enqueue(packet_flow2) + enqueue(packet_flow2) + + dequeue_cmp(packet_flow1) + dequeue_cmp(packet_flow2) + dequeue_cmp(packet_flow2) + dequeue_cmp(packet_flow1) +end + + +-- Run tests +wfq_test1() +wfq_test2() +wfq_test3() +wfq_test4() +wfq_test5() diff --git a/xdq-tester/xdq-tester.c b/xdq-tester/xdq-tester.c new file mode 100644 index 00000000..7318bcf8 --- /dev/null +++ b/xdq-tester/xdq-tester.c @@ -0,0 +1,1021 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2022 Freysteinn Alfredsson */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include + +#include +#include + +#include +#include +#include + +#include "logging.h" + +#include "xdq-tester.h" + +#include "bpf_shared_data.h" + +static const struct option long_options[] = { + {"verbose", no_argument, NULL, 'v' }, + {"help", no_argument, NULL, 'h' }, + {} +}; + +static void mac_to_string(char *dst, unsigned char *mac); +static __be32 calc_ipv6_chksum_part(const struct ipv6hdr *iph); +static __be16 calc_udp_cksum(const struct udphdr *udp, __be32 chksum_part); +static struct ethhdr *lua_to_eth_header(lua_State *L, struct packet *pkt); +static struct ipv6hdr *lua_to_ipv6_header(lua_State *L, struct packet *pkt); +static struct udphdr *lua_to_udp_header(lua_State *L, struct packet *pkt, __be64 checksum_part); +static struct packet *lua_parse_packet(lua_State *L); +static void set_bpf_fd(lua_State *L, struct bpf_object *obj, const char *func_name, int *prog_fd); +static struct ethhdr *parse_eth(lua_State *L, struct packet *pkt); +static struct ipv6hdr *parse_ipv6(lua_State *L, struct packet *pkt); +static struct udphdr *parse_udp(lua_State *L, struct packet *pkt); +static int bpf_xdp(lua_State *L, struct packet *pkt); +static int bpf_dequeue(lua_State *L, struct packet *pkt); +static struct ethhdr *parse_eth_to_lua(lua_State *L, struct packet *pkt); +static struct ipv6hdr *parse_ipv6_to_lua(lua_State *L, struct packet *pkt); +static struct udphdr *parse_udp_to_lua(lua_State *L, struct packet *pkt); +static void parse_packet_to_lua(lua_State *L, struct packet *pkt); +static void initLuaFunctions(lua_State *L, char *prog_name); +static void usage(char *prog_name); + + +static void mac_to_string(char *dst, unsigned char *mac) +{ + snprintf(dst, 18, "%02x:%02x:%02x:%02x:%02x:%02x", + mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]); +} + +static __be32 calc_ipv6_chksum_part(const struct ipv6hdr *iph) +{ + __u32 chksum = iph->nexthdr + ntohs(iph->payload_len); + int i; + + for (i = 0; i < 8; i++) { + chksum += ntohs(iph->saddr.s6_addr16[i]); + chksum += ntohs(iph->daddr.s6_addr16[i]); + } + return chksum; +} + +static __be16 calc_udp_cksum(const struct udphdr *udp, __be32 chksum_part) +{ + __u32 chksum = chksum_part; + chksum += ntohs(udp->source); + chksum += ntohs(udp->dest); + chksum += ntohs(udp->len); + + while (chksum >> 16) + chksum = (chksum & 0xffff) + (chksum >> 16); + return htons(~chksum); +} + +struct xdq_state *get_xdq_state(lua_State *L) +{ + struct xdq_state *state; + + lua_getglobal(L, "_xdq"); + if (!lua_isuserdata(L, -1)) + die(L, ""); + state = lua_touserdata(L, -1); + lua_remove(L, -1); + + return state; +} + +void die(lua_State *L, const char *format, ...) +{ + struct xdq_state *state; + lua_Debug ar; + int line; + va_list args; + + lua_getglobal(L, "_xdq"); + if (!lua_isuserdata(L, -1)) { + fprintf(stderr, "Missing internal XDQ state within the Lua environment\n"); + exit(EXIT_FAILURE); + } + state = lua_touserdata(L, -1); + + if (lua_getstack(L, 1, &ar)) { + lua_getinfo(L, "nSl", &ar); + line = ar.currentline; + fprintf(stderr, "%s:%s:%d: ", state->prog_name, ar.short_src, line); + } else { + fprintf(stderr, "%s: ", state->prog_name); + } + + va_start(args, format); + vfprintf(stderr, format, args); + va_end(args); + fprintf(stderr, "\n"); + free(state->xdq_script); + exit(EXIT_FAILURE); +} + +struct packet *packet_alloc(lua_State *L, struct packet *pkt, size_t size) +{ + const int ALLOC_SIZE = 4096; + size_t old_length; + + if (pkt == NULL) + pkt = calloc(1, sizeof(struct packet)); + if (pkt->data == NULL) { + pkt->data = malloc(ALLOC_SIZE); + if (!pkt->data) + die(L, "Failed to allocate memory for packet: %s", strerror(errno)); + pkt->data_end = pkt->data; + pkt->cur = pkt->data; + pkt->length = 0; + } + old_length = pkt->length; + pkt->length += size; + pkt->data_end += size; + if (pkt->length > ALLOC_SIZE) + die(L, "Packet larger than %d octets\n", ALLOC_SIZE); + memset(pkt->data + old_length, '\0', size); + return pkt; +} + +void packet_free(struct packet *pkt) +{ + free(pkt->data); + free(pkt); +} + +static struct ethhdr *lua_to_eth_header(lua_State *L, struct packet *pkt) +{ + struct ethhdr *eth; + const char *mac_src_str; + const char *mac_dst_str; + int proto; + packet_alloc(L, pkt, sizeof(struct ethhdr)); + eth = (struct ethhdr *) pkt->cur; + + if (!lua_istable(L, -1)) + die(L, "Missing eth header\n"); + lua_getfield(L, -1, "eth"); + + lua_getfield(L, -1, "source"); + if (!lua_isstring(L, -1)) + die(L, "Source MAC address is not a string\n"); + mac_src_str = lua_tostring(L, -1); + if (!ether_aton_r(mac_src_str, (struct ether_addr *) ð->h_source)) + die(L, "Not a valid source MAC address: '%s'\n", mac_src_str); + lua_remove(L, -1); + + lua_getfield(L, -1, "dest"); + if (!lua_isstring(L, -1)) + die(L, "Destination MAC address is not a string\n"); + mac_dst_str = lua_tostring(L, -1); + if (!ether_aton_r(mac_dst_str, (struct ether_addr *) ð->h_dest)) + die(L, "Not a valid destination MAC address: '%s'\n", mac_dst_str); + lua_remove(L, -1); + + lua_getfield(L, -1, "proto"); + if (!lua_isinteger(L, -1)) + die(L, "Ethernet protocol field must be an integer\n"); + proto = lua_tointeger(L, -1); + if (proto < 0 || proto > 0xffff) + die(L, "Ethernet protocol field must be an integer between 0x0 and 0xffff, but was 0x%x\n", + proto); + eth->h_proto = htons((short) proto); + lua_remove(L, -1); + + lua_remove(L, -1); // Remove eth table from the stack + return eth; +} + +static struct ipv6hdr *lua_to_ipv6_header(lua_State *L, struct packet *pkt) +{ + struct ipv6hdr *iph; + int priority; + int version; + int flow_lbl_int; + int payload_len; + int nexthdr; + int hop_limit; + const char *src_ip; + const char *dst_ip; + + packet_alloc(L, pkt, sizeof(struct ipv6hdr)); + + iph = (struct ipv6hdr *) pkt->cur; + + lua_getfield(L, -1, "ip"); + if (!lua_istable(L, -1)) + die(L, "Missing eth header\n"); + + lua_getfield(L, -1, "priority"); + if (!lua_isinteger(L, -1)) + die(L, "IPv6 prirotiy field must be an integer\n"); + priority = lua_tointeger(L, -1); + if (priority < 0 || priority > 15) + die(L, "IPv6 prirotiy must be an integer between 0 and 15, but was %d\n", priority); + iph->priority = priority; + lua_remove(L, -1); + + lua_getfield(L, -1, "version"); + if (!lua_isinteger(L, -1)) + die(L, "IPv6 version field must be an integer\n"); + version = lua_tointeger(L, -1); + if (version < 0 || version > 15) + die(L, "IPv6 version must be an integer between 0 and 15, but was %d\n", version); + iph->version = version; + lua_remove(L, -1); + + lua_getfield(L, -1, "flow_lbl"); + if (!lua_istable(L, -1)) + die(L, "IPv6 flow_lbl must be a table\n"); + for (int i = 0; i < 3; i++) { + lua_rawgeti(L, -1, i + 1); + if (!lua_isinteger(L, -1)) + die(L, "IPv6 flow_lbl[%d] field must be an integer\n", i); + flow_lbl_int = lua_tointeger(L, -1); + if (flow_lbl_int < 0 || flow_lbl_int > 0xff) + die(L, "IPv6 flow_lbl[%d] field must be between 0x0 and 0xff but was 0x%x\n", + i, flow_lbl_int); + iph->flow_lbl[i] = flow_lbl_int; + lua_remove(L, -1); + } + lua_remove(L, -1); + + if (lua_getfield(L, -1, "payload_len") != LUA_TNIL) { + if (!lua_isinteger(L, -1)) + die(L, "IPv6 payload_len field must be an integer\n"); + payload_len = lua_tointeger(L, -1); + if (payload_len < 0 || payload_len > 0xffff) + die(L, "IPv6 payload_len field must be an integer between 0x0 and 0xffff, but was 0x%x\n", + payload_len); + iph->payload_len = htons((short) payload_len); + } + lua_remove(L, -1); + + lua_getfield(L, -1, "nexthdr"); + if (!lua_isinteger(L, -1)) + die(L, "IPv6 nexthdr field must be an integer\n"); + nexthdr = lua_tointeger(L, -1); + if (nexthdr < 0x0 || nexthdr > 0xff) + die(L, "IPv6 nexthdr must be an integer between 0x0 and 0xff, but was 0x%x\n", + nexthdr); + iph->nexthdr = nexthdr; + lua_remove(L, -1); + + lua_getfield(L, -1, "hop_limit"); + if (!lua_isinteger(L, -1)) + die(L, "IPv6 hop_limit field must be an integer\n"); + hop_limit = lua_tointeger(L, -1); + if (hop_limit < 0x0 || hop_limit > 0xff) + die(L, "IPv6 hop_limit must be an integer between 0x0 and 0xff, but was 0x%x\n", + hop_limit); + iph->hop_limit = hop_limit; + lua_remove(L, -1); + + lua_getfield(L, -1, "saddr"); + if (!lua_isstring(L, -1)) + die(L, "Source IPv6 address is not a string\n"); + src_ip = lua_tostring(L, -1); + if (!inet_pton(AF_INET6, src_ip, &iph->saddr)) + die(L, "Failed to set source IPv6 address to %s", src_ip); + lua_remove(L, -1); + + lua_getfield(L, -1, "daddr"); + if (!lua_isstring(L, -1)) + die(L, "Destination IPv6 address is not a string\n"); + dst_ip = lua_tostring(L, -1); + if (!inet_pton(AF_INET6, dst_ip, &iph->daddr)) + die(L, "Failed to set destination IPv6 address to %s", dst_ip); + lua_remove(L, -1); + + lua_remove(L, -1); // Remove ip table from the stack + return iph; +} + +static struct udphdr *lua_to_udp_header(lua_State *L, struct packet *pkt, __be64 checksum_part) +{ + struct udphdr *udp; + int src_port; + int dst_port; + int len; + int check; + const char *payload; + + packet_alloc(L, pkt, sizeof(struct udphdr)); + udp = (struct udphdr *) pkt->cur; + + lua_getfield(L, -1, "udp"); + if (!lua_istable(L, -1)) + die(L, "Missing udp header\n"); + + lua_getfield(L, -1, "source"); + if (!lua_isinteger(L, -1)) + die(L, "UDP source port must be an integer\n"); + src_port = lua_tointeger(L, -1); + if (src_port < 0 || src_port > 65535) + die(L, "UDP source port must be an integer between 0 and 65535, but was %d\n", + src_port); + udp->source = htons((short) src_port); + lua_remove(L, -1); + + lua_getfield(L, -1, "dest"); + if (!lua_isinteger(L, -1)) + die(L, "UDP destination port must be an integer\n"); + dst_port = lua_tointeger(L, -1); + if (dst_port < 0 || dst_port > 0xffff) + die(L, "UDP destination port must be an integer between 0 and 65535, but was %d\n", + dst_port); + udp->dest = htons((short) dst_port); + lua_remove(L, -1); + + if (lua_getfield(L, -1, "payload") != LUA_TNIL) { + if (!lua_isstring(L, -1)) + die(L, "UDP payload field must be a string\n"); + len = lua_rawlen(L, -1); + payload = lua_tostring(L, -1); + packet_alloc(L, pkt, len); + memcpy(pkt->cur + sizeof(struct udphdr), payload, len); + udp->len = htons(sizeof(struct udphdr) + len); + } + lua_remove(L, -1); + + if (lua_getfield(L, -1, "len") != LUA_TNIL) { + if (!lua_isinteger(L, -1)) + die(L, "UDP len field must be an integer\n"); + len = lua_tointeger(L, -1); + if (len < 0 || len > 0xffff) + die(L, "UDP len field must be an integer between 0 and 65535, but was %d\n", + len); + udp->len = htons((short) len); + } + lua_remove(L, -1); + + if (lua_getfield(L, -1, "check") != LUA_TNIL) { + if (!lua_isinteger(L, -1)) + die(L, "UDP check field must be an integer\n"); + check = lua_tointeger(L, -1); + if (check < 0 || check > 0xffff) + die(L, "UDP check field must be an integer between 0 and 65535, but was %d\n", + check); + udp->check = htons((short) check); + } + lua_remove(L, -1); + + if (checksum_part != -1) { + udp->check = calc_udp_cksum(udp, checksum_part); + } + + lua_remove(L, -1); // Remove udp table from the stack + return udp; +} + +static struct packet *lua_parse_packet(lua_State *L) +{ + struct packet *pkt = packet_alloc(L, NULL, 0); + struct ethhdr *eth = NULL; + struct ipv6hdr *iph = NULL; + struct udphdr *udp = NULL; + int proto = -1; + __be64 checksum_part = -1; + + eth = lua_to_eth_header(L, pkt); + pkt->cur += sizeof(struct ethhdr); + if (eth->h_proto == ntohs(ETH_P_IPV6)) { + iph = lua_to_ipv6_header(L, pkt); + proto = iph->nexthdr; + pkt->cur += sizeof(struct ipv6hdr); + checksum_part = calc_ipv6_chksum_part(iph); + } + if (proto == IPPROTO_UDP) { + udp = lua_to_udp_header(L, pkt, checksum_part); + + if (iph && iph->payload_len == 0) + iph->payload_len = udp->len; + } + pkt->cur = pkt->data; // Reset cur pointer for comparison + return pkt; +} + +static void set_bpf_fd(lua_State *L, struct bpf_object *obj, const char *func_name, int *prog_fd) +{ + struct bpf_program *prog = bpf_object__find_program_by_name(obj, func_name); + *prog_fd = bpf_program__fd(prog); + if (*prog_fd < 0 ) { + bpf_object__close(obj); + die(L, "Failed to run bpf_program__fd: %s", strerror(errno)); + } +} + +int load_xdq_file(lua_State *L) +{ + struct xdq_state *state; + const char *filename; + struct bpf_object *xdq_bpf_obj; + const char *xdp_func; + const char *dequeue_func; + struct bpf_program *prog; + int err = 0; + + if (lua_gettop(L) != 1) + die(L, "Incorrect number of arguments"); + if (!lua_isstring(L, 1)) + die(L, "Argument must be a string"); + filename = lua_tostring(L, 1); + + state = get_xdq_state(L); + + if (state->xdq_script) + free(state->xdq_script); + state->xdq_script = strdup(filename); + + lua_getglobal(L, "config"); + if (!lua_istable(L, -1)) + die(L, "Missing config table\n"); + + lua_getfield(L, -1, "bpf"); + if (!lua_istable(L, -1)) + die(L, "Missing config.bpf table\n"); + + lua_getfield(L, -1, "xdp_func"); + if (!lua_isstring(L, -1)) + die(L, "Missing config.bpf.xdq_func\n"); + xdp_func = lua_tostring(L, -1); + if (strlen(xdp_func) == 0) + die(L, "config.bpf.xdp_func can't be an empty string"); + lua_remove(L, -1); + + lua_getfield(L, -1, "dequeue_func"); + if (!lua_isstring(L, -1)) + die(L, "Missing config.bpf.dequeue_func\n"); + dequeue_func = lua_tostring(L, -1); + if (strlen(xdp_func) == 0) + die(L, "config.bpf.dequeue_func can't be an empty string"); + lua_remove(L, -1); + + lua_remove(L, -1); // Remove bpf table from the stack + lua_remove(L, -1); // Remove config table from the stack + + xdq_bpf_obj = bpf_object__open_file(state->xdq_script, NULL); + err = libbpf_get_error(xdq_bpf_obj); + if (err) + die(L, "Failed to run bpf_object__open: %s", strerror(errno)); + state->xdq_bpf_obj = xdq_bpf_obj; + + prog = bpf_object__find_program_by_name(xdq_bpf_obj, dequeue_func); + if (!prog) { + bpf_object__close(xdq_bpf_obj); + die(L, "Failed to run bpf_object_find_program_by_name: %s", strerror(errno)); + } + + bpf_program__set_type(prog, BPF_PROG_TYPE_DEQUEUE); + err = bpf_object__load(xdq_bpf_obj); + if (err) { + bpf_object__close(xdq_bpf_obj); + die(L, "Failed to run bpf_object__load: %s", strerror(errno)); + } + + set_bpf_fd(L, xdq_bpf_obj, xdp_func, &state->xdp_prog_fd); + set_bpf_fd(L, xdq_bpf_obj, dequeue_func, &state->dequeue_prog_fd); + return 0; +} + + +static int bpf_xdp(lua_State *L, struct packet *pkt) +{ + struct xdq_state *state; + int total_queued_packets; + int currently_queued_packets; + int err; + struct xdp_md ctx_in = { + .data_end = pkt->length, + }; + DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts, + .data_in = pkt->data, + .data_size_in = pkt->length, + .ctx_in = &ctx_in, + .ctx_size_in = sizeof(ctx_in), + .repeat = 1, + .flags = BPF_F_TEST_XDP_DO_REDIRECT, + ); + ctx_in.data_end = ctx_in.data + pkt->length; + + state = get_xdq_state(L); + if (state->xdp_prog_fd <= 0) + die(L, "No XDP hook attached"); + err = bpf_prog_test_run_opts(state->xdp_prog_fd, &opts); + if (err) + die(L, "Failed to run XDP hook: %s", strerror(errno)); + + lua_getglobal(L, "xdq"); + if (!lua_istable(L, -1)) + die(L, "Missing xdq table\n"); + + lua_getfield(L, -1, "total_queued"); + if (!lua_isinteger(L, -1)) + die(L, "xdq.total_queued is not an integer\n"); + total_queued_packets = lua_tointeger(L, -1); + lua_remove(L, -1); + total_queued_packets++; + lua_pushinteger(L, total_queued_packets); + lua_setfield(L, -2, "total_queued"); + + lua_getfield(L, -1, "currently_queued"); + if (!lua_isinteger(L, -1)) + die(L, "xdq.currently_queued is not an integer\n"); + currently_queued_packets = lua_tointeger(L, -1); + lua_remove(L, -1); + currently_queued_packets++; + lua_pushinteger(L, currently_queued_packets); + lua_setfield(L, -2, "currently_queued"); + + lua_remove(L, -1); // Remove xdq table from the stack + + return opts.retval; +} + +static struct ethhdr *parse_eth(lua_State *L, struct packet *pkt) +{ + struct ethhdr *eth = (struct ethhdr *) pkt->cur; + + if (pkt->cur + sizeof(struct ethhdr) > pkt->data_end) + die(L, "Missing expected eth header"); + pkt->cur += sizeof(struct ethhdr); + return eth; +} + +static struct ipv6hdr *parse_ipv6(lua_State *L, struct packet *pkt) +{ + struct ipv6hdr *iph = (struct ipv6hdr *) pkt->cur; + + if (pkt->cur + sizeof(struct ipv6hdr) > pkt->data_end) + die(L, "Missing expected IPv6 header"); + pkt->cur += sizeof(struct ipv6hdr); + return iph; +} + +static struct udphdr *parse_udp(lua_State *L, struct packet *pkt) +{ + struct udphdr *udp = (struct udphdr *) pkt->cur; + + if (pkt->cur + sizeof(struct udphdr) > pkt->data_end) + die(L, "Missing expected UDP header"); + pkt->cur += sizeof(struct udphdr); + return udp; +} + +int enqueue(lua_State *L) +{ + struct packet *pkt; + int retval; + + if (lua_gettop(L) != 1) + die(L, "Incorrect number of arguments"); + if (!lua_istable(L, 1)) + die(L, "Argument must be a table"); + + pkt = lua_parse_packet(L); + + retval = bpf_xdp(L, pkt); + + packet_free(pkt); + + lua_pushinteger(L, retval); + return 1; +} + +static struct ethhdr *parse_eth_to_lua(lua_State *L, struct packet *pkt) +{ + struct ethhdr *eth = parse_eth(L, pkt); + char src_mac[18]; + char dst_mac[18]; + + // Ethernet header + lua_createtable(L, -1, 0); + + lua_pushinteger(L, ntohs(eth->h_proto)); + lua_setfield(L, -2, "proto"); + + + mac_to_string(src_mac, eth->h_source); + lua_pushstring(L, src_mac); + lua_setfield(L, -2, "source"); + + mac_to_string(dst_mac, eth->h_dest); + lua_pushstring(L, dst_mac); + lua_setfield(L, -2, "dest"); + + lua_setfield(L, -2, "eth"); + return eth; +} + +static struct ipv6hdr *parse_ipv6_to_lua(lua_State *L, struct packet *pkt) +{ + struct ipv6hdr *iph = parse_ipv6(L, pkt); + char src_ip[INET6_ADDRSTRLEN + 1]; + char dst_ip[INET6_ADDRSTRLEN + 1]; + + // IPv6 header + lua_createtable(L, -1, 0); + + lua_pushinteger(L, (unsigned int) iph->priority); + lua_setfield(L, -2, "priority"); + + lua_pushinteger(L, (unsigned int) iph->version); + lua_setfield(L, -2, "version"); + + lua_createtable(L, -1, 0); + for (int i = 0; i < 3; i++) { + lua_pushinteger(L, (unsigned int) iph->flow_lbl[i]); + lua_rawseti(L, -2, i + 1); + } + lua_setfield(L, -2, "flow_lbl"); + + lua_pushinteger(L, (unsigned int) ntohs(iph->payload_len)); + lua_setfield(L, -2, "payload_len"); + + lua_pushinteger(L, (unsigned int) iph->nexthdr); + lua_setfield(L, -2, "nexthdr"); + + lua_pushinteger(L, (unsigned int) iph->hop_limit); + lua_setfield(L, -2, "hop_limit"); + + lua_pushstring(L, inet_ntop(AF_INET6, &iph->saddr, (char *) &src_ip, sizeof(src_ip))); + lua_setfield(L, -2, "saddr"); + + lua_pushstring(L, inet_ntop(AF_INET6, &iph->daddr, (char *) &dst_ip, sizeof(dst_ip))); + lua_setfield(L, -2, "daddr"); + + lua_setfield(L, -2, "ip"); + return iph; +} + +static struct udphdr *parse_udp_to_lua(lua_State *L, struct packet *pkt) +{ + struct udphdr *udp = parse_udp(L, pkt); + + // UDP header + lua_createtable(L, -1, 0); + + lua_pushinteger(L, (unsigned int) ntohs(udp->source)); + lua_setfield(L, -2, "source"); + + lua_pushinteger(L, (unsigned int) ntohs(udp->dest)); + lua_setfield(L, -2, "dest"); + + lua_pushinteger(L, (unsigned int) ntohs(udp->len)); + lua_setfield(L, -2, "len"); + + lua_pushinteger(L, (unsigned int) ntohs(udp->check)); + lua_setfield(L, -2, "check"); + + if (udp->len - sizeof(struct udphdr) > 0) { + lua_pushlstring(L, pkt->cur, ntohs(udp->len) - sizeof(struct udphdr)); + lua_setfield(L, -2, "payload"); + } + + lua_setfield(L, -2, "udp"); + + return udp; +} + +static void parse_packet_to_lua(lua_State *L, struct packet *pkt) +{ + struct ethhdr *eth = NULL; + struct ipv6hdr *iph = NULL; + int proto = -1; + + // Packet table + lua_createtable(L, -1, 0); + + eth = parse_eth_to_lua(L, pkt); + + if (eth->h_proto == ntohs(ETH_P_IPV6)) { + iph = parse_ipv6_to_lua(L, pkt); + proto = iph->nexthdr; + } + if (proto == IPPROTO_UDP) + parse_udp_to_lua(L, pkt); +} + +static int bpf_dequeue(lua_State *L, struct packet *pkt) +{ + + struct xdq_state *state = get_xdq_state(L); + int err; + int total_dequeued_packets; + int currently_queued_packets; + if (state->dequeue_prog_fd <= 0) + die(L, "No DEQUEUE hook attached"); + DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts, + .data_out = pkt->data, + .data_size_out = pkt->length, + .repeat = 1, + ); + + err = bpf_prog_test_run_opts(state->dequeue_prog_fd, &opts); + if (err) + die(L, "Failed to run DEQUEUE hook: %s", strerror(errno)); + + lua_getglobal(L, "xdq"); + if (!lua_istable(L, -1)) + die(L, "Missing xdq table\n"); + + lua_getfield(L, -1, "total_dequeued"); + if (!lua_isinteger(L, -1)) + die(L, "xdq.total_dequeued is not an integer\n"); + total_dequeued_packets = lua_tointeger(L, -1); + lua_remove(L, -1); + total_dequeued_packets++; + lua_pushinteger(L, total_dequeued_packets); + lua_setfield(L, -2, "total_dequeued"); + + lua_getfield(L, -1, "currently_queued"); + if (!lua_isinteger(L, -1)) + die(L, "xdq.currently_queued is not an integer\n"); + currently_queued_packets = lua_tointeger(L, -1); + lua_remove(L, -1); + currently_queued_packets--; + lua_pushinteger(L, currently_queued_packets); + lua_setfield(L, -2, "currently_queued"); + + lua_remove(L, -1); // Remove xdq table from the stack + + return opts.retval; +} + +int dequeue(lua_State *L) +{ + struct packet *pkt; + int retval; + + if (lua_gettop(L) != 0) + die(L, "Function takes no arguments"); + + pkt = packet_alloc(L, NULL, 4096); + retval = bpf_dequeue(L, pkt); + + parse_packet_to_lua(L, pkt); + packet_free(pkt); + + lua_pushinteger(L, retval); + // Return packet and DEQEUEUE hook return value + return 2; +} + +int normalize_ipv6_address(lua_State *L) +{ + char ip_str[INET6_ADDRSTRLEN + 1]; + struct in6_addr ip; + const char *ip_param; + + if (lua_gettop(L) != 1) + die(L, "Incorrect number of arguments"); + if (!lua_isstring(L, 1)) + die(L, "Argument must be a string"); + ip_param = lua_tostring(L, 1); + + if (!inet_pton(AF_INET6, ip_param, &ip)) + die(L, "Failed to parse IPv6 address %s", ip_param); + + lua_pushstring(L, inet_ntop(AF_INET6, &ip, ip_str, sizeof(ip_str))); + return 1; +} + +int fail_xdq(lua_State *L) +{ + const char *message = ""; + if (lua_gettop(L) == 1 && lua_isstring(L, -1)) + message = lua_tostring(L, -1); + die(L, message); + return -1; +} + +/* Scheduler specific helpers + * + * The following functions are here to aid scheduling algorithms to function, such + * as providing our Lua implementation with five-tuple flow handling. Ideally, this + * functionality these functions would be generic and use BTF. + * For now, we have tailored these functions for specific scheduling algorithms. + */ + +int show_flow_map(lua_State *L) +{ + system("bpftool map dump name flow_states"); + return 0; +} + +int set_flow_weight(lua_State *L) +{ + struct xdq_state *state = get_xdq_state(L); + struct network_tuple nt = {0}; + struct flow_state flow = {0}; + int flow_states_fd; + struct packet *pkt; + struct ipv6hdr *iph; + struct udphdr *udp; + int weight; + + if (lua_gettop(L) != 2) + die(L, "Incorrect number of arguments"); + if (!lua_istable(L, 1)) + die(L, "First argument must be a table"); + + if (!lua_isinteger(L, -1)) + die(L, "weight parameter isn't a number\n"); + weight = lua_tointeger(L, 2); + lua_pop(L, 1); + + pkt = lua_parse_packet(L); + parse_eth(L, pkt); + iph = parse_ipv6(L, pkt); + udp = parse_udp(L, pkt); + + nt.proto = iph->nexthdr; + nt.ipv = iph->version; + nt.saddr.ip = iph->saddr; + nt.daddr.ip = iph->daddr; + nt.daddr.port = udp->dest; + nt.saddr.port = udp->source; + + + flow.pkts = 0; + flow.finish_bytes = 0; + flow.weight = weight; + flow.persistent = 1; + + flow_states_fd = bpf_object__find_map_fd_by_name(state->xdq_bpf_obj, "flow_states"); + + if (bpf_map_update_elem(flow_states_fd, &nt, &flow, BPF_ANY)) + die(L, "Failed to update map"); + + return 0; +} + +int set_time_ns(lua_State *L) +{ + struct xdq_state *state = get_xdq_state(L); + int time_ns_fd; + __u32 key = 0; + __u64 time_ns; + + if (lua_gettop(L) != 1) + die(L, "Incorrect number of arguments"); + if (!lua_isinteger(L, -1)) + die(L, "Argument must be an integer"); + time_ns = lua_tointeger(L, 1); + lua_pop(L, 1); + + time_ns_fd = bpf_object__find_map_fd_by_name(state->xdq_bpf_obj, "xdq_time_ns"); + + if (bpf_map_update_elem(time_ns_fd, &key, &time_ns, BPF_ANY)) + die(L, "Failed to update map"); + + return 0; +} + +int get_time_ns(lua_State *L) +{ + struct xdq_state *state = get_xdq_state(L); + int time_ns_fd; + __u32 key = 0; + __u64 time_ns; + + if (lua_gettop(L) != 0) + die(L, "Incorrect number of arguments"); + + time_ns_fd = bpf_object__find_map_fd_by_name(state->xdq_bpf_obj, "xdq_time_ns"); + + if (bpf_map_lookup_elem(time_ns_fd, &key, &time_ns)) + die(L, "Failed to lookup map"); + + lua_pushnumber(L, time_ns); + + return 1; +} +/* End of Scheduler specific helpers */ + + +static void initLuaFunctions(lua_State *L, char *prog_name) +{ + struct xdq_state *state = lua_newuserdatauv(L, sizeof(struct xdq_state), 0); + state->prog_name = prog_name; + state->xdq_script = NULL; + state->xdp_prog_fd = -1; + state->dequeue_prog_fd = -1; + lua_setglobal(L, "_xdq"); + + lua_pushcfunction(L, enqueue); + lua_setglobal(L, "enqueue"); + + lua_pushcfunction(L, dequeue); + lua_setglobal(L, "dequeue"); + + lua_pushcfunction(L, load_xdq_file); + lua_setglobal(L, "load_xdq_file"); + + lua_pushcfunction(L, normalize_ipv6_address); + lua_setglobal(L, "normalize_ipv6_address"); + + lua_pushcfunction(L, fail_xdq); + lua_setglobal(L, "fail"); + + /* Scheduler specific helpers */ + lua_pushcfunction(L, set_flow_weight); + lua_setglobal(L, "set_flow_weight"); + + lua_pushcfunction(L, set_time_ns); + lua_setglobal(L, "set_time_ns"); + + lua_pushcfunction(L, get_time_ns); + lua_setglobal(L, "get_time_ns"); + + lua_pushcfunction(L, show_flow_map); + lua_setglobal(L, "show_flow_map"); +} + +static void usage(char *prog_name) +{ + printf("Usage: %s [OPTIONS] \n", prog_name); + fputs("\nTest XDP and DEQUEUE BPF hooks.\n", stdout); + fputs("Mandatory arguments to long options are mandatory for short options too.\n", stdout); + fputs("\ + -v, --verbose output BPF diagnostic\n\ + -h, --help display this help and exit\n", stdout); +} + +int main(int argc, char *argv[]) +{ + lua_State *L; + struct xdq_state *state; + char lib_file[PATH_MAX + 1] = {0}; + char *sched_file = NULL; + int opt; + + init_lib_logging(); + L = luaL_newstate(); + luaL_openlibs(L); + initLuaFunctions(L, argv[0]); + + if (!realpath("/proc/self/exe", lib_file)) + die(L, "Program location not found"); + dirname(lib_file); + if (strlen(lib_file) + strlen(XDQ_LIBRARY + 1) >= PATH_MAX) + die(L, "Path to library '%s' too long\nPath: '%s'", XDQ_LIBRARY, lib_file); + strncat(lib_file, "/", PATH_MAX); + strncat(lib_file, XDQ_LIBRARY, PATH_MAX); + + if (luaL_dofile(L, lib_file) != LUA_OK) + die(L, "Failed to load LUA library\n"); + + + while ((opt = getopt_long(argc, argv, "f:vh", long_options, NULL)) != -1) { + switch (opt) { + case 'f': + sched_file = optarg; + break; + case 'v': + state = get_xdq_state(L); + set_log_level(LOG_VERBOSE); + break; + case 'h': + usage(argv[0]); + exit(EXIT_SUCCESS); + default: + usage(argv[0]); + exit(EXIT_FAILURE); + } + } + if (argc - optind == 1) + sched_file = argv[optind]; + if (!sched_file) { + fprintf(stderr, "No XDQ object file provided. Use %s \n", argv[0]); + usage(argv[0]); + exit(EXIT_FAILURE); + } + + if (luaL_dofile(L, sched_file) != LUA_OK) { + die(L, "%s", lua_tostring(L, -1)); + } + state = get_xdq_state(L); + free(state->xdq_script); + lua_close(L); + return EXIT_SUCCESS; +} diff --git a/xdq-tester/xdq-tester.h b/xdq-tester/xdq-tester.h new file mode 100644 index 00000000..b0945da5 --- /dev/null +++ b/xdq-tester/xdq-tester.h @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2022 Freysteinn Alfredsson */ + +#ifndef XDQ_TESTER_H_ +#define XDQ_TESTER_H_ + +#include +#include +#include +#include +#include +#include + +#define BPF_PROG_TYPE_DEQUEUE 32 +#define BPF_F_TEST_XDP_DO_REDIRECT (1U << 1) + +#define XDQ_LIBRARY "lib.lua" + +struct packet { + char *data; + char *data_end; + size_t length; + char *cur; +}; + +struct xdq_state { + struct bpf_object *xdq_bpf_obj; + char *xdq_script; + char *prog_name; + int xdp_prog_fd; + int dequeue_prog_fd; +}; + +struct xdq_state *get_xdq_state(lua_State *L); +void die(lua_State *L, const char *format, ...); +struct packet *packet_alloc(lua_State *L, struct packet *pkt, size_t size); +void packet_free(struct packet *pkt); + +/* Lua exported functions */ +int load_xdq_file(lua_State *L); +int enqueue(lua_State *L); +int dequeue(lua_State *L); +int normalize_ipv6_address(lua_State *L); +int fail_xdq(lua_State *L); + +#endif // XDQ_TESTER_H_