7 changes: 7 additions & 0 deletions Makefile
@@ -89,6 +89,13 @@ test: lint unit e2e
e2e:
	KILO_IMAGE=squat/kilo:test bash_unit $(BASH_UNIT_FLAGS) ./e2e/setup.sh ./e2e/full-mesh.sh ./e2e/location-mesh.sh ./e2e/cross-mesh.sh ./e2e/multi-cluster.sh ./e2e/handlers.sh ./e2e/kgctl.sh ./e2e/teardown.sh

# e2e-cilium runs the Kilo --compatibility=cilium e2e suite against a
# kind cluster where Cilium provides the CNI. It is a separate target
# from `e2e` because the Cilium cluster is incompatible with the Kilo
# bridge CNI used by the default suite.
e2e-cilium:
	KILO_IMAGE=squat/kilo:test bash_unit $(BASH_UNIT_FLAGS) ./e2e/cilium-setup.sh ./e2e/cilium-cross-mesh.sh ./e2e/cilium-teardown.sh
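
For reference, the new suite runs on its own; a sketch, assuming the test image has already been built and tagged squat/kilo:test (for example via the repo's container build target):

    # Run only the Cilium-mode e2e suite.
    make e2e-cilium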

docs/kg.md:
	go run ./cmd/kg/... --help | head -n -2 > help.txt
	go tool embedmd -w docs/kg.md
34 changes: 34 additions & 0 deletions e2e/cilium-cross-mesh.sh
@@ -0,0 +1,34 @@
#!/usr/bin/env bash
# shellcheck disable=SC1091
. lib.sh

# Cilium-CNI counterpart of e2e/cross-mesh.sh. The Kilo DaemonSet is the
# one applied by create_cilium_cluster (kilo-kind-cilium.yaml), which
# already runs Kilo with --cni=false --compatibility=cilium. This suite
# only annotates locations and switches granularity to "cross".
setup_suite() {
    _kubectl annotate node "$KIND_CLUSTER-control-plane" kilo.squat.ai/location=loc-a --overwrite
    _kubectl annotate node "$KIND_CLUSTER-worker" kilo.squat.ai/location=loc-a --overwrite
    _kubectl annotate node "$KIND_CLUSTER-worker2" kilo.squat.ai/location=loc-b --overwrite
    # shellcheck disable=SC2016
    _kubectl patch ds -n kube-system kilo -p '{"spec":{"template":{"spec":{"containers":[{"name":"kilo","args":["--hostname=$(NODE_NAME)","--create-interface=false","--cni=false","--compatibility=cilium","--mesh-granularity=cross","--kubeconfig=/etc/kubernetes/kubeconfig","--internal-cidr=$(NODE_IP)/32"]}]}}}}'
    block_until_ready_by_name kube-system kilo-userspace
}

test_cilium_cross_mesh_connectivity() {
    assert "retry 30 5 '' check_ping" "should be able to ping all Pods over Cilium VXLAN + Kilo cross mesh"
    assert "retry 10 5 'the adjacency matrix is not complete yet' check_adjacent 3" "adjacency should return the right number of successful pings"
    echo "sleep for 30s (one reconciliation period) and try again..."
    sleep 30

Review comment (medium):

Using a hardcoded sleep in end-to-end tests is generally discouraged as it can lead to flaky tests (if the reconciliation takes longer than 30s) or unnecessarily slow tests (if it takes much less). It is better to poll for a specific state or condition that indicates the reconciliation has completed, such as checking for a specific log message or a change in the node/peer status.
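
A minimal sketch of that polling approach, reusing the suite's retry helper; the helper below and its log pattern are hypothetical, and the polling budget is illustrative rather than tuned:

    # reconciled succeeds once a recent Kilo log line suggests a
    # reconciliation pass has run; match whatever Kilo actually logs.
    reconciled() {
        _kubectl logs -n kube-system ds/kilo --since=35s | grep -q -i 'reconcil'
    }
    assert "retry 12 5 'waiting for a reconciliation pass' reconciled" "Kilo should reconcile within the polling window"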

assert "retry 10 5 'the adjacency matrix is not complete yet' check_adjacent 3" "adjacency should return the right number of successful pings after reconciling"
}

test_cilium_cross_peer_topology() {
    local CP_PEERS WORKER_PEERS WORKER2_PEERS
    CP_PEERS=$(_kgctl showconf node "$KIND_CLUSTER-control-plane" | grep -c '^\[Peer\]')
    WORKER_PEERS=$(_kgctl showconf node "$KIND_CLUSTER-worker" | grep -c '^\[Peer\]')
    WORKER2_PEERS=$(_kgctl showconf node "$KIND_CLUSTER-worker2" | grep -c '^\[Peer\]')
    assert_equals "1" "$CP_PEERS" "control-plane (loc-a) should have 1 peer (the loc-b node)"
    assert_equals "1" "$WORKER_PEERS" "worker (loc-a) should have 1 peer (the loc-b node)"
    assert_equals "2" "$WORKER2_PEERS" "worker2 (loc-b) should have 2 peers (both loc-a nodes)"
}
10 changes: 10 additions & 0 deletions e2e/cilium-setup.sh
@@ -0,0 +1,10 @@
#!/usr/bin/env bash
# shellcheck disable=SC1091
. lib.sh

# Bring up a kind cluster with Cilium as the CNI for the Cilium-mode e2e
# suite. Counterpart of e2e/setup.sh, which provisions a cluster that
# uses the Kilo bridge CNI.
setup_suite() {
    create_cilium_cluster "$(build_kind_config 2)"
}
10 changes: 10 additions & 0 deletions e2e/cilium-teardown.sh
@@ -0,0 +1,10 @@
#!/usr/bin/env bash
# shellcheck disable=SC1091
. lib.sh

teardown_suite () {
    if [ -n "$E2E_SKIP_TEARDOWN_ON_FAILURE" ]; then
        return
    fi
    delete_cluster
}
146 changes: 146 additions & 0 deletions e2e/kilo-kind-cilium.yaml
@@ -0,0 +1,146 @@
apiVersion: v1
kind: ServiceAccount
metadata:
  name: kilo
  namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: kilo
rules:
- apiGroups:
  - ""
  resources:
  - nodes
  verbs:
  - list
  - patch
  - watch
- apiGroups:
  - kilo.squat.ai
  resources:
  - peers
  verbs:
  - list
  - watch
- apiGroups:
  - apiextensions.k8s.io
  resources:
  - customresourcedefinitions
  verbs:
  - get
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: kilo
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: kilo
subjects:
- kind: ServiceAccount
  name: kilo
  namespace: kube-system
---
# Kilo DaemonSet for the Cilium e2e suite. The CNI is provided by Cilium
# (no Kilo CNI ConfigMap and no install-cni init container), so Kilo runs
# in --cni=false / --compatibility=cilium mode and only manages the
# WireGuard mesh on top of Cilium's overlay.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: kilo
  namespace: kube-system
  labels:
    app.kubernetes.io/name: kilo-userspace
    app.kubernetes.io/part-of: kilo
spec:
  selector:
    matchLabels:
      app.kubernetes.io/name: kilo-userspace
      app.kubernetes.io/part-of: kilo
  template:
    metadata:
      labels:
        app.kubernetes.io/name: kilo-userspace
        app.kubernetes.io/part-of: kilo
    spec:
      serviceAccountName: kilo
      hostNetwork: true
      containers:
      - name: kilo
        image: squat/kilo:test
        imagePullPolicy: Never
        args:
        - --hostname=$(NODE_NAME)
        - --create-interface=false
        - --cni=false
        - --compatibility=cilium
        - --mesh-granularity=full
        - --kubeconfig=/etc/kubernetes/kubeconfig
        - --internal-cidr=$(NODE_IP)/32
        env:
        - name: NODE_NAME
          valueFrom:
            fieldRef:
              fieldPath: spec.nodeName
        - name: NODE_IP
          valueFrom:
            fieldRef:
              fieldPath: status.hostIP
        ports:
        - containerPort: 1107
          name: metrics
        securityContext:
          privileged: true
        volumeMounts:
        - name: kilo-dir
          mountPath: /var/lib/kilo
        - name: lib-modules
          mountPath: /lib/modules
          readOnly: true
        - name: xtables-lock
          mountPath: /run/xtables.lock
          readOnly: false
        - name: wireguard
          mountPath: /var/run/wireguard
          readOnly: false
        - name: kubeconfig
          mountPath: /etc/kubernetes
          readOnly: true
      - name: wireguard
        image: ghcr.io/masipcat/wireguard-go-docker:0.0.20230223
        args:
        - wireguard-go
        - --foreground
        - kilo0
        securityContext:
          privileged: true
        volumeMounts:
        - name: wireguard
          mountPath: /var/run/wireguard
          readOnly: false
      tolerations:
      - effect: NoSchedule
        operator: Exists
      - effect: NoExecute
        operator: Exists
      volumes:
      - name: kilo-dir
        hostPath:
          path: /var/lib/kilo
      - name: lib-modules
        hostPath:
          path: /lib/modules
      - name: xtables-lock
        hostPath:
          path: /run/xtables.lock
          type: FileOrCreate
      - name: wireguard
        hostPath:
          path: /var/run/wireguard
      - name: kubeconfig
        secret:
          secretName: kubeconfig
49 changes: 49 additions & 0 deletions e2e/lib.sh
@@ -142,6 +142,55 @@ delete_cluster () {
    _kind delete clusters $KIND_CLUSTER
}

# install_cilium installs Cilium via Helm into the current kind cluster
# using a minimal config: VXLAN overlay, Kubernetes IPAM, host firewall off.
# Kube-proxy replacement is intentionally left at the default (off) to
# keep the e2e harness focused on Kilo's --compatibility=cilium path
# rather than Cilium's eBPF service LB; KPR coverage can be added in a
# follow-up.
install_cilium() {
    local CILIUM_VERSION="${CILIUM_VERSION:-1.16.5}"
    helm repo add cilium https://helm.cilium.io/ >/dev/null 2>&1 || true
    helm repo update cilium >/dev/null 2>&1 || true
    helm --kubeconfig="$KUBECONFIG" install cilium cilium/cilium \
        --namespace kube-system \
        --version "$CILIUM_VERSION" \
        --set ipam.mode=kubernetes \
        --set tunnelProtocol=vxlan \
        --set hostFirewall.enabled=false \
        --set image.pullPolicy=IfNotPresent \
        --set rollOutCiliumPods=true \
        --wait
}

# create_cilium_cluster launches a kind cluster, installs Cilium as the CNI,
# deploys Kilo in --compatibility=cilium mode, and brings up Adjacency and
# the curl helper, mirroring create_cluster.
create_cilium_cluster() {
    # shellcheck disable=SC2119
    local CONFIG="${1:-$(build_kind_config)}"
    _kind delete clusters $KIND_CLUSTER > /dev/null
    _kind create cluster --name $KIND_CLUSTER --config <(echo "$CONFIG")
    # Cilium needs to be installed before any Pod that requires CNI networking
    # can become Ready, so install it first.
    install_cilium
    block_until_ready kube-system k8s-app=cilium
    _kubectl wait nodes --all --for=condition=Ready --timeout=120s
    block_until_ready kube_system k8s-app=kube-dns

Review comment (medium):

The namespace kube_system contains an underscore, which is likely a typo for the standard Kubernetes namespace kube-system. This inconsistency with line 177 will cause the block_until_ready call to look in a non-existent namespace, which may lead to a false positive or a timeout depending on how is_ready handles missing namespaces.

Suggested change:
-    block_until_ready kube_system k8s-app=kube-dns
+    block_until_ready kube-system k8s-app=kube-dns

    # Load the Kilo image into kind and apply the Cilium-mode manifest.
    docker tag "$KILO_IMAGE" squat/kilo:test
    $KIND_BINARY load docker-image squat/kilo:test --name $KIND_CLUSTER
    _kubectl create secret generic kubeconfig --from-file=kubeconfig="$KUBECONFIG" -n kube-system
    _kubectl apply -f ../manifests/crds.yaml
    _kubectl apply -f kilo-kind-cilium.yaml
    if ! block_until_ready_by_name kube-system kilo-userspace; then return 1; fi
    _kubectl apply -f helper-curl.yaml
    block_until_ready_by_name default curl || return 1
    _kubectl taint node $KIND_CLUSTER-control-plane node-role.kubernetes.io/control-plane:NoSchedule-
    _kubectl apply -f https://raw.githubusercontent.com/kilo-io/adjacency/main/example.yaml

Review comment (medium):

Relying on a raw URL from the main branch of an external repository for test manifests can introduce fragility. If the external file is modified or moved, the e2e suite may break unexpectedly. It is recommended to use a specific commit hash in the URL or to vendor the manifest locally within the repository.
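
For illustration, a pinned reference might look like the following; the SHA is a placeholder, not a real revision of kilo-io/adjacency:

    # Substitute an actual commit SHA to pin the manifest.
    _kubectl apply -f https://raw.githubusercontent.com/kilo-io/adjacency/<commit-sha>/example.yaml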

    block_until_ready_by_name default adjacency
}

curl_pod() {
    _kubectl get pods -l app.kubernetes.io/name=curl -o name | xargs -I{} "$KUBECTL_BINARY" --kubeconfig="$KUBECONFIG" exec {} -- /usr/bin/curl "$@"
}
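
curl_pod forwards its arguments to curl running inside the helper Pod; a hypothetical invocation (the Pod IP is illustrative):

    # Probe another Pod from inside the cluster; -m bounds the request time.
    curl_pod -m 5 -s http://10.244.1.10/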