Skip to content

Commit 44fb03e

Browse files
authored
ci: [CNI] Load testing for cilium cni (#1871)
ci: [CNI] Load testing for cilium cni
1 parent e792ef5 commit 44fb03e

File tree

6 files changed

+323
-9
lines changed

6 files changed

+323
-9
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
# Load-test pipeline for the Cilium CNI on AKS Overlay.
# Manually triggered only — no PR or CI triggers.
pr: none
trigger: none

stages:
  # Stand up an AKS Overlay cluster with the Cilium dataplane.
  - stage: creating_aks_cluster
    displayName: "Create AKS Cluster with Cilium"
    jobs:
      - job: create_aks_cluster_with_cilium
        steps:
          - task: AzureCLI@1
            inputs:
              azureSubscription: $(TEST_SUB_SERVICE_CONNECTION)
              scriptLocation: "inlineScript"
              scriptType: "bash"
              addSpnToEnvironment: true
              # 10 x Standard_DS4_v2 nodes sized for the 2400-pod scale test.
              inlineScript: |
                set -ex
                make -C ./hack/swift azcfg AZCLI=az REGION=$(LOCATION)
                make -C ./hack/swift overlay-cilium-up AZCLI=az REGION=$(LOCATION) SUB=$(SUBSCRIPTION_ID) CLUSTER=${RESOURCE_GROUP} NODE_COUNT=10 VM_SIZE=Standard_DS4_v2
            name: "CreateAksCluster"
            displayName: "Create AKS Cluster"

  # Deploy the test workload and repeatedly scale it up/down.
  - stage: pod_deployment
    dependsOn: creating_aks_cluster
    displayName: "Pod Deployment"
    jobs:
      - job: deploy_pods
        steps:
          - task: AzureCLI@1
            displayName: "Pod Deployment"
            inputs:
              azureSubscription: $(TEST_SUB_SERVICE_CONNECTION)
              scriptLocation: "inlineScript"
              scriptType: "bash"
              addSpnToEnvironment: true
              inlineScript: |
                set -ex
                az extension add --name aks-preview
                make -C ./hack/swift set-kubeconf AZCLI=az CLUSTER=${RESOURCE_GROUP}
                bash hack/scripts/scale_deployment.sh

  # Cross-check pod IPs against CNS and Cilium endpoint state.
  - stage: validate_state
    dependsOn: pod_deployment
    displayName: "Validate State"
    jobs:
      - job: validate_state
        steps:
          - task: AzureCLI@1
            inputs:
              azureSubscription: $(TEST_SUB_SERVICE_CONNECTION)
              scriptLocation: "inlineScript"
              scriptType: "bash"
              addSpnToEnvironment: true
              inlineScript: |
                make -C ./hack/swift set-kubeconf AZCLI=az CLUSTER=${RESOURCE_GROUP}
                kubectl get pods -A
                bash hack/scripts/validate_state.sh
            name: "ValidateState"
            displayName: "Validate State"
            retryCountOnTaskFailure: 3

  # Run the upstream Cilium connectivity suite against the cluster.
  - stage: connectivity_tests
    dependsOn: validate_state
    displayName: "Connectivity Tests"
    jobs:
      - job: cni_tests
        steps:
          - script: |
              echo "install cilium CLI"
              CILIUM_CLI_VERSION=v0.13.2
              CLI_ARCH=amd64
              curl -L --fail --remote-name-all https://github.com/cilium/cilium-cli/releases/download/${CILIUM_CLI_VERSION}/cilium-linux-${CLI_ARCH}.tar.gz{,.sha256sum}
              sha256sum --check cilium-linux-${CLI_ARCH}.tar.gz.sha256sum
              sudo tar xzvfC cilium-linux-${CLI_ARCH}.tar.gz /usr/local/bin
              rm cilium-linux-${CLI_ARCH}.tar.gz{,.sha256sum}
            name: "InstallCiliumCli"
            displayName: "Install Cilium CLI"
          - task: AzureCLI@1
            inputs:
              azureSubscription: $(TEST_SUB_SERVICE_CONNECTION)
              scriptLocation: "inlineScript"
              scriptType: "bash"
              addSpnToEnvironment: true
              inlineScript: |
                set -ex
                make -C ./hack/swift set-kubeconf AZCLI=az CLUSTER=${RESOURCE_GROUP}
            name: "GetCluster"
            displayName: "Get AKS Cluster"
          - script: |
              cilium connectivity test
            retryCountOnTaskFailure: 6
            name: "CiliumConnectivityTests"
            displayName: "Run Cilium Connectivity Tests"

  # Tear down the cluster and resource group; runs even if earlier stages fail.
  - stage: delete
    displayName: "Delete Resources"
    dependsOn:
      - connectivity_tests
    jobs:
      - job: delete_resources
        steps:
          - task: AzureCLI@1
            inputs:
              azureSubscription: $(TEST_SUB_SERVICE_CONNECTION)
              scriptLocation: "inlineScript"
              scriptType: "bash"
              addSpnToEnvironment: true
              # NOTE(review): `[ "$(DELETE_RESOURCES)" ]` only checks that the
              # macro expands to a non-empty string — if the variable is unset,
              # the literal text "$(DELETE_RESOURCES)" is non-empty too, so the
              # delete branch would run. Confirm the variable is always defined.
              inlineScript: |
                set -ex
                if [ "$(DELETE_RESOURCES)" ]
                then
                  echo "Deleting Cluster and resource group"
                  make -C ./hack/swift set-kubeconf AZCLI=az CLUSTER=${RESOURCE_GROUP}
                  make -C ./hack/swift azcfg AZCLI=az REGION=$(LOCATION)
                  make -C ./hack/swift down AZCLI=az REGION=$(LOCATION) SUB=$(SUBSCRIPTION_ID) CLUSTER=${RESOURCE_GROUP}
                  echo "Cluster and resources down"
                else
                  echo "Deletion of resources is False"
                fi
            name: "CleanUpCluster"
            displayName: "Cleanup cluster"
            condition: always()

hack/manifests/hostprocess.yaml

+38
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
# Privileged DaemonSet used by the validation scripts: one pod per node with
# host networking/PID and the CNS state directory mounted, so the scripts can
# read /var/run/azure-cns/azure-endpoints.json and chroot into the host.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: privileged-daemonset
  namespace: kube-system
  labels:
    app: privileged-daemonset
spec:
  selector:
    matchLabels:
      app: privileged-daemonset
  template:
    metadata:
      labels:
        app: privileged-daemonset
    spec:
      hostNetwork: true
      hostPID: true
      containers:
        - name: privileged-container
          image: mcr.microsoft.com/dotnet/runtime-deps:6.0
          # Keep the pod alive ~10 years; the container is only an exec target.
          command: ["/bin/sleep", "3650d"]
          securityContext:
            privileged: true
            runAsUser: 0
          volumeMounts:
            # CNS endpoint-state directory from the host.
            - mountPath: /var/run/azure-cns
              name: azure-cns
            # Full host filesystem, used for `chroot /host ...`.
            - mountPath: /host
              name: host-root
      volumes:
        - name: azure-cns
          hostPath:
            path: /var/run/azure-cns
        - hostPath:
            path: /
            type: ""
          name: host-root

hack/manifests/pod.yaml

+20
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# Scale-test workload: a Deployment of pause containers (starts at the default
# 1 replica; scale_deployment.sh drives it up to thousands of replicas).
apiVersion: apps/v1
kind: Deployment
metadata:
  name: container
  namespace: default
spec:
  selector:
    matchLabels:
      app: container
  template:
    metadata:
      labels:
        app: container
    spec:
      containers:
        # NOTE(review): named "ubuntu" but runs the pause image — the name is
        # cosmetic; pause keeps per-pod resource usage minimal for scale tests.
        - name: ubuntu
          image: mcr.microsoft.com/oss/kubernetes/pause:3.6
          imagePullPolicy: Always
          securityContext:
            privileged: true

hack/scripts/scale_deployment.sh

+37
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
#!/bin/bash
# Repeatedly scales the "container" deployment up and down to stress CNI IP
# allocation, then leaves it scaled up for the validation stage.
set -ex

kubectl apply -f hack/manifests/pod.yaml
kubectl apply -f hack/manifests/hostprocess.yaml
sleep 1m

total_num_of_run=4
scale_up_of_pods=2400
scale_down_pods=1
echo "Total num of run $total_num_of_run"

# Block until the "container" deployment reports exactly $1 available
# replicas. Polls every 5s; no timeout — relies on the pipeline-level timeout.
function check_deployment() {
    local replicas="$1"
    local available=-1
    local current_available
    until [ "${available}" -eq "${replicas}" ]; do
        sleep 5s
        # availableReplicas is absent (empty) until at least one pod is ready.
        current_available=$(kubectl get deployment container -o "jsonpath={.status.availableReplicas}" )
        if [ "$current_available" != '' ]; then
            available=$current_available
        fi
        echo "available replicas: ${available}"
    done
    echo "deployment complete."
}

for ((i=1; i <= total_num_of_run; i++)); do
    echo "Current Run: $i"
    echo "Scaling pods to : $scale_up_of_pods"
    kubectl scale deployment container --replicas $scale_up_of_pods
    check_deployment $scale_up_of_pods
    echo "Scaling down pods to : $scale_down_pods"
    kubectl scale deployment container --replicas $scale_down_pods
    check_deployment $scale_down_pods
done

# Final scale-up so validate_state.sh sees the cluster at full load.
kubectl scale deployment container --replicas $scale_up_of_pods
check_deployment $scale_up_of_pods

hack/scripts/validate_state.sh

+98
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
#!/bin/bash
# find_in_array LIST VALUE
# Returns 0 if VALUE appears as a whitespace-separated word of LIST, else 1.
# LIST is intentionally unquoted below so it word-splits into elements.
function find_in_array() {
    local element
    for element in $1; do
        [ "$element" == "$2" ] && return 0
    done
    return 1
}
11+
12+
# For every node: collect the CNS (azure-endpoints.json) and Cilium endpoint
# state, then verify each running pod's IP appears in both, and that the
# counts match. Exits non-zero on the first inconsistency.
for node in $(kubectl get nodes -o name);
do
    echo "Current : $node"
    node_name="${node##*/}"
    node_ip=$(kubectl get "$node" -o jsonpath='{$.status.addresses[?(@.type=="InternalIP")].address}')
    echo "Node internal ip: $node_ip"
    privileged_pod=$(kubectl get pods -n kube-system -l app=privileged-daemonset -o wide | grep "$node_name" | awk '{print $1}')
    echo "privileged pod : $privileged_pod"
    # Retry until the CNS state file is non-empty; no timeout — the pipeline
    # task's retry/timeout bounds this.
    while ! [ -s "azure_endpoints.json" ]
    do
        echo "trying to get the azure_endpoints"
        kubectl exec -i "$privileged_pod" -n kube-system -- bash -c "cat /var/run/azure-cns/azure-endpoints.json" > azure_endpoints.json
        sleep 10
    done

    cilium_agent=$(kubectl get pod -l k8s-app=cilium -n kube-system -o wide | grep "$node_name" | awk '{print $1}')
    echo "cilium agent : $cilium_agent"

    while ! [ -s "cilium_endpoints.json" ]
    do
        echo "trying to get the cilium_endpoints"
        kubectl exec -i "$cilium_agent" -n kube-system -- bash -c "cilium endpoint list -o json" > cilium_endpoints.json
        sleep 10
    done

    total_pods=$(kubectl get pods --all-namespaces -o wide --field-selector spec.nodeName="$node_name",status.phase=Running --output json)

    echo "Checking if there are any pods with no ips"
    pods_with_no_ip=$(echo "$total_pods" | jq -j '(.items[] | select(.status.podIP == "" or .status.podIP == null))')
    if [ "$pods_with_no_ip" != "" ]; then
        echo "There are some pods with no ip assigned."
        kubectl get pods -A -o wide
        exit 1
    fi
    total_pods_ips=$(echo "$total_pods" | jq -r '(.items[] | .status.podIP)')
    pod_ips=()
    num_of_pod_ips=0
    # Host-network pods report the node IP; exclude them from the comparison.
    for ip in $total_pods_ips
    do
        if [ "$ip" != "$node_ip" ]; then
            pod_ips+=("$ip")
            num_of_pod_ips=$((num_of_pod_ips+1))
        fi
    done
    echo "Number of pods running with ip assigned $num_of_pod_ips"

    num_of_azure_endpoint_ips=$( jq -r '[.Endpoints | .[] | .IfnameToIPMap.eth0.IPv4[0].IP] | length' azure_endpoints.json )
    azure_endpoint_ips=$( jq -r '(.Endpoints | .[] | .IfnameToIPMap.eth0.IPv4[0].IP) ' azure_endpoints.json )
    echo "Number of azure endpoint ips : $num_of_azure_endpoint_ips"

    if [ "$num_of_pod_ips" != "$num_of_azure_endpoint_ips" ]; then
        printf "Error: Number of pods in running state is less than total ips in the azure endpoint file" >&2
        exit 1
    fi

    echo "checking the ips in the azure endpoints file"
    for ip in "${pod_ips[@]}"
    do
        if ! find_in_array "$azure_endpoint_ips" "$ip"; then
            printf "Error: %s Not found in the azure_endpoints.json" "$ip" >&2
            exit 1
        fi
    done

    num_of_cilium_endpoints=$(jq -r '[.[] | select(.status.networking.addressing[0].ipv4 != null)] | length' cilium_endpoints.json)
    cilium_endpoint_ips=$(jq -r '(.[] | select(.status.networking.addressing[0].ipv4 != null) | .status.networking.addressing[0].ipv4)' cilium_endpoints.json)
    echo "Number of cilium endpoints: $num_of_cilium_endpoints"

    if [ "$num_of_pod_ips" != "$num_of_cilium_endpoints" ]; then
        printf "Error: Number of pods in running state is less than total ips in the cilium endpoint file" >&2
        exit 1
    fi

    for ip in "${pod_ips[@]}"
    do
        if ! find_in_array "$cilium_endpoint_ips" "$ip"; then
            printf "Error: %s Not found in the cilium_endpoints.json" "$ip" >&2
            exit 1
        fi
    done

    # We are restarting the systemd network and checking that the connectivity
    # works after the restart. For more details:
    # https://github.com/cilium/cilium/issues/18706
    kubectl exec -i "$privileged_pod" -n kube-system -- bash -c "chroot /host /bin/bash -c 'systemctl restart systemd-networkd'"
    # Remove per-node state files so the next iteration re-fetches them.
    rm -rf cilium_endpoints.json azure_endpoints.json
done

hack/swift/Makefile

+12-9
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@ AZCLI ?= docker run --rm -v $(AZCFG):/root/.azure -v $(KUBECFG):/root/.kube -v
1111
REGION ?= westus2
1212
OS_SKU ?= Ubuntu
1313
VM_SIZE ?= Standard_B2s
14+
NODE_COUNT ?= 2
15+
1416
# overrideable variables
1517
SUB ?= $(AZURE_SUBSCRIPTION)
1618
CLUSTER ?= $(USER)-$(REGION)
@@ -53,6 +55,7 @@ vars: ## Show the input vars configured for the cluster commands
5355
@echo VNET=$(VNET)
5456
@echo OS_SKU=$(OS_SKU)
5557
@echo VM_SIZE=$(VM_SIZE)
58+
@echo NODE_COUNT=$(NODE_COUNT)
5659

5760

5861
##@ SWIFT Infra
@@ -81,8 +84,8 @@ up: swift-up ## Alias to swift-up
8184

8285
overlay-byocni-up: rg-up overlay-net-up ## Brings up an Overlay BYO CNI cluster
8386
$(AZCLI) aks create -n $(CLUSTER) -g $(GROUP) -l $(REGION) \
84-
--node-count 2 \
85-
--node-vm-size Standard_B2s \
87+
--node-count $(NODE_COUNT) \
88+
--node-vm-size $(VM_SIZE) \
8689
--load-balancer-sku basic \
8790
--network-plugin none \
8891
--network-plugin-mode overlay \
@@ -94,11 +97,11 @@ overlay-byocni-up: rg-up overlay-net-up ## Brings up an Overlay BYO CNI cluster
9497

9598
overlay-cilium-up: rg-up overlay-net-up ## Brings up an Overlay AzCNI cluster
9699
$(AZCLI) aks create -n $(CLUSTER) -g $(GROUP) -l $(REGION) \
97-
--node-count 2 \
100+
--node-count $(NODE_COUNT) \
98101
--node-vm-size $(VM_SIZE) \
99102
--load-balancer-sku basic \
100103
--network-plugin azure \
101-
--enable-cilium-dataplane \
104+
--network-dataplane cilium \
102105
--network-plugin-mode overlay \
103106
--pod-cidr 192.168.0.0/16 \
104107
--vnet-subnet-id /subscriptions/$(SUB)/resourceGroups/$(GROUP)/providers/Microsoft.Network/virtualNetworks/$(VNET)/subnets/nodenet \
@@ -108,7 +111,7 @@ overlay-cilium-up: rg-up overlay-net-up ## Brings up an Overlay AzCNI cluster
108111

109112
overlay-up: rg-up overlay-net-up ## Brings up an Overlay AzCNI cluster
110113
$(AZCLI) aks create -n $(CLUSTER) -g $(GROUP) -l $(REGION) \
111-
--node-count 2 \
114+
--node-count $(NODE_COUNT) \
112115
--node-vm-size $(VM_SIZE) \
113116
--load-balancer-sku basic \
114117
--network-plugin azure \
@@ -121,7 +124,7 @@ overlay-up: rg-up overlay-net-up ## Brings up an Overlay AzCNI cluster
121124

122125
swift-byocni-up: rg-up swift-net-up ## Bring up a SWIFT BYO CNI cluster
123126
$(AZCLI) aks create -n $(CLUSTER) -g $(GROUP) -l $(REGION) \
124-
--node-count 2 \
127+
--node-count $(NODE_COUNT) \
125128
--node-vm-size $(VM_SIZE) \
126129
--load-balancer-sku basic \
127130
--network-plugin none \
@@ -134,11 +137,11 @@ swift-byocni-up: rg-up swift-net-up ## Bring up a SWIFT BYO CNI cluster
134137

135138
swift-cilium-up: rg-up swift-net-up ## Bring up a SWIFT Cilium cluster
136139
$(AZCLI) aks create -n $(CLUSTER) -g $(GROUP) -l $(REGION) \
137-
--node-count 2 \
140+
--node-count $(NODE_COUNT) \
138141
--node-vm-size $(VM_SIZE) \
139142
--load-balancer-sku basic \
140143
--network-plugin azure \
141-
--enable-cilium-dataplane \
144+
--network-dataplane cilium \
142145
--aks-custom-headers AKSHTTPCustomFeatures=Microsoft.ContainerService/CiliumDataplanePreview \
143146
--vnet-subnet-id /subscriptions/$(SUB)/resourceGroups/$(GROUP)/providers/Microsoft.Network/virtualNetworks/$(VNET)/subnets/nodenet \
144147
--pod-subnet-id /subscriptions/$(SUB)/resourceGroups/$(GROUP)/providers/Microsoft.Network/virtualNetworks/$(VNET)/subnets/podnet \
@@ -148,7 +151,7 @@ swift-cilium-up: rg-up swift-net-up ## Bring up a SWIFT Cilium cluster
148151

149152
swift-up: rg-up swift-net-up ## Bring up a SWIFT AzCNI cluster
150153
$(AZCLI) aks create -n $(CLUSTER) -g $(GROUP) -l $(REGION) \
151-
--node-count 2 \
154+
--node-count $(NODE_COUNT) \
152155
--node-vm-size $(VM_SIZE) \
153156
--load-balancer-sku basic \
154157
--network-plugin azure \

0 commit comments

Comments
 (0)