Skip to content

Commit 8edc584

Browse files
author
Bangqi Zhu
committed
RAGEngine e2e tests
Signed-off-by: Bangqi Zhu <[email protected]>
1 parent 6bd421f commit 8edc584

File tree

10 files changed

+1568
-8
lines changed

10 files changed

+1568
-8
lines changed
Lines changed: 285 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,285 @@
# Reusable workflow (invoked via `workflow_call`) that runs the RAGEngine e2e
# suite. Callers supply the inputs declared below.
name: ragengine-e2e-workflow

on:
  workflow_call:
    inputs:
      # Commit to check out; its short form is also used as the suffix of the
      # per-run resource names.
      git_sha:
        type: string
        required: true
      # Selects the node provisioner under test. The job branches on the
      # values "gpuprovisioner" and "azkarpenter".
      node_provisioner:
        type: string
        required: false
        default: gpuprovisioner
      # Release tag; only read when isRelease is true.
      tag:
        type: string
      # When true, images are not built and `registry` / `tag` are used
      # instead of the per-run registry and version.
      isRelease:
        type: boolean
        default: false
      # Image registry to use for release runs (isRelease == true).
      registry:
        type: string
      # Passed to the cluster-creation step as AZURE_LOCATION.
      region:
        type: string
        description: "the azure location to run the e2e test in"
        default: "eastus"
      # Passed to the cluster-creation step as AKS_K8S_VERSION. Quoted so it
      # stays a string.
      k8s_version:
        type: string
        default: "1.30.0"
jobs:
  # Creates a throw-away resource group, ACR and AKS cluster, installs the
  # selected node provisioner plus the KAITO workspace and RAG Engine charts,
  # runs the RAGEngine e2e make target and finally deletes the resource group.
  #
  # NOTE(review): several steps read E2E_* variables (E2E_SUBSCRIPTION_ID,
  # E2E_TENANT_ID, E2E_AMRT_SECRET_NAME, E2E_ACR_AMRT_USERNAME,
  # E2E_ACR_AMRT_PASSWORD) that are not defined in this workflow; presumably
  # they are provided by the self-hosted runner's environment - confirm.
  e2e-tests:
    runs-on: ["self-hosted", "hostname:kaito-e2e-github-runner"]
    name: e2e-tests-${{ inputs.node_provisioner }}
    permissions:
      contents: read
      id-token: write  # This is required for requesting the JWT
    environment: e2e-test
    env:
      GO_VERSION: "1.22"
      KARPENTER_NAMESPACE: "karpenter"
      GPU_PROVISIONER_NAMESPACE: "gpu-provisioner"

    steps:
      - name: Harden Runner
        uses: step-security/harden-runner@0080882f6c36860b6ba35c610c98ce87d4e2f26f # v2.10.2
        with:
          egress-policy: audit

      - name: Checkout
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          ref: ${{ inputs.git_sha }}

      # Derives the per-run names (VERSION, CLUSTER_NAME, REGISTRY) and exports
      # them through GITHUB_ENV for all later steps.
      - name: Set e2e Resource and Cluster Name
        run: |
          # `run` steps execute under `bash -e`; without `|| true` a failing
          # rev-parse would abort the step before the $RANDOM fallback runs.
          rand=$(git rev-parse --short "$GIT_SHA" || true)

          if [ "$rand" = "" ]; then
             rand=$RANDOM
          fi

          echo "VERSION=${rand}" >> "$GITHUB_ENV"
          echo "CLUSTER_NAME=${NODE_PROVISIONER}${rand}" >> "$GITHUB_ENV"
          echo "REGISTRY=${NODE_PROVISIONER}${rand}.azurecr.io" >> "$GITHUB_ENV"
          echo "RUN_LLAMA_13B=false" >> "$GITHUB_ENV"
        env:
          # Inputs are passed via env instead of being interpolated into the
          # script text, so their content can never be executed as shell code.
          GIT_SHA: ${{ inputs.git_sha }}
          NODE_PROVISIONER: ${{ inputs.node_provisioner }}

      # Release runs override the per-run REGISTRY and VERSION set above.
      - name: Set Registry
        if: ${{ inputs.isRelease }}
        run: |
          echo "REGISTRY=${RELEASE_REGISTRY}" >> "$GITHUB_ENV"
          # Strip only a leading "v". The previous `tr -d v` removed every "v"
          # in the tag, corrupting tags such as v0.4.0-preview.
          echo "VERSION=${RELEASE_TAG#v}" >> "$GITHUB_ENV"
        env:
          RELEASE_REGISTRY: ${{ inputs.registry }}
          RELEASE_TAG: ${{ inputs.tag }}

      - name: Remove existing Go modules directory
        run: sudo rm -rf ~/go/pkg/mod

      # NOTE(review): the exact ref was lost to e-mail obfuscation in the
      # scraped page; pin this to the release (or commit SHA) the repo uses.
      - name: Set up Go ${{ env.GO_VERSION }}
        uses: actions/setup-go@v5
        with:
          go-version: ${{ env.GO_VERSION }}

      - name: Install Azure CLI latest
        run: |
          if ! which az > /dev/null; then
              echo "Azure CLI not found. Installing..."
              curl -sL https://aka.ms/InstallAzureCLIDeb | sudo bash
          else
              echo "Azure CLI already installed."
          fi

      - name: Azure CLI Login
        run: |
          az login --identity

      - uses: azure/setup-helm@v4
        id: install

      # The resource group, ACR and AKS cluster all share CLUSTER_NAME.
      - name: Create Resource Group
        shell: bash
        run: |
          make create-rg
        env:
          AZURE_RESOURCE_GROUP: ${{ env.CLUSTER_NAME }}

      - name: Create ACR
        shell: bash
        run: |
          make create-acr
        env:
          AZURE_RESOURCE_GROUP: ${{ env.CLUSTER_NAME }}
          AZURE_ACR_NAME: ${{ env.CLUSTER_NAME }}

      # NOTE(review): the exact ref was lost to e-mail obfuscation in the
      # scraped page; pin this to the release (or commit SHA) the repo uses.
      - name: Create Azure Identity
        uses: azure/CLI@v2
        with:
          inlineScript: |
            az identity create --name ${{ inputs.node_provisioner }}Identity --resource-group ${{ env.CLUSTER_NAME }}

      - name: Generate APIs
        run: |
          make generate

      # Images are only built for non-release runs; release runs use the
      # registry and version exported by "Set Registry".
      - name: build KAITO image
        if: ${{ !inputs.isRelease }}
        shell: bash
        run: |
          make docker-build-workspace
        env:
          REGISTRY: ${{ env.REGISTRY }}
          VERSION: ${{ env.VERSION }}

      - name: build kaito RAG Engine image
        if: ${{ !inputs.isRelease }}
        shell: bash
        run: |
          make docker-build-ragengine
        env:
          REGISTRY: ${{ env.REGISTRY }}
          VERSION: ${{ env.VERSION }}

      # Any node_provisioner value other than "gpuprovisioner" takes the
      # karpenter cluster target.
      - name: create cluster
        shell: bash
        run: |
          if [ "${{ inputs.node_provisioner }}" == "gpuprovisioner" ]; then
            make create-aks-cluster
          else
            make create-aks-cluster-for-karpenter
          fi
        env:
          AZURE_ACR_NAME: ${{ env.CLUSTER_NAME }}
          AZURE_RESOURCE_GROUP: ${{ env.CLUSTER_NAME }}
          AZURE_CLUSTER_NAME: ${{ env.CLUSTER_NAME }}
          AZURE_LOCATION: ${{ inputs.region }}
          AKS_K8S_VERSION: ${{ inputs.k8s_version }}

      - name: Create Identities and Permissions for ${{ inputs.node_provisioner }}
        shell: bash
        run: |
          AZURE_SUBSCRIPTION_ID=$E2E_SUBSCRIPTION_ID \
          make generate-identities
        env:
          AZURE_RESOURCE_GROUP: ${{ env.CLUSTER_NAME }}
          AZURE_CLUSTER_NAME: ${{ env.CLUSTER_NAME }}
          TEST_SUITE: ${{ inputs.node_provisioner }}

      - name: Install gpu-provisioner helm chart
        if: ${{ inputs.node_provisioner == 'gpuprovisioner' }}
        shell: bash
        run: |
          AZURE_TENANT_ID=$E2E_TENANT_ID \
          AZURE_SUBSCRIPTION_ID=$E2E_SUBSCRIPTION_ID \
          make gpu-provisioner-helm
        env:
          AZURE_RESOURCE_GROUP: ${{ env.CLUSTER_NAME }}
          AZURE_CLUSTER_NAME: ${{ env.CLUSTER_NAME }}

      - name: Install karpenter Azure provider helm chart
        if: ${{ inputs.node_provisioner == 'azkarpenter' }}
        shell: bash
        run: |
          AZURE_TENANT_ID=$E2E_TENANT_ID \
          AZURE_SUBSCRIPTION_ID=$E2E_SUBSCRIPTION_ID \
          make azure-karpenter-helm
        env:
          AZURE_RESOURCE_GROUP: ${{ env.CLUSTER_NAME }}
          AZURE_CLUSTER_NAME: ${{ env.CLUSTER_NAME }}
          KARPENTER_VERSION: ${{ vars.KARPENTER_VERSION }}
          KARPENTER_NAMESPACE: ${{ env.KARPENTER_NAMESPACE }}

      # TODO: the 600s wait below is a temporary value used while testing;
      # change it back to 300s once testing is done.
      - name: Install KAITO Workspace helm chart
        shell: bash
        run: |
          make az-patch-install-helm
          kubectl wait --for=condition=available deploy "kaito-workspace" -n kaito-workspace --timeout=600s
        env:
          AZURE_RESOURCE_GROUP: ${{ env.CLUSTER_NAME }}
          AZURE_CLUSTER_NAME: ${{ env.CLUSTER_NAME }}
          REGISTRY: ${{ env.REGISTRY }}
          VERSION: ${{ env.VERSION }}
          TEST_SUITE: ${{ inputs.node_provisioner }}

      - name: Install KAITO RAG Engine helm chart
        shell: bash
        run: |
          make az-patch-install-ragengine-helm
          kubectl wait --for=condition=available deploy "kaito-ragengine" -n kaito-ragengine --timeout=300s
        env:
          AZURE_RESOURCE_GROUP: ${{ env.CLUSTER_NAME }}
          AZURE_CLUSTER_NAME: ${{ env.CLUSTER_NAME }}
          REGISTRY: ${{ env.REGISTRY }}
          VERSION: ${{ env.VERSION }}
          TEST_SUITE: ${{ inputs.node_provisioner }}

      # Retrieve E2E ACR credentials and create Kubernetes secret
      - name: Set up E2E ACR Credentials and Secret
        shell: bash
        run: |
          # Retrieve the ACR username and password
          ACR_USERNAME=$(az acr credential show --name ${{ env.CLUSTER_NAME }} --resource-group ${{ env.CLUSTER_NAME }} --query "username" -o tsv)
          ACR_PASSWORD=$(az acr credential show --name ${{ env.CLUSTER_NAME }} --resource-group ${{ env.CLUSTER_NAME }} --query "passwords[0].value" -o tsv)

          # Ensure credentials were retrieved successfully
          if [ -z "$ACR_USERNAME" ] || [ -z "$ACR_PASSWORD" ]; then
            echo "Failed to retrieve ACR credentials"
            exit 1
          fi

          # Create the Kubernetes secret with the retrieved credentials.
          # The values are quoted so that whitespace or glob characters in a
          # generated password cannot split or expand the argument.
          kubectl create secret docker-registry ${{ env.CLUSTER_NAME }}-acr-secret \
            --docker-server=${{ env.CLUSTER_NAME }}.azurecr.io \
            --docker-username="${ACR_USERNAME}" \
            --docker-password="${ACR_PASSWORD}"

      # Add Private-Hosted ACR secret for private models like llama
      - name: Add Private-Hosted ACR Secret Credentials
        run: |
          # Ensure E2E_AMRT_SECRET_NAME is sanitized to remove any accidental quotes
          E2E_AMRT_SECRET_NAME=$(echo "$E2E_AMRT_SECRET_NAME" | sed 's/[\"'\'']//g')

          if kubectl get secret "$E2E_AMRT_SECRET_NAME" >/dev/null 2>&1; then
            echo "Secret $E2E_AMRT_SECRET_NAME already exists. Skipping creation."
          else
            kubectl create secret docker-registry "$E2E_AMRT_SECRET_NAME" \
              --docker-server="$E2E_ACR_AMRT_USERNAME.azurecr.io" \
              --docker-username="$E2E_ACR_AMRT_USERNAME" \
              --docker-password="$E2E_ACR_AMRT_PASSWORD"
            echo "Secret $E2E_AMRT_SECRET_NAME created successfully."
          fi

      # The three "Log ..." steps dump controller logs before the test run.
      - name: Log ${{ inputs.node_provisioner }}
        run: |
          if [ "${{ inputs.node_provisioner }}" == "gpuprovisioner" ]; then
            kubectl logs -n "${{ env.GPU_PROVISIONER_NAMESPACE }}" -l app.kubernetes.io/name=gpu-provisioner -c controller
          else
            kubectl logs -n "${{ env.KARPENTER_NAMESPACE }}" -l app.kubernetes.io/name=karpenter -c controller
          fi

      - name: Log kaito-workspace
        run: |
          kubectl get pods -n kaito-workspace -o name | grep "^pod/kaito-workspace" | sed 's/^pod\///' | xargs -I {} kubectl logs -n kaito-workspace {}

      - name: Log kaito-ragengine
        run: |
          kubectl get pods -n kaito-ragengine -o name | grep "^pod/kaito-ragengine" | sed 's/^pod\///' | xargs -I {} kubectl logs -n kaito-ragengine {}

      - name: Run e2e test
        run: |
          AI_MODELS_REGISTRY=$E2E_ACR_AMRT_USERNAME.azurecr.io \
          AI_MODELS_REGISTRY_SECRET=$E2E_AMRT_SECRET_NAME \
          make kaito-ragengine-e2e-test
        env:
          AZURE_CLUSTER_NAME: ${{ env.CLUSTER_NAME }}
          RUN_LLAMA_13B: ${{ env.RUN_LLAMA_13B }}
          REGISTRY: ${{ env.REGISTRY }}
          TEST_SUITE: ${{ inputs.node_provisioner }}
          E2E_ACR_REGISTRY: ${{ env.CLUSTER_NAME }}.azurecr.io
          E2E_ACR_REGISTRY_SECRET: ${{ env.CLUSTER_NAME }}-acr-secret

      # Runs even when earlier steps failed. Deletion is best-effort
      # (`set +e`, `|| true`) and asynchronous (`--no-wait`), so cleanup never
      # fails the job.
      # NOTE(review): the exact ref was lost to e-mail obfuscation in the
      # scraped page; pin this to the release (or commit SHA) the repo uses.
      - name: Cleanup e2e resources
        if: ${{ always() }}
        uses: azure/CLI@v2
        with:
          inlineScript: |
            set +e
            az group delete --name "${{ env.CLUSTER_NAME }}" --yes --no-wait || true

.github/workflows/ragengine-e2e.yml

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
# Pull-request entry point for the RAGEngine e2e tests.
name: ragengine-e2e-test

# Runs are grouped by workflow name plus the PR head branch (falling back to
# the run id when head_ref is empty); a newer run cancels one still in
# progress in the same group.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true

on:
  pull_request:
    # Changes that touch only these paths do not trigger the workflow.
    paths-ignore:
      - 'docs/**'
      - '**.md'
      - '**.mdx'
      - '**.png'
      - '**.jpg'

env:
  GO_VERSION: "1.22"

permissions:
  id-token: write  # This is required for requesting the JWT
  contents: read  # This is required for actions/checkout
jobs:
  # Calls the reusable e2e workflow once per matrix entry, testing the pull
  # request's head commit.
  run-e2e:
    strategy:
      # Let the remaining matrix entries finish even if one of them fails.
      fail-fast: false
      matrix:
        node-provisioner: [gpuprovisioner]  # WIP: azkarpenter
    permissions:
      contents: read
      id-token: write
      statuses: write
    uses: ./.github/workflows/ragengine-e2e-workflow.yml
    with:
      git_sha: ${{ github.event.pull_request.head.sha }}
      node_provisioner: ${{ matrix.node-provisioner }}

0 commit comments

Comments
 (0)