|
| 1 | +name: ragengine-e2e-workflow |
| 2 | + |
| 3 | +on: |
| 4 | + workflow_call: |
| 5 | + inputs: |
| 6 | + git_sha: |
| 7 | + type: string |
| 8 | + required: true |
| 9 | + node_provisioner: |
| 10 | + type: string |
| 11 | + required: false |
| 12 | + default: gpuprovisioner |
| 13 | + tag: |
| 14 | + type: string |
| 15 | + isRelease: |
| 16 | + type: boolean |
| 17 | + default: false |
| 18 | + registry: |
| 19 | + type: string |
| 20 | + region: |
| 21 | + type: string |
| 22 | + description: "the azure location to run the e2e test in" |
| 23 | + default: "eastus" |
| 24 | + k8s_version: |
| 25 | + type: string |
| 26 | + default: "1.30.0" |
| 27 | + |
| 28 | +jobs: |
| 29 | + e2e-tests: |
| 30 | + runs-on: [ "self-hosted", "hostname:kaito-e2e-github-runner" ] |
| 31 | + name: e2e-tests-${{ inputs.node_provisioner }} |
| 32 | + permissions: |
| 33 | + contents: read |
| 34 | + id-token: write # This is required for requesting the JWT |
| 35 | + environment: e2e-test |
| 36 | + env: |
| 37 | + GO_VERSION: "1.22" |
| 38 | + KARPENTER_NAMESPACE: "karpenter" |
| 39 | + GPU_PROVISIONER_NAMESPACE: "gpu-provisioner" |
| 40 | + |
| 41 | + steps: |
| 42 | + - name: Harden Runner |
| 43 | + uses: step-security/harden-runner@0080882f6c36860b6ba35c610c98ce87d4e2f26f # v2.10.2 |
| 44 | + with: |
| 45 | + egress-policy: audit |
| 46 | + |
| 47 | + - name: Checkout |
| 48 | + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 |
| 49 | + with: |
| 50 | + ref: ${{ inputs.git_sha }} |
| 51 | + |
| 52 | + - name: Set e2e Resource and Cluster Name |
| 53 | + run: | |
| 54 | + rand=$(git rev-parse --short ${{ inputs.git_sha }} || true) # "|| true": the default shell is "bash -e", so a failing rev-parse would otherwise abort before the $RANDOM fallback below |
| 55 | +
|
| 56 | + if [ "$rand" = "" ]; then |
| 57 | + rand=$RANDOM |
| 58 | + fi |
| 59 | +
|
| 60 | + echo "VERSION=${rand}" >> $GITHUB_ENV |
| 61 | + echo "CLUSTER_NAME=${{ inputs.node_provisioner }}${rand}" >> $GITHUB_ENV |
| 62 | + echo "REGISTRY=${{ inputs.node_provisioner }}${rand}.azurecr.io" >> $GITHUB_ENV |
| 63 | + echo "RUN_LLAMA_13B=false" >> $GITHUB_ENV |
| 64 | +
|
| 65 | + - name: Set Registry |
| 66 | + if: ${{ inputs.isRelease }} |
| 67 | + run: | |
| 68 | + echo "REGISTRY=${{ inputs.registry }}" >> $GITHUB_ENV |
| 69 | + echo "VERSION=$(echo ${{ inputs.tag }} | sed 's/^v//')" >> $GITHUB_ENV # strip only the leading "v"; "tr -d v" would delete every "v" in the tag |
| 70 | + |
| 71 | + - name: Remove existing Go modules directory |
| 72 | + run: sudo rm -rf ~/go/pkg/mod |
| 73 | + |
| 74 | + - name: Set up Go ${{ env.GO_VERSION }} |
| 75 | + |
| 76 | + with: |
| 77 | + go-version: ${{ env.GO_VERSION }} |
| 78 | + |
| 79 | + - name: Install Azure CLI latest |
| 80 | + run: | |
| 81 | + if ! which az > /dev/null; then |
| 82 | + echo "Azure CLI not found. Installing..." |
| 83 | + curl -sL https://aka.ms/InstallAzureCLIDeb | sudo bash |
| 84 | + else |
| 85 | + echo "Azure CLI already installed." |
| 86 | + fi |
| 87 | +
|
| 88 | + - name: Azure CLI Login |
| 89 | + run: | |
| 90 | + az login --identity |
| 91 | +
|
| 92 | + - uses: azure/setup-helm@v4 |
| 93 | + id: install |
| 94 | + |
| 95 | + - name: Create Resource Group |
| 96 | + shell: bash |
| 97 | + run: | |
| 98 | + make create-rg |
| 99 | + env: |
| 100 | + AZURE_RESOURCE_GROUP: ${{ env.CLUSTER_NAME }} |
| 101 | + |
| 102 | + - name: Create ACR |
| 103 | + shell: bash |
| 104 | + run: | |
| 105 | + make create-acr |
| 106 | + env: |
| 107 | + AZURE_RESOURCE_GROUP: ${{ env.CLUSTER_NAME }} |
| 108 | + AZURE_ACR_NAME: ${{ env.CLUSTER_NAME }} |
| 109 | + |
| 110 | + - name: Create Azure Identity |
| 111 | + |
| 112 | + with: |
| 113 | + inlineScript: | |
| 114 | + az identity create --name ${{ inputs.node_provisioner }}Identity --resource-group ${{ env.CLUSTER_NAME }} |
| 115 | +
|
| 116 | + - name: Generate APIs |
| 117 | + run: | |
| 118 | + make generate |
| 119 | +
|
| 120 | + - name: build KAITO image |
| 121 | + if: ${{ !inputs.isRelease }} |
| 122 | + shell: bash |
| 123 | + run: | |
| 124 | + make docker-build-workspace |
| 125 | + env: |
| 126 | + REGISTRY: ${{ env.REGISTRY }} |
| 127 | + VERSION: ${{ env.VERSION }} |
| 128 | + |
| 129 | + - name: build kaito RAG Engine image |
| 130 | + if: ${{ !inputs.isRelease }} |
| 131 | + shell: bash |
| 132 | + run: | |
| 133 | + make docker-build-ragengine |
| 134 | + env: |
| 135 | + REGISTRY: ${{ env.REGISTRY }} |
| 136 | + VERSION: ${{ env.VERSION }} |
| 137 | + |
| 138 | + |
| 139 | + |
| 140 | + - name: create cluster |
| 141 | + shell: bash |
| 142 | + run: | |
| 143 | + if [ "${{ inputs.node_provisioner }}" == "gpuprovisioner" ]; then |
| 144 | + make create-aks-cluster |
| 145 | + else |
| 146 | + make create-aks-cluster-for-karpenter |
| 147 | + fi |
| 148 | + env: |
| 149 | + AZURE_ACR_NAME: ${{ env.CLUSTER_NAME }} |
| 150 | + AZURE_RESOURCE_GROUP: ${{ env.CLUSTER_NAME }} |
| 151 | + AZURE_CLUSTER_NAME: ${{ env.CLUSTER_NAME }} |
| 152 | + AZURE_LOCATION: ${{ inputs.region }} |
| 153 | + AKS_K8S_VERSION: ${{ inputs.k8s_version }} |
| 154 | + |
| 155 | + - name: Create Identities and Permissions for ${{ inputs.node_provisioner }} |
| 156 | + shell: bash |
| 157 | + run: | |
| 158 | + AZURE_SUBSCRIPTION_ID=$E2E_SUBSCRIPTION_ID \ |
| 159 | + make generate-identities |
| 160 | + env: |
| 161 | + AZURE_RESOURCE_GROUP: ${{ env.CLUSTER_NAME }} |
| 162 | + AZURE_CLUSTER_NAME: ${{ env.CLUSTER_NAME }} |
| 163 | + TEST_SUITE: ${{ inputs.node_provisioner }} |
| 164 | + |
| 165 | + - name: Install gpu-provisioner helm chart |
| 166 | + if: ${{ inputs.node_provisioner == 'gpuprovisioner' }} |
| 167 | + shell: bash |
| 168 | + run: | |
| 169 | + AZURE_TENANT_ID=$E2E_TENANT_ID \ |
| 170 | + AZURE_SUBSCRIPTION_ID=$E2E_SUBSCRIPTION_ID \ |
| 171 | + make gpu-provisioner-helm |
| 172 | + env: |
| 173 | + AZURE_RESOURCE_GROUP: ${{ env.CLUSTER_NAME }} |
| 174 | + AZURE_CLUSTER_NAME: ${{ env.CLUSTER_NAME }} |
| 175 | + |
| 176 | + - name: Install karpenter Azure provider helm chart |
| 177 | + if: ${{ inputs.node_provisioner == 'azkarpenter' }} |
| 178 | + shell: bash |
| 179 | + run: | |
| 180 | + AZURE_TENANT_ID=$E2E_TENANT_ID \ |
| 181 | + AZURE_SUBSCRIPTION_ID=$E2E_SUBSCRIPTION_ID \ |
| 182 | + make azure-karpenter-helm |
| 183 | + env: |
| 184 | + AZURE_RESOURCE_GROUP: ${{ env.CLUSTER_NAME }} |
| 185 | + AZURE_CLUSTER_NAME: ${{ env.CLUSTER_NAME }} |
| 186 | + KARPENTER_VERSION: ${{ vars.KARPENTER_VERSION }} |
| 187 | + KARPENTER_NAMESPACE: ${{ env.KARPENTER_NAMESPACE }} |
| 188 | + |
| 189 | + # NOTE: the 600s timeout below is only for testing; change it back to 300s when testing is done. |
| 190 | + - name: Install KAITO Workspace helm chart |
| 191 | + shell: bash |
| 192 | + run: | |
| 193 | + make az-patch-install-helm |
| 194 | + kubectl wait --for=condition=available deploy "kaito-workspace" -n kaito-workspace --timeout=600s |
| 195 | + env: |
| 196 | + AZURE_RESOURCE_GROUP: ${{ env.CLUSTER_NAME }} |
| 197 | + AZURE_CLUSTER_NAME: ${{ env.CLUSTER_NAME }} |
| 198 | + REGISTRY: ${{ env.REGISTRY }} |
| 199 | + VERSION: ${{ env.VERSION }} |
| 200 | + TEST_SUITE: ${{ inputs.node_provisioner }} |
| 201 | + |
| 202 | + - name: Install KAITO RAG Engine helm chart |
| 203 | + shell: bash |
| 204 | + run: | |
| 205 | + make az-patch-install-ragengine-helm |
| 206 | + kubectl wait --for=condition=available deploy "kaito-ragengine" -n kaito-ragengine --timeout=300s |
| 207 | + env: |
| 208 | + AZURE_RESOURCE_GROUP: ${{ env.CLUSTER_NAME }} |
| 209 | + AZURE_CLUSTER_NAME: ${{ env.CLUSTER_NAME }} |
| 210 | + REGISTRY: ${{ env.REGISTRY }} |
| 211 | + VERSION: ${{ env.VERSION }} |
| 212 | + TEST_SUITE: ${{ inputs.node_provisioner }} |
| 213 | + |
| 214 | + # Retrieve the E2E ACR admin credentials and store them in a docker-registry Kubernetes secret |
| 215 | + - name: Set up E2E ACR Credentials and Secret |
| 216 | + shell: bash |
| 217 | + run: | |
| 218 | + # Retrieve the ACR username and password |
| 219 | + ACR_USERNAME=$(az acr credential show --name ${{ env.CLUSTER_NAME }} --resource-group ${{ env.CLUSTER_NAME }} --query "username" -o tsv) |
| 220 | + ACR_PASSWORD=$(az acr credential show --name ${{ env.CLUSTER_NAME }} --resource-group ${{ env.CLUSTER_NAME }} --query "passwords[0].value" -o tsv) |
| 221 | +
|
| 222 | + # Ensure credentials were retrieved successfully |
| 223 | + if [ -z "$ACR_USERNAME" ] || [ -z "$ACR_PASSWORD" ]; then |
| 224 | + echo "Failed to retrieve ACR credentials" |
| 225 | + exit 1 |
| 226 | + fi |
| 227 | +
|
| 228 | + # Create the Kubernetes secret; credentials are quoted so the shell passes them verbatim (no word splitting or globbing) |
| 229 | + kubectl create secret docker-registry ${{ env.CLUSTER_NAME }}-acr-secret \ |
| 230 | + --docker-server=${{ env.CLUSTER_NAME }}.azurecr.io \ |
| 231 | + --docker-username="${ACR_USERNAME}" \ |
| 232 | + --docker-password="${ACR_PASSWORD}" |
| 233 | +
|
| 234 | + # Add Private-Hosted ACR secret for private models like llama |
| 235 | + - name: Add Private-Hosted ACR Secret Credentials |
| 236 | + run: | |
| 237 | + # Ensure E2E_AMRT_SECRET_NAME is sanitized to remove any accidental quotes |
| 238 | + E2E_AMRT_SECRET_NAME=$(echo "$E2E_AMRT_SECRET_NAME" | sed 's/[\"'\'']//g') |
| 239 | +
|
| 240 | + if kubectl get secret "$E2E_AMRT_SECRET_NAME" >/dev/null 2>&1; then |
| 241 | + echo "Secret $E2E_AMRT_SECRET_NAME already exists. Skipping creation." |
| 242 | + else |
| 243 | + kubectl create secret docker-registry "$E2E_AMRT_SECRET_NAME" \ |
| 244 | + --docker-server="$E2E_ACR_AMRT_USERNAME.azurecr.io" \ |
| 245 | + --docker-username="$E2E_ACR_AMRT_USERNAME" \ |
| 246 | + --docker-password="$E2E_ACR_AMRT_PASSWORD" |
| 247 | + echo "Secret $E2E_AMRT_SECRET_NAME created successfully." |
| 248 | + fi |
| 249 | +
|
| 250 | + - name: Log ${{ inputs.node_provisioner }} |
| 251 | + run: | |
| 252 | + if [ "${{ inputs.node_provisioner }}" == "gpuprovisioner" ]; then |
| 253 | + kubectl logs -n "${{ env.GPU_PROVISIONER_NAMESPACE }}" -l app.kubernetes.io/name=gpu-provisioner -c controller |
| 254 | + else |
| 255 | + kubectl logs -n "${{ env.KARPENTER_NAMESPACE }}" -l app.kubernetes.io/name=karpenter -c controller |
| 256 | + fi |
| 257 | +
|
| 258 | + - name: Log kaito-workspace |
| 259 | + run: | |
| 260 | + kubectl get pods -n kaito-workspace -o name | grep "^pod/kaito-workspace" | sed 's/^pod\///' | xargs -I {} kubectl logs -n kaito-workspace {} |
| 261 | + |
| 262 | + - name: Log kaito-ragengine |
| 263 | + run: | |
| 264 | + kubectl get pods -n kaito-ragengine -o name | grep "^pod/kaito-ragengine" | sed 's/^pod\///' | xargs -I {} kubectl logs -n kaito-ragengine {} |
| 265 | + |
| 266 | + - name: Run e2e test |
| 267 | + run: | |
| 268 | + AI_MODELS_REGISTRY=$E2E_ACR_AMRT_USERNAME.azurecr.io \ |
| 269 | + AI_MODELS_REGISTRY_SECRET=$E2E_AMRT_SECRET_NAME \ |
| 270 | + make kaito-ragengine-e2e-test |
| 271 | + env: |
| 272 | + AZURE_CLUSTER_NAME: ${{ env.CLUSTER_NAME }} |
| 273 | + RUN_LLAMA_13B: ${{ env.RUN_LLAMA_13B }} |
| 274 | + REGISTRY: ${{ env.REGISTRY }} |
| 275 | + TEST_SUITE: ${{ inputs.node_provisioner }} |
| 276 | + E2E_ACR_REGISTRY: ${{ env.CLUSTER_NAME }}.azurecr.io |
| 277 | + E2E_ACR_REGISTRY_SECRET: ${{ env.CLUSTER_NAME }}-acr-secret |
| 278 | + |
| 279 | + - name: Cleanup e2e resources |
| 280 | + if: ${{ always() }} |
| 281 | + |
| 282 | + with: |
| 283 | + inlineScript: | |
| 284 | + set +e |
| 285 | + az group delete --name "${{ env.CLUSTER_NAME }}" --yes --no-wait || true |
0 commit comments