Skip to content

Commit c331f5e

Browse files
authored
Merge branch 'main' into gomaxprocs
2 parents 8f808fe + 55508f8 commit c331f5e

File tree

272 files changed

+6447
-2881
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

272 files changed

+6447
-2881
lines changed

.common-ci.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,7 @@ trigger-pipeline:
150150
# Download the regctl binary for use in the release steps
151151
.regctl-setup:
152152
before_script:
153-
- export REGCTL_VERSION=v0.8.0
153+
- export REGCTL_VERSION=v0.8.2
154154
- apk add --no-cache curl
155155
- mkdir -p bin
156156
- curl -sSLo bin/regctl https://github.com/regclient/regclient/releases/download/${REGCTL_VERSION}/regctl-linux-amd64

.github/workflows/ci.yaml

+166-21
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ jobs:
2929
- name: Checkout code
3030
uses: actions/checkout@v4
3131
- name: Install Helm
32-
uses: azure/setup-helm@v4.2.0
32+
uses: azure/setup-helm@v4.3.0
3333
id: install
3434
- run: helm lint deployments/gpu-operator/
3535
validate-csv:
@@ -114,9 +114,9 @@ jobs:
114114
- run: make docker-build
115115

116116
### Image builds ###
117-
build-gpu-operator:
117+
build-gpu-operator-arm64:
118118
needs: [go-check, go-test, go-build]
119-
runs-on: ubuntu-latest
119+
runs-on: ubuntu-24.04-arm
120120
strategy:
121121
matrix:
122122
dist: [ubi9]
@@ -141,11 +141,7 @@ jobs:
141141
GENERATE_ARTIFACTS="true"
142142
fi
143143
echo "PUSH_ON_BUILD=${GENERATE_ARTIFACTS}" >> $GITHUB_ENV
144-
echo "BUILD_MULTI_ARCH_IMAGES=${GENERATE_ARTIFACTS}" >> $GITHUB_ENV
145-
- name: Set up QEMU
146-
uses: docker/setup-qemu-action@v3
147-
with:
148-
image: tonistiigi/binfmt:master
144+
echo "DOCKER_BUILD_PLATFORM_OPTIONS=--platform=linux/arm64" >> $GITHUB_ENV
149145
- name: Set up Docker Buildx
150146
uses: docker/setup-buildx-action@v3
151147
- name: Login to GitHub Container Registry
@@ -157,11 +153,54 @@ jobs:
157153
- name: Build image
158154
env:
159155
IMAGE_NAME: ghcr.io/${LOWERCASE_REPO_OWNER}/gpu-operator
160-
VERSION: ${COMMIT_SHORT_SHA}
156+
VERSION: ${COMMIT_SHORT_SHA}-arm64
157+
run: |
158+
echo "${VERSION}"
159+
make build-${{ matrix.dist }}
160+
build-gpu-operator-validator-arm64:
161+
needs: [go-check, go-test, go-build]
162+
runs-on: ubuntu-24.04-arm
163+
strategy:
164+
matrix:
165+
dist: [ubi9]
166+
steps:
167+
- uses: actions/checkout@v4
168+
name: Check out code
169+
- name: Calculate build vars
170+
id: vars
171+
run: |
172+
echo "COMMIT_SHORT_SHA=${GITHUB_SHA:0:8}" >> $GITHUB_ENV
173+
echo "LOWERCASE_REPO_OWNER=$(echo "${GITHUB_REPOSITORY_OWNER}" | awk '{print tolower($0)}')" >> $GITHUB_ENV
174+
175+
GENERATE_ARTIFACTS="false"
176+
if [[ "${{ github.actor }}" == "dependabot[bot]" ]]; then
177+
GENERATE_ARTIFACTS="false"
178+
elif [[ "${{ github.event_name }}" == "pull_request" && "${{ github.event.pull_request.head.repo.full_name }}" == "${{ github.repository }}" ]]; then
179+
GENERATE_ARTIFACTS="true"
180+
elif [[ "${{ github.event_name }}" == "push" ]]; then
181+
GENERATE_ARTIFACTS="true"
182+
fi
183+
echo "PUSH_ON_BUILD=${GENERATE_ARTIFACTS}" >> $GITHUB_ENV
184+
echo "DOCKER_BUILD_PLATFORM_OPTIONS=--platform=linux/arm64" >> $GITHUB_ENV
185+
- name: Set up Docker Buildx
186+
uses: docker/setup-buildx-action@v3
187+
- name: Login to GitHub Container Registry
188+
uses: docker/login-action@v3
189+
with:
190+
registry: ghcr.io
191+
username: ${{ github.actor }}
192+
password: ${{ secrets.GITHUB_TOKEN }}
193+
- name: Build image
194+
env:
195+
IMAGE_NAME: ghcr.io/${LOWERCASE_REPO_OWNER}/gpu-operator/gpu-operator-validator
196+
VERSION: ${COMMIT_SHORT_SHA}-arm64
197+
SUBCOMPONENT: validator
161198
run: |
162199
echo "${VERSION}"
163200
make build-${{ matrix.dist }}
164-
build-gpu-operator-validator:
201+
202+
### Image builds ###
203+
build-gpu-operator-amd64:
165204
needs: [go-check, go-test, go-build]
166205
runs-on: ubuntu-latest
167206
strategy:
@@ -188,11 +227,48 @@ jobs:
188227
GENERATE_ARTIFACTS="true"
189228
fi
190229
echo "PUSH_ON_BUILD=${GENERATE_ARTIFACTS}" >> $GITHUB_ENV
191-
echo "BUILD_MULTI_ARCH_IMAGES=${GENERATE_ARTIFACTS}" >> $GITHUB_ENV
192-
- name: Set up QEMU
193-
uses: docker/setup-qemu-action@v3
230+
echo "DOCKER_BUILD_PLATFORM_OPTIONS=--platform=linux/amd64" >> $GITHUB_ENV
231+
- name: Set up Docker Buildx
232+
uses: docker/setup-buildx-action@v3
233+
- name: Login to GitHub Container Registry
234+
uses: docker/login-action@v3
194235
with:
195-
image: tonistiigi/binfmt:master
236+
registry: ghcr.io
237+
username: ${{ github.actor }}
238+
password: ${{ secrets.GITHUB_TOKEN }}
239+
- name: Build image
240+
env:
241+
IMAGE_NAME: ghcr.io/${LOWERCASE_REPO_OWNER}/gpu-operator
242+
VERSION: ${COMMIT_SHORT_SHA}-amd64
243+
run: |
244+
echo "${VERSION}"
245+
make build-${{ matrix.dist }}
246+
247+
build-gpu-operator-validator-amd64:
248+
needs: [go-check, go-test, go-build]
249+
runs-on: ubuntu-latest
250+
strategy:
251+
matrix:
252+
dist: [ubi9]
253+
steps:
254+
- uses: actions/checkout@v4
255+
name: Check out code
256+
- name: Calculate build vars
257+
id: vars
258+
run: |
259+
echo "COMMIT_SHORT_SHA=${GITHUB_SHA:0:8}" >> $GITHUB_ENV
260+
echo "LOWERCASE_REPO_OWNER=$(echo "${GITHUB_REPOSITORY_OWNER}" | awk '{print tolower($0)}')" >> $GITHUB_ENV
261+
262+
GENERATE_ARTIFACTS="false"
263+
if [[ "${{ github.actor }}" == "dependabot[bot]" ]]; then
264+
GENERATE_ARTIFACTS="false"
265+
elif [[ "${{ github.event_name }}" == "pull_request" && "${{ github.event.pull_request.head.repo.full_name }}" == "${{ github.repository }}" ]]; then
266+
GENERATE_ARTIFACTS="true"
267+
elif [[ "${{ github.event_name }}" == "push" ]]; then
268+
GENERATE_ARTIFACTS="true"
269+
fi
270+
echo "PUSH_ON_BUILD=${GENERATE_ARTIFACTS}" >> $GITHUB_ENV
271+
echo "DOCKER_BUILD_PLATFORM_OPTIONS=--platform=linux/amd64" >> $GITHUB_ENV
196272
- name: Set up Docker Buildx
197273
uses: docker/setup-buildx-action@v3
198274
- name: Login to GitHub Container Registry
@@ -204,21 +280,61 @@ jobs:
204280
- name: Build image
205281
env:
206282
IMAGE_NAME: ghcr.io/${LOWERCASE_REPO_OWNER}/gpu-operator/gpu-operator-validator
207-
VERSION: ${COMMIT_SHORT_SHA}
283+
VERSION: ${COMMIT_SHORT_SHA}-amd64
208284
SUBCOMPONENT: validator
209285
run: |
210286
echo "${VERSION}"
211287
make build-${{ matrix.dist }}
212288
289+
build-multi-arch-images:
290+
needs: [build-gpu-operator-arm64, build-gpu-operator-validator-arm64, build-gpu-operator-amd64, build-gpu-operator-validator-amd64]
291+
runs-on: ubuntu-latest
292+
strategy:
293+
matrix:
294+
dist: [ubi9]
295+
steps:
296+
- uses: actions/checkout@v4
297+
name: Check out code
298+
- name: Calculate build vars
299+
id: vars
300+
run: |
301+
echo "COMMIT_SHORT_SHA=${GITHUB_SHA:0:8}" >> $GITHUB_ENV
302+
echo "LOWERCASE_REPO_OWNER=$(echo "${GITHUB_REPOSITORY_OWNER}" | awk '{print tolower($0)}')" >> $GITHUB_ENV
303+
- name: Login to GitHub Container Registry
304+
uses: docker/login-action@v3
305+
with:
306+
registry: ghcr.io
307+
username: ${{ github.actor }}
308+
password: ${{ secrets.GITHUB_TOKEN }}
309+
- name: Build Manifest
310+
env:
311+
OPERATOR_IMAGE_ARM: ghcr.io/${{ env.LOWERCASE_REPO_OWNER }}/gpu-operator:${{ env.COMMIT_SHORT_SHA }}-arm64
312+
OPERATOR_IMAGE_AMD: ghcr.io/${{ env.LOWERCASE_REPO_OWNER}}/gpu-operator:${{ env.COMMIT_SHORT_SHA }}-amd64
313+
VALIDATOR_IMAGE_ARM: ghcr.io/${{ env.LOWERCASE_REPO_OWNER }}/gpu-operator/gpu-operator-validator:${{ env.COMMIT_SHORT_SHA }}-arm64
314+
VALIDATOR_IMAGE_AMD: ghcr.io/${{ env.LOWERCASE_REPO_OWNER }}/gpu-operator/gpu-operator-validator:${{ env.COMMIT_SHORT_SHA }}-amd64
315+
OPERATOR_MULTIARCH_IMAGE: ghcr.io/${{ env.LOWERCASE_REPO_OWNER }}/gpu-operator:${{ env.COMMIT_SHORT_SHA }}
316+
VALIDATOR_MULTIARCH_IMAGE: ghcr.io/${{ env.LOWERCASE_REPO_OWNER }}/gpu-operator/gpu-operator-validator:${{ env.COMMIT_SHORT_SHA }}
317+
run: |
318+
docker manifest create \
319+
${OPERATOR_MULTIARCH_IMAGE} \
320+
${OPERATOR_IMAGE_AMD} \
321+
${OPERATOR_IMAGE_ARM}
322+
docker manifest push ${OPERATOR_MULTIARCH_IMAGE}
323+
docker manifest create \
324+
${VALIDATOR_MULTIARCH_IMAGE} \
325+
${VALIDATOR_IMAGE_AMD} \
326+
${VALIDATOR_IMAGE_ARM}
327+
docker manifest push ${VALIDATOR_MULTIARCH_IMAGE}
328+
213329
### e2e tests ###
214330
e2e-tests-containerd:
215-
needs: [build-gpu-operator, build-gpu-operator-validator]
331+
needs: [build-multi-arch-images]
216332
runs-on: linux-amd64-cpu4
217333
steps:
218334
- uses: actions/checkout@v4
219335
name: Check out code
220336
- name: Set up Holodeck
221-
uses: NVIDIA/holodeck@v0.2.5
337+
uses: NVIDIA/holodeck@v0.2.6
222338
with:
223339
aws_access_key_id: ${{ secrets.AWS_ACCESS_KEY_ID }}
224340
aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
@@ -264,13 +380,13 @@ jobs:
264380
retention-days: 15
265381

266382
e2e-tests-nvidiadriver:
267-
needs: [build-gpu-operator, build-gpu-operator-validator]
383+
needs: [build-multi-arch-images]
268384
runs-on: linux-amd64-cpu4
269385
steps:
270386
- uses: actions/checkout@v4
271387
name: Check out code
272388
- name: Set up Holodeck
273-
uses: NVIDIA/holodeck@v0.2.5
389+
uses: NVIDIA/holodeck@v0.2.6
274390
with:
275391
aws_access_key_id: ${{ secrets.AWS_ACCESS_KEY_ID }}
276392
aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
@@ -315,10 +431,39 @@ jobs:
315431
path: ./logs/
316432
retention-days: 15
317433

318-
push-gpu-operator-bundle-image:
434+
release-latest-gpu-operator-and-validator-image:
319435
if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }} # Runs only if the event is a push to the main branch
320436
needs: [e2e-tests-containerd, e2e-tests-nvidiadriver]
321-
runs-on: ubuntu-latest
437+
runs-on: linux-amd64-cpu4
438+
steps:
439+
- uses: actions/checkout@v4
440+
name: Check out code
441+
- name: set-up regctl
442+
run: |
443+
export REGCTL_VERSION=v0.8.2
444+
mkdir -p bin
445+
curl -sSLo bin/regctl https://github.com/regclient/regclient/releases/download/${REGCTL_VERSION}/regctl-linux-amd64
446+
chmod a+x bin/regctl
447+
echo "$(pwd)/bin" >> $GITHUB_PATH
448+
- name: Set environment variables
449+
id: vars
450+
run: |
451+
COMMIT_SHORT_SHA=${GITHUB_SHA:0:8}
452+
LOWERCASE_REPO_OWNER=$(echo "${GITHUB_REPOSITORY_OWNER}" | awk '{print tolower($0)}')
453+
echo "OPERATOR_VERSION=${COMMIT_SHORT_SHA}" >> $GITHUB_ENV
454+
echo "OPERATOR_IMAGE=ghcr.io/${LOWERCASE_REPO_OWNER}/gpu-operator" >> $GITHUB_ENV
455+
echo "VALIDATOR_VERSION=${COMMIT_SHORT_SHA}" >> $GITHUB_ENV
456+
echo "VALIDATOR_IMAGE=ghcr.io/${LOWERCASE_REPO_OWNER}/gpu-operator/gpu-operator-validator" >> $GITHUB_ENV
457+
- name: Retag gpu-operator and gpu-operator-validator
458+
run: |
459+
regctl registry login ghcr.io -u $GITHUB_ACTOR -p ${{ secrets.GITHUB_TOKEN }}
460+
regctl image copy ${OPERATOR_IMAGE}:${OPERATOR_VERSION} ${OPERATOR_IMAGE}:main-latest
461+
regctl image copy ${VALIDATOR_IMAGE}:${VALIDATOR_VERSION} ${VALIDATOR_IMAGE}:main-latest
462+
463+
push-gpu-operator-bundle-image:
464+
if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }} # Runs only if the event is a push to the main branch
465+
needs: [release-latest-gpu-operator-and-validator-image]
466+
runs-on: linux-amd64-cpu4
322467
steps:
323468
- uses: actions/checkout@v4
324469
name: Check out code

Makefile

+1-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
BUILD_MULTI_ARCH_IMAGES ?= no
15+
BUILD_MULTI_ARCH_IMAGES ?= false
1616
DOCKER ?= docker
1717
GO_CMD ?= go
1818
PROJECT_DIR := $(shell dirname $(abspath $(lastword $(MAKEFILE_LIST))))

assets/gpu-feature-discovery/0500_daemonset.yaml

+5-64
Original file line numberDiff line numberDiff line change
@@ -33,51 +33,9 @@ spec:
3333
securityContext:
3434
privileged: true
3535
volumeMounts:
36-
- name: run-nvidia-validations
37-
mountPath: /run/nvidia/validations
36+
- name: run-nvidia
37+
mountPath: /run/nvidia
3838
mountPropagation: HostToContainer
39-
- name: gpu-feature-discovery-imex-init
40-
image: "FILLED BY THE OPERATOR"
41-
command: ["/bin/bash", "-c"]
42-
args:
43-
- |
44-
until [[ -f /run/nvidia/validations/driver-ready ]]
45-
do
46-
echo "waiting for the driver validations to be ready..."
47-
sleep 5
48-
done
49-
set -o allexport
50-
cat /run/nvidia/validations/driver-ready
51-
. /run/nvidia/validations/driver-ready
52-
53-
IMEX_NODES_CONFIG_FILE=/etc/nvidia-imex/nodes_config.cfg
54-
if [[ -f /config/${IMEX_NODES_CONFIG_FILE} ]]; then
55-
echo "Removing cached IMEX nodes config"
56-
rm -f /config/${IMEX_NODES_CONFIG_FILE}
57-
fi
58-
if [[ ! -f ${DRIVER_ROOT_CTR_PATH}/${IMEX_NODES_CONFIG_FILE} ]]; then
59-
echo "No IMEX nodes config path detected; Skipping"
60-
exit 0
61-
fi
62-
echo "Copying IMEX nodes config"
63-
mkdir -p $(dirname /config/${IMEX_NODES_CONFIG_FILE})
64-
cp ${DRIVER_ROOT_CTR_PATH}/${IMEX_NODES_CONFIG_FILE} /config/${IMEX_NODES_CONFIG_FILE}
65-
securityContext:
66-
privileged: true
67-
volumeMounts:
68-
- name: config
69-
mountPath: /config
70-
- name: run-nvidia-validations
71-
mountPath: /run/nvidia/validations
72-
mountPropagation: HostToContainer
73-
- name: host-root
74-
mountPath: /host/etc
75-
subPath: etc
76-
readOnly: true
77-
- name: driver-install-dir
78-
mountPath: /driver-root/etc
79-
subPath: etc
80-
readOnly: true
8139
- name: config-manager-init
8240
image: "FILLED BY THE OPERATOR"
8341
command: ["config-manager"]
@@ -104,9 +62,6 @@ spec:
10462
value: ""
10563
- name: PROCESS_TO_SIGNAL
10664
value: ""
107-
volumeMounts:
108-
- name: config
109-
mountPath: /config
11065
containers:
11166
- image: "FILLED BY THE OPERATOR"
11267
name: gpu-feature-discovery
@@ -130,8 +85,6 @@ spec:
13085
- name: host-sys
13186
mountPath: /sys
13287
readOnly: true
133-
- name: config
134-
mountPath: /config
13588
securityContext:
13689
privileged: true
13790
- image: "FILLED BY THE OPERATOR"
@@ -162,26 +115,14 @@ spec:
162115
value: "1" # SIGHUP
163116
- name: PROCESS_TO_SIGNAL
164117
value: "gpu-feature-discovery"
165-
volumeMounts:
166-
- name: config
167-
mountPath: /config
168118
volumes:
169119
- name: output-dir
170120
hostPath:
171121
path: "/etc/kubernetes/node-feature-discovery/features.d"
172122
- name: host-sys
173123
hostPath:
174124
path: /sys
175-
- name: run-nvidia-validations
176-
hostPath:
177-
path: "/run/nvidia/validations"
178-
type: DirectoryOrCreate
179-
- name: host-root
180-
hostPath:
181-
path: /
182-
- name: driver-install-dir
125+
- name: run-nvidia
183126
hostPath:
184-
path: /run/nvidia/driver
185-
type: DirectoryOrCreate
186-
- name: config
187-
emptyDir: {}
127+
path: "/run/nvidia"
128+
type: Directory

0 commit comments

Comments
 (0)