Skip to content

Commit d905a49

Browse files
[no-relnote] Add E2E for libnvidia-container
Signed-off-by: Carlos Eduardo Arango Gutierrez <[email protected]>
1 parent 890db82 commit d905a49

File tree

3 files changed

+295
-5
lines changed

3 files changed

+295
-5
lines changed

tests/e2e/Makefile

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,14 @@ LOG_ARTIFACTS_DIR ?= $(CURDIR)/e2e_logs
2020

2121
GINKGO_BIN := $(CURDIR)/bin/ginkgo
2222

23+
# If GINKGO_FOCUS is not set, run all tests
24+
# currently available tests:
25+
# - nvidia-container-cli
26+
# - docker
27+
GINKGO_FOCUS ?=
28+
2329
test: $(GINKGO_BIN)
24-
$(GINKGO_BIN) $(GINKGO_ARGS) -v --json-report ginkgo.json ./tests/e2e/...
30+
$(GINKGO_BIN) $(GINKGO_ARGS) -v --json-report ginkgo.json --focus="$(GINKGO_FOCUS)" ./tests/e2e/...
2531

2632
$(GINKGO_BIN):
2733
mkdir -p $(CURDIR)/bin

tests/e2e/installer.go

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,11 +28,20 @@ var dockerInstallTemplate = `
2828
#! /usr/bin/env bash
2929
set -xe
3030
31-
: ${IMAGE:={{.Image}}}
31+
# if the TEMP_DIR is already set, use it
32+
if [ -f /tmp/ctk_e2e_temp_dir.txt ]; then
33+
TEMP_DIR=$(cat /tmp/ctk_e2e_temp_dir.txt)
34+
else
35+
TEMP_DIR="/tmp/ctk_e2e.$(date +%s)_$RANDOM"
36+
echo "$TEMP_DIR" > /tmp/ctk_e2e_temp_dir.txt
37+
fi
38+
39+
# if TEMP_DIR does not exist, create it
40+
if [ ! -d "$TEMP_DIR" ]; then
41+
mkdir -p "$TEMP_DIR"
42+
fi
3243
33-
# Create a temporary directory
34-
TEMP_DIR="/tmp/ctk_e2e.$(date +%s)_$RANDOM"
35-
mkdir -p "$TEMP_DIR"
44+
: ${IMAGE:={{.Image}}}
3645
3746
# Given that docker has an init function that checks for the existence of the
3847
# nvidia-container-toolkit, we need to create a symlink to the nvidia-container-runtime-hook
Lines changed: 275 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,275 @@
1+
/*
2+
* Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package e2e
18+
19+
import (
20+
"context"
21+
"fmt"
22+
"strings"
23+
"text/template"
24+
25+
. "github.com/onsi/ginkgo/v2"
26+
. "github.com/onsi/gomega"
27+
)
28+
29+
const (
30+
// libnvidiaContainerCliDockerRunTemplate is a text/template source that
// renders a `docker run -d` command line. The rendered command starts a
// privileged container named test-nvidia-container-cli from the ubuntu image
// with --runtime=nvidia, bind-mounts the toolkit binaries, their ".real"
// counterparts and the libnvidia-container shared libraries from the host, and
// uses $HOME/libnvidia-container-cli.sh (mounted at
// /usr/local/bin/libnvidia-container-cli.sh) as the entrypoint.
//
// Every {{.Field}} placeholder must be supplied when the template is executed.
// Library mount targets are hard-coded to /usr/lib/x86_64-linux-gnu.
//
// NOTE(review): when ToolkitDir is rendered as an empty string the toolkit
// mount becomes `-v /toolkit:/usr/local/nvidia/toolkit` — confirm this is
// intended for the system-install layout.
libnvidiaContainerCliDockerRunTemplate = `
31+
docker run -d --name test-nvidia-container-cli \
32+
--privileged \
33+
--runtime=nvidia \
34+
-e NVIDIA_VISIBLE_DEVICES=all \
35+
-e NVIDIA_DRIVER_CAPABILITIES=all \
36+
-v $HOME/libnvidia-container-cli.sh:/usr/local/bin/libnvidia-container-cli.sh \
37+
-v {{.NvidiaContainerCliSrc}}:/usr/bin/nvidia-container-cli \
38+
-v {{.NvidiaContainerCliRealSrc}}:{{.NvidiaContainerCliRealTarget}} \
39+
-v {{.NvidiaCtkSrc}}:/usr/bin/nvidia-ctk \
40+
-v {{.NvidiaCtkRealSrc}}:{{.NvidiaCtkRealTarget}} \
41+
-v {{.NvidiaContainerRuntimeSrc}}:/usr/bin/nvidia-container-runtime \
42+
-v {{.NvidiaContainerRuntimeRealSrc}}:{{.NvidiaContainerRuntimeRealTarget}} \
43+
-v {{.NvidiaContainerRuntimeHookSrc}}:/usr/bin/nvidia-container-runtime-hook \
44+
-v {{.NvidiaContainerRuntimeHookRealSrc}}:{{.NvidiaContainerRuntimeHookRealTarget}} \
45+
-v {{.NvidiaContainerToolkitSrc}}:/usr/bin/nvidia-container-toolkit \
46+
-v {{.NvidiaCdiHookSrc}}:/usr/bin/nvidia-cdi-hook \
47+
-v {{.NvidiaCdiHookRealSrc}}:{{.NvidiaCdiHookRealTarget}} \
48+
-v {{.NvidiaContainerRuntimeCdiSrc}}:/usr/bin/nvidia-container-runtime.cdi \
49+
-v {{.NvidiaContainerRuntimeCdiRealSrc}}:{{.NvidiaContainerRuntimeCdiRealTarget}} \
50+
-v {{.NvidiaContainerRuntimeLegacySrc}}:/usr/bin/nvidia-container-runtime.legacy \
51+
-v {{.NvidiaContainerRuntimeLegacyRealSrc}}:{{.NvidiaContainerRuntimeLegacyRealTarget}} \
52+
-v {{.ToolkitDir}}/toolkit:/usr/local/nvidia/toolkit \
53+
-v /etc/nvidia-container-runtime:/etc/nvidia-container-runtime \
54+
-v {{.LibNvidiaContainerSo1Src}}:/usr/lib/x86_64-linux-gnu/libnvidia-container.so.1 \
55+
-v {{.LibNvidiaContainerTargetSrc}}:/usr/lib/x86_64-linux-gnu/{{.LibNvidiaContainerTarget}} \
56+
-v {{.LibNvidiaContainerGoSo1Src}}:/usr/lib/x86_64-linux-gnu/libnvidia-container-go.so.1 \
57+
-v {{.LibNvidiaContainerGoTargetSrc}}:/usr/lib/x86_64-linux-gnu/{{.LibNvidiaContainerGoTarget}} \
58+
-e LD_LIBRARY_PATH=/usr/lib64:/usr/lib/x86_64-linux-gnu:/usr/lib/aarch64-linux-gnu:/lib64:/lib/x86_64-linux-gnu:/lib/aarch64-linux-gnu \
59+
--entrypoint /usr/local/bin/libnvidia-container-cli.sh \
60+
ubuntu
61+
`
62+
63+
// libnvidiaContainerCliTestTemplate is the bash script used as the container
// entrypoint. It contains no template placeholders. The script:
//   - installs curl and gnupg2 with apt-get;
//   - creates a scratch WORKDIR with a rootfs directory and exports both;
//   - re-executes bash inside new mount and PID namespaces via unshare;
//   - unpacks the Ubuntu 22.04 amd64 base tarball into the rootfs and adds a
//     "nvidia" user (uid 1000) to it;
//   - bind-mounts the rootfs onto itself and mounts proc, sysfs and two tmpfs
//     instances inside it;
//   - runs `nvidia-container-cli configure` for device 0 against the rootfs;
//   - pivots into the rootfs and execs `nvidia-smi -L`.
//
// The inner here-document delimiter is quoted ('IN_NS'), so the outer shell
// does not expand variables in it; ROOTFS and WORKDIR reach the inner shell
// through the environment.
libnvidiaContainerCliTestTemplate = `#!/usr/bin/env bash
64+
set -euo pipefail
65+
66+
apt-get update -y && apt-get install -y curl gnupg2
67+
68+
WORKDIR="$(mktemp -d)"
69+
ROOTFS="${WORKDIR}/rootfs"
70+
mkdir -p "${ROOTFS}"
71+
72+
export WORKDIR ROOTFS # make them visible in the child shell
73+
74+
unshare --mount --pid --fork --propagation private -- bash -eux <<'IN_NS'
75+
: "${ROOTFS:?}" "${WORKDIR:?}" # abort if either is empty
76+
77+
# 1 Populate minimal Ubuntu base
78+
curl -L http://cdimage.ubuntu.com/ubuntu-base/releases/22.04/release/ubuntu-base-22.04-base-amd64.tar.gz \
79+
| tar -C "$ROOTFS" -xz
80+
81+
# 2 Add non-root user
82+
useradd -R "$ROOTFS" -U -u 1000 -s /bin/bash nvidia
83+
84+
# 3 Bind-mount new root and unshare mounts
85+
mount --bind "$ROOTFS" "$ROOTFS"
86+
mount --make-private "$ROOTFS"
87+
cd "$ROOTFS"
88+
89+
# 4 Minimal virtual filesystems
90+
mount -t proc proc proc
91+
mount -t sysfs sys sys
92+
mount -t tmpfs tmp tmp
93+
mount -t tmpfs run run
94+
95+
# 5 GPU setup
96+
nvidia-container-cli --load-kmods --debug=container-cli.log \
97+
configure --ldconfig=@/sbin/ldconfig.real \
98+
--no-cgroups --utility --device=0 "$(pwd)"
99+
100+
# 6 Switch root
101+
mkdir -p mnt
102+
pivot_root . mnt
103+
umount -l /mnt
104+
105+
exec nvidia-smi -L
106+
IN_NS
107+
`
108+
)
109+
110+
// getToolkitDir tries to read the toolkit dir from /tmp/ctk_e2e_temp_dir.txt using the runner. Returns empty string if not found.
111+
func getToolkitDir(runner Runner) string {
112+
out, _, err := runner.Run("cat /tmp/ctk_e2e_temp_dir.txt")
113+
if err == nil {
114+
dir := strings.TrimSpace(out)
115+
if dir != "" {
116+
return dir
117+
}
118+
}
119+
return ""
120+
}
121+
122+
// getToolkitLayout returns the toolkit dir, a flag for flat layout, and a function to get the source path for a given filename.
123+
func getToolkitLayout(runner Runner) (toolkitDir string, useFlatLayout bool, srcPath func(string) string) {
124+
tempDir := getToolkitDir(runner)
125+
if tempDir == "" {
126+
return "", false, func(filename string) string {
127+
if strings.HasPrefix(filename, "lib") {
128+
return "/usr/lib/x86_64-linux-gnu/" + filename
129+
}
130+
return "/usr/bin/" + filename
131+
}
132+
}
133+
return tempDir, true, func(filename string) string {
134+
return tempDir + "/toolkit/" + filename
135+
}
136+
}
137+
138+
// getRealTargetPath returns the in-container path for filename: under
// <toolkitDir>/toolkit/ when toolkitDir is non-empty, under /usr/bin/
// otherwise.
func getRealTargetPath(filename, toolkitDir string) string {
	prefix := "/usr/bin/"
	if toolkitDir != "" {
		prefix = toolkitDir + "/toolkit/"
	}
	return prefix + filename
}
145+
146+
// Integration tests for Docker runtime
147+
var _ = Describe("nvidia-container-cli", Ordered, ContinueOnFailure, func() {
148+
var runner Runner
149+
150+
// Install the NVIDIA Container Toolkit
151+
BeforeAll(func(ctx context.Context) {
152+
runner = NewRunner(
153+
WithHost(sshHost),
154+
WithPort(sshPort),
155+
WithSshKey(sshKey),
156+
WithSshUser(sshUser),
157+
)
158+
159+
if installCTK {
160+
installer, err := NewToolkitInstaller(
161+
WithRunner(runner),
162+
WithImage(imageName+":"+imageTag),
163+
WithTemplate(dockerInstallTemplate),
164+
)
165+
Expect(err).ToNot(HaveOccurred())
166+
167+
err = installer.Install()
168+
Expect(err).ToNot(HaveOccurred())
169+
}
170+
})
171+
172+
When("running nvidia-smi -L", Ordered, func() {
173+
var hostOutput string
174+
var err error
175+
176+
BeforeAll(func(ctx context.Context) {
177+
hostOutput, _, err = runner.Run("nvidia-smi -L")
178+
Expect(err).ToNot(HaveOccurred())
179+
180+
_, _, err := runner.Run("docker pull ubuntu")
181+
Expect(err).ToNot(HaveOccurred())
182+
})
183+
184+
AfterAll(func(ctx context.Context) {
185+
_, _, err := runner.Run("docker rm -f test-nvidia-container-cli")
186+
Expect(err).ToNot(HaveOccurred())
187+
})
188+
189+
It("should support NVIDIA_VISIBLE_DEVICES and NVIDIA_DRIVER_CAPABILITIES", func(ctx context.Context) {
190+
// 1. Create the test script on the remote host at $HOME/test.sh using a here-document
191+
testScriptPath := "$HOME/libnvidia-container-cli.sh"
192+
testScript := libnvidiaContainerCliTestTemplate
193+
createScriptCmd := fmt.Sprintf("cat > %s <<'EOF'\n%s\nEOF\nchmod +x %s", testScriptPath, testScript, testScriptPath)
194+
_, _, err := runner.Run(createScriptCmd)
195+
Expect(err).ToNot(HaveOccurred())
196+
197+
// 2. Discover the symlink targets for the libraries on the remote host
198+
getTargetCmd := func(lib string) string {
199+
return fmt.Sprintf("readlink -f /usr/lib/x86_64-linux-gnu/%s.1", lib)
200+
}
201+
libNvidiaContainerTarget, _, err := runner.Run(getTargetCmd("libnvidia-container.so"))
202+
Expect(err).ToNot(HaveOccurred())
203+
204+
libNvidiaContainerTarget = strings.TrimSpace(libNvidiaContainerTarget)
205+
libNvidiaContainerTarget = strings.TrimPrefix(libNvidiaContainerTarget, "/usr/lib/x86_64-linux-gnu/")
206+
207+
libNvidiaContainerGoTarget, _, err := runner.Run(getTargetCmd("libnvidia-container-go.so"))
208+
Expect(err).ToNot(HaveOccurred())
209+
210+
libNvidiaContainerGoTarget = strings.TrimSpace(libNvidiaContainerGoTarget)
211+
libNvidiaContainerGoTarget = strings.TrimPrefix(libNvidiaContainerGoTarget, "/usr/lib/x86_64-linux-gnu/")
212+
213+
// 3. Get toolkit layout info and source path helper
214+
toolkitDir, _, srcPath := getToolkitLayout(runner)
215+
216+
// 4. Render the docker run template with the discovered targets and computed source paths
217+
tmpl, err := template.New("dockerRun").Parse(libnvidiaContainerCliDockerRunTemplate)
218+
Expect(err).ToNot(HaveOccurred())
219+
var dockerRunCmdBuilder strings.Builder
220+
err = tmpl.Execute(&dockerRunCmdBuilder, map[string]string{
221+
"ToolkitDir": toolkitDir,
222+
"NvidiaContainerCliSrc": srcPath("nvidia-container-cli"),
223+
"NvidiaContainerCliRealSrc": srcPath("nvidia-container-cli.real"),
224+
"NvidiaContainerCliRealTarget": getRealTargetPath("nvidia-container-cli.real", toolkitDir),
225+
"NvidiaCtkSrc": srcPath("nvidia-ctk"),
226+
"NvidiaCtkRealSrc": srcPath("nvidia-ctk.real"),
227+
"NvidiaCtkRealTarget": getRealTargetPath("nvidia-ctk.real", toolkitDir),
228+
"NvidiaContainerRuntimeSrc": srcPath("nvidia-container-runtime"),
229+
"NvidiaContainerRuntimeRealSrc": srcPath("nvidia-container-runtime.real"),
230+
"NvidiaContainerRuntimeRealTarget": getRealTargetPath("nvidia-container-runtime.real", toolkitDir),
231+
"NvidiaContainerRuntimeHookSrc": srcPath("nvidia-container-runtime-hook"),
232+
"NvidiaContainerRuntimeHookRealSrc": srcPath("nvidia-container-runtime-hook.real"),
233+
"NvidiaContainerRuntimeHookRealTarget": getRealTargetPath("nvidia-container-runtime-hook.real", toolkitDir),
234+
"NvidiaContainerToolkitSrc": srcPath("nvidia-container-toolkit"),
235+
"NvidiaCdiHookSrc": srcPath("nvidia-cdi-hook"),
236+
"NvidiaCdiHookRealSrc": srcPath("nvidia-cdi-hook.real"),
237+
"NvidiaCdiHookRealTarget": getRealTargetPath("nvidia-cdi-hook.real", toolkitDir),
238+
"NvidiaContainerRuntimeCdiSrc": srcPath("nvidia-container-runtime.cdi"),
239+
"NvidiaContainerRuntimeCdiRealSrc": srcPath("nvidia-container-runtime.cdi.real"),
240+
"NvidiaContainerRuntimeCdiRealTarget": getRealTargetPath("nvidia-container-runtime.cdi.real", toolkitDir),
241+
"NvidiaContainerRuntimeLegacySrc": srcPath("nvidia-container-runtime.legacy"),
242+
"NvidiaContainerRuntimeLegacyRealSrc": srcPath("nvidia-container-runtime.legacy.real"),
243+
"NvidiaContainerRuntimeLegacyRealTarget": getRealTargetPath("nvidia-container-runtime.legacy.real", toolkitDir),
244+
"LibNvidiaContainerSo1Src": srcPath("libnvidia-container.so.1"),
245+
"LibNvidiaContainerTargetSrc": srcPath(libNvidiaContainerTarget),
246+
"LibNvidiaContainerGoSo1Src": srcPath("libnvidia-container-go.so.1"),
247+
"LibNvidiaContainerGoTargetSrc": srcPath(libNvidiaContainerGoTarget),
248+
"LibNvidiaContainerTarget": libNvidiaContainerTarget,
249+
"LibNvidiaContainerGoTarget": libNvidiaContainerGoTarget,
250+
})
251+
Expect(err).ToNot(HaveOccurred())
252+
dockerRunCmd := dockerRunCmdBuilder.String()
253+
254+
// 5. Start the container using the rendered docker run command
255+
_, _, err = runner.Run(dockerRunCmd)
256+
Expect(err).ToNot(HaveOccurred())
257+
258+
// 6. Use Eventually to check the container logs contain hostOutput
259+
expected := strings.TrimSpace(strings.ReplaceAll(hostOutput, "\r", ""))
260+
Eventually(func() string {
261+
logs, _, err := runner.Run("docker logs test-nvidia-container-cli | tail -n 20")
262+
if err != nil {
263+
return ""
264+
}
265+
266+
logLines := strings.Split(strings.TrimSpace(logs), "\n")
267+
if len(logLines) == 0 {
268+
return ""
269+
}
270+
lastLine := strings.TrimSpace(strings.ReplaceAll(logLines[len(logLines)-1], "\r", ""))
271+
return lastLine
272+
}, "5m", "5s").Should(Equal(expected))
273+
})
274+
})
275+
})

0 commit comments

Comments
 (0)