Skip to content

Commit 9674787

Browse files
[no-relnote] Add E2E for libnvidia-container
Signed-off-by: Carlos Eduardo Arango Gutierrez <[email protected]>
1 parent 890db82 commit 9674787

File tree

3 files changed

+228
-5
lines changed

3 files changed

+228
-5
lines changed

tests/e2e/Makefile

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,14 @@ LOG_ARTIFACTS_DIR ?= $(CURDIR)/e2e_logs
2020

2121
GINKGO_BIN := $(CURDIR)/bin/ginkgo
2222

23+
# If GINKGO_FOCUS is not set, run all tests
24+
# Currently available tests:
25+
# - nvidia-container-cli
26+
# - docker
27+
GINKGO_FOCUS ?=
28+
2329
test: $(GINKGO_BIN)
24-
$(GINKGO_BIN) $(GINKGO_ARGS) -v --json-report ginkgo.json ./tests/e2e/...
30+
$(GINKGO_BIN) $(GINKGO_ARGS) -v --json-report ginkgo.json --focus="$(GINKGO_FOCUS)" ./tests/e2e/...
2531

2632
$(GINKGO_BIN):
2733
mkdir -p $(CURDIR)/bin

tests/e2e/installer.go

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,11 +28,20 @@ var dockerInstallTemplate = `
2828
#! /usr/bin/env bash
2929
set -xe
3030
31-
: ${IMAGE:={{.Image}}}
31+
# if the TEMP_DIR is already set, use it
32+
if [ -f /tmp/ctk_e2e_temp_dir.txt ]; then
33+
TEMP_DIR=$(cat /tmp/ctk_e2e_temp_dir.txt)
34+
else
35+
TEMP_DIR="/tmp/ctk_e2e.$(date +%s)_$RANDOM"
36+
echo "$TEMP_DIR" > /tmp/ctk_e2e_temp_dir.txt
37+
fi
38+
39+
# if TEMP_DIR does not exist, create it
40+
if [ ! -d "$TEMP_DIR" ]; then
41+
mkdir -p "$TEMP_DIR"
42+
fi
3243
33-
# Create a temporary directory
34-
TEMP_DIR="/tmp/ctk_e2e.$(date +%s)_$RANDOM"
35-
mkdir -p "$TEMP_DIR"
44+
: ${IMAGE:={{.Image}}}
3645
3746
# Given that docker has an init function that checks for the existence of the
3847
# nvidia-container-toolkit, we need to create a symlink to the nvidia-container-runtime-hook
Lines changed: 208 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,208 @@
1+
/*
2+
* Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package e2e
18+
19+
import (
20+
"context"
21+
"fmt"
22+
"strings"
23+
"text/template"
24+
25+
. "github.com/onsi/ginkgo/v2"
26+
. "github.com/onsi/gomega"
27+
)
28+
29+
const (
30+
// dockerDindTemplate is a text/template for the `docker run` command that
// starts a detached, privileged docker:dind container named
// {{.ContainerName}}. The etc/docker, run/nvidia and usr/local/nvidia
// subdirectories of {{.SharedDir}} are bind-mounted into the container, and
// the inner daemon is told to listen on unix://{{.DockerSocket}}.
dockerDindTemplate = `docker run -d --rm --privileged \
31+
-v {{.SharedDir}}/etc/docker:/etc/docker \
32+
-v {{.SharedDir}}/run/nvidia:/run/nvidia \
33+
-v {{.SharedDir}}/usr/local/nvidia:/usr/local/nvidia \
34+
--name {{.ContainerName}} \
35+
docker:dind -H unix://{{.DockerSocket}}`
36+
37+
// dockerToolkitTemplate is a text/template for the `docker run` command that
// starts a detached, privileged container from {{.ToolkitImage}} named
// {{.ContainerName}}. The container reuses the volumes and the PID namespace
// of {{.DindContainerName}}, receives RUNTIME_ARGS pointing at
// {{.DockerSocket}}, and gets {{.TestScriptPath}} bind-mounted at
// /usr/local/bin/libnvidia-container-cli.sh, which is also passed as the
// argument after the image name.
//
// NOTE(review): because of --rm the container is removed as soon as its main
// process exits, after which `docker logs` on it fails — confirm the polling
// in the spec reliably sees the final output line before removal.
dockerToolkitTemplate = `docker run -d --rm --privileged \
38+
--volumes-from {{.DindContainerName}} \
39+
--pid "container:{{.DindContainerName}}" \
40+
-e RUNTIME_ARGS="--socket {{.DockerSocket}}" \
41+
-v {{.TestScriptPath}}:/usr/local/bin/libnvidia-container-cli.sh \
42+
--name {{.ContainerName}} \
43+
{{.ToolkitImage}} /usr/local/bin/libnvidia-container-cli.sh`
44+
45+
// dockerDefaultConfigTemplate is the JSON body written to
// <sharedDir>/etc/docker/daemon.json during setup. It only configures
// https://mirror.gcr.io as a registry mirror. Despite the name it contains no
// template actions and is inserted verbatim with fmt.Sprintf.
dockerDefaultConfigTemplate = `
46+
{
47+
"registry-mirrors": ["https://mirror.gcr.io"]
48+
}`
49+
50+
// libnvidiaContainerCliTestTemplate is the bash script that the spec writes to
// the shared directory and runs inside the toolkit container. It contains no
// template actions. The script:
//
//   - installs curl and gnupg2 with apt-get and creates a scratch rootfs
//     directory under a fresh `mktemp -d` work directory;
//   - re-executes the remaining steps via `unshare` in new mount and PID
//     namespaces with private mount propagation;
//   - unpacks the Ubuntu 22.04 base tarball into the rootfs and adds a user
//     named nvidia with uid 1000;
//   - bind-mounts the rootfs onto itself and mounts proc, sysfs and two tmpfs
//     instances (tmp, run) inside it;
//   - runs `nvidia-container-cli configure` against the rootfs with
//     --utility and --device=0 (i.e. only device 0 is requested);
//   - pivot_roots into the rootfs and finally execs `nvidia-smi -L`, whose
//     output is what the spec compares with the host.
libnvidiaContainerCliTestTemplate = `#!/usr/bin/env bash
51+
set -euo pipefail
52+
53+
apt-get update -y && apt-get install -y curl gnupg2
54+
55+
WORKDIR="$(mktemp -d)"
56+
ROOTFS="${WORKDIR}/rootfs"
57+
mkdir -p "${ROOTFS}"
58+
59+
export WORKDIR ROOTFS # make them visible in the child shell
60+
61+
unshare --mount --pid --fork --propagation private -- bash -eux <<'IN_NS'
62+
: "${ROOTFS:?}" "${WORKDIR:?}" # abort if either is empty
63+
64+
# 1 Populate minimal Ubuntu base
65+
curl -L http://cdimage.ubuntu.com/ubuntu-base/releases/22.04/release/ubuntu-base-22.04-base-amd64.tar.gz \
66+
| tar -C "$ROOTFS" -xz
67+
68+
# 2 Add non-root user
69+
useradd -R "$ROOTFS" -U -u 1000 -s /bin/bash nvidia
70+
71+
# 3 Bind-mount new root and unshare mounts
72+
mount --bind "$ROOTFS" "$ROOTFS"
73+
mount --make-private "$ROOTFS"
74+
cd "$ROOTFS"
75+
76+
# 4 Minimal virtual filesystems
77+
mount -t proc proc proc
78+
mount -t sysfs sys sys
79+
mount -t tmpfs tmp tmp
80+
mount -t tmpfs run run
81+
82+
# 5 GPU setup
83+
nvidia-container-cli --load-kmods --debug=container-cli.log \
84+
configure --ldconfig=@/sbin/ldconfig.real \
85+
--no-cgroups --utility --device=0 "$(pwd)"
86+
87+
# 6 Switch root
88+
mkdir -p mnt
89+
pivot_root . mnt
90+
umount -l /mnt
91+
92+
exec nvidia-smi -L
93+
IN_NS
94+
`
95+
)
96+
97+
// Integration tests for nvidia-container-cli (libnvidia-container)
98+
var _ = Describe("nvidia-container-cli", Ordered, ContinueOnFailure, func() {
99+
var runner Runner
100+
var sharedDir string
101+
var dindContainerName string
102+
var toolkitContainerName string
103+
var dockerSocket string
104+
var hostOutput string
105+
106+
// Prepare the runner, the shared directory and the Docker-in-Docker container
107+
BeforeAll(func(ctx context.Context) {
	// Build the command runner from the suite-level SSH settings (sshHost,
	// sshPort, sshKey and sshUser are declared outside this view).
	runner = NewRunner(
		WithHost(sshHost),
		WithPort(sshPort),
		WithSshKey(sshKey),
		WithSshUser(sshUser),
	)

	// Fixed paths and container names shared by setup, the spec and AfterAll.
	sharedDir = "/tmp/nvidia-container-toolkit-test"
	dindContainerName = "nvidia-container-toolkit-dind"
	toolkitContainerName = "nvidia-container-toolkit-test"
	dockerSocket = "/run/nvidia/docker.sock"

	// Capture the host's `nvidia-smi -L` output; the spec uses it as the
	// reference for what the container should report.
	var err error
	hostOutput, _, err = runner.Run("nvidia-smi -L")
	Expect(err).ToNot(HaveOccurred())

	// Pull ubuntu image
	_, _, err = runner.Run("docker pull ubuntu")
	Expect(err).ToNot(HaveOccurred())

	// Create the shared directory structure. Every directory is listed
	// explicitly instead of using `{a,b,c}` brace expansion: that syntax is a
	// bash extension, and under a POSIX sh it would create a single literal
	// directory and make the daemon.json write below fail.
	mkdirCmd := fmt.Sprintf(
		"mkdir -p %[1]s/etc/docker %[1]s/run/nvidia %[1]s/usr/local/nvidia",
		sharedDir,
	)
	_, _, err = runner.Run(mkdirCmd)
	Expect(err).ToNot(HaveOccurred())

	// Write the default Docker daemon configuration through a quoted heredoc
	// so the remote shell does not expand anything inside the JSON.
	createDockerConfigCmd := fmt.Sprintf("cat > %s/etc/docker/daemon.json <<'EOF'\n%s\nEOF",
		sharedDir, dockerDefaultConfigTemplate)
	_, _, err = runner.Run(createDockerConfigCmd)
	Expect(err).ToNot(HaveOccurred())

	// Render and run the Docker-in-Docker start command.
	tmpl, err := template.New("dockerDind").Parse(dockerDindTemplate)
	Expect(err).ToNot(HaveOccurred())

	var dindCmdBuilder strings.Builder
	err = tmpl.Execute(&dindCmdBuilder, map[string]string{
		"SharedDir":     sharedDir,
		"ContainerName": dindContainerName,
		"DockerSocket":  dockerSocket,
	})
	Expect(err).ToNot(HaveOccurred())

	_, _, err = runner.Run(dindCmdBuilder.String())
	Expect(err).ToNot(HaveOccurred())
})
155+
156+
AfterAll(func(ctx context.Context) {
	// Remove the test containers, toolkit first and then dind. This is
	// best-effort: the results are deliberately discarded because either
	// container may already be gone (both are started with --rm).
	for _, name := range []string{toolkitContainerName, dindContainerName} {
		_, _, _ = runner.Run(fmt.Sprintf("docker rm -f %s", name))
	}

	// Deleting the shared directory, in contrast, must succeed.
	removeSharedDir := fmt.Sprintf("rm -rf %s", sharedDir)
	_, _, err := runner.Run(removeSharedDir)
	Expect(err).ToNot(HaveOccurred())
})
165+
166+
When("running nvidia-smi -L", Ordered, func() {
	// Runs the nvidia-container-cli test script inside the toolkit container
	// and checks that the `nvidia-smi -L` line it prints matches the host.
	It("should support NVIDIA_VISIBLE_DEVICES and NVIDIA_DRIVER_CAPABILITIES", func(ctx context.Context) {
		// 1. Create the test script in the shared directory and make it
		// executable. The quoted heredoc keeps the remote shell from
		// expanding the script's own variables.
		testScriptPath := fmt.Sprintf("%s/libnvidia-container-cli.sh", sharedDir)
		createScriptCmd := fmt.Sprintf("cat > %s <<'EOF'\n%s\nEOF\nchmod +x %s",
			testScriptPath, libnvidiaContainerCliTestTemplate, testScriptPath)
		_, _, err := runner.Run(createScriptCmd)
		Expect(err).ToNot(HaveOccurred())

		// 2. Start the toolkit container
		tmpl, err := template.New("dockerToolkit").Parse(dockerToolkitTemplate)
		Expect(err).ToNot(HaveOccurred())

		var toolkitCmdBuilder strings.Builder
		err = tmpl.Execute(&toolkitCmdBuilder, map[string]string{
			"DindContainerName": dindContainerName,
			"ContainerName":     toolkitContainerName,
			"DockerSocket":      dockerSocket,
			"TestScriptPath":    testScriptPath,
			"ToolkitImage":      imageName + ":" + imageTag,
		})
		Expect(err).ToNot(HaveOccurred())

		_, _, err = runner.Run(toolkitCmdBuilder.String())
		Expect(err).ToNot(HaveOccurred())

		// 3. Wait for and verify the output.
		//
		// The script configures the container with --device=0 only, so the
		// container reports a single GPU line. Compare it with the first line
		// of the host listing rather than the whole (possibly multi-line)
		// output, which could never equal a single log line on multi-GPU
		// hosts. On single-GPU hosts this is the same value as before.
		hostListing := strings.TrimSpace(strings.ReplaceAll(hostOutput, "\r", ""))
		expected, _, _ := strings.Cut(hostListing, "\n")
		expected = strings.TrimSpace(expected)
		// An empty reference would let the poll below succeed vacuously,
		// because it yields "" whenever `docker logs` fails.
		Expect(expected).ToNot(BeEmpty())

		Eventually(func() string {
			logs, _, err := runner.Run(fmt.Sprintf("docker logs %s | tail -n 20", toolkitContainerName))
			if err != nil {
				return ""
			}

			// Keep only the last line of the trimmed log output. LastIndex
			// returns -1 when there is no newline, which makes the slice
			// start at 0 and keeps the whole string.
			trimmed := strings.TrimSpace(logs)
			lastLine := trimmed[strings.LastIndex(trimmed, "\n")+1:]
			return strings.TrimSpace(strings.ReplaceAll(lastLine, "\r", ""))
		}, "5m", "5s").Should(Equal(expected))
	})
})
208+
})

0 commit comments

Comments
 (0)