|
| 1 | +/* |
| 2 | + * Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. |
| 3 | + * |
| 4 | + * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | + * you may not use this file except in compliance with the License. |
| 6 | + * You may obtain a copy of the License at |
| 7 | + * |
| 8 | + * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | + * |
| 10 | + * Unless required by applicable law or agreed to in writing, software |
| 11 | + * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | + * See the License for the specific language governing permissions and |
| 14 | + * limitations under the License. |
| 15 | + */ |
| 16 | + |
| 17 | +package e2e |
| 18 | + |
| 19 | +import ( |
| 20 | + "context" |
| 21 | + "fmt" |
| 22 | + "strings" |
| 23 | + "text/template" |
| 24 | + |
| 25 | + . "github.com/onsi/ginkgo/v2" |
| 26 | + . "github.com/onsi/gomega" |
| 27 | +) |
| 28 | + |
const (
	// dockerDindTemplate is the text/template source of the `docker run`
	// command that starts the Docker-in-Docker daemon container. It expects
	// the fields SharedDir, ContainerName and DockerSocket. Three
	// sub-directories of SharedDir are mounted into the container and the
	// daemon is told to listen on the unix socket DockerSocket.
	dockerDindTemplate = `docker run -d --rm --privileged \
	-v {{.SharedDir}}/etc/docker:/etc/docker \
	-v {{.SharedDir}}/run/nvidia:/run/nvidia \
	-v {{.SharedDir}}/usr/local/nvidia:/usr/local/nvidia \
	--name {{.ContainerName}} \
	docker:dind -H unix://{{.DockerSocket}}`

	// dockerToolkitTemplate is the text/template source of the `docker run`
	// command that starts the toolkit image with the test script mounted at
	// /usr/local/bin/libnvidia-container-cli.sh and passed as the command.
	// It expects the fields DindContainerName, ContainerName, DockerSocket,
	// TestScriptPath and ToolkitImage. The container reuses the volumes and
	// the PID namespace of the Docker-in-Docker container.
	//
	// The container is deliberately NOT started with --rm: the test reads the
	// result by polling `docker logs`, and the script terminates as soon as
	// it has printed that result. With --rm the container (and its logs)
	// would be deleted on exit, usually before the next poll could see the
	// output. The suite's cleanup removes the container with `docker rm -f`.
	dockerToolkitTemplate = `docker run -d --privileged \
	--volumes-from {{.DindContainerName}} \
	--pid "container:{{.DindContainerName}}" \
	-e RUNTIME_ARGS="--socket {{.DockerSocket}}" \
	-v {{.TestScriptPath}}:/usr/local/bin/libnvidia-container-cli.sh \
	--name {{.ContainerName}} \
	{{.ToolkitImage}} /usr/local/bin/libnvidia-container-cli.sh`

	// dockerDefaultConfigTemplate is the daemon.json content written for the
	// Docker-in-Docker daemon; it only configures a registry mirror.
	dockerDefaultConfigTemplate = `
{
	"registry-mirrors": ["https://mirror.gcr.io"]
}`

	// libnvidiaContainerCliTestTemplate is the bash script executed inside
	// the toolkit container. It installs curl and gnupg2, creates a temporary
	// root filesystem directory and then, inside new mount and PID
	// namespaces, unpacks an Ubuntu 22.04 base tarball into it, adds a user
	// "nvidia" (uid 1000), mounts proc/sysfs/tmpfs, runs
	// `nvidia-container-cli configure` for device 0 against that rootfs,
	// pivots into it and finally execs `nvidia-smi -L`.
	//
	// The IN_NS heredoc terminator must stay at column 0 because the heredoc
	// is opened with `<<`, not `<<-`.
	libnvidiaContainerCliTestTemplate = `#!/usr/bin/env bash
set -euo pipefail

apt-get update -y && apt-get install -y curl gnupg2

WORKDIR="$(mktemp -d)"
ROOTFS="${WORKDIR}/rootfs"
mkdir -p "${ROOTFS}"

export WORKDIR ROOTFS # make them visible in the child shell

unshare --mount --pid --fork --propagation private -- bash -eux <<'IN_NS'
	: "${ROOTFS:?}" "${WORKDIR:?}" # abort if either is empty

	# 1 Populate minimal Ubuntu base
	curl -L http://cdimage.ubuntu.com/ubuntu-base/releases/22.04/release/ubuntu-base-22.04-base-amd64.tar.gz \
		| tar -C "$ROOTFS" -xz

	# 2 Add non-root user
	useradd -R "$ROOTFS" -U -u 1000 -s /bin/bash nvidia

	# 3 Bind-mount new root and unshare mounts
	mount --bind "$ROOTFS" "$ROOTFS"
	mount --make-private "$ROOTFS"
	cd "$ROOTFS"

	# 4 Minimal virtual filesystems
	mount -t proc proc proc
	mount -t sysfs sys sys
	mount -t tmpfs tmp tmp
	mount -t tmpfs run run

	# 5 GPU setup
	nvidia-container-cli --load-kmods --debug=container-cli.log \
		configure --ldconfig=@/sbin/ldconfig.real \
		--no-cgroups --utility --device=0 "$(pwd)"

	# 6 Switch root
	mkdir -p mnt
	pivot_root . mnt
	umount -l /mnt

	exec nvidia-smi -L
IN_NS
`
)
| 96 | + |
| 97 | +// Integration tests for Docker runtime |
| 98 | +var _ = Describe("nvidia-container-cli", Ordered, ContinueOnFailure, func() { |
| 99 | + var runner Runner |
| 100 | + var sharedDir string |
| 101 | + var dindContainerName string |
| 102 | + var toolkitContainerName string |
| 103 | + var dockerSocket string |
| 104 | + var hostOutput string |
| 105 | + |
| 106 | + // Install the NVIDIA Container Toolkit |
| 107 | + BeforeAll(func(ctx context.Context) { |
| 108 | + runner = NewRunner( |
| 109 | + WithHost(sshHost), |
| 110 | + WithPort(sshPort), |
| 111 | + WithSshKey(sshKey), |
| 112 | + WithSshUser(sshUser), |
| 113 | + ) |
| 114 | + |
| 115 | + // Setup shared directory and container names |
| 116 | + sharedDir = "/tmp/nvidia-container-toolkit-test" |
| 117 | + dindContainerName = "nvidia-container-toolkit-dind" |
| 118 | + toolkitContainerName = "nvidia-container-toolkit-test" |
| 119 | + dockerSocket = "/run/nvidia/docker.sock" |
| 120 | + |
| 121 | + // Get host nvidia-smi output |
| 122 | + var err error |
| 123 | + hostOutput, _, err = runner.Run("nvidia-smi -L") |
| 124 | + Expect(err).ToNot(HaveOccurred()) |
| 125 | + |
| 126 | + // Pull ubuntu image |
| 127 | + _, _, err = runner.Run("docker pull ubuntu") |
| 128 | + Expect(err).ToNot(HaveOccurred()) |
| 129 | + |
| 130 | + // Create shared directory structure |
| 131 | + _, _, err = runner.Run(fmt.Sprintf("mkdir -p %s/{etc/docker,run/nvidia,usr/local/nvidia}", sharedDir)) |
| 132 | + Expect(err).ToNot(HaveOccurred()) |
| 133 | + |
| 134 | + // Copy docker default config |
| 135 | + createDockerConfigCmd := fmt.Sprintf("cat > %s/etc/docker/daemon.json <<'EOF'\n%s\nEOF", |
| 136 | + sharedDir, dockerDefaultConfigTemplate) |
| 137 | + _, _, err = runner.Run(createDockerConfigCmd) |
| 138 | + Expect(err).ToNot(HaveOccurred()) |
| 139 | + |
| 140 | + // Start Docker-in-Docker container |
| 141 | + tmpl, err := template.New("dockerDind").Parse(dockerDindTemplate) |
| 142 | + Expect(err).ToNot(HaveOccurred()) |
| 143 | + |
| 144 | + var dindCmdBuilder strings.Builder |
| 145 | + err = tmpl.Execute(&dindCmdBuilder, map[string]string{ |
| 146 | + "SharedDir": sharedDir, |
| 147 | + "ContainerName": dindContainerName, |
| 148 | + "DockerSocket": dockerSocket, |
| 149 | + }) |
| 150 | + Expect(err).ToNot(HaveOccurred()) |
| 151 | + |
| 152 | + _, _, err = runner.Run(dindCmdBuilder.String()) |
| 153 | + Expect(err).ToNot(HaveOccurred()) |
| 154 | + }) |
| 155 | + |
| 156 | + AfterAll(func(ctx context.Context) { |
| 157 | + // Cleanup containers |
| 158 | + runner.Run(fmt.Sprintf("docker rm -f %s", toolkitContainerName)) |
| 159 | + runner.Run(fmt.Sprintf("docker rm -f %s", dindContainerName)) |
| 160 | + |
| 161 | + // Cleanup shared directory |
| 162 | + _, _, err := runner.Run(fmt.Sprintf("rm -rf %s", sharedDir)) |
| 163 | + Expect(err).ToNot(HaveOccurred()) |
| 164 | + }) |
| 165 | + |
| 166 | + When("running nvidia-smi -L", Ordered, func() { |
| 167 | + It("should support NVIDIA_VISIBLE_DEVICES and NVIDIA_DRIVER_CAPABILITIES", func(ctx context.Context) { |
| 168 | + // 1. Create the test script |
| 169 | + testScriptPath := fmt.Sprintf("%s/libnvidia-container-cli.sh", sharedDir) |
| 170 | + createScriptCmd := fmt.Sprintf("cat > %s <<'EOF'\n%s\nEOF\nchmod +x %s", |
| 171 | + testScriptPath, libnvidiaContainerCliTestTemplate, testScriptPath) |
| 172 | + _, _, err := runner.Run(createScriptCmd) |
| 173 | + Expect(err).ToNot(HaveOccurred()) |
| 174 | + |
| 175 | + // 2. Start the toolkit container |
| 176 | + tmpl, err := template.New("dockerToolkit").Parse(dockerToolkitTemplate) |
| 177 | + Expect(err).ToNot(HaveOccurred()) |
| 178 | + |
| 179 | + var toolkitCmdBuilder strings.Builder |
| 180 | + err = tmpl.Execute(&toolkitCmdBuilder, map[string]string{ |
| 181 | + "DindContainerName": dindContainerName, |
| 182 | + "ContainerName": toolkitContainerName, |
| 183 | + "DockerSocket": dockerSocket, |
| 184 | + "TestScriptPath": testScriptPath, |
| 185 | + "ToolkitImage": imageName + ":" + imageTag, |
| 186 | + }) |
| 187 | + Expect(err).ToNot(HaveOccurred()) |
| 188 | + |
| 189 | + _, _, err = runner.Run(toolkitCmdBuilder.String()) |
| 190 | + Expect(err).ToNot(HaveOccurred()) |
| 191 | + |
| 192 | + // 3. Wait for and verify the output |
| 193 | + expected := strings.TrimSpace(strings.ReplaceAll(hostOutput, "\r", "")) |
| 194 | + Eventually(func() string { |
| 195 | + logs, _, err := runner.Run(fmt.Sprintf("docker logs %s | tail -n 20", toolkitContainerName)) |
| 196 | + if err != nil { |
| 197 | + return "" |
| 198 | + } |
| 199 | + |
| 200 | + logLines := strings.Split(strings.TrimSpace(logs), "\n") |
| 201 | + if len(logLines) == 0 { |
| 202 | + return "" |
| 203 | + } |
| 204 | + return strings.TrimSpace(strings.ReplaceAll(logLines[len(logLines)-1], "\r", "")) |
| 205 | + }, "5m", "5s").Should(Equal(expected)) |
| 206 | + }) |
| 207 | + }) |
| 208 | +}) |
0 commit comments