Skip to content

Commit 3649ca9

Browse files
EtiennePerot authored and
gvisor-bot committed
On COS, add NVIDIA library directory to LD configuration and update cache.
Unlike Ubuntu VMs where we use Docker's `--gpus` flag, COS VMs do not use this flag and instead mount the NVIDIA library directories automatically. However, nothing guarantees that these directories are added to the LD config. This change fixes that. It takes advantage of the fact that all GPU tests have the sniffer binary as entrypoint, which slightly overloads the role of the sniffer within the GPU test infrastructure... but then again the ioctl sniffer is already deeply intertwined with ld configuration because it already overrides the `ioctl` libc function, so this doesn't seem like too big of a stretch. This change makes the ffmpeg tests succeed with `runc` on COS, but they still fail with gVisor (with `CUDA_ERROR_OUT_OF_MEMORY` errors). So there must be some further gVisor-specific error. Updates #11351 Updates #11321 PiperOrigin-RevId: 715222952
1 parent 4ba931d commit 3649ca9

File tree

4 files changed

+63
-3
lines changed

4 files changed

+63
-3
lines changed

pkg/test/dockerutil/gpu.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,7 @@ func GPURunOpts(sniffGPUOpts SniffGPUOpts) (RunOpts, error) {
126126
Type: mount.TypeBind,
127127
ReadOnly: true,
128128
})
129+
break
129130
}
130131
}
131132
for _, nvidiaLib64 := range []string{
@@ -139,6 +140,8 @@ func GPURunOpts(sniffGPUOpts SniffGPUOpts) (RunOpts, error) {
139140
Type: mount.TypeBind,
140141
ReadOnly: true,
141142
})
143+
sniffGPUOpts.addLDPath = "/usr/local/nvidia/lib64"
144+
break
142145
}
143146
}
144147

@@ -166,6 +169,10 @@ type SniffGPUOpts struct {
166169
// If unset, defaults to `DefaultGPUCapabilities`.
167170
Capabilities string
168171

172+
// If set, add the given directory to the ld cache.
173+
// Must be a directory visible from within the container.
174+
addLDPath string
175+
169176
// The fields below are set internally.
170177
runSniffer *os.File
171178
}
@@ -191,6 +198,9 @@ func (sgo *SniffGPUOpts) prepend(argv []string) []string {
191198
if !sgo.AllowIncompatibleIoctl {
192199
snifferArgv = append(snifferArgv, "--enforce_compatibility=INSTANT")
193200
}
201+
if sgo.addLDPath != "" {
202+
snifferArgv = append(snifferArgv, fmt.Sprintf("--add_ld_path=%s", sgo.addLDPath))
203+
}
194204
return append(snifferArgv, argv...)
195205
}
196206

tools/ioctl_sniffer/BUILD

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,10 @@ cc_binary(
3434

3535
go_binary(
3636
name = "run_sniffer",
37-
srcs = ["run_sniffer.go"],
37+
srcs = [
38+
"ld.go",
39+
"run_sniffer.go",
40+
],
3841
embedsrcs = [
3942
# The 'keep' comment is needed to prevent glaze from removing this
4043
# dependency. This is because the `:ioctl_hook` `cc_binary` rule

tools/ioctl_sniffer/ld.go

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
// Copyright 2025 The gVisor Authors.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
package main
16+
17+
import (
18+
"context"
19+
"fmt"
20+
"os"
21+
"os/exec"
22+
)
23+
24+
// addPathToLd adds the given path to the ld cache.
//
// It writes a drop-in config file at /etc/ld.so.conf.d/gvisor.conf listing
// `path`, removes any existing /etc/ld.so.cache, and then runs `ldconfig`
// to regenerate the cache. `ctx` bounds the lifetime of the `ldconfig`
// subprocess.
//
// NOTE(review): this presumes the image's /etc/ld.so.conf includes
// /etc/ld.so.conf.d/*.conf; otherwise ldconfig will not see the drop-in
// file. Confirm for the target images.
//
// Returns a wrapped error if the config directory or file cannot be
// written, if the stale cache cannot be removed, or if `ldconfig` fails
// (in which case its combined stdout/stderr is included in the error).
func addPathToLd(ctx context.Context, path string) error {
	const (
		myLdConfigDir  = "/etc/ld.so.conf.d"
		myLdConfigPath = myLdConfigDir + "/gvisor.conf"
	)
	// os.WriteFile does not create parent directories, so make sure the
	// drop-in directory exists before writing into it.
	if err := os.MkdirAll(myLdConfigDir, 0755); err != nil {
		return fmt.Errorf("failed to create ld config directory %q: %w", myLdConfigDir, err)
	}
	// One directory per line; terminate the last line with a newline so the
	// file is a well-formed text file.
	contents := fmt.Sprintf("# Generated by gVisor ioctl sniffer\n%s\n", path)
	if err := os.WriteFile(myLdConfigPath, []byte(contents), 0644); err != nil {
		return fmt.Errorf("failed to write to ld config file %q: %w", myLdConfigPath, err)
	}
	// Drop the existing cache so that ldconfig rebuilds it from scratch. A
	// missing cache file is fine.
	if err := os.Remove("/etc/ld.so.cache"); err != nil && !os.IsNotExist(err) {
		return fmt.Errorf("failed to remove ld cache file: %w", err)
	}
	output, err := exec.CommandContext(ctx, "ldconfig").CombinedOutput()
	if err != nil {
		return fmt.Errorf("failed to run ldconfig: %w; output: %s", err, string(output))
	}
	return nil
}

tools/ioctl_sniffer/run_sniffer.go

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,11 @@ import (
2828
_ "embed" // Necessary to use go:embed.
2929
)
3030

31-
var enforceCompatibility = flag.String("enforce_compatibility", "", "May be set to 'INSTANT' or 'REPORT'. If set, the sniffer will return a non-zero error code if it detects an unsupported ioctl. 'INSTANT' causes the sniffer to exit immediately when this happens. 'REPORT' causes the sniffer to report all unsupported ioctls at the end of execution.")
32-
var verbose = flag.Bool("verbose", false, "If true, the sniffer will print all Nvidia ioctls it sees.")
31+
var (
32+
enforceCompatibility = flag.String("enforce_compatibility", "", "May be set to 'INSTANT' or 'REPORT'. If set, the sniffer will return a non-zero error code if it detects an unsupported ioctl. 'INSTANT' causes the sniffer to exit immediately when this happens. 'REPORT' causes the sniffer to report all unsupported ioctls at the end of execution.")
33+
verbose = flag.Bool("verbose", false, "If true, the sniffer will print all Nvidia ioctls it sees.")
34+
addLdPath = flag.String("add_ld_path", "", "If set, reconfigure the ld cache to include the given directory")
35+
)
3336

3437
//go:embed libioctl_hook.so
3538
var ioctlHookSharedObject []byte
@@ -68,6 +71,12 @@ func Main(ctx context.Context) error {
6871
log.SetLevel(log.Debug)
6972
}
7073

74+
if *addLdPath != "" {
75+
if err := addPathToLd(ctx, *addLdPath); err != nil {
76+
return fmt.Errorf("failed to add path %q to ld: %w", *addLdPath, err)
77+
}
78+
}
79+
7180
// Init our sniffer
7281
if err := sniffer.Init(); err != nil {
7382
return fmt.Errorf("failed to init sniffer: %w", err)

0 commit comments

Comments
 (0)