Skip to content

Commit 0e79c46

Browse files
cdesiniotis authored and elezar committed
Add 'vfio' mode to pkg/nvcdi for generating CDI specs for NVIDIA passthrough GPUs
Signed-off-by: Christopher Desiniotis <[email protected]>
1 parent d1d1676 commit 0e79c46

File tree

3 files changed

+136
-0
lines changed

3 files changed

+136
-0
lines changed

pkg/nvcdi/lib-vfio.go

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
/**
2+
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
**/
16+
17+
package nvcdi
18+
19+
import (
20+
"fmt"
21+
"path/filepath"
22+
"strconv"
23+
24+
"tags.cncf.io/container-device-interface/pkg/cdi"
25+
"tags.cncf.io/container-device-interface/specs-go"
26+
)
27+
28+
// vfiolib is the nvcdilib-derived type that New selects as the spec generator
// factory in 'vfio' mode. It shares nvcdilib's underlying type, so an
// *nvcdilib can be converted to a *vfiolib directly.
type vfiolib nvcdilib
29+
30+
// vfioDevice describes a single NVIDIA GPU for which a CDI device spec is
// generated in 'vfio' mode.
type vfioDevice struct {
	// index is the position of the GPU in the list returned by the PCI
	// library; it is used as the CDI device name.
	index int
	// group is the IOMMU group of the GPU; it selects the /dev/vfio/<group>
	// device node.
	group int
	// devRoot is the host directory that is prefixed to device node paths to
	// form their host paths.
	devRoot string
}
35+
36+
// Compile-time check that *vfiolib implements deviceSpecGeneratorFactory.
var _ deviceSpecGeneratorFactory = (*vfiolib)(nil)
37+
38+
func (l *vfiolib) DeviceSpecGenerators(ids ...string) (DeviceSpecGenerator, error) {
39+
vfioDevices, err := l.getVfioDevices(ids...)
40+
if err != nil {
41+
return nil, err
42+
}
43+
44+
var deviceSpecGenerators DeviceSpecGenerators
45+
for _, vfioDevice := range vfioDevices {
46+
deviceSpecGenerators = append(deviceSpecGenerators, vfioDevice)
47+
}
48+
49+
return deviceSpecGenerators, nil
50+
}
51+
52+
// GetDeviceSpecs returns the CDI device specs the specified vfio device.
53+
func (l *vfioDevice) GetDeviceSpecs() ([]specs.Device, error) {
54+
path := fmt.Sprintf("/dev/vfio/%d", l.group)
55+
deviceSpec := specs.Device{
56+
Name: fmt.Sprintf("%d", l.index),
57+
ContainerEdits: specs.ContainerEdits{
58+
DeviceNodes: []*specs.DeviceNode{
59+
{
60+
Path: path,
61+
HostPath: filepath.Join(l.devRoot, path),
62+
},
63+
},
64+
},
65+
}
66+
return []specs.Device{deviceSpec}, nil
67+
}
68+
69+
// GetCommonEdits returns common edits for ALL devices.
70+
// Note, currently there are no common edits.
71+
func (l *vfiolib) GetCommonEdits() (*cdi.ContainerEdits, error) {
72+
e := cdi.ContainerEdits{
73+
ContainerEdits: &specs.ContainerEdits{
74+
DeviceNodes: []*specs.DeviceNode{
75+
{
76+
Path: "/dev/vfio/vfio",
77+
HostPath: filepath.Join(l.devRoot, "/dev/vfio/vfio"),
78+
},
79+
},
80+
},
81+
}
82+
return &e, nil
83+
}
84+
85+
func (l *vfiolib) getVfioDevices(ids ...string) ([]*vfioDevice, error) {
86+
var vfioDevices []*vfioDevice
87+
for _, id := range ids {
88+
if id == "all" {
89+
return l.getAllVfioDevices()
90+
}
91+
index, err := strconv.ParseInt(id, 10, 32)
92+
if err != nil {
93+
return nil, fmt.Errorf("invalid channel ID %v: %w", id, err)
94+
}
95+
i := int(index)
96+
dev, err := l.nvpcilib.GetGPUByIndex(i)
97+
if err != nil {
98+
return nil, fmt.Errorf("failed to get device: %w", err)
99+
}
100+
vfioDevices = append(vfioDevices, &vfioDevice{index: i, group: dev.IommuGroup, devRoot: l.devRoot})
101+
}
102+
103+
return vfioDevices, nil
104+
}
105+
106+
func (l *vfiolib) getAllVfioDevices() ([]*vfioDevice, error) {
107+
devices, err := l.nvpcilib.GetGPUs()
108+
if err != nil {
109+
return nil, fmt.Errorf("failed getting NVIDIA GPUs: %v", err)
110+
}
111+
112+
var vfioDevices []*vfioDevice
113+
for i, dev := range devices {
114+
if dev.Driver != "vfio-pci" {
115+
continue
116+
}
117+
l.logger.Debugf("Found NVIDIA device: address=%s, driver=%s, iommu_group=%d, deviceId=%x",
118+
dev.Address, dev.Driver, dev.IommuGroup, dev.Device)
119+
vfioDevices = append(vfioDevices, &vfioDevice{index: i, group: dev.IommuGroup, devRoot: l.devRoot})
120+
}
121+
return vfioDevices, nil
122+
}

pkg/nvcdi/lib.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ import (
2121

2222
"github.com/NVIDIA/go-nvlib/pkg/nvlib/device"
2323
"github.com/NVIDIA/go-nvlib/pkg/nvlib/info"
24+
"github.com/NVIDIA/go-nvlib/pkg/nvpci"
2425
"github.com/NVIDIA/go-nvml/pkg/nvml"
2526

2627
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
@@ -54,6 +55,8 @@ type nvcdilib struct {
5455
driver *root.Driver
5556
infolib info.Interface
5657

58+
nvpcilib nvpci.Interface
59+
5760
mergedDeviceOptions []transform.MergedDeviceOption
5861

5962
featureFlags map[FeatureFlag]bool
@@ -151,6 +154,14 @@ func New(opts ...Option) (Interface, error) {
151154
l.class = classImexChannel
152155
}
153156
factory = (*imexlib)(l)
157+
case ModeVfio:
158+
if l.class == "" {
159+
l.class = "pgpu"
160+
}
161+
if l.nvpcilib == nil {
162+
l.nvpcilib = nvpci.New()
163+
}
164+
factory = (*vfiolib)(l)
154165
default:
155166
return nil, fmt.Errorf("unknown mode %q", l.mode)
156167
}

pkg/nvcdi/mode.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,8 @@ const (
4242
ModeCSV = Mode("csv")
4343
// ModeImex configures the CDI spec generator to generate a spec for the available IMEX channels.
4444
ModeImex = Mode("imex")
45+
// ModeVfio configures the CDI spec generator to generate a VFIO spec.
46+
ModeVfio = Mode("vfio")
4547
)
4648

4749
type modeConstraint interface {
@@ -66,6 +68,7 @@ func getModes() modes {
6668
ModeGds,
6769
ModeMofed,
6870
ModeCSV,
71+
ModeVfio,
6972
}
7073
lookup := make(map[Mode]bool)
7174

0 commit comments

Comments
 (0)