-
Notifications
You must be signed in to change notification settings - Fork 376
Add vfio mode to generate CDI specs for NVIDIA passthrough GPUs #315
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
@@ -0,0 +1,122 @@ | ||||||
/** | ||||||
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. | ||||||
# SPDX-License-Identifier: Apache-2.0 | ||||||
# | ||||||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||||||
# you may not use this file except in compliance with the License. | ||||||
# You may obtain a copy of the License at | ||||||
# | ||||||
# http://www.apache.org/licenses/LICENSE-2.0 | ||||||
# | ||||||
# Unless required by applicable law or agreed to in writing, software | ||||||
# distributed under the License is distributed on an "AS IS" BASIS, | ||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||||
# See the License for the specific language governing permissions and | ||||||
# limitations under the License. | ||||||
**/ | ||||||
|
||||||
package nvcdi | ||||||
|
||||||
import ( | ||||||
"fmt" | ||||||
"path/filepath" | ||||||
"strconv" | ||||||
|
||||||
"tags.cncf.io/container-device-interface/pkg/cdi" | ||||||
"tags.cncf.io/container-device-interface/specs-go" | ||||||
) | ||||||
|
||||||
type vfiolib nvcdilib | ||||||
|
||||||
type vfioDevice struct { | ||||||
index int | ||||||
group int | ||||||
devRoot string | ||||||
} | ||||||
|
||||||
var _ deviceSpecGeneratorFactory = (*vfiolib)(nil) | ||||||
|
||||||
func (l *vfiolib) DeviceSpecGenerators(ids ...string) (DeviceSpecGenerator, error) { | ||||||
vfioDevices, err := l.getVfioDevices(ids...) | ||||||
if err != nil { | ||||||
return nil, err | ||||||
} | ||||||
var deviceSpecGenerators DeviceSpecGenerators | ||||||
for _, vfioDevice := range vfioDevices { | ||||||
deviceSpecGenerators = append(deviceSpecGenerators, vfioDevice) | ||||||
} | ||||||
|
||||||
return deviceSpecGenerators, nil | ||||||
} | ||||||
|
||||||
// GetDeviceSpecs returns the CDI device specs for a vfio device. | ||||||
func (l *vfioDevice) GetDeviceSpecs() ([]specs.Device, error) { | ||||||
path := fmt.Sprintf("/dev/vfio/%d", l.group) | ||||||
deviceSpec := specs.Device{ | ||||||
Name: fmt.Sprintf("%d", l.index), | ||||||
ContainerEdits: specs.ContainerEdits{ | ||||||
DeviceNodes: []*specs.DeviceNode{ | ||||||
{ | ||||||
Path: path, | ||||||
HostPath: filepath.Join(l.devRoot, path), | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. When
Suggested change
Copilot uses AI. Check for mistakes. Positive FeedbackNegative Feedback |
||||||
}, | ||||||
}, | ||||||
}, | ||||||
} | ||||||
return []specs.Device{deviceSpec}, nil | ||||||
} | ||||||
|
||||||
// GetCommonEdits returns common edits for ALL devices. | ||||||
// Note, currently there are no common edits. | ||||||
func (l *vfiolib) GetCommonEdits() (*cdi.ContainerEdits, error) { | ||||||
e := cdi.ContainerEdits{ | ||||||
ContainerEdits: &specs.ContainerEdits{ | ||||||
DeviceNodes: []*specs.DeviceNode{ | ||||||
{ | ||||||
Path: "/dev/vfio/vfio", | ||||||
HostPath: filepath.Join(l.devRoot, "/dev/vfio/vfio"), | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same issue here: joining an absolute path to
Suggested change
Copilot uses AI. Check for mistakes. Positive FeedbackNegative Feedback |
||||||
}, | ||||||
}, | ||||||
}, | ||||||
} | ||||||
return &e, nil | ||||||
} | ||||||
|
||||||
func (l *vfiolib) getVfioDevices(ids ...string) ([]*vfioDevice, error) { | ||||||
var vfioDevices []*vfioDevice | ||||||
for _, id := range ids { | ||||||
if id == "all" { | ||||||
return l.getAllVfioDevices() | ||||||
} | ||||||
index, err := strconv.ParseInt(id, 10, 32) | ||||||
if err != nil { | ||||||
return nil, fmt.Errorf("invalid channel ID %v: %w", id, err) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Use
Suggested change
Copilot uses AI. Check for mistakes. Positive FeedbackNegative Feedback |
||||||
} | ||||||
i := int(index) | ||||||
dev, err := l.nvpcilib.GetGPUByIndex(i) | ||||||
if err != nil { | ||||||
return nil, fmt.Errorf("failed to get device: %w", err) | ||||||
} | ||||||
vfioDevices = append(vfioDevices, &vfioDevice{index: i, group: dev.IommuGroup, devRoot: l.devRoot}) | ||||||
} | ||||||
|
||||||
return vfioDevices, nil | ||||||
} | ||||||
|
||||||
func (l *vfiolib) getAllVfioDevices() ([]*vfioDevice, error) { | ||||||
devices, err := l.nvpcilib.GetGPUs() | ||||||
if err != nil { | ||||||
return nil, fmt.Errorf("failed getting NVIDIA GPUs: %v", err) | ||||||
} | ||||||
|
||||||
var vfioDevices []*vfioDevice | ||||||
for i, dev := range devices { | ||||||
if dev.Driver != "vfio-pci" { | ||||||
continue | ||||||
} | ||||||
l.logger.Debugf("Found NVIDIA device: address=%s, driver=%s, iommu_group=%d, deviceId=%x", | ||||||
dev.Address, dev.Driver, dev.IommuGroup, dev.Device) | ||||||
vfioDevices = append(vfioDevices, &vfioDevice{index: i, group: dev.IommuGroup, devRoot: l.devRoot}) | ||||||
} | ||||||
return vfioDevices, nil | ||||||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,123 @@ | ||
/** | ||
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. | ||
# SPDX-License-Identifier: Apache-2.0 | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
**/ | ||
|
||
package nvcdi | ||
|
||
import ( | ||
"bytes" | ||
"testing" | ||
|
||
"github.com/NVIDIA/go-nvlib/pkg/nvpci" | ||
"github.com/stretchr/testify/require" | ||
) | ||
|
||
// TestModeVfio verifies the CDI spec produced in vfio mode for a mocked set of PCI devices. | ||
// Each case builds the library with New(WithMode(ModeVfio), WithPCILib(...)), calls GetSpec | ||
// with the case's ids, and compares the serialized spec against the expected YAML. | ||
func TestModeVfio(t *testing.T) { | ||
testCases := []struct { | ||
description string | ||
pcilib *nvpci.InterfaceMock | ||
ids []string | ||
expectedError error | ||
expectedSpec string | ||
}{ | ||
{ | ||
description: "get all specs single device", | ||
pcilib: &nvpci.InterfaceMock{ | ||
GetGPUsFunc: func() ([]*nvpci.NvidiaPCIDevice, error) { | ||
devices := []*nvpci.NvidiaPCIDevice{ | ||
{ | ||
Driver: "vfio-pci", | ||
IommuGroup: 5, | ||
}, | ||
} | ||
return devices, nil | ||
}, | ||
}, | ||
expectedSpec: `--- | ||
cdiVersion: 0.5.0 | ||
kind: nvidia.com/pgpu | ||
devices: | ||
- name: "0" | ||
containerEdits: | ||
deviceNodes: | ||
- path: /dev/vfio/5 | ||
hostPath: /dev/vfio/5 | ||
containerEdits: | ||
env: | ||
- NVIDIA_VISIBLE_DEVICES=void | ||
deviceNodes: | ||
- path: /dev/vfio/vfio | ||
hostPath: /dev/vfio/vfio | ||
`, | ||
}, | ||
{ | ||
description: "get single device spec by index", | ||
pcilib: &nvpci.InterfaceMock{ | ||
GetGPUByIndexFunc: func(n int) (*nvpci.NvidiaPCIDevice, error) { | ||
devices := []*nvpci.NvidiaPCIDevice{ | ||
{ | ||
Driver: "vfio-pci", | ||
IommuGroup: 45, | ||
}, | ||
{ | ||
Driver: "vfio-pci", | ||
IommuGroup: 5, | ||
}, | ||
} | ||
return devices[n], nil | ||
}, | ||
}, | ||
ids: []string{"1"}, | ||
expectedSpec: `--- | ||
cdiVersion: 0.5.0 | ||
kind: nvidia.com/pgpu | ||
devices: | ||
- name: "1" | ||
containerEdits: | ||
deviceNodes: | ||
- path: /dev/vfio/5 | ||
hostPath: /dev/vfio/5 | ||
containerEdits: | ||
env: | ||
- NVIDIA_VISIBLE_DEVICES=void | ||
deviceNodes: | ||
- path: /dev/vfio/vfio | ||
hostPath: /dev/vfio/vfio | ||
`, | ||
}, | ||
} | ||
|
||
for _, tc := range testCases { | ||
t.Run(tc.description, func(t *testing.T) { | ||
lib, err := New( | ||
WithMode(ModeVfio), | ||
WithPCILib(tc.pcilib), | ||
) | ||
require.NoError(t, err) | ||
|
||
spec, err := lib.GetSpec(tc.ids...) | ||
require.EqualValues(t, tc.expectedError, err) | ||
|
||
var output bytes.Buffer | ||
|
||
// NOTE(review): no case currently sets expectedError; if one does, spec is presumably nil | ||
// on failure and this WriteTo call would panic — confirm and return early in that case. | ||
_, err = spec.WriteTo(&output) | ||
require.NoError(t, err) | ||
|
||
require.Equal(t, tc.expectedSpec, output.String()) | ||
}) | ||
} | ||
|
||
} |
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -19,6 +19,7 @@ package nvcdi | |||||
import ( | ||||||
"github.com/NVIDIA/go-nvlib/pkg/nvlib/device" | ||||||
"github.com/NVIDIA/go-nvlib/pkg/nvlib/info" | ||||||
"github.com/NVIDIA/go-nvlib/pkg/nvpci" | ||||||
"github.com/NVIDIA/go-nvml/pkg/nvml" | ||||||
|
||||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover" | ||||||
|
@@ -43,6 +44,13 @@ func WithInfoLib(infolib info.Interface) Option { | |||||
} | ||||||
} | ||||||
|
||||||
// WithPCILib sets the pci library to be used for CDI spec generation. | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The comment should use the uppercase acronym PCI for consistency with the function name (
Suggested change
Copilot uses AI. Check for mistakes. Positive FeedbackNegative Feedback |
||||||
func WithPCILib(pcilib nvpci.Interface) Option { | ||||||
return func(l *nvcdilib) { | ||||||
l.nvpcilib = pcilib | ||||||
} | ||||||
} | ||||||
|
||||||
// WithDeviceNamers sets the device namer for the library | ||||||
func WithDeviceNamers(namers ...DeviceNamer) Option { | ||||||
return func(l *nvcdilib) { | ||||||
|
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Uh oh!
There was an error while loading. Please reload this page.