Skip to content

Commit ec8bf87

Browse files
committed
gpu: add pci device id allow/deny support
By defining allowed/denied PCI IDs, it's possible to only select some GPUs per host. For example, on a desktop with integrated and discrete graphics, GPU plugin can only register the discrete one. Signed-off-by: Tuomas Katila <[email protected]>
1 parent c81472c commit ec8bf87

File tree

10 files changed

+304
-0
lines changed

10 files changed

+304
-0
lines changed

cmd/gpu_plugin/README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,8 @@ For workloads on different KMDs, see [KMD and UMD](#kmd-and-umd).
5757
| -health-management | - | disabled | Enable health management by requesting data from oneAPI/Level-Zero interface. Requires [GPU Level-Zero](../gpu_levelzero/) sidecar. See [health management](#health-management) |
5858
| -wsl | - | disabled | Adapt plugin to run in the WSL environment. Requires [GPU Level-Zero](../gpu_levelzero/) sidecar. |
5959
| -shared-dev-num | int | 1 | Number of containers that can share the same GPU device |
60+
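| -allow-ids | string | "" | Comma-separated list of PCI device IDs (`0x` followed by four lowercase hex digits, e.g. `0x49c5,0x49c6`). Only GPUs whose device ID is on the list are registered as resources. Default is empty (all GPUs are registered). Cannot be used together with `-deny-ids`. |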
61+
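| -deny-ids | string | "" | Comma-separated list of PCI device IDs (`0x` followed by four lowercase hex digits, e.g. `0x49c5,0x49c6`). GPUs whose device ID is on the list are not registered as resources. Default is empty (no GPU is denied). Cannot be used together with `-allow-ids`. |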
6062
| -allocation-policy | string | none | 3 possible values: balanced, packed, none. For shared-dev-num > 1: _balanced_ mode spreads workloads among GPU devices, _packed_ mode fills one GPU fully before moving to next, and _none_ selects first available device from kubelet. Default is _none_. |
6163

6264
The plugin also accepts a number of other arguments (common to all plugins) related to logging.

cmd/gpu_plugin/gpu_plugin.go

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,8 @@ const (
6969

7070
type cliOptions struct {
7171
preferredAllocationPolicy string
72+
allowIDs string
73+
denyIDs string
7274
sharedDevNum int
7375
temperatureLimit int
7476
enableMonitoring bool
@@ -204,6 +206,23 @@ func packedPolicy(req *pluginapi.ContainerPreferredAllocationRequest) []string {
204206
return deviceIds
205207
}
206208

209+
func validatePCIDeviceIDs(pciIDList string) error {
210+
r := regexp.MustCompile(`^0x[0-9a-f]{4}$`)
211+
212+
for id := range strings.SplitSeq(pciIDList, ",") {
213+
id = strings.TrimSpace(id)
214+
if id == "" {
215+
return os.ErrNotExist
216+
}
217+
218+
if !r.MatchString(id) {
219+
return os.ErrInvalid
220+
}
221+
}
222+
223+
return nil
224+
}
225+
207226
func (dp *devicePlugin) pciAddressForCard(cardPath, cardName string) (string, error) {
208227
linkPath, err := os.Readlink(cardPath)
209228
if err != nil {
@@ -585,6 +604,31 @@ func (dp *devicePlugin) filterOutInvalidCards(files []fs.DirEntry) []fs.DirEntry
585604
continue
586605
}
587606

607+
allowlist := len(dp.options.allowIDs) > 0
608+
denylist := len(dp.options.denyIDs) > 0
609+
610+
// Skip if the device is either not allowed or denied.
611+
if allowlist || denylist {
612+
pciID, err := pciDeviceIDForCard(path.Join(dp.sysfsDir, f.Name()))
613+
if err != nil {
614+
klog.Warningf("Failed to get PCI ID for device %s: %+v", f.Name(), err)
615+
616+
continue
617+
}
618+
619+
if allowlist && !strings.Contains(dp.options.allowIDs, pciID) {
620+
klog.V(4).Infof("Skipping device %s (%s), not in allowlist: %s", f.Name(), pciID, dp.options.allowIDs)
621+
622+
continue
623+
}
624+
625+
if denylist && strings.Contains(dp.options.denyIDs, pciID) {
626+
klog.V(4).Infof("Skipping device %s (%s), in denylist: %s", f.Name(), pciID, dp.options.denyIDs)
627+
628+
continue
629+
}
630+
}
631+
588632
filtered = append(filtered, f)
589633
}
590634

@@ -710,6 +754,25 @@ func (dp *devicePlugin) Allocate(request *pluginapi.AllocateRequest) (*pluginapi
710754
return nil, &dpapi.UseDefaultMethodError{}
711755
}
712756

757+
func checkAllowDenyOptions(opts cliOptions) bool {
758+
if len(opts.allowIDs) > 0 && len(opts.denyIDs) > 0 {
759+
klog.Error("Cannot use both allow-ids and deny-ids options at the same time. Please use only one of them.")
760+
return false
761+
}
762+
763+
if err := validatePCIDeviceIDs(opts.allowIDs); err != nil {
764+
klog.Error("Failed to validate allow-ids: ", err)
765+
return false
766+
}
767+
768+
if err := validatePCIDeviceIDs(opts.denyIDs); err != nil {
769+
klog.Error("Failed to validate deny-ids: ", err)
770+
return false
771+
}
772+
773+
return true
774+
}
775+
713776
func main() {
714777
var (
715778
prefix string
@@ -723,6 +786,9 @@ func main() {
723786
flag.IntVar(&opts.sharedDevNum, "shared-dev-num", 1, "number of containers sharing the same GPU device")
724787
flag.IntVar(&opts.temperatureLimit, "temp-limit", 100, "temperature limit at which device is marked unhealthy")
725788
flag.StringVar(&opts.preferredAllocationPolicy, "allocation-policy", "none", "modes of allocating GPU devices: balanced, packed and none")
789+
flag.StringVar(&opts.allowIDs, "allow-ids", "", "comma-separated list of device IDs to allow (e.g. 0x49c5,0x49c6)")
790+
flag.StringVar(&opts.denyIDs, "deny-ids", "", "comma-separated list of device IDs to deny (e.g. 0x49c5,0x49c6)")
791+
726792
flag.Parse()
727793

728794
if opts.sharedDevNum < 1 {
@@ -736,6 +802,12 @@ func main() {
736802
os.Exit(1)
737803
}
738804

805+
if !checkAllowDenyOptions(opts) {
806+
klog.Error("Invalid allow/deny options.")
807+
808+
os.Exit(1)
809+
}
810+
739811
klog.V(1).Infof("GPU device plugin started with %s preferred allocation policy", opts.preferredAllocationPolicy)
740812

741813
plugin := newDevicePlugin(prefix+sysfsDrmDirectory, prefix+devfsDriDirectory, opts)

cmd/gpu_plugin/gpu_plugin_test.go

Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -361,6 +361,93 @@ func TestScan(t *testing.T) {
361361
expectedI915Devs: 1,
362362
expectedI915Monitors: 1,
363363
},
364+
{
365+
name: "two devices with only one allowed",
366+
sysfsdirs: []string{"card0/device/drm/card0", "card0/device/drm/controlD64", "card1/device/drm/card1"},
367+
sysfsfiles: map[string][]byte{
368+
"card0/device/vendor": []byte("0x8086"),
369+
"card0/device/device": []byte("0x1234"),
370+
"card1/device/vendor": []byte("0x8086"),
371+
"card1/device/device": []byte("0x9876"),
372+
},
373+
symlinkfiles: map[string]string{
374+
"card0/device/driver": "drivers/xe",
375+
"card1/device/driver": "drivers/i915",
376+
},
377+
devfsdirs: []string{
378+
"card0",
379+
"by-path/pci-0000:00:00.0-card",
380+
"by-path/pci-0000:00:00.0-render",
381+
"card1",
382+
"by-path/pci-0000:00:01.0-card",
383+
"by-path/pci-0000:00:01.0-render",
384+
},
385+
options: cliOptions{enableMonitoring: true, allowIDs: "0x1234"},
386+
expectedXeDevs: 1,
387+
expectedXeMonitors: 1,
388+
expectedI915Devs: 0,
389+
expectedI915Monitors: 0,
390+
},
391+
{
392+
name: "three devices with two allowed",
393+
sysfsdirs: []string{"card0/device/drm/card0", "card0/device/drm/controlD64", "card1/device/drm/card1", "card2/device/drm/card2"},
394+
sysfsfiles: map[string][]byte{
395+
"card0/device/vendor": []byte("0x8086"),
396+
"card0/device/device": []byte("0x1234"),
397+
"card1/device/vendor": []byte("0x8086"),
398+
"card1/device/device": []byte("0x9876"),
399+
"card2/device/vendor": []byte("0x8086"),
400+
"card2/device/device": []byte("0x0101"),
401+
},
402+
symlinkfiles: map[string]string{
403+
"card0/device/driver": "drivers/xe",
404+
"card1/device/driver": "drivers/i915",
405+
"card2/device/driver": "drivers/i915",
406+
},
407+
devfsdirs: []string{
408+
"card0",
409+
"by-path/pci-0000:00:00.0-card",
410+
"by-path/pci-0000:00:00.0-render",
411+
"card1",
412+
"by-path/pci-0000:00:01.0-card",
413+
"by-path/pci-0000:00:01.0-render",
414+
"card2",
415+
"by-path/pci-0000:00:02.0-card",
416+
"by-path/pci-0000:00:02.0-render",
417+
},
418+
options: cliOptions{enableMonitoring: true, allowIDs: "0x1234,0x9876"},
419+
expectedXeDevs: 1,
420+
expectedXeMonitors: 1,
421+
expectedI915Devs: 1,
422+
expectedI915Monitors: 1,
423+
},
424+
{
425+
name: "two devices with one denied",
426+
sysfsdirs: []string{"card0/device/drm/card0", "card0/device/drm/controlD64", "card1/device/drm/card1"},
427+
sysfsfiles: map[string][]byte{
428+
"card0/device/vendor": []byte("0x8086"),
429+
"card0/device/device": []byte("0x1234"),
430+
"card1/device/vendor": []byte("0x8086"),
431+
"card1/device/device": []byte("0x9876"),
432+
},
433+
symlinkfiles: map[string]string{
434+
"card0/device/driver": "drivers/xe",
435+
"card1/device/driver": "drivers/i915",
436+
},
437+
devfsdirs: []string{
438+
"card0",
439+
"by-path/pci-0000:00:00.0-card",
440+
"by-path/pci-0000:00:00.0-render",
441+
"card1",
442+
"by-path/pci-0000:00:01.0-card",
443+
"by-path/pci-0000:00:01.0-render",
444+
},
445+
options: cliOptions{enableMonitoring: true, denyIDs: "0x1234"},
446+
expectedXeDevs: 0,
447+
expectedXeMonitors: 0,
448+
expectedI915Devs: 1,
449+
expectedI915Monitors: 1,
450+
},
364451
{
365452
name: "sriov-1-pf-no-vfs + monitoring",
366453
sysfsdirs: []string{"card0/device/drm/card0", "card0/device/drm/controlD64"},
@@ -1048,3 +1135,61 @@ func TestCDIDeviceInclusion(t *testing.T) {
10481135
t.Error("Invalid count for device (xe)")
10491136
}
10501137
}
1138+
1139+
func TestParsePCIDeviceIDs(t *testing.T) {
1140+
tests := []struct {
1141+
name string
1142+
input string
1143+
wantError bool
1144+
}{
1145+
{
1146+
name: "valid single ID",
1147+
input: "0x1234",
1148+
wantError: false,
1149+
},
1150+
{
1151+
name: "valid multiple IDs",
1152+
input: "0x1234,0x5678,0x9abc",
1153+
wantError: false,
1154+
},
1155+
{
1156+
name: "valid IDs with spaces",
1157+
input: " 0x1234 , 0x5678 ",
1158+
wantError: false,
1159+
},
1160+
{
1161+
name: "empty string",
1162+
input: "",
1163+
wantError: true,
1164+
},
1165+
{
1166+
name: "invalid ID format",
1167+
input: "0x1234,abcd",
1168+
wantError: true,
1169+
},
1170+
{
1171+
name: "invalid hex length",
1172+
input: "0x123,0x5678",
1173+
wantError: true,
1174+
},
1175+
{
1176+
name: "extra comma",
1177+
input: "0x1234,",
1178+
wantError: true,
1179+
},
1180+
{
1181+
name: "capita hex",
1182+
input: "0xAA12,",
1183+
wantError: true,
1184+
},
1185+
}
1186+
1187+
for _, tt := range tests {
1188+
t.Run(tt.name, func(t *testing.T) {
1189+
err := validatePCIDeviceIDs(tt.input)
1190+
if (err != nil) != tt.wantError {
1191+
t.Errorf("parsePCIDeviceIDs() error = %v, wantError %v", err, tt.wantError)
1192+
}
1193+
})
1194+
}
1195+
}
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
apiVersion: apps/v1
2+
kind: DaemonSet
3+
metadata:
4+
name: intel-gpu-plugin
5+
spec:
6+
template:
7+
spec:
8+
containers:
9+
- name: intel-gpu-plugin
10+
args:
11+
- "-v=4"
12+
- "-allow-ids=0x56a6,0x56a5,0x56a1,0x56a0,0x5694,0x5693,0x5692,0x5691,0x5690,0x56b3,0x56b2,0x56a4,0x56a3,0x5697,0x5696,0x5695,0x56b1,0x56b0,0x56a2,0x56ba,0x56bc,0x56bd,0x56bb"
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
resources:
2+
- ../../base
3+
patches:
4+
- path: add-args.yaml

deployments/operator/crd/bases/deviceplugin.intel.com_gpudeviceplugins.yaml

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,20 @@ spec:
5555
spec:
5656
description: GpuDevicePluginSpec defines the desired state of GpuDevicePlugin.
5757
properties:
58+
allowIDs:
59+
description: |-
60+
AllowIDs is a comma-separated list of PCI IDs of GPU devices that should only be advertised by the plugin.
61+
If not set, all devices are advertised.
62+
The list can contain IDs in the form of '0x1234,0x49a4,0x50b4'.
63+
Cannot be used together with DenyIDs.
64+
type: string
65+
denyIDs:
66+
description: |-
67+
DenyIDs is a comma-separated list of PCI IDs of GPU devices that the plugin should not advertise.
68+
If not set, all devices are advertised.
69+
The list can contain IDs in the form of '0x1234,0x49a4,0x50b4'.
70+
Cannot be used together with AllowIDs.
71+
type: string
5872
enableMonitoring:
5973
description: |-
6074
EnableMonitoring enables the monitoring resource ('i915_monitoring')

deployments/operator/samples/deviceplugin_v1_gpudeviceplugin.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,5 +7,6 @@ spec:
77
sharedDevNum: 10
88
logLevel: 4
99
enableMonitoring: true
10+
denyIDs: "0x9a4a"
1011
nodeSelector:
1112
intel.feature.node.kubernetes.io/gpu: "true"

pkg/apis/deviceplugin/v1/gpudeviceplugin_types.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,18 @@ type GpuDevicePluginSpec struct {
3434
// InitImage is a container image with tools (e.g., GPU NFD source hook) installed on each node.
3535
InitImage string `json:"initImage,omitempty"`
3636

37+
// AllowIDs is a comma-separated list of PCI IDs of GPU devices that should only be advertised by the plugin.
38+
// If not set, all devices are advertised.
39+
// The list can contain IDs in the form of '0x1234,0x49a4,0x50b4'.
40+
// Cannot be used together with DenyIDs.
41+
AllowIDs string `json:"allowIDs,omitempty"`
42+
43+
// DenyIDs is a comma-separated list of PCI IDs of GPU devices that the plugin should not advertise.
44+
// If not set, all devices are advertised.
45+
// The list can contain IDs in the form of '0x1234,0x49a4,0x50b4'.
46+
// Cannot be used together with AllowIDs.
47+
DenyIDs string `json:"denyIDs,omitempty"`
48+
3749
// PreferredAllocationPolicy sets the mode of allocating GPU devices on a node.
3850
// See documentation for detailed description of the policies. Only valid when SharedDevNum > 1 is set.
3951
// +kubebuilder:validation:Enum=balanced;packed;none

pkg/apis/deviceplugin/v1/gpudeviceplugin_webhook.go

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,14 +16,20 @@ package v1
1616

1717
import (
1818
"fmt"
19+
"regexp"
20+
"strings"
1921

2022
ctrl "sigs.k8s.io/controller-runtime"
2123

2224
"github.com/intel/intel-device-plugins-for-kubernetes/pkg/controllers"
2325
)
2426

27+
var pciIDRegex regexp.Regexp
28+
2529
// SetupWebhookWithManager sets up a webhook for GpuDevicePlugin custom resources.
2630
func (r *GpuDevicePlugin) SetupWebhookWithManager(mgr ctrl.Manager) error {
31+
pciIDRegex = *regexp.MustCompile(`^0x[0-9a-f]{4}$`)
32+
2733
return ctrl.NewWebhookManagedBy(mgr).
2834
For(r).
2935
WithDefaulter(&commonDevicePluginDefaulter{
@@ -44,5 +50,33 @@ func (r *GpuDevicePlugin) validatePlugin(ref *commonDevicePluginValidator) error
4450
return fmt.Errorf("%w: PreferredAllocationPolicy is valid only when setting sharedDevNum > 1", errValidation)
4551
}
4652

53+
if r.Spec.AllowIDs != "" {
54+
for id := range strings.SplitSeq(r.Spec.AllowIDs, ",") {
55+
if id == "" {
56+
return fmt.Errorf("%w: Empty PCI Device ID in AllowIDs", errValidation)
57+
}
58+
59+
if !pciIDRegex.MatchString(id) {
60+
return fmt.Errorf("%w: Invalid PCI Device ID: %s", errValidation, id)
61+
}
62+
}
63+
}
64+
65+
if r.Spec.DenyIDs != "" {
66+
for id := range strings.SplitSeq(r.Spec.DenyIDs, ",") {
67+
if id == "" {
68+
return fmt.Errorf("%w: Empty PCI Device ID in DenyIDs", errValidation)
69+
}
70+
71+
if !pciIDRegex.MatchString(id) {
72+
return fmt.Errorf("%w: Invalid PCI Device ID: %s", errValidation, id)
73+
}
74+
}
75+
}
76+
77+
if len(r.Spec.AllowIDs) > 0 && len(r.Spec.DenyIDs) > 0 {
78+
return fmt.Errorf("%w: AllowIDs and DenyIDs cannot be used together", errValidation)
79+
}
80+
4781
return validatePluginImage(r.Spec.Image, ref.expectedImage, &ref.expectedVersion)
4882
}

0 commit comments

Comments
 (0)