@@ -20,6 +20,7 @@ import (
20
20
"context"
21
21
"fmt"
22
22
"slices"
23
+ "strings"
23
24
"sync"
24
25
25
26
resourceapi "k8s.io/api/resource/v1beta1"
@@ -29,6 +30,8 @@ import (
29
30
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager"
30
31
cdiapi "tags.cncf.io/container-device-interface/pkg/cdi"
31
32
33
+ "golang.org/x/mod/semver"
34
+
32
35
configapi "github.com/NVIDIA/k8s-dra-driver/api/nvidia.com/resource/gpu/v1alpha1"
33
36
)
34
37
@@ -390,6 +393,21 @@ func (s *DeviceState) applySharingConfig(ctx context.Context, config configapi.S
390
393
allocatableDevices [r .Device ] = s .allocatable [r .Device ]
391
394
}
392
395
396
+ // Allow only devices with CUDA compute capability >= 7.5, as time-slicing and MPS do not work on older architectures.
397
+ shareableAllocatableDevices := make (AllocatableDevices )
398
+ for device , deviceType := range allocatableDevices {
399
+ if deviceType .Gpu != nil {
400
+ cudaCCv := "v" + strings .TrimPrefix (deviceType .Gpu .cudaComputeCapability , "v" )
401
+ gpuUUID := deviceType .Gpu .UUID
402
+ if semver .Compare (semver .Canonical (cudaCCv ), semver .Canonical ("v7.5" )) >= 0 {
403
+ klog .Infof ("GPU sharing is available on this device UUID=%v with CudaComputeCapability=%v" , gpuUUID , cudaCCv )
404
+ shareableAllocatableDevices [device ] = deviceType
405
+ } else {
406
+ return nil , fmt .Errorf ("GPU sharing is not available on this device UUID=%v" , gpuUUID )
407
+ }
408
+ }
409
+ }
410
+
393
411
// Declare a device group state object to populate.
394
412
var configState DeviceConfigState
395
413
@@ -400,7 +418,7 @@ func (s *DeviceState) applySharingConfig(ctx context.Context, config configapi.S
400
418
return nil , fmt .Errorf ("error getting timeslice config for requests '%v' in claim '%v': %w" , requests , claim .UID , err )
401
419
}
402
420
if tsc != nil {
403
- err = s .tsManager .SetTimeSlice (allocatableDevices , tsc )
421
+ err = s .tsManager .SetTimeSlice (shareableAllocatableDevices , tsc )
404
422
if err != nil {
405
423
return nil , fmt .Errorf ("error setting timeslice config for requests '%v' in claim '%v': %w" , requests , claim .UID , err )
406
424
}
@@ -413,7 +431,8 @@ func (s *DeviceState) applySharingConfig(ctx context.Context, config configapi.S
413
431
if err != nil {
414
432
return nil , fmt .Errorf ("error getting MPS configuration: %w" , err )
415
433
}
416
- mpsControlDaemon := s .mpsManager .NewMpsControlDaemon (string (claim .UID ), allocatableDevices )
434
+
435
+ mpsControlDaemon := s .mpsManager .NewMpsControlDaemon (string (claim .UID ), shareableAllocatableDevices )
417
436
if err := mpsControlDaemon .Start (ctx , mpsc ); err != nil {
418
437
return nil , fmt .Errorf ("error starting MPS control daemon: %w" , err )
419
438
}
0 commit comments