@@ -22,6 +22,7 @@ import (
22
22
"fmt"
23
23
"os"
24
24
"os/exec"
25
+ "strconv"
25
26
"strings"
26
27
"text/template"
27
28
"time"
@@ -101,6 +102,19 @@ func (t *TimeSlicingManager) SetTimeSlice(devices *PreparedDevices, config *nasc
101
102
return fmt .Errorf ("setting a TimeSlice duration on MIG devices is unsupported" )
102
103
}
103
104
105
+ var supportTimeSliceIDs []string
106
+ for _ , gpu := range devices .Gpu .Devices {
107
+ err , isSupportTimeSlice := detectSupportTimeSliceByCudaComputeCapability (gpu .cudaComputeCapability )
108
+ if err != nil {
109
+ return fmt .Errorf ("failed to detectSupportTimeSliceByCudaComputeCapability : %w" , err )
110
+ }
111
+ if isSupportTimeSlice {
112
+ supportTimeSliceIDs = append (supportTimeSliceIDs , gpu .uuid )
113
+ continue
114
+ }
115
+ klog .InfoS ("the current card does not support setting time slices and will be ignored." , "arch" , gpu .architecture , "uuid" , gpu .uuid , "cudaComputeCapability" , gpu .cudaComputeCapability )
116
+ }
117
+
104
118
timeSlice := nascrd .DefaultTimeSlice
105
119
if config != nil && config .TimeSlice != nil {
106
120
timeSlice = * config .TimeSlice
@@ -111,7 +125,7 @@ func (t *TimeSlicingManager) SetTimeSlice(devices *PreparedDevices, config *nasc
111
125
return fmt .Errorf ("error setting compute mode: %w" , err )
112
126
}
113
127
114
- err = t .nvdevlib .setTimeSlice (devices . UUIDs () , timeSlice .Int ())
128
+ err = t .nvdevlib .setTimeSlice (supportTimeSliceIDs , timeSlice .Int ())
115
129
if err != nil {
116
130
return fmt .Errorf ("error setting time slice: %w" , err )
117
131
}
@@ -389,3 +403,36 @@ func (m *MpsControlDaemon) Stop(ctx context.Context) error {
389
403
390
404
return nil
391
405
}
406
+
407
// detectSupportTimeSliceByCudaComputeCapability reports whether a GPU with the
// given CUDA compute capability supports setting a time slice.
//
// cudaComputeCapability is expected in "<major>.<minor>" form; go-nvlib joins
// the major and minor numbers with a "." (see
// https://github.com/NVIDIA/go-nvlib/blob/main/pkg/nvlib/device/device.go#L149).
// Only the major number is inspected: time-slicing is believed to be available
// on Volta and newer architectures, so the check is simply
// cudaComputeCapability >= 7.0
// (ref https://github.com/NVIDIA/k8s-dra-driver/pull/58#discussion_r1469338562).
//
// Note the unconventional result order: the error comes first. It is non-nil
// when the major number cannot be parsed as an integer, in which case the
// boolean is false. The returned error wraps the underlying strconv error.
func detectSupportTimeSliceByCudaComputeCapability(cudaComputeCapability string) (error, bool) {
	// strings.Split always yields at least one element, so taking index 0 is
	// safe even for an empty or dot-less input.
	majorPart := strings.Split(cudaComputeCapability, ".")[0]

	major, err := strconv.Atoi(majorPart)
	if err != nil {
		return fmt.Errorf("error to get cudaComputeCapability major version %v: %w", cudaComputeCapability, err), false
	}

	return nil, major >= 7
}
424
+
425
// difference computes the set difference of a and b: every element of `a` that
// does not occur in `b`, in the order it appears in `a`. Duplicates in `a` are
// kept, and the result is nil when no element qualifies.
func difference(a, b []string) []string {
	// Index b once so each membership test below is a single map lookup.
	excluded := make(map[string]struct{}, len(b))
	for i := range b {
		excluded[b[i]] = struct{}{}
	}

	var remaining []string
	for i := range a {
		if _, skip := excluded[a[i]]; skip {
			continue
		}
		remaining = append(remaining, a[i])
	}
	return remaining
}
0 commit comments