@@ -26,6 +26,7 @@ import (
26
26
"text/template"
27
27
"time"
28
28
29
+ "golang.org/x/mod/semver"
29
30
appsv1 "k8s.io/api/apps/v1"
30
31
"k8s.io/apimachinery/pkg/api/errors"
31
32
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -101,6 +102,15 @@ func (t *TimeSlicingManager) SetTimeSlice(devices *PreparedDevices, config *nasc
101
102
return fmt .Errorf ("setting a TimeSlice duration on MIG devices is unsupported" )
102
103
}
103
104
105
+ var supportTimeSliceIDs []string
106
+ for _ , gpu := range devices .Gpu .Devices {
107
+ isSupportTimeSlice := detectSupportTimeSliceByCudaComputeCapability (gpu .cudaComputeCapability )
108
+ if ! isSupportTimeSlice {
109
+ //todo: we need a more desirable way to handle it, instead of directly err, causing plugin crash
110
+ return fmt .Errorf ("setting a TimeSlice duration on uuid:%v,cudaComputeCapability:%v is unsupported" , gpu .uuid , gpu .cudaComputeCapability )
111
+ }
112
+ }
113
+
104
114
timeSlice := nascrd .DefaultTimeSlice
105
115
if config != nil && config .TimeSlice != nil {
106
116
timeSlice = * config .TimeSlice
@@ -111,7 +121,7 @@ func (t *TimeSlicingManager) SetTimeSlice(devices *PreparedDevices, config *nasc
111
121
return fmt .Errorf ("error setting compute mode: %w" , err )
112
122
}
113
123
114
- err = t .nvdevlib .setTimeSlice (devices . UUIDs () , timeSlice .Int ())
124
+ err = t .nvdevlib .setTimeSlice (supportTimeSliceIDs , timeSlice .Int ())
115
125
if err != nil {
116
126
return fmt .Errorf ("error setting time slice: %w" , err )
117
127
}
@@ -389,3 +399,13 @@ func (m *MpsControlDaemon) Stop(ctx context.Context) error {
389
399
390
400
return nil
391
401
}
402
+
403
+ // detactSupportTimeSliceByArch Determine whether the architecture series
404
+ // supports setting time slices based on the gpu cudaComputeCapability.
405
+ func detectSupportTimeSliceByCudaComputeCapability (cudaComputeCapability string ) bool {
406
+ // ref https://github.com/NVIDIA/k8s-dra-driver/pull/58#discussion_r1469338562
407
+ // we believe time-slicing is available on Volta+ architectures, so the check would simply be cudaComputeCapability >= 7.0
408
+ // by https://github.com/NVIDIA/go-nvlib/blob/main/pkg/nvlib/device/device.go#L149, We know that cuda major and minor versions are concatenated through `.` .
409
+
410
+ return semver .Compare ("v" + strings .TrimPrefix (cudaComputeCapability , "v" ), "v7.0" ) >= 0
411
+ }
0 commit comments