Skip to content

Commit 6722a16

Browse files
committed
skip old architecture version GPU settings time slice
Signed-off-by: wawa0210 <[email protected]>
1 parent b6c7aae commit 6722a16

File tree

1 file changed

+21
-1
lines changed

1 file changed

+21
-1
lines changed

cmd/nvidia-dra-plugin/sharing.go

+21-1
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ import (
2626
"text/template"
2727
"time"
2828

29+
"golang.org/x/mod/semver"
2930
appsv1 "k8s.io/api/apps/v1"
3031
"k8s.io/apimachinery/pkg/api/errors"
3132
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -101,6 +102,15 @@ func (t *TimeSlicingManager) SetTimeSlice(devices *PreparedDevices, config *nasc
101102
return fmt.Errorf("setting a TimeSlice duration on MIG devices is unsupported")
102103
}
103104

105+
var supportTimeSliceIDs []string
106+
for _, gpu := range devices.Gpu.Devices {
107+
isSupportTimeSlice := detectSupportTimeSliceByCudaComputeCapability(gpu.cudaComputeCapability)
108+
if !isSupportTimeSlice {
109+
// TODO: find a more graceful way to handle this than returning an error directly, which causes the plugin to crash
110+
return fmt.Errorf("setting a TimeSlice duration on uuid:%v,cudaComputeCapability:%v is unsupported", gpu.uuid, gpu.cudaComputeCapability)
111+
}
112+
}
113+
104114
timeSlice := nascrd.DefaultTimeSlice
105115
if config != nil && config.TimeSlice != nil {
106116
timeSlice = *config.TimeSlice
@@ -111,7 +121,7 @@ func (t *TimeSlicingManager) SetTimeSlice(devices *PreparedDevices, config *nasc
111121
return fmt.Errorf("error setting compute mode: %w", err)
112122
}
113123

114-
err = t.nvdevlib.setTimeSlice(devices.UUIDs(), timeSlice.Int())
124+
err = t.nvdevlib.setTimeSlice(supportTimeSliceIDs, timeSlice.Int())
115125
if err != nil {
116126
return fmt.Errorf("error setting time slice: %w", err)
117127
}
@@ -389,3 +399,13 @@ func (m *MpsControlDaemon) Stop(ctx context.Context) error {
389399

390400
return nil
391401
}
402+
403+
// detactSupportTimeSliceByArch Determine whether the architecture series
404+
// supports setting time slices based on the gpu cudaComputeCapability.
405+
func detectSupportTimeSliceByCudaComputeCapability(cudaComputeCapability string) bool {
406+
// ref https://github.com/NVIDIA/k8s-dra-driver/pull/58#discussion_r1469338562
407+
// we believe time-slicing is available on Volta+ architectures, so the check would simply be cudaComputeCapability >= 7.0
408+
// by https://github.com/NVIDIA/go-nvlib/blob/main/pkg/nvlib/device/device.go#L149, We know that cuda major and minor versions are concatenated through `.` .
409+
410+
return semver.Compare("v"+strings.TrimPrefix(cudaComputeCapability, "v"), "v7.0") >= 0
411+
}

0 commit comments

Comments
 (0)