Skip to content

Commit 0dc4244

Browse files
committed
skip old architecture version GPU settings time slice
Signed-off-by: wawa0210 <[email protected]>
1 parent 58ae162 commit 0dc4244

File tree

1 file changed

+50
-2
lines changed

1 file changed

+50
-2
lines changed

cmd/nvidia-dra-plugin/sharing.go

+50-2
Original file line numberDiff line numberDiff line change
@@ -101,17 +101,32 @@ func (t *TimeSlicingManager) SetTimeSlice(devices *PreparedDevices, config *nasc
101101
return fmt.Errorf("setting a TimeSlice duration on MIG devices is unsupported")
102102
}
103103

104+
noSupportTimeSliceIDs := []string{}
105+
for _, gpu := range devices.Gpu.Devices {
106+
if !detectSupportTimeSliceByArch(gpu.architecture) {
107+
klog.InfoS("the current card does not support setting time slices and will be ignored.", "arch", gpu.architecture, "uuid", gpu.uuid, "cudaComputeCapability", gpu.cudaComputeCapability)
108+
noSupportTimeSliceIDs = append(noSupportTimeSliceIDs, gpu.uuid)
109+
continue
110+
}
111+
}
112+
113+
supportTimeSliceIDs := difference(devices.UUIDs(), noSupportTimeSliceIDs)
114+
115+
if len(supportTimeSliceIDs) == 0 {
116+
klog.InfoS("all card does not support setting time slices and will be ignored.", "uuids", noSupportTimeSliceIDs)
117+
}
118+
104119
timeSlice := nascrd.DefaultTimeSlice
105120
if config != nil && config.TimeSlice != nil {
106121
timeSlice = *config.TimeSlice
107122
}
108123

109-
err := t.nvdevlib.setComputeMode(devices.UUIDs(), "DEFAULT")
124+
err := t.nvdevlib.setComputeMode(supportTimeSliceIDs, "DEFAULT")
110125
if err != nil {
111126
return fmt.Errorf("error setting compute mode: %w", err)
112127
}
113128

114-
err = t.nvdevlib.setTimeSlice(devices.UUIDs(), timeSlice.Int())
129+
err = t.nvdevlib.setTimeSlice(supportTimeSliceIDs, timeSlice.Int())
115130
if err != nil {
116131
return fmt.Errorf("error setting time slice: %w", err)
117132
}
@@ -389,3 +404,36 @@ func (m *MpsControlDaemon) Stop(ctx context.Context) error {
389404

390405
return nil
391406
}
407+
408+
// detectSupportTimeSliceByArch reports whether GPUs of the given architecture
// series support setting a time slice.
//
// Only "Pascal" is currently treated as unsupported. Every other value,
// including architecture names this function does not list, is treated as
// supported. The comparison is an exact, case-sensitive string match.
func detectSupportTimeSliceByArch(arch string) bool {
	// TODO: More information is needed to determine the support of various architectures.
	switch arch {
	case "Pascal":
		return false
	case "Ada", "Ampere", "Turing", "Hopper":
		// Go cases do not fall through, so the supported architectures are
		// listed in a single case rather than as stacked empty cases.
		return true
	default:
		// Unlisted architectures are assumed to support time slicing.
		return true
	}
}
425+
426+
// difference collects every element of `a` that has no equal element in `b`.
// The relative order of `a` is kept and repeated elements of `a` are kept as
// repeats. The result is nil when no element of `a` survives.
func difference(a, b []string) []string {
	// Build a set of the values to exclude so each membership test is a map lookup.
	excluded := make(map[string]struct{}, len(b))
	for i := range b {
		excluded[b[i]] = struct{}{}
	}

	var remaining []string
	for i := range a {
		if _, skip := excluded[a[i]]; skip {
			continue
		}
		remaining = append(remaining, a[i])
	}
	return remaining
}

0 commit comments

Comments
 (0)