Skip to content

Commit c883a59

Browse files
committed
Add resource.sharing-strategy labels
This change adds a sharing-strategy label per resource. The label takes one of the values none, mps, or time-slicing, depending on the sharing configuration. For invalid configurations, the label is empty. Signed-off-by: Evan Lezar <[email protected]>
1 parent c469c54 commit c883a59

9 files changed

+379
-258
lines changed

internal/lm/mig-strategy.go

+1
Original file line number | Diff line number | Diff line change
@@ -257,6 +257,7 @@ func newInvalidMigStrategyLabeler(device resource.Device, reason string) (Labele
257257

258258
rl.updateLabel(labels, "count", 0)
259259
rl.updateLabel(labels, "replicas", 0)
260+
rl.updateLabel(labels, "sharing-strategy", "")
260261
rl.updateLabel(labels, "memory", 0)
261262

262263
return labels, nil

internal/lm/mig-strategy_test.go

+118-104
Original file line number | Diff line number | Diff line change
@@ -43,13 +43,14 @@ func TestMigStrategyNoneLabels(t *testing.T) {
4343
rt.NewFullGPU(),
4444
},
4545
expectedLabels: Labels{
46-
"nvidia.com/gpu.compute.major": "8",
47-
"nvidia.com/gpu.compute.minor": "0",
48-
"nvidia.com/gpu.family": "ampere",
49-
"nvidia.com/gpu.count": "1",
50-
"nvidia.com/gpu.replicas": "1",
51-
"nvidia.com/gpu.memory": "300",
52-
"nvidia.com/gpu.product": "MOCKMODEL",
46+
"nvidia.com/gpu.compute.major": "8",
47+
"nvidia.com/gpu.compute.minor": "0",
48+
"nvidia.com/gpu.family": "ampere",
49+
"nvidia.com/gpu.count": "1",
50+
"nvidia.com/gpu.replicas": "1",
51+
"nvidia.com/gpu.sharing-strategy": "none",
52+
"nvidia.com/gpu.memory": "300",
53+
"nvidia.com/gpu.product": "MOCKMODEL",
5354
},
5455
},
5556
{
@@ -66,13 +67,14 @@ func TestMigStrategyNoneLabels(t *testing.T) {
6667
},
6768
},
6869
expectedLabels: Labels{
69-
"nvidia.com/gpu.compute.major": "8",
70-
"nvidia.com/gpu.compute.minor": "0",
71-
"nvidia.com/gpu.family": "ampere",
72-
"nvidia.com/gpu.count": "1",
73-
"nvidia.com/gpu.replicas": "2",
74-
"nvidia.com/gpu.memory": "300",
75-
"nvidia.com/gpu.product": "MOCKMODEL-SHARED",
70+
"nvidia.com/gpu.compute.major": "8",
71+
"nvidia.com/gpu.compute.minor": "0",
72+
"nvidia.com/gpu.family": "ampere",
73+
"nvidia.com/gpu.count": "1",
74+
"nvidia.com/gpu.replicas": "2",
75+
"nvidia.com/gpu.sharing-strategy": "time-slicing",
76+
"nvidia.com/gpu.memory": "300",
77+
"nvidia.com/gpu.product": "MOCKMODEL-SHARED",
7678
},
7779
},
7880
{
@@ -90,13 +92,14 @@ func TestMigStrategyNoneLabels(t *testing.T) {
9092
},
9193
},
9294
expectedLabels: Labels{
93-
"nvidia.com/gpu.compute.major": "8",
94-
"nvidia.com/gpu.compute.minor": "0",
95-
"nvidia.com/gpu.family": "ampere",
96-
"nvidia.com/gpu.count": "2",
97-
"nvidia.com/gpu.replicas": "2",
98-
"nvidia.com/gpu.memory": "300",
99-
"nvidia.com/gpu.product": "MOCKMODEL-SHARED",
95+
"nvidia.com/gpu.compute.major": "8",
96+
"nvidia.com/gpu.compute.minor": "0",
97+
"nvidia.com/gpu.family": "ampere",
98+
"nvidia.com/gpu.count": "2",
99+
"nvidia.com/gpu.replicas": "2",
100+
"nvidia.com/gpu.sharing-strategy": "time-slicing",
101+
"nvidia.com/gpu.memory": "300",
102+
"nvidia.com/gpu.product": "MOCKMODEL-SHARED",
100103
},
101104
},
102105
{
@@ -113,10 +116,11 @@ func TestMigStrategyNoneLabels(t *testing.T) {
113116
},
114117
},
115118
expectedLabels: Labels{
116-
"nvidia.com/gpu.count": "1",
117-
"nvidia.com/gpu.replicas": "0",
118-
"nvidia.com/gpu.memory": "300",
119-
"nvidia.com/gpu.product": "MOCKMODEL",
119+
"nvidia.com/gpu.count": "1",
120+
"nvidia.com/gpu.replicas": "0",
121+
"nvidia.com/gpu.sharing-strategy": "none",
122+
"nvidia.com/gpu.memory": "300",
123+
"nvidia.com/gpu.product": "MOCKMODEL",
120124
},
121125
},
122126
{
@@ -134,10 +138,11 @@ func TestMigStrategyNoneLabels(t *testing.T) {
134138
},
135139
},
136140
expectedLabels: Labels{
137-
"nvidia.com/gpu.count": "2",
138-
"nvidia.com/gpu.replicas": "0",
139-
"nvidia.com/gpu.memory": "300",
140-
"nvidia.com/gpu.product": "MOCKMODEL",
141+
"nvidia.com/gpu.count": "2",
142+
"nvidia.com/gpu.replicas": "0",
143+
"nvidia.com/gpu.sharing-strategy": "none",
144+
"nvidia.com/gpu.memory": "300",
145+
"nvidia.com/gpu.product": "MOCKMODEL",
141146
},
142147
},
143148
{
@@ -155,13 +160,14 @@ func TestMigStrategyNoneLabels(t *testing.T) {
155160
},
156161
},
157162
expectedLabels: Labels{
158-
"nvidia.com/gpu.compute.major": "8",
159-
"nvidia.com/gpu.compute.minor": "0",
160-
"nvidia.com/gpu.family": "ampere",
161-
"nvidia.com/gpu.count": "2",
162-
"nvidia.com/gpu.replicas": "2",
163-
"nvidia.com/gpu.memory": "300",
164-
"nvidia.com/gpu.product": "MOCKMODEL-SHARED",
163+
"nvidia.com/gpu.compute.major": "8",
164+
"nvidia.com/gpu.compute.minor": "0",
165+
"nvidia.com/gpu.family": "ampere",
166+
"nvidia.com/gpu.count": "2",
167+
"nvidia.com/gpu.replicas": "2",
168+
"nvidia.com/gpu.sharing-strategy": "time-slicing",
169+
"nvidia.com/gpu.memory": "300",
170+
"nvidia.com/gpu.product": "MOCKMODEL-SHARED",
165171
},
166172
},
167173
}
@@ -212,14 +218,15 @@ func TestMigStrategySingleLabels(t *testing.T) {
212218
rt.NewFullGPU(),
213219
},
214220
expectedLabels: Labels{
215-
"nvidia.com/gpu.compute.major": "8",
216-
"nvidia.com/gpu.compute.minor": "0",
217-
"nvidia.com/gpu.family": "ampere",
218-
"nvidia.com/gpu.count": "1",
219-
"nvidia.com/gpu.replicas": "1",
220-
"nvidia.com/gpu.memory": "300",
221-
"nvidia.com/gpu.product": "MOCKMODEL",
222-
"nvidia.com/mig.strategy": "single",
221+
"nvidia.com/gpu.compute.major": "8",
222+
"nvidia.com/gpu.compute.minor": "0",
223+
"nvidia.com/gpu.family": "ampere",
224+
"nvidia.com/gpu.count": "1",
225+
"nvidia.com/gpu.replicas": "1",
226+
"nvidia.com/gpu.sharing-strategy": "none",
227+
"nvidia.com/gpu.memory": "300",
228+
"nvidia.com/gpu.product": "MOCKMODEL",
229+
"nvidia.com/mig.strategy": "single",
223230
},
224231
},
225232
{
@@ -229,14 +236,15 @@ func TestMigStrategySingleLabels(t *testing.T) {
229236
rt.NewFullGPU(),
230237
},
231238
expectedLabels: Labels{
232-
"nvidia.com/gpu.compute.major": "8",
233-
"nvidia.com/gpu.compute.minor": "0",
234-
"nvidia.com/gpu.family": "ampere",
235-
"nvidia.com/gpu.count": "2",
236-
"nvidia.com/gpu.replicas": "1",
237-
"nvidia.com/gpu.memory": "300",
238-
"nvidia.com/gpu.product": "MOCKMODEL",
239-
"nvidia.com/mig.strategy": "single",
239+
"nvidia.com/gpu.compute.major": "8",
240+
"nvidia.com/gpu.compute.minor": "0",
241+
"nvidia.com/gpu.family": "ampere",
242+
"nvidia.com/gpu.count": "2",
243+
"nvidia.com/gpu.replicas": "1",
244+
"nvidia.com/gpu.sharing-strategy": "none",
245+
"nvidia.com/gpu.memory": "300",
246+
"nvidia.com/gpu.product": "MOCKMODEL",
247+
"nvidia.com/mig.strategy": "single",
240248
},
241249
},
242250
{
@@ -247,19 +255,20 @@ func TestMigStrategySingleLabels(t *testing.T) {
247255
),
248256
},
249257
expectedLabels: Labels{
250-
"nvidia.com/gpu.count": "1",
251-
"nvidia.com/gpu.replicas": "1",
252-
"nvidia.com/gpu.memory": "100",
253-
"nvidia.com/gpu.product": "MOCKMODEL-MIG-1g.100gb",
254-
"nvidia.com/mig.strategy": "single",
255-
"nvidia.com/gpu.multiprocessors": "0",
256-
"nvidia.com/gpu.slices.gi": "1",
257-
"nvidia.com/gpu.slices.ci": "2",
258-
"nvidia.com/gpu.engines.copy": "0",
259-
"nvidia.com/gpu.engines.decoder": "0",
260-
"nvidia.com/gpu.engines.encoder": "0",
261-
"nvidia.com/gpu.engines.jpeg": "0",
262-
"nvidia.com/gpu.engines.ofa": "0",
258+
"nvidia.com/gpu.count": "1",
259+
"nvidia.com/gpu.replicas": "1",
260+
"nvidia.com/gpu.sharing-strategy": "none",
261+
"nvidia.com/gpu.memory": "100",
262+
"nvidia.com/gpu.product": "MOCKMODEL-MIG-1g.100gb",
263+
"nvidia.com/mig.strategy": "single",
264+
"nvidia.com/gpu.multiprocessors": "0",
265+
"nvidia.com/gpu.slices.gi": "1",
266+
"nvidia.com/gpu.slices.ci": "2",
267+
"nvidia.com/gpu.engines.copy": "0",
268+
"nvidia.com/gpu.engines.decoder": "0",
269+
"nvidia.com/gpu.engines.encoder": "0",
270+
"nvidia.com/gpu.engines.jpeg": "0",
271+
"nvidia.com/gpu.engines.ofa": "0",
263272
},
264273
},
265274
{
@@ -287,19 +296,20 @@ func TestMigStrategySingleLabels(t *testing.T) {
287296
),
288297
},
289298
expectedLabels: Labels{
290-
"nvidia.com/gpu.count": "2",
291-
"nvidia.com/gpu.replicas": "1",
292-
"nvidia.com/gpu.memory": "100",
293-
"nvidia.com/gpu.product": "MOCKMODEL-MIG-1g.100gb",
294-
"nvidia.com/mig.strategy": "single",
295-
"nvidia.com/gpu.multiprocessors": "12",
296-
"nvidia.com/gpu.slices.gi": "1",
297-
"nvidia.com/gpu.slices.ci": "2",
298-
"nvidia.com/gpu.engines.copy": "13",
299-
"nvidia.com/gpu.engines.decoder": "14",
300-
"nvidia.com/gpu.engines.encoder": "15",
301-
"nvidia.com/gpu.engines.jpeg": "16",
302-
"nvidia.com/gpu.engines.ofa": "17",
299+
"nvidia.com/gpu.count": "2",
300+
"nvidia.com/gpu.replicas": "1",
301+
"nvidia.com/gpu.sharing-strategy": "none",
302+
"nvidia.com/gpu.memory": "100",
303+
"nvidia.com/gpu.product": "MOCKMODEL-MIG-1g.100gb",
304+
"nvidia.com/mig.strategy": "single",
305+
"nvidia.com/gpu.multiprocessors": "12",
306+
"nvidia.com/gpu.slices.gi": "1",
307+
"nvidia.com/gpu.slices.ci": "2",
308+
"nvidia.com/gpu.engines.copy": "13",
309+
"nvidia.com/gpu.engines.decoder": "14",
310+
"nvidia.com/gpu.engines.encoder": "15",
311+
"nvidia.com/gpu.engines.jpeg": "16",
312+
"nvidia.com/gpu.engines.ofa": "17",
303313
},
304314
},
305315
{
@@ -309,11 +319,12 @@ func TestMigStrategySingleLabels(t *testing.T) {
309319
},
310320
isInvalid: true,
311321
expectedLabels: Labels{
312-
"nvidia.com/gpu.count": "0",
313-
"nvidia.com/gpu.replicas": "0",
314-
"nvidia.com/gpu.memory": "0",
315-
"nvidia.com/gpu.product": "MOCKMODEL-MIG-INVALID",
316-
"nvidia.com/mig.strategy": "single",
322+
"nvidia.com/gpu.count": "0",
323+
"nvidia.com/gpu.replicas": "0",
324+
"nvidia.com/gpu.sharing-strategy": "",
325+
"nvidia.com/gpu.memory": "0",
326+
"nvidia.com/gpu.product": "MOCKMODEL-MIG-INVALID",
327+
"nvidia.com/mig.strategy": "single",
317328
},
318329
},
319330
{
@@ -326,11 +337,12 @@ func TestMigStrategySingleLabels(t *testing.T) {
326337
},
327338
isInvalid: true,
328339
expectedLabels: Labels{
329-
"nvidia.com/gpu.count": "0",
330-
"nvidia.com/gpu.replicas": "0",
331-
"nvidia.com/gpu.memory": "0",
332-
"nvidia.com/gpu.product": "MOCKMODEL-MIG-INVALID",
333-
"nvidia.com/mig.strategy": "single",
340+
"nvidia.com/gpu.count": "0",
341+
"nvidia.com/gpu.replicas": "0",
342+
"nvidia.com/gpu.sharing-strategy": "",
343+
"nvidia.com/gpu.memory": "0",
344+
"nvidia.com/gpu.product": "MOCKMODEL-MIG-INVALID",
345+
"nvidia.com/mig.strategy": "single",
334346
},
335347
},
336348
{
@@ -343,14 +355,15 @@ func TestMigStrategySingleLabels(t *testing.T) {
343355
},
344356
isInvalid: true,
345357
expectedLabels: Labels{
346-
"nvidia.com/gpu.compute.major": "8",
347-
"nvidia.com/gpu.compute.minor": "0",
348-
"nvidia.com/gpu.family": "ampere",
349-
"nvidia.com/gpu.count": "0",
350-
"nvidia.com/gpu.replicas": "0",
351-
"nvidia.com/gpu.memory": "0",
352-
"nvidia.com/gpu.product": "MOCKMODEL-MIG-INVALID",
353-
"nvidia.com/mig.strategy": "single",
358+
"nvidia.com/gpu.compute.major": "8",
359+
"nvidia.com/gpu.compute.minor": "0",
360+
"nvidia.com/gpu.family": "ampere",
361+
"nvidia.com/gpu.count": "0",
362+
"nvidia.com/gpu.replicas": "0",
363+
"nvidia.com/gpu.sharing-strategy": "",
364+
"nvidia.com/gpu.memory": "0",
365+
"nvidia.com/gpu.product": "MOCKMODEL-MIG-INVALID",
366+
"nvidia.com/mig.strategy": "single",
354367
},
355368
},
356369
{
@@ -364,14 +377,15 @@ func TestMigStrategySingleLabels(t *testing.T) {
364377
},
365378
isInvalid: true,
366379
expectedLabels: Labels{
367-
"nvidia.com/gpu.compute.major": "8",
368-
"nvidia.com/gpu.compute.minor": "0",
369-
"nvidia.com/gpu.family": "ampere",
370-
"nvidia.com/gpu.count": "0",
371-
"nvidia.com/gpu.replicas": "0",
372-
"nvidia.com/gpu.memory": "0",
373-
"nvidia.com/gpu.product": "MOCKMODEL-MIG-INVALID",
374-
"nvidia.com/mig.strategy": "single",
380+
"nvidia.com/gpu.compute.major": "8",
381+
"nvidia.com/gpu.compute.minor": "0",
382+
"nvidia.com/gpu.family": "ampere",
383+
"nvidia.com/gpu.count": "0",
384+
"nvidia.com/gpu.replicas": "0",
385+
"nvidia.com/gpu.sharing-strategy": "",
386+
"nvidia.com/gpu.memory": "0",
387+
"nvidia.com/gpu.product": "MOCKMODEL-MIG-INVALID",
388+
"nvidia.com/mig.strategy": "single",
375389
},
376390
},
377391
}

0 commit comments

Comments
 (0)