Skip to content

Commit ff977e9

Browse files
committed
wip: handle driver migrations in nvidiadriver controller
Signed-off-by: Christopher Desiniotis <[email protected]>
1 parent 81640fd commit ff977e9

20 files changed

+208
-6
lines changed

controllers/nvidiadriver_controller.go

+167
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,14 @@ import (
2020
"context"
2121
"fmt"
2222
"maps"
23+
"os"
2324
"time"
2425

2526
appsv1 "k8s.io/api/apps/v1"
2627
corev1 "k8s.io/api/core/v1"
2728
apierrors "k8s.io/apimachinery/pkg/api/errors"
2829
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
30+
"k8s.io/apimachinery/pkg/labels"
2931
"k8s.io/apimachinery/pkg/runtime"
3032
"k8s.io/apimachinery/pkg/types"
3133
"k8s.io/client-go/util/workqueue"
@@ -57,6 +59,7 @@ type NVIDIADriverReconciler struct {
5759
stateManager state.Manager
5860
nodeSelectorValidator validator.Validator
5961
conditionUpdater conditions.Updater
62+
operatorNamespace string
6063
}
6164

6265
//+kubebuilder:rbac:groups=nvidia.com,resources=nvidiadrivers,verbs=get;list;watch;create;update;patch;delete
@@ -124,6 +127,8 @@ func (r *NVIDIADriverReconciler) Reconcile(ctx context.Context, req ctrl.Request
124127
}
125128
clusterPolicyInstance := clusterPolicyList.Items[0]
126129

130+
r.operatorNamespace = os.Getenv("OPERATOR_NAMESPACE")
131+
127132
// Create a new InfoCatalog which is a generic interface for passing information to state managers
128133
infoCatalog := state.NewInfoCatalog()
129134

@@ -168,6 +173,16 @@ func (r *NVIDIADriverReconciler) Reconcile(ctx context.Context, req ctrl.Request
168173
return reconcile.Result{}, nil
169174
}
170175

176+
clusterpolicyDriverLabels, err := getClusterpolicyDriverLabels(r.ClusterInfo, clusterPolicyInstance)
177+
if err != nil {
178+
return reconcile.Result{}, fmt.Errorf("failed to get clusterpolicy driver labels: %w", err)
179+
}
180+
181+
err = updateNodesManagedByDriver(ctx, r, instance, clusterpolicyDriverLabels)
182+
if err != nil {
183+
return reconcile.Result{}, fmt.Errorf("failed to update nodes managed by driver: %w", err)
184+
}
185+
171186
// Sync state and update status
172187
managerStatus := r.stateManager.SyncState(ctx, instance, infoCatalog)
173188

@@ -191,6 +206,9 @@ func (r *NVIDIADriverReconciler) Reconcile(ctx context.Context, req ctrl.Request
191206
}
192207
}
193208
// if no errors are reported from any state, then we would be waiting on driver daemonset pods
209+
// TODO: Avoid marking 'default' NVIDIADriver instances as NotReady if DesiredNumberScheduled == 0.
210+
// This will avoid unnecessary reconciliations when the 'default' instance is overridden on all
211+
// GPU nodes, and thus, DesiredNumberScheduled being 0 is valid.
194212
if errorInfo == nil {
195213
condErr = r.conditionUpdater.SetConditionsError(ctx, instance, conditions.DriverNotReady, "Waiting for driver pod to be ready")
196214
if condErr != nil {
@@ -404,5 +422,154 @@ func (r *NVIDIADriverReconciler) SetupWithManager(ctx context.Context, mgr ctrl.
404422
return fmt.Errorf("failed to add index key: %w", err)
405423
}
406424

425+
if err := mgr.GetFieldIndexer().IndexField(ctx, &corev1.Pod{}, "spec.nodeName", func(rawObj client.Object) []string {
426+
pod := rawObj.(*corev1.Pod)
427+
return []string{pod.Spec.NodeName}
428+
}); err != nil {
429+
return err
430+
}
431+
407432
return nil
408433
}
434+
435+
func updateNodesManagedByDriver(ctx context.Context, r *NVIDIADriverReconciler, instance *nvidiav1alpha1.NVIDIADriver, clusterpolicyDriverLabels map[string]string) error {
436+
nodes, err := getNVIDIADriverSelectedNodes(ctx, r.Client, instance)
437+
if err != nil {
438+
return fmt.Errorf("failed to get selected nodes for NVIDIADriver CR: %w", err)
439+
}
440+
441+
// A map tracking which node objects need to be updated. E.g. updated label / annotations
442+
// need to be applied.
443+
nodesToUpdate := map[*corev1.Node]struct{}{}
444+
445+
for _, node := range nodes.Items {
446+
labels := node.GetLabels()
447+
annotations := node.GetAnnotations()
448+
449+
managedBy, exists := labels["nvidia.com/gpu.driver.managed-by"]
450+
if !exists {
451+
// if 'managed-by' label does not exist, label node with cr.Name
452+
labels["nvidia.com/gpu.driver.managed-by"] = instance.Name
453+
node.SetLabels(labels)
454+
nodesToUpdate[&node] = struct{}{}
455+
// If there is an orphan driver pod running on the node,
456+
// indicate to the upgrade controller that an upgrade is required.
457+
// This will occur when we are migrating from a Clusterpolicy owned
458+
// daemonset to a NVIDIADriver owned daemonset.
459+
podList := &corev1.PodList{}
460+
err = r.Client.List(ctx, podList,
461+
client.InNamespace(r.operatorNamespace),
462+
client.MatchingLabels(clusterpolicyDriverLabels),
463+
client.MatchingFields{"spec.nodeName": node.Name})
464+
if err != nil {
465+
return fmt.Errorf("failed to list driver pods: %w", err)
466+
}
467+
if len(podList.Items) == 0 {
468+
continue
469+
}
470+
pod := podList.Items[0]
471+
if pod.OwnerReferences == nil || len(pod.OwnerReferences) == 0 {
472+
annotations["nvidia.com/gpu-driver-upgrade-requested"] = "true"
473+
node.SetAnnotations(annotations)
474+
}
475+
continue
476+
}
477+
478+
// do nothing if node is already being managed by this CR
479+
if managedBy == instance.Name {
480+
continue
481+
}
482+
483+
// If node is 'managed-by' another CR, there are several scenarios
484+
// 1) There is no driver pod running on the node. Therefore the label is stale.
485+
// 2) There is a driver pod running on the node, and it is not an orphan. There are
486+
// two possible actions:
487+
// a) If the NVIDIADriver instance we are currently reconciling is the 'default'
488+
// instance (the node selector is empty), do nothing. All other NVIDIADriver
489+
// instances take precedence.
490+
// b) The pod running no longer falls into the node pool of the CR it is currently
491+
// being managed by. Thus, the NVIDIADriver instance we are currently reconciling
492+
// should take ownership of the node.
493+
podList := &corev1.PodList{}
494+
err = r.Client.List(ctx, podList,
495+
client.InNamespace(r.operatorNamespace),
496+
client.MatchingLabels(map[string]string{AppComponentLabelKey: AppComponentLabelValue}),
497+
client.MatchingFields{"spec.nodeName": node.Name})
498+
if err != nil {
499+
return fmt.Errorf("failed to list driver pods: %w", err)
500+
}
501+
if len(podList.Items) == 0 {
502+
labels["nvidia.com/gpu.driver.managed-by"] = instance.Name
503+
node.SetLabels(labels)
504+
nodesToUpdate[&node] = struct{}{}
505+
continue
506+
}
507+
if instance.Spec.NodeSelector == nil || len(instance.Spec.NodeSelector) == 0 {
508+
// If the nodeSelector for the NVIDIADriver instance is empty, then we
509+
// treat it as the 'default' CR which has the lowest precedence. Allow
510+
// the existing driver pod, owned by another NVIDIADriver CR, to continue
511+
// to run.
512+
continue
513+
}
514+
pod := podList.Items[0]
515+
if pod.OwnerReferences != nil && len(pod.OwnerReferences) > 0 {
516+
err := r.Client.Patch(ctx, &pod, client.RawPatch(types.MergePatchType, []byte(fmt.Sprintf(`{"metadata":{"labels":{"app": null}}}`))))
517+
if err != nil {
518+
return fmt.Errorf("failed to patch pod in order to make it an orphan: %w", err)
519+
}
520+
}
521+
labels["nvidia.com/gpu.driver.managed-by"] = instance.Name
522+
annotations["nvidia.com/gpu-driver-upgrade-requested"] = "true"
523+
node.SetLabels(labels)
524+
node.SetAnnotations(annotations)
525+
nodesToUpdate[&node] = struct{}{}
526+
}
527+
528+
// Apply updated labels / annotations on node objects
529+
for node := range nodesToUpdate {
530+
err = r.Client.Update(ctx, node)
531+
if err != nil {
532+
return fmt.Errorf("failed to update node %s: %w", node.Name, err)
533+
}
534+
}
535+
536+
return nil
537+
}
538+
539+
// getNVIDIADriverSelectedNodes returns selected nodes based on the nodeselector labels set for a given NVIDIADriver instance
540+
func getNVIDIADriverSelectedNodes(ctx context.Context, k8sClient client.Client, cr *nvidiav1alpha1.NVIDIADriver) (*corev1.NodeList, error) {
541+
nodeList := &corev1.NodeList{}
542+
543+
if cr.Spec.NodeSelector == nil {
544+
cr.Spec.NodeSelector = cr.GetNodeSelector()
545+
}
546+
547+
selector := labels.Set(cr.Spec.NodeSelector).AsSelector()
548+
549+
opts := []client.ListOption{
550+
client.MatchingLabelsSelector{Selector: selector},
551+
}
552+
err := k8sClient.List(ctx, nodeList, opts...)
553+
554+
return nodeList, err
555+
}
556+
557+
// getClusterpolicyDriverLabels returns a set of labels that can be used to identify driver pods running that are owned by Clusterpolicy
558+
func getClusterpolicyDriverLabels(clusterInfo clusterinfo.Interface, clusterpolicy gpuv1.ClusterPolicy) (map[string]string, error) {
559+
// initialize with common app=nvidia-driver-daemonset label
560+
driverLabelKey := DriverLabelKey
561+
driverLabelValue := DriverLabelValue
562+
563+
ocpVer, err := clusterInfo.GetOpenshiftVersion()
564+
if err != nil {
565+
return nil, fmt.Errorf("failed to get the OpenShift version: %w", err)
566+
}
567+
if ocpVer != "" && clusterpolicy.Spec.Operator.UseOpenShiftDriverToolkit != nil && *clusterpolicy.Spec.Operator.UseOpenShiftDriverToolkit == true {
568+
// For OCP, when DTK is enabled app=nvidia-driver-daemonset label is not
569+
// constant and changes based on rhcos version. Hence use DTK label instead
570+
driverLabelKey = ocpDriverToolkitIdentificationLabel
571+
driverLabelValue = ocpDriverToolkitIdentificationValue
572+
}
573+
574+
return map[string]string{driverLabelKey: driverLabelValue}, nil
575+
}

internal/state/driver.go

+1
Original file line numberDiff line numberDiff line change
@@ -548,6 +548,7 @@ func getDriverSpec(cr *nvidiav1alpha1.NVIDIADriver, nodePool nodePool) (*driverS
548548

549549
return &driverSpec{
550550
Spec: spec,
551+
CRName: cr.Name,
551552
AppName: nvidiaDriverAppName,
552553
Name: nvidiaDriverName,
553554
ImagePath: imagePath,

internal/state/driver_test.go

+1
Original file line numberDiff line numberDiff line change
@@ -624,6 +624,7 @@ func getMinimalDriverRenderData() *driverRenderData {
624624
ReadinessProbe: getDefaultContainerProbeSpec(),
625625
DriverType: nvidiav1alpha1.GPU,
626626
},
627+
CRName: "test-cr",
627628
AppName: "nvidia-gpu-driver-ubuntu22.04-7c6d7bd86b",
628629
Name: "nvidia-gpu-driver-ubuntu22.04",
629630
ImagePath: "nvcr.io/nvidia/driver:525.85.03-ubuntu22.04",

internal/state/testdata/golden/driver-additional-configs.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,7 @@ spec:
240240
name: run-mellanox-drivers
241241
nodeSelector:
242242
nvidia.com/gpu.deploy.driver: "true"
243+
nvidia.com/gpu.driver.managed-by: test-cr
243244
priorityClassName: system-node-critical
244245
serviceAccountName: nvidia-gpu-driver-ubuntu22.04
245246
tolerations:

internal/state/testdata/golden/driver-full-spec.yaml

+1-2
Original file line numberDiff line numberDiff line change
@@ -254,9 +254,8 @@ spec:
254254
mountPropagation: HostToContainer
255255
name: run-mellanox-drivers
256256
nodeSelector:
257-
example.com/bar: bar
258-
example.com/foo: foo
259257
nvidia.com/gpu.deploy.driver: "true"
258+
nvidia.com/gpu.driver.managed-by: test-cr
260259
priorityClassName: custom-priority-class-name
261260
serviceAccountName: nvidia-gpu-driver-ubuntu22.04
262261
tolerations:

internal/state/testdata/golden/driver-gdrcopy-openshift.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -407,6 +407,7 @@ spec:
407407
nodeSelector:
408408
feature.node.kubernetes.io/system-os_release.OSTREE_VERSION: 413.92.202304252344-0
409409
nvidia.com/gpu.deploy.driver: "true"
410+
nvidia.com/gpu.driver.managed-by: test-cr
410411
priorityClassName: system-node-critical
411412
serviceAccountName: nvidia-gpu-driver-openshift
412413
tolerations:

internal/state/testdata/golden/driver-gdrcopy.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -296,6 +296,7 @@ spec:
296296
name: run-mellanox-drivers
297297
nodeSelector:
298298
nvidia.com/gpu.deploy.driver: "true"
299+
nvidia.com/gpu.driver.managed-by: test-cr
299300
priorityClassName: system-node-critical
300301
serviceAccountName: nvidia-gpu-driver-ubuntu22.04
301302
tolerations:

internal/state/testdata/golden/driver-gds.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -296,6 +296,7 @@ spec:
296296
name: run-mellanox-drivers
297297
nodeSelector:
298298
nvidia.com/gpu.deploy.driver: "true"
299+
nvidia.com/gpu.driver.managed-by: test-cr
299300
priorityClassName: system-node-critical
300301
serviceAccountName: nvidia-gpu-driver-ubuntu22.04
301302
tolerations:

internal/state/testdata/golden/driver-minimal.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -231,6 +231,7 @@ spec:
231231
name: run-mellanox-drivers
232232
nodeSelector:
233233
nvidia.com/gpu.deploy.driver: "true"
234+
nvidia.com/gpu.driver.managed-by: test-cr
234235
priorityClassName: system-node-critical
235236
serviceAccountName: nvidia-gpu-driver-ubuntu22.04
236237
tolerations:

internal/state/testdata/golden/driver-openshift-drivertoolkit.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -341,6 +341,7 @@ spec:
341341
nodeSelector:
342342
feature.node.kubernetes.io/system-os_release.OSTREE_VERSION: 413.92.202304252344-0
343343
nvidia.com/gpu.deploy.driver: "true"
344+
nvidia.com/gpu.driver.managed-by: test-cr
344345
priorityClassName: system-node-critical
345346
serviceAccountName: nvidia-gpu-driver-openshift
346347
tolerations:

internal/state/testdata/golden/driver-precompiled.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -234,6 +234,7 @@ spec:
234234
nodeSelector:
235235
feature.node.kubernetes.io/kernel-version.full: 5.4.0-150-generic
236236
nvidia.com/gpu.deploy.driver: "true"
237+
nvidia.com/gpu.driver.managed-by: test-cr
237238
priorityClassName: system-node-critical
238239
serviceAccountName: nvidia-gpu-driver-ubuntu22.04
239240
tolerations:

internal/state/testdata/golden/driver-rdma-hostmofed.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -307,6 +307,7 @@ spec:
307307
name: run-mellanox-drivers
308308
nodeSelector:
309309
nvidia.com/gpu.deploy.driver: "true"
310+
nvidia.com/gpu.driver.managed-by: test-cr
310311
priorityClassName: system-node-critical
311312
serviceAccountName: nvidia-gpu-driver-ubuntu22.04
312313
tolerations:

internal/state/testdata/golden/driver-rdma.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -301,6 +301,7 @@ spec:
301301
name: run-mellanox-drivers
302302
nodeSelector:
303303
nvidia.com/gpu.deploy.driver: "true"
304+
nvidia.com/gpu.driver.managed-by: test-cr
304305
priorityClassName: system-node-critical
305306
serviceAccountName: nvidia-gpu-driver-ubuntu22.04
306307
tolerations:

internal/state/testdata/golden/driver-vgpu-host-manager-openshift.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -302,6 +302,7 @@ spec:
302302
nodeSelector:
303303
feature.node.kubernetes.io/system-os_release.OSTREE_VERSION: 413.92.202304252344-0
304304
nvidia.com/gpu.deploy.vgpu-manager: "true"
305+
nvidia.com/gpu.driver.managed-by: test-cr
305306
priorityClassName: system-node-critical
306307
serviceAccountName: nvidia-vgpu-manager-openshift
307308
tolerations:

internal/state/testdata/golden/driver-vgpu-host-manager.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -217,6 +217,7 @@ spec:
217217
name: run-mellanox-drivers
218218
nodeSelector:
219219
nvidia.com/gpu.deploy.vgpu-manager: "true"
220+
nvidia.com/gpu.driver.managed-by: test-cr
220221
priorityClassName: system-node-critical
221222
serviceAccountName: nvidia-vgpu-manager-ubuntu22.04
222223
tolerations:

internal/state/testdata/golden/driver-vgpu-licensing.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,7 @@ spec:
237237
name: run-mellanox-drivers
238238
nodeSelector:
239239
nvidia.com/gpu.deploy.driver: "true"
240+
nvidia.com/gpu.driver.managed-by: test-cr
240241
priorityClassName: system-node-critical
241242
serviceAccountName: nvidia-gpu-driver-ubuntu22.04
242243
tolerations:

internal/state/types.go

+1
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ type SyncingSource source.SyncingSource
3030
// which is to be populated with the fully-qualified image path.
3131
type driverSpec struct {
3232
Spec *nvidiav1alpha1.NVIDIADriverSpec
33+
CRName string
3334
AppName string
3435
Name string
3536
ImagePath string

internal/validator/validator.go

+23-1
Original file line numberDiff line numberDiff line change
@@ -51,8 +51,30 @@ func (nsv *nodeSelectorValidator) Validate(ctx context.Context, cr *nvidiav1alph
5151
return err
5252
}
5353

54+
crHasEmptyNodeSelector := false
55+
if cr.Spec.NodeSelector == nil || len(cr.Spec.NodeSelector) == 0 {
56+
crHasEmptyNodeSelector = true
57+
}
58+
5459
names := map[string]struct{}{}
55-
for di := range drivers.Items {
60+
for di, driver := range drivers.Items {
61+
if driver.Name == cr.Name {
62+
continue
63+
}
64+
65+
if crHasEmptyNodeSelector {
66+
// If the CR we are validating has an empty node selector, it does not conflict
67+
// with any other CR unless there is another CR that also is configured with an
68+
// empty node selector. Only one NVIDIADriver instance can be configured with an
69+
// empty node selector at any point in time. We deem such an instance as the 'default'
70+
// instance, as it will get deployed on all GPU nodes. Other CRs, with non-empty
71+
// node selectors, can override the 'default' NVIDIADriver instance.
72+
if driver.Spec.NodeSelector == nil || len(driver.Spec.NodeSelector) == 0 {
73+
return fmt.Errorf("cannot have empty nodeSelector as another CR is configured with an empty nodeSelector: %s", driver.Name)
74+
}
75+
continue
76+
}
77+
5678
nodeList, err := nsv.getNVIDIADriverSelectedNodes(ctx, &drivers.Items[di])
5779
if err != nil {
5880
return err

internal/validator/validator_test.go

+1
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@ func makeTestNode(opts ...nodeOptions) *corev1.Node {
9393
return n
9494
}
9595

96+
// TODO: update this test function
9697
func TestCheckNodeSelector(t *testing.T) {
9798
node := makeTestNode(labelled(map[string]string{"os-version": "ubuntu20.04"}))
9899
driver := makeTestDriver(nodeSelector(node.Labels))

0 commit comments

Comments
 (0)