@@ -20,12 +20,14 @@ import (
"context"
"fmt"
"maps"
+ "os"
"time"

appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+ "k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/util/workqueue"
@@ -57,6 +59,7 @@ type NVIDIADriverReconciler struct {
stateManager state.Manager
nodeSelectorValidator validator.Validator
conditionUpdater conditions.Updater
+ operatorNamespace string
}

//+kubebuilder:rbac:groups=nvidia.com,resources=nvidiadrivers,verbs=get;list;watch;create;update;patch;delete
@@ -124,6 +127,8 @@ func (r *NVIDIADriverReconciler) Reconcile(ctx context.Context, req ctrl.Request
}
clusterPolicyInstance := clusterPolicyList.Items[0]

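+ // OPERATOR_NAMESPACE is assumed to be set on the operator Deployment (typically via the
+ // Downward API); it is used below when listing driver pods owned by ClusterPolicy.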
+ r.operatorNamespace = os.Getenv("OPERATOR_NAMESPACE")
+
// Create a new InfoCatalog which is a generic interface for passing information to state managers
infoCatalog := state.NewInfoCatalog()
@@ -168,6 +173,16 @@ func (r *NVIDIADriverReconciler) Reconcile(ctx context.Context, req ctrl.Request
return reconcile.Result{}, nil
}

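+ // Before syncing state, make sure every node selected by this NVIDIADriver instance is
+ // labeled as managed by it; this also handles handover from ClusterPolicy-owned driver
+ // DaemonSets (see updateNodesManagedByDriver below).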
+ clusterpolicyDriverLabels, err := getClusterpolicyDriverLabels(r.ClusterInfo, clusterPolicyInstance)
+ if err != nil {
+     return reconcile.Result{}, fmt.Errorf("failed to get clusterpolicy driver labels: %w", err)
+ }
+
+ err = updateNodesManagedByDriver(ctx, r, instance, clusterpolicyDriverLabels)
+ if err != nil {
+     return reconcile.Result{}, fmt.Errorf("failed to update nodes managed by driver: %w", err)
+ }
+
// Sync state and update status
managerStatus := r.stateManager.SyncState(ctx, instance, infoCatalog)
@@ -191,6 +206,9 @@ func (r *NVIDIADriverReconciler) Reconcile(ctx context.Context, req ctrl.Request
}
}
// if no errors are reported from any state, then we would be waiting on driver daemonset pods
+ // TODO: Avoid marking 'default' NVIDIADriver instances as NotReady if DesiredNumberScheduled == 0.
+ // This will avoid unnecessary reconciliations when the 'default' instance is overridden on all
+ // GPU nodes, in which case DesiredNumberScheduled being 0 is valid.
if errorInfo == nil {
    condErr = r.conditionUpdater.SetConditionsError(ctx, instance, conditions.DriverNotReady, "Waiting for driver pod to be ready")
    if condErr != nil {
@@ -404,5 +422,154 @@ func (r *NVIDIADriverReconciler) SetupWithManager(ctx context.Context, mgr ctrl.
return fmt.Errorf("failed to add index key: %w", err)
}

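+ // Index pods by spec.nodeName so that driver pods running on a given node can be listed
+ // with client.MatchingFields (used by updateNodesManagedByDriver).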
+ if err := mgr.GetFieldIndexer().IndexField(ctx, &corev1.Pod{}, "spec.nodeName", func(rawObj client.Object) []string {
+     pod := rawObj.(*corev1.Pod)
+     return []string{pod.Spec.NodeName}
+ }); err != nil {
+     return err
+ }
+
return nil
}
+
+ func updateNodesManagedByDriver(ctx context.Context, r *NVIDIADriverReconciler, instance *nvidiav1alpha1.NVIDIADriver, clusterpolicyDriverLabels map[string]string) error {
+     nodes, err := getNVIDIADriverSelectedNodes(ctx, r.Client, instance)
+     if err != nil {
+         return fmt.Errorf("failed to get selected nodes for NVIDIADriver CR: %w", err)
+     }
+
+     // A map tracking which node objects need to be updated, e.g. because updated
+     // labels / annotations need to be applied.
+     nodesToUpdate := map[*corev1.Node]struct{}{}
+
+     for _, node := range nodes.Items {
+         node := node // capture the range variable so &node is unique per iteration
+         labels := node.GetLabels()
+         annotations := node.GetAnnotations()
+
+         managedBy, exists := labels["nvidia.com/gpu.driver.managed-by"]
+         if !exists {
+             // if the 'managed-by' label does not exist, label the node with cr.Name
+             labels["nvidia.com/gpu.driver.managed-by"] = instance.Name
+             node.SetLabels(labels)
+             nodesToUpdate[&node] = struct{}{}
+             // If there is an orphan driver pod running on the node,
+             // indicate to the upgrade controller that an upgrade is required.
+             // This occurs when we are migrating from a ClusterPolicy-owned
+             // daemonset to an NVIDIADriver-owned daemonset.
+             podList := &corev1.PodList{}
+             err = r.Client.List(ctx, podList,
+                 client.InNamespace(r.operatorNamespace),
+                 client.MatchingLabels(clusterpolicyDriverLabels),
+                 client.MatchingFields{"spec.nodeName": node.Name})
+             if err != nil {
+                 return fmt.Errorf("failed to list driver pods: %w", err)
+             }
+             if len(podList.Items) == 0 {
+                 continue
+             }
+             pod := podList.Items[0]
+             if len(pod.OwnerReferences) == 0 {
+                 annotations["nvidia.com/gpu-driver-upgrade-requested"] = "true"
+                 node.SetAnnotations(annotations)
+             }
+             continue
+         }
+
+         // do nothing if the node is already being managed by this CR
+         if managedBy == instance.Name {
+             continue
+         }
+
+         // If the node is 'managed-by' another CR, there are several scenarios:
+         // 1) There is no driver pod running on the node. Therefore the label is stale.
+         // 2) There is a driver pod running on the node, and it is not an orphan. There are
+         //    two possible actions:
+         //    a) If the NVIDIADriver instance we are currently reconciling is the 'default'
+         //       instance (the node selector is empty), do nothing. All other NVIDIADriver
+         //       instances take precedence.
+         //    b) The running pod no longer falls into the node pool of the CR currently
+         //       managing it. Thus, the NVIDIADriver instance we are currently reconciling
+         //       should take ownership of the node.
+         podList := &corev1.PodList{}
+         err = r.Client.List(ctx, podList,
+             client.InNamespace(r.operatorNamespace),
+             client.MatchingLabels(map[string]string{AppComponentLabelKey: AppComponentLabelValue}),
+             client.MatchingFields{"spec.nodeName": node.Name})
+         if err != nil {
+             return fmt.Errorf("failed to list driver pods: %w", err)
+         }
+         if len(podList.Items) == 0 {
+             labels["nvidia.com/gpu.driver.managed-by"] = instance.Name
+             node.SetLabels(labels)
+             nodesToUpdate[&node] = struct{}{}
+             continue
+         }
+         if len(instance.Spec.NodeSelector) == 0 {
+             // If the nodeSelector for the NVIDIADriver instance is empty, then we
+             // treat it as the 'default' CR which has the lowest precedence. Allow
+             // the existing driver pod, owned by another NVIDIADriver CR, to continue
+             // to run.
+             continue
+         }
+         pod := podList.Items[0]
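+         // The driver pod is still owned by another DaemonSet: strip its 'app' label so the
+         // DaemonSet controller releases (orphans) it, then request a driver upgrade on the node.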
+         if len(pod.OwnerReferences) > 0 {
+             err := r.Client.Patch(ctx, &pod, client.RawPatch(types.MergePatchType, []byte(`{"metadata":{"labels":{"app": null}}}`)))
+             if err != nil {
+                 return fmt.Errorf("failed to patch pod in order to make it an orphan: %w", err)
+             }
+         }
+         labels["nvidia.com/gpu.driver.managed-by"] = instance.Name
+         annotations["nvidia.com/gpu-driver-upgrade-requested"] = "true"
+         node.SetLabels(labels)
+         node.SetAnnotations(annotations)
+         nodesToUpdate[&node] = struct{}{}
+     }
+
+     // Apply the updated labels / annotations on the node objects
+     for node := range nodesToUpdate {
+         err = r.Client.Update(ctx, node)
+         if err != nil {
+             return fmt.Errorf("failed to update node %s: %w", node.Name, err)
+         }
+     }
+
+     return nil
+ }
+
+ // getNVIDIADriverSelectedNodes returns the nodes selected by the node selector labels set for a given NVIDIADriver instance
+ func getNVIDIADriverSelectedNodes(ctx context.Context, k8sClient client.Client, cr *nvidiav1alpha1.NVIDIADriver) (*corev1.NodeList, error) {
+     nodeList := &corev1.NodeList{}
+
+     if cr.Spec.NodeSelector == nil {
+         cr.Spec.NodeSelector = cr.GetNodeSelector()
+     }
+
+     selector := labels.Set(cr.Spec.NodeSelector).AsSelector()
+
+     opts := []client.ListOption{
+         client.MatchingLabelsSelector{Selector: selector},
+     }
+     err := k8sClient.List(ctx, nodeList, opts...)
+
+     return nodeList, err
+ }
+
+ // getClusterpolicyDriverLabels returns a set of labels that can be used to identify driver pods owned by ClusterPolicy
+ func getClusterpolicyDriverLabels(clusterInfo clusterinfo.Interface, clusterpolicy gpuv1.ClusterPolicy) (map[string]string, error) {
+     // initialize with the common app=nvidia-driver-daemonset label
+     driverLabelKey := DriverLabelKey
+     driverLabelValue := DriverLabelValue
+
+     ocpVer, err := clusterInfo.GetOpenshiftVersion()
+     if err != nil {
+         return nil, fmt.Errorf("failed to get the OpenShift version: %w", err)
+     }
+     if ocpVer != "" && clusterpolicy.Spec.Operator.UseOpenShiftDriverToolkit != nil && *clusterpolicy.Spec.Operator.UseOpenShiftDriverToolkit {
+         // For OCP, when DTK is enabled the app=nvidia-driver-daemonset label is not
+         // constant; it changes based on the RHCOS version. Hence use the DTK label instead.
+         driverLabelKey = ocpDriverToolkitIdentificationLabel
+         driverLabelValue = ocpDriverToolkitIdentificationValue
+     }
+
+     return map[string]string{driverLabelKey: driverLabelValue}, nil
+ }