@@ -24,6 +24,7 @@ import (
2424
2525 tfv1 "github.com/NexusGPU/tensor-fusion-operator/api/v1"
2626 "github.com/NexusGPU/tensor-fusion-operator/internal/constants"
27+ "github.com/NexusGPU/tensor-fusion-operator/internal/utils"
2728 corev1 "k8s.io/api/core/v1"
2829 "k8s.io/apimachinery/pkg/api/errors"
2930 "k8s.io/apimachinery/pkg/api/resource"
@@ -33,8 +34,11 @@ import (
3334 "sigs.k8s.io/controller-runtime/pkg/builder"
3435 "sigs.k8s.io/controller-runtime/pkg/client"
3536 "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
37+ "sigs.k8s.io/controller-runtime/pkg/event"
38+ "sigs.k8s.io/controller-runtime/pkg/handler"
3639 "sigs.k8s.io/controller-runtime/pkg/log"
3740 "sigs.k8s.io/controller-runtime/pkg/predicate"
41+ "sigs.k8s.io/controller-runtime/pkg/reconcile"
3842
3943 schedulingcorev1 "k8s.io/component-helpers/scheduling/corev1"
4044)
@@ -95,7 +99,14 @@ func (r *NodeReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.
9599 return ctrl.Result {}, err
96100 }
97101 if ! matched {
98- log .Info ("No matched GPU pool found, skip reconcile the Node" , "node" , node .Name , "labels" , node .Labels )
102+ // delete gpunode if no matched pool
103+ if err := r .Client .Delete (ctx , & tfv1.GPUNode {
104+ ObjectMeta : metav1.ObjectMeta {
105+ Name : node .Name ,
106+ },
107+ }); err != nil {
108+ return ctrl.Result {}, fmt .Errorf ("can not delete gpuNode(%s) : %w" , node .Name , err )
109+ }
99110 return ctrl.Result {}, nil
100111 }
101112
@@ -169,11 +180,36 @@ func (r *NodeReconciler) SetupWithManager(mgr ctrl.Manager) error {
169180 if err != nil {
170181 return fmt .Errorf ("unable to create predicate: %w" , err )
171182 }
183+
172184 return ctrl .NewControllerManagedBy (mgr ).
173185 For (& corev1.Node {}, builder .WithPredicates (p )).
174186 Named ("node" ).
187+ Watches (& tfv1.GPUPool {}, handler .EnqueueRequestsFromMapFunc (func (ctx context.Context , obj client.Object ) []reconcile.Request {
188+ nodelist := & tfv1.GPUNodeList {}
189+ if err := mgr .GetClient ().List (ctx , nodelist , client.MatchingLabels {
190+ selectors [0 ]: selectors [1 ],
191+ }); err != nil {
192+ log .FromContext (ctx ).Error (err , "failed to list GPUNode" )
193+ return []reconcile.Request {}
194+ }
195+ var requests []reconcile.Request
196+ for _ , n := range nodelist .Items {
197+ requests = append (requests , reconcile.Request {NamespacedName : client.ObjectKey {Name : n .Name }})
198+ }
199+ return requests
200+ }), builder .WithPredicates (predicate.Funcs {
201+ UpdateFunc : func (e event.UpdateEvent ) bool {
202+ oldObj , ok1 := e .ObjectOld .(* tfv1.GPUPool )
203+ newObj , ok2 := e .ObjectNew .(* tfv1.GPUPool )
204+ if ! ok1 || ! ok2 {
205+ return false
206+ }
207+ oldNodeSelector := oldObj .Spec .NodeManagerConfig .NodeSelector
208+ newNodeSelector := newObj .Spec .NodeManagerConfig .NodeSelector
209+ return utils .GetObjectHash (oldNodeSelector ) != utils .GetObjectHash (newNodeSelector )
210+ },
211+ })).
175212 Complete (r )
176- // TODO: When Pool changed, all nodes should re-generated, delete not matched ones, this logic should be added into GPUPool controller
177213}
178214
179215func getMatchedPoolName (node * corev1.Node , poolList []tfv1.GPUPool ) (* tfv1.GPUPool , bool , error ) {
0 commit comments