@@ -73,13 +73,12 @@ type nodeStatusData struct {
 }
 
 type NodeController struct {
-	allocateNodeCIDRs       bool
-	cloud                   cloudprovider.Interface
-	clusterCIDR             *net.IPNet
-	serviceCIDR             *net.IPNet
-	deletingPodsRateLimiter flowcontrol.RateLimiter
-	knownNodeSet            map[string]*api.Node
-	kubeClient              clientset.Interface
+	allocateNodeCIDRs bool
+	cloud             cloudprovider.Interface
+	clusterCIDR       *net.IPNet
+	serviceCIDR       *net.IPNet
+	knownNodeSet      map[string]*api.Node
+	kubeClient        clientset.Interface
 	// Method for easy mocking in unittest.
 	lookupIP func(host string) ([]net.IP, error)
 	// Value used if sync_nodes_status=False. NodeController will not proactively
@@ -112,9 +111,11 @@ type NodeController struct {
 	// Lock to access evictor workers
 	evictorLock *sync.Mutex
 	// workers that evict pods from unresponsive nodes.
-	podEvictor         *RateLimitedTimedQueue
-	terminationEvictor *RateLimitedTimedQueue
-	podEvictionTimeout time.Duration
+	zonePodEvictor         map[string]*RateLimitedTimedQueue
+	zoneTerminationEvictor map[string]*RateLimitedTimedQueue
+	evictionLimiterQPS     float32
+	evictionLimiterBurst   int
+	podEvictionTimeout     time.Duration
 	// The maximum duration before a pod evicted from a node can be forcefully terminated.
 	maximumGracePeriod time.Duration
 	recorder           record.EventRecorder
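Because the eviction queues are now created per zone at runtime, the controller stores the limiter parameters (`evictionLimiterQPS`, `evictionLimiterBurst`) instead of prebuilt `flowcontrol.RateLimiter`s, and builds a fresh token bucket for each zone it discovers. Below is a minimal sketch of how such a token bucket behaves, assuming the `flowcontrol` package at `k8s.io/kubernetes/pkg/util/flowcontrol` (the import path this branch uses) and illustrative values rather than the controller's actual defaults:

```go
package main

import (
	"fmt"

	"k8s.io/kubernetes/pkg/util/flowcontrol"
)

func main() {
	// Illustrative values: refill at 0.1 tokens/sec (one eviction every
	// 10s) with a burst capacity of 2 immediately available tokens.
	limiter := flowcontrol.NewTokenBucketRateLimiter(0.1, 2)
	for i := 0; i < 4; i++ {
		// TryAccept consumes a token without blocking; it reports false
		// once the bucket is empty.
		fmt.Printf("attempt %d accepted: %v\n", i, limiter.TryAccept())
	}
}
```

With QPS 0.1 and burst 2, the first two attempts drain the bucket and later ones are rejected until tokens refill; that backpressure is what each per-zone queue relies on.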
@@ -142,8 +143,8 @@ func NewNodeController(
 	cloud cloudprovider.Interface,
 	kubeClient clientset.Interface,
 	podEvictionTimeout time.Duration,
-	deletionEvictionLimiter flowcontrol.RateLimiter,
-	terminationEvictionLimiter flowcontrol.RateLimiter,
+	evictionLimiterQPS float32,
+	evictionLimiterBurst int,
 	nodeMonitorGracePeriod time.Duration,
 	nodeStartupGracePeriod time.Duration,
 	nodeMonitorPeriod time.Duration,
@@ -184,8 +185,8 @@ func NewNodeController(
 		podEvictionTimeout:        podEvictionTimeout,
 		maximumGracePeriod:        5 * time.Minute,
 		evictorLock:               &evictorLock,
-		podEvictor:                NewRateLimitedTimedQueue(deletionEvictionLimiter),
-		terminationEvictor:        NewRateLimitedTimedQueue(terminationEvictionLimiter),
+		zonePodEvictor:            make(map[string]*RateLimitedTimedQueue),
+		zoneTerminationEvictor:    make(map[string]*RateLimitedTimedQueue),
 		nodeStatusMap:             make(map[string]nodeStatusData),
 		nodeMonitorGracePeriod:    nodeMonitorGracePeriod,
 		nodeMonitorPeriod:         nodeMonitorPeriod,
@@ -198,6 +199,8 @@ func NewNodeController(
 		forcefullyDeletePod:       func(p *api.Pod) error { return forcefullyDeletePod(kubeClient, p) },
 		nodeExistsInCloudProvider: func(nodeName string) (bool, error) { return nodeExistsInCloudProvider(cloud, nodeName) },
 		computeZoneStateFunc:      ComputeZoneState,
+		evictionLimiterQPS:        evictionLimiterQPS,
+		evictionLimiterBurst:      evictionLimiterBurst,
 		zoneStates:                make(map[string]zoneState),
 	}
 
@@ -309,45 +312,49 @@ func (nc *NodeController) Run(period time.Duration) {
 	go wait.Until(func() {
 		nc.evictorLock.Lock()
 		defer nc.evictorLock.Unlock()
-		nc.podEvictor.Try(func(value TimedValue) (bool, time.Duration) {
-			remaining, err := deletePods(nc.kubeClient, nc.recorder, value.Value, nc.daemonSetStore)
-			if err != nil {
-				utilruntime.HandleError(fmt.Errorf("unable to evict node %q: %v", value.Value, err))
-				return false, 0
-			}
+		for k := range nc.zonePodEvictor {
+			nc.zonePodEvictor[k].Try(func(value TimedValue) (bool, time.Duration) {
+				remaining, err := deletePods(nc.kubeClient, nc.recorder, value.Value, nc.daemonSetStore)
+				if err != nil {
+					utilruntime.HandleError(fmt.Errorf("unable to evict node %q: %v", value.Value, err))
+					return false, 0
+				}
 
-			if remaining {
-				nc.terminationEvictor.Add(value.Value)
-			}
-			return true, 0
-		})
+				if remaining {
+					nc.zoneTerminationEvictor[k].Add(value.Value)
+				}
+				return true, 0
+			})
+		}
 	}, nodeEvictionPeriod, wait.NeverStop)
 
 	// TODO: replace with a controller that ensures pods that are terminating complete
 	// in a particular time period
 	go wait.Until(func() {
 		nc.evictorLock.Lock()
 		defer nc.evictorLock.Unlock()
-		nc.terminationEvictor.Try(func(value TimedValue) (bool, time.Duration) {
-			completed, remaining, err := terminatePods(nc.kubeClient, nc.recorder, value.Value, value.AddedAt, nc.maximumGracePeriod)
-			if err != nil {
-				utilruntime.HandleError(fmt.Errorf("unable to terminate pods on node %q: %v", value.Value, err))
-				return false, 0
-			}
+		for k := range nc.zoneTerminationEvictor {
+			nc.zoneTerminationEvictor[k].Try(func(value TimedValue) (bool, time.Duration) {
+				completed, remaining, err := terminatePods(nc.kubeClient, nc.recorder, value.Value, value.AddedAt, nc.maximumGracePeriod)
+				if err != nil {
+					utilruntime.HandleError(fmt.Errorf("unable to terminate pods on node %q: %v", value.Value, err))
+					return false, 0
+				}
 
-			if completed {
-				glog.V(2).Infof("All pods terminated on %s", value.Value)
-				recordNodeEvent(nc.recorder, value.Value, api.EventTypeNormal, "TerminatedAllPods", fmt.Sprintf("Terminated all Pods on Node %s.", value.Value))
-				return true, 0
-			}
+				if completed {
+					glog.V(2).Infof("All pods terminated on %s", value.Value)
+					recordNodeEvent(nc.recorder, value.Value, api.EventTypeNormal, "TerminatedAllPods", fmt.Sprintf("Terminated all Pods on Node %s.", value.Value))
+					return true, 0
+				}
 
-			glog.V(2).Infof("Pods terminating since %s on %q, estimated completion %s", value.AddedAt, value.Value, remaining)
-			// clamp very short intervals
-			if remaining < nodeEvictionPeriod {
-				remaining = nodeEvictionPeriod
-			}
-			return false, remaining
-		})
+				glog.V(2).Infof("Pods terminating since %s on %q, estimated completion %s", value.AddedAt, value.Value, remaining)
+				// clamp very short intervals
+				if remaining < nodeEvictionPeriod {
+					remaining = nodeEvictionPeriod
+				}
+				return false, remaining
+			})
+		}
 	}, nodeEvictionPeriod, wait.NeverStop)
 
 	go wait.Until(func() {
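Both worker goroutines now walk every zone's queue while holding `evictorLock`; since `monitorNodeStatus` only inserts new zone queues under that same lock, the `range` over the maps cannot race with map growth. Here is a self-contained sketch of the drain pattern, using simplified stand-in types rather than the real `RateLimitedTimedQueue`:

```go
package main

import "sync"

// zonedEvictor is a hypothetical stand-in: every pass walks all per-zone
// queues while holding the same lock that guards queue creation.
type zonedEvictor struct {
	mu     sync.Mutex
	queues map[string][]string // zone key -> queued node names
}

func (e *zonedEvictor) drain(evict func(node string) bool) {
	e.mu.Lock()
	defer e.mu.Unlock()
	for zone, q := range e.queues {
		kept := q[:0] // in-place filter: reuse the backing array
		for _, node := range q {
			if !evict(node) {
				kept = append(kept, node) // keep for the next pass
			}
		}
		e.queues[zone] = kept
	}
}

func main() {
	e := &zonedEvictor{queues: map[string][]string{
		"region-a:zone-1": {"node-1", "node-2"},
	}}
	e.drain(func(node string) bool { return node != "node-2" })
}
```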
@@ -372,8 +379,19 @@ func (nc *NodeController) monitorNodeStatus() error {
 	for i := range added {
 		glog.V(1).Infof("NodeController observed a new Node: %#v", added[i].Name)
 		recordNodeEvent(nc.recorder, added[i].Name, api.EventTypeNormal, "RegisteredNode", fmt.Sprintf("Registered Node %v in NodeController", added[i].Name))
-		nc.cancelPodEviction(added[i])
 		nc.knownNodeSet[added[i].Name] = added[i]
+		// When adding new Nodes, we need to check if a new zone appeared, and if so add a new evictor.
+		zone := utilnode.GetZoneKey(added[i])
+		if _, found := nc.zonePodEvictor[zone]; !found {
+			nc.zonePodEvictor[zone] =
+				NewRateLimitedTimedQueue(
+					flowcontrol.NewTokenBucketRateLimiter(nc.evictionLimiterQPS, nc.evictionLimiterBurst))
+		}
+		if _, found := nc.zoneTerminationEvictor[zone]; !found {
+			nc.zoneTerminationEvictor[zone] = NewRateLimitedTimedQueue(
+				flowcontrol.NewTokenBucketRateLimiter(nc.evictionLimiterQPS, nc.evictionLimiterBurst))
+		}
+		nc.cancelPodEviction(added[i])
 	}
 
 	for i := range deleted {
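Each newly observed zone gets its own queue and its own token bucket, so a single flapping zone exhausts only its own eviction budget rather than a cluster-wide one. A sketch of the lazy get-or-create pattern used above, with hypothetical names (`queue` and `getOrCreate` are illustrative stand-ins, not controller APIs):

```go
package main

import "fmt"

// queue is a hypothetical stand-in for RateLimitedTimedQueue.
type queue struct {
	qps   float32
	burst int
}

// getOrCreate returns the zone's queue, creating it (and, in the real
// controller, its token-bucket limiter) the first time the zone is seen.
func getOrCreate(evictors map[string]*queue, zone string, qps float32, burst int) *queue {
	if q, found := evictors[zone]; found {
		return q
	}
	q := &queue{qps: qps, burst: burst}
	evictors[zone] = q
	return q
}

func main() {
	evictors := map[string]*queue{}
	a := getOrCreate(evictors, "us-central1:us-central1-a", 0.1, 10)
	b := getOrCreate(evictors, "us-central1:us-central1-a", 0.1, 10)
	fmt.Println(a == b) // true: exactly one queue per zone
}
```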
@@ -689,10 +707,11 @@ func (nc *NodeController) checkForNodeAddedDeleted(nodes *api.NodeList) (added,
 // cancelPodEviction removes any queued evictions, typically because the node is available again. It
 // returns true if an eviction was queued.
 func (nc *NodeController) cancelPodEviction(node *api.Node) bool {
+	zone := utilnode.GetZoneKey(node)
 	nc.evictorLock.Lock()
 	defer nc.evictorLock.Unlock()
-	wasDeleting := nc.podEvictor.Remove(node.Name)
-	wasTerminating := nc.terminationEvictor.Remove(node.Name)
+	wasDeleting := nc.zonePodEvictor[zone].Remove(node.Name)
+	wasTerminating := nc.zoneTerminationEvictor[zone].Remove(node.Name)
 	if wasDeleting || wasTerminating {
 		glog.V(2).Infof("Cancelling pod Eviction on Node: %v", node.Name)
 		return true
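Note that `cancelPodEviction` now indexes the zone maps directly: a zone with no queue yet yields a nil `*RateLimitedTimedQueue`, and calling `Remove` on it would panic. That is presumably why the `cancelPodEviction` call in `monitorNodeStatus` above was reordered to run after the zone queues are created. A sketch of the hazard, with a hypothetical stand-in type:

```go
package main

import "fmt"

// timedQueue is a hypothetical stand-in for RateLimitedTimedQueue.
type timedQueue struct {
	items []string
}

func (q *timedQueue) Remove(name string) bool {
	for i, item := range q.items { // dereferences q: panics when q is nil
		if item == name {
			q.items = append(q.items[:i], q.items[i+1:]...)
			return true
		}
	}
	return false
}

func main() {
	evictors := map[string]*timedQueue{}
	q := evictors["unseen-zone"] // missing key: q is a nil *timedQueue
	fmt.Println(q == nil)        // true
	// q.Remove("node-1")        // would panic with a nil pointer dereference
}
```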
@@ -703,10 +722,18 @@ func (nc *NodeController) cancelPodEviction(node *api.Node) bool {
 // evictPods queues an eviction for the provided node name, and returns false if the node is already
 // queued for eviction.
 func (nc *NodeController) evictPods(node *api.Node) bool {
-	if nc.zoneStates[utilnode.GetZoneKey(node)] == stateFullSegmentation {
-		return false
-	}
 	nc.evictorLock.Lock()
 	defer nc.evictorLock.Unlock()
-	return nc.podEvictor.Add(node.Name)
+	foundHealthy := false
+	for _, state := range nc.zoneStates {
+		if state != stateFullSegmentation {
+			foundHealthy = true
+			break
+		}
+	}
+	if !foundHealthy {
+		return false
+	}
+	zone := utilnode.GetZoneKey(node)
+	return nc.zonePodEvictor[zone].Add(node.Name)
 }
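The eviction gate also changes shape here: instead of skipping only nodes in a fully segmented zone, `evictPods` now refuses to queue anything unless at least one zone remains healthy, presumably because if every zone looks segmented at once, the master's own connectivity is the likelier fault than all the nodes'. The predicate, distilled into a hypothetical helper:

```go
package main

import "fmt"

type zoneState string

const stateFullSegmentation zoneState = "FullSegmentation"

// anyZoneHealthy is an illustrative helper, not controller code: evictions
// may proceed only while at least one zone is not fully segmented.
func anyZoneHealthy(states map[string]zoneState) bool {
	for _, state := range states {
		if state != stateFullSegmentation {
			return true
		}
	}
	return false
}

func main() {
	states := map[string]zoneState{
		"zone-a": stateFullSegmentation,
		"zone-b": stateFullSegmentation,
	}
	// Every zone looks segmented, so no evictions are queued at all.
	fmt.Println(anyZoneHealthy(states)) // false
}
```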