Commit 82eff45

feat: Flag for enabling recording of failed evictions

Signed-off-by: Grzegorz Głąb <[email protected]>

1 parent 1c1b1a7 commit 82eff45

21 files changed: +115 -31 lines changed

cmd/descheduler/app/options/options.go (+1)

@@ -102,6 +102,7 @@ func (rs *DeschedulerServer) AddFlags(fs *pflag.FlagSet) {
     fs.Float64Var(&rs.Tracing.SampleRate, "otel-sample-rate", 1.0, "Sample rate to collect the Traces")
     fs.BoolVar(&rs.Tracing.FallbackToNoOpProviderOnError, "otel-fallback-no-op-on-error", false, "Fallback to NoOp Tracer in case of error")
     fs.BoolVar(&rs.EnableHTTP2, "enable-http2", false, "If http/2 should be enabled for the metrics and health check")
+    fs.BoolVar(&rs.RecordEventsForEvictionErrors, "record-events-for-eviction-errors", false, "Set this flag to record events in case of eviction errors")
 
     componentbaseoptions.BindLeaderElectionFlags(&rs.LeaderElection, fs)

docs/cli/descheduler.md (+1)

@@ -47,6 +47,7 @@ descheduler [flags]
       --permit-address-sharing              If true, SO_REUSEADDR will be used when binding the port. This allows binding to wildcard IPs like 0.0.0.0 and specific IPs in parallel, and it avoids waiting for the kernel to release sockets in TIME_WAIT state. [default=false]
       --permit-port-sharing                 If true, SO_REUSEPORT will be used when binding the port, which allows more than one instance to bind on the same address and port. [default=false]
       --policy-config-file string           File with descheduler policy configuration.
+      --record-events-for-eviction-errors   Set this flag to record events in case of eviction errors
       --secure-port int                     The port on which to serve HTTPS with authentication and authorization. If 0, don't serve HTTPS at all. (default 10258)
       --tls-cert-file string                File containing the default x509 Certificate for HTTPS. (CA cert, if any, concatenated after server cert). If HTTPS serving is enabled, and --tls-cert-file and --tls-private-key-file are not provided, a self-signed certificate and key are generated for the public address and saved to the directory specified by --cert-dir.
       --tls-cipher-suites strings           Comma-separated list of cipher suites for the server. If omitted, the default Go cipher suites will be used.
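
For reference, a hypothetical invocation that turns the new behavior on (the kubeconfig and policy file paths are illustrative):

    descheduler --kubeconfig=/path/to/kubeconfig --policy-config-file=/path/to/policy.yaml --record-events-for-eviction-errors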

pkg/apis/componentconfig/types.go (+3)

@@ -66,6 +66,9 @@ type DeschedulerConfiguration struct {
     // ClientConnection specifies the kubeconfig file and client connection settings to use when communicating with the apiserver.
     // Refer to [ClientConnection](https://pkg.go.dev/k8s.io/kubernetes/pkg/apis/componentconfig#ClientConnectionConfiguration) for more information.
     ClientConnection componentbaseconfig.ClientConnectionConfiguration
+
+    // RecordEventsForEvictionErrors sets event recording in case of eviction errors
+    RecordEventsForEvictionErrors bool
 }
 
 type TracingConfiguration struct {

pkg/apis/componentconfig/v1alpha1/types.go (+3)

@@ -66,6 +66,9 @@ type DeschedulerConfiguration struct {
     // ClientConnection specifies the kubeconfig file and client connection settings to use when communicating with the apiserver.
     // Refer to [ClientConnection](https://pkg.go.dev/k8s.io/kubernetes/pkg/apis/componentconfig#ClientConnectionConfiguration) for more information.
     ClientConnection componentbaseconfig.ClientConnectionConfiguration `json:"clientConnection,omitempty"`
+
+    // RecordEventsForEvictionErrors sets event recording in case of eviction errors
+    RecordEventsForEvictionErrors bool
 }
 
 type TracingConfiguration struct {
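
A minimal sketch of setting the new field programmatically. The field and type names come from this diff; the import path assumes the repository's module path, and everything else is illustrative:

    package main

    import (
        "fmt"

        "sigs.k8s.io/descheduler/pkg/apis/componentconfig"
    )

    func main() {
        // Opt in to recording events for failed evictions; all other
        // configuration fields keep their zero values in this toy example.
        cfg := componentconfig.DeschedulerConfiguration{
            RecordEventsForEvictionErrors: true,
        }
        fmt.Println("record events on eviction errors:", cfg.RecordEventsForEvictionErrors)
    }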

pkg/apis/componentconfig/v1alpha1/zz_generated.conversion.go (+2)

(Generated file; diff not rendered by default.)

pkg/descheduler/descheduler.go (+1)

@@ -163,6 +163,7 @@ func (d *descheduler) runDeschedulerLoop(ctx context.Context, nodes []*v1.Node)
         nodes,
         !d.rs.DisableMetrics,
         d.eventRecorder,
+        d.rs.RecordEventsForEvictionErrors,
     )
 
     d.runProfiles(ctx, client, nodes, podEvictor)

pkg/descheduler/evictions/evictions.go (+40 -28)

@@ -42,16 +42,17 @@ type (
 )
 
 type PodEvictor struct {
-    client                     clientset.Interface
-    nodes                      []*v1.Node
-    policyGroupVersion         string
-    dryRun                     bool
-    maxPodsToEvictPerNode      *uint
-    maxPodsToEvictPerNamespace *uint
-    nodepodCount               nodePodEvictedCount
-    namespacePodCount          namespacePodEvictCount
-    metricsEnabled             bool
-    eventRecorder              events.EventRecorder
+    client                        clientset.Interface
+    nodes                         []*v1.Node
+    policyGroupVersion            string
+    dryRun                        bool
+    maxPodsToEvictPerNode         *uint
+    maxPodsToEvictPerNamespace    *uint
+    nodepodCount                  nodePodEvictedCount
+    namespacePodCount             namespacePodEvictCount
+    metricsEnabled                bool
+    eventRecorder                 events.EventRecorder
+    recordEventsForEvictionErrors bool
 }
 
 func NewPodEvictor(
@@ -63,6 +64,7 @@ func NewPodEvictor(
     nodes []*v1.Node,
     metricsEnabled bool,
     eventRecorder events.EventRecorder,
+    recordEventsForEvictionErrors bool,
 ) *PodEvictor {
     nodePodCount := make(nodePodEvictedCount)
     namespacePodCount := make(namespacePodEvictCount)
@@ -72,16 +74,17 @@ func NewPodEvictor(
     }
 
     return &PodEvictor{
-        client:                     client,
-        nodes:                      nodes,
-        policyGroupVersion:         policyGroupVersion,
-        dryRun:                     dryRun,
-        maxPodsToEvictPerNode:      maxPodsToEvictPerNode,
-        maxPodsToEvictPerNamespace: maxPodsToEvictPerNamespace,
-        nodepodCount:               nodePodCount,
-        namespacePodCount:          namespacePodCount,
-        metricsEnabled:             metricsEnabled,
-        eventRecorder:              eventRecorder,
+        client:                        client,
+        nodes:                         nodes,
+        policyGroupVersion:            policyGroupVersion,
+        dryRun:                        dryRun,
+        maxPodsToEvictPerNode:         maxPodsToEvictPerNode,
+        maxPodsToEvictPerNamespace:    maxPodsToEvictPerNamespace,
+        nodepodCount:                  nodePodCount,
+        namespacePodCount:             namespacePodCount,
+        metricsEnabled:                metricsEnabled,
+        eventRecorder:                 eventRecorder,
+        recordEventsForEvictionErrors: recordEventsForEvictionErrors,
     }
 }
 
@@ -152,6 +155,10 @@ func (pe *PodEvictor) EvictPod(ctx context.Context, pod *v1.Pod, opts EvictOptions)
         if pe.metricsEnabled {
             metrics.PodsEvicted.With(map[string]string{"result": "error", "strategy": opts.StrategyName, "namespace": pod.Namespace, "node": pod.Spec.NodeName, "profile": opts.ProfileName}).Inc()
         }
+        if pe.recordEventsForEvictionErrors {
+            reason := extractReason(opts, opts.StrategyName)
+            pe.eventRecorder.Eventf(pod, nil, v1.EventTypeNormal, reason, "Descheduled", "pod cannot be evicted from %v node by sigs.k8s.io/descheduler: %v", pod.Spec.NodeName, err)
+        }
         return false
     }
 
@@ -167,19 +174,24 @@ func (pe *PodEvictor) EvictPod(ctx context.Context, pod *v1.Pod, opts EvictOptions)
     if pe.dryRun {
         klog.V(1).InfoS("Evicted pod in dry run mode", "pod", klog.KObj(pod), "reason", opts.Reason, "strategy", opts.StrategyName, "node", pod.Spec.NodeName, "profile", opts.ProfileName)
     } else {
-        klog.V(1).InfoS("Evicted pod", "pod", klog.KObj(pod), "reason", opts.Reason, "strategy", opts.StrategyName, "node", pod.Spec.NodeName, "profile", opts.ProfileName)
-        reason := opts.Reason
-        if len(reason) == 0 {
-            reason = opts.StrategyName
-            if len(reason) == 0 {
-                reason = "NotSet"
-            }
-        }
+        klog.V(1).InfoS("Evicted pod", "pod", klog.KObj(pod), "reason", opts.Reason, "strategy", opts.StrategyName, "node", pod.Spec.NodeName)
+        reason := extractReason(opts, opts.StrategyName)
         pe.eventRecorder.Eventf(pod, nil, v1.EventTypeNormal, reason, "Descheduled", "pod evicted from %v node by sigs.k8s.io/descheduler", pod.Spec.NodeName)
     }
     return true
 }
 
+func extractReason(opts EvictOptions, strategy string) string {
+    reason := opts.Reason
+    if len(reason) == 0 {
+        reason = strategy
+        if len(reason) == 0 {
+            reason = "NotSet"
+        }
+    }
+    return reason
+}
+
 func evictPod(ctx context.Context, client clientset.Interface, pod *v1.Pod, policyGroupVersion string) error {
     deleteOptions := &metav1.DeleteOptions{}
     // GracePeriodSeconds ?
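
The refactored reason fallback is now shared by the success and failure paths. A small standalone sketch of that fallback logic, with EvictOptions trimmed to the two fields the helper inspects (everything else here is illustrative):

    package main

    import "fmt"

    // EvictOptions is reduced to the fields the fallback reads.
    type EvictOptions struct {
        Reason       string
        StrategyName string
    }

    // extractReason mirrors the helper added in this commit: prefer the
    // explicit reason, fall back to the strategy name, then to "NotSet".
    func extractReason(opts EvictOptions, strategy string) string {
        reason := opts.Reason
        if len(reason) == 0 {
            reason = strategy
            if len(reason) == 0 {
                reason = "NotSet"
            }
        }
        return reason
    }

    func main() {
        fmt.Println(extractReason(EvictOptions{Reason: "TooManyRestarts"}, "RemovePodsHavingTooManyRestarts")) // TooManyRestarts
        fmt.Println(extractReason(EvictOptions{}, "RemovePodsHavingTooManyRestarts"))                          // RemovePodsHavingTooManyRestarts
        fmt.Println(extractReason(EvictOptions{}, ""))                                                         // NotSet
    }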

pkg/framework/plugins/nodeutilization/highnodeutilization_test.go (+2)

@@ -493,6 +493,7 @@ func TestHighNodeUtilization(t *testing.T) {
     testCase.nodes,
     false,
     eventRecorder,
+    noRecordEventsForEvictionFailures,
 )
 
 defaultevictorArgs := &defaultevictor.DefaultEvictorArgs{
@@ -645,6 +646,7 @@ func TestHighNodeUtilizationWithTaints(t *testing.T) {
     item.nodes,
     false,
     eventRecorder,
+    noRecordEventsForEvictionFailures,
 )
 
 defaultevictorArgs := &defaultevictor.DefaultEvictorArgs{

pkg/framework/plugins/nodeutilization/lownodeutilization_test.go (+6)

@@ -41,6 +41,10 @@ import (
     "sigs.k8s.io/descheduler/test"
 )
 
+const (
+    noRecordEventsForEvictionFailures = false
+)
+
 func TestLowNodeUtilization(t *testing.T) {
     n1NodeName := "n1"
     n2NodeName := "n2"
@@ -895,6 +899,7 @@ func TestLowNodeUtilization(t *testing.T) {
     test.nodes,
     false,
     eventRecorder,
+    noRecordEventsForEvictionFailures,
 )
 
 defaultEvictorFilterArgs := &defaultevictor.DefaultEvictorArgs{
@@ -1067,6 +1072,7 @@ func TestLowNodeUtilizationWithTaints(t *testing.T) {
     item.nodes,
     false,
     eventRecorder,
+    noRecordEventsForEvictionFailures,
 )
 
 defaultEvictorFilterArgs := &defaultevictor.DefaultEvictorArgs{

pkg/framework/plugins/podlifetime/pod_lifetime_test.go (+5)

@@ -38,6 +38,10 @@ import (
     "sigs.k8s.io/descheduler/test"
 )
 
+const (
+    noRecordEventsForEvictionFailures = false
+)
+
 func TestPodLifeTime(t *testing.T) {
     node1 := test.BuildTestNode("n1", 2000, 3000, 10, nil)
     olderPodCreationTime := metav1.NewTime(time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC))
@@ -565,6 +569,7 @@ func TestPodLifeTime(t *testing.T) {
     tc.nodes,
     false,
     eventRecorder,
+    noRecordEventsForEvictionFailures,
 )
 
 defaultEvictorFilterArgs := &defaultevictor.DefaultEvictorArgs{

pkg/framework/plugins/removeduplicates/removeduplicates_test.go (+6)

@@ -39,6 +39,10 @@ import (
     "sigs.k8s.io/descheduler/test"
 )
 
+const (
+    noRecordEventsForEvictionFailures = false
+)
+
 func buildTestPodWithImage(podName, node, image string) *v1.Pod {
     pod := test.BuildTestPod(podName, 100, 0, node, test.SetRSOwnerRef)
     pod.Spec.Containers = append(pod.Spec.Containers, v1.Container{
@@ -322,6 +326,7 @@ func TestFindDuplicatePods(t *testing.T) {
     testCase.nodes,
     false,
     eventRecorder,
+    noRecordEventsForEvictionFailures,
 )
 
 nodeFit := testCase.nodefit
@@ -771,6 +776,7 @@ func TestRemoveDuplicatesUniformly(t *testing.T) {
     testCase.nodes,
     false,
     eventRecorder,
+    noRecordEventsForEvictionFailures,
 )
 
 defaultEvictorFilterArgs := &defaultevictor.DefaultEvictorArgs{

pkg/framework/plugins/removefailedpods/failedpods_test.go (+5)

@@ -36,6 +36,10 @@ import (
     "sigs.k8s.io/descheduler/test"
 )
 
+const (
+    noRecordEventsForEvictionFailures = false
+)
+
 var OneHourInSeconds uint = 3600
 
 func TestRemoveFailedPods(t *testing.T) {
@@ -384,6 +388,7 @@ func TestRemoveFailedPods(t *testing.T) {
     tc.nodes,
     false,
     eventRecorder,
+    noRecordEventsForEvictionFailures,
 )
 
 defaultevictorArgs := &defaultevictor.DefaultEvictorArgs{

pkg/framework/plugins/removepodshavingtoomanyrestarts/toomanyrestarts_test.go (+5)

@@ -37,6 +37,10 @@ import (
     "sigs.k8s.io/descheduler/test"
 )
 
+const (
+    noRecordEventsForEvictionFailures = false
+)
+
 func initPods(node *v1.Node) []*v1.Pod {
     pods := make([]*v1.Pod, 0)
 
@@ -353,6 +357,7 @@ func TestRemovePodsHavingTooManyRestarts(t *testing.T) {
     tc.nodes,
     false,
     eventRecorder,
+    noRecordEventsForEvictionFailures,
 )
 
 defaultevictorArgs := &defaultevictor.DefaultEvictorArgs{

pkg/framework/plugins/removepodsviolatinginterpodantiaffinity/pod_antiaffinity_test.go (+5)

@@ -37,6 +37,10 @@ import (
     "sigs.k8s.io/descheduler/test"
 )
 
+const (
+    noRecordEventsForEvictionFailures = false
+)
+
 func TestPodAntiAffinity(t *testing.T) {
     node1 := test.BuildTestNode("n1", 2000, 3000, 10, func(node *v1.Node) {
         node.ObjectMeta.Labels = map[string]string{
@@ -242,6 +246,7 @@ func TestPodAntiAffinity(t *testing.T) {
     test.nodes,
     false,
     eventRecorder,
+    noRecordEventsForEvictionFailures,
 )
 
 defaultevictorArgs := &defaultevictor.DefaultEvictorArgs{

pkg/framework/plugins/removepodsviolatingnodeaffinity/node_affinity_test.go (+5)

@@ -36,6 +36,10 @@ import (
     "sigs.k8s.io/descheduler/test"
 )
 
+const (
+    noRecordEventsForEvictionFailures = false
+)
+
 func TestRemovePodsViolatingNodeAffinity(t *testing.T) {
     nodeLabelKey := "kubernetes.io/desiredNode"
     nodeLabelValue := "yes"
@@ -368,6 +372,7 @@ func TestRemovePodsViolatingNodeAffinity(t *testing.T) {
     tc.nodes,
     false,
     eventRecorder,
+    noRecordEventsForEvictionFailures,
 )
 
 defaultevictorArgs := &defaultevictor.DefaultEvictorArgs{

pkg/framework/plugins/removepodsviolatingnodetaints/node_taint_test.go (+5)

@@ -38,6 +38,10 @@ import (
     "sigs.k8s.io/descheduler/test"
 )
 
+const (
+    noRecordEventsForEvictionFailures = false
+)
+
 func createNoScheduleTaint(key, value string, index int) v1.Taint {
     return v1.Taint{
         Key: "testTaint" + fmt.Sprintf("%v", index),
@@ -409,6 +413,7 @@ func TestDeletePodsViolatingNodeTaints(t *testing.T) {
     tc.nodes,
     false,
     eventRecorder,
+    noRecordEventsForEvictionFailures,
 )
 
 defaultevictorArgs := &defaultevictor.DefaultEvictorArgs{

pkg/framework/plugins/removepodsviolatingtopologyspreadconstraint/topologyspreadconstraint_test.go (+5)

@@ -27,6 +27,10 @@ import (
     "sigs.k8s.io/descheduler/test"
 )
 
+const (
+    noRecordEventsForEvictionFailures = false
+)
+
 func TestTopologySpreadConstraint(t *testing.T) {
     testCases := []struct {
         name string
@@ -1465,6 +1469,7 @@ func TestTopologySpreadConstraint(t *testing.T) {
     tc.nodes,
     false,
     eventRecorder,
+    noRecordEventsForEvictionFailures,
 )
 
 defaultevictorArgs := &defaultevictor.DefaultEvictorArgs{
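
Finally, a toy illustration of the event the new failure branch emits when the flag is on, exercised against client-go's FakeRecorder for the events API (assuming events.NewFakeRecorder is available in your client-go version; the pod contents and the error are made up):

    package main

    import (
        "fmt"

        v1 "k8s.io/api/core/v1"
        metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
        "k8s.io/client-go/tools/events"
    )

    func main() {
        // Buffered fake recorder standing in for the descheduler's recorder.
        recorder := events.NewFakeRecorder(8)

        pod := &v1.Pod{
            ObjectMeta: metav1.ObjectMeta{Name: "app-1", Namespace: "default"},
            Spec:       v1.PodSpec{NodeName: "n1"},
        }

        // Same call shape as the failure branch added to EvictPod.
        recorder.Eventf(pod, nil, v1.EventTypeNormal, "NotSet", "Descheduled",
            "pod cannot be evicted from %v node by sigs.k8s.io/descheduler: %v",
            pod.Spec.NodeName, fmt.Errorf("pod disruption budget violated"))

        fmt.Println(<-recorder.Events)
    }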
