Skip to content

Commit 2e63411

Browse files
committed
[nodeutilization]: prometheus usage client through kubernetes metrics
1 parent 476ace2 commit 2e63411

File tree

6 files changed

+436
-4
lines changed

6 files changed

+436
-4
lines changed

pkg/framework/plugins/nodeutilization/lownodeutilization.go

+38-1
Original file line numberDiff line numberDiff line change
@@ -18,17 +18,25 @@ package nodeutilization
1818

1919
import (
2020
"context"
21+
"crypto/tls"
2122
"fmt"
23+
"net"
24+
"net/http"
25+
"time"
2226

2327
v1 "k8s.io/api/core/v1"
2428
"k8s.io/apimachinery/pkg/api/resource"
2529
"k8s.io/apimachinery/pkg/runtime"
2630
"k8s.io/klog/v2"
31+
2732
"sigs.k8s.io/descheduler/pkg/api"
2833
"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
2934
nodeutil "sigs.k8s.io/descheduler/pkg/descheduler/node"
3035
podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
3136
frameworktypes "sigs.k8s.io/descheduler/pkg/framework/types"
37+
38+
promapi "github.com/prometheus/client_golang/api"
39+
"github.com/prometheus/common/config"
3240
)
3341

3442
const LowNodeUtilizationPluginName = "LowNodeUtilization"
@@ -44,6 +52,8 @@ type LowNodeUtilization struct {
4452
overutilizationCriteria []interface{}
4553
resourceNames []v1.ResourceName
4654
usageSnapshot usageClient
55+
56+
promClient promapi.Client
4757
}
4858

4959
var _ frameworktypes.BalancePlugin = &LowNodeUtilization{}
@@ -89,8 +99,35 @@ func NewLowNodeUtilization(args runtime.Object, handle frameworktypes.Handle) (f
8999
resourceNames := getResourceNames(lowNodeUtilizationArgsArgs.Thresholds)
90100

91101
var usageSnapshot usageClient
102+
var promClient promapi.Client
92103
if lowNodeUtilizationArgsArgs.MetricsUtilization.MetricsServer {
93-
usageSnapshot = newActualUsageSnapshot(resourceNames, handle.GetPodsAssignedToNodeFunc(), handle.MetricsCollector())
104+
if lowNodeUtilizationArgsArgs.MetricsUtilization.PrometheusURL != "" {
105+
roundTripper := &http.Transport{
106+
Proxy: http.ProxyFromEnvironment,
107+
DialContext: (&net.Dialer{
108+
Timeout: 30 * time.Second,
109+
KeepAlive: 30 * time.Second,
110+
}).DialContext,
111+
TLSHandshakeTimeout: 10 * time.Second,
112+
TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
113+
}
114+
115+
pClient, err := promapi.NewClient(promapi.Config{
116+
Address: lowNodeUtilizationArgsArgs.MetricsUtilization.PrometheusURL,
117+
RoundTripper: config.NewAuthorizationCredentialsRoundTripper("Bearer", config.NewInlineSecret(lowNodeUtilizationArgsArgs.MetricsUtilization.PrometheusAuthToken), roundTripper),
118+
})
119+
if err != nil {
120+
return nil, fmt.Errorf("unable to create a new prom client: %v", err)
121+
}
122+
promClient = pClient
123+
124+
usageSnapshot = newPrometheusUsageSnapshot(handle.GetPodsAssignedToNodeFunc(), promClient, lowNodeUtilizationArgsArgs.MetricsUtilization.PromQuery)
125+
// reset all resource names to just ResourceMetrics
126+
// TODO(ingvagabund): validate only ResourceMetrics is set when prometheus metrics are enabled
127+
resourceNames = []v1.ResourceName{ResourceMetrics}
128+
} else {
129+
usageSnapshot = newActualUsageSnapshot(resourceNames, handle.GetPodsAssignedToNodeFunc(), handle.MetricsCollector())
130+
}
94131
} else {
95132
usageSnapshot = newRequestedUsageSnapshot(resourceNames, handle.GetPodsAssignedToNodeFunc())
96133
}

pkg/framework/plugins/nodeutilization/lownodeutilization_test.go

+161
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,8 @@ import (
3838
frameworktypes "sigs.k8s.io/descheduler/pkg/framework/types"
3939
"sigs.k8s.io/descheduler/pkg/utils"
4040
"sigs.k8s.io/descheduler/test"
41+
42+
"github.com/prometheus/common/model"
4143
)
4244

4345
func TestLowNodeUtilization(t *testing.T) {
@@ -1359,3 +1361,162 @@ func TestLowNodeUtilizationWithTaints(t *testing.T) {
13591361
})
13601362
}
13611363
}
1364+
1365+
func withLocalStorage(pod *v1.Pod) {
1366+
// A pod with local storage.
1367+
test.SetNormalOwnerRef(pod)
1368+
pod.Spec.Volumes = []v1.Volume{
1369+
{
1370+
Name: "sample",
1371+
VolumeSource: v1.VolumeSource{
1372+
HostPath: &v1.HostPathVolumeSource{Path: "somePath"},
1373+
EmptyDir: &v1.EmptyDirVolumeSource{
1374+
SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI),
1375+
},
1376+
},
1377+
},
1378+
}
1379+
// A Mirror Pod.
1380+
pod.Annotations = test.GetMirrorPodAnnotation()
1381+
}
1382+
1383+
func withCriticalPod(pod *v1.Pod) {
1384+
// A Critical Pod.
1385+
test.SetNormalOwnerRef(pod)
1386+
pod.Namespace = "kube-system"
1387+
priority := utils.SystemCriticalPriority
1388+
pod.Spec.Priority = &priority
1389+
}
1390+
1391+
func TestLowNodeUtilizationWithPrometheusMetrics(t *testing.T) {
1392+
n1NodeName := "n1"
1393+
n2NodeName := "n2"
1394+
n3NodeName := "n3"
1395+
1396+
testCases := []struct {
1397+
name string
1398+
useDeviationThresholds bool
1399+
thresholds, targetThresholds api.ResourceThresholds
1400+
query string
1401+
samples []model.Sample
1402+
nodes []*v1.Node
1403+
pods []*v1.Pod
1404+
expectedPodsEvicted uint
1405+
evictedPods []string
1406+
evictableNamespaces *api.Namespaces
1407+
}{
1408+
{
1409+
name: "with instance:node_cpu:rate:sum query",
1410+
thresholds: api.ResourceThresholds{
1411+
v1.ResourceName("MetricResource"): 30,
1412+
},
1413+
targetThresholds: api.ResourceThresholds{
1414+
v1.ResourceName("MetricResource"): 50,
1415+
},
1416+
query: "instance:node_cpu:rate:sum",
1417+
samples: []model.Sample{
1418+
sample("instance:node_cpu:rate:sum", n1NodeName, 0.5695757575757561),
1419+
sample("instance:node_cpu:rate:sum", n2NodeName, 0.4245454545454522),
1420+
sample("instance:node_cpu:rate:sum", n3NodeName, 0.20381818181818104),
1421+
},
1422+
nodes: []*v1.Node{
1423+
test.BuildTestNode(n1NodeName, 4000, 3000, 9, nil),
1424+
test.BuildTestNode(n2NodeName, 4000, 3000, 10, nil),
1425+
test.BuildTestNode(n3NodeName, 4000, 3000, 10, nil),
1426+
},
1427+
pods: []*v1.Pod{
1428+
test.BuildTestPod("p1", 400, 0, n1NodeName, test.SetRSOwnerRef),
1429+
test.BuildTestPod("p2", 400, 0, n1NodeName, test.SetRSOwnerRef),
1430+
test.BuildTestPod("p3", 400, 0, n1NodeName, test.SetRSOwnerRef),
1431+
test.BuildTestPod("p4", 400, 0, n1NodeName, test.SetRSOwnerRef),
1432+
test.BuildTestPod("p5", 400, 0, n1NodeName, test.SetRSOwnerRef),
1433+
// These won't be evicted.
1434+
test.BuildTestPod("p6", 400, 0, n1NodeName, test.SetDSOwnerRef),
1435+
test.BuildTestPod("p7", 400, 0, n1NodeName, withLocalStorage),
1436+
test.BuildTestPod("p8", 400, 0, n1NodeName, withCriticalPod),
1437+
test.BuildTestPod("p9", 400, 0, n2NodeName, test.SetRSOwnerRef),
1438+
},
1439+
expectedPodsEvicted: 1,
1440+
},
1441+
}
1442+
1443+
for _, tc := range testCases {
1444+
testFnc := func(metricsEnabled bool, expectedPodsEvicted uint) func(t *testing.T) {
1445+
return func(t *testing.T) {
1446+
ctx, cancel := context.WithCancel(context.Background())
1447+
defer cancel()
1448+
1449+
var objs []runtime.Object
1450+
for _, node := range tc.nodes {
1451+
objs = append(objs, node)
1452+
}
1453+
for _, pod := range tc.pods {
1454+
objs = append(objs, pod)
1455+
}
1456+
1457+
fakeClient := fake.NewSimpleClientset(objs...)
1458+
1459+
podsForEviction := make(map[string]struct{})
1460+
for _, pod := range tc.evictedPods {
1461+
podsForEviction[pod] = struct{}{}
1462+
}
1463+
1464+
evictionFailed := false
1465+
if len(tc.evictedPods) > 0 {
1466+
fakeClient.Fake.AddReactor("create", "pods", func(action core.Action) (bool, runtime.Object, error) {
1467+
getAction := action.(core.CreateAction)
1468+
obj := getAction.GetObject()
1469+
if eviction, ok := obj.(*policy.Eviction); ok {
1470+
if _, exists := podsForEviction[eviction.Name]; exists {
1471+
return true, obj, nil
1472+
}
1473+
evictionFailed = true
1474+
return true, nil, fmt.Errorf("pod %q was unexpectedly evicted", eviction.Name)
1475+
}
1476+
return true, obj, nil
1477+
})
1478+
}
1479+
1480+
handle, podEvictor, err := frameworktesting.InitFrameworkHandle(ctx, fakeClient, nil, defaultevictor.DefaultEvictorArgs{NodeFit: true}, nil)
1481+
if err != nil {
1482+
t.Fatalf("Unable to initialize a framework handle: %v", err)
1483+
}
1484+
1485+
plugin, err := NewLowNodeUtilization(&LowNodeUtilizationArgs{
1486+
Thresholds: tc.thresholds,
1487+
TargetThresholds: tc.targetThresholds,
1488+
UseDeviationThresholds: tc.useDeviationThresholds,
1489+
EvictableNamespaces: tc.evictableNamespaces,
1490+
MetricsUtilization: MetricsUtilization{
1491+
MetricsServer: true,
1492+
PrometheusURL: "http://prometheus.example.orgname",
1493+
PrometheusAuthToken: "XXXXX",
1494+
},
1495+
},
1496+
handle)
1497+
if err != nil {
1498+
t.Fatalf("Unable to initialize the plugin: %v", err)
1499+
}
1500+
1501+
pClient := &fakePromClient{
1502+
result: tc.samples,
1503+
}
1504+
1505+
plugin.(*LowNodeUtilization).usageSnapshot = newPrometheusUsageSnapshot(handle.GetPodsAssignedToNodeFunc(), pClient, tc.query)
1506+
status := plugin.(frameworktypes.BalancePlugin).Balance(ctx, tc.nodes)
1507+
if status != nil {
1508+
t.Fatalf("Balance.err: %v", status.Err)
1509+
}
1510+
1511+
podsEvicted := podEvictor.TotalEvicted()
1512+
if expectedPodsEvicted != podsEvicted {
1513+
t.Errorf("Expected %v pods to be evicted but %v got evicted", expectedPodsEvicted, podsEvicted)
1514+
}
1515+
if evictionFailed {
1516+
t.Errorf("Pod evictions failed unexpectedly")
1517+
}
1518+
}
1519+
}
1520+
t.Run(tc.name, testFnc(false, tc.expectedPodsEvicted))
1521+
}
1522+
}

pkg/framework/plugins/nodeutilization/nodeutilization.go

+20-3
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@ import (
3535
"sigs.k8s.io/descheduler/pkg/utils"
3636
)
3737

38+
const ResourceMetrics = v1.ResourceName("MetricResource")
39+
3840
// NodeUsage stores a node's info, pods on it, thresholds and its resource usage
3941
type NodeUsage struct {
4042
node *v1.Node
@@ -93,6 +95,8 @@ func getNodeThresholds(
9395
if len(node.Status.Allocatable) > 0 {
9496
nodeCapacity = node.Status.Allocatable
9597
}
98+
// Make ResourceMetrics 100% => 1000 points
99+
nodeCapacity[ResourceMetrics] = *resource.NewQuantity(int64(1000), resource.DecimalSI)
96100

97101
nodeThresholdsMap[node.Name] = NodeThresholds{
98102
lowResourceThreshold: map[v1.ResourceName]*resource.Quantity{},
@@ -323,15 +327,27 @@ func evictPods(
323327
if !preEvictionFilterWithOptions(pod) {
324328
continue
325329
}
330+
331+
// In case podUsage does not support resource counting (e.g. provided metric
332+
// does not quantify pod resource utilization) allow to evict only a single
333+
// pod. It is recommended to run the descheduling cycle more often
334+
// so the plugin can perform more evictions towards the re-distribution.
335+
singleEviction := false
326336
podUsage, err := usageSnapshot.podUsage(pod)
327337
if err != nil {
328-
klog.Errorf("unable to get pod usage for %v/%v: %v", pod.Namespace, pod.Name, err)
329-
continue
338+
if _, ok := err.(*notSupportedError); !ok {
339+
klog.Errorf("unable to get pod usage for %v/%v: %v", pod.Namespace, pod.Name, err)
340+
continue
341+
}
342+
singleEviction = true
330343
}
331344
err = podEvictor.Evict(ctx, pod, evictOptions)
332345
if err == nil {
333346
klog.V(3).InfoS("Evicted pods", "pod", klog.KObj(pod))
334-
347+
if singleEviction {
348+
klog.V(3).InfoS("Currently, only a single pod eviction is allowed")
349+
break
350+
}
335351
for name := range totalAvailableUsage {
336352
if name == v1.ResourcePods {
337353
nodeInfo.usage[name].Sub(*resource.NewQuantity(1, resource.DecimalSI))
@@ -354,6 +370,7 @@ func evictPods(
354370
if quantity, exists := nodeInfo.usage[v1.ResourcePods]; exists {
355371
keysAndValues = append(keysAndValues, "Pods", quantity.Value())
356372
}
373+
357374
for name := range totalAvailableUsage {
358375
if !nodeutil.IsBasicResource(name) {
359376
keysAndValues = append(keysAndValues, string(name), totalAvailableUsage[name].Value())

pkg/framework/plugins/nodeutilization/types.go

+5
Original file line numberDiff line numberDiff line change
@@ -57,4 +57,9 @@ type MetricsUtilization struct {
5757
// metricsServer enables metrics from a kubernetes metrics server.
5858
// Please see https://kubernetes-sigs.github.io/metrics-server/ for more.
5959
MetricsServer bool `json:"metricsServer,omitempty"`
60+
61+
PrometheusURL string `json:"prometheusURL,omitempty"`
62+
// TODO(ingvagabund): Get the token from a secret
63+
PrometheusAuthToken string `json:"prometheusAuthToken,omitempty"`
64+
PromQuery string `json:"promQuery,omitempty"`
6065
}

0 commit comments

Comments (0)