Skip to content

Commit 92e7b83

Browse files
committed
[Prometheus] Add ray_clusters_created_total metric
Signed-off-by: win5923 <[email protected]>
1 parent 621e9c7 commit 92e7b83

File tree

4 files changed

+47
-47
lines changed

4 files changed

+47
-47
lines changed
Original file line number | Diff line number | Diff line change
@@ -1,64 +1,29 @@
11
package common
22

33
import (
4+
"strconv"
5+
46
"github.com/prometheus/client_golang/prometheus"
57
"github.com/prometheus/client_golang/prometheus/promauto"
68
"sigs.k8s.io/controller-runtime/pkg/metrics"
79
)
810

911
// Define all the prometheus counters for all clusters
1012
var (
11-
clustersCreatedCount = promauto.NewCounterVec(
12-
prometheus.CounterOpts{
13-
Name: "ray_operator_clusters_created_total",
14-
Help: "Counts number of clusters created",
15-
},
16-
[]string{"namespace"},
17-
)
18-
clustersDeletedCount = promauto.NewCounterVec(
19-
prometheus.CounterOpts{
20-
Name: "ray_operator_clusters_deleted_total",
21-
Help: "Counts number of clusters deleted",
22-
},
23-
[]string{"namespace"},
24-
)
25-
clustersSuccessfulCount = promauto.NewCounterVec(
26-
prometheus.CounterOpts{
27-
Name: "ray_operator_clusters_successful_total",
28-
Help: "Counts number of clusters successful",
29-
},
30-
[]string{"namespace"},
31-
)
32-
clustersFailedCount = promauto.NewCounterVec(
13+
rayClustersCreatedCounter = promauto.NewCounterVec(
3314
prometheus.CounterOpts{
34-
Name: "ray_operator_clusters_failed_total",
35-
Help: "Counts number of clusters failed",
15+
Name: "ray_clusters_created_total",
16+
Help: "The total number of RayClusters created",
3617
},
37-
[]string{"namespace"},
18+
[]string{"namespace", "created_by_ray_job", "created_by_ray_service"},
3819
)
3920
)
4021

4122
func init() {
4223
// Register custom metrics with the global prometheus registry
43-
metrics.Registry.MustRegister(clustersCreatedCount,
44-
clustersDeletedCount,
45-
clustersSuccessfulCount,
46-
clustersFailedCount)
47-
}
48-
49-
func CreatedClustersCounterInc(namespace string) {
50-
clustersCreatedCount.WithLabelValues(namespace).Inc()
51-
}
52-
53-
// TODO: We don't handle the delete events in new reconciler mode, how to emit deletion metrics?
54-
func DeletedClustersCounterInc(namespace string) {
55-
clustersDeletedCount.WithLabelValues(namespace).Inc()
56-
}
57-
58-
func SuccessfulClustersCounterInc(namespace string) {
59-
clustersSuccessfulCount.WithLabelValues(namespace).Inc()
24+
metrics.Registry.MustRegister(rayClustersCreatedCounter)
6025
}
6126

62-
func FailedClustersCounterInc(namespace string) {
63-
clustersFailedCount.WithLabelValues(namespace).Inc()
27+
// CreatedRayClustersCounterInc increments the ray_clusters_created_total
// counter by one for the given namespace. The two booleans are rendered with
// strconv.FormatBool ("true" or "false") and supplied as the
// created_by_ray_job and created_by_ray_service label values, in that order.
func CreatedRayClustersCounterInc(namespace string, createdByRayJob bool, createdByRayService bool) {
28+
rayClustersCreatedCounter.WithLabelValues(namespace, strconv.FormatBool(createdByRayJob), strconv.FormatBool(createdByRayService)).Inc()
6429
}
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,26 @@
1+
package common
2+
3+
import (
4+
"strings"
5+
"testing"
6+
7+
"github.com/prometheus/client_golang/prometheus/testutil"
8+
)
9+
10+
// TestCreatedRayClustersCounterInc checks that CreatedRayClustersCounterInc
// yields one ray_clusters_created_total series per distinct label combination,
// with each value equal to the number of calls made for that combination.
func TestCreatedRayClustersCounterInc(t *testing.T) {
11+
// Four increments over three label combinations; ("test", false, false) is
// incremented twice, so its expected value below is 2.
CreatedRayClustersCounterInc("default", true, false)
12+
CreatedRayClustersCounterInc("default", false, true)
13+
CreatedRayClustersCounterInc("test", false, false)
14+
CreatedRayClustersCounterInc("test", false, false)
15+
16+
expected := `
17+
# HELP ray_clusters_created_total The total number of RayClusters created
18+
# TYPE ray_clusters_created_total counter
19+
ray_clusters_created_total{created_by_ray_job="true",created_by_ray_service="false",namespace="default"} 1
20+
ray_clusters_created_total{created_by_ray_job="false",created_by_ray_service="true",namespace="default"} 1
21+
ray_clusters_created_total{created_by_ray_job="false",created_by_ray_service="false",namespace="test"} 2
22+
`
23+
// Compare what the counter collects against the expected text; a non-nil
// error from the comparison is reported as a test failure.
// NOTE(review): rayClustersCreatedCounter is a package-level variable, so
// these exact values presumably hold only if no other test in the same
// binary increments it first — confirm.
if err := testutil.CollectAndCompare(rayClustersCreatedCounter, strings.NewReader(expected)); err != nil {
24+
t.Errorf("unexpected collecting result:\n%s", err)
25+
}
26+
}

ray-operator/controllers/ray/raycluster_controller.go

+11-3
Original file line number | Diff line number | Diff line change
@@ -734,12 +734,20 @@ func (r *RayClusterReconciler) reconcilePods(ctx context.Context, instance *rayv
734734
} else if len(headPods.Items) == 0 {
735735
// Create head Pod if it does not exist.
736736
logger.Info("reconcilePods: Found 0 head Pods; creating a head Pod for the RayCluster.")
737-
common.CreatedClustersCounterInc(instance.Namespace)
737+
738+
creatorCRDType := getCreatorCRDType(*instance)
739+
// Increase the counter for ray_clusters_created_total metric.
740+
if creatorCRDType == utils.RayClusterCRD {
741+
common.CreatedRayClustersCounterInc(instance.Namespace, false, false)
742+
} else if creatorCRDType == utils.RayJobCRD {
743+
common.CreatedRayClustersCounterInc(instance.Namespace, true, false)
744+
} else if creatorCRDType == utils.RayServiceCRD {
745+
common.CreatedRayClustersCounterInc(instance.Namespace, false, true)
746+
}
747+
738748
if err := r.createHeadPod(ctx, *instance); err != nil {
739-
common.FailedClustersCounterInc(instance.Namespace)
740749
return errstd.Join(utils.ErrFailedCreateHeadPod, err)
741750
}
742-
common.SuccessfulClustersCounterInc(instance.Namespace)
743751
} else if len(headPods.Items) > 1 { // This should never happen. This protects against the case that users manually create headpod.
744752
correctHeadPodName := instance.Name + "-head"
745753
headPodNames := make([]string, len(headPods.Items))

ray-operator/go.mod

+1
Original file line number | Diff line number | Diff line change
@@ -63,6 +63,7 @@ require (
6363
github.com/josharian/intern v1.0.0 // indirect
6464
github.com/json-iterator/go v1.1.12 // indirect
6565
github.com/klauspost/compress v1.17.11 // indirect
66+
github.com/kylelemons/godebug v1.1.0 // indirect
6667
github.com/mailru/easyjson v0.9.0 // indirect
6768
github.com/moby/spdystream v0.5.0 // indirect
6869
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect

0 commit comments

Comments
 (0)