Skip to content

Commit 2805e24

Browse files
committed
Add serviceMonitor MetricRelabelings
Signed-off-by: Pouya Dolatabadi <[email protected]>
1 parent 6171a52 commit 2805e24

File tree

4 files changed

+107
-0
lines changed

4 files changed

+107
-0
lines changed

api/nvidia/v1/clusterpolicy_types.go

+5
Original file line numberDiff line numberDiff line change
@@ -954,6 +954,11 @@ type DCGMExporterServiceMonitorConfig struct {
954954
// +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors=true
955955
// +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.displayName="Relabelings allows to rewrite labels on metric sets for NVIDIA DCGM Exporter"
956956
Relabelings []*promv1.RelabelConfig `json:"relabelings,omitempty"`
957+
958+
// MetricRelabelings configures the relabeling rules to apply to the samples before ingestion for NVIDIA DCGM Exporter.
959+
// +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors=true
960+
// +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.displayName="MetricRelabelings configures the relabeling rules to apply to the samples before ingestion for NVIDIA DCGM Exporter."
961+
MetricRelabelings []*promv1.RelabelConfig `json:"metricRelabelings,omitempty"`
957962
}
958963

959964
// DCGMSpec defines the properties for NVIDIA DCGM deployment

controllers/object_controls.go

+9
Original file line numberDiff line numberDiff line change
@@ -4581,6 +4581,15 @@ func ServiceMonitor(n ClusterPolicyController) (gpuv1.State, error) {
45814581
}
45824582
obj.Spec.Endpoints[0].RelabelConfigs = relabelConfigs
45834583
}
4584+
if serviceMonitor.MetricRelabelings != nil {
4585+
metricRelabelConfigs := make([]promv1.RelabelConfig, len(serviceMonitor.MetricRelabelings))
4586+
for i, relabel := range serviceMonitor.MetricRelabelings {
4587+
if relabel != nil {
4588+
metricRelabelConfigs[i] = *relabel
4589+
}
4590+
}
4591+
obj.Spec.Endpoints[0].MetricRelabelConfigs = metricRelabelConfigs
4592+
}
45844593
}
45854594
if n.stateNames[state] == "state-operator-metrics" || n.stateNames[state] == "state-node-status-exporter" {
45864595
// if ServiceMonitor CRD is missing, assume prometheus is not setup and ignore CR creation

deployments/gpu-operator/crds/nvidia.com_clusterpolicies.yaml

+89
Original file line numberDiff line numberDiff line change
@@ -493,6 +493,95 @@ spec:
493493
type: string
494494
type: object
495495
type: array
496+
metricRelabelings:
497+
description: |-
498+
`metricRelabelings` configures the relabeling rules to apply to the
499+
samples before ingestion for NVIDIA DCGM Exporter.
500+
items:
501+
description: |-
502+
RelabelConfig allows dynamic rewriting of the label set for targets, alerts,
503+
scraped samples and remote write samples.
504+
505+
More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config
506+
properties:
507+
action:
508+
default: replace
509+
description: |-
510+
Action to perform based on the regex matching.
511+
512+
`Uppercase` and `Lowercase` actions require Prometheus >= v2.36.0.
513+
`DropEqual` and `KeepEqual` actions require Prometheus >= v2.41.0.
514+
515+
Default: "Replace"
516+
enum:
517+
- replace
518+
- Replace
519+
- keep
520+
- Keep
521+
- drop
522+
- Drop
523+
- hashmod
524+
- HashMod
525+
- labelmap
526+
- LabelMap
527+
- labeldrop
528+
- LabelDrop
529+
- labelkeep
530+
- LabelKeep
531+
- lowercase
532+
- Lowercase
533+
- uppercase
534+
- Uppercase
535+
- keepequal
536+
- KeepEqual
537+
- dropequal
538+
- DropEqual
539+
type: string
540+
modulus:
541+
description: |-
542+
Modulus to take of the hash of the source label values.
543+
544+
Only applicable when the action is `HashMod`.
545+
format: int64
546+
type: integer
547+
regex:
548+
description: Regular expression against which the extracted
549+
value is matched.
550+
type: string
551+
replacement:
552+
description: |-
553+
Replacement value against which a Replace action is performed if the
554+
regular expression matches.
555+
556+
Regex capture groups are available.
557+
type: string
558+
separator:
559+
description: Separator is the string between concatenated
560+
SourceLabels.
561+
type: string
562+
sourceLabels:
563+
description: |-
564+
The source labels select values from existing labels. Their content is
565+
concatenated using the configured Separator and matched against the
566+
configured regular expression.
567+
items:
568+
description: |-
569+
LabelName is a valid Prometheus label name which may only contain ASCII
570+
letters, numbers, as well as underscores.
571+
pattern: ^[a-zA-Z_][a-zA-Z0-9_]*$
572+
type: string
573+
type: array
574+
targetLabel:
575+
description: |-
576+
Label to which the resulting string is written in a replacement.
577+
578+
It is mandatory for `Replace`, `HashMod`, `Lowercase`, `Uppercase`,
579+
`KeepEqual` and `DropEqual` actions.
580+
581+
Regex capture groups are available.
582+
type: string
583+
type: object
584+
type: array
496585
type: object
497586
version:
498587
description: NVIDIA DCGM Exporter image tag

deployments/gpu-operator/values.yaml

+4
Original file line numberDiff line numberDiff line change
@@ -328,13 +328,17 @@ dcgmExporter:
328328
interval: 15s
329329
honorLabels: false
330330
additionalLabels: {}
331+
# ServiceMonitor relabel configs to apply to targets before scraping
331332
relabelings: []
332333
# - sourceLabels:
333334
# - __meta_kubernetes_pod_node_name
334335
# regex: (.*)
335336
# targetLabel: instance
336337
# replacement: $1
337338
# action: replace
339+
# ServiceMonitor metric relabel configs to apply to samples before ingestion
340+
metricRelabelings: []
341+
338342
# DCGM Exporter configuration
339343
# This block is used to configure DCGM Exporter to emit a customized list of metrics.
340344
# Use "name" to either point to an existing ConfigMap or to create a new one with a

0 commit comments

Comments
 (0)