Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat: Add serviceMonitor MetricRelabelings #1291

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions api/nvidia/v1/clusterpolicy_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -954,6 +954,11 @@ type DCGMExporterServiceMonitorConfig struct {
// +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors=true
// +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.displayName="Relabelings allows to rewrite labels on metric sets for NVIDIA DCGM Exporter"
Relabelings []*promv1.RelabelConfig `json:"relabelings,omitempty"`

// MetricRelabelings configures the relabeling rules to apply to the samples before ingestion for NVIDIA DCGM Exporter.
// +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors=true
// +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.displayName="MetricRelabelings configures the relabeling rules to apply to the samples before ingestion for NVIDIA DCGM Exporter."
MetricRelabelings []*promv1.RelabelConfig `json:"metricRelabelings,omitempty"`
}

// DCGMSpec defines the properties for NVIDIA DCGM deployment
Expand Down
9 changes: 9 additions & 0 deletions controllers/object_controls.go
Original file line number Diff line number Diff line change
Expand Up @@ -4581,6 +4581,15 @@ func ServiceMonitor(n ClusterPolicyController) (gpuv1.State, error) {
}
obj.Spec.Endpoints[0].RelabelConfigs = relabelConfigs
}
if serviceMonitor.MetricRelabelings != nil {
metricRelabelConfigs := make([]promv1.RelabelConfig, len(serviceMonitor.MetricRelabelings))
for i, relabel := range serviceMonitor.MetricRelabelings {
if relabel != nil {
metricRelabelConfigs[i] = *relabel
}
}
obj.Spec.Endpoints[0].MetricRelabelConfigs = metricRelabelConfigs
}
}
if n.stateNames[state] == "state-operator-metrics" || n.stateNames[state] == "state-node-status-exporter" {
// if ServiceMonitor CRD is missing, assume prometheus is not setup and ignore CR creation
Expand Down
89 changes: 89 additions & 0 deletions deployments/gpu-operator/crds/nvidia.com_clusterpolicies.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -493,6 +493,95 @@ spec:
type: string
type: object
type: array
metricRelabelings:
description: |-
MetricRelabelings configures the relabeling rules to apply to the
samples before ingestion for NVIDIA DCGM Exporter.
items:
description: |-
RelabelConfig allows dynamic rewriting of the label set for targets, alerts,
scraped samples and remote write samples.

More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config
properties:
action:
default: replace
description: |-
Action to perform based on the regex matching.

`Uppercase` and `Lowercase` actions require Prometheus >= v2.36.0.
`DropEqual` and `KeepEqual` actions require Prometheus >= v2.41.0.

Default: "Replace"
enum:
- replace
- Replace
- keep
- Keep
- drop
- Drop
- hashmod
- HashMod
- labelmap
- LabelMap
- labeldrop
- LabelDrop
- labelkeep
- LabelKeep
- lowercase
- Lowercase
- uppercase
- Uppercase
- keepequal
- KeepEqual
- dropequal
- DropEqual
type: string
modulus:
description: |-
Modulus to take of the hash of the source label values.

Only applicable when the action is `HashMod`.
format: int64
type: integer
regex:
description: Regular expression against which the extracted
value is matched.
type: string
replacement:
description: |-
Replacement value against which a Replace action is performed if the
regular expression matches.

Regex capture groups are available.
type: string
separator:
description: Separator is the string between concatenated
SourceLabels.
type: string
sourceLabels:
description: |-
The source labels select values from existing labels. Their content is
concatenated using the configured Separator and matched against the
configured regular expression.
items:
description: |-
LabelName is a valid Prometheus label name which may only contain ASCII
letters, numbers, as well as underscores.
pattern: ^[a-zA-Z_][a-zA-Z0-9_]*$
type: string
type: array
targetLabel:
description: |-
Label to which the resulting string is written in a replacement.

It is mandatory for `Replace`, `HashMod`, `Lowercase`, `Uppercase`,
`KeepEqual` and `DropEqual` actions.

Regex capture groups are available.
type: string
type: object
type: array
type: object
version:
description: NVIDIA DCGM Exporter image tag
Expand Down
4 changes: 4 additions & 0 deletions deployments/gpu-operator/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -328,13 +328,17 @@ dcgmExporter:
interval: 15s
honorLabels: false
additionalLabels: {}
# ServiceMonitor relabel configs to apply to targets before scraping
relabelings: []
# - sourceLabels:
# - __meta_kubernetes_pod_node_name
# regex: (.*)
# targetLabel: instance
# replacement: $1
# action: replace
# ServiceMonitor metric relabel configs to apply to samples before ingestion
metricRelabelings: []

# DCGM Exporter configuration
# This block is used to configure DCGM Exporter to emit a customized list of metrics.
# Use "name" to either point to an existing ConfigMap or to create a new one with a
Expand Down