Skip to content

Commit 7c2af10

Browse files
mpl and rtribotte authored
Fix open connections metric
Co-authored-by: Romain <[email protected]>
1 parent 598a257 commit 7c2af10

17 files changed

+126
-236
lines changed

cmd/traefik/traefik.go

+6-6
Original file line numberDiff line numberDiff line change
@@ -193,9 +193,14 @@ func setupServer(staticConfiguration *static.Configuration) (*server.Server, err
193193

194194
tsProviders := initTailscaleProviders(staticConfiguration, &providerAggregator)
195195

196+
// Metrics
197+
198+
metricRegistries := registerMetricClients(staticConfiguration.Metrics)
199+
metricsRegistry := metrics.NewMultiRegistry(metricRegistries)
200+
196201
// Entrypoints
197202

198-
serverEntryPointsTCP, err := server.NewTCPEntryPoints(staticConfiguration.EntryPoints, staticConfiguration.HostResolver)
203+
serverEntryPointsTCP, err := server.NewTCPEntryPoints(staticConfiguration.EntryPoints, staticConfiguration.HostResolver, metricsRegistry)
199204
if err != nil {
200205
return nil, err
201206
}
@@ -243,11 +248,6 @@ func setupServer(staticConfiguration *static.Configuration) (*server.Server, err
243248
}
244249
}
245250

246-
// Metrics
247-
248-
metricRegistries := registerMetricClients(staticConfiguration.Metrics)
249-
metricsRegistry := metrics.NewMultiRegistry(metricRegistries)
250-
251251
// Service manager factory
252252

253253
var spiffeX509Source *workloadapi.X509Source

docs/content/observability/metrics/overview.md

+26-24
Original file line numberDiff line numberDiff line change
@@ -16,59 +16,74 @@ Traefik Proxy hosts an official Grafana dashboard for both [on-premises](https:/
1616

1717
## Global Metrics
1818

19-
| Metric | Type | Description |
20-
|---------------------------------------------|---------|---------------------------------------------------------|
21-
| Config reload total | Count | The total count of configuration reloads. |
22-
| Config reload last success | Gauge | The timestamp of the last configuration reload success. |
23-
| TLS certificates not after | Gauge | The expiration date of certificates. |
19+
| Metric | Type | [Labels](#labels) | Description |
20+
|----------------------------|-------|--------------------------|--------------------------------------------------------------------|
21+
| Config reload total | Count | | The total count of configuration reloads. |
22+
| Config reload last success | Gauge | | The timestamp of the last configuration reload success. |
23+
| Open connections | Gauge | `entrypoint`, `protocol` | The current count of open connections, by entrypoint and protocol. |
24+
| TLS certificates not after | Gauge | | The expiration date of certificates. |
2425

2526
```prom tab="Prometheus"
2627
traefik_config_reloads_total
2728
traefik_config_last_reload_success
29+
traefik_open_connections
2830
traefik_tls_certs_not_after
2931
```
3032

3133
```dd tab="Datadog"
3234
config.reload.total
3335
config.reload.lastSuccessTimestamp
36+
open.connections
3437
tls.certs.notAfterTimestamp
3538
```
3639

3740
```influxdb tab="InfluxDB2"
3841
traefik.config.reload.total
3942
traefik.config.reload.lastSuccessTimestamp
43+
traefik.open.connections
4044
traefik.tls.certs.notAfterTimestamp
4145
```
4246

4347
```statsd tab="StatsD"
4448
# Default prefix: "traefik"
4549
{prefix}.config.reload.total
4650
{prefix}.config.reload.lastSuccessTimestamp
51+
{prefix}.open.connections
4752
{prefix}.tls.certs.notAfterTimestamp
4853
```
4954

5055
```opentelemetry tab="OpenTelemetry"
5156
traefik_config_reloads_total
5257
traefik_config_last_reload_success
58+
traefik_open_connections
5359
traefik_tls_certs_not_after
5460
```
5561

56-
## EntryPoint Metrics
62+
### Labels
63+
64+
Here is a comprehensive list of labels that are provided by the global metrics:
65+
66+
| Label | Description | Example |
67+
|---------------|----------------------------------------|----------------------|
68+
| `entrypoint` | Entrypoint that handled the connection | "example_entrypoint" |
69+
| `protocol` | Connection protocol | "TCP" |
70+
71+
## HTTP Metrics
72+
73+
### EntryPoint Metrics
5774

5875
| Metric | Type | [Labels](#labels) | Description |
5976
|-----------------------|-----------|--------------------------------------------|---------------------------------------------------------------------|
6077
| Requests total | Count | `code`, `method`, `protocol`, `entrypoint` | The total count of HTTP requests received by an entrypoint. |
6178
| Requests TLS total | Count | `tls_version`, `tls_cipher`, `entrypoint` | The total count of HTTPS requests received by an entrypoint. |
6279
| Request duration | Histogram | `code`, `method`, `protocol`, `entrypoint` | Request processing duration histogram on an entrypoint. |
63-
| Open connections | Count | `method`, `protocol`, `entrypoint` | The current count of open connections on an entrypoint. |
6480
| Requests bytes total | Count | `code`, `method`, `protocol`, `entrypoint` | The total size of HTTP requests in bytes handled by an entrypoint. |
6581
| Responses bytes total | Count | `code`, `method`, `protocol`, `entrypoint` | The total size of HTTP responses in bytes handled by an entrypoint. |
6682

6783
```prom tab="Prometheus"
6884
traefik_entrypoint_requests_total
6985
traefik_entrypoint_requests_tls_total
7086
traefik_entrypoint_request_duration_seconds
71-
traefik_entrypoint_open_connections
7287
traefik_entrypoint_requests_bytes_total
7388
traefik_entrypoint_responses_bytes_total
7489
```
@@ -77,7 +92,6 @@ traefik_entrypoint_responses_bytes_total
7792
entrypoint.request.total
7893
entrypoint.request.tls.total
7994
entrypoint.request.duration
80-
entrypoint.connections.open
8195
entrypoint.requests.bytes.total
8296
entrypoint.responses.bytes.total
8397
```
@@ -86,7 +100,6 @@ entrypoint.responses.bytes.total
86100
traefik.entrypoint.requests.total
87101
traefik.entrypoint.requests.tls.total
88102
traefik.entrypoint.request.duration
89-
traefik.entrypoint.connections.open
90103
traefik.entrypoint.requests.bytes.total
91104
traefik.entrypoint.responses.bytes.total
92105
```
@@ -96,7 +109,6 @@ traefik.entrypoint.responses.bytes.total
96109
{prefix}.entrypoint.request.total
97110
{prefix}.entrypoint.request.tls.total
98111
{prefix}.entrypoint.request.duration
99-
{prefix}.entrypoint.connections.open
100112
{prefix}.entrypoint.requests.bytes.total
101113
{prefix}.entrypoint.responses.bytes.total
102114
```
@@ -110,22 +122,20 @@ traefik_entrypoint_requests_bytes_total
110122
traefik_entrypoint_responses_bytes_total
111123
```
112124

113-
## Router Metrics
125+
### Router Metrics
114126

115127
| Metric | Type | [Labels](#labels) | Description |
116128
|-----------------------|-----------|---------------------------------------------------|----------------------------------------------------------------|
117129
| Requests total | Count | `code`, `method`, `protocol`, `router`, `service` | The total count of HTTP requests handled by a router. |
118130
| Requests TLS total | Count | `tls_version`, `tls_cipher`, `router`, `service` | The total count of HTTPS requests handled by a router. |
119131
| Request duration | Histogram | `code`, `method`, `protocol`, `router`, `service` | Request processing duration histogram on a router. |
120-
| Open connections | Count | `method`, `protocol`, `router`, `service` | The current count of open connections on a router. |
121132
| Requests bytes total | Count | `code`, `method`, `protocol`, `router`, `service` | The total size of HTTP requests in bytes handled by a router. |
122133
| Responses bytes total | Count | `code`, `method`, `protocol`, `router`, `service` | The total size of HTTP responses in bytes handled by a router. |
123134

124135
```prom tab="Prometheus"
125136
traefik_router_requests_total
126137
traefik_router_requests_tls_total
127138
traefik_router_request_duration_seconds
128-
traefik_router_open_connections
129139
traefik_router_requests_bytes_total
130140
traefik_router_responses_bytes_total
131141
```
@@ -134,7 +144,6 @@ traefik_router_responses_bytes_total
134144
router.request.total
135145
router.request.tls.total
136146
router.request.duration
137-
router.connections.open
138147
router.requests.bytes.total
139148
router.responses.bytes.total
140149
```
@@ -143,7 +152,6 @@ router.responses.bytes.total
143152
traefik.router.requests.total
144153
traefik.router.requests.tls.total
145154
traefik.router.request.duration
146-
traefik.router.connections.open
147155
traefik.router.requests.bytes.total
148156
traefik.router.responses.bytes.total
149157
```
@@ -153,7 +161,6 @@ traefik.router.responses.bytes.total
153161
{prefix}.router.request.total
154162
{prefix}.router.request.tls.total
155163
{prefix}.router.request.duration
156-
{prefix}.router.connections.open
157164
{prefix}.router.requests.bytes.total
158165
{prefix}.router.responses.bytes.total
159166
```
@@ -167,14 +174,13 @@ traefik_router_requests_bytes_total
167174
traefik_router_responses_bytes_total
168175
```
169176

170-
## Service Metrics
177+
### Service Metrics
171178

172179
| Metric | Type | Labels | Description |
173180
|-----------------------|-----------|-----------------------------------------|-------------------------------------------------------------|
174181
| Requests total | Count | `code`, `method`, `protocol`, `service` | The total count of HTTP requests processed on a service. |
175182
| Requests TLS total | Count | `tls_version`, `tls_cipher`, `service` | The total count of HTTPS requests processed on a service. |
176183
| Request duration | Histogram | `code`, `method`, `protocol`, `service` | Request processing duration histogram on a service. |
177-
| Open connections | Count | `method`, `protocol`, `service` | The current count of open connections on a service. |
178184
| Retries total | Count | `service` | The count of requests retries on a service. |
179185
| Server UP | Gauge | `service`, `url` | Current service's server status, 0 for a down or 1 for up. |
180186
| Requests bytes total | Count | `code`, `method`, `protocol`, `service` | The total size of requests in bytes received by a service. |
@@ -184,7 +190,6 @@ traefik_router_responses_bytes_total
184190
traefik_service_requests_total
185191
traefik_service_requests_tls_total
186192
traefik_service_request_duration_seconds
187-
traefik_service_open_connections
188193
traefik_service_retries_total
189194
traefik_service_server_up
190195
traefik_service_requests_bytes_total
@@ -195,7 +200,6 @@ traefik_service_responses_bytes_total
195200
service.request.total
196201
service.request.tls.total
197202
service.request.duration
198-
service.connections.open
199203
service.retries.total
200204
service.server.up
201205
service.requests.bytes.total
@@ -206,7 +210,6 @@ service.responses.bytes.total
206210
traefik.service.requests.total
207211
traefik.service.requests.tls.total
208212
traefik.service.request.duration
209-
traefik.service.connections.open
210213
traefik.service.retries.total
211214
traefik.service.server.up
212215
traefik.service.requests.bytes.total
@@ -218,7 +221,6 @@ traefik.service.responses.bytes.total
218221
{prefix}.service.request.total
219222
{prefix}.service.request.tls.total
220223
{prefix}.service.request.duration
221-
{prefix}.service.connections.open
222224
{prefix}.service.retries.total
223225
{prefix}.service.server.up
224226
{prefix}.service.requests.bytes.total
@@ -236,7 +238,7 @@ traefik_service_requests_bytes_total
236238
traefik_service_responses_bytes_total
237239
```
238240

239-
## Labels
241+
### Labels
240242

241243
Here is a comprehensive list of labels that are provided by the metrics:
242244

pkg/metrics/datadog.go

+5-8
Original file line numberDiff line numberDiff line change
@@ -18,29 +18,28 @@ var (
1818

1919
// Metric names consistent with https://github.com/DataDog/integrations-extras/pull/64
2020
const (
21-
ddConfigReloadsName = "config.reload.total"
22-
ddLastConfigReloadSuccessName = "config.reload.lastSuccessTimestamp"
21+
ddConfigReloadsName = "config.reload.total"
22+
ddLastConfigReloadSuccessName = "config.reload.lastSuccessTimestamp"
23+
ddOpenConnsName = "open.connections"
24+
2325
ddTLSCertsNotAfterTimestampName = "tls.certs.notAfterTimestamp"
2426

2527
ddEntryPointReqsName = "entrypoint.request.total"
2628
ddEntryPointReqsTLSName = "entrypoint.request.tls.total"
2729
ddEntryPointReqDurationName = "entrypoint.request.duration"
28-
ddEntryPointOpenConnsName = "entrypoint.connections.open"
2930
ddEntryPointReqsBytesName = "entrypoint.requests.bytes.total"
3031
ddEntryPointRespsBytesName = "entrypoint.responses.bytes.total"
3132

3233
ddRouterReqsName = "router.request.total"
3334
ddRouterReqsTLSName = "router.request.tls.total"
3435
ddRouterReqsDurationName = "router.request.duration"
35-
ddRouterOpenConnsName = "router.connections.open"
3636
ddRouterReqsBytesName = "router.requests.bytes.total"
3737
ddRouterRespsBytesName = "router.responses.bytes.total"
3838

3939
ddServiceReqsName = "service.request.total"
4040
ddServiceReqsTLSName = "service.request.tls.total"
4141
ddServiceReqsDurationName = "service.request.duration"
4242
ddServiceRetriesName = "service.retries.total"
43-
ddServiceOpenConnsName = "service.connections.open"
4443
ddServiceServerUpName = "service.server.up"
4544
ddServiceReqsBytesName = "service.requests.bytes.total"
4645
ddServiceRespsBytesName = "service.responses.bytes.total"
@@ -63,6 +62,7 @@ func RegisterDatadog(ctx context.Context, config *types.Datadog) Registry {
6362
registry := &standardRegistry{
6463
configReloadsCounter: datadogClient.NewCounter(ddConfigReloadsName, 1.0),
6564
lastConfigReloadSuccessGauge: datadogClient.NewGauge(ddLastConfigReloadSuccessName),
65+
openConnectionsGauge: datadogClient.NewGauge(ddOpenConnsName),
6666
tlsCertsNotAfterTimestampGauge: datadogClient.NewGauge(ddTLSCertsNotAfterTimestampName),
6767
}
6868

@@ -71,7 +71,6 @@ func RegisterDatadog(ctx context.Context, config *types.Datadog) Registry {
7171
registry.entryPointReqsCounter = datadogClient.NewCounter(ddEntryPointReqsName, 1.0)
7272
registry.entryPointReqsTLSCounter = datadogClient.NewCounter(ddEntryPointReqsTLSName, 1.0)
7373
registry.entryPointReqDurationHistogram, _ = NewHistogramWithScale(datadogClient.NewHistogram(ddEntryPointReqDurationName, 1.0), time.Second)
74-
registry.entryPointOpenConnsGauge = datadogClient.NewGauge(ddEntryPointOpenConnsName)
7574
registry.entryPointReqsBytesCounter = datadogClient.NewCounter(ddEntryPointReqsBytesName, 1.0)
7675
registry.entryPointRespsBytesCounter = datadogClient.NewCounter(ddEntryPointRespsBytesName, 1.0)
7776
}
@@ -81,7 +80,6 @@ func RegisterDatadog(ctx context.Context, config *types.Datadog) Registry {
8180
registry.routerReqsCounter = datadogClient.NewCounter(ddRouterReqsName, 1.0)
8281
registry.routerReqsTLSCounter = datadogClient.NewCounter(ddRouterReqsTLSName, 1.0)
8382
registry.routerReqDurationHistogram, _ = NewHistogramWithScale(datadogClient.NewHistogram(ddRouterReqsDurationName, 1.0), time.Second)
84-
registry.routerOpenConnsGauge = datadogClient.NewGauge(ddRouterOpenConnsName)
8583
registry.routerReqsBytesCounter = datadogClient.NewCounter(ddRouterReqsBytesName, 1.0)
8684
registry.routerRespsBytesCounter = datadogClient.NewCounter(ddRouterRespsBytesName, 1.0)
8785
}
@@ -92,7 +90,6 @@ func RegisterDatadog(ctx context.Context, config *types.Datadog) Registry {
9290
registry.serviceReqsTLSCounter = datadogClient.NewCounter(ddServiceReqsTLSName, 1.0)
9391
registry.serviceReqDurationHistogram, _ = NewHistogramWithScale(datadogClient.NewHistogram(ddServiceReqsDurationName, 1.0), time.Second)
9492
registry.serviceRetriesCounter = datadogClient.NewCounter(ddServiceRetriesName, 1.0)
95-
registry.serviceOpenConnsGauge = datadogClient.NewGauge(ddServiceOpenConnsName)
9693
registry.serviceServerUpGauge = datadogClient.NewGauge(ddServiceServerUpName)
9794
registry.serviceReqsBytesCounter = datadogClient.NewCounter(ddServiceReqsBytesName, 1.0)
9895
registry.serviceRespsBytesCounter = datadogClient.NewCounter(ddServiceRespsBytesName, 1.0)

0 commit comments

Comments
 (0)