Skip to content

Commit 0587e3d

Browse files
authored
Merge pull request #2603 from dqminh/prometheus-on-demand
Allow on-demand metrics collection for prometheus
2 parents 9ec2495 + 20e306a commit 0587e3d

File tree

8 files changed

+79
-36
lines changed

8 files changed

+79
-36
lines changed

cmd/internal/api/versions.go

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,10 @@ import (
1919
"net/http"
2020
"path"
2121
"strconv"
22+
"time"
2223

2324
info "github.com/google/cadvisor/info/v1"
24-
"github.com/google/cadvisor/info/v2"
25+
v2 "github.com/google/cadvisor/info/v2"
2526
"github.com/google/cadvisor/manager"
2627

2728
"k8s.io/klog/v2"
@@ -313,7 +314,7 @@ func (api *version2_0) SupportedRequestTypes() []string {
313314
}
314315

315316
func (api *version2_0) HandleRequest(requestType string, request []string, m manager.Manager, w http.ResponseWriter, r *http.Request) error {
316-
opt, err := getRequestOptions(r)
317+
opt, err := GetRequestOptions(r)
317318
if err != nil {
318319
return err
319320
}
@@ -482,7 +483,7 @@ func (api *version2_1) SupportedRequestTypes() []string {
482483

483484
func (api *version2_1) HandleRequest(requestType string, request []string, m manager.Manager, w http.ResponseWriter, r *http.Request) error {
484485
// Get the query request.
485-
opt, err := getRequestOptions(r)
486+
opt, err := GetRequestOptions(r)
486487
if err != nil {
487488
return err
488489
}
@@ -525,7 +526,8 @@ func (api *version2_1) HandleRequest(requestType string, request []string, m man
525526
}
526527
}
527528

528-
func getRequestOptions(r *http.Request) (v2.RequestOptions, error) {
529+
// GetRequestOptions returns the metrics request options from an HTTP request.
530+
func GetRequestOptions(r *http.Request) (v2.RequestOptions, error) {
529531
supportedTypes := map[string]bool{
530532
v2.TypeName: true,
531533
v2.TypeDocker: true,
@@ -555,5 +557,12 @@ func getRequestOptions(r *http.Request) (v2.RequestOptions, error) {
555557
if recursive == "true" {
556558
opt.Recursive = true
557559
}
560+
if maxAgeString := r.URL.Query().Get("max_age"); len(maxAgeString) > 0 {
561+
maxAge, err := time.ParseDuration(maxAgeString)
562+
if err != nil {
563+
return opt, fmt.Errorf("failed to parse 'max_age' option: %v", err)
564+
}
565+
opt.MaxAge = &maxAge
566+
}
558567
return opt, nil
559568
}

cmd/internal/http/handlers.go

Lines changed: 22 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -96,14 +96,28 @@ func RegisterHandlers(mux httpmux.Mux, containerManager manager.Manager, httpAut
9696
// the provided HTTP mux to handle the given Prometheus endpoint.
9797
func RegisterPrometheusHandler(mux httpmux.Mux, resourceManager manager.Manager, prometheusEndpoint string,
9898
f metrics.ContainerLabelsFunc, includedMetrics container.MetricSet) {
99-
r := prometheus.NewRegistry()
100-
r.MustRegister(
101-
metrics.NewPrometheusCollector(resourceManager, f, includedMetrics, clock.RealClock{}),
102-
metrics.NewPrometheusMachineCollector(resourceManager, includedMetrics),
103-
prometheus.NewGoCollector(),
104-
prometheus.NewProcessCollector(prometheus.ProcessCollectorOpts{}),
105-
)
106-
mux.Handle(prometheusEndpoint, promhttp.HandlerFor(r, promhttp.HandlerOpts{ErrorHandling: promhttp.ContinueOnError}))
99+
goCollector := prometheus.NewGoCollector()
100+
processCollector := prometheus.NewProcessCollector(prometheus.ProcessCollectorOpts{})
101+
machineCollector := metrics.NewPrometheusMachineCollector(resourceManager, includedMetrics)
102+
103+
mux.Handle(prometheusEndpoint, http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) {
104+
opts, err := api.GetRequestOptions(req)
105+
if err != nil {
106+
http.Error(w, "No metrics gathered, last error:\n\n"+err.Error(), http.StatusInternalServerError)
107+
return
108+
}
109+
opts.Count = 1 // we only want the latest datapoint
110+
opts.Recursive = true // get all child containers
111+
112+
r := prometheus.NewRegistry()
113+
r.MustRegister(
114+
metrics.NewPrometheusCollector(resourceManager, f, includedMetrics, clock.RealClock{}, opts),
115+
machineCollector,
116+
goCollector,
117+
processCollector,
118+
)
119+
promhttp.HandlerFor(r, promhttp.HandlerOpts{ErrorHandling: promhttp.ContinueOnError}).ServeHTTP(w, req)
120+
}))
107121
}
108122

109123
func staticHandlerNoAuth(w http.ResponseWriter, r *http.Request) {

info/v2/container.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ import (
1919

2020
// TODO(rjnagal): Remove dependency after moving all stats structs from v1.
2121
// using v1 now for easy conversion.
22-
"github.com/google/cadvisor/info/v1"
22+
v1 "github.com/google/cadvisor/info/v1"
2323
)
2424

2525
const (

manager/container.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ import (
3333
"github.com/google/cadvisor/collector"
3434
"github.com/google/cadvisor/container"
3535
info "github.com/google/cadvisor/info/v1"
36-
"github.com/google/cadvisor/info/v2"
36+
v2 "github.com/google/cadvisor/info/v2"
3737
"github.com/google/cadvisor/stats"
3838
"github.com/google/cadvisor/summary"
3939
"github.com/google/cadvisor/utils/cpuload"

metrics/metrics.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ import (
1818
"time"
1919

2020
info "github.com/google/cadvisor/info/v1"
21+
v2 "github.com/google/cadvisor/info/v2"
2122
)
2223

2324
// metricValue describes a single metric value for a given set of label values
@@ -32,9 +33,8 @@ type metricValues []metricValue
3233

3334
// infoProvider will usually be manager.Manager, but can be swapped out for testing.
3435
type infoProvider interface {
35-
// SubcontainersInfo provides information about all subcontainers of the
36-
// specified container including itself.
37-
SubcontainersInfo(containerName string, query *info.ContainerInfoRequest) ([]*info.ContainerInfo, error)
36+
// GetRequestedContainersInfo gets info for all requested containers based on the request options.
37+
GetRequestedContainersInfo(containerName string, options v2.RequestOptions) (map[string]*info.ContainerInfo, error)
3838
// GetVersionInfo provides information about the version.
3939
GetVersionInfo() (*info.VersionInfo, error)
4040
// GetMachineInfo provides information about the machine.

metrics/prometheus.go

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import (
2222

2323
"github.com/google/cadvisor/container"
2424
info "github.com/google/cadvisor/info/v1"
25+
v2 "github.com/google/cadvisor/info/v2"
2526

2627
"github.com/prometheus/client_golang/prometheus"
2728
"k8s.io/klog/v2"
@@ -97,13 +98,14 @@ type PrometheusCollector struct {
9798
containerMetrics []containerMetric
9899
containerLabelsFunc ContainerLabelsFunc
99100
includedMetrics container.MetricSet
101+
opts v2.RequestOptions
100102
}
101103

102104
// NewPrometheusCollector returns a new PrometheusCollector. The passed
103105
// ContainerLabelsFunc specifies which base labels will be attached to all
104106
// exported metrics. If left to nil, the DefaultContainerLabels function
105107
// will be used instead.
106-
func NewPrometheusCollector(i infoProvider, f ContainerLabelsFunc, includedMetrics container.MetricSet, now clock.Clock) *PrometheusCollector {
108+
func NewPrometheusCollector(i infoProvider, f ContainerLabelsFunc, includedMetrics container.MetricSet, now clock.Clock, opts v2.RequestOptions) *PrometheusCollector {
107109
if f == nil {
108110
f = DefaultContainerLabels
109111
}
@@ -129,6 +131,7 @@ func NewPrometheusCollector(i infoProvider, f ContainerLabelsFunc, includedMetri
129131
},
130132
},
131133
includedMetrics: includedMetrics,
134+
opts: opts,
132135
}
133136
if includedMetrics.Has(container.CpuUsageMetrics) {
134137
c.containerMetrics = append(c.containerMetrics, []containerMetric{
@@ -1780,7 +1783,7 @@ func BaseContainerLabels(whiteList []string) func(container *info.ContainerInfo)
17801783
}
17811784

17821785
func (c *PrometheusCollector) collectContainersInfo(ch chan<- prometheus.Metric) {
1783-
containers, err := c.infoProvider.SubcontainersInfo("/", &info.ContainerInfoRequest{NumStats: 1})
1786+
containers, err := c.infoProvider.GetRequestedContainersInfo("/", c.opts)
17841787
if err != nil {
17851788
c.errors.Set(1)
17861789
klog.Warningf("Couldn't get containers: %s", err)

metrics/prometheus_fake.go

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ import (
1919
"time"
2020

2121
info "github.com/google/cadvisor/info/v1"
22+
v2 "github.com/google/cadvisor/info/v2"
2223
)
2324

2425
type testSubcontainersInfoProvider struct{}
@@ -264,9 +265,9 @@ func (p testSubcontainersInfoProvider) GetMachineInfo() (*info.MachineInfo, erro
264265
}, nil
265266
}
266267

267-
func (p testSubcontainersInfoProvider) SubcontainersInfo(string, *info.ContainerInfoRequest) ([]*info.ContainerInfo, error) {
268-
return []*info.ContainerInfo{
269-
{
268+
func (p testSubcontainersInfoProvider) GetRequestedContainersInfo(string, v2.RequestOptions) (map[string]*info.ContainerInfo, error) {
269+
return map[string]*info.ContainerInfo{
270+
"testcontainer": {
270271
ContainerReference: info.ContainerReference{
271272
Name: "testcontainer",
272273
Aliases: []string{"testcontaineralias"},
@@ -710,10 +711,10 @@ func (p *erroringSubcontainersInfoProvider) GetMachineInfo() (*info.MachineInfo,
710711
return p.successfulProvider.GetMachineInfo()
711712
}
712713

713-
func (p *erroringSubcontainersInfoProvider) SubcontainersInfo(
714-
a string, r *info.ContainerInfoRequest) ([]*info.ContainerInfo, error) {
714+
func (p *erroringSubcontainersInfoProvider) GetRequestedContainersInfo(
715+
a string, opt v2.RequestOptions) (map[string]*info.ContainerInfo, error) {
715716
if p.shouldFail {
716-
return []*info.ContainerInfo{}, errors.New("Oops 3")
717+
return map[string]*info.ContainerInfo{}, errors.New("Oops 3")
717718
}
718-
return p.successfulProvider.SubcontainersInfo(a, r)
719+
return p.successfulProvider.GetRequestedContainersInfo(a, opt)
719720
}

metrics/prometheus_test.go

Lines changed: 26 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,14 @@
1515
package metrics
1616

1717
import (
18+
"errors"
1819
"os"
1920
"testing"
2021
"time"
2122

2223
"github.com/google/cadvisor/container"
2324
info "github.com/google/cadvisor/info/v1"
25+
v2 "github.com/google/cadvisor/info/v2"
2426

2527
"github.com/prometheus/client_golang/prometheus"
2628
"github.com/prometheus/client_golang/prometheus/testutil"
@@ -35,7 +37,7 @@ func TestPrometheusCollector(t *testing.T) {
3537
s := DefaultContainerLabels(container)
3638
s["zone.name"] = "hello"
3739
return s
38-
}, container.AllMetrics, now)
40+
}, container.AllMetrics, now, v2.RequestOptions{})
3941
reg := prometheus.NewRegistry()
4042
reg.MustRegister(c)
4143

@@ -64,7 +66,7 @@ func TestPrometheusCollector_scrapeFailure(t *testing.T) {
6466
s := DefaultContainerLabels(container)
6567
s["zone.name"] = "hello"
6668
return s
67-
}, container.AllMetrics, now)
69+
}, container.AllMetrics, now, v2.RequestOptions{})
6870
reg := prometheus.NewRegistry()
6971
reg.MustRegister(c)
7072

@@ -76,7 +78,7 @@ func TestPrometheusCollector_scrapeFailure(t *testing.T) {
7678
}
7779

7880
func TestNewPrometheusCollectorWithPerf(t *testing.T) {
79-
c := NewPrometheusCollector(mockInfoProvider{}, mockLabelFunc, container.MetricSet{container.PerfMetrics: struct{}{}}, now)
81+
c := NewPrometheusCollector(&mockInfoProvider{}, mockLabelFunc, container.MetricSet{container.PerfMetrics: struct{}{}}, now, v2.RequestOptions{})
8082
assert.Len(t, c.containerMetrics, 5)
8183
names := []string{}
8284
for _, m := range c.containerMetrics {
@@ -89,18 +91,32 @@ func TestNewPrometheusCollectorWithPerf(t *testing.T) {
8991
assert.Contains(t, names, "container_perf_uncore_events_scaling_ratio")
9092
}
9193

92-
type mockInfoProvider struct{}
94+
func TestNewPrometheusCollectorWithRequestOptions(t *testing.T) {
95+
p := mockInfoProvider{}
96+
opts := v2.RequestOptions{
97+
IdType: "docker",
98+
}
99+
c := NewPrometheusCollector(&p, mockLabelFunc, container.AllMetrics, now, opts)
100+
ch := make(chan prometheus.Metric, 10)
101+
c.Collect(ch)
102+
assert.Equal(t, p.options, opts)
103+
}
104+
105+
type mockInfoProvider struct {
106+
options v2.RequestOptions
107+
}
93108

94-
func (m mockInfoProvider) SubcontainersInfo(containerName string, query *info.ContainerInfoRequest) ([]*info.ContainerInfo, error) {
95-
return nil, nil
109+
func (m *mockInfoProvider) GetRequestedContainersInfo(containerName string, options v2.RequestOptions) (map[string]*info.ContainerInfo, error) {
110+
m.options = options
111+
return map[string]*info.ContainerInfo{}, nil
96112
}
97113

98-
func (m mockInfoProvider) GetVersionInfo() (*info.VersionInfo, error) {
99-
return nil, nil
114+
func (m *mockInfoProvider) GetVersionInfo() (*info.VersionInfo, error) {
115+
return nil, errors.New("not supported")
100116
}
101117

102-
func (m mockInfoProvider) GetMachineInfo() (*info.MachineInfo, error) {
103-
return nil, nil
118+
func (m *mockInfoProvider) GetMachineInfo() (*info.MachineInfo, error) {
119+
return nil, errors.New("not supported")
104120
}
105121

106122
func mockLabelFunc(*info.ContainerInfo) map[string]string {

0 commit comments

Comments
 (0)