Skip to content
This repository was archived by the owner on Feb 23, 2021. It is now read-only.

Commit 9fdeefe

Browse files
committed
cAdvisor now publishes per-container inode stats using the command `find . -xdev -printf '.' | wc -c`. This is published in the v2 API using a new field.
1 parent a9b1ab1 commit 9fdeefe

File tree

9 files changed

+195
-70
lines changed

9 files changed

+195
-70
lines changed

container/common/fsHandler.go

+54-39
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
package common
1717

1818
import (
19+
"fmt"
1920
"sync"
2021
"time"
2122

@@ -26,71 +27,85 @@ import (
2627

2728
type FsHandler interface {
2829
Start()
29-
Usage() (baseUsageBytes uint64, totalUsageBytes uint64)
30+
Usage() FsUsage
3031
Stop()
3132
}
3233

34+
// FsUsage is the snapshot of filesystem usage that FsHandler.Usage returns.
type FsUsage struct {
35+
// BaseUsageBytes is the disk usage, in bytes, measured for the handler's rootfs directory.
BaseUsageBytes uint64
36+
// TotalUsageBytes is the rootfs disk usage plus the extra directory's disk usage, in bytes.
TotalUsageBytes uint64
37+
// InodeUsage is the inode count reported by GetDirInodeUsage for the rootfs directory.
InodeUsage uint64
38+
}
39+
3340
type realFsHandler struct {
3441
sync.RWMutex
35-
lastUpdate time.Time
36-
usageBytes uint64
37-
baseUsageBytes uint64
38-
period time.Duration
39-
minPeriod time.Duration
40-
rootfs string
41-
extraDir string
42-
fsInfo fs.FsInfo
42+
lastUpdate time.Time
43+
usage FsUsage
44+
period time.Duration
45+
minPeriod time.Duration
46+
rootfs string
47+
extraDir string
48+
fsInfo fs.FsInfo
4349
// Tells the container to stop.
4450
stopChan chan struct{}
4551
}
4652

4753
const (
48-
longDu = time.Second
49-
duTimeout = time.Minute
50-
maxDuBackoffFactor = 20
54+
longOp = time.Second
55+
timeout = 2 * time.Minute
56+
maxBackoffFactor = 20
5157
)
5258

59+
const DefaultPeriod = time.Minute
60+
5361
var _ FsHandler = &realFsHandler{}
5462

5563
func NewFsHandler(period time.Duration, rootfs, extraDir string, fsInfo fs.FsInfo) FsHandler {
5664
return &realFsHandler{
57-
lastUpdate: time.Time{},
58-
usageBytes: 0,
59-
baseUsageBytes: 0,
60-
period: period,
61-
minPeriod: period,
62-
rootfs: rootfs,
63-
extraDir: extraDir,
64-
fsInfo: fsInfo,
65-
stopChan: make(chan struct{}, 1),
65+
lastUpdate: time.Time{},
66+
usage: FsUsage{},
67+
period: period,
68+
minPeriod: period,
69+
rootfs: rootfs,
70+
extraDir: extraDir,
71+
fsInfo: fsInfo,
72+
stopChan: make(chan struct{}, 1),
6673
}
6774
}
6875

6976
func (fh *realFsHandler) update() error {
7077
var (
71-
baseUsage, extraDirUsage uint64
72-
err error
78+
baseUsage, extraDirUsage, inodeUsage uint64
79+
rootDiskErr, rootInodeErr, extraDiskErr error
7380
)
7481
// TODO(vishh): Add support for external mounts.
7582
if fh.rootfs != "" {
76-
baseUsage, err = fh.fsInfo.GetDirUsage(fh.rootfs, duTimeout)
77-
if err != nil {
78-
return err
79-
}
83+
baseUsage, rootDiskErr = fh.fsInfo.GetDirDiskUsage(fh.rootfs, timeout)
84+
inodeUsage, rootInodeErr = fh.fsInfo.GetDirInodeUsage(fh.rootfs, timeout)
8085
}
8186

8287
if fh.extraDir != "" {
83-
extraDirUsage, err = fh.fsInfo.GetDirUsage(fh.extraDir, duTimeout)
84-
if err != nil {
85-
return err
86-
}
88+
extraDirUsage, extraDiskErr = fh.fsInfo.GetDirDiskUsage(fh.extraDir, timeout)
8789
}
8890

91+
// Wait to handle errors until after all operartions are run.
92+
// An error in one will not cause an early return, skipping others
8993
fh.Lock()
9094
defer fh.Unlock()
9195
fh.lastUpdate = time.Now()
92-
fh.usageBytes = baseUsage + extraDirUsage
93-
fh.baseUsageBytes = baseUsage
96+
if rootDiskErr == nil && fh.rootfs != "" {
97+
fh.usage.InodeUsage = inodeUsage
98+
}
99+
if rootInodeErr == nil && fh.rootfs != "" {
100+
fh.usage.TotalUsageBytes = baseUsage + extraDirUsage
101+
}
102+
if extraDiskErr == nil && fh.extraDir != "" {
103+
fh.usage.BaseUsageBytes = baseUsage
104+
}
105+
// Combine errors into a single error to return
106+
if rootDiskErr != nil || rootInodeErr != nil || extraDiskErr != nil {
107+
return fmt.Errorf("rootDiskErr: %v, rootInodeErr: %v, extraDiskErr: %v", rootDiskErr, rootInodeErr, extraDiskErr)
108+
}
94109
return nil
95110
}
96111

@@ -105,15 +120,15 @@ func (fh *realFsHandler) trackUsage() {
105120
if err := fh.update(); err != nil {
106121
glog.Errorf("failed to collect filesystem stats - %v", err)
107122
fh.period = fh.period * 2
108-
if fh.period > maxDuBackoffFactor*fh.minPeriod {
109-
fh.period = maxDuBackoffFactor * fh.minPeriod
123+
if fh.period > maxBackoffFactor*fh.minPeriod {
124+
fh.period = maxBackoffFactor * fh.minPeriod
110125
}
111126
} else {
112127
fh.period = fh.minPeriod
113128
}
114129
duration := time.Since(start)
115-
if duration > longDu {
116-
glog.V(2).Infof("`du` on following dirs took %v: %v", duration, []string{fh.rootfs, fh.extraDir})
130+
if duration > longOp {
131+
glog.V(2).Infof("du and find on following dirs took %v: %v", duration, []string{fh.rootfs, fh.extraDir})
117132
}
118133
}
119134
}
@@ -127,8 +142,8 @@ func (fh *realFsHandler) Stop() {
127142
close(fh.stopChan)
128143
}
129144

130-
func (fh *realFsHandler) Usage() (baseUsageBytes, totalUsageBytes uint64) {
145+
func (fh *realFsHandler) Usage() FsUsage {
131146
fh.RLock()
132147
defer fh.RUnlock()
133-
return fh.baseUsageBytes, fh.usageBytes
148+
return fh.usage
134149
}

container/docker/handler.go

+10-7
Original file line numberDiff line numberDiff line change
@@ -243,7 +243,7 @@ func newDockerContainerHandler(
243243

244244
if !ignoreMetrics.Has(container.DiskUsageMetrics) {
245245
handler.fsHandler = &dockerFsHandler{
246-
fsHandler: common.NewFsHandler(time.Minute, rootfsStorageDir, otherStorageDir, fsInfo),
246+
fsHandler: common.NewFsHandler(common.DefaultPeriod, rootfsStorageDir, otherStorageDir, fsInfo),
247247
thinPoolWatcher: thinPoolWatcher,
248248
deviceID: handler.deviceID,
249249
}
@@ -283,8 +283,8 @@ func (h *dockerFsHandler) Stop() {
283283
h.fsHandler.Stop()
284284
}
285285

286-
func (h *dockerFsHandler) Usage() (uint64, uint64) {
287-
baseUsage, usage := h.fsHandler.Usage()
286+
func (h *dockerFsHandler) Usage() common.FsUsage {
287+
usage := h.fsHandler.Usage()
288288

289289
// When devicemapper is the storage driver, the base usage of the container comes from the thin pool.
290290
// We still need the result of the fsHandler for any extra storage associated with the container.
@@ -299,12 +299,12 @@ func (h *dockerFsHandler) Usage() (uint64, uint64) {
299299
// had at least 1 refresh and we still can't find the device.
300300
glog.V(5).Infof("unable to get fs usage from thin pool for device %s: %v", h.deviceID, err)
301301
} else {
302-
baseUsage = thinPoolUsage
303-
usage += thinPoolUsage
302+
usage.BaseUsageBytes = thinPoolUsage
303+
usage.TotalUsageBytes += thinPoolUsage
304304
}
305305
}
306306

307-
return baseUsage, usage
307+
return usage
308308
}
309309

310310
func (self *dockerContainerHandler) Start() {
@@ -387,7 +387,10 @@ func (self *dockerContainerHandler) getFsStats(stats *info.ContainerStats) error
387387
}
388388

389389
fsStat := info.FsStats{Device: device, Type: fsType, Limit: limit}
390-
fsStat.BaseUsage, fsStat.Usage = self.fsHandler.Usage()
390+
usage := self.fsHandler.Usage()
391+
fsStat.BaseUsage = usage.BaseUsageBytes
392+
fsStat.Usage = usage.TotalUsageBytes
393+
fsStat.Inodes = usage.InodeUsage
391394

392395
stats.Filesystem = append(stats.Filesystem, fsStat)
393396

container/rkt/handler.go

+5-3
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@ package rkt
1818
import (
1919
"fmt"
2020
"os"
21-
"time"
2221

2322
rktapi "github.com/coreos/rkt/api/v1alpha"
2423
"github.com/google/cadvisor/container"
@@ -150,7 +149,7 @@ func newRktContainerHandler(name string, rktClient rktapi.PublicAPIClient, rktPa
150149
}
151150

152151
if !ignoreMetrics.Has(container.DiskUsageMetrics) {
153-
handler.fsHandler = common.NewFsHandler(time.Minute, rootfsStorageDir, "", fsInfo)
152+
handler.fsHandler = common.NewFsHandler(common.DefaultPeriod, rootfsStorageDir, "", fsInfo)
154153
}
155154

156155
return handler, nil
@@ -228,7 +227,10 @@ func (handler *rktContainerHandler) getFsStats(stats *info.ContainerStats) error
228227

229228
fsStat := info.FsStats{Device: deviceInfo.Device, Limit: limit}
230229

231-
fsStat.BaseUsage, fsStat.Usage = handler.fsHandler.Usage()
230+
usage := handler.fsHandler.Usage()
231+
fsStat.BaseUsage = usage.BaseUsageBytes
232+
fsStat.Usage = usage.TotalUsageBytes
233+
fsStat.Inodes = usage.InodeUsage
232234

233235
stats.Filesystem = append(stats.Filesystem, fsStat)
234236

fs/fs.go

+61-18
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ package fs
1919

2020
import (
2121
"bufio"
22+
"bytes"
2223
"fmt"
2324
"io/ioutil"
2425
"os"
@@ -44,24 +45,24 @@ const (
4445
LabelRktImages = "rkt-images"
4546
)
4647

47-
// The maximum number of `du` tasks that can be running at once.
48-
const maxConsecutiveDus = 20
48+
// The maximum number of `du` and `find` tasks that can be running at once.
49+
const maxConcurrentOps = 20
4950

50-
// A pool for restricting the number of consecutive `du` tasks running.
51-
var duPool = make(chan struct{}, maxConsecutiveDus)
51+
// A pool for restricting the number of concurrent `du` and `find` tasks running.
52+
var pool = make(chan struct{}, maxConcurrentOps)
5253

5354
func init() {
54-
for i := 0; i < maxConsecutiveDus; i++ {
55-
releaseDuToken()
55+
for i := 0; i < maxConcurrentOps; i++ {
56+
releaseToken()
5657
}
5758
}
5859

59-
func claimDuToken() {
60-
<-duPool
60+
func claimToken() {
61+
<-pool
6162
}
6263

63-
func releaseDuToken() {
64-
duPool <- struct{}{}
64+
func releaseToken() {
65+
pool <- struct{}{}
6566
}
6667

6768
type partition struct {
@@ -428,12 +429,12 @@ func (self *RealFsInfo) GetDirFsDevice(dir string) (*DeviceInfo, error) {
428429
return nil, fmt.Errorf("could not find device with major: %d, minor: %d in cached partitions map", major, minor)
429430
}
430431

431-
func (self *RealFsInfo) GetDirUsage(dir string, timeout time.Duration) (uint64, error) {
432+
func (self *RealFsInfo) GetDirDiskUsage(dir string, timeout time.Duration) (uint64, error) {
432433
if dir == "" {
433434
return 0, fmt.Errorf("invalid directory")
434435
}
435-
claimDuToken()
436-
defer releaseDuToken()
436+
claimToken()
437+
defer releaseToken()
437438
cmd := exec.Command("nice", "-n", "19", "du", "-s", dir)
438439
stdoutp, err := cmd.StdoutPipe()
439440
if err != nil {
@@ -447,28 +448,70 @@ func (self *RealFsInfo) GetDirUsage(dir string, timeout time.Duration) (uint64,
447448
if err := cmd.Start(); err != nil {
448449
return 0, fmt.Errorf("failed to exec du - %v", err)
449450
}
450-
stdoutb, souterr := ioutil.ReadAll(stdoutp)
451-
stderrb, _ := ioutil.ReadAll(stderrp)
452451
timer := time.AfterFunc(timeout, func() {
453452
glog.Infof("killing cmd %v due to timeout(%s)", cmd.Args, timeout.String())
454453
cmd.Process.Kill()
455454
})
455+
stdoutb, souterr := ioutil.ReadAll(stdoutp)
456+
if souterr != nil {
457+
glog.Errorf("failed to read from stdout for cmd %v - %v", cmd.Args, souterr)
458+
}
459+
stderrb, _ := ioutil.ReadAll(stderrp)
456460
err = cmd.Wait()
457461
timer.Stop()
458462
if err != nil {
459463
return 0, fmt.Errorf("du command failed on %s with output stdout: %s, stderr: %s - %v", dir, string(stdoutb), string(stderrb), err)
460464
}
461465
stdout := string(stdoutb)
462-
if souterr != nil {
463-
glog.Errorf("failed to read from stdout for cmd %v - %v", cmd.Args, souterr)
464-
}
465466
usageInKb, err := strconv.ParseUint(strings.Fields(stdout)[0], 10, 64)
466467
if err != nil {
467468
return 0, fmt.Errorf("cannot parse 'du' output %s - %s", stdout, err)
468469
}
469470
return usageInKb * 1024, nil
470471
}
471472

473+
func (self *RealFsInfo) GetDirInodeUsage(dir string, timeout time.Duration) (uint64, error) {
474+
if dir == "" {
475+
return 0, fmt.Errorf("invalid directory")
476+
}
477+
var stdout, stdwcerr, stdfinderr bytes.Buffer
478+
var err error
479+
claimToken()
480+
defer releaseToken()
481+
findCmd := exec.Command("find", dir, "-xdev", "-printf", ".")
482+
wcCmd := exec.Command("wc", "-c")
483+
if wcCmd.Stdin, err = findCmd.StdoutPipe(); err != nil {
484+
return 0, fmt.Errorf("failed to setup stdout for cmd %v - %v", findCmd.Args, err)
485+
}
486+
wcCmd.Stdout, wcCmd.Stderr, findCmd.Stderr = &stdout, &stdwcerr, &stdfinderr
487+
if err = findCmd.Start(); err != nil {
488+
return 0, fmt.Errorf("failed to exec cmd %v - %v; stderr: %v", findCmd.Args, err, stdfinderr.String())
489+
}
490+
491+
if err = wcCmd.Start(); err != nil {
492+
return 0, fmt.Errorf("failed to exec cmd %v - %v; stderr %v", wcCmd.Args, err, stdwcerr.String())
493+
}
494+
timer := time.AfterFunc(timeout, func() {
495+
glog.Infof("killing cmd %v, and cmd %v due to timeout(%s)", findCmd.Args, wcCmd.Args, timeout.String())
496+
wcCmd.Process.Kill()
497+
findCmd.Process.Kill()
498+
})
499+
err = findCmd.Wait()
500+
if err != nil {
501+
return 0, fmt.Errorf("cmd %v failed. stderr: %s; err: %v", findCmd.Args, stdfinderr.String(), err)
502+
}
503+
err = wcCmd.Wait()
504+
if err != nil {
505+
return 0, fmt.Errorf("cmd %v failed. stderr: %s; err: %v", wcCmd.Args, stdwcerr.String(), err)
506+
}
507+
timer.Stop()
508+
inodeUsage, err := strconv.ParseUint(strings.TrimSpace(stdout.String()), 10, 64)
509+
if err != nil {
510+
return 0, fmt.Errorf("cannot parse cmds: %v, %v output %s - %s", findCmd.Args, wcCmd.Args, stdout.String(), err)
511+
}
512+
return inodeUsage, nil
513+
}
514+
472515
func getVfsStats(path string) (total uint64, free uint64, avail uint64, inodes uint64, inodesFree uint64, err error) {
473516
var s syscall.Statfs_t
474517
if err = syscall.Statfs(path, &s); err != nil {

0 commit comments

Comments
 (0)