Skip to content

Commit

Permalink
Merge pull request #2171 from namreg/replace-du-and-find
Browse files Browse the repository at this point in the history
fs: get inodes and disk usage via pure go
  • Loading branch information
dashpole authored Feb 25, 2019
2 parents 1032888 + 046818d commit 05529e2
Show file tree
Hide file tree
Showing 4 changed files with 85 additions and 91 deletions.
29 changes: 13 additions & 16 deletions container/common/fsHandler.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@ type realFsHandler struct {
}

const (
timeout = 2 * time.Minute
maxBackoffFactor = 20
)

Expand All @@ -74,36 +73,34 @@ func NewFsHandler(period time.Duration, rootfs, extraDir string, fsInfo fs.FsInf

func (fh *realFsHandler) update() error {
var (
baseUsage, extraDirUsage, inodeUsage uint64
rootDiskErr, rootInodeErr, extraDiskErr error
rootUsage, extraUsage fs.UsageInfo
rootErr, extraErr error
)
// TODO(vishh): Add support for external mounts.
if fh.rootfs != "" {
baseUsage, rootDiskErr = fh.fsInfo.GetDirDiskUsage(fh.rootfs, timeout)
inodeUsage, rootInodeErr = fh.fsInfo.GetDirInodeUsage(fh.rootfs, timeout)
rootUsage, rootErr = fh.fsInfo.GetDirUsage(fh.rootfs)
}

if fh.extraDir != "" {
extraDirUsage, extraDiskErr = fh.fsInfo.GetDirDiskUsage(fh.extraDir, timeout)
extraUsage, extraErr = fh.fsInfo.GetDirUsage(fh.extraDir)
}

// Wait to handle errors until after all operations are run.
// An error in one will not cause an early return, skipping others
fh.Lock()
defer fh.Unlock()
fh.lastUpdate = time.Now()
if rootInodeErr == nil && fh.rootfs != "" {
fh.usage.InodeUsage = inodeUsage
if fh.rootfs != "" && rootErr == nil {
fh.usage.InodeUsage = rootUsage.Inodes
fh.usage.TotalUsageBytes = rootUsage.Bytes + extraUsage.Bytes
}
if rootDiskErr == nil && fh.rootfs != "" {
fh.usage.TotalUsageBytes = baseUsage + extraDirUsage
}
if extraDiskErr == nil && fh.extraDir != "" {
fh.usage.BaseUsageBytes = baseUsage
if fh.extraDir != "" && extraErr == nil {
fh.usage.BaseUsageBytes = rootUsage.Bytes
}

// Combine errors into a single error to return
if rootDiskErr != nil || rootInodeErr != nil || extraDiskErr != nil {
return fmt.Errorf("rootDiskErr: %v, rootInodeErr: %v, extraDiskErr: %v", rootDiskErr, rootInodeErr, extraDiskErr)
if rootErr != nil || extraErr != nil {
return fmt.Errorf("rootDiskErr: %v, extraDiskErr: %v", rootErr, extraErr)
}
return nil
}
Expand Down Expand Up @@ -132,7 +129,7 @@ func (fh *realFsHandler) trackUsage() {
// if the long duration is persistent either because of slow
// disk or lots of containers.
longOp = longOp + time.Second
klog.V(2).Infof("du and find on following dirs took %v: %v; will not log again for this container unless duration exceeds %v", duration, []string{fh.rootfs, fh.extraDir}, longOp)
klog.V(2).Infof("fs: disk usage and inodes count on following dirs took %v: %v; will not log again for this container unless duration exceeds %v", duration, []string{fh.rootfs, fh.extraDir}, longOp)
}
}
}
Expand Down
125 changes: 61 additions & 64 deletions fs/fs.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ package fs

import (
"bufio"
"bytes"
"fmt"
"io/ioutil"
"os"
Expand All @@ -30,7 +29,6 @@ import (
"strconv"
"strings"
"syscall"
"time"

"github.com/docker/docker/pkg/mount"
"github.com/google/cadvisor/devicemapper"
Expand All @@ -47,8 +45,12 @@ const (
LabelCrioImages = "crio-images"
)

// The maximum number of `du` and `find` tasks that can be running at once.
const maxConcurrentOps = 20
const (
// statBlockSize is the size, in bytes, of one block as counted in
// syscall.Stat_t.Blocks; GetDirUsage multiplies the block count by this
// value to obtain the on-disk usage of each file.
statBlockSize uint64 = 512
// maxConcurrentOps is the maximum number of `disk usage` tasks that can be
// running at once. It is the capacity of the token pool channel.
maxConcurrentOps = 20
)

// A pool for restricting the number of concurrent disk usage tasks running.
var pool = make(chan struct{}, maxConcurrentOps)
Expand Down Expand Up @@ -559,78 +561,73 @@ func (self *RealFsInfo) GetDirFsDevice(dir string) (*DeviceInfo, error) {
return nil, fmt.Errorf("could not find device with major: %d, minor: %d in cached partitions map", major, minor)
}

func (self *RealFsInfo) GetDirDiskUsage(dir string, timeout time.Duration) (uint64, error) {
claimToken()
defer releaseToken()
return GetDirDiskUsage(dir, timeout)
}
func GetDirUsage(dir string) (UsageInfo, error) {
var usage UsageInfo

func GetDirDiskUsage(dir string, timeout time.Duration) (uint64, error) {
if dir == "" {
return 0, fmt.Errorf("invalid directory")
return usage, fmt.Errorf("invalid directory")
}
cmd := exec.Command("ionice", "-c3", "nice", "-n", "19", "du", "-s", dir)
stdoutp, err := cmd.StdoutPipe()
if err != nil {
return 0, fmt.Errorf("failed to setup stdout for cmd %v - %v", cmd.Args, err)
}
stderrp, err := cmd.StderrPipe()

rootInfo, err := os.Stat(dir)
if err != nil {
return 0, fmt.Errorf("failed to setup stderr for cmd %v - %v", cmd.Args, err)
return usage, fmt.Errorf("could not stat %q to get inode usage: %v", dir, err)
}

if err := cmd.Start(); err != nil {
return 0, fmt.Errorf("failed to exec du - %v", err)
rootStat, ok := rootInfo.Sys().(*syscall.Stat_t)
if !ok {
return usage, fmt.Errorf("unsuported fileinfo for getting inode usage of %q", dir)
}
timer := time.AfterFunc(timeout, func() {
klog.Warningf("Killing cmd %v due to timeout(%s)", cmd.Args, timeout.String())
cmd.Process.Kill()

rootDevId := rootStat.Dev

// dedupedInode stores inodes that could be duplicates (nlink > 1)
dedupedInodes := make(map[uint64]struct{})

err = filepath.Walk(dir, func(path string, info os.FileInfo, err error) error {
if os.IsNotExist(err) {
// expected if files appear/vanish
return nil
}
if err != nil {
return fmt.Errorf("unable to count inodes for part of dir %s: %s", dir, err)
}

// according to the docs, Sys can be nil
if info.Sys() == nil {
return fmt.Errorf("fileinfo Sys is nil")
}

s, ok := info.Sys().(*syscall.Stat_t)
if !ok {
return fmt.Errorf("unsupported fileinfo; could not convert to stat_t")
}

if s.Dev != rootDevId {
// don't descend into directories on other devices
return filepath.SkipDir
}
if s.Nlink > 1 {
if _, ok := dedupedInodes[s.Ino]; !ok {
// Dedupe things that could be hardlinks
dedupedInodes[s.Ino] = struct{}{}

usage.Bytes += uint64(s.Blocks) * statBlockSize
usage.Inodes++
}
} else {
usage.Bytes += uint64(s.Blocks) * statBlockSize
usage.Inodes++
}
return nil
})
stdoutb, souterr := ioutil.ReadAll(stdoutp)
if souterr != nil {
klog.Errorf("Failed to read from stdout for cmd %v - %v", cmd.Args, souterr)
}
stderrb, _ := ioutil.ReadAll(stderrp)
err = cmd.Wait()
timer.Stop()
if err != nil {
return 0, fmt.Errorf("du command failed on %s with output stdout: %s, stderr: %s - %v", dir, string(stdoutb), string(stderrb), err)
}
stdout := string(stdoutb)
usageInKb, err := strconv.ParseUint(strings.Fields(stdout)[0], 10, 64)
if err != nil {
return 0, fmt.Errorf("cannot parse 'du' output %s - %s", stdout, err)
}
return usageInKb * 1024, nil

return usage, nil
}

func (self *RealFsInfo) GetDirInodeUsage(dir string, timeout time.Duration) (uint64, error) {
func (self *RealFsInfo) GetDirUsage(dir string) (UsageInfo, error) {
claimToken()
defer releaseToken()
return GetDirInodeUsage(dir, timeout)
}

func GetDirInodeUsage(dir string, timeout time.Duration) (uint64, error) {
if dir == "" {
return 0, fmt.Errorf("invalid directory")
}
var counter byteCounter
var stderr bytes.Buffer
findCmd := exec.Command("ionice", "-c3", "nice", "-n", "19", "find", dir, "-xdev", "-printf", ".")
findCmd.Stdout, findCmd.Stderr = &counter, &stderr
if err := findCmd.Start(); err != nil {
return 0, fmt.Errorf("failed to exec cmd %v - %v; stderr: %v", findCmd.Args, err, stderr.String())
}
timer := time.AfterFunc(timeout, func() {
klog.Warningf("Killing cmd %v due to timeout(%s)", findCmd.Args, timeout.String())
findCmd.Process.Kill()
})
err := findCmd.Wait()
timer.Stop()
if err != nil {
return 0, fmt.Errorf("cmd %v failed. stderr: %s; err: %v", findCmd.Args, stderr.String(), err)
}
return counter.bytesWritten, nil
return GetDirUsage(dir)
}

func getVfsStats(path string) (total uint64, free uint64, avail uint64, inodes uint64, inodesFree uint64, err error) {
Expand Down
9 changes: 4 additions & 5 deletions fs/fs_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ import (
"os"
"reflect"
"testing"
"time"

"github.com/docker/docker/pkg/mount"
"github.com/stretchr/testify/assert"
Expand Down Expand Up @@ -101,9 +100,9 @@ func TestDirDiskUsage(t *testing.T) {
fi, err := f.Stat()
as.NoError(err)
expectedSize := uint64(fi.Size())
size, err := fsInfo.GetDirDiskUsage(dir, time.Minute)
usage, err := fsInfo.GetDirUsage(dir)
as.NoError(err)
as.True(expectedSize <= size, "expected dir size to be at-least %d; got size: %d", expectedSize, size)
as.True(expectedSize <= usage.Bytes, "expected dir size to be at-least %d; got size: %d", expectedSize, usage.Bytes)
}

func TestDirInodeUsage(t *testing.T) {
Expand All @@ -118,10 +117,10 @@ func TestDirInodeUsage(t *testing.T) {
_, err := ioutil.TempFile(dir, "")
require.NoError(t, err)
}
inodes, err := fsInfo.GetDirInodeUsage(dir, time.Minute)
usage, err := fsInfo.GetDirUsage(dir)
as.NoError(err)
// We should get numFiles+1 inodes, since we get 1 inode for each file, plus 1 for the directory
as.True(uint64(numFiles+1) == inodes, "expected inodes in dir to be %d; got inodes: %d", numFiles+1, inodes)
as.True(uint64(numFiles+1) == usage.Inodes, "expected inodes in dir to be %d; got inodes: %d", numFiles+1, usage.Inodes)
}

var dmStatusTests = []struct {
Expand Down
13 changes: 7 additions & 6 deletions fs/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ package fs

import (
"errors"
"time"
)

type DeviceInfo struct {
Expand Down Expand Up @@ -62,6 +61,11 @@ type DiskStats struct {
WeightedIoTime uint64
}

// UsageInfo holds the usage figures computed for a directory by GetDirUsage.
type UsageInfo struct {
// Bytes is the disk space used, in bytes.
Bytes uint64
// Inodes is the number of inodes used.
Inodes uint64
}

// ErrNoSuchDevice is the error indicating the requested device does not exist.
var ErrNoSuchDevice = errors.New("cadvisor: no such device")

Expand All @@ -72,11 +76,8 @@ type FsInfo interface {
// Returns capacity and free space, in bytes, of the set of mounts passed.
GetFsInfoForPath(mountSet map[string]struct{}) ([]Fs, error)

// Returns number of bytes occupied by 'dir'.
GetDirDiskUsage(dir string, timeout time.Duration) (uint64, error)

// Returns number of inodes used by 'dir'.
GetDirInodeUsage(dir string, timeout time.Duration) (uint64, error)
// GetDirUsage returns usage information for 'dir'.
GetDirUsage(dir string) (UsageInfo, error)

// GetDeviceInfoByFsUUID returns the information of the device with the
// specified filesystem uuid. If no such device exists, this function will
Expand Down

0 comments on commit 05529e2

Please sign in to comment.