Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chroot.setupChrootBindMounts: pay more attention to flags #5083

Merged
merged 1 commit into from
Oct 23, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
153 changes: 97 additions & 56 deletions chroot/run_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -229,14 +229,39 @@ func createPlatformContainer(options runUsingChrootExecSubprocOptions) error {
return errors.New("unsupported createPlatformContainer")
}

func mountFlagsForFSFlags(fsFlags uintptr) uintptr {
var mountFlags uintptr
for _, mapping := range []struct {
fsFlag uintptr
mountFlag uintptr
}{
{unix.ST_MANDLOCK, unix.MS_MANDLOCK},
{unix.ST_NOATIME, unix.MS_NOATIME},
{unix.ST_NODEV, unix.MS_NODEV},
{unix.ST_NODIRATIME, unix.MS_NODIRATIME},
{unix.ST_NOEXEC, unix.MS_NOEXEC},
{unix.ST_NOSUID, unix.MS_NOSUID},
{unix.ST_RDONLY, unix.MS_RDONLY},
{unix.ST_RELATIME, unix.MS_RELATIME},
{unix.ST_SYNCHRONOUS, unix.MS_SYNCHRONOUS},
} {
if fsFlags&mapping.fsFlag == mapping.fsFlag {
mountFlags |= mapping.mountFlag
}
}
return mountFlags
}

func makeReadOnly(mntpoint string, flags uintptr) error {
var fs unix.Statfs_t
// Make sure it's read-only.
if err := unix.Statfs(mntpoint, &fs); err != nil {
return fmt.Errorf("checking if directory %q was bound read-only: %w", mntpoint, err)
}
if fs.Flags&unix.ST_RDONLY == 0 {
if err := unix.Mount(mntpoint, mntpoint, "bind", flags|unix.MS_REMOUNT, ""); err != nil {
// All callers currently pass MS_RDONLY in "flags", but in case they stop doing
// that at some point in the future...
if err := unix.Mount(mntpoint, mntpoint, "bind", flags|unix.MS_RDONLY|unix.MS_REMOUNT|unix.MS_BIND, ""); err != nil {
return fmt.Errorf("remounting %s in mount namespace read-only: %w", mntpoint, err)
}
}
Expand Down Expand Up @@ -268,7 +293,7 @@ func setupChrootBindMounts(spec *specs.Spec, bundlePath string) (undoBinds func(
// Now bind mount all of those things to be under the rootfs's location in this
// mount namespace.
commonFlags := uintptr(unix.MS_BIND | unix.MS_REC | unix.MS_PRIVATE)
bindFlags := commonFlags | unix.MS_NODEV
bindFlags := commonFlags
devFlags := commonFlags | unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_RDONLY
procFlags := devFlags | unix.MS_NODEV
sysFlags := devFlags | unix.MS_NODEV
Expand All @@ -291,7 +316,7 @@ func setupChrootBindMounts(spec *specs.Spec, bundlePath string) (undoBinds func(
return undoBinds, fmt.Errorf("checking if directory %q was bound read-only: %w", subDev, err)
}
if fs.Flags&unix.ST_RDONLY == 0 {
if err := unix.Mount(subDev, subDev, "bind", devFlags|unix.MS_REMOUNT, ""); err != nil {
if err := unix.Mount(subDev, subDev, "bind", devFlags|unix.MS_REMOUNT|unix.MS_BIND, ""); err != nil {
return undoBinds, fmt.Errorf("remounting /dev in mount namespace read-only: %w", err)
}
}
Expand Down Expand Up @@ -351,7 +376,7 @@ func setupChrootBindMounts(spec *specs.Spec, bundlePath string) (undoBinds func(
}
logrus.Debugf("bind mounted %q to %q", "/sys", filepath.Join(spec.Root.Path, "/sys"))

// Bind mount in everything we've been asked to mount.
// Bind, overlay, or tmpfs mount everything we've been asked to mount.
for _, m := range spec.Mounts {
// Skip anything that we just mounted.
switch m.Destination {
Expand All @@ -369,45 +394,44 @@ func setupChrootBindMounts(spec *specs.Spec, bundlePath string) (undoBinds func(
continue
}
}
// Skip anything that isn't a bind or tmpfs mount.
// Skip anything that isn't a bind or overlay or tmpfs mount.
if m.Type != "bind" && m.Type != "tmpfs" && m.Type != "overlay" {
logrus.Debugf("skipping mount of type %q on %q", m.Type, m.Destination)
continue
}
// If the target is there, we can just mount it.
// If the target is already there, we can just mount over it.
var srcinfo os.FileInfo
switch m.Type {
case "bind":
srcinfo, err = os.Stat(m.Source)
if err != nil {
return undoBinds, fmt.Errorf("examining %q for mounting in mount namespace: %w", m.Source, err)
}
case "overlay":
fallthrough
case "tmpfs":
case "overlay", "tmpfs":
srcinfo, err = os.Stat("/")
if err != nil {
return undoBinds, fmt.Errorf("examining / to use as a template for a %s: %w", m.Type, err)
return undoBinds, fmt.Errorf("examining / to use as a template for a %s mount: %w", m.Type, err)
}
}
target := filepath.Join(spec.Root.Path, m.Destination)
// Check if target is a symlink
// Check if target is a symlink.
stat, err := os.Lstat(target)
// If target is a symlink, follow the link and ensure the destination exists
// If target is a symlink, follow the link and ensure the destination exists.
if err == nil && stat != nil && (stat.Mode()&os.ModeSymlink != 0) {
target, err = copier.Eval(spec.Root.Path, m.Destination, copier.EvalOptions{})
if err != nil {
return nil, fmt.Errorf("evaluating symlink %q: %w", target, err)
}
// Stat the destination of the evaluated symlink
// Stat the destination of the evaluated symlink.
_, err = os.Stat(target)
}
if err != nil {
// If the target can't be stat()ted, check the error.
if !errors.Is(err, os.ErrNotExist) {
return undoBinds, fmt.Errorf("examining %q for mounting in mount namespace: %w", target, err)
}
// The target isn't there yet, so create it.
// The target isn't there yet, so create it. If the source is a directory,
// we need a directory, otherwise we need a non-directory (i.e., a file).
if srcinfo.IsDir() {
if err = os.MkdirAll(target, 0755); err != nil {
return undoBinds, fmt.Errorf("creating mountpoint %q in mount namespace: %w", target, err)
Expand All @@ -423,79 +447,94 @@ func setupChrootBindMounts(spec *specs.Spec, bundlePath string) (undoBinds func(
file.Close()
}
}
// Sort out which flags we're asking for, and what statfs() should be telling us
// if we successfully mounted with them.
requestFlags := uintptr(0)
expectedFlags := uintptr(0)
expectedImportantFlags := uintptr(0)
importantFlags := uintptr(0)
possibleImportantFlags := uintptr(unix.ST_NODEV | unix.ST_NOEXEC | unix.ST_NOSUID | unix.ST_RDONLY)
for _, option := range m.Options {
switch option {
case "nodev":
requestFlags |= unix.MS_NODEV
expectedFlags |= unix.ST_NODEV
importantFlags |= unix.ST_NODEV
expectedImportantFlags |= unix.ST_NODEV
case "dev":
requestFlags &= ^uintptr(unix.MS_NODEV)
expectedFlags &= ^uintptr(unix.ST_NODEV)
importantFlags |= unix.ST_NODEV
expectedImportantFlags &= ^uintptr(unix.ST_NODEV)
case "noexec":
requestFlags |= unix.MS_NOEXEC
expectedFlags |= unix.ST_NOEXEC
importantFlags |= unix.ST_NOEXEC
expectedImportantFlags |= unix.ST_NOEXEC
case "exec":
requestFlags &= ^uintptr(unix.MS_NOEXEC)
expectedFlags &= ^uintptr(unix.ST_NOEXEC)
importantFlags |= unix.ST_NOEXEC
expectedImportantFlags &= ^uintptr(unix.ST_NOEXEC)
case "nosuid":
requestFlags |= unix.MS_NOSUID
expectedFlags |= unix.ST_NOSUID
importantFlags |= unix.ST_NOSUID
expectedImportantFlags |= unix.ST_NOSUID
case "suid":
requestFlags &= ^uintptr(unix.MS_NOSUID)
expectedFlags &= ^uintptr(unix.ST_NOSUID)
importantFlags |= unix.ST_NOSUID
expectedImportantFlags &= ^uintptr(unix.ST_NOSUID)
case "ro":
requestFlags |= unix.MS_RDONLY
expectedFlags |= unix.ST_RDONLY
importantFlags |= unix.ST_RDONLY
expectedImportantFlags |= unix.ST_RDONLY
case "rw":
requestFlags &= ^uintptr(unix.MS_RDONLY)
expectedFlags &= ^uintptr(unix.ST_RDONLY)
importantFlags |= unix.ST_RDONLY
expectedImportantFlags &= ^uintptr(unix.ST_RDONLY)
}
}
switch m.Type {
case "bind":
// Do the bind mount.
logrus.Debugf("bind mounting %q on %q", m.Destination, filepath.Join(spec.Root.Path, m.Destination))
if err := unix.Mount(m.Source, target, "", bindFlags|requestFlags, ""); err != nil {
// Do the initial bind mount. We'll worry about the flags in a bit.
logrus.Debugf("bind mounting %q on %q %v", m.Destination, filepath.Join(spec.Root.Path, m.Destination), m.Options)
if err = unix.Mount(m.Source, target, "", bindFlags|requestFlags, ""); err != nil {
return undoBinds, fmt.Errorf("bind mounting %q from host to %q in mount namespace (%q): %w", m.Source, m.Destination, target, err)
}
if (requestFlags & unix.MS_RDONLY) != 0 {
if err = unix.Statfs(target, &fs); err != nil {
return undoBinds, fmt.Errorf("checking if directory %q was bound read-only: %w", target, err)
}
// we need to make sure these flags are maintained in the REMOUNT operation
additionalFlags := uintptr(fs.Flags) & (unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV)
if err := unix.Mount("", target, "", unix.MS_REMOUNT|unix.MS_BIND|unix.MS_RDONLY|additionalFlags, ""); err != nil {
return undoBinds, fmt.Errorf("setting flags on the bind mount %q from host to %q in mount namespace (%q): %w", m.Source, m.Destination, target, err)
}
}
logrus.Debugf("bind mounted %q to %q", m.Source, target)
case "tmpfs":
// Mount a tmpfs.
if err := mount.Mount(m.Source, target, m.Type, strings.Join(append(m.Options, "private"), ",")); err != nil {
return undoBinds, fmt.Errorf("mounting tmpfs to %q in mount namespace (%q, %q): %w", m.Destination, target, strings.Join(m.Options, ","), err)
// Mount a tmpfs. We'll worry about the flags in a bit.
if err = mount.Mount(m.Source, target, m.Type, strings.Join(append(m.Options, "private"), ",")); err != nil {
return undoBinds, fmt.Errorf("mounting tmpfs to %q in mount namespace (%q, %q): %w", m.Destination, target, strings.Join(append(m.Options, "private"), ","), err)
}
logrus.Debugf("mounted a tmpfs to %q", target)
case "overlay":
// Mount a overlay.
if err := mount.Mount(m.Source, target, m.Type, strings.Join(append(m.Options, "private"), ",")); err != nil {
return undoBinds, fmt.Errorf("mounting overlay to %q in mount namespace (%q, %q): %w", m.Destination, target, strings.Join(m.Options, ","), err)
// Mount an overlay. We'll worry about the flags in a bit.
if err = mount.Mount(m.Source, target, m.Type, strings.Join(append(m.Options, "private"), ",")); err != nil {
return undoBinds, fmt.Errorf("mounting overlay to %q in mount namespace (%q, %q): %w", m.Destination, target, strings.Join(append(m.Options, "private"), ","), err)
}
logrus.Debugf("mounted a overlay to %q", target)
}
// Time to worry about the flags.
if err = unix.Statfs(target, &fs); err != nil {
return undoBinds, fmt.Errorf("checking if directory %q was bound read-only: %w", target, err)
return undoBinds, fmt.Errorf("checking if volume %q was mounted with requested flags: %w", target, err)
}
if uintptr(fs.Flags)&expectedFlags != expectedFlags {
if err := unix.Mount(target, target, "bind", requestFlags|unix.MS_REMOUNT, ""); err != nil {
return undoBinds, fmt.Errorf("remounting %q in mount namespace with expected flags: %w", target, err)
effectiveImportantFlags := uintptr(fs.Flags) & importantFlags
if effectiveImportantFlags != expectedImportantFlags {
// Do a remount to try to get the desired flags to stick.
effectiveUnimportantFlags := uintptr(fs.Flags) & ^possibleImportantFlags
if err = unix.Mount(target, target, m.Type, unix.MS_REMOUNT|bindFlags|requestFlags|mountFlagsForFSFlags(effectiveUnimportantFlags), ""); err != nil {
return undoBinds, fmt.Errorf("remounting %q in mount namespace with flags %#x instead of %#x: %w", target, requestFlags, effectiveImportantFlags, err)
}
// Check if the desired flags stuck.
if err = unix.Statfs(target, &fs); err != nil {
return undoBinds, fmt.Errorf("checking if directory %q was remounted with requested flags %#x instead of %#x: %w", target, requestFlags, effectiveImportantFlags, err)
}
newEffectiveImportantFlags := uintptr(fs.Flags) & importantFlags
if newEffectiveImportantFlags != expectedImportantFlags {
return undoBinds, fmt.Errorf("unable to remount %q with requested flags %#x instead of %#x, just got %#x back", target, requestFlags, effectiveImportantFlags, newEffectiveImportantFlags)
}
}
}

// Set up any read-only paths that we need to. If we're running inside
// of a container, some of these locations will already be read-only.
// of a container, some of these locations will already be read-only, in
// which case can declare victory and move on.
for _, roPath := range spec.Linux.ReadonlyPaths {
r := filepath.Join(spec.Root.Path, roPath)
target, err := filepath.EvalSymlinks(r)
Expand All @@ -515,12 +554,13 @@ func setupChrootBindMounts(spec *specs.Spec, bundlePath string) (undoBinds func(
}
return undoBinds, fmt.Errorf("checking if directory %q is already read-only: %w", target, err)
}
if fs.Flags&unix.ST_RDONLY != 0 {
if fs.Flags&unix.ST_RDONLY == unix.ST_RDONLY {
continue
}
// Mount the location over itself, so that we can remount it as read-only.
roFlags := uintptr(unix.MS_NODEV | unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_RDONLY)
if err := unix.Mount(target, target, "", roFlags|unix.MS_BIND|unix.MS_REC, ""); err != nil {
// Mount the location over itself, so that we can remount it as read-only, making
// sure to preserve any combination of nodev/noexec/nosuid that's already in play.
roFlags := mountFlagsForFSFlags(uintptr(fs.Flags)) | unix.MS_RDONLY
if err := unix.Mount(target, target, "", bindFlags|roFlags, ""); err != nil {
if errors.Is(err, os.ErrNotExist) {
// No target, no problem.
continue
Expand All @@ -532,7 +572,7 @@ func setupChrootBindMounts(spec *specs.Spec, bundlePath string) (undoBinds func(
return undoBinds, fmt.Errorf("checking if directory %q was bound read-only: %w", target, err)
}
if fs.Flags&unix.ST_RDONLY == 0 {
if err := unix.Mount(target, target, "", roFlags|unix.MS_BIND|unix.MS_REMOUNT, ""); err != nil {
if err := unix.Mount(target, target, "", unix.MS_REMOUNT|unix.MS_RDONLY|bindFlags|mountFlagsForFSFlags(uintptr(fs.Flags)), ""); err != nil {
return undoBinds, fmt.Errorf("remounting %q in mount namespace read-only: %w", target, err)
}
}
Expand All @@ -541,6 +581,7 @@ func setupChrootBindMounts(spec *specs.Spec, bundlePath string) (undoBinds func(
return undoBinds, fmt.Errorf("checking if directory %q was remounted read-only: %w", target, err)
}
if fs.Flags&unix.ST_RDONLY == 0 {
// Still not read only.
return undoBinds, fmt.Errorf("verifying that %q in mount namespace was remounted read-only: %w", target, err)
}
}
Expand Down Expand Up @@ -578,7 +619,7 @@ func setupChrootBindMounts(spec *specs.Spec, bundlePath string) (undoBinds func(
if err = unix.Statfs(target, &statfs); err != nil {
return undoBinds, fmt.Errorf("checking if directory %q is a mountpoint: %w", target, err)
}
isReadOnly := statfs.Flags&unix.MS_RDONLY != 0
isReadOnly := statfs.Flags&unix.ST_RDONLY == unix.ST_RDONLY
// Check if any of the IDs we're mapping could read it.
var stat unix.Stat_t
if err = unix.Stat(target, &stat); err != nil {
Expand Down Expand Up @@ -641,11 +682,11 @@ func setupChrootBindMounts(spec *specs.Spec, bundlePath string) (undoBinds func(
return undoBinds, fmt.Errorf("masking directory %q in mount namespace: %w", target, err)
}
if err = unix.Statfs(target, &fs); err != nil {
return undoBinds, fmt.Errorf("checking if directory %q was mounted read-only in mount namespace: %w", target, err)
return undoBinds, fmt.Errorf("checking if masked directory %q was mounted read-only in mount namespace: %w", target, err)
}
if fs.Flags&unix.ST_RDONLY == 0 {
if err = unix.Mount(target, target, "", roFlags|syscall.MS_REMOUNT, ""); err != nil {
return undoBinds, fmt.Errorf("making sure directory %q in mount namespace is read only: %w", target, err)
if err = unix.Mount(target, target, "", syscall.MS_REMOUNT|roFlags|mountFlagsForFSFlags(uintptr(fs.Flags)), ""); err != nil {
return undoBinds, fmt.Errorf("making sure masked directory %q in mount namespace is read only: %w", target, err)
}
}
}
Expand Down
Loading
Loading