diff --git a/chroot/run_linux.go b/chroot/run_linux.go index cb116add37b..592bdd4f109 100644 --- a/chroot/run_linux.go +++ b/chroot/run_linux.go @@ -236,7 +236,9 @@ func makeReadOnly(mntpoint string, flags uintptr) error { return fmt.Errorf("checking if directory %q was bound read-only: %w", mntpoint, err) } if fs.Flags&unix.ST_RDONLY == 0 { - if err := unix.Mount(mntpoint, mntpoint, "bind", flags|unix.MS_REMOUNT, ""); err != nil { + // All callers currently pass MS_RDONLY in "flags", but in case they stop doing + // that at some point in the future... + if err := unix.Mount(mntpoint, mntpoint, "bind", flags|unix.MS_RDONLY|unix.MS_REMOUNT, ""); err != nil { return fmt.Errorf("remounting %s in mount namespace read-only: %w", mntpoint, err) } } @@ -268,7 +270,7 @@ func setupChrootBindMounts(spec *specs.Spec, bundlePath string) (undoBinds func( // Now bind mount all of those things to be under the rootfs's location in this // mount namespace. commonFlags := uintptr(unix.MS_BIND | unix.MS_REC | unix.MS_PRIVATE) - bindFlags := commonFlags | unix.MS_NODEV + bindFlags := commonFlags devFlags := commonFlags | unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_RDONLY procFlags := devFlags | unix.MS_NODEV sysFlags := devFlags | unix.MS_NODEV @@ -351,7 +353,7 @@ func setupChrootBindMounts(spec *specs.Spec, bundlePath string) (undoBinds func( } logrus.Debugf("bind mounted %q to %q", "/sys", filepath.Join(spec.Root.Path, "/sys")) - // Bind mount in everything we've been asked to mount. + // Bind, overlay, or tmpfs mount everything we've been asked to mount. for _, m := range spec.Mounts { // Skip anything that we just mounted. switch m.Destination { @@ -369,12 +371,12 @@ func setupChrootBindMounts(spec *specs.Spec, bundlePath string) (undoBinds func( continue } } - // Skip anything that isn't a bind or tmpfs mount. + // Skip anything that isn't a bind or overlay or tmpfs mount. if m.Type != "bind" && m.Type != "tmpfs" && m.Type != "overlay" { logrus.Debugf("skipping mount of type %q on %q", m.Type, m.Destination) continue } - // If the target is there, we can just mount it. + // If the target is already there, we can just mount over it. var srcinfo os.FileInfo switch m.Type { case "bind": @@ -382,24 +384,22 @@ func setupChrootBindMounts(spec *specs.Spec, bundlePath string) (undoBinds func( if err != nil { return undoBinds, fmt.Errorf("examining %q for mounting in mount namespace: %w", m.Source, err) } - case "overlay": - fallthrough - case "tmpfs": + case "overlay", "tmpfs": srcinfo, err = os.Stat("/") if err != nil { - return undoBinds, fmt.Errorf("examining / to use as a template for a %s: %w", m.Type, err) + return undoBinds, fmt.Errorf("examining / to use as a template for a %s mount: %w", m.Type, err) } } target := filepath.Join(spec.Root.Path, m.Destination) - // Check if target is a symlink + // Check if target is a symlink. stat, err := os.Lstat(target) - // If target is a symlink, follow the link and ensure the destination exists + // If target is a symlink, follow the link and ensure the destination exists. if err == nil && stat != nil && (stat.Mode()&os.ModeSymlink != 0) { target, err = copier.Eval(spec.Root.Path, m.Destination, copier.EvalOptions{}) if err != nil { return nil, fmt.Errorf("evaluating symlink %q: %w", target, err) } - // Stat the destination of the evaluated symlink + // Stat the destination of the evaluated symlink. _, err = os.Stat(target) } if err != nil { @@ -407,7 +407,8 @@ func setupChrootBindMounts(spec *specs.Spec, bundlePath string) (undoBinds func( if !errors.Is(err, os.ErrNotExist) { return undoBinds, fmt.Errorf("examining %q for mounting in mount namespace: %w", target, err) } - // The target isn't there yet, so create it. + // The target isn't there yet, so create it. If the source is a directory, + // we need a directory, otherwise we need a non-directory (i.e., a file). if srcinfo.IsDir() { if err = os.MkdirAll(target, 0755); err != nil { return undoBinds, fmt.Errorf("creating mountpoint %q in mount namespace: %w", target, err) @@ -423,79 +424,94 @@ func setupChrootBindMounts(spec *specs.Spec, bundlePath string) (undoBinds func( file.Close() } } + // Sort out which flags we're asking for, and what statfs() should be telling us + // if we successfully mounted with them. requestFlags := uintptr(0) - expectedFlags := uintptr(0) + expectedImportantFlags := uintptr(0) + importantFlags := uintptr(0) + possibleImportantFlags := uintptr(unix.MS_NODEV | unix.ST_NOEXEC | unix.ST_NOSUID | unix.MS_RDONLY) for _, option := range m.Options { switch option { case "nodev": requestFlags |= unix.MS_NODEV - expectedFlags |= unix.ST_NODEV + importantFlags |= unix.MS_NODEV + expectedImportantFlags |= unix.ST_NODEV case "dev": requestFlags &= ^uintptr(unix.MS_NODEV) - expectedFlags &= ^uintptr(unix.ST_NODEV) + importantFlags |= unix.MS_NODEV + expectedImportantFlags &= ^uintptr(unix.ST_NODEV) case "noexec": requestFlags |= unix.MS_NOEXEC - expectedFlags |= unix.ST_NOEXEC + importantFlags |= unix.MS_NOEXEC + expectedImportantFlags |= unix.ST_NOEXEC case "exec": requestFlags &= ^uintptr(unix.MS_NOEXEC) - expectedFlags &= ^uintptr(unix.ST_NOEXEC) + importantFlags |= unix.MS_NOEXEC + expectedImportantFlags &= ^uintptr(unix.ST_NOEXEC) case "nosuid": requestFlags |= unix.MS_NOSUID - expectedFlags |= unix.ST_NOSUID + importantFlags |= unix.MS_NOSUID + expectedImportantFlags |= unix.ST_NOSUID case "suid": requestFlags &= ^uintptr(unix.MS_NOSUID) - expectedFlags &= ^uintptr(unix.ST_NOSUID) + importantFlags |= unix.MS_NOSUID + expectedImportantFlags &= ^uintptr(unix.ST_NOSUID) case "ro": requestFlags |= unix.MS_RDONLY - expectedFlags |= unix.ST_RDONLY + importantFlags |= unix.MS_RDONLY + expectedImportantFlags |= unix.ST_RDONLY case "rw": requestFlags &= ^uintptr(unix.MS_RDONLY) - expectedFlags &= ^uintptr(unix.ST_RDONLY) + importantFlags |= unix.MS_RDONLY + expectedImportantFlags &= ^uintptr(unix.ST_RDONLY) } } switch m.Type { case "bind": - // Do the bind mount. - logrus.Debugf("bind mounting %q on %q", m.Destination, filepath.Join(spec.Root.Path, m.Destination)) - if err := unix.Mount(m.Source, target, "", bindFlags|requestFlags, ""); err != nil { + // Do the initial bind mount. We'll worry about the flags in a bit. + logrus.Debugf("bind mounting %q on %q %v", m.Destination, filepath.Join(spec.Root.Path, m.Destination), m.Options) + if err = unix.Mount(m.Source, target, "", bindFlags|requestFlags, ""); err != nil { return undoBinds, fmt.Errorf("bind mounting %q from host to %q in mount namespace (%q): %w", m.Source, m.Destination, target, err) } - if (requestFlags & unix.MS_RDONLY) != 0 { - if err = unix.Statfs(target, &fs); err != nil { - return undoBinds, fmt.Errorf("checking if directory %q was bound read-only: %w", target, err) - } - // we need to make sure these flags are maintained in the REMOUNT operation - additionalFlags := uintptr(fs.Flags) & (unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV) - if err := unix.Mount("", target, "", unix.MS_REMOUNT|unix.MS_BIND|unix.MS_RDONLY|additionalFlags, ""); err != nil { - return undoBinds, fmt.Errorf("setting flags on the bind mount %q from host to %q in mount namespace (%q): %w", m.Source, m.Destination, target, err) - } - } logrus.Debugf("bind mounted %q to %q", m.Source, target) case "tmpfs": - // Mount a tmpfs. - if err := mount.Mount(m.Source, target, m.Type, strings.Join(append(m.Options, "private"), ",")); err != nil { - return undoBinds, fmt.Errorf("mounting tmpfs to %q in mount namespace (%q, %q): %w", m.Destination, target, strings.Join(m.Options, ","), err) + // Mount a tmpfs. We'll worry about the flags in a bit. + if err = mount.Mount(m.Source, target, m.Type, strings.Join(append(m.Options, "private"), ",")); err != nil { + return undoBinds, fmt.Errorf("mounting tmpfs to %q in mount namespace (%q, %q): %w", m.Destination, target, strings.Join(append(m.Options, "private"), ","), err) } logrus.Debugf("mounted a tmpfs to %q", target) case "overlay": - // Mount a overlay. - if err := mount.Mount(m.Source, target, m.Type, strings.Join(append(m.Options, "private"), ",")); err != nil { - return undoBinds, fmt.Errorf("mounting overlay to %q in mount namespace (%q, %q): %w", m.Destination, target, strings.Join(m.Options, ","), err) + // Mount an overlay. We'll worry about the flags in a bit. + if err = mount.Mount(m.Source, target, m.Type, strings.Join(append(m.Options, "private"), ",")); err != nil { + return undoBinds, fmt.Errorf("mounting overlay to %q in mount namespace (%q, %q): %w", m.Destination, target, strings.Join(append(m.Options, "private"), ","), err) } logrus.Debugf("mounted a overlay to %q", target) } + // Time to worry about the flags. if err = unix.Statfs(target, &fs); err != nil { - return undoBinds, fmt.Errorf("checking if directory %q was bound read-only: %w", target, err) + return undoBinds, fmt.Errorf("checking if volume %q was mounted with requested flags: %w", target, err) } - if uintptr(fs.Flags)&expectedFlags != expectedFlags { - if err := unix.Mount(target, target, "bind", requestFlags|unix.MS_REMOUNT, ""); err != nil { - return undoBinds, fmt.Errorf("remounting %q in mount namespace with expected flags: %w", target, err) + effectiveImportantFlags := uintptr(fs.Flags) & importantFlags + if effectiveImportantFlags != expectedImportantFlags { + // Do a remount to try to get the desired flags to stick. + effectiveUnimportantFlags := uintptr(fs.Flags) & ^possibleImportantFlags + if err = unix.Mount(target, target, m.Type, unix.MS_REMOUNT|requestFlags|effectiveUnimportantFlags, ""); err != nil { + return undoBinds, fmt.Errorf("remounting %q in mount namespace with flags %#x instead of %#x: %w", target, requestFlags, effectiveImportantFlags, err) + } + // Check if the desired flags stuck. + if err = unix.Statfs(target, &fs); err != nil { + return undoBinds, fmt.Errorf("checking if directory %q was remounted with requested flags %#x instead of %#x: %w", target, requestFlags, effectiveImportantFlags, err) + } + effectiveImportantFlags = uintptr(fs.Flags) & importantFlags + if effectiveImportantFlags != expectedImportantFlags { + return undoBinds, fmt.Errorf("unable to remount %q with requested flags %#x instead of %#x: %w", target, requestFlags, effectiveImportantFlags, err) } } } // Set up any read-only paths that we need to. If we're running inside - // of a container, some of these locations will already be read-only. + // of a container, some of these locations will already be read-only, in + // which case can declare victory and move on. for _, roPath := range spec.Linux.ReadonlyPaths { r := filepath.Join(spec.Root.Path, roPath) target, err := filepath.EvalSymlinks(r) @@ -515,12 +531,13 @@ func setupChrootBindMounts(spec *specs.Spec, bundlePath string) (undoBinds func( } return undoBinds, fmt.Errorf("checking if directory %q is already read-only: %w", target, err) } - if fs.Flags&unix.ST_RDONLY != 0 { + if fs.Flags&unix.ST_RDONLY == unix.ST_RDONLY { continue } - // Mount the location over itself, so that we can remount it as read-only. - roFlags := uintptr(unix.MS_NODEV | unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_RDONLY) - if err := unix.Mount(target, target, "", roFlags|unix.MS_BIND|unix.MS_REC, ""); err != nil { + // Mount the location over itself, so that we can remount it as read-only, making + // sure to preserve any combination of nodev/noexec/nosuid that's already in play. + roFlags := uintptr(fs.Flags) | unix.MS_RDONLY + if err := unix.Mount(target, target, "", bindFlags|roFlags, ""); err != nil { if errors.Is(err, os.ErrNotExist) { // No target, no problem. continue @@ -532,7 +549,7 @@ func setupChrootBindMounts(spec *specs.Spec, bundlePath string) (undoBinds func( return undoBinds, fmt.Errorf("checking if directory %q was bound read-only: %w", target, err) } if fs.Flags&unix.ST_RDONLY == 0 { - if err := unix.Mount(target, target, "", roFlags|unix.MS_BIND|unix.MS_REMOUNT, ""); err != nil { + if err := unix.Mount(target, target, "", unix.MS_REMOUNT|unix.MS_RDONLY|uintptr(fs.Flags), ""); err != nil { return undoBinds, fmt.Errorf("remounting %q in mount namespace read-only: %w", target, err) } } @@ -541,6 +558,7 @@ func setupChrootBindMounts(spec *specs.Spec, bundlePath string) (undoBinds func( return undoBinds, fmt.Errorf("checking if directory %q was remounted read-only: %w", target, err) } if fs.Flags&unix.ST_RDONLY == 0 { + // Still not read only. return undoBinds, fmt.Errorf("verifying that %q in mount namespace was remounted read-only: %w", target, err) } } @@ -641,11 +659,11 @@ func setupChrootBindMounts(spec *specs.Spec, bundlePath string) (undoBinds func( return undoBinds, fmt.Errorf("masking directory %q in mount namespace: %w", target, err) } if err = unix.Statfs(target, &fs); err != nil { - return undoBinds, fmt.Errorf("checking if directory %q was mounted read-only in mount namespace: %w", target, err) + return undoBinds, fmt.Errorf("checking if masked directory %q was mounted read-only in mount namespace: %w", target, err) } if fs.Flags&unix.ST_RDONLY == 0 { - if err = unix.Mount(target, target, "", roFlags|syscall.MS_REMOUNT, ""); err != nil { - return undoBinds, fmt.Errorf("making sure directory %q in mount namespace is read only: %w", target, err) + if err = unix.Mount(target, target, "", syscall.MS_REMOUNT|roFlags|uintptr(fs.Flags), ""); err != nil { + return undoBinds, fmt.Errorf("making sure masked directory %q in mount namespace is read only: %w", target, err) } } } diff --git a/chroot/run_test.go b/chroot/run_test.go index a6f00805efc..2309def4c54 100644 --- a/chroot/run_test.go +++ b/chroot/run_test.go @@ -20,6 +20,7 @@ import ( "github.com/containers/storage/pkg/reexec" specs "github.com/opencontainers/runtime-spec/specs-go" "github.com/opencontainers/runtime-tools/generate" + "github.com/stretchr/testify/require" ) const ( @@ -318,6 +319,9 @@ func TestProcessNoNewPrivileges(t *testing.T) { if syscall.Getuid() != 0 { t.Skip("tests need to be run as root") } + if !seccompAvailable { + t.Skip("not built with seccomp support") + } for _, nope := range []bool{false, true} { testMinimal(t, func(g *generate.Generator, rootDir, bundleDir string) { @@ -325,7 +329,7 @@ func TestProcessNoNewPrivileges(t *testing.T) { }, func(t *testing.T, report *types.TestReport) { if report.Spec.Process.NoNewPrivileges != nope { - t.Fatalf("expected no-new-prives to be %v, got %v", nope, report.Spec.Process.NoNewPrivileges) + t.Fatalf("expected no-new-privs to be %v, got %v", nope, report.Spec.Process.NoNewPrivileges) } }, ) @@ -375,27 +379,123 @@ func TestMounts(t *testing.T) { if syscall.Getuid() != 0 { t.Skip("tests need to be run as root") } - testMinimal(t, - func(g *generate.Generator, rootDir, bundleDir string) { - g.AddMount(specs.Mount{ - Source: "tmpfs", - Destination: "/was-not-there-before", - Type: "tmpfs", - Options: []string{"ro,size=0"}, - }) - }, - func(t *testing.T, report *types.TestReport) { - found := false - for _, mount := range report.Spec.Mounts { - if mount.Destination == "/was-not-there-before" && mount.Type == "tmpfs" { - found = true + t.Run("tmpfs", func(t *testing.T) { + testMinimal(t, + func(g *generate.Generator, rootDir, bundleDir string) { + g.AddMount(specs.Mount{ + Source: "tmpfs", + Destination: "/was-not-there-before", + Type: "tmpfs", + Options: []string{"ro,size=0"}, + }) + }, + func(t *testing.T, report *types.TestReport) { + found := false + for _, mount := range report.Spec.Mounts { + if mount.Destination == "/was-not-there-before" && mount.Type == "tmpfs" { + found = true + } } - } - if !found { - t.Fatal("added mount not found") - } + if !found { + t.Errorf("added mount for /was-not-there-before not found in %#v", report.Spec.Mounts) + } + }, + ) + }) + binds := []struct { + name string + destination string + options []string + require []string + reject []string + }{ + { + name: "nodev", + destination: "/nodev", + options: []string{"nodev"}, + reject: []string{"dev"}, }, - ) + { + name: "noexec", + destination: "/noexec", + options: []string{"noexec"}, + reject: []string{"exec"}, + }, + { + name: "nosuid", + destination: "/nosuid", + options: []string{"nosuid"}, + reject: []string{"suid"}}, + { + name: "nodev,noexec", + destination: "/nodev,noexec", + options: []string{"nodev", "noexec"}, + reject: []string{"dev", "exec"}, + }, + { + name: "nodev,noexec,nosuid", + destination: "/nodev,noexec,nosuid", + options: []string{"nodev", "noexec", "nosuid"}, + reject: []string{"dev", "exec", "suid"}, + }, + { + name: "nodev,noexec,nosuid,ro", + destination: "/nodev,noexec,nosuid,ro", + options: []string{"nodev", "noexec", "nosuid", "ro"}, + reject: []string{"dev", "exec", "suid", "rw"}, + }, + { + name: "nodev,noexec,nosuid,rw", + destination: "/nodev,noexec,nosuid,rw", + options: []string{"nodev", "noexec", "nosuid", "rw"}, + reject: []string{"dev", "exec", "suid", "ro"}, + }, + } + for _, bind := range binds { + t.Run(bind.name, func(t *testing.T) { + // mount a tmpfs with default flags over the temp dir, which may be on a nodev/noexec/nosuid filesystem + tmpfsMount := t.TempDir() + t.Cleanup(func() { _ = syscall.Unmount(tmpfsMount, syscall.MNT_FORCE|syscall.MNT_DETACH) }) + require.NoErrorf(t, syscall.Mount("none", tmpfsMount, "tmpfs", 0, "rw,size=1m"), "error mounting a tmpfs with default flags at %s", tmpfsMount) + testMinimal(t, + func(g *generate.Generator, rootDir, bundleDir string) { + g.AddMount(specs.Mount{ + Source: tmpfsMount, + Destination: bind.destination, + Type: "bind", + Options: bind.options, + }) + }, + func(t *testing.T, report *types.TestReport) { + foundMounts := make(map[string]bool) + for _, mount := range report.Spec.Mounts { + if mount.Destination == bind.destination { + allRequired := true + requiredFlags := bind.require + if len(requiredFlags) == 0 { + requiredFlags = bind.options + } + for _, required := range requiredFlags { + if !util.StringInSlice(required, mount.Options) { + allRequired = false + } + } + anyRejected := false + for _, rejected := range bind.reject { + if util.StringInSlice(rejected, mount.Options) { + anyRejected = true + } + } + foundMounts[mount.Destination] = allRequired && !anyRejected + } + } + if !foundMounts[bind.destination] { + t.Errorf("added mount for %s not found with the right flags (%v) in %+v", bind.destination, bind.options, report.Spec.Mounts) + } + }, + ) + }) + } } func TestLinuxIDMapping(t *testing.T) { diff --git a/chroot/seccomp.go b/chroot/seccomp.go index 8c609519b87..e875f330c07 100644 --- a/chroot/seccomp.go +++ b/chroot/seccomp.go @@ -13,6 +13,8 @@ import ( "github.com/sirupsen/logrus" ) +const seccompAvailable = true + // setSeccomp sets the seccomp filter for ourselves and any processes that we'll start. func setSeccomp(spec *specs.Spec) error { logrus.Debugf("setting seccomp configuration") diff --git a/chroot/seccomp_freebsd.go b/chroot/seccomp_freebsd.go index 02897da7471..90e9f14cbc7 100644 --- a/chroot/seccomp_freebsd.go +++ b/chroot/seccomp_freebsd.go @@ -7,6 +7,8 @@ import ( "github.com/opencontainers/runtime-spec/specs-go" ) +const seccompAvailable = false + func setSeccomp(spec *specs.Spec) error { // Ignore this on FreeBSD return nil diff --git a/chroot/seccomp_unsupported.go b/chroot/seccomp_unsupported.go index 5be2e48dc74..dc80dcd1d79 100644 --- a/chroot/seccomp_unsupported.go +++ b/chroot/seccomp_unsupported.go @@ -9,6 +9,8 @@ import ( "github.com/opencontainers/runtime-spec/specs-go" ) +const seccompAvailable = false + func setSeccomp(spec *specs.Spec) error { if spec.Linux.Seccomp != nil { return errors.New("configured a seccomp filter without seccomp support?")