From 2a3a956cbb9b6f1c29082b1033b090ad7cfe9c4f Mon Sep 17 00:00:00 2001 From: Nalin Dahyabhai Date: Wed, 4 Oct 2023 17:07:30 -0400 Subject: [PATCH] chroot.setupChrootBindMounts: pay more attention to flags Pay better attention to dev/nodev/exec/noexec/suid/nosuid/ro/rw flags on bind, overlay, and tmpfs mounts when any of them are specified. Stop quietly adding "nodev" when it isn't asked for. Signed-off-by: Nalin Dahyabhai --- chroot/run_linux.go | 153 ++++++++++++-------- chroot/run_test.go | 253 ++++++++++++++++++++++++++-------- chroot/seccomp.go | 2 + chroot/seccomp_freebsd.go | 2 + chroot/seccomp_unsupported.go | 2 + tests/chroot.bats | 105 ++++++++++++++ tests/helpers.bash | 16 +++ 7 files changed, 419 insertions(+), 114 deletions(-) create mode 100644 tests/chroot.bats diff --git a/chroot/run_linux.go b/chroot/run_linux.go index cb116add37b..dae4b717c39 100644 --- a/chroot/run_linux.go +++ b/chroot/run_linux.go @@ -229,6 +229,29 @@ func createPlatformContainer(options runUsingChrootExecSubprocOptions) error { return errors.New("unsupported createPlatformContainer") } +func mountFlagsForFSFlags(fsFlags uintptr) uintptr { + var mountFlags uintptr + for _, mapping := range []struct { + fsFlag uintptr + mountFlag uintptr + }{ + {unix.ST_MANDLOCK, unix.MS_MANDLOCK}, + {unix.ST_NOATIME, unix.MS_NOATIME}, + {unix.ST_NODEV, unix.MS_NODEV}, + {unix.ST_NODIRATIME, unix.MS_NODIRATIME}, + {unix.ST_NOEXEC, unix.MS_NOEXEC}, + {unix.ST_NOSUID, unix.MS_NOSUID}, + {unix.ST_RDONLY, unix.MS_RDONLY}, + {unix.ST_RELATIME, unix.MS_RELATIME}, + {unix.ST_SYNCHRONOUS, unix.MS_SYNCHRONOUS}, + } { + if fsFlags&mapping.fsFlag == mapping.fsFlag { + mountFlags |= mapping.mountFlag + } + } + return mountFlags +} + func makeReadOnly(mntpoint string, flags uintptr) error { var fs unix.Statfs_t // Make sure it's read-only. @@ -236,7 +259,9 @@ func makeReadOnly(mntpoint string, flags uintptr) error { return fmt.Errorf("checking if directory %q was bound read-only: %w", mntpoint, err) } if fs.Flags&unix.ST_RDONLY == 0 { - if err := unix.Mount(mntpoint, mntpoint, "bind", flags|unix.MS_REMOUNT, ""); err != nil { + // All callers currently pass MS_RDONLY in "flags", but in case they stop doing + // that at some point in the future... + if err := unix.Mount(mntpoint, mntpoint, "bind", flags|unix.MS_RDONLY|unix.MS_REMOUNT|unix.MS_BIND, ""); err != nil { return fmt.Errorf("remounting %s in mount namespace read-only: %w", mntpoint, err) } } @@ -268,7 +293,7 @@ func setupChrootBindMounts(spec *specs.Spec, bundlePath string) (undoBinds func( // Now bind mount all of those things to be under the rootfs's location in this // mount namespace. commonFlags := uintptr(unix.MS_BIND | unix.MS_REC | unix.MS_PRIVATE) - bindFlags := commonFlags | unix.MS_NODEV + bindFlags := commonFlags devFlags := commonFlags | unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_RDONLY procFlags := devFlags | unix.MS_NODEV sysFlags := devFlags | unix.MS_NODEV @@ -291,7 +316,7 @@ func setupChrootBindMounts(spec *specs.Spec, bundlePath string) (undoBinds func( return undoBinds, fmt.Errorf("checking if directory %q was bound read-only: %w", subDev, err) } if fs.Flags&unix.ST_RDONLY == 0 { - if err := unix.Mount(subDev, subDev, "bind", devFlags|unix.MS_REMOUNT, ""); err != nil { + if err := unix.Mount(subDev, subDev, "bind", devFlags|unix.MS_REMOUNT|unix.MS_BIND, ""); err != nil { return undoBinds, fmt.Errorf("remounting /dev in mount namespace read-only: %w", err) } } @@ -351,7 +376,7 @@ func setupChrootBindMounts(spec *specs.Spec, bundlePath string) (undoBinds func( } logrus.Debugf("bind mounted %q to %q", "/sys", filepath.Join(spec.Root.Path, "/sys")) - // Bind mount in everything we've been asked to mount. + // Bind, overlay, or tmpfs mount everything we've been asked to mount. for _, m := range spec.Mounts { // Skip anything that we just mounted. switch m.Destination { @@ -369,12 +394,12 @@ func setupChrootBindMounts(spec *specs.Spec, bundlePath string) (undoBinds func( continue } } - // Skip anything that isn't a bind or tmpfs mount. + // Skip anything that isn't a bind or overlay or tmpfs mount. if m.Type != "bind" && m.Type != "tmpfs" && m.Type != "overlay" { logrus.Debugf("skipping mount of type %q on %q", m.Type, m.Destination) continue } - // If the target is there, we can just mount it. + // If the target is already there, we can just mount over it. var srcinfo os.FileInfo switch m.Type { case "bind": @@ -382,24 +407,22 @@ func setupChrootBindMounts(spec *specs.Spec, bundlePath string) (undoBinds func( if err != nil { return undoBinds, fmt.Errorf("examining %q for mounting in mount namespace: %w", m.Source, err) } - case "overlay": - fallthrough - case "tmpfs": + case "overlay", "tmpfs": srcinfo, err = os.Stat("/") if err != nil { - return undoBinds, fmt.Errorf("examining / to use as a template for a %s: %w", m.Type, err) + return undoBinds, fmt.Errorf("examining / to use as a template for a %s mount: %w", m.Type, err) } } target := filepath.Join(spec.Root.Path, m.Destination) - // Check if target is a symlink + // Check if target is a symlink. stat, err := os.Lstat(target) - // If target is a symlink, follow the link and ensure the destination exists + // If target is a symlink, follow the link and ensure the destination exists. if err == nil && stat != nil && (stat.Mode()&os.ModeSymlink != 0) { target, err = copier.Eval(spec.Root.Path, m.Destination, copier.EvalOptions{}) if err != nil { return nil, fmt.Errorf("evaluating symlink %q: %w", target, err) } - // Stat the destination of the evaluated symlink + // Stat the destination of the evaluated symlink. _, err = os.Stat(target) } if err != nil { @@ -407,7 +430,8 @@ func setupChrootBindMounts(spec *specs.Spec, bundlePath string) (undoBinds func( if !errors.Is(err, os.ErrNotExist) { return undoBinds, fmt.Errorf("examining %q for mounting in mount namespace: %w", target, err) } - // The target isn't there yet, so create it. + // The target isn't there yet, so create it. If the source is a directory, + // we need a directory, otherwise we need a non-directory (i.e., a file). if srcinfo.IsDir() { if err = os.MkdirAll(target, 0755); err != nil { return undoBinds, fmt.Errorf("creating mountpoint %q in mount namespace: %w", target, err) @@ -423,79 +447,94 @@ func setupChrootBindMounts(spec *specs.Spec, bundlePath string) (undoBinds func( file.Close() } } + // Sort out which flags we're asking for, and what statfs() should be telling us + // if we successfully mounted with them. requestFlags := uintptr(0) - expectedFlags := uintptr(0) + expectedImportantFlags := uintptr(0) + importantFlags := uintptr(0) + possibleImportantFlags := uintptr(unix.ST_NODEV | unix.ST_NOEXEC | unix.ST_NOSUID | unix.ST_RDONLY) for _, option := range m.Options { switch option { case "nodev": requestFlags |= unix.MS_NODEV - expectedFlags |= unix.ST_NODEV + importantFlags |= unix.ST_NODEV + expectedImportantFlags |= unix.ST_NODEV case "dev": requestFlags &= ^uintptr(unix.MS_NODEV) - expectedFlags &= ^uintptr(unix.ST_NODEV) + importantFlags |= unix.ST_NODEV + expectedImportantFlags &= ^uintptr(unix.ST_NODEV) case "noexec": requestFlags |= unix.MS_NOEXEC - expectedFlags |= unix.ST_NOEXEC + importantFlags |= unix.ST_NOEXEC + expectedImportantFlags |= unix.ST_NOEXEC case "exec": requestFlags &= ^uintptr(unix.MS_NOEXEC) - expectedFlags &= ^uintptr(unix.ST_NOEXEC) + importantFlags |= unix.ST_NOEXEC + expectedImportantFlags &= ^uintptr(unix.ST_NOEXEC) case "nosuid": requestFlags |= unix.MS_NOSUID - expectedFlags |= unix.ST_NOSUID + importantFlags |= unix.ST_NOSUID + expectedImportantFlags |= unix.ST_NOSUID case "suid": requestFlags &= ^uintptr(unix.MS_NOSUID) - expectedFlags &= ^uintptr(unix.ST_NOSUID) + importantFlags |= unix.ST_NOSUID + expectedImportantFlags &= ^uintptr(unix.ST_NOSUID) case "ro": requestFlags |= unix.MS_RDONLY - expectedFlags |= unix.ST_RDONLY + importantFlags |= unix.ST_RDONLY + expectedImportantFlags |= unix.ST_RDONLY case "rw": requestFlags &= ^uintptr(unix.MS_RDONLY) - expectedFlags &= ^uintptr(unix.ST_RDONLY) + importantFlags |= unix.ST_RDONLY + expectedImportantFlags &= ^uintptr(unix.ST_RDONLY) } } switch m.Type { case "bind": - // Do the bind mount. - logrus.Debugf("bind mounting %q on %q", m.Destination, filepath.Join(spec.Root.Path, m.Destination)) - if err := unix.Mount(m.Source, target, "", bindFlags|requestFlags, ""); err != nil { + // Do the initial bind mount. We'll worry about the flags in a bit. + logrus.Debugf("bind mounting %q on %q %v", m.Destination, filepath.Join(spec.Root.Path, m.Destination), m.Options) + if err = unix.Mount(m.Source, target, "", bindFlags|requestFlags, ""); err != nil { return undoBinds, fmt.Errorf("bind mounting %q from host to %q in mount namespace (%q): %w", m.Source, m.Destination, target, err) } - if (requestFlags & unix.MS_RDONLY) != 0 { - if err = unix.Statfs(target, &fs); err != nil { - return undoBinds, fmt.Errorf("checking if directory %q was bound read-only: %w", target, err) - } - // we need to make sure these flags are maintained in the REMOUNT operation - additionalFlags := uintptr(fs.Flags) & (unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV) - if err := unix.Mount("", target, "", unix.MS_REMOUNT|unix.MS_BIND|unix.MS_RDONLY|additionalFlags, ""); err != nil { - return undoBinds, fmt.Errorf("setting flags on the bind mount %q from host to %q in mount namespace (%q): %w", m.Source, m.Destination, target, err) - } - } logrus.Debugf("bind mounted %q to %q", m.Source, target) case "tmpfs": - // Mount a tmpfs. - if err := mount.Mount(m.Source, target, m.Type, strings.Join(append(m.Options, "private"), ",")); err != nil { - return undoBinds, fmt.Errorf("mounting tmpfs to %q in mount namespace (%q, %q): %w", m.Destination, target, strings.Join(m.Options, ","), err) + // Mount a tmpfs. We'll worry about the flags in a bit. + if err = mount.Mount(m.Source, target, m.Type, strings.Join(append(m.Options, "private"), ",")); err != nil { + return undoBinds, fmt.Errorf("mounting tmpfs to %q in mount namespace (%q, %q): %w", m.Destination, target, strings.Join(append(m.Options, "private"), ","), err) } logrus.Debugf("mounted a tmpfs to %q", target) case "overlay": - // Mount a overlay. - if err := mount.Mount(m.Source, target, m.Type, strings.Join(append(m.Options, "private"), ",")); err != nil { - return undoBinds, fmt.Errorf("mounting overlay to %q in mount namespace (%q, %q): %w", m.Destination, target, strings.Join(m.Options, ","), err) + // Mount an overlay. We'll worry about the flags in a bit. + if err = mount.Mount(m.Source, target, m.Type, strings.Join(append(m.Options, "private"), ",")); err != nil { + return undoBinds, fmt.Errorf("mounting overlay to %q in mount namespace (%q, %q): %w", m.Destination, target, strings.Join(append(m.Options, "private"), ","), err) } logrus.Debugf("mounted a overlay to %q", target) } + // Time to worry about the flags. if err = unix.Statfs(target, &fs); err != nil { - return undoBinds, fmt.Errorf("checking if directory %q was bound read-only: %w", target, err) + return undoBinds, fmt.Errorf("checking if volume %q was mounted with requested flags: %w", target, err) } - if uintptr(fs.Flags)&expectedFlags != expectedFlags { - if err := unix.Mount(target, target, "bind", requestFlags|unix.MS_REMOUNT, ""); err != nil { - return undoBinds, fmt.Errorf("remounting %q in mount namespace with expected flags: %w", target, err) + effectiveImportantFlags := uintptr(fs.Flags) & importantFlags + if effectiveImportantFlags != expectedImportantFlags { + // Do a remount to try to get the desired flags to stick. + effectiveUnimportantFlags := uintptr(fs.Flags) & ^possibleImportantFlags + if err = unix.Mount(target, target, m.Type, unix.MS_REMOUNT|bindFlags|requestFlags|mountFlagsForFSFlags(effectiveUnimportantFlags), ""); err != nil { + return undoBinds, fmt.Errorf("remounting %q in mount namespace with flags %#x instead of %#x: %w", target, requestFlags, effectiveImportantFlags, err) + } + // Check if the desired flags stuck. + if err = unix.Statfs(target, &fs); err != nil { + return undoBinds, fmt.Errorf("checking if directory %q was remounted with requested flags %#x instead of %#x: %w", target, requestFlags, effectiveImportantFlags, err) + } + newEffectiveImportantFlags := uintptr(fs.Flags) & importantFlags + if newEffectiveImportantFlags != expectedImportantFlags { + return undoBinds, fmt.Errorf("unable to remount %q with requested flags %#x instead of %#x, just got %#x back", target, requestFlags, effectiveImportantFlags, newEffectiveImportantFlags) } } } // Set up any read-only paths that we need to. If we're running inside - // of a container, some of these locations will already be read-only. + // of a container, some of these locations will already be read-only, in + // which case can declare victory and move on. for _, roPath := range spec.Linux.ReadonlyPaths { r := filepath.Join(spec.Root.Path, roPath) target, err := filepath.EvalSymlinks(r) @@ -515,12 +554,13 @@ func setupChrootBindMounts(spec *specs.Spec, bundlePath string) (undoBinds func( } return undoBinds, fmt.Errorf("checking if directory %q is already read-only: %w", target, err) } - if fs.Flags&unix.ST_RDONLY != 0 { + if fs.Flags&unix.ST_RDONLY == unix.ST_RDONLY { continue } - // Mount the location over itself, so that we can remount it as read-only. - roFlags := uintptr(unix.MS_NODEV | unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_RDONLY) - if err := unix.Mount(target, target, "", roFlags|unix.MS_BIND|unix.MS_REC, ""); err != nil { + // Mount the location over itself, so that we can remount it as read-only, making + // sure to preserve any combination of nodev/noexec/nosuid that's already in play. + roFlags := mountFlagsForFSFlags(uintptr(fs.Flags)) | unix.MS_RDONLY + if err := unix.Mount(target, target, "", bindFlags|roFlags, ""); err != nil { if errors.Is(err, os.ErrNotExist) { // No target, no problem. continue @@ -532,7 +572,7 @@ func setupChrootBindMounts(spec *specs.Spec, bundlePath string) (undoBinds func( return undoBinds, fmt.Errorf("checking if directory %q was bound read-only: %w", target, err) } if fs.Flags&unix.ST_RDONLY == 0 { - if err := unix.Mount(target, target, "", roFlags|unix.MS_BIND|unix.MS_REMOUNT, ""); err != nil { + if err := unix.Mount(target, target, "", unix.MS_REMOUNT|unix.MS_RDONLY|bindFlags|mountFlagsForFSFlags(uintptr(fs.Flags)), ""); err != nil { return undoBinds, fmt.Errorf("remounting %q in mount namespace read-only: %w", target, err) } } @@ -541,6 +581,7 @@ func setupChrootBindMounts(spec *specs.Spec, bundlePath string) (undoBinds func( return undoBinds, fmt.Errorf("checking if directory %q was remounted read-only: %w", target, err) } if fs.Flags&unix.ST_RDONLY == 0 { + // Still not read only. return undoBinds, fmt.Errorf("verifying that %q in mount namespace was remounted read-only: %w", target, err) } } @@ -578,7 +619,7 @@ func setupChrootBindMounts(spec *specs.Spec, bundlePath string) (undoBinds func( if err = unix.Statfs(target, &statfs); err != nil { return undoBinds, fmt.Errorf("checking if directory %q is a mountpoint: %w", target, err) } - isReadOnly := statfs.Flags&unix.MS_RDONLY != 0 + isReadOnly := statfs.Flags&unix.ST_RDONLY == unix.ST_RDONLY // Check if any of the IDs we're mapping could read it. var stat unix.Stat_t if err = unix.Stat(target, &stat); err != nil { @@ -641,11 +682,11 @@ func setupChrootBindMounts(spec *specs.Spec, bundlePath string) (undoBinds func( return undoBinds, fmt.Errorf("masking directory %q in mount namespace: %w", target, err) } if err = unix.Statfs(target, &fs); err != nil { - return undoBinds, fmt.Errorf("checking if directory %q was mounted read-only in mount namespace: %w", target, err) + return undoBinds, fmt.Errorf("checking if masked directory %q was mounted read-only in mount namespace: %w", target, err) } if fs.Flags&unix.ST_RDONLY == 0 { - if err = unix.Mount(target, target, "", roFlags|syscall.MS_REMOUNT, ""); err != nil { - return undoBinds, fmt.Errorf("making sure directory %q in mount namespace is read only: %w", target, err) + if err = unix.Mount(target, target, "", syscall.MS_REMOUNT|roFlags|mountFlagsForFSFlags(uintptr(fs.Flags)), ""); err != nil { + return undoBinds, fmt.Errorf("making sure masked directory %q in mount namespace is read only: %w", target, err) } } } diff --git a/chroot/run_test.go b/chroot/run_test.go index a6f00805efc..bb883e53091 100644 --- a/chroot/run_test.go +++ b/chroot/run_test.go @@ -12,14 +12,17 @@ import ( "path/filepath" "strconv" "strings" - "syscall" "testing" "github.com/containers/buildah/tests/testreport/types" "github.com/containers/buildah/util" + "github.com/containers/storage/pkg/mount" "github.com/containers/storage/pkg/reexec" specs "github.com/opencontainers/runtime-spec/specs-go" "github.com/opencontainers/runtime-tools/generate" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "golang.org/x/sys/unix" ) const ( @@ -34,6 +37,7 @@ func TestMain(m *testing.M) { } func testMinimal(t *testing.T, modify func(g *generate.Generator, rootDir, bundleDir string), verify func(t *testing.T, report *types.TestReport)) { + t.Helper() g, err := generate.New("linux") if err != nil { t.Fatalf("generate.New(%q): %v", "linux", err) @@ -111,14 +115,14 @@ func testMinimal(t *testing.T, modify func(g *generate.Generator, rootDir, bundl } func TestNoop(t *testing.T) { - if syscall.Getuid() != 0 { + if unix.Getuid() != 0 { t.Skip("tests need to be run as root") } testMinimal(t, nil, nil) } func TestMinimalSkeleton(t *testing.T) { - if syscall.Getuid() != 0 { + if unix.Getuid() != 0 { t.Skip("tests need to be run as root") } testMinimal(t, @@ -130,7 +134,7 @@ func TestMinimalSkeleton(t *testing.T) { } func TestProcessTerminal(t *testing.T) { - if syscall.Getuid() != 0 { + if unix.Getuid() != 0 { t.Skip("tests need to be run as root") } for _, terminal := range []bool{false, true} { @@ -148,7 +152,7 @@ func TestProcessTerminal(t *testing.T) { } func TestProcessConsoleSize(t *testing.T) { - if syscall.Getuid() != 0 { + if unix.Getuid() != 0 { t.Skip("tests need to be run as root") } for _, size := range [][2]uint{{80, 25}, {132, 50}} { @@ -170,7 +174,7 @@ func TestProcessConsoleSize(t *testing.T) { } func TestProcessUser(t *testing.T) { - if syscall.Getuid() != 0 { + if unix.Getuid() != 0 { t.Skip("tests need to be run as root") } for _, id := range []uint32{0, 1000} { @@ -193,14 +197,14 @@ func TestProcessUser(t *testing.T) { } func TestProcessEnv(t *testing.T) { - if syscall.Getuid() != 0 { + if unix.Getuid() != 0 { t.Skip("tests need to be run as root") } - e := fmt.Sprintf("PARENT_TEST_PID=%d", syscall.Getpid()) + e := fmt.Sprintf("PARENT_TEST_PID=%d", unix.Getpid()) testMinimal(t, func(g *generate.Generator, rootDir, bundleDir string) { g.ClearProcessEnv() - g.AddProcessEnv("PARENT_TEST_PID", strconv.Itoa(syscall.Getpid())) + g.AddProcessEnv("PARENT_TEST_PID", strconv.Itoa(unix.Getpid())) }, func(t *testing.T, report *types.TestReport) { for _, ev := range report.Spec.Process.Env { @@ -214,7 +218,7 @@ func TestProcessEnv(t *testing.T) { } func TestProcessCwd(t *testing.T) { - if syscall.Getuid() != 0 { + if unix.Getuid() != 0 { t.Skip("tests need to be run as root") } testMinimal(t, @@ -233,7 +237,7 @@ func TestProcessCwd(t *testing.T) { } func TestProcessCapabilities(t *testing.T) { - if syscall.Getuid() != 0 { + if unix.Getuid() != 0 { t.Skip("tests need to be run as root") } testMinimal(t, @@ -277,15 +281,15 @@ func TestProcessCapabilities(t *testing.T) { } func TestProcessRlimits(t *testing.T) { - if syscall.Getuid() != 0 { + if unix.Getuid() != 0 { t.Skip("tests need to be run as root") } - for _, limit := range []int64{100 * 1024 * 1024 * 1024, 200 * 1024 * 1024 * 1024, syscall.RLIM_INFINITY} { + for _, limit := range []uint64{100 * 1024 * 1024 * 1024, 200 * 1024 * 1024 * 1024, unix.RLIM_INFINITY} { testMinimal(t, func(g *generate.Generator, rootDir, bundleDir string) { g.ClearProcessRlimits() - if limit != syscall.RLIM_INFINITY { - g.AddProcessRlimits("rlimit_as", uint64(limit), uint64(limit)) + if limit != unix.RLIM_INFINITY { + g.AddProcessRlimits("rlimit_as", limit, limit) } }, func(t *testing.T, report *types.TestReport) { @@ -295,17 +299,17 @@ func TestProcessRlimits(t *testing.T) { rlim = &report.Spec.Process.Rlimits[i] } } - if limit == syscall.RLIM_INFINITY && !(rlim == nil || (int64(rlim.Soft) == syscall.RLIM_INFINITY && int64(rlim.Hard) == syscall.RLIM_INFINITY)) { + if limit == unix.RLIM_INFINITY && !(rlim == nil || (rlim.Soft == unix.RLIM_INFINITY && rlim.Hard == unix.RLIM_INFINITY)) { t.Fatalf("wasn't supposed to set limit on number of open files: %#v", rlim) } - if limit != syscall.RLIM_INFINITY && rlim == nil { + if limit != unix.RLIM_INFINITY && rlim == nil { t.Fatalf("was supposed to set limit on number of open files") } if rlim != nil { - if int64(rlim.Soft) != limit { + if rlim.Soft != limit { t.Fatalf("soft limit was set to %d, not %d", rlim.Soft, limit) } - if int64(rlim.Hard) != limit { + if rlim.Hard != limit { t.Fatalf("hard limit was set to %d, not %d", rlim.Hard, limit) } } @@ -315,9 +319,12 @@ func TestProcessRlimits(t *testing.T) { } func TestProcessNoNewPrivileges(t *testing.T) { - if syscall.Getuid() != 0 { + if unix.Getuid() != 0 { t.Skip("tests need to be run as root") } + if !seccompAvailable { + t.Skip("not built with seccomp support") + } for _, nope := range []bool{false, true} { testMinimal(t, func(g *generate.Generator, rootDir, bundleDir string) { @@ -325,7 +332,7 @@ func TestProcessNoNewPrivileges(t *testing.T) { }, func(t *testing.T, report *types.TestReport) { if report.Spec.Process.NoNewPrivileges != nope { - t.Fatalf("expected no-new-prives to be %v, got %v", nope, report.Spec.Process.NoNewPrivileges) + t.Fatalf("expected no-new-privs to be %v, got %v", nope, report.Spec.Process.NoNewPrivileges) } }, ) @@ -333,7 +340,7 @@ func TestProcessNoNewPrivileges(t *testing.T) { } func TestProcessOOMScoreAdj(t *testing.T) { - if syscall.Getuid() != 0 { + if unix.Getuid() != 0 { t.Skip("tests need to be run as root") } for _, adj := range []int{0, 1, 2, 3} { @@ -355,10 +362,10 @@ func TestProcessOOMScoreAdj(t *testing.T) { } func TestHostname(t *testing.T) { - if syscall.Getuid() != 0 { + if unix.Getuid() != 0 { t.Skip("tests need to be run as root") } - hostname := fmt.Sprintf("host%d", syscall.Getpid()) + hostname := fmt.Sprintf("host%d", unix.Getpid()) testMinimal(t, func(g *generate.Generator, rootDir, bundleDir string) { g.SetHostname(hostname) @@ -372,49 +379,179 @@ func TestHostname(t *testing.T) { } func TestMounts(t *testing.T) { - if syscall.Getuid() != 0 { + if unix.Getuid() != 0 { t.Skip("tests need to be run as root") } - testMinimal(t, - func(g *generate.Generator, rootDir, bundleDir string) { - g.AddMount(specs.Mount{ - Source: "tmpfs", - Destination: "/was-not-there-before", - Type: "tmpfs", - Options: []string{"ro,size=0"}, - }) - }, - func(t *testing.T, report *types.TestReport) { - found := false - for _, mount := range report.Spec.Mounts { - if mount.Destination == "/was-not-there-before" && mount.Type == "tmpfs" { - found = true + t.Run("tmpfs", func(t *testing.T) { + testMinimal(t, + func(g *generate.Generator, rootDir, bundleDir string) { + g.AddMount(specs.Mount{ + Source: "tmpfs", + Destination: "/was-not-there-before", + Type: "tmpfs", + Options: []string{"ro", "size=0"}, + }) + }, + func(t *testing.T, report *types.TestReport) { + found := false + for _, mount := range report.Spec.Mounts { + if mount.Destination == "/was-not-there-before" && mount.Type == "tmpfs" { + found = true + } } - } - if !found { - t.Fatal("added mount not found") - } + if !found { + t.Errorf("added mount for /was-not-there-before not found in %#v", report.Spec.Mounts) + } + }, + ) + }) + // apparently we can do anything except turn read-only into read-write + binds := []struct { + name string + tmpfsOptions string + destination string + fsType string + options []string + require []string + reject []string + }{ + { + name: "nodev", + destination: "/nodev", + options: []string{"nodev"}, + reject: []string{"dev"}, }, - ) + { + name: "noexec", + destination: "/noexec", + options: []string{"noexec"}, + reject: []string{"exec"}, + }, + { + name: "nosuid", + destination: "/nosuid", + options: []string{"nosuid"}, + reject: []string{"suid"}}, + { + name: "nodev,noexec", + destination: "/nodev,noexec", + options: []string{"nodev", "noexec"}, + reject: []string{"dev", "exec"}, + }, + { + name: "nodev,noexec,nosuid", + destination: "/nodev,noexec,nosuid", + options: []string{"nodev", "noexec", "nosuid"}, + reject: []string{"dev", "exec", "suid"}, + }, + { + name: "nodev,noexec,nosuid,ro", + destination: "/nodev,noexec,nosuid,ro", + options: []string{"nodev", "noexec", "nosuid", "ro"}, + reject: []string{"dev", "exec", "suid", "rw"}, + }, + { + name: "nodev,noexec,nosuid,rw", + destination: "/nodev,noexec,nosuid,rw", + options: []string{"nodev", "noexec", "nosuid", "rw"}, + reject: []string{"dev", "exec", "suid", "ro"}, + }, + { + name: "dev,exec,suid,rw", + tmpfsOptions: "nodev,noexec,nosuid", + destination: "/dev,exec,suid,rw", + options: []string{"dev", "exec", "suid", "rw"}, + require: []string{"rw"}, + reject: []string{"nodev", "noexec", "nosuid", "ro"}, + }, + { + name: "nodev,noexec,nosuid,ro,flip", + tmpfsOptions: "dev,exec,suid,rw", + destination: "/nodev,noexec,nosuid,ro", + options: []string{"nodev", "noexec", "nosuid", "ro"}, + reject: []string{"dev", "exec", "suid", "rw"}, + }, + } + for _, bind := range binds { + t.Run(bind.name, func(t *testing.T) { + // mount a tmpfs over the temp dir, which may be on a nodev/noexec/nosuid filesystem + tmpfsMount := t.TempDir() + t.Cleanup(func() { _ = unix.Unmount(tmpfsMount, unix.MNT_FORCE|unix.MNT_DETACH) }) + tmpfsOptions := "rw,size=1m" + if bind.tmpfsOptions != "" { + tmpfsOptions += ("," + bind.tmpfsOptions) + } + tmpfsFlags, tmpfsOptions := mount.ParseOptions(tmpfsOptions) + require.NoErrorf(t, unix.Mount("none", tmpfsMount, "tmpfs", uintptr(tmpfsFlags), tmpfsOptions), "error mounting a tmpfs with flags=%#x,options=%q at %s", tmpfsFlags, tmpfsOptions, tmpfsMount) + testMinimal(t, + func(g *generate.Generator, rootDir, bundleDir string) { + fsType := bind.fsType + if fsType == "" { + fsType = "bind" + } + g.AddMount(specs.Mount{ + Source: tmpfsMount, + Destination: bind.destination, + Type: fsType, + Options: bind.options, + }) + }, + func(t *testing.T, report *types.TestReport) { + foundMounts := make(map[string]bool) + for _, mount := range report.Spec.Mounts { + if mount.Destination == bind.destination { + allRequired := true + requiredFlags := bind.require + if len(requiredFlags) == 0 { + requiredFlags = bind.options + } + for _, required := range requiredFlags { + if !util.StringInSlice(required, mount.Options) { + allRequired = false + } + } + anyRejected := false + for _, rejected := range bind.reject { + if util.StringInSlice(rejected, mount.Options) { + anyRejected = true + } + } + foundMounts[mount.Destination] = allRequired && !anyRejected + } + } + if !foundMounts[bind.destination] { + t.Errorf("added mount for %s not found with the right flags (%v) in %+v", bind.destination, bind.options, report.Spec.Mounts) + } + }, + ) + // okay, just make sure we didn't change anything about the tmpfs mount point outside of the chroot + var fs unix.Statfs_t + require.NoErrorf(t, unix.Statfs(tmpfsMount, &fs), "fstat") + assert.Equalf(t, tmpfsFlags&unix.MS_NODEV == unix.MS_NODEV, fs.Flags&unix.ST_NODEV == unix.ST_NODEV, "nodev flag") + assert.Equalf(t, tmpfsFlags&unix.MS_NOEXEC == unix.MS_NOEXEC, fs.Flags&unix.ST_NOEXEC == unix.ST_NOEXEC, "noexec flag") + assert.Equalf(t, tmpfsFlags&unix.MS_NOSUID == unix.MS_NOSUID, fs.Flags&unix.ST_NOSUID == unix.ST_NOSUID, "nosuid flag") + assert.Equalf(t, tmpfsFlags&unix.MS_RDONLY == unix.MS_RDONLY, fs.Flags&unix.ST_RDONLY == unix.ST_RDONLY, "readonly flag") + }) + } } func TestLinuxIDMapping(t *testing.T) { - if syscall.Getuid() != 0 { + if unix.Getuid() != 0 { t.Skip("tests need to be run as root") } testMinimal(t, func(g *generate.Generator, rootDir, bundleDir string) { g.ClearLinuxUIDMappings() g.ClearLinuxGIDMappings() - g.AddLinuxUIDMapping(uint32(syscall.Getuid()), 0, 1) - g.AddLinuxGIDMapping(uint32(syscall.Getgid()), 0, 1) + g.AddLinuxUIDMapping(uint32(unix.Getuid()), 0, 1) + g.AddLinuxGIDMapping(uint32(unix.Getgid()), 0, 1) }, func(t *testing.T, report *types.TestReport) { if len(report.Spec.Linux.UIDMappings) != 1 { t.Fatalf("expected 1 uid mapping, got %q", len(report.Spec.Linux.UIDMappings)) } - if report.Spec.Linux.UIDMappings[0].HostID != uint32(syscall.Getuid()) { - t.Fatalf("expected host uid mapping to be %d, got %d", syscall.Getuid(), report.Spec.Linux.UIDMappings[0].HostID) + if report.Spec.Linux.UIDMappings[0].HostID != uint32(unix.Getuid()) { + t.Fatalf("expected host uid mapping to be %d, got %d", unix.Getuid(), report.Spec.Linux.UIDMappings[0].HostID) } if report.Spec.Linux.UIDMappings[0].ContainerID != 0 { t.Fatalf("expected container uid mapping to be 0, got %d", report.Spec.Linux.UIDMappings[0].ContainerID) @@ -422,8 +559,8 @@ func TestLinuxIDMapping(t *testing.T) { if report.Spec.Linux.UIDMappings[0].Size != 1 { t.Fatalf("expected container uid map size to be 1, got %d", report.Spec.Linux.UIDMappings[0].Size) } - if report.Spec.Linux.GIDMappings[0].HostID != uint32(syscall.Getgid()) { - t.Fatalf("expected host uid mapping to be %d, got %d", syscall.Getgid(), report.Spec.Linux.GIDMappings[0].HostID) + if report.Spec.Linux.GIDMappings[0].HostID != uint32(unix.Getgid()) { + t.Fatalf("expected host uid mapping to be %d, got %d", unix.Getgid(), report.Spec.Linux.GIDMappings[0].HostID) } if report.Spec.Linux.GIDMappings[0].ContainerID != 0 { t.Fatalf("expected container gid mapping to be 0, got %d", report.Spec.Linux.GIDMappings[0].ContainerID) @@ -436,22 +573,22 @@ func TestLinuxIDMapping(t *testing.T) { } func TestLinuxIDMappingShift(t *testing.T) { - if syscall.Getuid() != 0 { + if unix.Getuid() != 0 { t.Skip("tests need to be run as root") } testMinimal(t, func(g *generate.Generator, rootDir, bundleDir string) { g.ClearLinuxUIDMappings() g.ClearLinuxGIDMappings() - g.AddLinuxUIDMapping(uint32(syscall.Getuid())+1, 0, 1) - g.AddLinuxGIDMapping(uint32(syscall.Getgid())+1, 0, 1) + g.AddLinuxUIDMapping(uint32(unix.Getuid())+1, 0, 1) + g.AddLinuxGIDMapping(uint32(unix.Getgid())+1, 0, 1) }, func(t *testing.T, report *types.TestReport) { if len(report.Spec.Linux.UIDMappings) != 1 { t.Fatalf("expected 1 uid mapping, got %q", len(report.Spec.Linux.UIDMappings)) } - if report.Spec.Linux.UIDMappings[0].HostID != uint32(syscall.Getuid()+1) { - t.Fatalf("expected host uid mapping to be %d, got %d", syscall.Getuid()+1, report.Spec.Linux.UIDMappings[0].HostID) + if report.Spec.Linux.UIDMappings[0].HostID != uint32(unix.Getuid()+1) { + t.Fatalf("expected host uid mapping to be %d, got %d", unix.Getuid()+1, report.Spec.Linux.UIDMappings[0].HostID) } if report.Spec.Linux.UIDMappings[0].ContainerID != 0 { t.Fatalf("expected container uid mapping to be 0, got %d", report.Spec.Linux.UIDMappings[0].ContainerID) @@ -459,8 +596,8 @@ func TestLinuxIDMappingShift(t *testing.T) { if report.Spec.Linux.UIDMappings[0].Size != 1 { t.Fatalf("expected container uid map size to be 1, got %d", report.Spec.Linux.UIDMappings[0].Size) } - if report.Spec.Linux.GIDMappings[0].HostID != uint32(syscall.Getgid()+1) { - t.Fatalf("expected host uid mapping to be %d, got %d", syscall.Getgid()+1, report.Spec.Linux.GIDMappings[0].HostID) + if report.Spec.Linux.GIDMappings[0].HostID != uint32(unix.Getgid()+1) { + t.Fatalf("expected host uid mapping to be %d, got %d", unix.Getgid()+1, report.Spec.Linux.GIDMappings[0].HostID) } if report.Spec.Linux.GIDMappings[0].ContainerID != 0 { t.Fatalf("expected container gid mapping to be 0, got %d", report.Spec.Linux.GIDMappings[0].ContainerID) diff --git a/chroot/seccomp.go b/chroot/seccomp.go index 8c609519b87..e875f330c07 100644 --- a/chroot/seccomp.go +++ b/chroot/seccomp.go @@ -13,6 +13,8 @@ import ( "github.com/sirupsen/logrus" ) +const seccompAvailable = true + // setSeccomp sets the seccomp filter for ourselves and any processes that we'll start. func setSeccomp(spec *specs.Spec) error { logrus.Debugf("setting seccomp configuration") diff --git a/chroot/seccomp_freebsd.go b/chroot/seccomp_freebsd.go index 02897da7471..90e9f14cbc7 100644 --- a/chroot/seccomp_freebsd.go +++ b/chroot/seccomp_freebsd.go @@ -7,6 +7,8 @@ import ( "github.com/opencontainers/runtime-spec/specs-go" ) +const seccompAvailable = false + func setSeccomp(spec *specs.Spec) error { // Ignore this on FreeBSD return nil diff --git a/chroot/seccomp_unsupported.go b/chroot/seccomp_unsupported.go index 5be2e48dc74..dc80dcd1d79 100644 --- a/chroot/seccomp_unsupported.go +++ b/chroot/seccomp_unsupported.go @@ -9,6 +9,8 @@ import ( "github.com/opencontainers/runtime-spec/specs-go" ) +const seccompAvailable = false + func setSeccomp(spec *specs.Spec) error { if spec.Linux.Seccomp != nil { return errors.New("configured a seccomp filter without seccomp support?") diff --git a/tests/chroot.bats b/tests/chroot.bats new file mode 100644 index 00000000000..110fc952c8b --- /dev/null +++ b/tests/chroot.bats @@ -0,0 +1,105 @@ +#!/usr/bin/env bats + +load helpers + +@test chroot-mount-flags { + skip_if_no_unshare + if ! test -e /etc/subuid ; then + skip "we can't bind mount over /etc/subuid during the test if there is no /etc/subuid file" + fi + if ! test -e /etc/subgid ; then + skip "we can't bind mount over /etc/subgid during the test if there is no /etc/subgid file" + fi + # whom should we map to root in a nested namespace? + if is_rootless ; then + subid=128 + rangesize=1024 + else + subid=1048576 + rangesize=16384 + fi + # we're going to have to prefetch into storage used by someone else image + # chosen because its rootfs doesn't have any uid/gid ownership above + # $rangesize, because the nested namespace needs to be able to represent all + # of them + baseimage=registry.access.redhat.com/ubi9-micro:latest + _prefetch $baseimage + baseimagef=$(tr -c a-zA-Z0-9.- - <<< "$baseimage") + # create the directories that we need + tmpfs=${TEST_SCRATCH_DIR}/tmpfs + mkdir $tmpfs + context=${TEST_SCRATCH_DIR}/context + mkdir $context + storagedir=${TEST_SCRATCH_DIR}/storage + mkdir $storagedir + rootdir=${storagedir}/rootdir + mkdir $rootdir + runrootdir=${storagedir}/runrootdir + mkdir $runrootdir + xdgruntimedir=${storagedir}/xdgruntime + mkdir $xdgruntimedir + xdgconfighome=${storagedir}/xdgconfighome + mkdir $xdgconfighome + xdgdatahome=${storagedir}/xdgdatahome + mkdir $xdgdatahome + storageopts="--storage-driver vfs --root $rootdir --runroot $runrootdir" + # our temporary parent directory might not be world-searchable, which will + # cause someone in the nested user namespace to hit permissions issues even + # looking for $storagedir, so tweak perms to let them do at least that much + fixupdir=$storagedir + while test $(stat -c %d:%i $fixupdir) != $(stat -c %d:%i /) ; do + # walk up to root, or the first parent that we don't own + if test $(stat -c %u $fixupdir) -ne $(id -u) ; then + break + fi + chmod +x $fixupdir + fixupdir=$fixupdir/.. + done + # start writing the script to run in the nested user namespace + echo set -e > ${TEST_SCRATCH_DIR}/script.sh + echo export XDG_RUNTIME_DIR=$xdgruntimedir >> ${TEST_SCRATCH_DIR}/script.sh + echo export XDG_CONFIG_HOME=$xdgconfighome >> ${TEST_SCRATCH_DIR}/script.sh + echo export XDG_DATA_HOME=$xdgdatahome >> ${TEST_SCRATCH_DIR}/script.sh + # give our would-be user ownership of that directory + echo chown --recursive ${subid}:${subid} ${storagedir} >> ${TEST_SCRATCH_DIR}/script.sh + # make newuidmap/newgidmap, invoked by unshare even for uid=0, happy + echo root:0:4294967295 > ${TEST_SCRATCH_DIR}/subid + echo mount --bind -r ${TEST_SCRATCH_DIR}/subid /etc/subuid >> ${TEST_SCRATCH_DIR}/script.sh + echo mount --bind -r ${TEST_SCRATCH_DIR}/subid /etc/subgid >> ${TEST_SCRATCH_DIR}/script.sh + # don't get tripped up by ${TEST_SCRATCH_DIR} potentially being on a filesystem with non-default mount flags + echo mount -t tmpfs -o size=256K tmpfs $tmpfs >> ${TEST_SCRATCH_DIR}/script.sh + # mount a small tmpfs with every mount flag combination that concerns us, and + # be ready to tell buildah to mount everything conservatively, to mirror the + # TransientMounts API being used to nodev/noexec/nosuid/ro bind in a source + # that doesn't necessarily have those flags already set on it + for d in dev nodev ; do + for e in exec noexec ; do + for s in suid nosuid ; do + for r in ro rw ; do + subdir=$tmpfs/d-$d-$e-$s-$r + echo mkdir ${subdir} >> ${TEST_SCRATCH_DIR}/script.sh + echo mount -t tmpfs -o size=256K,$d,$e,$s,$r tmpfs ${subdir} >> ${TEST_SCRATCH_DIR}/script.sh + mounts="${mounts:+${mounts} }--volume ${subdir}:/mounts/d-$d-$e-$s-$r:nodev,noexec,nosuid,ro" + done + done + done + done + # make sure that RUN doesn't just break when we try to use volume mounts with + # flags set that we're not allowed to modify + echo FROM $baseimage > $context/Dockerfile + echo RUN cat /proc/mounts >> $context/Dockerfile + # copy in the prefetched image + # unshare from util-linux 2.39 also accepts INNER:OUTER:SIZE for --map-users + # and --map-groups, but fedora 37's is too old, so the older OUTER,INNER,SIZE + # (using commas instead of colons as field separators) will have to do + echo "unshare -Umpf --mount-proc --setuid 0 --setgid 0 --map-users=${subid},0,${rangesize} --map-groups=${subid},0,${rangesize} ${COPY_BINARY} ${storageopts} dir:$_BUILDAH_IMAGE_CACHEDIR/$baseimagef containers-storage:$baseimage" >> ${TEST_SCRATCH_DIR}/script.sh + # try to do a build with all of the volume mounts + echo "unshare -Umpf --mount-proc --setuid 0 --setgid 0 --map-users=${subid},0,${rangesize} --map-groups=${subid},0,${rangesize} ${BUILDAH_BINARY} ${BUILDAH_REGISTRY_OPTS} ${storageopts} build --isolation chroot --pull=never $mounts $context" >> ${TEST_SCRATCH_DIR}/script.sh + # run that whole script in a nested mount namespace with no $XDG_... + # variables leaked into it + if is_rootless ; then + run_buildah unshare env -i bash -x ${TEST_SCRATCH_DIR}/script.sh + else + unshare -mpf --mount-proc env -i bash -x ${TEST_SCRATCH_DIR}/script.sh + fi +} diff --git a/tests/helpers.bash b/tests/helpers.bash index 349145f298d..fa52d56d07a 100644 --- a/tests/helpers.bash +++ b/tests/helpers.bash @@ -621,6 +621,22 @@ function skip_if_no_docker() { fi } +function skip_if_no_unshare() { + run which ${UNSHARE_BINARY:-unshare} + if [[ $status -ne 0 ]]; then + skip "unshare is not installed" + fi + if ! unshare -Ur true ; then + skip "unshare was not able to create a user namespace" + fi + if ! unshare -Urm true ; then + skip "unshare was not able to create a mount namespace" + fi + if ! unshare -Urmpf true ; then + skip "unshare was not able to create a pid namespace" + fi +} + function start_git_daemon() { daemondir=${TEST_SCRATCH_DIR}/git-daemon mkdir -p ${daemondir}/repo