From 871ca8c7fba2fcd76c35e4da30db41cddfb3c766 Mon Sep 17 00:00:00 2001 From: Edgar Lee <122112154+elpdt852@users.noreply.github.com> Date: Mon, 19 Feb 2024 22:08:49 +0800 Subject: [PATCH] Add support for gVisor --- .github/workflows/ci.yml | 1 + modules/common/containerd-rootless.nix | 49 ++++++++-- modules/common/containerd.nix | 50 ++++++++++ modules/nixos/containerd-rootless.nix | 2 +- modules/nixos/containerd.nix | 45 +++++---- modules/nixos/default.nix | 1 + modules/nixos/tests/gvisor.nix | 124 +++++++++++++++++++++++++ 7 files changed, 246 insertions(+), 26 deletions(-) create mode 100644 modules/nixos/tests/gvisor.nix diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index fe743da..aed5380 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -46,6 +46,7 @@ jobs: - k3s - k3s-external - k3s-rootless + - gvisor needs: [lint, build] if: contains(github.event.pull_request.labels.*.name, 'ok-to-test') steps: diff --git a/modules/common/containerd-rootless.nix b/modules/common/containerd-rootless.nix index 3400120..4af6cee 100644 --- a/modules/common/containerd-rootless.nix +++ b/modules/common/containerd-rootless.nix @@ -50,6 +50,29 @@ let ''; }; + runsc-rootless = pkgs.writeShellApplication { + name = "runsc"; + runtimeInputs = [ + pkgs.gvisor + ]; + text = '' + exec runsc \ + --ignore-cgroups \ + "$@" + ''; + }; + + gvisor-rootless = pkgs.buildEnv { + name = "gvisor-rootless"; + paths = [ + # Specific order matters here, since we want runsc-rootless to win over + # runsc. + runsc-rootless + pkgs.gvisor + ]; + ignoreCollisions = true; + }; + makeProg = args: pkgs.substituteAll (args // { inherit (pkgs) runtimeShell; dir = "bin"; @@ -94,7 +117,7 @@ let name = "containerd-rootless-child"; src = ./containerd-rootless-child.sh; inherit mountSources mountPoints; - path = lib.makeBinPath [ + path = lib.makeBinPath ([ cfg.package pkgs.coreutils pkgs.iptables @@ -103,7 +126,7 @@ let # Mount only works inside user namespaces from "/run/current-system/sw" # See: https://github.com/NixOS/nixpkgs/issues/42117#issuecomment-872029461 "/run/current-system/sw" - ]; + ] ++ cfg.path); }; in { @@ -160,6 +183,9 @@ in { options.virtualisation.containerd.rootless = { inherit (ctrd-lib.options) nixSnapshotterIntegration + gVisorIntegration + path + defaultRuntime setAddress setNamespace setSnapshotter @@ -235,12 +261,15 @@ in { setAddress = lib.mkDefault "$XDG_RUNTIME_DIR/containerd/containerd.sock"; - settings = { - version = 2; - plugins."io.containerd.grpc.v1.cri" = { - cni.bin_dir = lib.mkOptionDefault "${pkgs.cni-plugins}/bin"; + settings = lib.recursiveUpdate + (ctrd-lib.mkSettings cfg) + { + plugins."io.containerd.grpc.v1.cri" = { + disable_apparmor = true; + disable_cgroup = true; + restrict_oom_score_adj = true; + }; }; - }; bindMounts = { "$XDG_RUNTIME_DIR/containerd".mountPoint = "/run/containerd"; @@ -262,5 +291,11 @@ in { }; }; }) + (lib.mkIf cfg.gVisorIntegration { + virtualisation.containerd.rootless = { + path = [ gvisor-rootless ]; + settings = ctrd-lib.mkGVisorSettings; + }; + }) ]); } diff --git a/modules/common/containerd.nix b/modules/common/containerd.nix index 503d3b6..161b639 100644 --- a/modules/common/containerd.nix +++ b/modules/common/containerd.nix @@ -16,6 +16,24 @@ let nixSnapshotterIntegration = mkEnableOption "Nix snapshotter integration"; + gVisorIntegration = mkEnableOption "gVisor integration"; + + defaultRuntime = mkOption { + type = types.str; + description = lib.mdDoc '' + Configures the default CRI runtime for containerd. + ''; + default = "runc"; + }; + + path = mkOption { + type = types.listOf types.path; + description = lib.mdDoc '' + Packages to be included in the PATH for containerd. + ''; + default = []; + }; + setAddress = mkOption { type = types.str; default = "/run/containerd/containerd.sock"; @@ -60,11 +78,41 @@ let }; }; + mkSettings = cfg: { + version = 2; + plugins."io.containerd.grpc.v1.cri" = { + cni = { + conf_dir = lib.mkOptionDefault "/etc/cni/net.d"; + bin_dir = lib.mkOptionDefault "${pkgs.cni-plugins}/bin"; + }; + + containerd = { + default_runtime_name = cfg.defaultRuntime; + + runtimes.runc = { + runtime_type = "io.containerd.runc.v2"; + options.SystemdCgroup = false; + }; + }; + }; + }; + + mkGVisorSettings = { + plugins."io.containerd.grpc.v1.cri".containerd = { + runtimes.runsc = { + runtime_type = "io.containerd.runsc.v1"; + }; + }; + }; + in { options.virtualisation.containerd = { inherit (options) k3sIntegration nixSnapshotterIntegration + gVisorIntegration + defaultRuntime + path setAddress setNamespace setSnapshotter @@ -76,7 +124,9 @@ in { default = { inherit options + mkGVisorSettings mkNixSnapshotterSettings + mkSettings ; }; internal = true; diff --git a/modules/nixos/containerd-rootless.nix b/modules/nixos/containerd-rootless.nix index 4c4c946..071c8b6 100644 --- a/modules/nixos/containerd-rootless.nix +++ b/modules/nixos/containerd-rootless.nix @@ -11,7 +11,7 @@ in { ../common/containerd-rootless.nix ]; - config = lib.mkIf cfg.enable { + config = lib.mkIf cfg.enable { environment.extraInit = '' if [ -z "$CONTAINERD_ADDRESS" ]; then export CONTAINERD_ADDRESS="${cfg.setAddress}" diff --git a/modules/nixos/containerd.nix b/modules/nixos/containerd.nix index 0054e5d..7c5c46c 100644 --- a/modules/nixos/containerd.nix +++ b/modules/nixos/containerd.nix @@ -18,6 +18,10 @@ in { config = lib.mkIf cfg.enable (lib.mkMerge [ { + virtualisation.containerd = { + settings = cfg.lib.mkSettings cfg; + }; + environment.extraInit = '' if [ -z "$CONTAINERD_ADDRESS" ]; then export CONTAINERD_ADDRESS="${cfg.setAddress}" @@ -33,6 +37,8 @@ in { export CONTAINERD_SNAPSHOTTER="${cfg.setSnapshotter}" fi ''); + + systemd.services.containerd.path = cfg.path; } (lib.mkIf cfg.k3sIntegration { services.k3s.moreFlags = [ @@ -42,25 +48,22 @@ in { virtualisation.containerd = { setNamespace = lib.mkDefault "k8s.io"; - settings.plugins."io.containerd.grpc.v1.cri" = { - stream_server_address = "127.0.0.1"; - stream_server_port = "10010"; - enable_selinux = false; - enable_unprivileged_ports = true; - enable_unprivileged_icmp = true; - disable_apparmor = true; - disable_cgroup = true; - restrict_oom_score_adj = true; - sandbox_image = "rancher/mirrored-pause:3.6"; + settings = { + plugins."io.containerd.grpc.v1.cri" = { + stream_server_address = "127.0.0.1"; + stream_server_port = "10010"; + enable_selinux = false; + enable_unprivileged_ports = true; + enable_unprivileged_icmp = true; + disable_apparmor = true; + disable_cgroup = true; + restrict_oom_score_adj = true; + sandbox_image = "rancher/mirrored-pause:3.6"; - cni = { - conf_dir = "/var/lib/rancher/k3s/agent/etc/cni/net.d/"; - bin_dir = "${k3s-cni-plugins}/bin"; - }; - - containerd.runtimes.runc = { - runtime_type = "io.containerd.runc.v2"; - options.SystemdCgroup = false; + cni = { + conf_dir = "/var/lib/rancher/k3s/agent/etc/cni/net.d/"; + bin_dir = "${k3s-cni-plugins}/bin"; + }; }; }; }; @@ -76,5 +79,11 @@ in { "--image-service-endpoint unix:///run/nix-snapshotter/nix-snapshotter.sock" ]; }) + (lib.mkIf cfg.gVisorIntegration { + virtualisation.containerd = { + path = [ pkgs.gvisor ]; + settings = cfg.lib.mkGVisorSettings; + }; + }) ]); } diff --git a/modules/nixos/default.nix b/modules/nixos/default.nix index 0c7f3a1..7a15dec 100644 --- a/modules/nixos/default.nix +++ b/modules/nixos/default.nix @@ -87,5 +87,6 @@ in { nixosTests.k3s = import ./tests/k3s.nix; nixosTests.k3s-external = import ./tests/k3s-external.nix; nixosTests.k3s-rootless = import ./tests/k3s-rootless.nix; + nixosTests.gvisor = import ./tests/gvisor.nix; }; } diff --git a/modules/nixos/tests/gvisor.nix b/modules/nixos/tests/gvisor.nix new file mode 100644 index 0000000..c50feb4 --- /dev/null +++ b/modules/nixos/tests/gvisor.nix @@ -0,0 +1,124 @@ +{ lib, pkgs, ... }: +let + redis = pkgs.nix-snapshotter.buildImage { + name = "ghcr.io/pdtpartners/redis"; + tag = "latest"; + copyToRoot = [ + pkgs.util-linux + ]; + config = { + Entrypoint = [ "${pkgs.redis}/bin/redis-server" ]; + Cmd = [ "--protected-mode" "no" ]; + }; + }; + + common = { + environment.systemPackages = [ + pkgs.nerdctl + pkgs.redis + ]; + + nix.settings.experimental-features = [ "nix-command" ]; + }; + +in { + nodes = { + rootful = { + imports = [ + common + ]; + + virtualisation.containerd = { + enable = true; + nixSnapshotterIntegration = true; + gVisorIntegration = true; + defaultRuntime = "runsc"; + }; + + services.nix-snapshotter = { + enable = true; + }; + + services.preload-containerd = { + enable = true; + targets = [{ + archives = [ redis ]; + }]; + }; + }; + + rootless = { + imports = [ + common + ]; + + virtualisation.containerd.rootless = { + enable = true; + nixSnapshotterIntegration = true; + gVisorIntegration = true; + defaultRuntime = "runsc"; + }; + + services.nix-snapshotter.rootless = { + enable = true; + }; + + services.preload-containerd.rootless = { + enable = true; + targets = [{ + archives = [ redis ]; + address = "$XDG_RUNTIME_DIR/containerd/containerd.sock"; + }]; + }; + + users.users.alice = { + uid = 1000; + isNormalUser = true; + }; + + environment.variables = { + XDG_RUNTIME_DIR = "/run/user/1000"; + }; + }; + }; + + testScript = { nodes, ... }: + let + sudo_su = lib.concatStringsSep " " [ + "sudo" + "--preserve-env=XDG_RUNTIME_DIR,CONTAINERD_ADDRESS,CONTAINERD_SNAPSHOTTER" + "-u" + "alice" + ]; + + in '' + def test(machine, sudo_su = ""): + if sudo_su == "": + machine.wait_for_unit("nix-snapshotter.service") + machine.wait_for_unit("containerd.service") + machine.wait_for_unit("preload-containerd.service") + else: + machine.succeed("loginctl enable-linger alice") + wait_for_user_unit(machine, "nix-snapshotter.service") + wait_for_user_unit(machine, "containerd.service") + wait_for_user_unit(machine, "preload-containerd.service") + + with subtest(f"{machine.name}: Run redis using runtime runsc"): + machine.succeed(f"{sudo_su} nerdctl run -d --name redis -p 30000:6379 --cap-add syslog ghcr.io/pdtpartners/redis") + + with subtest(f"{machine.name}: Ensure that gVisor is active"): + out = machine.succeed(f"{sudo_su} nerdctl exec redis dmesg | grep -i gvisor") + assert "Starting gVisor" in out + + with subtest(f"{machine.name}: Ensure that redis is healthy"): + out = machine.wait_until_succeeds(f"{sudo_su} redis-cli -p 30000 ping") + assert "PONG" in out + + def wait_for_user_unit(machine, service, user = "alice"): + machine.wait_until_succeeds(f"systemctl --user --machine={user}@ is-active {service}") + + start_all() + test(rootful) + test(rootless, "${sudo_su}") + ''; +}