about summary refs log tree commit diff
diff options
context:
space:
mode:
authoraszlig <aszlig@redmoonstudios.org>2016-05-06 22:00:01 +0200
committeraszlig <aszlig@redmoonstudios.org>2016-05-06 22:00:01 +0200
commite936f7dff6a42080e0af0687e6858160af16dd8c (patch)
tree325a511cb4348c81b1ddd5bbc2483f818d65806c
parentf53850bf21bb32717819647df8363977d3f4395e (diff)
parent64ca91cac9b5dd520a736528a3f0a29ba1480593 (diff)
downloadnixlib-e936f7dff6a42080e0af0687e6858160af16dd8c.tar
nixlib-e936f7dff6a42080e0af0687e6858160af16dd8c.tar.gz
nixlib-e936f7dff6a42080e0af0687e6858160af16dd8c.tar.bz2
nixlib-e936f7dff6a42080e0af0687e6858160af16dd8c.tar.lz
nixlib-e936f7dff6a42080e0af0687e6858160af16dd8c.tar.xz
nixlib-e936f7dff6a42080e0af0687e6858160af16dd8c.tar.zst
nixlib-e936f7dff6a42080e0af0687e6858160af16dd8c.zip
Merge branch 'stage1-dont-kill-kthreads'
Merges pull request #15275:

    This addresses #15226 and fixes killing of processes before
    switching from the initrd to the real root.

    Right now, the pkill that is issued not only kills user space
    processes but also sends a SIGKILL to kernel threads as well.
    Usually these threads ignore signals, but some of these processes do
    handle signals, like for example the md module, which happened in
    #15226.

    It also adds a small check for the swraid installer test and a
    standalone test which checks on just that problem, so in the future
    this shouldn't happen again.

This has been acked by @edolstra on IRC.
-rw-r--r--nixos/modules/system/boot/stage-1-init.sh14
-rw-r--r--nixos/release-combined.nix1
-rw-r--r--nixos/release.nix1
-rw-r--r--nixos/tests/boot-stage1.nix155
-rw-r--r--nixos/tests/installer.nix4
5 files changed, 173 insertions, 2 deletions
diff --git a/nixos/modules/system/boot/stage-1-init.sh b/nixos/modules/system/boot/stage-1-init.sh
index 1f8779abf0c3..9bffcd31b9b4 100644
--- a/nixos/modules/system/boot/stage-1-init.sh
+++ b/nixos/modules/system/boot/stage-1-init.sh
@@ -439,8 +439,18 @@ eval "exec $logOutFd>&- $logErrFd>&-"
 
 # Kill any remaining processes, just to be sure we're not taking any
 # with us into stage 2. But keep storage daemons like unionfs-fuse.
-pkill -9 -v -f '@'
-
+#
+# Storage daemons are distinguished by an @ in front of their command line:
+# https://www.freedesktop.org/wiki/Software/systemd/RootStorageDaemons/
+pidsToKill="$(pgrep -v -f '^@')"
+for pid in $pidsToKill; do
+    # Make sure we don't kill kernel processes, see #15226 and:
+    # http://stackoverflow.com/questions/12213445/identifying-kernel-threads
+    readlink "/proc/$pid/exe" &> /dev/null || continue
+    # Try to avoid killing ourselves.
+    [ "$pid" -eq $$ ] && continue
+    kill -9 "$pid"
+done
 
 if test -n "$debug1mounts"; then fail; fi
 
diff --git a/nixos/release-combined.nix b/nixos/release-combined.nix
index acf9ab143dab..c8c4df5c9138 100644
--- a/nixos/release-combined.nix
+++ b/nixos/release-combined.nix
@@ -67,6 +67,7 @@ in rec {
         (all nixos.tests.boot.biosUsb)
         (all nixos.tests.boot.uefiCdrom)
         (all nixos.tests.boot.uefiUsb)
+        (all nixos.tests.boot-stage1)
         (all nixos.tests.ipv6)
         (all nixos.tests.kde4)
         #(all nixos.tests.lightdm)
diff --git a/nixos/release.nix b/nixos/release.nix
index 97f6df16dc99..c8547784bbcf 100644
--- a/nixos/release.nix
+++ b/nixos/release.nix
@@ -209,6 +209,7 @@ in rec {
   tests.bittorrent = callTest tests/bittorrent.nix {};
   tests.blivet = callTest tests/blivet.nix {};
   tests.boot = callSubTests tests/boot.nix {};
+  tests.boot-stage1 = callTest tests/boot-stage1.nix {};
   tests.cadvisor = hydraJob (import tests/cadvisor.nix { system = "x86_64-linux"; });
   tests.chromium = (callSubTests tests/chromium.nix { system = "x86_64-linux"; }).stable;
   tests.cjdns = callTest tests/cjdns.nix {};
diff --git a/nixos/tests/boot-stage1.nix b/nixos/tests/boot-stage1.nix
new file mode 100644
index 000000000000..ad253d23c543
--- /dev/null
+++ b/nixos/tests/boot-stage1.nix
@@ -0,0 +1,155 @@
+import ./make-test.nix ({ pkgs, ... }: {
+  name = "boot-stage1";
+
+  machine = { config, pkgs, lib, ... }: {
+    boot.extraModulePackages = let
+      compileKernelModule = name: source: pkgs.runCommand name rec {
+        inherit source;
+        kdev = config.boot.kernelPackages.kernel.dev;
+        kver = config.boot.kernelPackages.kernel.modDirVersion;
+        ksrc = "${kdev}/lib/modules/${kver}/build";
+      } ''
+        echo "obj-m += $name.o" > Makefile
+        echo "$source" > "$name.c"
+        make -C "$ksrc" M=$(pwd) modules
+        install -vD "$name.ko" "$out/lib/modules/$kver/$name.ko"
+      '';
+
+      # This spawns a kthread which just waits until it gets a signal and
+      # terminates if that is the case. We want to make sure that nothing during
+      # the boot process kills any kthread by accident, like what happened in
+      # issue #15226.
+      kcanary = compileKernelModule "kcanary" ''
+        #include <linux/init.h>
+        #include <linux/module.h>
+        #include <linux/kernel.h>
+        #include <linux/kthread.h>
+        #include <linux/sched.h>
+
+        struct task_struct *canaryTask;
+
+        static int kcanary(void *nothing)
+        {
+          allow_signal(SIGINT);
+          allow_signal(SIGTERM);
+          allow_signal(SIGKILL);
+          while (!kthread_should_stop()) {
+            set_current_state(TASK_INTERRUPTIBLE);
+            schedule_timeout_interruptible(msecs_to_jiffies(100));
+            if (signal_pending(current)) break;
+          }
+          return 0;
+        }
+
+        static int kcanaryInit(void)
+        {
+          canaryTask = kthread_run(&kcanary, NULL, "kcanary"); /* kept for kthread_stop() in kcanaryExit */
+          return PTR_ERR_OR_ZERO(canaryTask); /* fail module load if the kthread could not be created */
+        }
+
+        static void kcanaryExit(void)
+        {
+          kthread_stop(canaryTask);
+        }
+
+        module_init(kcanaryInit);
+        module_exit(kcanaryExit);
+      '';
+
+    in lib.singleton kcanary;
+
+    boot.initrd.kernelModules = [ "kcanary" ];
+
+    boot.initrd.extraUtilsCommands = let
+      compile = name: source: pkgs.runCommand name { inherit source; } ''
+        mkdir -p "$out/bin"
+        echo "$source" | gcc -Wall -o "$out/bin/$name" -xc -
+      '';
+
+      daemonize = name: source: compile name ''
+        #include <stdio.h>
+        #include <unistd.h>
+
+        void runSource(void) {
+        ${source}
+        }
+
+        int main(void) {
+          if (fork() > 0) return 0;
+          setsid();
+          runSource();
+          return 1;
+        }
+      '';
+
+      mkCmdlineCanary = { name, cmdline ? "", source ? "" }: (daemonize name ''
+        char *argv[] = {"${cmdline}", NULL};
+        execvp("${name}-child", argv);
+      '') // {
+        child = compile "${name}-child" ''
+          #include <stdio.h>
+          #include <unistd.h>
+
+          int main(void) {
+            ${source}
+            while (1) sleep(1);
+            return 1;
+          }
+        '';
+      };
+
+      copyCanaries = with lib; concatMapStrings (canary: ''
+        ${optionalString (canary ? child) ''
+          copy_bin_and_libs "${canary.child}/bin/${canary.child.name}"
+        ''}
+        copy_bin_and_libs "${canary}/bin/${canary.name}"
+      '');
+
+    in copyCanaries [
+      # Simple canary process which just sleeps forever and should have been
+      # killed by the time stage 2 starts.
+      (daemonize "canary1" "while (1) sleep(1);")
+
+      # We want this canary process to try mimicking a kthread using a cmdline
+      # with a zero length so we can make sure that the process is properly
+      # killed in stage 1.
+      (mkCmdlineCanary {
+        name = "canary2";
+        source = ''
+          FILE *f;
+          f = fopen("/run/canary2.pid", "w");
+          fprintf(f, "%d\n", getpid());
+          fclose(f);
+        '';
+      })
+
+      # This canary process mimics a storage daemon, which we do NOT want to be
+      # killed before going into stage 2. For more on root storage daemons, see:
+      # https://www.freedesktop.org/wiki/Software/systemd/RootStorageDaemons/
+      (mkCmdlineCanary {
+        name = "canary3";
+        cmdline = "@canary3";
+      })
+    ];
+
+    boot.initrd.postMountCommands = ''
+      canary1
+      canary2
+      canary3
+      # Make sure the pidfile of canary 2 is created so that we still can get
+      # its former pid after the killing spree starts next within stage 1.
+      while [ ! -s /run/canary2.pid ]; do sleep 0.1; done
+    '';
+  };
+
+  testScript = ''
+    $machine->waitForUnit("multi-user.target");
+    $machine->succeed('test -s /run/canary2.pid');
+    $machine->fail('pgrep -a canary1');
+    $machine->fail('kill -0 $(< /run/canary2.pid)');
+    $machine->succeed('pgrep -a -f \'^@canary3$\''');
+    $machine->succeed('pgrep -a -f \'^kcanary$\''');
+  '';
+
+  meta.maintainers = with pkgs.stdenv.lib.maintainers; [ aszlig ];
+})
diff --git a/nixos/tests/installer.nix b/nixos/tests/installer.nix
index c9aa9f9c85df..0b0e53ee7324 100644
--- a/nixos/tests/installer.nix
+++ b/nixos/tests/installer.nix
@@ -407,6 +407,10 @@ in {
               "mdadm --verbose -W /dev/md1",
           );
         '';
+      preBootCommands = ''
+        $machine->start;
+        $machine->fail("dmesg | grep 'immediate safe mode'");
+      '';
     };
 
   # Test a basic install using GRUB 1.