diff options
author | Eelco Dolstra <eelco.dolstra@logicblox.com> | 2014-04-10 15:55:51 +0200 |
---|---|---|
committer | Eelco Dolstra <eelco.dolstra@logicblox.com> | 2014-04-10 15:55:51 +0200 |
commit | d2155649af372b87f6373762356303bb450c6a07 (patch) | |
tree | 0f9503545b88d99d6bc2aeb78ebaa8ed8418785a /nixos/modules/virtualisation | |
parent | fe7599b2b2c0f327d2a7672d081f64bbdf6ec2f3 (diff) | |
parent | 6a7a8a144fae43fae51232703bf742c8bcee8d67 (diff) | |
download | nixlib-d2155649af372b87f6373762356303bb450c6a07.tar nixlib-d2155649af372b87f6373762356303bb450c6a07.tar.gz nixlib-d2155649af372b87f6373762356303bb450c6a07.tar.bz2 nixlib-d2155649af372b87f6373762356303bb450c6a07.tar.lz nixlib-d2155649af372b87f6373762356303bb450c6a07.tar.xz nixlib-d2155649af372b87f6373762356303bb450c6a07.tar.zst nixlib-d2155649af372b87f6373762356303bb450c6a07.zip |
Merge branch 'containers'
Fixes #2105.
Diffstat (limited to 'nixos/modules/virtualisation')
-rw-r--r-- | nixos/modules/virtualisation/container-config.nix | 103 | ||||
-rw-r--r-- | nixos/modules/virtualisation/containers.nix | 214 | ||||
-rw-r--r-- | nixos/modules/virtualisation/nixos-container.pl | 238 | ||||
-rw-r--r-- | nixos/modules/virtualisation/run-in-netns.c | 50 |
4 files changed, 577 insertions, 28 deletions
diff --git a/nixos/modules/virtualisation/container-config.nix b/nixos/modules/virtualisation/container-config.nix new file mode 100644 index 000000000000..21e64c8c0957 --- /dev/null +++ b/nixos/modules/virtualisation/container-config.nix @@ -0,0 +1,103 @@ +{ config, pkgs, lib, ... }: + +with lib; + +{ + + config = mkIf config.boot.isContainer { + + # Provide a login prompt on /var/lib/login.socket. On the host, + # you can connect to it by running ‘socat + # unix:<path-to-container>/var/lib/login.socket -,echo=0,raw’. + systemd.sockets.login = + { description = "Login Socket"; + wantedBy = [ "sockets.target" ]; + socketConfig = + { ListenStream = "/var/lib/login.socket"; + SocketMode = "0666"; + Accept = true; + }; + }; + + systemd.services."login@" = + { description = "Login %i"; + environment.TERM = "linux"; + serviceConfig = + { Type = "simple"; + StandardInput = "socket"; + ExecStart = "${pkgs.socat}/bin/socat -t0 - exec:${pkgs.shadow}/bin/login,pty,setsid,setpgid,stderr,ctty"; + TimeoutStopSec = 1; # FIXME + }; + }; + + # Also provide a root login prompt on /var/lib/root-login.socket + # that doesn't ask for a password. This socket can only be used by + # root on the host. + systemd.sockets.root-login = + { description = "Root Login Socket"; + wantedBy = [ "sockets.target" ]; + socketConfig = + { ListenStream = "/var/lib/root-login.socket"; + SocketMode = "0600"; + Accept = true; + }; + }; + + systemd.services."root-login@" = + { description = "Root Login %i"; + environment.TERM = "linux"; + serviceConfig = + { Type = "simple"; + StandardInput = "socket"; + ExecStart = "${pkgs.socat}/bin/socat -t0 - \"exec:${pkgs.shadow}/bin/login -f root,pty,setsid,setpgid,stderr,ctty\""; + TimeoutStopSec = 1; # FIXME + }; + }; + + # Provide a daemon on /var/lib/run-command.socket that reads a + # command from stdin and executes it. + systemd.sockets.run-command = + { description = "Run Command Socket"; + wantedBy = [ "sockets.target" ]; + socketConfig = + { ListenStream = "/var/lib/run-command.socket"; + SocketMode = "0600"; # only root can connect + Accept = true; + }; + }; + + systemd.services."run-command@" = + { description = "Run Command %i"; + environment.TERM = "linux"; + serviceConfig = + { Type = "simple"; + StandardInput = "socket"; + TimeoutStopSec = 1; # FIXME + }; + script = + '' + #! ${pkgs.stdenv.shell} -e + source /etc/bashrc + read c + eval "command=($c)" + exec "''${command[@]}" + ''; + }; + + systemd.services.container-startup-done = + { description = "Container Startup Notification"; + wantedBy = [ "multi-user.target" ]; + after = [ "multi-user.target" ]; + script = + '' + if [ -p /var/lib/startup-done ]; then + echo done > /var/lib/startup-done + fi + ''; + serviceConfig.Type = "oneshot"; + serviceConfig.RemainAfterExit = true; + }; + + }; + +} diff --git a/nixos/modules/virtualisation/containers.nix b/nixos/modules/virtualisation/containers.nix index d87284de4fc1..c53bd7d3509d 100644 --- a/nixos/modules/virtualisation/containers.nix +++ b/nixos/modules/virtualisation/containers.nix @@ -2,6 +2,29 @@ with pkgs.lib; +let + + runInNetns = pkgs.stdenv.mkDerivation { + name = "run-in-netns"; + unpackPhase = "true"; + buildPhase = '' + mkdir -p $out/bin + gcc ${./run-in-netns.c} -o $out/bin/run-in-netns + ''; + installPhase = "true"; + }; + + nixos-container = pkgs.substituteAll { + name = "nixos-container"; + dir = "bin"; + isExecutable = true; + src = ./nixos-container.pl; + perl = "${pkgs.perl}/bin/perl -I${pkgs.perlPackages.FileSlurp}/lib/perl5/site_perl"; + inherit (pkgs) socat; + }; + +in + { options = { @@ -14,19 +37,12 @@ with pkgs.lib; ''; }; - systemd.containers = mkOption { + containers = mkOption { type = types.attrsOf (types.submodule ( { config, options, name, ... }: { options = { - root = mkOption { - type = types.path; - description = '' - The root directory of the container. - ''; - }; - config = mkOption { description = '' A specification of the desired configuration of this @@ -45,21 +61,53 @@ with pkgs.lib; ''; }; + privateNetwork = mkOption { + type = types.bool; + default = false; + description = '' + Whether to give the container its own private virtual + Ethernet interface. The interface is called + <literal>eth0</literal>, and is hooked up to the interface + <literal>c-<replaceable>container-name</replaceable></literal> + on the host. If this option is not set, then the + container shares the network interfaces of the host, + and can bind to any port on any interface. + ''; + }; + + hostAddress = mkOption { + type = types.nullOr types.string; + default = null; + example = "10.231.136.1"; + description = '' + The IPv4 address assigned to the host interface. + ''; + }; + + localAddress = mkOption { + type = types.nullOr types.string; + default = null; + example = "10.231.136.2"; + description = '' + The IPv4 address assigned to <literal>eth0</literal> + in the container. + ''; + }; + }; config = mkMerge - [ { root = mkDefault "/var/lib/containers/${name}"; - } - (mkIf options.config.isDefined { + [ (mkIf options.config.isDefined { path = (import ../../lib/eval-config.nix { modules = let extraConfig = { boot.isContainer = true; security.initialRootPassword = mkDefault "!"; networking.hostName = mkDefault name; + networking.useDHCP = false; }; in [ extraConfig config.config ]; - prefix = [ "systemd" "containers" name ]; + prefix = [ "containers" name ]; }).config.system.build.toplevel; }) ]; @@ -69,12 +117,10 @@ with pkgs.lib; example = literalExample '' { webserver = - { root = "/containers/webserver"; - path = "/nix/var/nix/profiles/webserver"; + { path = "/nix/var/nix/profiles/webserver"; }; database = - { root = "/containers/database"; - config = + { config = { config, pkgs, ... }: { services.postgresql.enable = true; services.postgresql.package = pkgs.postgresql92; @@ -94,29 +140,96 @@ with pkgs.lib; }; - config = { + config = mkIf (!config.boot.isContainer) { + + systemd.services."container@" = + { description = "Container '%i'"; - systemd.services = mapAttrs' (name: container: nameValuePair "container-${name}" - { description = "Container '${name}'"; + unitConfig.RequiresMountsFor = [ "/var/lib/containers/%i" ]; - wantedBy = [ "multi-user.target" ]; + path = [ pkgs.iproute ]; - unitConfig.RequiresMountsFor = [ container.root ]; + environment.INSTANCE = "%i"; + environment.root = "/var/lib/containers/%i"; preStart = '' - mkdir -p -m 0755 ${container.root}/etc - if ! [ -e ${container.root}/etc/os-release ]; then - touch ${container.root}/etc/os-release + mkdir -p -m 0755 $root/var/lib + + # Create a named pipe to get a signal when the container + # has finished booting. + rm -f $root/var/lib/startup-done + mkfifo -m 0600 $root/var/lib/startup-done + ''; + + script = + '' + mkdir -p -m 0755 "$root/etc" "$root/var/lib" + if ! [ -e "$root/etc/os-release" ]; then + touch "$root/etc/os-release" + fi + + mkdir -p -m 0755 \ + "/nix/var/nix/profiles/per-container/$INSTANCE" \ + "/nix/var/nix/gcroots/per-container/$INSTANCE" + + SYSTEM_PATH=/nix/var/nix/profiles/system + if [ -f "/etc/containers/$INSTANCE.conf" ]; then + . "/etc/containers/$INSTANCE.conf" fi + + # Cleanup from last time. + ifaceHost=c-$INSTANCE + ifaceCont=ctmp-$INSTANCE + ns=net-$INSTANCE + ip netns del $ns 2> /dev/null || true + ip link del $ifaceHost 2> /dev/null || true + ip link del $ifaceCont 2> /dev/null || true + + if [ "$PRIVATE_NETWORK" = 1 ]; then + # Create a pair of virtual ethernet devices. On the host, + # we get ‘c-<container-name’, and on the guest, we get + # ‘eth0’. + ip link add $ifaceHost type veth peer name $ifaceCont + ip netns add $ns + ip link set $ifaceCont netns $ns + ip netns exec $ns ip link set $ifaceCont name eth0 + ip netns exec $ns ip link set dev eth0 up + ip link set dev $ifaceHost up + if [ -n "$HOST_ADDRESS" ]; then + ip addr add $HOST_ADDRESS dev $ifaceHost + ip netns exec $ns ip route add $HOST_ADDRESS dev eth0 + ip netns exec $ns ip route add default via $HOST_ADDRESS + fi + if [ -n "$LOCAL_ADDRESS" ]; then + ip netns exec $ns ip addr add $LOCAL_ADDRESS dev eth0 + ip route add $LOCAL_ADDRESS dev $ifaceHost + fi + runInNetNs="${runInNetns}/bin/run-in-netns $ns" + extraFlags="--capability=CAP_NET_ADMIN" + fi + + exec $runInNetNs ${config.systemd.package}/bin/systemd-nspawn \ + -M "$INSTANCE" -D "/var/lib/containers/$INSTANCE" $extraFlags \ + --bind-ro=/nix/store \ + --bind-ro=/nix/var/nix/db \ + --bind-ro=/nix/var/nix/daemon-socket \ + --bind="/nix/var/nix/profiles/per-container/$INSTANCE:/nix/var/nix/profiles" \ + --bind="/nix/var/nix/gcroots/per-container/$INSTANCE:/nix/var/nix/gcroots" \ + "$SYSTEM_PATH/init" ''; - serviceConfig.ExecStart = - "${config.systemd.package}/bin/systemd-nspawn -M ${name} -D ${container.root} --bind-ro=/nix ${container.path}/init"; + postStart = + '' + # This blocks until the container-startup-done service + # writes something to this pipe. FIXME: it also hangs + # until the start timeout expires if systemd-nspawn exits. + read x < $root/var/lib/startup-done + ''; preStop = '' - pid="$(cat /sys/fs/cgroup/systemd/machine/${name}.nspawn/system/tasks 2> /dev/null)" + pid="$(cat /sys/fs/cgroup/systemd/machine/$INSTANCE.nspawn/system/tasks 2> /dev/null)" if [ -n "$pid" ]; then # Send the RTMIN+3 signal, which causes the container # systemd to start halt.target. @@ -131,7 +244,52 @@ with pkgs.lib; done fi ''; - }) config.systemd.containers; + + restartIfChanged = false; + #reloadIfChanged = true; # FIXME + + serviceConfig.ExecReload = pkgs.writeScript "reload-container" + '' + #! ${pkgs.stdenv.shell} -e + SYSTEM_PATH=/nix/var/nix/profiles/system + if [ -f "/etc/containers/$INSTANCE.conf" ]; then + . "/etc/containers/$INSTANCE.conf" + fi + echo $SYSTEM_PATH/bin/switch-to-configuration test | \ + ${pkgs.socat}/bin/socat unix:$root/var/lib/run-command.socket - + ''; + + serviceConfig.SyslogIdentifier = "container %i"; + }; + + # Generate a configuration file in /etc/containers for each + # container so that container@.target can get the container + # configuration. + environment.etc = mapAttrs' (name: cfg: nameValuePair "containers/${name}.conf" + { text = + '' + SYSTEM_PATH=${cfg.path} + ${optionalString cfg.privateNetwork '' + PRIVATE_NETWORK=1 + ${optionalString (cfg.hostAddress != null) '' + HOST_ADDRESS=${cfg.hostAddress} + ''} + ${optionalString (cfg.localAddress != null) '' + LOCAL_ADDRESS=${cfg.localAddress} + ''} + ''} + ''; + }) config.containers; + + # FIXME: auto-start containers. + + # Generate /etc/hosts entries for the containers. + networking.extraHosts = concatStrings (mapAttrsToList (name: cfg: optionalString (cfg.localAddress != null) + '' + ${cfg.localAddress} ${name}.containers + '') config.containers); + + environment.systemPackages = [ nixos-container ]; }; } diff --git a/nixos/modules/virtualisation/nixos-container.pl b/nixos/modules/virtualisation/nixos-container.pl new file mode 100644 index 000000000000..d7e8c7339b6d --- /dev/null +++ b/nixos/modules/virtualisation/nixos-container.pl @@ -0,0 +1,238 @@ +#! @perl@ + +use strict; +use POSIX; +use File::Path; +use File::Slurp; +use Fcntl ':flock'; +use Getopt::Long qw(:config gnu_getopt); + +my $socat = '@socat@/bin/socat'; + +# Parse the command line. + +sub showHelp { + print <<EOF; +Usage: nixos-container list + nixos-container create <container-name> [--config <string>] [--ensure-unique-name] + nixos-container destroy <container-name> + nixos-container start <container-name> + nixos-container stop <container-name> + nixos-container login <container-name> + nixos-container root-login <container-name> + nixos-container run <container-name> -- args... + nixos-container set-root-password <container-name> <password> + nixos-container show-ip <container-name> +EOF + exit 0; +} + +my $ensureUniqueName = 0; +my $extraConfig = ""; + +GetOptions( + "help" => sub { showHelp() }, + "ensure-unique-name" => \$ensureUniqueName, + "config=s" => \$extraConfig + ) or exit 1; + +my $action = $ARGV[0] or die "$0: no action specified\n"; + + +# Execute the selected action. + +mkpath("/etc/containers", 0, 0755); +mkpath("/var/lib/containers", 0, 0700); + +if ($action eq "list") { + foreach my $confFile (glob "/etc/containers/*.conf") { + $confFile =~ /\/([^\/]+).conf$/ or next; + print "$1\n"; + } + exit 0; +} + +my $containerName = $ARGV[1] or die "$0: no container name specified\n"; +$containerName =~ /^[a-zA-Z0-9\-]+$/ or die "$0: invalid container name\n"; + +sub writeNixOSConfig { + my ($nixosConfigFile) = @_; + + my $nixosConfig = <<EOF; +{ config, pkgs, ... }: + +with pkgs.lib; + +{ boot.isContainer = true; + security.initialRootPassword = mkDefault "!"; + networking.hostName = mkDefault "$containerName"; + networking.useDHCP = false; + $extraConfig +} +EOF + + write_file($nixosConfigFile, $nixosConfig); +} + +if ($action eq "create") { + # Acquire an exclusive lock to prevent races with other + # invocations of ‘nixos-container create’. + my $lockFN = "/run/lock/nixos-container"; + open(my $lock, '>>', $lockFN) or die "$0: opening $lockFN: $!"; + flock($lock, LOCK_EX) or die "$0: could not lock $lockFN: $!"; + + my $confFile = "/etc/containers/$containerName.conf"; + my $root = "/var/lib/containers/$containerName"; + + # Maybe generate a unique name. + if ($ensureUniqueName) { + my $base = $containerName; + for (my $nr = 0; ; $nr++) { + $containerName = "$base-$nr"; + $confFile = "/etc/containers/$containerName.conf"; + $root = "/var/lib/containers/$containerName"; + last unless -e $confFile || -e $root; + } + } + + die "$0: container ‘$containerName’ already exists\n" if -e $confFile; + + # Get an unused IP address. + my %usedIPs; + foreach my $confFile2 (glob "/etc/containers/*.conf") { + my $s = read_file($confFile2) or die; + $usedIPs{$1} = 1 if $s =~ /^HOST_ADDRESS=([0-9\.]+)$/m; + $usedIPs{$1} = 1 if $s =~ /^LOCAL_ADDRESS=([0-9\.]+)$/m; + } + + my ($ipPrefix, $hostAddress, $localAddress); + for (my $nr = 1; $nr < 255; $nr++) { + $ipPrefix = "10.233.$nr"; + $hostAddress = "$ipPrefix.1"; + $localAddress = "$ipPrefix.2"; + last unless $usedIPs{$hostAddress} || $usedIPs{$localAddress}; + $ipPrefix = undef; + } + + die "$0: out of IP addresses\n" unless defined $ipPrefix; + + my @conf; + push @conf, "PRIVATE_NETWORK=1\n"; + push @conf, "HOST_ADDRESS=$hostAddress\n"; + push @conf, "LOCAL_ADDRESS=$localAddress\n"; + write_file($confFile, \@conf); + + close($lock); + + print STDERR "host IP is $hostAddress, container IP is $localAddress\n"; + + mkpath("$root/etc/nixos", 0, 0755); + + my $nixosConfigFile = "$root/etc/nixos/configuration.nix"; + writeNixOSConfig $nixosConfigFile; + + # The per-container directory is restricted to prevent users on + # the host from messing with guest users who happen to have the + # same uid. + my $profileDir = "/nix/var/nix/profiles/per-container"; + mkpath($profileDir, 0, 0700); + $profileDir = "$profileDir/$containerName"; + mkpath($profileDir, 0, 0755); + + system("nix-env", "-p", "$profileDir/system", + "-I", "nixos-config=$nixosConfigFile", "-f", "<nixpkgs/nixos>", + "--set", "-A", "system") == 0 + or die "$0: failed to build initial container configuration\n"; + + print "$containerName\n" if $ensureUniqueName; + exit 0; +} + +my $root = "/var/lib/containers/$containerName"; +my $profileDir = "/nix/var/nix/profiles/per-container/$containerName"; +my $confFile = "/etc/containers/$containerName.conf"; +die "$0: container ‘$containerName’ does not exist\n" if !-e $confFile; + +sub isContainerRunning { + my $status = `systemctl show 'container\@$containerName'`; + return $status =~ /ActiveState=active/; +} + +sub stopContainer { + system("systemctl", "stop", "container\@$containerName") == 0 + or die "$0: failed to stop container\n"; +} + +if ($action eq "destroy") { + die "$0: cannot destroy declarative container (remove it from your configuration.nix instead)\n" + unless POSIX::access($confFile, &POSIX::W_OK); + + stopContainer if isContainerRunning; + + rmtree($profileDir) if -e $profileDir; + rmtree($root) if -e $root; + unlink($confFile) or die; +} + +elsif ($action eq "start") { + system("systemctl", "start", "container\@$containerName") == 0 + or die "$0: failed to start container\n"; +} + +elsif ($action eq "stop") { + stopContainer; +} + +elsif ($action eq "update") { + my $nixosConfigFile = "$root/etc/nixos/configuration.nix"; + + # FIXME: may want to be more careful about clobbering the existing + # configuration.nix. + writeNixOSConfig $nixosConfigFile if defined $extraConfig; + + system("nix-env", "-p", "$profileDir/system", + "-I", "nixos-config=$nixosConfigFile", "-f", "<nixpkgs/nixos>", + "--set", "-A", "system") == 0 + or die "$0: failed to build container configuration\n"; + + if (isContainerRunning) { + print STDERR "reloading container...\n"; + system("systemctl", "reload", "container\@$containerName") == 0 + or die "$0: failed to reload container\n"; + } +} + +elsif ($action eq "login") { + exec($socat, "unix:$root/var/lib/login.socket", "-,echo=0,raw"); +} + +elsif ($action eq "root-login") { + exec($socat, "unix:$root/var/lib/root-login.socket", "-,echo=0,raw"); +} + +elsif ($action eq "run") { + shift @ARGV; shift @ARGV; + open(SOCAT, "|-", $socat, "unix:$root/var/lib/run-command.socket", "-"); + print SOCAT join(' ', map { "'$_'" } @ARGV), "\n"; + close(SOCAT); +} + +elsif ($action eq "set-root-password") { + # FIXME: don't get password from the command line. + my $password = $ARGV[2] or die "$0: no password given\n"; + open(SOCAT, "|-", $socat, "unix:$root/var/lib/run-command.socket", "-"); + print SOCAT "passwd\n"; + print SOCAT "$password\n"; + print SOCAT "$password\n"; + close(SOCAT); +} + +elsif ($action eq "show-ip") { + my $s = read_file($confFile) or die; + $s =~ /^LOCAL_ADDRESS=([0-9\.]+)$/m or die "$0: cannot get IP address\n"; + print "$1\n"; +} + +else { + die "$0: unknown action ‘$action’\n"; +} diff --git a/nixos/modules/virtualisation/run-in-netns.c b/nixos/modules/virtualisation/run-in-netns.c new file mode 100644 index 000000000000..d375bddf2e6b --- /dev/null +++ b/nixos/modules/virtualisation/run-in-netns.c @@ -0,0 +1,50 @@ +#define _GNU_SOURCE + +#include <stdio.h> +#include <string.h> +#include <errno.h> + +#include <unistd.h> +#include <sched.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/mount.h> +#include <fcntl.h> +#include <linux/limits.h> + +int main(int argc, char * * argv) +{ + if (argc < 3) { + fprintf(stderr, "%s: missing arguments\n", argv[0]); + return 1; + } + + char nsPath[PATH_MAX]; + + sprintf(nsPath, "/run/netns/%s", argv[1]); + + int fd = open(nsPath, O_RDONLY); + if (fd == -1) { + fprintf(stderr, "%s: opening network namespace: %s\n", argv[0], strerror(errno)); + return 1; + } + + if (setns(fd, CLONE_NEWNET) == -1) { + fprintf(stderr, "%s: setting network namespace: %s\n", argv[0], strerror(errno)); + return 1; + } + + umount2(nsPath, MNT_DETACH); + if (unlink(nsPath) == -1) { + fprintf(stderr, "%s: unlinking network namespace: %s\n", argv[0], strerror(errno)); + return 1; + } + + /* FIXME: Remount /sys so that /sys/class/net reflects the + interfaces visible in the network namespace. This requires + bind-mounting /sys/fs/cgroups etc. */ + + execv(argv[2], argv + 2); + fprintf(stderr, "%s: running command: %s\n", argv[0], strerror(errno)); + return 1; +} |