summary refs log tree commit diff
diff options
context:
space:
mode:
author Eelco Dolstra <eelco.dolstra@logicblox.com> 2014-05-07 17:00:46 +0200
committer Eelco Dolstra <eelco.dolstra@logicblox.com> 2014-05-07 17:53:57 +0200
commit 6f7aaf10a5beb6cc10ebe75674d9c8664bd187fa (patch)
tree c4ced63774765a3b7476d958e1d52c59b7be7cd1
parent 810680bcae1f8ca23744544e87fbf03b705e9493 (diff)
download nixlib-6f7aaf10a5beb6cc10ebe75674d9c8664bd187fa.tar
nixlib-6f7aaf10a5beb6cc10ebe75674d9c8664bd187fa.tar.gz
nixlib-6f7aaf10a5beb6cc10ebe75674d9c8664bd187fa.tar.bz2
nixlib-6f7aaf10a5beb6cc10ebe75674d9c8664bd187fa.tar.lz
nixlib-6f7aaf10a5beb6cc10ebe75674d9c8664bd187fa.tar.xz
nixlib-6f7aaf10a5beb6cc10ebe75674d9c8664bd187fa.tar.zst
nixlib-6f7aaf10a5beb6cc10ebe75674d9c8664bd187fa.zip
Containers: Use systemd-nspawn's --network-veth flag
Note that this causes the name of the host-side interface to change
from c-<name> to ve-<name>.
-rw-r--r-- nixos/doc/manual/containers.xml 8
-rw-r--r-- nixos/doc/manual/release-notes.xml 22
-rw-r--r-- nixos/modules/services/networking/dhcpcd.nix 5
-rw-r--r-- nixos/modules/virtualisation/containers.nix 93
-rw-r--r-- nixos/modules/virtualisation/run-in-netns.c 50
5 files changed, 73 insertions, 105 deletions
diff --git a/nixos/doc/manual/containers.xml b/nixos/doc/manual/containers.xml
index b8f170fc614f..2530d5195212 100644
--- a/nixos/doc/manual/containers.xml
+++ b/nixos/doc/manual/containers.xml
@@ -213,8 +213,8 @@ $ ping -c1 10.233.4.2
 <para>Networking is implemented using a pair of virtual Ethernet
 devices. The network interface in the container is called
 <literal>eth0</literal>, while the matching interface in the host is
-called <literal>c-<replaceable>container-name</replaceable></literal>
-(e.g., <literal>c-foo</literal>).  The container has its own network
+called <literal>ve-<replaceable>container-name</replaceable></literal>
+(e.g., <literal>ve-foo</literal>).  The container has its own network
 namespace and the <literal>CAP_NET_ADMIN</literal> capability, so it
 can perform arbitrary network configuration such as setting up
 firewall rules, without affecting or having access to the host’s
@@ -228,11 +228,11 @@ on the host:
 
 <programlisting>
 networking.nat.enable = true;
-networking.nat.internalInterfaces = ["c-+"];
+networking.nat.internalInterfaces = ["ve-+"];
 networking.nat.externalInterface = "eth0";
 </programlisting>
 where <literal>eth0</literal> should be replaced with the desired
-external interface. Note that <literal>c-+</literal> is a wildcard
+external interface. Note that <literal>ve-+</literal> is a wildcard
 that matches all container interfaces.</para>
 
 </section>
diff --git a/nixos/doc/manual/release-notes.xml b/nixos/doc/manual/release-notes.xml
index 836cb5c19f4c..52e88bb4c861 100644
--- a/nixos/doc/manual/release-notes.xml
+++ b/nixos/doc/manual/release-notes.xml
@@ -6,6 +6,28 @@
 
 <!--==================================================================-->
 
+<section xml:id="sec-release-14.10">
+
+<title>Release 14.10 (“Caterpillar”, 2014/10/??)</title>
+
+<para>When upgrading from a previous release, please be aware of the
+following incompatible changes:
+
+<itemizedlist>
+
+  <listitem><para>The host side of a container virtual Ethernet pair
+  is now called <literal>ve-<replaceable>container-name</replaceable></literal>
+  rather than <literal>c-<replaceable>container-name</replaceable></literal>.</para></listitem>
+
+</itemizedlist>
+
+</para>
+
+</section>
+
+
+<!--==================================================================-->
+
 <section xml:id="sec-release-14.04">
 
 <title>Release 14.04 (“Baboon”, 2014/04/30)</title>
diff --git a/nixos/modules/services/networking/dhcpcd.nix b/nixos/modules/services/networking/dhcpcd.nix
index 757340b3c2cd..8799d7d8a201 100644
--- a/nixos/modules/services/networking/dhcpcd.nix
+++ b/nixos/modules/services/networking/dhcpcd.nix
@@ -34,9 +34,8 @@ let
 
       # Ignore peth* devices; on Xen, they're renamed physical
       # Ethernet cards used for bridging.  Likewise for vif* and tap*
-      # (Xen) and virbr* and vnet* (libvirt) and c-* and ctmp-* (NixOS
-      # containers).
-      denyinterfaces ${toString ignoredInterfaces} lo peth* vif* tap* tun* virbr* vnet* vboxnet* c-* ctmp-*
+      # (Xen) and virbr* and vnet* (libvirt).
+      denyinterfaces ${toString ignoredInterfaces} lo peth* vif* tap* tun* virbr* vnet* vboxnet*
 
       ${config.networking.dhcpcd.extraConfig}
     '';
diff --git a/nixos/modules/virtualisation/containers.nix b/nixos/modules/virtualisation/containers.nix
index 7a8c8a4594b5..08c234b94133 100644
--- a/nixos/modules/virtualisation/containers.nix
+++ b/nixos/modules/virtualisation/containers.nix
@@ -4,16 +4,6 @@ with lib;
 
 let
 
-  runInNetns = pkgs.stdenv.mkDerivation {
-    name = "run-in-netns";
-    unpackPhase = "true";
-    buildPhase = ''
-      mkdir -p $out/bin
-      gcc ${./run-in-netns.c} -o $out/bin/run-in-netns
-    '';
-    installPhase = "true";
-  };
-
   nixos-container = pkgs.substituteAll {
     name = "nixos-container";
     dir = "bin";
@@ -23,6 +13,28 @@ let
     inherit (pkgs) socat;
   };
 
+  # The container's init script, a small wrapper around the regular
+  # NixOS stage-2 init script.
+  containerInit = pkgs.writeScript "container-init"
+    ''
+      #! ${pkgs.stdenv.shell} -e
+
+      # Initialise the container side of the veth pair.
+      if [ "$PRIVATE_NETWORK" = 1 ]; then
+        ip link set host0 name eth0
+        ip link set dev eth0 up
+        if [ -n "$HOST_ADDRESS" ]; then
+          ip route add $HOST_ADDRESS dev eth0
+          ip route add default via $HOST_ADDRESS
+        fi
+        if [ -n "$LOCAL_ADDRESS" ]; then
+          ip addr add $LOCAL_ADDRESS dev eth0
+        fi
+      fi
+
+      exec "$1"
+    '';
+
   system = config.nixpkgs.system;
 
 in
@@ -70,7 +82,7 @@ in
                 Whether to give the container its own private virtual
                 Ethernet interface.  The interface is called
                 <literal>eth0</literal>, and is hooked up to the interface
-                <literal>c-<replaceable>container-name</replaceable></literal>
+                <literal>ve-<replaceable>container-name</replaceable></literal>
                 on the host.  If this option is not set, then the
                 container shares the network interfaces of the host,
                 and can bind to any port on any interface.
@@ -176,39 +188,8 @@ in
               "/nix/var/nix/profiles/per-container/$INSTANCE" \
               "/nix/var/nix/gcroots/per-container/$INSTANCE"
 
-            if [ -f "/etc/containers/$INSTANCE.conf" ]; then
-              . "/etc/containers/$INSTANCE.conf"
-            fi
-
-            # Cleanup from last time.
-            ifaceHost=c-$INSTANCE
-            ifaceCont=ctmp-$INSTANCE
-            ns=net-$INSTANCE
-            ip netns del $ns 2> /dev/null || true
-            ip link del $ifaceHost 2> /dev/null || true
-            ip link del $ifaceCont 2> /dev/null || true
-
             if [ "$PRIVATE_NETWORK" = 1 ]; then
-              # Create a pair of virtual ethernet devices.  On the host,
-              # we get ‘c-<container-name’, and on the guest, we get
-              # ‘eth0’.
-              ip link add $ifaceHost type veth peer name $ifaceCont
-              ip netns add $ns
-              ip link set $ifaceCont netns $ns
-              ip netns exec $ns ip link set $ifaceCont name eth0
-              ip netns exec $ns ip link set dev eth0 up
-              ip link set dev $ifaceHost up
-              if [ -n "$HOST_ADDRESS" ]; then
-                ip addr add $HOST_ADDRESS dev $ifaceHost
-                ip netns exec $ns ip route add $HOST_ADDRESS dev eth0
-                ip netns exec $ns ip route add default via $HOST_ADDRESS
-              fi
-              if [ -n "$LOCAL_ADDRESS" ]; then
-                ip netns exec $ns ip addr add $LOCAL_ADDRESS dev eth0
-                ip route add $LOCAL_ADDRESS dev $ifaceHost
-              fi
-              runInNetNs="${runInNetns}/bin/run-in-netns $ns"
-              extraFlags="--capability=CAP_NET_ADMIN"
+              extraFlags="--network-veth"
             fi
 
             # If the host is 64-bit and the container is 32-bit, add a
@@ -219,7 +200,7 @@ in
               fi
             ''}
 
-            exec $runInNetNs ${config.systemd.package}/bin/systemd-nspawn \
+            exec ${config.systemd.package}/bin/systemd-nspawn \
               --keep-unit \
               -M "$INSTANCE" -D "$root" $extraFlags \
               --bind-ro=/nix/store \
@@ -227,7 +208,11 @@ in
               --bind-ro=/nix/var/nix/daemon-socket \
               --bind="/nix/var/nix/profiles/per-container/$INSTANCE:/nix/var/nix/profiles" \
               --bind="/nix/var/nix/gcroots/per-container/$INSTANCE:/nix/var/nix/gcroots" \
-              "''${SYSTEM_PATH:-/nix/var/nix/profiles/system}/init"
+              --setenv PRIVATE_NETWORK="$PRIVATE_NETWORK" \
+              --setenv HOST_ADDRESS="$HOST_ADDRESS" \
+              --setenv LOCAL_ADDRESS="$LOCAL_ADDRESS" \
+              --setenv PATH="$PATH" \
+              ${containerInit} "''${SYSTEM_PATH:-/nix/var/nix/profiles/system}/init"
           '';
 
         postStart =
@@ -237,6 +222,17 @@ in
             # until the start timeout expires if systemd-nspawn exits.
             read x < $root/var/lib/startup-done
             rm -f $root/var/lib/startup-done
+
+            if [ "$PRIVATE_NETWORK" = 1 ]; then
+              ifaceHost=ve-$INSTANCE
+              ip link set dev $ifaceHost up
+              if [ -n "$HOST_ADDRESS" ]; then
+                ip addr add $HOST_ADDRESS dev $ifaceHost
+              fi
+              if [ -n "$LOCAL_ADDRESS" ]; then
+                ip route add $LOCAL_ADDRESS dev $ifaceHost
+              fi
+            fi
           '';
 
         preStop =
@@ -251,14 +247,13 @@ in
           ''
             #! ${pkgs.stdenv.shell} -e
             SYSTEM_PATH=/nix/var/nix/profiles/system
-            if [ -f "/etc/containers/$INSTANCE.conf" ]; then
-              . "/etc/containers/$INSTANCE.conf"
-            fi
             echo $SYSTEM_PATH/bin/switch-to-configuration test | \
               ${pkgs.socat}/bin/socat unix:$root/var/lib/run-command.socket -
           '';
 
         serviceConfig.SyslogIdentifier = "container %i";
+
+        serviceConfig.EnvironmentFile = "-/etc/containers/%i.conf";
       };
 
     # Generate a configuration file in /etc/containers for each
@@ -288,6 +283,8 @@ in
         ${cfg.localAddress} ${name}.containers
       '') config.containers);
 
+    networking.dhcpcd.denyInterfaces = [ "ve-*" ];
+
     environment.systemPackages = [ nixos-container ];
 
   };
diff --git a/nixos/modules/virtualisation/run-in-netns.c b/nixos/modules/virtualisation/run-in-netns.c
deleted file mode 100644
index d375bddf2e6b..000000000000
--- a/nixos/modules/virtualisation/run-in-netns.c
+++ /dev/null
@@ -1,50 +0,0 @@
-#define _GNU_SOURCE
-
-#include <stdio.h>
-#include <string.h>
-#include <errno.h>
-
-#include <unistd.h>
-#include <sched.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/mount.h>
-#include <fcntl.h>
-#include <linux/limits.h>
-
-int main(int argc, char * * argv)
-{
-    if (argc < 3) {
-        fprintf(stderr, "%s: missing arguments\n", argv[0]);
-        return 1;
-    }
-
-    char nsPath[PATH_MAX];
-
-    sprintf(nsPath, "/run/netns/%s", argv[1]);
-
-    int fd = open(nsPath, O_RDONLY);
-    if (fd == -1) {
-        fprintf(stderr, "%s: opening network namespace: %s\n", argv[0], strerror(errno));
-        return 1;
-    }
-
-    if (setns(fd, CLONE_NEWNET) == -1) {
-        fprintf(stderr, "%s: setting network namespace: %s\n", argv[0], strerror(errno));
-        return 1;
-    }
-
-    umount2(nsPath, MNT_DETACH);
-    if (unlink(nsPath) == -1) {
-        fprintf(stderr, "%s: unlinking network namespace: %s\n", argv[0], strerror(errno));
-        return 1;
-    }
-
-    /* FIXME: Remount /sys so that /sys/class/net reflects the
-       interfaces visible in the network namespace. This requires
-       bind-mounting /sys/fs/cgroups etc. */
-
-    execv(argv[2], argv + 2);
-    fprintf(stderr, "%s: running command: %s\n", argv[0], strerror(errno));
-    return 1;
-}