diff options
author | Eelco Dolstra <eelco.dolstra@logicblox.com> | 2014-05-07 17:00:46 +0200 |
---|---|---|
committer | Eelco Dolstra <eelco.dolstra@logicblox.com> | 2014-05-07 17:53:57 +0200 |
commit | 6f7aaf10a5beb6cc10ebe75674d9c8664bd187fa (patch) | |
tree | c4ced63774765a3b7476d958e1d52c59b7be7cd1 | |
parent | 810680bcae1f8ca23744544e87fbf03b705e9493 (diff) | |
download | nixlib-6f7aaf10a5beb6cc10ebe75674d9c8664bd187fa.tar nixlib-6f7aaf10a5beb6cc10ebe75674d9c8664bd187fa.tar.gz nixlib-6f7aaf10a5beb6cc10ebe75674d9c8664bd187fa.tar.bz2 nixlib-6f7aaf10a5beb6cc10ebe75674d9c8664bd187fa.tar.lz nixlib-6f7aaf10a5beb6cc10ebe75674d9c8664bd187fa.tar.xz nixlib-6f7aaf10a5beb6cc10ebe75674d9c8664bd187fa.tar.zst nixlib-6f7aaf10a5beb6cc10ebe75674d9c8664bd187fa.zip |
Containers: Use systemd-nspawn's --network-veth flag
Note that this causes the name of the host-side interface to change from c-<name> to ve-<name>.
-rw-r--r-- | nixos/doc/manual/containers.xml | 8 | ||||
-rw-r--r-- | nixos/doc/manual/release-notes.xml | 22 | ||||
-rw-r--r-- | nixos/modules/services/networking/dhcpcd.nix | 5 | ||||
-rw-r--r-- | nixos/modules/virtualisation/containers.nix | 93 | ||||
-rw-r--r-- | nixos/modules/virtualisation/run-in-netns.c | 50 |
5 files changed, 73 insertions, 105 deletions
diff --git a/nixos/doc/manual/containers.xml b/nixos/doc/manual/containers.xml index b8f170fc614f..2530d5195212 100644 --- a/nixos/doc/manual/containers.xml +++ b/nixos/doc/manual/containers.xml @@ -213,8 +213,8 @@ $ ping -c1 10.233.4.2 <para>Networking is implemented using a pair of virtual Ethernet devices. The network interface in the container is called <literal>eth0</literal>, while the matching interface in the host is -called <literal>c-<replaceable>container-name</replaceable></literal> -(e.g., <literal>c-foo</literal>). The container has its own network +called <literal>ve-<replaceable>container-name</replaceable></literal> +(e.g., <literal>ve-foo</literal>). The container has its own network namespace and the <literal>CAP_NET_ADMIN</literal> capability, so it can perform arbitrary network configuration such as setting up firewall rules, without affecting or having access to the host’s @@ -228,11 +228,11 @@ on the host: <programlisting> networking.nat.enable = true; -networking.nat.internalInterfaces = ["c-+"]; +networking.nat.internalInterfaces = ["ve-+"]; networking.nat.externalInterface = "eth0"; </programlisting> where <literal>eth0</literal> should be replaced with the desired -external interface. Note that <literal>c-+</literal> is a wildcard +external interface. Note that <literal>ve-+</literal> is a wildcard that matches all container interfaces.</para> </section> diff --git a/nixos/doc/manual/release-notes.xml b/nixos/doc/manual/release-notes.xml index 836cb5c19f4c..52e88bb4c861 100644 --- a/nixos/doc/manual/release-notes.xml +++ b/nixos/doc/manual/release-notes.xml @@ -6,6 +6,28 @@ <!--==================================================================--> +<section xml:id="sec-release-14.10"> + +<title>Release 14.10 (“Caterpillar”, 2014/10/??)</title> + +<para>When upgrading from a previous release, please be aware of the +following incompatible changes: + +<itemizedlist> + + <listitem><para>The host side of a container virtual Ethernet pair + is now called <literal>ve-<replaceable>container-name</replaceable></literal> + rather than <literal>c-<replaceable>container-name</replaceable></literal>.</para></listitem> + +</itemizedlist> + +</para> + +</section> + + +<!--==================================================================--> + <section xml:id="sec-release-14.04"> <title>Release 14.04 (“Baboon”, 2014/04/30)</title> diff --git a/nixos/modules/services/networking/dhcpcd.nix b/nixos/modules/services/networking/dhcpcd.nix index 757340b3c2cd..8799d7d8a201 100644 --- a/nixos/modules/services/networking/dhcpcd.nix +++ b/nixos/modules/services/networking/dhcpcd.nix @@ -34,9 +34,8 @@ let # Ignore peth* devices; on Xen, they're renamed physical # Ethernet cards used for bridging. Likewise for vif* and tap* - # (Xen) and virbr* and vnet* (libvirt) and c-* and ctmp-* (NixOS - # containers). - denyinterfaces ${toString ignoredInterfaces} lo peth* vif* tap* tun* virbr* vnet* vboxnet* c-* ctmp-* + # (Xen) and virbr* and vnet* (libvirt). + denyinterfaces ${toString ignoredInterfaces} lo peth* vif* tap* tun* virbr* vnet* vboxnet* ${config.networking.dhcpcd.extraConfig} ''; diff --git a/nixos/modules/virtualisation/containers.nix b/nixos/modules/virtualisation/containers.nix index 7a8c8a4594b5..08c234b94133 100644 --- a/nixos/modules/virtualisation/containers.nix +++ b/nixos/modules/virtualisation/containers.nix @@ -4,16 +4,6 @@ with lib; let - runInNetns = pkgs.stdenv.mkDerivation { - name = "run-in-netns"; - unpackPhase = "true"; - buildPhase = '' - mkdir -p $out/bin - gcc ${./run-in-netns.c} -o $out/bin/run-in-netns - ''; - installPhase = "true"; - }; - nixos-container = pkgs.substituteAll { name = "nixos-container"; dir = "bin"; @@ -23,6 +13,28 @@ let inherit (pkgs) socat; }; + # The container's init script, a small wrapper around the regular + # NixOS stage-2 init script. + containerInit = pkgs.writeScript "container-init" + '' + #! ${pkgs.stdenv.shell} -e + + # Initialise the container side of the veth pair. + if [ "$PRIVATE_NETWORK" = 1 ]; then + ip link set host0 name eth0 + ip link set dev eth0 up + if [ -n "$HOST_ADDRESS" ]; then + ip route add $HOST_ADDRESS dev eth0 + ip route add default via $HOST_ADDRESS + fi + if [ -n "$LOCAL_ADDRESS" ]; then + ip addr add $LOCAL_ADDRESS dev eth0 + fi + fi + + exec "$1" + ''; + system = config.nixpkgs.system; in @@ -70,7 +82,7 @@ in Whether to give the container its own private virtual Ethernet interface. The interface is called <literal>eth0</literal>, and is hooked up to the interface - <literal>c-<replaceable>container-name</replaceable></literal> + <literal>ve-<replaceable>container-name</replaceable></literal> on the host. If this option is not set, then the container shares the network interfaces of the host, and can bind to any port on any interface. @@ -176,39 +188,8 @@ in "/nix/var/nix/profiles/per-container/$INSTANCE" \ "/nix/var/nix/gcroots/per-container/$INSTANCE" - if [ -f "/etc/containers/$INSTANCE.conf" ]; then - . "/etc/containers/$INSTANCE.conf" - fi - - # Cleanup from last time. - ifaceHost=c-$INSTANCE - ifaceCont=ctmp-$INSTANCE - ns=net-$INSTANCE - ip netns del $ns 2> /dev/null || true - ip link del $ifaceHost 2> /dev/null || true - ip link del $ifaceCont 2> /dev/null || true - if [ "$PRIVATE_NETWORK" = 1 ]; then - # Create a pair of virtual ethernet devices. On the host, - # we get ‘c-<container-name’, and on the guest, we get - # ‘eth0’. - ip link add $ifaceHost type veth peer name $ifaceCont - ip netns add $ns - ip link set $ifaceCont netns $ns - ip netns exec $ns ip link set $ifaceCont name eth0 - ip netns exec $ns ip link set dev eth0 up - ip link set dev $ifaceHost up - if [ -n "$HOST_ADDRESS" ]; then - ip addr add $HOST_ADDRESS dev $ifaceHost - ip netns exec $ns ip route add $HOST_ADDRESS dev eth0 - ip netns exec $ns ip route add default via $HOST_ADDRESS - fi - if [ -n "$LOCAL_ADDRESS" ]; then - ip netns exec $ns ip addr add $LOCAL_ADDRESS dev eth0 - ip route add $LOCAL_ADDRESS dev $ifaceHost - fi - runInNetNs="${runInNetns}/bin/run-in-netns $ns" - extraFlags="--capability=CAP_NET_ADMIN" + extraFlags="--network-veth" fi # If the host is 64-bit and the container is 32-bit, add a @@ -219,7 +200,7 @@ in fi ''} - exec $runInNetNs ${config.systemd.package}/bin/systemd-nspawn \ + exec ${config.systemd.package}/bin/systemd-nspawn \ --keep-unit \ -M "$INSTANCE" -D "$root" $extraFlags \ --bind-ro=/nix/store \ @@ -227,7 +208,11 @@ in --bind-ro=/nix/var/nix/daemon-socket \ --bind="/nix/var/nix/profiles/per-container/$INSTANCE:/nix/var/nix/profiles" \ --bind="/nix/var/nix/gcroots/per-container/$INSTANCE:/nix/var/nix/gcroots" \ - "''${SYSTEM_PATH:-/nix/var/nix/profiles/system}/init" + --setenv PRIVATE_NETWORK="$PRIVATE_NETWORK" \ + --setenv HOST_ADDRESS="$HOST_ADDRESS" \ + --setenv LOCAL_ADDRESS="$LOCAL_ADDRESS" \ + --setenv PATH="$PATH" \ + ${containerInit} "''${SYSTEM_PATH:-/nix/var/nix/profiles/system}/init" ''; postStart = @@ -237,6 +222,17 @@ in # until the start timeout expires if systemd-nspawn exits. read x < $root/var/lib/startup-done rm -f $root/var/lib/startup-done + + if [ "$PRIVATE_NETWORK" = 1 ]; then + ifaceHost=ve-$INSTANCE + ip link set dev $ifaceHost up + if [ -n "$HOST_ADDRESS" ]; then + ip addr add $HOST_ADDRESS dev $ifaceHost + fi + if [ -n "$LOCAL_ADDRESS" ]; then + ip route add $LOCAL_ADDRESS dev $ifaceHost + fi + fi ''; preStop = @@ -251,14 +247,13 @@ in '' #! ${pkgs.stdenv.shell} -e SYSTEM_PATH=/nix/var/nix/profiles/system - if [ -f "/etc/containers/$INSTANCE.conf" ]; then - . "/etc/containers/$INSTANCE.conf" - fi echo $SYSTEM_PATH/bin/switch-to-configuration test | \ ${pkgs.socat}/bin/socat unix:$root/var/lib/run-command.socket - ''; serviceConfig.SyslogIdentifier = "container %i"; + + serviceConfig.EnvironmentFile = "-/etc/containers/%i.conf"; }; # Generate a configuration file in /etc/containers for each @@ -288,6 +283,8 @@ in ${cfg.localAddress} ${name}.containers '') config.containers); + networking.dhcpcd.denyInterfaces = [ "ve-*" ]; + environment.systemPackages = [ nixos-container ]; }; diff --git a/nixos/modules/virtualisation/run-in-netns.c b/nixos/modules/virtualisation/run-in-netns.c deleted file mode 100644 index d375bddf2e6b..000000000000 --- a/nixos/modules/virtualisation/run-in-netns.c +++ /dev/null @@ -1,50 +0,0 @@ -#define _GNU_SOURCE - -#include <stdio.h> -#include <string.h> -#include <errno.h> - -#include <unistd.h> -#include <sched.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <sys/mount.h> -#include <fcntl.h> -#include <linux/limits.h> - -int main(int argc, char * * argv) -{ - if (argc < 3) { - fprintf(stderr, "%s: missing arguments\n", argv[0]); - return 1; - } - - char nsPath[PATH_MAX]; - - sprintf(nsPath, "/run/netns/%s", argv[1]); - - int fd = open(nsPath, O_RDONLY); - if (fd == -1) { - fprintf(stderr, "%s: opening network namespace: %s\n", argv[0], strerror(errno)); - return 1; - } - - if (setns(fd, CLONE_NEWNET) == -1) { - fprintf(stderr, "%s: setting network namespace: %s\n", argv[0], strerror(errno)); - return 1; - } - - umount2(nsPath, MNT_DETACH); - if (unlink(nsPath) == -1) { - fprintf(stderr, "%s: unlinking network namespace: %s\n", argv[0], strerror(errno)); - return 1; - } - - /* FIXME: Remount /sys so that /sys/class/net reflects the - interfaces visible in the network namespace. This requires - bind-mounting /sys/fs/cgroups etc. */ - - execv(argv[2], argv + 2); - fprintf(stderr, "%s: running command: %s\n", argv[0], strerror(errno)); - return 1; -} |