summary refs log tree commit diff
diff options
context:
space:
mode:
author Alyssa Ross <hi@alyssa.is> 2023-06-26 18:29:37 +0000
committer Alyssa Ross <hi@alyssa.is> 2024-01-19 12:02:09 +0100
commit 324d50e8af0862ddb0772d7b04d6725a66ebdeb4 (patch)
tree 4707008e2e61095c741fd0cd223c79fa18719448
parent 89250cde5e4886bf04f9dfed1634b5238fead3ef (diff)
download spectrum-324d50e8af0862ddb0772d7b04d6725a66ebdeb4.tar
spectrum-324d50e8af0862ddb0772d7b04d6725a66ebdeb4.tar.gz
spectrum-324d50e8af0862ddb0772d7b04d6725a66ebdeb4.tar.bz2
spectrum-324d50e8af0862ddb0772d7b04d6725a66ebdeb4.tar.lz
spectrum-324d50e8af0862ddb0772d7b04d6725a66ebdeb4.tar.xz
spectrum-324d50e8af0862ddb0772d7b04d6725a66ebdeb4.tar.zst
spectrum-324d50e8af0862ddb0772d7b04d6725a66ebdeb4.zip
img/app: shut down when the application exits
As we move towards graphical applications, the intuitive way for
things to work is to have application VMs terminate when the
application exits.  Once the terminal-based applications are gone (or
at least not the main focus), we can repurpose the console device to
be an interactive shell, so debugging is still possible.

This requires actually implementing an orderly shutdown in the VM.
Prior to now, there's just been poweroff -f.

Signed-off-by: Alyssa Ross <hi@alyssa.is>
-rw-r--r-- host/start-vmm/ch.h 12
-rw-r--r-- host/start-vmm/ch.rs 213
-rw-r--r-- host/start-vmm/kill.c 9
-rw-r--r-- host/start-vmm/kill.rs 17
-rw-r--r-- host/start-vmm/lib.rs 102
-rw-r--r-- host/start-vmm/meson.build 3
-rw-r--r-- host/start-vmm/net.c 112
-rw-r--r-- host/start-vmm/net.rs 22
-rw-r--r-- host/start-vmm/pipe.c 10
-rw-r--r-- host/start-vmm/pipe.rs 21
-rw-r--r-- host/start-vmm/tests/vm_command-basic.rs 2
-rw-r--r-- host/start-vmm/tests/vm_command-multiple-disks.rs 2
-rw-r--r-- host/start-vmm/tests/vm_command-shared-dir.rs 2
-rw-r--r-- host/start-vmm/unix.rs 17
-rw-r--r-- img/app/Makefile 16
-rw-r--r-- img/app/etc/s6-linux-init/run-image/service/s6-linux-init-shutdownd/notification-fd 1
-rw-r--r-- img/app/etc/s6-linux-init/run-image/service/s6-linux-init-shutdownd/notification-fd.license 2
-rwxr-xr-x img/app/etc/s6-linux-init/run-image/service/s6-linux-init-shutdownd/run 5
-rwxr-xr-x img/app/etc/s6-linux-init/scripts/rc.shutdown 5
-rwxr-xr-x img/app/etc/s6-linux-init/scripts/rc.shutdown.final 3
-rwxr-xr-x img/app/etc/s6-rc/app/run 2
-rwxr-xr-x scripts/make-erofs.sh 2
22 files changed, 332 insertions, 248 deletions
diff --git a/host/start-vmm/ch.h b/host/start-vmm/ch.h
index 5143723..8c4d597 100644
--- a/host/start-vmm/ch.h
+++ b/host/start-vmm/ch.h
@@ -3,15 +3,15 @@
 
 #include <stdint.h>
 
-struct ch_device;
+#include <net/if.h>
+
+struct vm_name;
 
 struct net_config {
 	int fd;
+	char *id;
 	uint8_t mac[6];
 };
 
-int ch_add_net(const char *vm_name, const struct net_config *,
-               struct ch_device **out);
-int ch_remove_device(const char *vm_name, struct ch_device *);
-
-void ch_device_free(struct ch_device *);
+int ch_add_net(struct vm_name *vm_name, const struct net_config *);
+int ch_remove_device(struct vm_name *vm_name, const char[static IFNAMSIZ]);
diff --git a/host/start-vmm/ch.rs b/host/start-vmm/ch.rs
index cc05d84..493e043 100644
--- a/host/start-vmm/ch.rs
+++ b/host/start-vmm/ch.rs
@@ -1,22 +1,21 @@
 // SPDX-License-Identifier: EUPL-1.2+
 // SPDX-FileCopyrightText: 2022-2023 Alyssa Ross <hi@alyssa.is>
 
-use std::ffi::{CStr, OsStr, OsString};
-use std::io::Write;
-use std::mem::take;
-use std::num::NonZeroI32;
+use std::ffi::{CStr, OsStr};
+use std::io::{self, Write};
 use std::os::raw::{c_char, c_int};
 use std::os::unix::prelude::*;
+use std::path::PathBuf;
 use std::process::{Command, Stdio};
+use std::str::{self, Utf8Error};
 
 use miniserde::{json, Serialize};
 
-use crate::net::MacAddress;
+use crate::net::{connect_net, MacAddress};
 
-// Trivially safe.
-const EINVAL: NonZeroI32 = unsafe { NonZeroI32::new_unchecked(22) };
-const EPERM: NonZeroI32 = unsafe { NonZeroI32::new_unchecked(1) };
-const EPROTO: NonZeroI32 = unsafe { NonZeroI32::new_unchecked(71) };
+const EINVAL: i32 = 22;
+const EPERM: i32 = 1;
+const EPROTO: i32 = 71;
 
 #[derive(Serialize)]
 pub struct ConsoleConfig {
@@ -42,13 +41,6 @@ pub struct GpuConfig {
 }
 
 #[derive(Serialize)]
-#[repr(C)]
-pub struct NetConfig {
-    pub fd: RawFd,
-    pub mac: MacAddress,
-}
-
-#[derive(Serialize)]
 pub struct MemoryConfig {
     pub size: i64,
     pub shared: bool,
@@ -67,15 +59,17 @@ pub struct VmConfig {
     pub fs: Vec<FsConfig>,
     pub gpu: Vec<GpuConfig>,
     pub memory: MemoryConfig,
-    pub net: Vec<NetConfig>,
     pub payload: PayloadConfig,
     pub serial: ConsoleConfig,
 }
 
-fn command(vm_name: &str, s: impl AsRef<OsStr>) -> Command {
-    let mut api_socket_path = OsString::from("/run/service/vmm/instance/");
-    api_socket_path.push(vm_name);
-    api_socket_path.push("/env/cloud-hypervisor.sock");
+fn command(vm_name: Option<&str>, s: impl AsRef<OsStr>) -> Command {
+    let mut api_socket_path = PathBuf::new();
+    if let Some(vm_name) = vm_name {
+        api_socket_path.push("/run/service/vmm/instance");
+        api_socket_path.push(vm_name);
+    }
+    api_socket_path.push("env/cloud-hypervisor.sock");
 
     let mut command = Command::new("ch-remote");
     command.stdin(Stdio::null());
@@ -85,12 +79,15 @@ fn command(vm_name: &str, s: impl AsRef<OsStr>) -> Command {
     command
 }
 
-pub fn create_vm(vm_name: &str, mut config: VmConfig) -> Result<(), String> {
-    // Net devices can't be created from file descriptors in vm.create.
-    // https://github.com/cloud-hypervisor/cloud-hypervisor/issues/5523
-    let nets = take(&mut config.net);
-
-    let mut ch_remote = command(vm_name, "create")
+// Net devices can't be created from file descriptors in vm.create.
+// https://github.com/cloud-hypervisor/cloud-hypervisor/issues/5523
+pub fn create_vm(
+    vmm_pipe_writer: BorrowedFd,
+    vmm_pid: u32,
+    config: VmConfig,
+    net_providers: &[impl AsRef<str>],
+) -> Result<(), String> {
+    let mut ch_remote = command(None, "create")
         .args(["--", "-"])
         .stdin(Stdio::piped())
         .spawn()
@@ -103,89 +100,99 @@ pub fn create_vm(vm_name: &str, mut config: VmConfig) -> Result<(), String> {
     let status = ch_remote
         .wait()
         .map_err(|e| format!("waiting for ch-remote: {e}"))?;
+
     if status.success() {
+        for provider in net_providers {
+            let provider = provider.as_ref();
+            if unsafe { connect_net(vmm_pipe_writer, vmm_pid, provider) } == -1 {
+                let e = io::Error::last_os_error();
+                return Err(format!("connecting to {:?}: {}", provider, e));
+            }
+        }
+
+        Ok(())
     } else if let Some(code) = status.code() {
-        return Err(format!("ch-remote exited {code}"));
+        Err(format!("ch-remote exited {code}"))
     } else {
         let signal = status.signal().unwrap();
-        return Err(format!("ch-remote killed by signal {signal}"));
+        Err(format!("ch-remote killed by signal {signal}"))
     }
+}
 
-    for net in nets {
-        add_net(vm_name, &net).map_err(|e| format!("failed to add net: {e}"))?;
-    }
+#[repr(C)]
+pub struct NetConfig {
+    pub fd: RawFd,
+    pub id: *mut [u8; 16],
+    pub mac: MacAddress,
+}
 
-    Ok(())
+/// # Safety
+///
+/// The returned reference is only valid as long as the pointer is.
+unsafe fn vm_name_str<'a>(vm_name: *const c_char) -> Result<Option<&'a str>, Utf8Error> {
+    if vm_name.is_null() {
+        Ok(None)
+    } else {
+        CStr::from_ptr(vm_name).to_str().map(Some)
+    }
 }
 
-pub fn add_net(vm_name: &str, net: &NetConfig) -> Result<OsString, NonZeroI32> {
-    let mut ch_remote = command(vm_name, "add-net")
-        .arg(format!("fd={},mac={}", net.fd, net.mac))
-        .stdout(Stdio::piped())
-        .spawn()
-        .or(Err(EPERM))?;
-
-    let jq_out = match Command::new("jq")
-        .args(["-j", ".id"])
-        .stdin(ch_remote.stdout.take().unwrap())
-        .stderr(Stdio::inherit())
-        .output()
-    {
-        Ok(o) => o,
-        Err(_) => {
-            // Try not to leave a zombie.
-            let _ = ch_remote.kill();
-            let _ = ch_remote.wait();
-            return Err(EPERM);
-        }
-    };
+/// # Safety
+///
+/// The returned reference is only valid as long as the pointer is.
+unsafe fn device_id_str<'a>(device_id: *const [u8; 16]) -> Option<&'a str> {
+    if device_id.is_null() {
+        return None;
+    }
 
-    if let Ok(ch_remote_status) = ch_remote.wait() {
-        if ch_remote_status.success() && jq_out.status.success() {
-            return Ok(OsString::from_vec(jq_out.stdout));
-        }
+    let id = &*device_id;
+
+    if id.contains(&b',') {
+        return None;
     }
 
-    Err(EPROTO)
+    let nul_index = id.iter().position(|&c| c == 0).unwrap_or(id.len());
+    str::from_utf8(id[..nul_index].into()).ok()
 }
 
-pub fn remove_device(vm_name: &str, device_id: &OsStr) -> Result<(), NonZeroI32> {
-    let ch_remote = command(vm_name, "remove-device")
-        .arg(device_id)
-        .status()
-        .or(Err(EPERM))?;
+impl NetConfig {
+    /// # Safety
+    ///
+    /// [`id`] must be valid.
+    unsafe fn cloud_hypervisor_cli_string(&self) -> Option<String> {
+        let id = device_id_str(self.id)?;
 
-    if ch_remote.success() {
-        Ok(())
-    } else {
-        Err(EPROTO)
+        Some(format!("fd={},id={id},mac={}", self.fd, self.mac))
     }
 }
 
 /// # Safety
 ///
-/// - `vm_name` must point to a valid C string.
-/// - `tap` must be a file descriptor describing an tap device.
-/// - `mac` must be a valid pointer.
+/// - `vm_name` must be null, or point to a valid C string.
+/// - `net.fd` must be a file descriptor describing a tap device.
+/// - `net.id` must be null, or point to valid data.
 #[export_name = "ch_add_net"]
-unsafe extern "C" fn add_net_c(
-    vm_name: *const c_char,
-    net: &NetConfig,
-    id: *mut *mut OsString,
-) -> c_int {
-    let Ok(vm_name) = CStr::from_ptr(vm_name).to_str() else {
-        return EINVAL.into();
+unsafe extern "C" fn add_net(vm_name: *const c_char, net: &NetConfig) -> c_int {
+    let Ok(vm_name) = vm_name_str(vm_name) else {
+        return EINVAL;
     };
 
-    match add_net(vm_name, net) {
-        Err(e) => e.get(),
-        Ok(id_str) => {
-            if !id.is_null() {
-                let token = Box::into_raw(Box::new(id_str));
-                *id = token;
-            }
-            0
-        }
+    let Some(arg) = net.cloud_hypervisor_cli_string() else {
+        return EINVAL;
+    };
+
+    let Ok(ch_remote) = command(vm_name, "add-net")
+        .arg(arg)
+        .stdout(Stdio::null())
+        .status()
+    else {
+        return EPERM;
+    };
+
+    if ch_remote.success() {
+        0
+    } else {
+        EPROTO
     }
 }
 
@@ -195,26 +202,22 @@ unsafe extern "C" fn add_net_c(
 /// - `id` must be a device ID obtained by calling `add_net_c`.  After
 ///   calling `remove_device_c`, the pointer is no longer valid.
 #[export_name = "ch_remove_device"]
-unsafe extern "C" fn remove_device_c(vm_name: *const c_char, device_id: *mut OsString) -> c_int {
-    let Ok(vm_name) = CStr::from_ptr(vm_name).to_str() else {
-        return EINVAL.into();
+unsafe extern "C" fn remove_device(vm_name: *const c_char, device_id: *const [u8; 16]) -> c_int {
+    let Ok(vm_name) = vm_name_str(vm_name) else {
+        return EINVAL;
     };
-    let device_id = Box::from_raw(device_id);
 
-    if let Err(e) = remove_device(vm_name, device_id.as_ref()) {
-        e.get()
-    } else {
-        0
-    }
-}
+    let Some(device_id) = device_id_str(device_id) else {
+        return EINVAL;
+    };
 
-/// # Safety
-///
-/// `id` must be a device ID obtained by calling `add_net_c`.  After
-/// calling `device_free`, the pointer is no longer valid.
-#[export_name = "ch_device_free"]
-unsafe extern "C" fn device_free(id: *mut OsString) {
-    if !id.is_null() {
-        drop(Box::from_raw(id))
+    let Ok(ch_remote) = command(vm_name, "remove-device").arg(device_id).status() else {
+        return EPERM;
+    };
+
+    if ch_remote.success() {
+        0
+    } else {
+        EPROTO
     }
 }
diff --git a/host/start-vmm/kill.c b/host/start-vmm/kill.c
new file mode 100644
index 0000000..d2618fd
--- /dev/null
+++ b/host/start-vmm/kill.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: EUPL-1.2+
+// SPDX-FileCopyrightText: 2023 Alyssa Ross <hi@alyssa.is>
+
+#include <signal.h>
+
+int kill_term(pid_t pid)
+{
+	return kill(pid, SIGTERM);
+}
diff --git a/host/start-vmm/kill.rs b/host/start-vmm/kill.rs
new file mode 100644
index 0000000..68ab18f
--- /dev/null
+++ b/host/start-vmm/kill.rs
@@ -0,0 +1,17 @@
+// SPDX-License-Identifier: EUPL-1.2+
+// SPDX-FileCopyrightText: 2023 Alyssa Ross <hi@alyssa.is>
+
+use std::ffi::c_int;
+use std::io;
+
+pub fn kill_term(pid: u32) -> io::Result<()> {
+    extern "C" {
+        fn kill_term(pid: u32) -> c_int;
+    }
+
+    // SAFETY: trivial.
+    match unsafe { kill_term(pid) } {
+        -1 => Err(io::Error::last_os_error()),
+        _ => Ok(()),
+    }
+}
diff --git a/host/start-vmm/lib.rs b/host/start-vmm/lib.rs
index f9b7566..c54fc29 100644
--- a/host/start-vmm/lib.rs
+++ b/host/start-vmm/lib.rs
@@ -3,32 +3,29 @@
 
 mod ch;
 mod fork;
+mod kill;
 mod net;
+mod pipe;
 mod s6;
 mod unix;
 
 use std::borrow::Cow;
 use std::env::args_os;
-use std::ffi::{CString, OsStr};
-use std::fs::remove_file;
+use std::ffi::OsStr;
+use std::fs::{remove_file, File};
 use std::io::{self, ErrorKind};
+use std::mem::forget;
 use std::os::unix::net::UnixListener;
 use std::os::unix::prelude::*;
-use std::os::unix::process::parent_id;
 use std::path::Path;
-use std::process::{exit, Command};
+use std::process::{self, exit, Command};
 
 use ch::{ConsoleConfig, DiskConfig, FsConfig, GpuConfig, MemoryConfig, PayloadConfig, VmConfig};
 use fork::double_fork;
-use net::net_setup;
+use kill::kill_term;
+use pipe::pipe;
 use s6::notify_readiness;
-use unix::clear_cloexec;
-
-const SIGTERM: i32 = 15;
-
-extern "C" {
-    fn kill(pid: i32, sig: i32) -> i32;
-}
+use unix::AsFdExt;
 
 pub fn prog_name() -> String {
     args_os()
@@ -47,15 +44,14 @@ pub fn create_api_socket() -> Result<UnixListener, String> {
         .map_err(|e| format!("creating API socket: {e}"))?;
 
     // Safe because we own api_socket.
-    if unsafe { clear_cloexec(api_socket.as_fd()) } == -1 {
-        let errno = io::Error::last_os_error();
-        return Err(format!("clearing CLOEXEC on API socket fd: {}", errno));
+    if let Err(e) = api_socket.clear_cloexec() {
+        return Err(format!("clearing CLOEXEC on API socket fd: {}", e));
     }
 
     Ok(api_socket)
 }
 
-pub fn vm_config(vm_name: &str, config_root: &Path) -> Result<VmConfig, String> {
+pub fn vm_config(vm_name: &str, config_root: &Path) -> Result<(VmConfig, Vec<String>), String> {
     if config_root.to_str().is_none() {
         return Err(format!("config root {:?} is not valid UTF-8", config_root));
     }
@@ -68,7 +64,7 @@ pub fn vm_config(vm_name: &str, config_root: &Path) -> Result<VmConfig, String>
     let shared_dirs_dir = config_dir.join("shared-dirs");
     let wayland_path = config_dir.join("wayland");
 
-    Ok(VmConfig {
+    Ok((VmConfig {
         console: ConsoleConfig {
             mode: "Pty",
             file: None,
@@ -134,27 +130,25 @@ pub fn vm_config(vm_name: &str, config_root: &Path) -> Result<VmConfig, String>
             size: 256 << 20,
             shared: true,
         },
-        net: match net_providers_dir.read_dir() {
+        payload: PayloadConfig {
+            kernel: kernel_path.to_str().unwrap().to_string(),
+            cmdline: "console=ttyS0 root=PARTLABEL=root",
+        },
+        serial: ConsoleConfig {
+            mode: "File",
+            file: Some(format!("/run/{vm_name}.log")),
+        },
+    },
+        match net_providers_dir.read_dir() {
             Ok(entries) => entries
                 .into_iter()
-                .map(|result| {
-                    let entry = result
+                .map(|result|
+                    result
                         .map_err(|e| format!("examining directory entry: {}", e))?
-                        .file_name();
-
-                    // Safe because provider_name is the name of a directory entry, so
-                    // can't contain a null byte.
-                    let provider_name = unsafe { CString::from_vec_unchecked(entry.into_vec()) };
-
-                    // Safe because we pass a valid pointer and check the result.
-                    let net = unsafe { net_setup(provider_name.as_ptr()) };
-                    if net.fd == -1 {
-                        let e = io::Error::last_os_error();
-                        return Err(format!("setting up networking failed: {e}"));
-                    }
-
-                    Ok(net)
-                })
+                        .file_name()
+                        .into_string()
+                        .map_err(|name| format!("provider name {:?} is not UTF-8", name))
+                )
                 // TODO: to support multiple net providers, we'll need
                 // a better naming scheme for tap and bridge devices.
                 .take(1)
@@ -162,25 +156,25 @@ pub fn vm_config(vm_name: &str, config_root: &Path) -> Result<VmConfig, String>
             Err(e) if e.kind() == ErrorKind::NotFound => Default::default(),
             Err(e) => return Err(format!("reading directory {:?}: {e}", net_providers_dir)),
         },
-        payload: PayloadConfig {
-            kernel: kernel_path.to_str().unwrap().to_string(),
-            cmdline: "console=ttyS0 root=PARTLABEL=root",
-        },
-        serial: ConsoleConfig {
-            mode: "File",
-            file: Some(format!("/run/{vm_name}.log")),
-        },
-    })
+    ))
 }
 
 /// # Safety
 ///
 /// Calls [notify_readiness], so can only be called once per process.
-unsafe fn create_vm_child_main(vm_name: &str, config: VmConfig) -> ! {
-    if let Err(e) = ch::create_vm(vm_name, config) {
+unsafe fn create_vm_child_main(
+    reader: File,
+    writer: File,
+    vmm_pid: u32,
+    config: (VmConfig, Vec<String>),
+) -> ! {
+    drop(reader);
+
+    let (vm_config, nets) = config;
+
+    if let Err(e) = ch::create_vm(writer.as_fd(), vmm_pid, vm_config, nets.as_ref()) {
         eprintln!("{}: creating VM: {e}", prog_name());
-        if kill(parent_id() as _, SIGTERM) == -1 {
-            let e = io::Error::last_os_error();
+        if let Err(e) = kill_term(vmm_pid) {
             eprintln!("{}: killing cloud-hypervisor: {e}", prog_name());
         };
         exit(1);
@@ -207,6 +201,11 @@ pub fn create_vm(dir: &Path, config_root: &Path) -> Result<(), String> {
         return Err(format!("VM name may not contain a colon: {:?}", vm_name));
     }
 
+    let (reader, writer) = pipe().map_err(|e| format!("creating pipe: {e}"))?;
+    reader
+        .clear_cloexec()
+        .map_err(|e| format!("clearing O_CLOEXEC: {e}"))?;
+    let pid = process::id();
     let config = vm_config(vm_name, config_root)?;
 
     // SAFETY: safe because we ensure we don't violate any invariants
@@ -216,8 +215,11 @@ pub fn create_vm(dir: &Path, config_root: &Path) -> Result<(), String> {
         e if e < 0 => Err(format!("double fork: {}", io::Error::from_raw_os_error(-e))),
         // SAFETY: create_vm_child_main can only be called once per process,
         // but this is a new process, so we know it hasn't been called before.
-        0 => unsafe { create_vm_child_main(vm_name, config) },
-        _ => Ok(()),
+        0 => unsafe { create_vm_child_main(reader, writer, pid, config) },
+        _ => {
+            forget(reader);
+            Ok(())
+        }
     }
 }
 
diff --git a/host/start-vmm/meson.build b/host/start-vmm/meson.build
index 564be1b..78bfb83 100644
--- a/host/start-vmm/meson.build
+++ b/host/start-vmm/meson.build
@@ -8,7 +8,8 @@ add_project_arguments('-D_GNU_SOURCE', '-Wno-error=attributes', language : 'c')
 
 miniserde_dep = dependency('miniserde')
 
-c_lib = static_library('start-vmm', 'fork.c', 'net.c', 'net-util.c', 'unix.c')
+c_lib = static_library('start-vmm',
+  'fork.c', 'kill.c', 'net.c', 'net-util.c', 'pipe.c', 'unix.c')
 rust_lib = static_library('start_vmm', 'lib.rs',
   dependencies : miniserde_dep,
   link_with : c_lib)
diff --git a/host/start-vmm/net.c b/host/start-vmm/net.c
index 4fc5486..a586486 100644
--- a/host/start-vmm/net.c
+++ b/host/start-vmm/net.c
@@ -19,14 +19,16 @@
 
 #include <linux/if_tun.h>
 
-static int setup_tap(const char *bridge_name, const char *tap_prefix)
+struct vm_name;
+
+static int setup_tap(const char *bridge_name, const char *tap_prefix,
+		     pid_t vmm_pid, char tap_name[static IFNAMSIZ])
 {
 	int fd;
-	char tap_name[IFNAMSIZ];
 
 	// We assume ≤16-bit pids.
-	if (snprintf(tap_name, sizeof tap_name, "%s%d",
-	             tap_prefix, getpid()) == -1)
+	if (snprintf(tap_name, IFNAMSIZ, "%s%d",
+	             tap_prefix, vmm_pid) == -1)
 		return -1;
 	if ((fd = tap_open(tap_name, IFF_NO_PI|IFF_VNET_HDR|IFF_TUN_EXCL)) == -1)
 		goto out;
@@ -43,22 +45,19 @@ out:
 	return fd;
 }
 
-static int client_net_setup(const char *bridge_name)
-{
-	return setup_tap(bridge_name, "client");
-}
-
-static int router_net_setup(const char *bridge_name, const char *router_vm_name,
-                            const uint8_t mac[6], struct ch_device **out)
+static int net_setup(const char *bridge_name, const char *tap_prefix,
+		     pid_t vmm_pid, const char mac[static 6],
+		     struct vm_name *vm_name, char *device_id_out)
 {
-	struct net_config net;
 	int e;
+	struct net_config net = { .id = device_id_out };
 
-	memcpy(&net.mac, mac, sizeof net.mac);
-	if ((net.fd = setup_tap(bridge_name, "router")) == -1)
+	memcpy(net.mac, mac, sizeof net.mac);
+
+	if ((net.fd = setup_tap(bridge_name, tap_prefix, vmm_pid, net.id)) == -1)
 		return -1;
 
-	e = ch_add_net(router_vm_name, &net, out);
+	e = ch_add_net(vm_name, &net);
 	close(net.fd);
 	if (!e)
 		return 0;
@@ -66,13 +65,13 @@ static int router_net_setup(const char *bridge_name, const char *router_vm_name,
 	return -1;
 }
 
-static int router_net_cleanup(pid_t pid, const char *vm_name,
-                              struct ch_device *vm_net_device)
+static int router_net_cleanup(pid_t vmm_pid, struct vm_name *vm_name,
+			      char tap_name[static IFNAMSIZ])
 {
 	int e;
 	char name[IFNAMSIZ], newname[IFNAMSIZ], brname[IFNAMSIZ];
 
-	if ((e = ch_remove_device(vm_name, vm_net_device))) {
+	if ((e = ch_remove_device(vm_name, tap_name))) {
 		errno = e;
 		return -1;
 	}
@@ -81,9 +80,9 @@ static int router_net_cleanup(pid_t pid, const char *vm_name,
 	// longer using by freeing up the name.
 	//
 	// We assume ≤16-bit pids.
-	snprintf(name, sizeof name, "router%d", pid);
-	snprintf(newname, sizeof newname, "_dead%d", pid);
-	snprintf(brname, sizeof brname, "br%d", pid);
+	snprintf(name, sizeof name, "router%d", vmm_pid);
+	snprintf(newname, sizeof newname, "_dead%d", vmm_pid);
+	snprintf(brname, sizeof brname, "br%d", vmm_pid);
 
 	if (bridge_remove_if(brname, name) == -1)
 		warn("removing %s from %s", name, brname);
@@ -93,16 +92,16 @@ static int router_net_cleanup(pid_t pid, const char *vm_name,
 	return if_rename(name, newname);
 }
 
-static int bridge_cleanup(pid_t pid)
+static int bridge_cleanup(pid_t vmm_pid)
 {
 	char name[IFNAMSIZ];
-	snprintf(name, sizeof name, "br%d", pid);
+	snprintf(name, sizeof name, "br%d", vmm_pid);
 	return bridge_delete(name);
 }
 
-static noreturn void exit_listener_main(int fd, pid_t pid,
-                                        const char *router_vm_name,
-                                        struct ch_device *router_vm_net_device)
+static noreturn void exit_listener_main(int fd, pid_t vmm_pid,
+                                        struct vm_name *router_vm_name,
+                                        char router_tap_name[static IFNAMSIZ])
 {
 	// Wait for the other end of the pipe to be closed.
 	int status = EXIT_SUCCESS;
@@ -115,12 +114,12 @@ static noreturn void exit_listener_main(int fd, pid_t pid,
 	}
 	assert(pollfd.revents == POLLERR);
 
-	if (router_net_cleanup(pid, router_vm_name,
-	                       router_vm_net_device) == -1) {
+	if (router_net_cleanup(vmm_pid, router_vm_name,
+	                       router_tap_name) == -1) {
 		warn("cleaning up router tap");
 		status = EXIT_FAILURE;
 	}
-	if (bridge_cleanup(pid) == -1) {
+	if (bridge_cleanup(vmm_pid) == -1) {
 		warn("cleaning up bridge");
 		status = EXIT_FAILURE;
 	}
@@ -128,70 +127,53 @@ static noreturn void exit_listener_main(int fd, pid_t pid,
 	exit(status);
 }
 
-static int exit_listener_setup(const char *router_vm_name,
-                               struct ch_device *router_vm_net_device)
+static int exit_listener_setup(int vmm_pipe_writer, pid_t vmm_pid,
+			       struct vm_name *router_vm_name,
+                               char router_tap_name[static IFNAMSIZ])
 {
-	pid_t pid = getpid();
-	int fd[2];
-
-	if (pipe(fd) == -1)
-		return -1;
-
 	switch (fork()) {
 	case -1:
-		close(fd[0]);
-		close(fd[1]);
 		return -1;
 	case 0:
-		close(fd[0]);
-		exit_listener_main(fd[1], pid, router_vm_name,
-		                   router_vm_net_device);
+		exit_listener_main(vmm_pipe_writer, vmm_pid, router_vm_name,
+		                   router_tap_name);
 	default:
-		close(fd[1]);
 		return 0;
 	}
 }
 
-struct net_config net_setup(const char *router_vm_name)
+int connect_net(int vmm_pipe_writer, pid_t vmm_pid,
+		struct vm_name *provider_name)
 {
-	struct ch_device *router_vm_net_device = NULL;
-	struct net_config r = { .fd = -1, .mac = { 0 } };
+	char id[IFNAMSIZ];
 	char bridge_name[IFNAMSIZ];
-	pid_t pid = getpid();
 	// We assume ≤16-bit pids.
-	uint8_t router_mac[6] = { 0x0A, 0xB3, 0xEC, 0x80, pid >> 8, pid };
+	char mac[] = { 0x0A, 0xB3, 0xEC, 0x00, vmm_pid >> 8, vmm_pid };
 
-	memcpy(r.mac, router_mac, 6);
-	r.mac[3] = 0x00;
-
-	if (snprintf(bridge_name, sizeof bridge_name, "br%d", pid) == -1)
-		return r;
+	if (snprintf(bridge_name, sizeof bridge_name, "br%d", vmm_pid) == -1)
+		return -1;
 
 	if (bridge_add(bridge_name) == -1)
-		goto out;
+		return -1;
 	if (if_up(bridge_name) == -1)
 		goto fail_bridge;
 
-	if ((r.fd = client_net_setup(bridge_name)) == -1)
+	if (net_setup(bridge_name, "client", vmm_pid, mac, NULL, id) == -1)
 		goto fail_bridge;
 
-	if (router_net_setup(bridge_name, router_vm_name, router_mac,
-	                     &router_vm_net_device) == -1)
+	mac[3] = 0x80;
+	if (net_setup(bridge_name, "router", vmm_pid, mac, provider_name, id) == -1)
 		goto fail_bridge;
 
-	// Set up a process that will listen for this process dying,
+	// Set up a process that will listen for the main process dying,
 	// and remove the interface from the netvm, and delete the
 	// bridge.
-	if (exit_listener_setup(router_vm_name, router_vm_net_device) == -1)
+	if (exit_listener_setup(vmm_pipe_writer, vmm_pid, provider_name, id) == -1)
 		goto fail_bridge;
 
-	goto out;
+	return 0;
 
 fail_bridge:
 	bridge_delete(bridge_name);
-	close(r.fd);
-	r.fd = -1;
-out:
-	ch_device_free(router_vm_net_device);
-	return r;
+	return -1;
 }
diff --git a/host/start-vmm/net.rs b/host/start-vmm/net.rs
index 8ca19c5..8c1c942 100644
--- a/host/start-vmm/net.rs
+++ b/host/start-vmm/net.rs
@@ -3,21 +3,14 @@
 
 use std::borrow::Cow;
 use std::fmt::{self, Display, Formatter};
-use std::os::raw::c_char;
+use std::os::fd::BorrowedFd;
+use std::os::raw::c_int;
 
 use miniserde::ser::Fragment;
 use miniserde::Serialize;
 
-use crate::ch::NetConfig;
-
 #[repr(transparent)]
-pub struct MacAddress([u8; 6]);
-
-impl MacAddress {
-    pub fn new(octets: [u8; 6]) -> Self {
-        Self(octets)
-    }
-}
+pub struct MacAddress(pub [u8; 6]);
 
 impl Display for MacAddress {
     fn fmt(&self, f: &mut Formatter) -> fmt::Result {
@@ -36,7 +29,14 @@ impl Serialize for MacAddress {
 }
 
 extern "C" {
-    pub fn net_setup(provider_vm_name: *const c_char) -> NetConfig;
+    // It's safe to pass a reference, because even though it creates
+    // an exit listener that continues running after connect_net()
+    // returns in the parent, it still hasn't returned in the child,
+    // which has its own copy of everything.
+    //
+    // It's safe for it to be a str, because the value is opaque to C.
+    #[allow(improper_ctypes)]
+    pub fn connect_net(vmm_pipe_writer: BorrowedFd, vmm_pid: u32, provider_name: &str) -> c_int;
 }
 
 #[cfg(test)]
diff --git a/host/start-vmm/pipe.c b/host/start-vmm/pipe.c
new file mode 100644
index 0000000..0024824
--- /dev/null
+++ b/host/start-vmm/pipe.c
@@ -0,0 +1,10 @@
+// SPDX-License-Identifier: EUPL-1.2+
+// SPDX-FileCopyrightText: 2023 Alyssa Ross <hi@alyssa.is>
+
+#include <fcntl.h>
+#include <unistd.h>
+
+int pipe_cloexec(int fd[2])
+{
+	return pipe2(fd, O_CLOEXEC);
+}
diff --git a/host/start-vmm/pipe.rs b/host/start-vmm/pipe.rs
new file mode 100644
index 0000000..0c85836
--- /dev/null
+++ b/host/start-vmm/pipe.rs
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: EUPL-1.2+
+// SPDX-FileCopyrightText: 2023 Alyssa Ross <hi@alyssa.is>
+
+use std::ffi::c_int;
+use std::fs::File;
+use std::io;
+use std::os::fd::OwnedFd;
+
+extern "C" {
+    fn pipe_cloexec(fds: &mut [Option<OwnedFd>; 2]) -> c_int;
+}
+
+pub fn pipe() -> io::Result<(File, File)> {
+    let mut fds = [None, None];
+    // SAFETY: trivially safe.
+    if unsafe { pipe_cloexec(&mut fds) } == -1 {
+        return Err(io::Error::last_os_error());
+    }
+    let [r, w] = fds;
+    Ok((r.unwrap().into(), w.unwrap().into()))
+}
diff --git a/host/start-vmm/tests/vm_command-basic.rs b/host/start-vmm/tests/vm_command-basic.rs
index bc911ae..29aca58 100644
--- a/host/start-vmm/tests/vm_command-basic.rs
+++ b/host/start-vmm/tests/vm_command-basic.rs
@@ -18,7 +18,7 @@ fn main() -> std::io::Result<()> {
     File::create(&kernel_path)?;
     File::create(&image_path)?;
 
-    let mut config = vm_config("testvm", tmp_dir.path()).unwrap();
+    let (mut config, _) = vm_config("testvm", tmp_dir.path()).unwrap();
 
     assert_eq!(config.console.mode, "Pty");
     assert_eq!(config.disks.len(), 1);
diff --git a/host/start-vmm/tests/vm_command-multiple-disks.rs b/host/start-vmm/tests/vm_command-multiple-disks.rs
index f7f7a74..2fd0f03 100644
--- a/host/start-vmm/tests/vm_command-multiple-disks.rs
+++ b/host/start-vmm/tests/vm_command-multiple-disks.rs
@@ -26,7 +26,7 @@ fn main() -> std::io::Result<()> {
         symlink("/dev/null", image_path)?;
     }
 
-    let config = vm_config("testvm", tmp_dir.path()).unwrap();
+    let (config, _) = vm_config("testvm", tmp_dir.path()).unwrap();
     assert_eq!(config.disks.len(), 2);
     assert!(config.disks.iter().all(|disk| disk.readonly));
 
diff --git a/host/start-vmm/tests/vm_command-shared-dir.rs b/host/start-vmm/tests/vm_command-shared-dir.rs
index 6a4a1fd..464a6eb 100644
--- a/host/start-vmm/tests/vm_command-shared-dir.rs
+++ b/host/start-vmm/tests/vm_command-shared-dir.rs
@@ -26,7 +26,7 @@ fn main() -> std::io::Result<()> {
     create_dir(vm_config_dir.join("shared-dirs/dir2"))?;
     symlink("/", vm_config_dir.join("shared-dirs/dir2/dir"))?;
 
-    let config = vm_config("testvm", tmp_dir.path()).unwrap();
+    let (config, _) = vm_config("testvm", tmp_dir.path()).unwrap();
     assert_eq!(config.fs.len(), 2);
 
     let mut actual_tags = BTreeSet::new();
diff --git a/host/start-vmm/unix.rs b/host/start-vmm/unix.rs
index 8213497..8475eb4 100644
--- a/host/start-vmm/unix.rs
+++ b/host/start-vmm/unix.rs
@@ -2,8 +2,21 @@
 // SPDX-FileCopyrightText: 2023 Alyssa Ross <hi@alyssa.is>
 
 use std::ffi::c_int;
-use std::os::fd::BorrowedFd;
+use std::io;
+use std::os::fd::{AsFd, BorrowedFd};
 
 extern "C" {
-    pub fn clear_cloexec(fd: BorrowedFd) -> c_int;
+    fn clear_cloexec(fd: BorrowedFd) -> c_int;
 }
+
+pub trait AsFdExt: AsFd {
+    fn clear_cloexec(&self) -> io::Result<()> {
+        // SAFETY: trivial.
+        match unsafe { clear_cloexec(self.as_fd()) } {
+            -1 => Err(io::Error::last_os_error()),
+            _ => Ok(()),
+        }
+    }
+}
+
+impl<T: AsFd> AsFdExt for T {}
diff --git a/img/app/Makefile b/img/app/Makefile
index 95212b8..79d3cdf 100644
--- a/img/app/Makefile
+++ b/img/app/Makefile
@@ -36,11 +36,16 @@ VM_FILES = \
 	etc/mdev/wait \
 	etc/passwd \
 	etc/resolv.conf \
-	etc/s6-linux-init/scripts/rc.init
+	etc/s6-linux-init/run-image/service/s6-linux-init-shutdownd/notification-fd \
+	etc/s6-linux-init/run-image/service/s6-linux-init-shutdownd/run \
+	etc/s6-linux-init/scripts/rc.init \
+	etc/s6-linux-init/scripts/rc.shutdown \
+	etc/s6-linux-init/scripts/rc.shutdown.final
 VM_DIRS = dev run proc sys \
 	etc/s6-linux-init/env \
 	etc/s6-linux-init/run-image/ext \
 	etc/s6-linux-init/run-image/service
+VM_FIFOS = etc/s6-linux-init/run-image/service/s6-linux-init-shutdownd/fifo
 
 # These are separate because they need to be included, but putting
 # them as make dependencies would confuse make.
@@ -48,14 +53,19 @@ VM_LINKS = bin etc/ssl/certs/ca-certificates.crt
 
 VM_BUILD_FILES = build/etc/s6-rc
 
+build/fifo:
+	mkdir -p build
+	mkfifo -m 0600 $@
+
 build/empty:
 	mkdir -p $@
 
-build/rootfs.erofs: ../../scripts/make-erofs.sh $(VM_FILES) $(VM_BUILD_FILES) build/empty
+build/rootfs.erofs: ../../scripts/make-erofs.sh $(VM_FILES) $(VM_BUILD_FILES) build/empty build/fifo
 	../../scripts/make-erofs.sh -- $@ $(PACKAGES) \
 	    $$(for file in $(VM_FILES) $(VM_LINKS); do printf '%s %s ' $$file $$file; done) \
 	    $$(for file in $(VM_BUILD_FILES); do printf '%s %s ' $$file $${file#build/}; done) \
-	    $$(printf 'build/empty %s ' $(VM_DIRS))
+	    $$(printf 'build/empty %s ' $(VM_DIRS)) \
+	    $$(printf 'build/fifo %s ' $(VM_FIFOS))
 
 VM_S6_RC_FILES = \
 	etc/s6-rc/app/run \
diff --git a/img/app/etc/s6-linux-init/run-image/service/s6-linux-init-shutdownd/notification-fd b/img/app/etc/s6-linux-init/run-image/service/s6-linux-init-shutdownd/notification-fd
new file mode 100644
index 0000000..00750ed
--- /dev/null
+++ b/img/app/etc/s6-linux-init/run-image/service/s6-linux-init-shutdownd/notification-fd
@@ -0,0 +1 @@
+3
diff --git a/img/app/etc/s6-linux-init/run-image/service/s6-linux-init-shutdownd/notification-fd.license b/img/app/etc/s6-linux-init/run-image/service/s6-linux-init-shutdownd/notification-fd.license
new file mode 100644
index 0000000..a941ca4
--- /dev/null
+++ b/img/app/etc/s6-linux-init/run-image/service/s6-linux-init-shutdownd/notification-fd.license
@@ -0,0 +1,2 @@
+SPDX-License-Identifier: CC0-1.0
+SPDX-FileCopyrightText: 2023 Alyssa Ross <hi@alyssa.is>
diff --git a/img/app/etc/s6-linux-init/run-image/service/s6-linux-init-shutdownd/run b/img/app/etc/s6-linux-init/run-image/service/s6-linux-init-shutdownd/run
new file mode 100755
index 0000000..228fc6a
--- /dev/null
+++ b/img/app/etc/s6-linux-init/run-image/service/s6-linux-init-shutdownd/run
@@ -0,0 +1,5 @@
+#!/bin/execlineb -P
+# SPDX-License-Identifier: EUPL-1.2+
+# SPDX-FileCopyrightText: 2023 Alyssa Ross <hi@alyssa.is>
+
+s6-linux-init-shutdownd -Bc /etc/s6-linux-init
diff --git a/img/app/etc/s6-linux-init/scripts/rc.shutdown b/img/app/etc/s6-linux-init/scripts/rc.shutdown
new file mode 100755
index 0000000..3b09c02
--- /dev/null
+++ b/img/app/etc/s6-linux-init/scripts/rc.shutdown
@@ -0,0 +1,5 @@
+#!/bin/execlineb -P
+# SPDX-License-Identifier: EUPL-1.2+
+# SPDX-FileCopyrightText: 2023 Alyssa Ross <hi@alyssa.is>
+
+s6-rc -Dbav2 change
diff --git a/img/app/etc/s6-linux-init/scripts/rc.shutdown.final b/img/app/etc/s6-linux-init/scripts/rc.shutdown.final
new file mode 100755
index 0000000..7f48a65
--- /dev/null
+++ b/img/app/etc/s6-linux-init/scripts/rc.shutdown.final
@@ -0,0 +1,3 @@
+#!/bin/execlineb -P
+# SPDX-License-Identifier: EUPL-1.2+
+# SPDX-FileCopyrightText: 2023 Alyssa Ross <hi@alyssa.is>
diff --git a/img/app/etc/s6-rc/app/run b/img/app/etc/s6-rc/app/run
index 2a628b7..72215de 100755
--- a/img/app/etc/s6-rc/app/run
+++ b/img/app/etc/s6-rc/app/run
@@ -23,4 +23,4 @@ foreground { clear }
 unexport ?
 
 foreground { /run/ext/run }
-exec -l sh
+s6-linux-init-shutdown -p now
diff --git a/scripts/make-erofs.sh b/scripts/make-erofs.sh
index a289df7..055cb6f 100755
--- a/scripts/make-erofs.sh
+++ b/scripts/make-erofs.sh
@@ -56,4 +56,4 @@ if [ -z "${img-}" ]; then
 	ex_usage
 fi
 
-mkfs.erofs -b4096 "$@" "$img" "$root"
+mkfs.erofs -b4096 --all-root "$@" "$img" "$root"