From 64c9c083028671f6c0f3a67446932e45984e2d87 Mon Sep 17 00:00:00 2001 From: Florian Klink Date: Sat, 2 Nov 2019 15:15:33 +0100 Subject: nixos/ceph: create /etc/ceph and /var/lib/ceph via tmpfiles We seem to be relying on those being present during runtime anyways. --- nixos/modules/services/network-filesystems/ceph.nix | 2 ++ 1 file changed, 2 insertions(+) (limited to 'nixos/modules') diff --git a/nixos/modules/services/network-filesystems/ceph.nix b/nixos/modules/services/network-filesystems/ceph.nix index 656a2d21b868..887a6ff925d2 100644 --- a/nixos/modules/services/network-filesystems/ceph.nix +++ b/nixos/modules/services/network-filesystems/ceph.nix @@ -401,7 +401,9 @@ in mkMerge targets; systemd.tmpfiles.rules = [ + "d /etc/ceph - ceph ceph - -" "d /run/ceph 0770 ceph ceph -" + "d /var/lib/ceph - ceph ceph - -" ]; }; } -- cgit 1.4.1 From 67e0777f6258ded0029bbfeec65fbedf36d6d4c8 Mon Sep 17 00:00:00 2001 From: Florian Klink Date: Sat, 2 Nov 2019 15:01:39 +0100 Subject: nixos/ceph: run unprivileged, use StateDirectory and tmpfiles, don't pass extraServiceConfig Don't pass user and group to ceph, and rely on it to drop ceps, but let systemd handle running it as the appropriate user. This also inlines the extraServiceConfig into the makeService function, as we have conditionals depending on daemonType there anyways. Use StateDirectory to create directories in /var/lib/ceph/${daemonType}/${clusterName}-${daemonId}. There previously was a condition on daemonType being one of mds,mon,rgw or mgr. We only instantiate makeServices with these types, and "osd" was special. In the osd case, test examples suggest it'd be in something like /var/lib/ceph/osd/ceph-${cfg.osd0.name} - so it's not special at all, but exactly like the pattern for the others. During initialization, we also need these folders, before the unit is started up. Move the mkdir -p commands in the vm tests to the line immediately before they're required. --- .../modules/services/network-filesystems/ceph.nix | 57 +++++++++++----------- nixos/tests/ceph-multi-node.nix | 15 ++---- nixos/tests/ceph-single-node.nix | 13 ++--- 3 files changed, 36 insertions(+), 49 deletions(-) (limited to 'nixos/modules') diff --git a/nixos/modules/services/network-filesystems/ceph.nix b/nixos/modules/services/network-filesystems/ceph.nix index 887a6ff925d2..ef0d2d90a1a1 100644 --- a/nixos/modules/services/network-filesystems/ceph.nix +++ b/nixos/modules/services/network-filesystems/ceph.nix @@ -9,12 +9,12 @@ let expandCamelCase = replaceStrings upperChars (map (s: " ${s}") lowerChars); expandCamelCaseAttrs = mapAttrs' (name: value: nameValuePair (expandCamelCase name) value); - makeServices = (daemonType: daemonIds: extraServiceConfig: + makeServices = (daemonType: daemonIds: mkMerge (map (daemonId: - { "ceph-${daemonType}-${daemonId}" = makeService daemonType daemonId cfg.global.clusterName pkgs.ceph extraServiceConfig; }) + { "ceph-${daemonType}-${daemonId}" = makeService daemonType daemonId cfg.global.clusterName pkgs.ceph; }) daemonIds)); - makeService = (daemonType: daemonId: clusterName: ceph: extraServiceConfig: { + makeService = (daemonType: daemonId: clusterName: ceph: { enable = true; description = "Ceph ${builtins.replaceStrings lowerChars upperChars daemonType} daemon ${daemonId}"; after = [ "network-online.target" "time-sync.target" ] ++ optional (daemonType == "osd") "ceph-mon.target"; @@ -22,6 +22,8 @@ let partOf = [ "ceph-${daemonType}.target" ]; wantedBy = [ "ceph-${daemonType}.target" ]; + path = [ pkgs.getopt ]; + serviceConfig = { LimitNOFILE = 1048576; LimitNPROC = 1048576; @@ -34,22 +36,22 @@ let Restart = "on-failure"; StartLimitBurst = "5"; StartLimitInterval = "30min"; + StateDirectory = "ceph/${if daemonType == "rgw" then "radosgw" else daemonType}/${clusterName}-${daemonId}"; + User = "ceph"; + Group = if daemonType == "osd" then "disk" else "ceph"; ExecStart = ''${ceph.out}/bin/${if daemonType == "rgw" then "radosgw" else "ceph-${daemonType}"} \ - -f --cluster ${clusterName} --id ${daemonId} --setuser ceph \ - --setgroup ${if daemonType == "osd" then "disk" else "ceph"}''; - } // extraServiceConfig - // optionalAttrs (daemonType == "osd") { ExecStartPre = ''${ceph.lib}/libexec/ceph/ceph-osd-prestart.sh \ - --id ${daemonId} --cluster ${clusterName}''; }; - } // optionalAttrs (builtins.elem daemonType [ "mds" "mon" "rgw" "mgr" ]) { - preStart = '' - daemonPath="/var/lib/ceph/${if daemonType == "rgw" then "radosgw" else daemonType}/${clusterName}-${daemonId}" - if [ ! -d $daemonPath ]; then - mkdir -m 755 -p $daemonPath - chown -R ceph:ceph $daemonPath - fi - ''; - } // optionalAttrs (daemonType == "osd") { path = [ pkgs.getopt ]; } - ); + -f --cluster ${clusterName} --id ${daemonId}''; + } // optionalAttrs (daemonType == "osd") { + ExecStartPre = ''${ceph.lib}/libexec/ceph/ceph-osd-prestart.sh --id ${daemonId} --cluster ${clusterName}''; + StartLimitBurst = "30"; + RestartSec = "20s"; + PrivateDevices = "no"; # osd needs disk access + } // optionalAttrs ( daemonType == "mon") { + RestartSec = "10"; + } // optionalAttrs (lib.elem daemonType ["mgr" "mds"]) { + StartLimitBurst = "3"; + }; + }); makeTarget = (daemonType: { @@ -377,14 +379,11 @@ in systemd.services = let services = [] - ++ optional cfg.mon.enable (makeServices "mon" cfg.mon.daemons { RestartSec = "10"; }) - ++ optional cfg.mds.enable (makeServices "mds" cfg.mds.daemons { StartLimitBurst = "3"; }) - ++ optional cfg.osd.enable (makeServices "osd" cfg.osd.daemons { StartLimitBurst = "30"; - RestartSec = "20s"; - PrivateDevices = "no"; # osd needs disk access - }) - ++ optional cfg.rgw.enable (makeServices "rgw" cfg.rgw.daemons { }) - ++ optional cfg.mgr.enable (makeServices "mgr" cfg.mgr.daemons { StartLimitBurst = "3"; }); + ++ optional cfg.mon.enable (makeServices "mon" cfg.mon.daemons) + ++ optional cfg.mds.enable (makeServices "mds" cfg.mds.daemons) + ++ optional cfg.osd.enable (makeServices "osd" cfg.osd.daemons) + ++ optional cfg.rgw.enable (makeServices "rgw" cfg.rgw.daemons) + ++ optional cfg.mgr.enable (makeServices "mgr" cfg.mgr.daemons); in mkMerge services; @@ -403,7 +402,9 @@ in systemd.tmpfiles.rules = [ "d /etc/ceph - ceph ceph - -" "d /run/ceph 0770 ceph ceph -" - "d /var/lib/ceph - ceph ceph - -" - ]; + "d /var/lib/ceph - ceph ceph - -"] + ++ optionals cfg.mgr.enable [ "d /var/lib/ceph/mgr - ceph ceph - -"] + ++ optionals cfg.mon.enable [ "d /var/lib/ceph/mon - ceph ceph - -"] + ++ optionals cfg.osd.enable [ "d /var/lib/ceph/osd - ceph ceph - -"]; }; } diff --git a/nixos/tests/ceph-multi-node.nix b/nixos/tests/ceph-multi-node.nix index 13d0851366eb..c34b134de531 100644 --- a/nixos/tests/ceph-multi-node.nix +++ b/nixos/tests/ceph-multi-node.nix @@ -114,18 +114,6 @@ let $osd0->waitForUnit("network.target"); $osd1->waitForUnit("network.target"); - # Create the ceph-related directories - $monA->mustSucceed( - "mkdir -p /var/lib/ceph/mgr/ceph-${cfg.monA.name}", - "mkdir -p /var/lib/ceph/mon/ceph-${cfg.monA.name}", - ); - $osd0->mustSucceed( - "mkdir -p /var/lib/ceph/osd/ceph-${cfg.osd0.name}", - ); - $osd1->mustSucceed( - "mkdir -p /var/lib/ceph/osd/ceph-${cfg.osd1.name}", - ); - # Bootstrap ceph-mon daemon $monA->mustSucceed( "sudo -u ceph ceph-authtool --create-keyring /tmp/ceph.mon.keyring --gen-key -n mon. --cap mon 'allow *'", @@ -133,6 +121,7 @@ let "sudo -u ceph ceph-authtool /tmp/ceph.mon.keyring --import-keyring /etc/ceph/ceph.client.admin.keyring", "monmaptool --create --add ${cfg.monA.name} ${cfg.monA.ip} --fsid ${cfg.clusterId} /tmp/monmap", "sudo -u ceph ceph-mon --mkfs -i ${cfg.monA.name} --monmap /tmp/monmap --keyring /tmp/ceph.mon.keyring", + "sudo -u ceph mkdir -p /var/lib/ceph/mgr/ceph-${cfg.monA.name}/", "sudo -u ceph touch /var/lib/ceph/mon/ceph-${cfg.monA.name}/done", "systemctl start ceph-mon-${cfg.monA.name}" ); @@ -159,12 +148,14 @@ let # Bootstrap both OSDs $osd0->mustSucceed( "mkfs.xfs /dev/vdb", + "mkdir -p /var/lib/ceph/osd/ceph-${cfg.osd0.name}", "mount /dev/vdb /var/lib/ceph/osd/ceph-${cfg.osd0.name}", "ceph-authtool --create-keyring /var/lib/ceph/osd/ceph-${cfg.osd0.name}/keyring --name osd.${cfg.osd0.name} --add-key ${cfg.osd0.key}", "echo '{\"cephx_secret\": \"${cfg.osd0.key}\"}' | ceph osd new ${cfg.osd0.uuid} -i -", ); $osd1->mustSucceed( "mkfs.xfs /dev/vdb", + "mkdir -p /var/lib/ceph/osd/ceph-${cfg.osd1.name}", "mount /dev/vdb /var/lib/ceph/osd/ceph-${cfg.osd1.name}", "ceph-authtool --create-keyring /var/lib/ceph/osd/ceph-${cfg.osd1.name}/keyring --name osd.${cfg.osd1.name} --add-key ${cfg.osd1.key}", "echo '{\"cephx_secret\": \"${cfg.osd1.key}\"}' | ceph osd new ${cfg.osd1.uuid} -i -" diff --git a/nixos/tests/ceph-single-node.nix b/nixos/tests/ceph-single-node.nix index cb37b4b43bcd..5938300ebb4a 100644 --- a/nixos/tests/ceph-single-node.nix +++ b/nixos/tests/ceph-single-node.nix @@ -77,14 +77,6 @@ let $monA->waitForUnit("network.target"); - # Create the ceph-related directories - $monA->mustSucceed( - "mkdir -p /var/lib/ceph/mgr/ceph-${cfg.monA.name}", - "mkdir -p /var/lib/ceph/mon/ceph-${cfg.monA.name}", - "mkdir -p /var/lib/ceph/osd/ceph-${cfg.osd0.name}", - "mkdir -p /var/lib/ceph/osd/ceph-${cfg.osd1.name}", - ); - # Bootstrap ceph-mon daemon $monA->mustSucceed( "sudo -u ceph ceph-authtool --create-keyring /tmp/ceph.mon.keyring --gen-key -n mon. --cap mon 'allow *'", @@ -101,8 +93,9 @@ let # Can't check ceph status until a mon is up $monA->succeed("ceph -s | grep 'mon: 1 daemons'"); - # Start the ceph-mgr daemon, it has no deps and hardly any setup + # Start the ceph-mgr daemon, after copying in the keyring $monA->mustSucceed( + "sudo -u ceph mkdir -p /var/lib/ceph/mgr/ceph-${cfg.monA.name}/", "ceph auth get-or-create mgr.${cfg.monA.name} mon 'allow profile mgr' osd 'allow *' mds 'allow *' > /var/lib/ceph/mgr/ceph-${cfg.monA.name}/keyring", "systemctl start ceph-mgr-${cfg.monA.name}" ); @@ -114,7 +107,9 @@ let $monA->mustSucceed( "mkfs.xfs /dev/vdb", "mkfs.xfs /dev/vdc", + "mkdir -p /var/lib/ceph/osd/ceph-${cfg.osd0.name}", "mount /dev/vdb /var/lib/ceph/osd/ceph-${cfg.osd0.name}", + "mkdir -p /var/lib/ceph/osd/ceph-${cfg.osd1.name}", "mount /dev/vdc /var/lib/ceph/osd/ceph-${cfg.osd1.name}", "ceph-authtool --create-keyring /var/lib/ceph/osd/ceph-${cfg.osd0.name}/keyring --name osd.${cfg.osd0.name} --add-key ${cfg.osd0.key}", "ceph-authtool --create-keyring /var/lib/ceph/osd/ceph-${cfg.osd1.name}/keyring --name osd.${cfg.osd1.name} --add-key ${cfg.osd1.key}", -- cgit 1.4.1 From ffd006086945924511114fa1f5478665870da3af Mon Sep 17 00:00:00 2001 From: Florian Klink Date: Sat, 9 Nov 2019 16:05:58 +0100 Subject: nixos/ceph: use ConditionPathExists to delay ceph daemon start This prevents services to be started before they're initialized, and renders the `systemd.targets.ceph.wantedBy = lib.mkForce [];` hack in the vm tests obsolete - The config now starts up ceph after a reboot, too. Let's take advantage of that, crash all VMs, and boot them up again. --- .../modules/services/network-filesystems/ceph.nix | 19 +++++++++++----- nixos/tests/ceph-multi-node.nix | 26 ++++++++-------------- nixos/tests/ceph-single-node.nix | 14 ++++-------- 3 files changed, 27 insertions(+), 32 deletions(-) (limited to 'nixos/modules') diff --git a/nixos/modules/services/network-filesystems/ceph.nix b/nixos/modules/services/network-filesystems/ceph.nix index ef0d2d90a1a1..543a7b25d5d6 100644 --- a/nixos/modules/services/network-filesystems/ceph.nix +++ b/nixos/modules/services/network-filesystems/ceph.nix @@ -14,7 +14,9 @@ let { "ceph-${daemonType}-${daemonId}" = makeService daemonType daemonId cfg.global.clusterName pkgs.ceph; }) daemonIds)); - makeService = (daemonType: daemonId: clusterName: ceph: { + makeService = (daemonType: daemonId: clusterName: ceph: + let + stateDirectory = "ceph/${if daemonType == "rgw" then "radosgw" else daemonType}/${clusterName}-${daemonId}"; in { enable = true; description = "Ceph ${builtins.replaceStrings lowerChars upperChars daemonType} daemon ${daemonId}"; after = [ "network-online.target" "time-sync.target" ] ++ optional (daemonType == "osd") "ceph-mon.target"; @@ -24,6 +26,9 @@ let path = [ pkgs.getopt ]; + # Don't start services that are not yet initialized + unitConfig.ConditionPathExists = "/var/lib/${stateDirectory}/keyring"; + serviceConfig = { LimitNOFILE = 1048576; LimitNPROC = 1048576; @@ -36,7 +41,7 @@ let Restart = "on-failure"; StartLimitBurst = "5"; StartLimitInterval = "30min"; - StateDirectory = "ceph/${if daemonType == "rgw" then "radosgw" else daemonType}/${clusterName}-${daemonId}"; + StateDirectory = stateDirectory; User = "ceph"; Group = if daemonType == "osd" then "disk" else "ceph"; ExecStart = ''${ceph.out}/bin/${if daemonType == "rgw" then "radosgw" else "ceph-${daemonType}"} \ @@ -60,6 +65,7 @@ let partOf = [ "ceph.target" ]; wantedBy = [ "ceph.target" ]; before = [ "ceph.target" ]; + unitConfig.StopWhenUnneeded = true; }; } ); @@ -389,9 +395,12 @@ in systemd.targets = let targets = [ - { ceph = { description = "Ceph target allowing to start/stop all ceph service instances at once"; - wantedBy = [ "multi-user.target" ]; }; } - ] ++ optional cfg.mon.enable (makeTarget "mon") + { ceph = { + description = "Ceph target allowing to start/stop all ceph service instances at once"; + wantedBy = [ "multi-user.target" ]; + unitConfig.StopWhenUnneeded = true; + }; } ] + ++ optional cfg.mon.enable (makeTarget "mon") ++ optional cfg.mds.enable (makeTarget "mds") ++ optional cfg.osd.enable (makeTarget "osd") ++ optional cfg.rgw.enable (makeTarget "rgw") diff --git a/nixos/tests/ceph-multi-node.nix b/nixos/tests/ceph-multi-node.nix index c34b134de531..09a81ae25e3d 100644 --- a/nixos/tests/ceph-multi-node.nix +++ b/nixos/tests/ceph-multi-node.nix @@ -49,9 +49,6 @@ let boot.kernelModules = [ "xfs" ]; services.ceph = cephConfig; - - # So that we don't have to battle systemd when bootstraping - systemd.targets.ceph.wantedBy = lib.mkForce []; }; networkMonA = { @@ -191,22 +188,17 @@ let "ceph osd pool delete multi-node-other-test multi-node-other-test --yes-i-really-really-mean-it" ); - # As we disable the target in the config, we still want to test that it works as intended - $osd0->mustSucceed("systemctl stop ceph-osd-${cfg.osd0.name}"); - $osd1->mustSucceed("systemctl stop ceph-osd-${cfg.osd1.name}"); - $monA->mustSucceed( - "systemctl stop ceph-mgr-${cfg.monA.name}", - "systemctl stop ceph-mon-${cfg.monA.name}" - ); + # Shut down ceph on all machines in a very unpolite way + $monA->crash; + $osd0->crash; + $osd1->crash; - $monA->succeed("systemctl start ceph.target"); - $monA->waitForUnit("ceph-mon-${cfg.monA.name}"); - $monA->waitForUnit("ceph-mgr-${cfg.monA.name}"); - $osd0->succeed("systemctl start ceph.target"); - $osd0->waitForUnit("ceph-osd-${cfg.osd0.name}"); - $osd1->succeed("systemctl start ceph.target"); - $osd1->waitForUnit("ceph-osd-${cfg.osd1.name}"); + # Start it up + $osd0->start; + $osd1->start; + $monA->start; + # Ensure the cluster comes back up again $monA->succeed("ceph -s | grep 'mon: 1 daemons'"); $monA->waitUntilSucceeds("ceph -s | grep 'quorum ${cfg.monA.name}'"); $monA->waitUntilSucceeds("ceph osd stat | grep -e '2 osds: 2 up[^,]*, 2 in'"); diff --git a/nixos/tests/ceph-single-node.nix b/nixos/tests/ceph-single-node.nix index 5938300ebb4a..3278d76769a4 100644 --- a/nixos/tests/ceph-single-node.nix +++ b/nixos/tests/ceph-single-node.nix @@ -46,9 +46,6 @@ let boot.kernelModules = [ "xfs" ]; services.ceph = cephConfig; - - # So that we don't have to battle systemd when bootstraping - systemd.targets.ceph.wantedBy = lib.mkForce []; }; networkMonA = { @@ -151,20 +148,17 @@ let "ceph osd pool delete single-node-other-test single-node-other-test --yes-i-really-really-mean-it" ); - # As we disable the target in the config, we still want to test that it works as intended - $monA->mustSucceed( - "systemctl stop ceph-osd-${cfg.osd0.name}", - "systemctl stop ceph-osd-${cfg.osd1.name}", - "systemctl stop ceph-mgr-${cfg.monA.name}", - "systemctl stop ceph-mon-${cfg.monA.name}" - ); + # Shut down ceph by stopping ceph.target. + $monA->mustSucceed("systemctl stop ceph.target"); + # Start it up $monA->succeed("systemctl start ceph.target"); $monA->waitForUnit("ceph-mon-${cfg.monA.name}"); $monA->waitForUnit("ceph-mgr-${cfg.monA.name}"); $monA->waitForUnit("ceph-osd-${cfg.osd0.name}"); $monA->waitForUnit("ceph-osd-${cfg.osd1.name}"); + # Ensure the cluster comes back up again $monA->succeed("ceph -s | grep 'mon: 1 daemons'"); $monA->waitUntilSucceeds("ceph -s | grep 'quorum ${cfg.monA.name}'"); $monA->waitUntilSucceeds("ceph osd stat | grep -e '2 osds: 2 up[^,]*, 2 in'"); -- cgit 1.4.1