summary refs log tree commit diff
path: root/nixos/modules/tasks
diff options
context:
space:
mode:
authorSvein Ove Aas <sveina@gmail.com>2018-06-16 02:39:07 +0100
committerSvein Ove Aas <sveina@gmail.com>2018-07-02 21:14:45 +0100
commitcfd8c4ee88fec3a7f989663e09d8e39513b8488e (patch)
treef55535a5f229e35f01a1ab8dcc417289fea6e9b5 /nixos/modules/tasks
parent1fa2503f9dba814eb23726a25642d2180ce791c3 (diff)
downloadnixlib-cfd8c4ee88fec3a7f989663e09d8e39513b8488e.tar
nixlib-cfd8c4ee88fec3a7f989663e09d8e39513b8488e.tar.gz
nixlib-cfd8c4ee88fec3a7f989663e09d8e39513b8488e.tar.bz2
nixlib-cfd8c4ee88fec3a7f989663e09d8e39513b8488e.tar.lz
nixlib-cfd8c4ee88fec3a7f989663e09d8e39513b8488e.tar.xz
nixlib-cfd8c4ee88fec3a7f989663e09d8e39513b8488e.tar.zst
nixlib-cfd8c4ee88fec3a7f989663e09d8e39513b8488e.zip
zfs: Improve import handling
Diffstat (limited to 'nixos/modules/tasks')
-rw-r--r--nixos/modules/tasks/filesystems/zfs.nix92
1 files changed, 77 insertions, 15 deletions
diff --git a/nixos/modules/tasks/filesystems/zfs.nix b/nixos/modules/tasks/filesystems/zfs.nix
index bfcd81d62159..39f51c363673 100644
--- a/nixos/modules/tasks/filesystems/zfs.nix
+++ b/nixos/modules/tasks/filesystems/zfs.nix
@@ -58,6 +58,45 @@ let
 
   snapshotNames = [ "frequent" "hourly" "daily" "weekly" "monthly" ];
 
+  # When importing ZFS pools, there's one difficulty: These scripts may run
+  # before the backing devices (physical HDDs, etc.) of the pool have been
+  # scanned and initialized.
+  #
+  # An attempted import with all devices missing will just fail, and can be
+  # retried, but an import where e.g. two out of three disks in a three-way
+  # mirror are missing, will succeed. This is a problem: When the missing disks
+  # are later discovered, they won't be automatically set online, rendering the
+  # pool redundancy-less (and far slower) until such time as the system reboots.
+  #
+  # The solution is the below. poolReady checks the status of an un-imported
+  # pool, to see if *every* device is available -- in which case the pool will be
+  # in state ONLINE, as opposed to DEGRADED, FAULTED or MISSING.
+  #
+  # The import scripts then loop over this, waiting until the pool is ready or a
+  # sufficient amount of time has passed that we can assume it won't be. In the
+  # latter case it makes one last attempt at importing, allowing the system to
+  # (eventually) boot even with a degraded pool.
+  importLib = {zpoolCmd, awkCmd, cfgZfs}: ''
+    poolReady() {
+      pool="$1"
+      state="$("${zpoolCmd}" import | "${awkCmd}" "/pool: $pool/ { found = 1 }; /state:/ { if (found == 1) { print \$2; exit } }; END { if (found == 0) { print \"MISSING\" } }")"
+      if [[ "$state" = "ONLINE" ]]; then
+        return 0
+      else
+        echo "Pool $pool in state $state, waiting"
+        return 1
+      fi
+    }
+    poolImported() {
+      pool="$1"
+      "${zpoolCmd}" list "$pool" >/dev/null 2>/dev/null
+    }
+    poolImport() {
+      pool="$1"
+      "${zpoolCmd}" import -d "${cfgZfs.devNodes}" -N $ZFS_FORCE "$pool"
+    }
+  '';
+
 in
 
 {
@@ -339,19 +378,26 @@ in
                   ;;
               esac
             done
-            ''] ++ (map (pool: ''
+          ''] ++ [(importLib {
+            # See comments at importLib definition.
+            zpoolCmd = "zpool";
+            awkCmd = "awk";
+            inherit cfgZfs;
+          })] ++ (map (pool: ''
             echo -n "importing root ZFS pool \"${pool}\"..."
-            trial=0
-            until msg="$(zpool import -d ${cfgZfs.devNodes} -N $ZFS_FORCE '${pool}' 2>&1)"; do
-              sleep 0.25
-              echo -n .
-              trial=$(($trial + 1))
-              if [[ $trial -eq 60 ]]; then
-                break
+            # Loop across the import until it succeeds, because the devices needed may not be discovered yet.
+            if ! poolImported "${pool}"; then
+              for trial in `seq 1 60`; do
+                poolReady "${pool}" > /dev/null && msg="$(poolImport "${pool}" 2>&1)" && break
+                sleep 1
+                echo -n .
+              done
+              echo
+              if [[ -n "$msg" ]]; then
+                echo "$msg";
               fi
-            done
-            echo
-            if [[ -n "$msg" ]]; then echo "$msg"; fi
+              poolImported "${pool}" || poolImport "${pool}"  # Try one last time, e.g. to import a degraded pool.
+            fi
             ${lib.optionalString cfgZfs.requestEncryptionCredentials ''
               zfs load-key -a
             ''}
@@ -395,10 +441,26 @@ in
               Type = "oneshot";
               RemainAfterExit = true;
             };
-            script = ''
-              zpool_cmd="${packages.zfsUser}/sbin/zpool"
-              ("$zpool_cmd" list "${pool}" >/dev/null) || "$zpool_cmd" import -d ${cfgZfs.devNodes} -N ${optionalString cfgZfs.forceImportAll "-f"} "${pool}"
-              ${optionalString cfgZfs.requestEncryptionCredentials "\"${packages.zfsUser}/sbin/zfs\" load-key -r \"${pool}\""}
+            script = (importLib {
+              # See comments at importLib definition.
+              zpoolCmd="${packages.zfsUser}/sbin/zpool";
+              awkCmd="${pkgs.gawk}/bin/awk";
+              inherit cfgZfs;
+            }) + ''
+              poolImported "${pool}" && exit
+              echo -n "importing ZFS pool \"${pool}\"..."
+              # Loop across the import until it succeeds, because the devices needed may not be discovered yet.
+              for trial in `seq 1 60`; do
+                poolReady "${pool}" && poolImport "${pool}" && break
+                sleep 1
+              done
+              poolImported "${pool}" || poolImport "${pool}"  # Try one last time, e.g. to import a degraded pool.
+              if poolImported "${pool}"; then
+                ${optionalString cfgZfs.requestEncryptionCredentials "\"${packages.zfsUser}/sbin/zfs\" load-key -r \"${pool}\""}
+                echo "Successfully imported ${pool}"
+              else
+                exit 1
+              fi
             '';
           };