about summary refs log tree commit diff
path: root/nixpkgs/nixos/modules/services/misc/paperless.nix
diff options
context:
space:
mode:
Diffstat (limited to 'nixpkgs/nixos/modules/services/misc/paperless.nix')
-rw-r--r-- nixpkgs/nixos/modules/services/misc/paperless.nix | 155
1 files changed, 109 insertions, 46 deletions
diff --git a/nixpkgs/nixos/modules/services/misc/paperless.nix b/nixpkgs/nixos/modules/services/misc/paperless.nix
index fbf1338a0dff..4199e7713304 100644
--- a/nixpkgs/nixos/modules/services/misc/paperless.nix
+++ b/nixpkgs/nixos/modules/services/misc/paperless.nix
@@ -3,8 +3,10 @@
 with lib;
 let
   cfg = config.services.paperless;
+  pkg = cfg.package;
 
   defaultUser = "paperless";
+  nltkDir = "/var/cache/paperless/nltk";
 
   # Don't start a redis instance if the user sets a custom redis connection
   enableRedis = !hasAttr "PAPERLESS_REDIS" cfg.extraConfig;
@@ -14,19 +16,24 @@ let
     PAPERLESS_DATA_DIR = cfg.dataDir;
     PAPERLESS_MEDIA_ROOT = cfg.mediaDir;
     PAPERLESS_CONSUMPTION_DIR = cfg.consumptionDir;
+    PAPERLESS_NLTK_DIR = nltkDir;
     GUNICORN_CMD_ARGS = "--bind=${cfg.address}:${toString cfg.port}";
+  } // optionalAttrs (config.time.timeZone != null) {
+    PAPERLESS_TIME_ZONE = config.time.timeZone;
+  } // optionalAttrs enableRedis {
+    PAPERLESS_REDIS = "unix://${redisServer.unixSocket}";
   } // (
     lib.mapAttrs (_: toString) cfg.extraConfig
-  ) // (optionalAttrs enableRedis {
-    PAPERLESS_REDIS = "unix://${redisServer.unixSocket}";
-  });
+  );
 
-  manage = let
-    setupEnv = lib.concatStringsSep "\n" (mapAttrsToList (name: val: "export ${name}=\"${val}\"") env);
-  in pkgs.writeShellScript "manage" ''
-    ${setupEnv}
-    exec ${cfg.package}/bin/paperless-ngx "$@"
-  '';
+  manage =
+    let
+      setupEnv = lib.concatStringsSep "\n" (mapAttrsToList (name: val: "export ${name}=\"${val}\"") env);
+    in
+    pkgs.writeShellScript "manage" ''
+      ${setupEnv}
+      exec ${pkg}/bin/paperless-ngx "$@"
+    '';
 
   # Secure the services
   defaultServiceConfig = {
@@ -44,6 +51,7 @@ let
       cfg.dataDir
       cfg.mediaDir
     ];
+    CacheDirectory = "paperless";
     CapabilityBoundingSet = "";
     # ProtectClock adds DeviceAllow=char-rtc r
     DeviceAllow = "";
@@ -77,7 +85,7 @@ let
     RestrictSUIDSGID = true;
     SupplementaryGroups = optional enableRedis redisServer.user;
     SystemCallArchitectures = "native";
-    SystemCallFilter = [ "@system-service" "~@privileged @resources @setuid @keyring" ];
+    SystemCallFilter = [ "@system-service" "~@privileged @setuid @keyring" ];
     # Does not work well with the temporary root
     #UMask = "0066";
   };
@@ -167,18 +175,17 @@ in
 
     extraConfig = mkOption {
       type = types.attrs;
-      default = {};
+      default = { };
       description = lib.mdDoc ''
         Extra paperless config options.
 
         See [the documentation](https://paperless-ngx.readthedocs.io/en/latest/configuration.html)
         for available options.
       '';
-      example = literalExpression ''
-        {
-          PAPERLESS_OCR_LANGUAGE = "deu+eng";
-        }
-      '';
+      example = {
+        PAPERLESS_OCR_LANGUAGE = "deu+eng";
+        PAPERLESS_DBHOST = "/run/postgresql";
+      };
     };
 
     user = mkOption {
@@ -209,28 +216,41 @@ in
     ];
 
     systemd.services.paperless-scheduler = {
-      description = "Paperless scheduler";
+      description = "Paperless Celery Beat";
+      wantedBy = [ "multi-user.target" ];
+      wants = [ "paperless-consumer.service" "paperless-web.service" "paperless-task-queue.service" ];
       serviceConfig = defaultServiceConfig // {
         User = cfg.user;
-        ExecStart = "${cfg.package}/bin/paperless-ngx qcluster";
+        ExecStart = "${pkg}/bin/celery --app paperless beat --loglevel INFO";
         Restart = "on-failure";
-        # The `mbind` syscall is needed for running the classifier.
-        SystemCallFilter = defaultServiceConfig.SystemCallFilter ++ [ "mbind" ];
-        # Needs to talk to mail server for automated import rules
-        PrivateNetwork = false;
       };
       environment = env;
-      wantedBy = [ "multi-user.target" ];
-      wants = [ "paperless-consumer.service" "paperless-web.service" ];
 
       preStart = ''
         ln -sf ${manage} ${cfg.dataDir}/paperless-manage
 
         # Auto-migrate on first run or if the package has changed
         versionFile="${cfg.dataDir}/src-version"
-        if [[ $(cat "$versionFile" 2>/dev/null) != ${cfg.package} ]]; then
-          ${cfg.package}/bin/paperless-ngx migrate
-          echo ${cfg.package} > "$versionFile"
+        version=$(cat "$versionFile" 2>/dev/null || echo 0)
+
+        if [[ $version != ${pkg.version} ]]; then
+          ${pkg}/bin/paperless-ngx migrate
+
+          # Parse old version string format for backwards compatibility
+          version=$(echo "$version" | grep -ohP '[^-]+$')
+
+          versionLessThan() {
+            target=$1
+            [[ $({ echo "$version"; echo "$target"; } | sort -V | head -1) != "$target" ]]
+          }
+
+          if versionLessThan 1.12.0; then
+            # Reindex documents as mentioned in https://github.com/paperless-ngx/paperless-ngx/releases/tag/v1.12.1
+            echo "Reindexing documents, to allow searching old comments. Required after the 1.12.x upgrade."
+            ${pkg}/bin/paperless-ngx document_index reindex
+          fi
+
+          echo ${pkg.version} > "$versionFile"
         fi
       ''
       + optionalString (cfg.passwordFile != null) ''
@@ -240,7 +260,7 @@ in
         superuserStateFile="${cfg.dataDir}/superuser-state"
 
         if [[ $(cat "$superuserStateFile" 2>/dev/null) != $superuserState ]]; then
-          ${cfg.package}/bin/paperless-ngx manage_superuser
+          ${pkg}/bin/paperless-ngx manage_superuser
           echo "$superuserState" > "$superuserStateFile"
         fi
       '';
@@ -248,6 +268,21 @@ in
       after = [ "redis-paperless.service" ];
     };
 
+    systemd.services.paperless-task-queue = {
+      description = "Paperless Celery Workers";
+      after = [ "paperless-scheduler.service" ];
+      serviceConfig = defaultServiceConfig // {
+        User = cfg.user;
+        ExecStart = "${pkg}/bin/celery --app paperless worker --loglevel INFO";
+        Restart = "on-failure";
+        # The `mbind` syscall is needed for running the classifier.
+        SystemCallFilter = defaultServiceConfig.SystemCallFilter ++ [ "mbind" ];
+        # Needs to talk to mail server for automated import rules
+        PrivateNetwork = false;
+      };
+      environment = env;
+    };
+
     # Reading the user-provided password file requires root access
     systemd.services.paperless-copy-password = mkIf (cfg.passwordFile != null) {
       requiredBy = [ "paperless-scheduler.service" ];
@@ -261,48 +296,76 @@ in
       };
     };
 
-    systemd.services.paperless-consumer = {
-      description = "Paperless document consumer";
+    # Download NLTK corpus data
+    systemd.services.paperless-download-nltk-data = {
+      wantedBy = [ "paperless-scheduler.service" ];
+      before = [ "paperless-scheduler.service" ];
+      after = [ "network-online.target" ];
       serviceConfig = defaultServiceConfig // {
         User = cfg.user;
-        ExecStart = "${cfg.package}/bin/paperless-ngx document_consumer";
-        Restart = "on-failure";
+        Type = "oneshot";
+        # Enable internet access
+        PrivateNetwork = false;
+        # Restrict write access
+        BindPaths = [];
+        BindReadOnlyPaths = [
+          "/nix/store"
+          "-/etc/resolv.conf"
+          "-/etc/nsswitch.conf"
+          "-/etc/ssl/certs"
+          "-/etc/static/ssl/certs"
+          "-/etc/hosts"
+          "-/etc/localtime"
+        ];
+        ExecStart = let pythonWithNltk = pkg.python.withPackages (ps: [ ps.nltk ]); in ''
+          ${pythonWithNltk}/bin/python -m nltk.downloader -d '${nltkDir}' punkt snowball_data stopwords
+        '';
       };
-      environment = env;
+    };
+
+    systemd.services.paperless-consumer = {
+      description = "Paperless document consumer";
       # Bind to `paperless-scheduler` so that the consumer never runs
       # during migrations
       bindsTo = [ "paperless-scheduler.service" ];
       after = [ "paperless-scheduler.service" ];
+      serviceConfig = defaultServiceConfig // {
+        User = cfg.user;
+        ExecStart = "${pkg}/bin/paperless-ngx document_consumer";
+        Restart = "on-failure";
+      };
+      environment = env;
     };
 
     systemd.services.paperless-web = {
       description = "Paperless web server";
+      # Bind to `paperless-scheduler` so that the web server never runs
+      # during migrations
+      bindsTo = [ "paperless-scheduler.service" ];
+      after = [ "paperless-scheduler.service" ];
       serviceConfig = defaultServiceConfig // {
         User = cfg.user;
         ExecStart = ''
-          ${pkgs.python3Packages.gunicorn}/bin/gunicorn \
-            -c ${cfg.package}/lib/paperless-ngx/gunicorn.conf.py paperless.asgi:application
+          ${pkg.python.pkgs.gunicorn}/bin/gunicorn \
+            -c ${pkg}/lib/paperless-ngx/gunicorn.conf.py paperless.asgi:application
         '';
         Restart = "on-failure";
 
-        AmbientCapabilities = "CAP_NET_BIND_SERVICE";
-        CapabilityBoundingSet = "CAP_NET_BIND_SERVICE";
-        # gunicorn needs setuid
-        SystemCallFilter = defaultServiceConfig.SystemCallFilter ++ [ "@setuid" ];
+        # gunicorn needs setuid, liblapack needs mbind
+        SystemCallFilter = defaultServiceConfig.SystemCallFilter ++ [ "@setuid mbind" ];
         # Needs to serve web page
         PrivateNetwork = false;
+      } // lib.optionalAttrs (cfg.port < 1024) {
+        AmbientCapabilities = [ "CAP_NET_BIND_SERVICE" ];
+        CapabilityBoundingSet = [ "CAP_NET_BIND_SERVICE" ];
       };
       environment = env // {
-        PATH = mkForce cfg.package.path;
-        PYTHONPATH = "${cfg.package.pythonPath}:${cfg.package}/lib/paperless-ngx/src";
+        PATH = mkForce pkg.path;
+        PYTHONPATH = "${pkg.python.pkgs.makePythonPath pkg.propagatedBuildInputs}:${pkg}/lib/paperless-ngx/src";
       };
       # Allow the web interface to access the private /tmp directory of the server.
       # This is required to support uploading files via the web interface.
-      unitConfig.JoinsNamespaceOf = "paperless-scheduler.service";
-      # Bind to `paperless-scheduler` so that the web server never runs
-      # during migrations
-      bindsTo = [ "paperless-scheduler.service" ];
-      after = [ "paperless-scheduler.service" ];
+      unitConfig.JoinsNamespaceOf = "paperless-task-queue.service";
     };
 
     users = optionalAttrs (cfg.user == defaultUser) {