about summary refs log tree commit diff
path: root/nixos/modules/services/misc/paperless.nix
diff options
context:
space:
mode:
Diffstat (limited to 'nixos/modules/services/misc/paperless.nix')
-rw-r--r-- nixos/modules/services/misc/paperless.nix 46
1 files changed, 39 insertions, 7 deletions
diff --git a/nixos/modules/services/misc/paperless.nix b/nixos/modules/services/misc/paperless.nix
index 667f16d98f82..4199e7713304 100644
--- a/nixos/modules/services/misc/paperless.nix
+++ b/nixos/modules/services/misc/paperless.nix
@@ -6,6 +6,7 @@ let
   pkg = cfg.package;
 
   defaultUser = "paperless";
+  nltkDir = "/var/cache/paperless/nltk";
 
   # Don't start a redis instance if the user sets a custom redis connection
   enableRedis = !hasAttr "PAPERLESS_REDIS" cfg.extraConfig;
@@ -15,6 +16,7 @@ let
     PAPERLESS_DATA_DIR = cfg.dataDir;
     PAPERLESS_MEDIA_ROOT = cfg.mediaDir;
     PAPERLESS_CONSUMPTION_DIR = cfg.consumptionDir;
+    PAPERLESS_NLTK_DIR = nltkDir;
     GUNICORN_CMD_ARGS = "--bind=${cfg.address}:${toString cfg.port}";
   } // optionalAttrs (config.time.timeZone != null) {
     PAPERLESS_TIME_ZONE = config.time.timeZone;
@@ -24,12 +26,19 @@ let
     lib.mapAttrs (_: toString) cfg.extraConfig
   );
 
-  manage = let
-    setupEnv = lib.concatStringsSep "\n" (mapAttrsToList (name: val: "export ${name}=\"${val}\"") env);
-  in pkgs.writeShellScript "manage" ''
-    ${setupEnv}
-    exec ${pkg}/bin/paperless-ngx "$@"
-  '';
+  # Wrapper script that exports every entry of `env` and then execs
+  # paperless-ngx with the caller's arguments.
+  manage =
+    let
+      # escapeShellArg single-quotes each value so settings containing `"`,
+      # `$` or backticks (e.g. JSON-valued options) reach paperless verbatim.
+      exportLine = name: val: "export ${name}=${lib.escapeShellArg val}";
+      setupEnv = lib.concatStringsSep "\n" (mapAttrsToList exportLine env);
+    in
+    pkgs.writeShellScript "manage" ''
+      ${setupEnv}
+      exec ${pkg}/bin/paperless-ngx "$@"
+    '';
 
   # Secure the services
   defaultServiceConfig = {
@@ -47,6 +51,7 @@ let
       cfg.dataDir
       cfg.mediaDir
     ];
+    CacheDirectory = "paperless";
     CapabilityBoundingSet = "";
     # ProtectClock adds DeviceAllow=char-rtc r
     DeviceAllow = "";
@@ -170,7 +175,7 @@ in
 
     extraConfig = mkOption {
       type = types.attrs;
-      default = {};
+      default = { };
       description = lib.mdDoc ''
         Extra paperless config options.
 
@@ -291,6 +296,41 @@ in
       };
     };
 
+    # Download NLTK corpus data.
+    # Oneshot unit that runs before paperless-scheduler and fetches the
+    # punkt, snowball_data and stopwords packages into `nltkDir`.
+    systemd.services.paperless-download-nltk-data = {
+      wantedBy = [ "paperless-scheduler.service" ];
+      before = [ "paperless-scheduler.service" ];
+      # `after` only orders this unit behind network-online.target; `wants`
+      # is needed to actually pull the target in.
+      wants = [ "network-online.target" ];
+      after = [ "network-online.target" ];
+      serviceConfig = defaultServiceConfig // {
+        User = cfg.user;
+        Type = "oneshot";
+        # Enable internet access
+        PrivateNetwork = false;
+        # Restrict write access (`//` replaces any BindPaths inherited
+        # from defaultServiceConfig with an empty list)
+        BindPaths = [];
+        # Read-only bind mounts for the store plus the resolver, TLS
+        # certificate, hosts and timezone files; a leading `-` makes a
+        # missing source path non-fatal.
+        BindReadOnlyPaths = [
+          "/nix/store"
+          "-/etc/resolv.conf"
+          "-/etc/nsswitch.conf"
+          "-/etc/ssl/certs"
+          "-/etc/static/ssl/certs"
+          "-/etc/hosts"
+          "-/etc/localtime"
+        ];
+        ExecStart = let pythonWithNltk = pkg.python.withPackages (ps: [ ps.nltk ]); in ''
+          ${pythonWithNltk}/bin/python -m nltk.downloader -d '${nltkDir}' punkt snowball_data stopwords
+        '';
+      };
+    };
+
     systemd.services.paperless-consumer = {
       description = "Paperless document consumer";
       # Bind to `paperless-scheduler` so that the consumer never runs