diff options
author | Atemu <atemu.main@gmail.com> | 2024-02-01 16:59:28 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-02-01 16:59:28 +0100 |
commit | f38bca1ca559974b36863d0ff4bb710b7bc12890 (patch) | |
tree | 8950996221b92d6b810467d23b8d44432cb21210 | |
parent | 2f61aff9ae76490380452d78a722ace1e71c5976 (diff) | |
parent | 6300f478e9d9a9a0f0d66004e50e67a107f9cea3 (diff) | |
download | nixlib-f38bca1ca559974b36863d0ff4bb710b7bc12890.tar nixlib-f38bca1ca559974b36863d0ff4bb710b7bc12890.tar.gz nixlib-f38bca1ca559974b36863d0ff4bb710b7bc12890.tar.bz2 nixlib-f38bca1ca559974b36863d0ff4bb710b7bc12890.tar.lz nixlib-f38bca1ca559974b36863d0ff4bb710b7bc12890.tar.xz nixlib-f38bca1ca559974b36863d0ff4bb710b7bc12890.tar.zst nixlib-f38bca1ca559974b36863d0ff4bb710b7bc12890.zip |
Merge pull request #279677 from leona-ya/paperless-nltk
nixos/paperless: use nltk_data package as NLTK data source
-rw-r--r-- | nixos/doc/manual/release-notes/rl-2405.section.md | 2 |
-rw-r--r-- | nixos/modules/services/misc/paperless.nix | 24 |
-rw-r--r-- | pkgs/applications/office/paperless-ngx/default.nix | 2 |
-rw-r--r-- | pkgs/tools/text/nltk_data/default.nix | 5 |
4 files changed, 14 insertions, 19 deletions
diff --git a/nixos/doc/manual/release-notes/rl-2405.section.md b/nixos/doc/manual/release-notes/rl-2405.section.md index 1566417bd41c..e26702f7a61d 100644 --- a/nixos/doc/manual/release-notes/rl-2405.section.md +++ b/nixos/doc/manual/release-notes/rl-2405.section.md @@ -298,6 +298,8 @@ The pre-existing [services.ankisyncd](#opt-services.ankisyncd.enable) has been m - Custom themes and other assets that were previously stored in `custom/public/*` now belong in `custom/public/assets/*` - New instances of Gitea using MySQL now ignore the `[database].CHARSET` config option and always use the `utf8mb4` charset, existing instances should migrate via the `gitea doctor convert` CLI command. +- The `services.paperless` module no longer uses the previously downloaded NLTK data stored in `/var/cache/paperless/nltk`. This directory can be removed. + - The `hardware.pulseaudio` module now sets permission of pulse user home directory to 755 when running in "systemWide" mode. It fixes [issue 114399](https://github.com/NixOS/nixpkgs/issues/114399). 
- The `btrbk` module now automatically selects and provides required compression diff --git a/nixos/modules/services/misc/paperless.nix b/nixos/modules/services/misc/paperless.nix index 9780a4d72257..1256d8315c8b 100644 --- a/nixos/modules/services/misc/paperless.nix +++ b/nixos/modules/services/misc/paperless.nix @@ -6,7 +6,6 @@ let pkg = cfg.package; defaultUser = "paperless"; - nltkDir = "/var/cache/paperless/nltk"; defaultFont = "${pkgs.liberation_ttf}/share/fonts/truetype/LiberationSerif-Regular.ttf"; # Don't start a redis instance if the user sets a custom redis connection @@ -17,13 +16,17 @@ let PAPERLESS_DATA_DIR = cfg.dataDir; PAPERLESS_MEDIA_ROOT = cfg.mediaDir; PAPERLESS_CONSUMPTION_DIR = cfg.consumptionDir; - PAPERLESS_NLTK_DIR = nltkDir; PAPERLESS_THUMBNAIL_FONT_NAME = defaultFont; GUNICORN_CMD_ARGS = "--bind=${cfg.address}:${toString cfg.port}"; } // optionalAttrs (config.time.timeZone != null) { PAPERLESS_TIME_ZONE = config.time.timeZone; } // optionalAttrs enableRedis { PAPERLESS_REDIS = "unix://${redisServer.unixSocket}"; + } // optionalAttrs (cfg.settings.PAPERLESS_ENABLE_NLTK or true) { + PAPERLESS_NLTK_DIR = pkgs.symlinkJoin { + name = "paperless_ngx_nltk_data"; + paths = pkg.nltkData; + }; } // (lib.mapAttrs (_: s: if (lib.isAttrs s || lib.isList s) then builtins.toJSON s else if lib.isBool s then lib.boolToString s @@ -292,23 +295,6 @@ in }; }; - # Download NLTK corpus data - systemd.services.paperless-download-nltk-data = { - wantedBy = [ "paperless-scheduler.service" ]; - before = [ "paperless-scheduler.service" ]; - after = [ "network-online.target" ]; - wants = [ "network-online.target" ]; - serviceConfig = defaultServiceConfig // { - User = cfg.user; - Type = "oneshot"; - # Enable internet access - PrivateNetwork = false; - ExecStart = let pythonWithNltk = pkg.python.withPackages (ps: [ ps.nltk ]); in '' - ${pythonWithNltk}/bin/python -m nltk.downloader -d '${nltkDir}' punkt snowball_data stopwords - ''; - }; - }; - 
systemd.services.paperless-consumer = { description = "Paperless document consumer"; # Bind to `paperless-scheduler` so that the consumer never runs diff --git a/pkgs/applications/office/paperless-ngx/default.nix b/pkgs/applications/office/paperless-ngx/default.nix index afe241222068..94b4ac77b68a 100644 --- a/pkgs/applications/office/paperless-ngx/default.nix +++ b/pkgs/applications/office/paperless-ngx/default.nix @@ -18,6 +18,7 @@ , xcbuild , pango , pkg-config +, nltk-data }: let @@ -293,6 +294,7 @@ python.pkgs.buildPythonApplication rec { passthru = { inherit python path frontend; + nltkData = with nltk-data; [ punkt snowball_data stopwords ]; tests = { inherit (nixosTests) paperless; }; }; diff --git a/pkgs/tools/text/nltk_data/default.nix b/pkgs/tools/text/nltk_data/default.nix index 118598083d5d..6445dd8c06e2 100644 --- a/pkgs/tools/text/nltk_data/default.nix +++ b/pkgs/tools/text/nltk_data/default.nix @@ -48,6 +48,11 @@ lib.makeScope newScope (self: { location = "taggers"; hash = "sha256-ilTs4HWPUoHxQb4kWEy3wJ6QsE/98+EQya44gtV2inw="; }); + snowball_data = makeNltkDataPackage ({ + pname = "snowball_data"; + location = "stemmers"; + hash = "sha256-Y6LERPtaRbCtWmJCvMAd2xH02xdrevZBFNYvP9N4+3s="; + }); stopwords = makeNltkDataPackage ({ pname = "stopwords"; location = "corpora"; |