path: root/pkgs/by-name/op/open-english-wordnet/package.nix
{ lib
, fetchFromGitHub
, fetchpatch
, gzip
, python3
, stdenvNoCC
}:

stdenvNoCC.mkDerivation (self: {
  pname = "open-english-wordnet";
  version = "2022";

  src = fetchFromGitHub {
    owner = "globalwordnet";
    repo = "english-wordnet";
    rev = "${self.version}-edition";
    hash = "sha256-a1fWIp39uuJZL1aFX/r+ttLB1+kwh/XPHwphgENTQ5M=";
  };

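  # Fetch each upstream fix as a patch from GitHub; the attribute set below
  # maps commit revisions to the hashes of their corresponding patches.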
  patches = lib.mapAttrsToList (rev: hash: fetchpatch {
    url = "https://github.com/globalwordnet/english-wordnet/commit/${rev}.patch";
    inherit hash;
  }) {
    # Upstream commit bumping the version number, accidentally omitted from the tagged release
    "bc07902f8995b62c70f01a282b23f40f30630540" = "sha256-1e4MG/k86g3OFUhiShCCbNXnvDKrYFr1KlGVsGl++KI=";
    # PR #982, “merge.py: Make result independent of filesystem order”
    "6da46a48dd76a48ad9ff563e6c807b8271fc83cd" = "sha256-QkkJH7NVGy/IbeSWkotU80IGF4esz0b8mIL9soHdQtQ=";
  };

  # TODO(nicoo): make compression optional?
  nativeBuildInputs = [
    gzip
    (python3.withPackages (p: with p; [ pyyaml ]))
  ];

  # TODO(nicoo): generate LMF and WNDB versions with separate outputs
  buildPhase = ''
    runHook preBuild

    echo Generating wn.xml
    python scripts/from-yaml.py
    python scripts/merge.py

    echo Compressing
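    # --no-name omits the original file name and timestamp from the gzip
    # header, keeping the compressed output reproducible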
    gzip --best --no-name --stdout ./wn.xml > 'oewn:${self.version}.xml.gz'

    runHook postBuild
  '';

  installPhase = ''
    runHook preInstall
    install -Dt $out/share/wordnet 'oewn:${self.version}.xml.gz'
    runHook postInstall
  '';

  meta = with lib; {
    description = "Lexical network of the English language";
    longDescription = ''
      Open English WordNet is a lexical network of the English language grouping
      words into synsets and linking them according to relationships such as
      hypernymy, antonymy and meronymy. It is intended to be used in natural
      language processing applications and provides deep lexical information
      about the English language as a graph.

      Open English WordNet is a fork of the Princeton WordNet developed under
      an open-source methodology.
    '';
    homepage = "https://en-word.net/";
    license = licenses.cc-by-40;
    maintainers = with maintainers; [ nicoo ];
    platforms = platforms.all;
  };
})