about summary refs log tree commit diff
path: root/nixpkgs/pkgs/development/python-modules/spacy
diff options
context:
space:
mode:
author Alyssa Ross <hi@alyssa.is> 2021-01-10 07:13:44 +0000
committer Alyssa Ross <hi@alyssa.is> 2021-01-12 14:07:16 +0000
commit e2698550456abba83c6dcd5d5e5a9990a0b96f8a (patch)
tree 79a56f0df3fa55e470d84b4dff6059fbf487ec18 /nixpkgs/pkgs/development/python-modules/spacy
parent 1cdc42df888dc98c347e03bd942ed9825a55bcb3 (diff)
parent 84d74ae9c9cbed73274b8e4e00be14688ffc93fe (diff)
download nixlib-e2698550456abba83c6dcd5d5e5a9990a0b96f8a.tar
nixlib-e2698550456abba83c6dcd5d5e5a9990a0b96f8a.tar.gz
nixlib-e2698550456abba83c6dcd5d5e5a9990a0b96f8a.tar.bz2
nixlib-e2698550456abba83c6dcd5d5e5a9990a0b96f8a.tar.lz
nixlib-e2698550456abba83c6dcd5d5e5a9990a0b96f8a.tar.xz
nixlib-e2698550456abba83c6dcd5d5e5a9990a0b96f8a.tar.zst
nixlib-e2698550456abba83c6dcd5d5e5a9990a0b96f8a.zip
Merge commit '84d74ae9c9cbed73274b8e4e00be14688ffc93fe'
Diffstat (limited to 'nixpkgs/pkgs/development/python-modules/spacy')
-rw-r--r-- nixpkgs/pkgs/development/python-modules/spacy/annotation-test/annotate.py 69
-rw-r--r-- nixpkgs/pkgs/development/python-modules/spacy/annotation-test/default.nix 23
-rw-r--r-- nixpkgs/pkgs/development/python-modules/spacy/default.nix 16
-rw-r--r-- nixpkgs/pkgs/development/python-modules/spacy/models.json 182
-rw-r--r-- nixpkgs/pkgs/development/python-modules/spacy/models.nix 12
5 files changed, 283 insertions, 19 deletions
diff --git a/nixpkgs/pkgs/development/python-modules/spacy/annotation-test/annotate.py b/nixpkgs/pkgs/development/python-modules/spacy/annotation-test/annotate.py
new file mode 100644
index 000000000000..822eb8ac0743
--- /dev/null
+++ b/nixpkgs/pkgs/development/python-modules/spacy/annotation-test/annotate.py
@@ -0,0 +1,69 @@
+import pytest
+import spacy
+
+en_text = (
+    "When Sebastian Thrun started working on self-driving cars at "
+    "Google in 2007, few people outside of the company took him "
+    "seriously. “I can tell you very senior CEOs of major American "
+    "car companies would shake my hand and turn away because I wasn’t "
+    "worth talking to,” said Thrun, in an interview with Recode earlier "
+    "this week.")
+
+
+@pytest.fixture
+def en_core_web_sm():
+    return spacy.load("en_core_web_sm")
+
+
+@pytest.fixture
+def doc_en_core_web_sm(en_core_web_sm):
+    return en_core_web_sm(en_text)
+
+
+def test_entities(doc_en_core_web_sm):
+    entities = list(map(lambda e: (e.text, e.label_),
+                        doc_en_core_web_sm.ents))
+
+    assert entities == [
+        ('Sebastian Thrun', 'PERSON'),
+        ('Google', 'ORG'), ('2007', 'DATE'),
+        ('American', 'NORP'),
+        ('Thrun', 'ORG'),
+        ('earlier this week', 'DATE')
+    ]
+
+
+def test_nouns(doc_en_core_web_sm):
+    assert [
+        chunk.text for chunk in doc_en_core_web_sm.noun_chunks] == [
+        'Sebastian Thrun',
+        'self-driving cars',
+        'Google',
+        'few people',
+        'the company',
+        'him',
+        'I',
+        'you',
+        'very senior CEOs',
+        'major American car companies',
+        'my hand',
+        'I',
+        'Thrun',
+        'an interview',
+        'Recode']
+
+
+def test_verbs(doc_en_core_web_sm):
+    assert [
+        token.lemma_ for token in doc_en_core_web_sm if token.pos_ == "VERB"] == [
+        'start',
+        'work',
+        'drive',
+        'take',
+        'can',
+        'tell',
+        'would',
+        'shake',
+        'turn',
+        'talk',
+        'say']
diff --git a/nixpkgs/pkgs/development/python-modules/spacy/annotation-test/default.nix b/nixpkgs/pkgs/development/python-modules/spacy/annotation-test/default.nix
new file mode 100644
index 000000000000..1b066ff3c39d
--- /dev/null
+++ b/nixpkgs/pkgs/development/python-modules/spacy/annotation-test/default.nix
@@ -0,0 +1,23 @@
+{ stdenv, pytest, spacy_models }:
+
+stdenv.mkDerivation {
+  name = "spacy-annotation-test";
+
+  src = ./.;
+
+  dontConfigure = true;
+  dontBuild = true;
+  doCheck = true;
+
+  checkInputs = [ pytest spacy_models.en_core_web_sm ];
+
+  checkPhase = ''
+    pytest annotate.py
+  '';
+
+  installPhase = ''
+    touch $out
+  '';
+
+  meta.timeout = 60;
+}
diff --git a/nixpkgs/pkgs/development/python-modules/spacy/default.nix b/nixpkgs/pkgs/development/python-modules/spacy/default.nix
index f0a143ec30c7..077aea4dc9a3 100644
--- a/nixpkgs/pkgs/development/python-modules/spacy/default.nix
+++ b/nixpkgs/pkgs/development/python-modules/spacy/default.nix
@@ -1,5 +1,6 @@
 { lib
 , buildPythonPackage
+, callPackage
 , fetchPypi
 , pythonOlder
 , pytest
@@ -21,11 +22,11 @@
 
 buildPythonPackage rec {
   pname = "spacy";
-  version = "2.3.0";
+  version = "2.3.2";
 
   src = fetchPypi {
     inherit pname version;
-    sha256 = "0nri437dyapiq5gx8lbmjdfvqw2cnw3di13kp44rzr17bm5yh2jv";
+    sha256 = "07zf7kivj4r1n6xwisld7n90bpi095bqbc9xpv668grq1rpf53c1";
   };
 
   propagatedBuildInputs = [
@@ -54,9 +55,18 @@ buildPythonPackage rec {
   # '';
 
   postPatch = ''
-    substituteInPlace setup.cfg --replace "thinc==7.4.1" "thinc>=7.4.1,<8"
+    substituteInPlace setup.cfg \
+      --replace "blis>=0.4.0,<0.5.0" "blis>=0.4.0,<1.0" \
+      --replace "catalogue>=0.0.7,<1.1.0" "catalogue>=0.0.7,<3.0" \
+      --replace "plac>=0.9.6,<1.2.0" "plac>=0.9.6,<2.0" \
+      --replace "srsly>=1.0.2,<1.1.0" "srsly>=1.0.2,<3.0" \
+      --replace "thinc==7.4.1" "thinc>=7.4.1,<8"
   '';
 
+  pythonImportsCheck = [ "spacy" ];
+
+  passthru.tests = callPackage ./annotation-test {};
+
   meta = with lib; {
     description = "Industrial-strength Natural Language Processing (NLP) with Python and Cython";
     homepage = "https://github.com/explosion/spaCy";
diff --git a/nixpkgs/pkgs/development/python-modules/spacy/models.json b/nixpkgs/pkgs/development/python-modules/spacy/models.json
index 1c02b05d303e..3a427f8932a5 100644
--- a/nixpkgs/pkgs/development/python-modules/spacy/models.json
+++ b/nixpkgs/pkgs/development/python-modules/spacy/models.json
@@ -1,79 +1,145 @@
 [{
+  "pname": "da_core_news_lg",
+  "version": "2.3.0",
+  "sha256": "18y1jany1ha27jkwb9563haxsbylm0axkh5c8009lsfxc8y2w9hr",
+  "license": "cc-by-sa-40"
+},
+{
+  "pname": "da_core_news_md",
+  "version": "2.3.0",
+  "sha256": "06nm5grj5jdx0rja7vw1f91vvd69p6vhafrwpfr1npqk24j6cacb",
+  "license": "cc-by-sa-40"
+},
+{
+  "pname": "da_core_news_sm",
+  "version": "2.3.0",
+  "sha256": "00byhlrcbg4wxplr473g9b3126pvk4vwy0q34xg0zx4994qb6rgn",
+  "license": "cc-by-sa-40"
+},
+{
+  "pname": "de_core_news_lg",
+  "version": "2.3.0",
+  "sha256": "0rixhgdz4z7sq6f2b1w3n5cn1645cr37g40hbd9xzlvdzdf5cg6d",
+  "license": "mit"
+},
+{
   "pname": "de_core_news_md",
   "version": "2.3.0",
   "sha256": "0kxir1w000r5fn1kpa38m7688xinkn2mk1m82aiwqlck3r72jdi6",
-  "license": "cc-by-sa-40"
+  "license": "mit"
 },
 {
   "pname": "de_core_news_sm",
   "version": "2.3.0",
   "sha256": "00cbmrf4njg28laysapdnp4rv4lw4yw03rxkynw1ain5fwb0izl7",
-  "license": "cc-by-sa-40"
+  "license": "mit"
+},
+{
+  "pname": "el_core_news_lg",
+  "version": "2.3.0",
+  "sha256": "001c19dd1kirlvzbdv2i89zw8nf6c4icv2w0chm7rd6x9m3i13gd",
+  "license": "cc-by-nc-sa-30"
 },
 {
   "pname": "el_core_news_md",
   "version": "2.3.0",
   "sha256": "170x8bzm5nf02mhkxyxjk58yk2639hsjb5b9prcc69500c0vmnp0",
-  "license": "cc-by-sa-40"
+  "license": "cc-by-nc-sa-30"
 },
 {
   "pname": "el_core_news_sm",
   "version": "2.3.0",
   "sha256": "10mh3za4jvr07rawzk8ps642rp11s3smraj9xvrxflik4fqkz18b",
-  "license": "cc-by-sa-40"
+  "license": "cc-by-nc-sa-30"
 },
 {
   "pname": "en_core_web_lg",
   "version": "2.3.0",
   "sha256": "0mfa5wz31ya295jhyj489gb4qy806zmpq1zc11bvv5alv2m35if2",
-  "license": "cc-by-sa-40"
+  "license": "mit"
 },
 {
   "pname": "en_core_web_md",
   "version": "2.3.0",
   "sha256": "1ys8sqkhiap1mq6mhbkbq8bc07lvl68xngbx725xkwvirzl5gabh",
-  "license": "cc-by-sa-40"
+  "license": "mit"
 },
 {
   "pname": "en_core_web_sm",
   "version": "2.3.0",
   "sha256": "04icv9qf4pj53ll8vqxcjl2a723q1k00i7lifk8wx5saif28g37a",
-  "license": "cc-by-sa-40"
+  "license": "mit"
 },
 {
   "pname": "en_vectors_web_lg",
   "version": "2.3.0",
   "sha256": "13g012rwh0bcxx3ii5mmygqzyryah1y3zd000zhidnacc1x1g743",
-  "license": "cc-by-sa-40"
+  "license": "cc-by-sa-30"
+},
+{
+  "pname": "es_core_news_lg",
+  "version": "2.3.0",
+  "sha256": "1r0pr0lzs3j9w7rd5z9nw87iayjm36v7f7gamvaiiphs6bc5p7ls",
+  "license": "gpl3"
 },
 {
   "pname": "es_core_news_md",
   "version": "2.3.0",
   "sha256": "0nz33bmpr3rxqbnv6vb1id8pkfsvh8ii8vqplwgb3b8772kmpzy2",
-  "license": "cc-by-sa-40"
+  "license": "gpl3"
 },
 {
   "pname": "es_core_news_sm",
   "version": "2.3.0",
   "sha256": "02xqhg4m0gg5r9yibvl02zixkll6w0nsmbdhp07y5yyaqjarc90d",
-  "license": "cc-by-sa-40"
+  "license": "gpl3"
+},
+{
+  "pname": "fr_core_news_lg",
+  "version": "2.3.0",
+  "sha256": "1yliamws8nqqjhpk9gr2dzlk0dms2mr958zbj21biv8fimbq60ik",
+  "license": "lgpllr"
 },
 {
   "pname": "fr_core_news_md",
   "version": "2.3.0",
   "sha256": "04fk212ksac3bp9dj7dmzsdcnbqmbsgymsic6ddcv9zbfdv5d0db",
-  "license": "cc-by-sa-40"
+  "license": "lgpllr"
 },
 {
   "pname": "fr_core_news_sm",
   "version": "2.3.0",
   "sha256": "0kldww855z67qfc9maa9z1lsvdf5vj5vc8gj0x3h68kv5n1xr4h0",
-  "license": "cc-by-sa-40"
+  "license": "lgpllr"
+},
+{
+  "pname": "it_core_news_lg",
+  "version": "2.3.0",
+  "sha256": "192rdmqnwl3ajxzhnw4r1cqv5bkziv0yc2bbzckmzqss64wk7k70",
+  "license": "cc-by-nc-sa-30"
+},
+{
+  "pname": "it_core_news_md",
+  "version": "2.3.0",
+  "sha256": "019ih4vwq1w6j38j0wc8pyyg1an6yy37wxq2w4amwppynmmcnd5w",
+  "license": "cc-by-nc-sa-30"
 },
 {
   "pname": "it_core_news_sm",
   "version": "2.3.0",
   "sha256": "1c3ywqa8li0j7cyvd1xqbb096y61978hd6qv7rc6cxxjdhmkrrds",
+  "license": "cc-by-nc-sa-40"
+},
+{
+  "pname": "lt_core_news_lg",
+  "version": "2.3.0",
+  "sha256": "0hn5w8n7mgv33i6gvnaxl1j44n9gz4j86gg1a9jjlgdw5z98n0p2",
+  "license": "cc-by-sa-40"
+},
+{
+  "pname": "lt_core_news_md",
+  "version": "2.3.0",
+  "sha256": "1xya79cz2xd5vgzg6qg0ww5j2bmv7kppdk3mdjf6zpwrlzwdbk5d",
   "license": "cc-by-sa-40"
 },
 {
@@ -83,26 +149,116 @@
   "license": "cc-by-sa-40"
 },
 {
+  "pname": "nb_core_news_lg",
+  "version": "2.3.0",
+  "sha256": "18mblypw3c82004qz5w1p3262iqwn99wl9b781dq7aqwxskr02d6",
+  "license": "mit"
+},
+{
+  "pname": "nb_core_news_md",
+  "version": "2.3.0",
+  "sha256": "0iw97k9glxbar8mrpvnmmcb1nffgdhb83akn99p53pwmqbzxy9p3",
+  "license": "mit"
+},
+{
   "pname": "nb_core_news_sm",
   "version": "2.3.0",
   "sha256": "07b7xri2q3m7fvn9a2gjc1044a3f14231vr32hrw96h7k6vg95h7",
   "license": "mit"
 },
 {
+  "pname": "nl_core_news_lg",
+  "version": "2.3.0",
+  "sha256": "016166kzpgi0p3m0x3k308a0r60a28yz7npagjvmpl1dfm9lzhnv",
+  "license": "cc-by-sa-40"
+},
+{
+  "pname": "nl_core_news_md",
+  "version": "2.3.0",
+  "sha256": "1anfhig531k9k14s0cbgsvmvifp3h50qi1h8dhx894kjmq10k2lg",
+  "license": "cc-by-sa-40"
+},
+{
   "pname": "nl_core_news_sm",
   "version": "2.3.0",
   "sha256": "0alvz7pn7cj0yax8h5gp71vrdblh3mcsmyhzgiddsd44ry35nxnj",
   "license": "cc-by-sa-40"
 },
 {
+  "pname": "pl_core_news_lg",
+  "version": "2.3.0",
+  "sha256": "1acchp8pv1h4c6cwvxz07lh4ychn6aw809zfg3mbbsxgsgd2ahjr",
+  "license": "gpl3"
+},
+{
+  "pname": "pl_core_news_md",
+  "version": "2.3.0",
+  "sha256": "19jjjjvbys3ayibkm3cx497b4bh63ll39hfq04wx116rj4ajpwwg",
+  "license": "gpl3"
+},
+{
+  "pname": "pl_core_news_sm",
+  "version": "2.3.0",
+  "sha256": "166mqlfkgiszcc6hwg2mr6sir9y88y22nd81a1nidq0fiif5lfji",
+  "license": "gpl3"
+},
+{
+  "pname": "pt_core_news_lg",
+  "version": "2.3.0",
+  "sha256": "18gvdmfwyy2sbq6206imglhghyagd6a4gb6wcfkwhm7lzbnq714d",
+  "license": "cc-by-sa-40"
+},
+{
+  "pname": "pt_core_news_md",
+  "version": "2.3.0",
+  "sha256": "1yxnpwby2aq6ydvd35lylc4fs141fisfnzlx8pl88pp2b2gxijvl",
+  "license": "cc-by-sa-40"
+},
+{
   "pname": "pt_core_news_sm",
   "version": "2.3.0",
   "sha256": "1vcvzdg9f93x0vaafkk9l9xhpmaavfj0cf0l3p06c5kx2d76f9ph",
   "license": "cc-by-sa-40"
 },
 {
+  "pname": "ro_core_news_sm",
+  "version": "2.3.0",
+  "sha256": "0lsmbdwsaczv37y5sa1vvgwszy2hs8jp24a0nvc5qm8vb71rxj8w",
+  "license": "cc-by-sa-40"
+},
+{
+  "pname": "ro_core_news_md",
+  "version": "2.3.0",
+  "sha256": "1igwkz3yd1117gi2g78yilh9ln8n5yrdimas4prfxjgzwid3q8bc",
+  "license": "cc-by-sa-40"
+},
+{
+  "pname": "ro_core_news_lg",
+  "version": "2.3.0",
+  "sha256": "0id1y32kjfans7llh1i55rgr4n2x6xn208y4qf6yl3pbc17i0n9z",
+  "license": "cc-by-sa-40"
+},
+{
   "pname": "xx_ent_wiki_sm",
   "version": "2.3.0",
   "sha256": "0x3zmmybl5kh4dn5prkfmr4q5j9bh13p40qc3rhdfi0i3jxc11pn",
-  "license": "cc-by-sa-40"
+  "license": "mit"
+},
+{
+  "pname": "zh_core_web_lg",
+  "version": "2.3.1",
+  "sha256": "17zxk7cz47k07yb3qaigc3sx4dj4zwilr1lsn2jq6w7jc8k2h1ll",
+  "license": "mit"
+},
+{
+  "pname": "zh_core_web_md",
+  "version": "2.3.1",
+  "sha256": "1n4iwxyam4ykn0f9jdzwkhczack8r9c3kkbyga3c4h6iwqsflzcj",
+  "license": "mit"
+},
+{
+  "pname": "zh_core_web_sm",
+  "version": "2.3.1",
+  "sha256": "1lj5nwhx38cpwnvajwxlfkf84dr1xx2h6wwbg3scycsh459i9fpc",
+  "license": "mit"
 }]
diff --git a/nixpkgs/pkgs/development/python-modules/spacy/models.nix b/nixpkgs/pkgs/development/python-modules/spacy/models.nix
index b091c3b7d56c..c4a314ea4742 100644
--- a/nixpkgs/pkgs/development/python-modules/spacy/models.nix
+++ b/nixpkgs/pkgs/development/python-modules/spacy/models.nix
@@ -1,6 +1,9 @@
-{ stdenv, buildPythonPackage, fetchurl, spacy }:
+{ stdenv, lib, buildPythonPackage, fetchurl, jieba, pkuseg, spacy }:
 let
-  buildModelPackage = { pname, version, sha256, license }: buildPythonPackage {
+  buildModelPackage = { pname, version, sha256, license }:
+  let
+    lang = builtins.substring 0 2 pname;
+  in buildPythonPackage {
     inherit pname version;
 
     src = fetchurl {
@@ -8,7 +11,10 @@ let
       inherit sha256;
     };
 
-    propagatedBuildInputs = [ spacy ];
+    propagatedBuildInputs = [ spacy ]
+      ++ lib.optionals (lang == "zh") [ jieba pkuseg ];
+
+    pythonImportsCheck = [ pname ];
 
     meta = with stdenv.lib; {
       description = "Models for the spaCy NLP library";