about summary refs log tree commit diff
path: root/nixpkgs/pkgs/development/python-modules/spacy/annotation-test/annotate.py
diff options
context:
space:
mode:
Diffstat (limited to 'nixpkgs/pkgs/development/python-modules/spacy/annotation-test/annotate.py')
-rw-r--r--nixpkgs/pkgs/development/python-modules/spacy/annotation-test/annotate.py69
1 files changed, 69 insertions, 0 deletions
diff --git a/nixpkgs/pkgs/development/python-modules/spacy/annotation-test/annotate.py b/nixpkgs/pkgs/development/python-modules/spacy/annotation-test/annotate.py
new file mode 100644
index 000000000000..822eb8ac0743
--- /dev/null
+++ b/nixpkgs/pkgs/development/python-modules/spacy/annotation-test/annotate.py
@@ -0,0 +1,69 @@
+import pytest
+import spacy
+
+en_text = (
+    "When Sebastian Thrun started working on self-driving cars at "
+    "Google in 2007, few people outside of the company took him "
+    "seriously. “I can tell you very senior CEOs of major American "
+    "car companies would shake my hand and turn away because I wasn’t "
+    "worth talking to,” said Thrun, in an interview with Recode earlier "
+    "this week.")
+
+
+@pytest.fixture
+def en_core_web_sm():
+    return spacy.load("en_core_web_sm")
+
+
+@pytest.fixture
+def doc_en_core_web_sm(en_core_web_sm):
+    return en_core_web_sm(en_text)
+
+
+def test_entities(doc_en_core_web_sm):
+    entities = list(map(lambda e: (e.text, e.label_),
+                        doc_en_core_web_sm.ents))
+
+    assert entities == [
+        ('Sebastian Thrun', 'PERSON'),
+        ('Google', 'ORG'), ('2007', 'DATE'),
+        ('American', 'NORP'),
+        ('Thrun', 'ORG'),
+        ('earlier this week', 'DATE')
+    ]
+
+
+def test_nouns(doc_en_core_web_sm):
+    assert [
+        chunk.text for chunk in doc_en_core_web_sm.noun_chunks] == [
+        'Sebastian Thrun',
+        'self-driving cars',
+        'Google',
+        'few people',
+        'the company',
+        'him',
+        'I',
+        'you',
+        'very senior CEOs',
+        'major American car companies',
+        'my hand',
+        'I',
+        'Thrun',
+        'an interview',
+        'Recode']
+
+
+def test_verbs(doc_en_core_web_sm):
+    assert [
+        token.lemma_ for token in doc_en_core_web_sm if token.pos_ == "VERB"] == [
+        'start',
+        'work',
+        'drive',
+        'take',
+        'can',
+        'tell',
+        'would',
+        'shake',
+        'turn',
+        'talk',
+        'say']