summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--pkgs/development/python-modules/jieba/default.nix33
-rw-r--r--pkgs/development/python-modules/langcodes/default.nix34
-rw-r--r--pkgs/development/python-modules/marisa-trie/default.nix34
-rw-r--r--pkgs/development/python-modules/mecab-python3/default.nix24
-rw-r--r--pkgs/development/python-modules/wordfreq/default.nix48
-rw-r--r--pkgs/top-level/python-packages.nix10
6 files changed, 183 insertions, 0 deletions
diff --git a/pkgs/development/python-modules/jieba/default.nix b/pkgs/development/python-modules/jieba/default.nix
new file mode 100644
index 000000000000..df21c0f41861
--- /dev/null
+++ b/pkgs/development/python-modules/jieba/default.nix
@@ -0,0 +1,33 @@
+{ lib, buildPythonPackage, fetchFromGitHub, glibcLocales, python, isPy3k }:
+
+buildPythonPackage rec {
+  pname = "jieba";
+  version = "0.39";
+
+  # Fetched from GitHub rather than PyPI: there are no tests in the PyPI tarball
+  src = fetchFromGitHub {
+    owner = "fxsjy";
+    repo = pname;
+    rev = "v${version}";
+    sha256 = "0hbq0l1jbgcvm58qg4p37im4jl5a9igvq3wlhlk22pmbkbvqqgzs";
+  };
+
+  checkInputs = [ glibcLocales ]; # locale data for the LC_ALL exported in checkPhase
+
+  # Tests are only run on Python 3 (UnicodeEncodeError otherwise, per the original note)
+  doCheck = isPy3k;
+
+  # Citing https://github.com/fxsjy/jieba/issues/384: "testcases is in a mess"
+  # So only a hand-picked pair of test scripts that currently pass is run
+  checkPhase = ''
+    export LC_ALL=en_US.UTF-8
+    ${python.interpreter} test/test.py
+    ${python.interpreter} test/test_tokenize.py
+  '';
+
+  meta = with lib; {
+    description = "Chinese Words Segmentation Utilities";
+    homepage = "https://github.com/fxsjy/jieba";
+    license = licenses.mit;
+  };
+}
diff --git a/pkgs/development/python-modules/langcodes/default.nix b/pkgs/development/python-modules/langcodes/default.nix
new file mode 100644
index 000000000000..d1d25f5407c2
--- /dev/null
+++ b/pkgs/development/python-modules/langcodes/default.nix
@@ -0,0 +1,34 @@
+{ lib
+, buildPythonPackage
+, marisa-trie
+, pythonOlder
+, fetchPypi
+, nose
+}:
+
+buildPythonPackage rec {
+  pname = "langcodes";
+  version = "1.4.1";
+
+  src = fetchPypi {
+    inherit pname version;
+    sha256 = "1axdiva2qglsjmnx2ak7i6hm0yhp6kbc4lcsgn8ckwy0nq1z3kr2";
+  };
+
+  propagatedBuildInputs = [ marisa-trie ]; # runtime dependency, propagated to dependents
+
+  disabled = pythonOlder "3.3"; # package is not built for interpreters older than 3.3
+
+  checkInputs = [ nose ]; # provides the nosetests runner used below
+
+  checkPhase = ''
+    nosetests
+  '';
+
+  meta = with lib; {
+    description = "A toolkit for working with and comparing the standardized codes for languages, such as ‘en’ for English or ‘es’ for Spanish";
+    homepage = "https://github.com/LuminosoInsight/langcodes";
+    license = licenses.mit;
+    maintainers = with maintainers; [ ixxie ];
+  };
+}
diff --git a/pkgs/development/python-modules/marisa-trie/default.nix b/pkgs/development/python-modules/marisa-trie/default.nix
new file mode 100644
index 000000000000..ad9f6d6f2457
--- /dev/null
+++ b/pkgs/development/python-modules/marisa-trie/default.nix
@@ -0,0 +1,34 @@
+{ lib, buildPythonPackage, fetchPypi, pytestrunner, pytest, hypothesis }:
+
+# Cython-based Python bindings for the marisa-trie C++ library
+# (an alternative to the SWIG bindings shipped with the C++ distribution).
+buildPythonPackage rec {
+  pname = "marisa-trie";
+  version = "0.7.4";
+
+  # Release tarball from PyPI
+  src = fetchPypi {
+    inherit pname version;
+    sha256 = "1n4pxnaranbh3x2fcqxwh8j1z2918vy7i4q1z4jn75m9rkm5h8ia";
+  };
+
+  # NOTE(review): presumably required by setup.py at build time - confirm
+  nativeBuildInputs = [ pytestrunner ];
+
+  checkInputs = [ pytest hypothesis ];
+
+  # setup.py pins hypothesis with "=="; turn the pin into a lower bound
+  # so that a newer hypothesis is accepted as well.
+  postPatch = ''
+    substituteInPlace setup.py \
+      --replace "hypothesis==" "hypothesis>="
+  '';
+
+  meta = with lib; {
+    homepage = "https://github.com/kmike/marisa-trie";
+    description = "Static memory-efficient Trie-like structures for Python (2.x and 3.x) based on marisa-trie C++ library";
+    longDescription = "There are official SWIG-based Python bindings included in C++ library distribution; this package provides alternative Cython-based pip-installable Python bindings.";
+    license = licenses.mit;
+    maintainers = with maintainers; [ ixxie ];
+  };
+}
diff --git a/pkgs/development/python-modules/mecab-python3/default.nix b/pkgs/development/python-modules/mecab-python3/default.nix
new file mode 100644
index 000000000000..110d1a57c826
--- /dev/null
+++ b/pkgs/development/python-modules/mecab-python3/default.nix
@@ -0,0 +1,24 @@
+{ lib
+, buildPythonPackage
+, mecab
+, fetchPypi
+}:
+
+buildPythonPackage rec {
+  pname = "mecab-python3";
+  version = "0.7";
+
+  src = fetchPypi {
+    inherit pname version;
+    sha256 = "007dg4f5fby2yl7cc44x6xwvcrf2w2ifmn0rmk56ss33mhs8l6qy";
+  };
+
+  propagatedBuildInputs = [ mecab ]; # the MeCab engine this package wraps
+
+  meta = with lib; {
+    description = "A python wrapper for mecab: Morphological Analysis engine";
+    homepage = "https://github.com/SamuraiT/mecab-python3";
+    license = licenses.bsd0; # NOTE(review): zero-clause BSD looks unusual here - confirm against upstream's license file
+    maintainers = with maintainers; [ ixxie ];
+  };
+}
diff --git a/pkgs/development/python-modules/wordfreq/default.nix b/pkgs/development/python-modules/wordfreq/default.nix
new file mode 100644
index 000000000000..9de1fd5b3922
--- /dev/null
+++ b/pkgs/development/python-modules/wordfreq/default.nix
@@ -0,0 +1,48 @@
+{ lib
+, buildPythonPackage
+, fetchFromGitHub
+, pythonOlder
+, regex
+, langcodes
+, ftfy
+, msgpack
+, mecab-python3
+, jieba
+, nose
+}:
+
+buildPythonPackage rec {
+  pname = "wordfreq";
+  version = "2.0";
+
+  disabled = pythonOlder "3";
+
+  src = fetchFromGitHub {
+    owner = "LuminosoInsight";
+    repo = pname;
+    rev = "e3a1b470d9f8e0d82e9f179ffc41abba434b823b";
+    sha256 = "1wjkhhj7nxfnrghwvmvwc672s30lp4b7yr98gxdxgqcq6wdshxwv";
+  };
+
+  # setup.py pins regex with "=="; loosen the pin to a lower bound so
+  # that an upgrade of the regex package does not break this one
+  postPatch = ''
+    substituteInPlace setup.py --replace "regex ==" "regex >="
+  '';
+
+  propagatedBuildInputs = [ regex langcodes ftfy msgpack mecab-python3 jieba ];
+
+  checkInputs = [ nose ];
+
+  checkPhase = ''
+    # These languages require additional dictionaries
+    nosetests -e test_japanese -e test_korean -e test_languages
+  '';
+
+  meta = with lib; {
+    description = "A library for looking up the frequencies of words in many languages, based on many sources of data";
+    homepage = "https://github.com/LuminosoInsight/wordfreq/";
+    license = licenses.mit;
+    maintainers = with maintainers; [ ixxie ];
+  };
+}
diff --git a/pkgs/top-level/python-packages.nix b/pkgs/top-level/python-packages.nix
index 0753599255d9..7d45b392792e 100644
--- a/pkgs/top-level/python-packages.nix
+++ b/pkgs/top-level/python-packages.nix
@@ -4988,6 +4988,8 @@ in {
 
   jdcal = callPackage ../development/python-modules/jdcal { };
 
+  jieba = callPackage ../development/python-modules/jieba { };
+
   internetarchive = callPackage ../development/python-modules/internetarchive {};
 
   JPype1 = callPackage ../development/python-modules/JPype1 {};
@@ -9177,6 +9179,8 @@ in {
     marisa = pkgs.marisa;
   };
 
+  marisa-trie = callPackage ../development/python-modules/marisa-trie { };
+
   markupsafe = buildPythonPackage rec {
     name = "markupsafe-${version}";
     version = "1.0";
@@ -10733,6 +10737,8 @@ in {
     };
   };
 
+  langcodes = callPackage ../development/python-modules/langcodes { };
+
   livestreamer = buildPythonPackage rec {
     version = "1.12.2";
     name = "livestreamer-${version}";
@@ -11320,6 +11326,8 @@ in {
     propagatedBuildInputs = with self; [ six requests ];
   };
 
+  mecab-python3 = callPackage ../development/python-modules/mecab-python3 { };
+
   mox3 = buildPythonPackage rec {
     name = "mox3-${version}";
     version = "0.23.0";
@@ -17098,6 +17106,8 @@ EOF
 
   widgetsnbextension = callPackage ../development/python-modules/widgetsnbextension { };
 
+  wordfreq = callPackage ../development/python-modules/wordfreq { };
+
   magic-wormhole = callPackage ../development/python-modules/magic-wormhole { };
 
   magic-wormhole-transit-relay = callPackage ../development/python-modules/magic-wormhole-transit-relay { };