summary refs log tree commit diff
path: root/pkgs/development/python-modules
diff options
context:
space:
mode:
author aszlig <aszlig@redmoonstudios.org> 2017-09-02 03:43:21 +0200
committer aszlig <aszlig@redmoonstudios.org> 2017-09-02 03:45:13 +0200
commit 3086fc7f832eec68db3e540202481c114f23133f (patch)
tree d29b0813903ee6a0f02ec20f4c5f0851cc914c71 /pkgs/development/python-modules
parent 40b76c880943af92f38df406c666e4eea8f758b2 (diff)
download nixlib-3086fc7f832eec68db3e540202481c114f23133f.tar
nixlib-3086fc7f832eec68db3e540202481c114f23133f.tar.gz
nixlib-3086fc7f832eec68db3e540202481c114f23133f.tar.bz2
nixlib-3086fc7f832eec68db3e540202481c114f23133f.tar.lz
nixlib-3086fc7f832eec68db3e540202481c114f23133f.tar.xz
nixlib-3086fc7f832eec68db3e540202481c114f23133f.tar.zst
nixlib-3086fc7f832eec68db3e540202481c114f23133f.zip
python/pyocr: Move package into python-modules
We already have a patch feeling lonely inside the python-modules
directory, and to have everything in one place, let's actually move pyocr
into its own dedicated directory so it's easier to patch it up (which
we're going to do).

Right now, the package fails to build because of a few test failures, so
I haven't tested this apart from evaluating.

Signed-off-by: aszlig <aszlig@redmoonstudios.org>
Diffstat (limited to 'pkgs/development/python-modules')
-rw-r--r-- pkgs/development/python-modules/pyocr/default.nix 64
-rw-r--r-- pkgs/development/python-modules/pyocr/tesseract.patch (renamed from pkgs/development/python-modules/pyocr-tesseract.patch) 0
2 files changed, 64 insertions, 0 deletions
diff --git a/pkgs/development/python-modules/pyocr/default.nix b/pkgs/development/python-modules/pyocr/default.nix
new file mode 100644
index 000000000000..65a8c741d6ef
--- /dev/null
+++ b/pkgs/development/python-modules/pyocr/default.nix
@@ -0,0 +1,64 @@
+{ lib, fetchFromGitHub, buildPythonPackage, pillow, six
+, tesseract, cuneiform
+}:
+
+buildPythonPackage rec {
+  name = "pyocr-${version}";
+  version = "0.4.6";
+
+  # Don't fetch from PYPI because it doesn't contain tests.
+  src = fetchFromGitHub {
+    owner = "jflesch";
+    repo = "pyocr";
+    rev = version;
+    sha256 = "0amyhkkm400qzbw65ivyzrzxl2r7vxqgsgqm7ml95m7gwkwhnzz0";
+  };
+
+  patches = [ ./tesseract.patch ];
+
+  postPatch = ''
+    sed -i \
+      -e 's,^\(TESSERACT_CMD *= *\).*,\1"${tesseract}/bin/tesseract",' \
+      -e 's,^\(CUNEIFORM_CMD *= *\).*,\1"${cuneiform}/bin/cuneiform",' \
+      -e '/^CUNIFORM_POSSIBLE_PATHS *= *\[/,/^\]$/ {
+        c CUNIFORM_POSSIBLE_PATHS = ["${cuneiform}/share/cuneiform"]
+      }' src/pyocr/{tesseract,cuneiform}.py
+
+    sed -i -r \
+      -e 's,"libtesseract\.so\.3","${tesseract}/lib/libtesseract.so",' \
+      -e 's,^(TESSDATA_PREFIX *=).*,\1 "${tesseract}/share/tessdata",' \
+      src/pyocr/libtesseract/tesseract_raw.py
+
+    # Disable specific tests that are probably failing because of this issue:
+    # https://github.com/jflesch/pyocr/issues/52
+    for test in $disabledTests; do
+      file="''${test%%:*}"
+      fun="''${test#*:}"
+      echo "$fun = unittest.skip($fun)" >> "tests/tests_$file.py"
+    done
+  '';
+
+  disabledTests = [
+    "cuneiform:TestTxt.test_basic"
+    "cuneiform:TestTxt.test_european"
+    "cuneiform:TestTxt.test_french"
+    "cuneiform:TestWordBox.test_basic"
+    "cuneiform:TestWordBox.test_european"
+    "cuneiform:TestWordBox.test_french"
+    "libtesseract:TestBasicDoc.test_basic"
+    "libtesseract:TestDigitLineBox.test_digits"
+    "libtesseract:TestLineBox.test_japanese"
+    "libtesseract:TestTxt.test_japanese"
+    "libtesseract:TestWordBox.test_japanese"
+    "tesseract:TestDigitLineBox.test_digits"
+    "tesseract:TestTxt.test_japanese"
+  ];
+
+  propagatedBuildInputs = [ pillow six ];
+
+  meta = {
+    homepage = "https://github.com/jflesch/pyocr";
+    description = "A Python wrapper for Tesseract and Cuneiform";
+    license = lib.licenses.gpl3Plus;
+  };
+}
diff --git a/pkgs/development/python-modules/pyocr-tesseract.patch b/pkgs/development/python-modules/pyocr/tesseract.patch
index d09a7c57352e..d09a7c57352e 100644
--- a/pkgs/development/python-modules/pyocr-tesseract.patch
+++ b/pkgs/development/python-modules/pyocr/tesseract.patch