about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- nixos/lib/testing.nix 2
-rw-r--r-- pkgs/applications/graphics/tesseract/default.nix 58
2 files changed, 19 insertions, 41 deletions
diff --git a/nixos/lib/testing.nix b/nixos/lib/testing.nix
index 7fad5cbc3cd9..2efe7a5b879c 100644
--- a/nixos/lib/testing.nix
+++ b/nixos/lib/testing.nix
@@ -93,7 +93,7 @@ rec {
 
       vms = map (m: m.config.system.build.vm) (lib.attrValues nodes);
 
-      ocrProg = tesseract.override { enableLanguages = [ "eng" ]; };
+      ocrProg = tesseract;
 
       # Generate onvenience wrappers for running the test driver
       # interactively with the specified network, and for starting the
diff --git a/pkgs/applications/graphics/tesseract/default.nix b/pkgs/applications/graphics/tesseract/default.nix
index 375b09995488..1f1da9a389f2 100644
--- a/pkgs/applications/graphics/tesseract/default.nix
+++ b/pkgs/applications/graphics/tesseract/default.nix
@@ -1,53 +1,31 @@
-{ stdenv, fetchurl, autoconf, automake, libtool, leptonica, libpng, libtiff
-, enableLanguages ? null
+{ stdenv, fetchFromGitHub, pkgconfig, leptonica, libpng, libtiff
+, icu, pango, opencl-headers
 }:
 
-with stdenv.lib;
-
-let
-  majVersion = "3.02";
-  version = "${majVersion}.02";
-
-  mkLang = lang: sha256: let
-    src = fetchurl {
-      url = "http://tesseract-ocr.googlecode.com/files/tesseract-ocr-${majVersion}.${lang}.tar.gz";
-      inherit sha256;
-    };
-  in "tar xfvz ${src} -C $out/share/ --strip=1";
-
-  wantLang = name: const (enableLanguages == null || elem name enableLanguages);
-
-  extraLanguages = mapAttrsToList mkLang (filterAttrs wantLang {
-    cat = "0d1smiv1b3k9ay2s05sl7q08mb3ln4w5iiiymv2cs8g8333z8jl9";
-    rus = "059336mkhsj9m3hwfb818xjlxkcdpy7wfgr62qwz65cx914xl709";
-    spa = "1c9iza5mbahd9pa7znnq8yv09v5kz3gbd2sarcgcgc1ps1jc437l";
-    nld = "162acxp1yb6gyki2is3ay2msalmfcsnrlsd9wml2ja05k94m6bjy";
-    eng = "1y5xf794n832s3lymzlsdm2s9nlrd2v27jjjp0fd9xp7c2ah4461";
-    slv = "0rqng43435cly32idxm1lvxkcippvc3xpxbfizwq5j0155ym00dr";
-    jpn = "07v8pymd0iwyzh946lxylybda20gsw7p4fsb09jw147955x49gq9";
-  });
-in
-
 stdenv.mkDerivation rec {
   name = "tesseract-${version}";
+  version = "3.04.01";
 
-  src = fetchurl {
-    url = "http://tesseract-ocr.googlecode.com/files/tesseract-ocr-${version}.tar.gz";
-    sha256 = "0g81m9y4iydp7kgr56mlkvjdwpp3mb01q385yhdnyvra7z5kkk96";
+  src = fetchFromGitHub {
+    owner = "tesseract-ocr";
+    repo = "tesseract";
+    rev = version;
+    sha256 = "0h1x4z1h86n2gwknd0wck6gykkp99bmm02lg4a47a698g4az6ybv";
   };
 
-  buildInputs = [ autoconf automake libtool leptonica libpng libtiff ];
+  tessdata = fetchFromGitHub {
+    owner = "tesseract-ocr";
+    repo = "tessdata";
+    rev = "3cf1e2df1fe1d1da29295c9ef0983796c7958b7d";
+    sha256 = "1v4b63v5nzcxr2y3635r19l7lj5smjmc9vfk0wmxlryxncb4vpg7";
+  };
 
-  hardeningDisable = [ "format" ];
+  nativeBuildInputs = [ pkgconfig ];
+  buildInputs = [ leptonica libpng libtiff icu pango opencl-headers ];
 
-  preConfigure = ''
-      ./autogen.sh
-      substituteInPlace "configure" \
-        --replace 'LIBLEPT_HEADERSDIR="/usr/local/include /usr/include"' \
-                  'LIBLEPT_HEADERSDIR=${leptonica}/include'
-  '';
+  LIBLEPT_HEADERSDIR = "${leptonica}/include";
 
-  postInstall = concatStringsSep "; " extraLanguages;
+  postInstall = "cp -Rt \"$out/share/tessdata\" \"$tessdata/\"*";
 
   meta = {
     description = "OCR engine";