about summary refs log tree commit diff
path: root/pkgs/applications/graphics/tesseract/default.nix
blob: 7940079d099480b48f7c147b8c578a9aae146398 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
{ stdenv
, fetchFromGitHub
, autoreconfHook
, pkgconfig
, leptonica
, libpng
, libtiff
, icu
, pango
, opencl-headers
  # Languages whose trained data should be installed: either a list of
  # language names (each is suffixed with ".traineddata" to find the file),
  # or `null' to install every language available.
, enableLanguages ? null
  # Optional hash of the tessdata output. When it is non-null, tessdata is
  # built as a fixed-output derivation. The default below only covers the
  # "all languages" case; with a custom language list no hash is assumed
  # unless the caller passes one.
, enableLanguagesHash ? (
    if enableLanguages == null # all languages
    then "1h48xfzabhn0ldbx5ib67cp9607pr0zpblsy8z6fs4knn0zznfnw"
    else null
  )
}:

# Trained language data for tesseract. The selected *.traineddata files from
# the tessdata repository are installed into $out/share/tessdata.
let tessdata = stdenv.mkDerivation ({
  name = "tessdata";
  src = fetchFromGitHub {
    owner = "tesseract-ocr";
    repo = "tessdata";
    rev = "3cf1e2df1fe1d1da29295c9ef0983796c7958b7d";
    # when updating don't forget to update the default value for enableLanguagesHash
    sha256 = "1v4b63v5nzcxr2y3635r19l7lj5smjmc9vfk0wmxlryxncb4vpg7";
  };
  # The output directory is created up front so that an empty language list
  # still produces a valid (empty) output instead of leaving $out missing.
  # `cd $src' is needed because the "*.traineddata" glob (used when all
  # languages are requested) is expanded relative to the working directory.
  buildCommand = ''
    mkdir -p $out/share/tessdata
    cd $src
    for lang in ${if enableLanguages == null
                  then "*.traineddata"
                  else stdenv.lib.concatMapStringsSep " " (x: x + ".traineddata") enableLanguages} ; do
      install -Dt $out/share/tessdata $src/$lang
    done
  '';
  preferLocalBuild = true;
  } // (stdenv.lib.optionalAttrs (enableLanguagesHash != null) {
  # when a hash is given, we make this a fixed output derivation.
  outputHashMode = "recursive";
  outputHashAlgo = "sha256";
  outputHash = enableLanguagesHash;
  }));
in

# The tesseract package itself. After installation, every entry of the
# tessdata derivation defined above is symlinked into $out/share/tessdata.
stdenv.mkDerivation rec {
  name = "tesseract-${version}";
  version = "3.05.00";

  src = fetchFromGitHub {
    owner = "tesseract-ocr";
    repo = "tesseract";
    rev = version;
    sha256 = "11wrpcfl118wxsv2c3w2scznwb48c4547qml42s2bpdz079g8y30";
  };

  enableParallelBuilding = true;

  nativeBuildInputs = [ pkgconfig autoreconfHook ];
  buildInputs = [ leptonica libpng libtiff icu pango opencl-headers ];

  # Exported to the build environment; points at leptonica's include directory.
  LIBLEPT_HEADERSDIR = "${leptonica}/include";

  # Expansions are quoted, and entries that do not exist are skipped: an
  # unmatched glob is passed through literally by the shell and would
  # otherwise end up as a dangling symlink named `*'.
  postInstall = ''
    for i in ${tessdata}/share/tessdata/*; do
      [ -e "$i" ] || continue
      ln -s "$i" "$out/share/tessdata"
    done
  '';

  meta = {
    description = "OCR engine";
    # Quoted string rather than a bare URL literal (deprecated syntax).
    homepage = "https://github.com/tesseract-ocr/tesseract";
    license = stdenv.lib.licenses.asl20;
    maintainers = with stdenv.lib.maintainers; [viric];
    platforms = with stdenv.lib.platforms; linux ++ darwin;
  };
}