about summary refs log tree commit diff
path: root/nixpkgs/pkgs/development/libraries/libextractor
diff options
context:
space:
mode:
Diffstat (limited to 'nixpkgs/pkgs/development/libraries/libextractor')
-rw-r--r--nixpkgs/pkgs/development/libraries/libextractor/default.nix113
-rw-r--r--nixpkgs/pkgs/development/libraries/libextractor/fix-gcc8-build.patch16
-rw-r--r--nixpkgs/pkgs/development/libraries/libextractor/gst-hardcode-plugins.patch11
3 files changed, 140 insertions, 0 deletions
diff --git a/nixpkgs/pkgs/development/libraries/libextractor/default.nix b/nixpkgs/pkgs/development/libraries/libextractor/default.nix
new file mode 100644
index 000000000000..f7e9606bbe77
--- /dev/null
+++ b/nixpkgs/pkgs/development/libraries/libextractor/default.nix
@@ -0,0 +1,113 @@
+{ fetchurl, stdenv, substituteAll
+, libtool, gettext, zlib, bzip2, flac, libvorbis
+, exiv2, libgsf, rpm, pkgconfig, fetchpatch
+, gstreamerSupport ? true, gst_all_1 ? null
+# ^ Needed e.g. for proper id3 and FLAC support.
+#   Set to `false` to decrease package closure size by about 87 MB (53%).
+, gstPlugins ? (gst: [ gst.gst-plugins-base gst.gst-plugins-good ])
+# If an application needs additional gstreamer plugins it can also make them
+# available by adding them to the environment variable
+# GST_PLUGIN_SYSTEM_PATH_1_0, e.g. like this:
+# postInstall = ''
+#   wrapProgram $out/bin/extract --prefix GST_PLUGIN_SYSTEM_PATH_1_0 : "$GST_PLUGIN_SYSTEM_PATH_1_0"
+# '';
+# See also <https://nixos.org/nixpkgs/manual/#sec-language-gnome>.
+, gtkSupport ? true, glib ? null, gtk3 ? null
+, videoSupport ? true, ffmpeg_3 ? null, libmpeg2 ? null}:
+
+assert gstreamerSupport -> gst_all_1 != null && builtins.isList (gstPlugins gst_all_1);
+assert gtkSupport -> glib != null && gtk3 != null;
+assert videoSupport -> ffmpeg_3 != null && libmpeg2 != null;
+
+stdenv.mkDerivation rec {
+  name = "libextractor-1.9";
+
+  src = fetchurl {
+    url = "mirror://gnu/libextractor/${name}.tar.gz";
+    sha256 = "1zz2zvikvfibxnk1va3kgzs7djsmiqy7bmk8y01vbsf54ryjb3zh";
+  };
+
+  patches = [
+    ./fix-gcc8-build.patch
+    # Fixes build with exiv2 0.27
+    (fetchpatch {
+      name = "libextractor-exiv2-0.27.patch";
+      url = "https://git.archlinux.org/svntogit/community.git/plain/trunk/libextractor-exiv2-0.27.patch?h=packages/libextractor&id=4dc53f7fc69210ae571285dface108ed65d8ee53";
+      sha256 = "0w4gc1q1m1yxsd4hv105nblmif465nw3g5nxzldy0x2rl9mdncg6";
+    })
+    (fetchpatch {
+      name = "CVE-2019-15531.patch";
+      url = "https://git.gnunet.org/libextractor.git/patch/?id=d2b032452241708bee68d02aa02092cfbfba951a";
+      sha256 = "01xhcjbzv6p53wz7y2ii76kb8m9iwvnm4ip9w4a0bpgaxqz4b9fw";
+      excludes = [ "ChangeLog" ];
+    })
+  ] ++ stdenv.lib.optionals gstreamerSupport [
+    # Libraries cannot be wrapped so we need to hardcode the plug-in paths.
+    (substituteAll {
+      src = ./gst-hardcode-plugins.patch;
+      load_gst_plugins = stdenv.lib.concatMapStrings
+        (plugin: ''gst_registry_scan_path(gst_registry_get(), "${plugin}/lib/gstreamer-1.0");'')
+        (gstPlugins gst_all_1);
+    })
+  ];
+
+  preConfigure =
+    '' echo "patching installation directory in \`extractor.c'..."
+       sed -i "src/main/extractor.c" \
+           -e "s|pexe[[:blank:]]*=.*$|pexe = strdup(\"$out/lib/\");|g"
+    '';
+
+  # pkg-config is only executed at build time, so it is a native build input.
+  nativeBuildInputs = [ pkgconfig ];
+
+  buildInputs =
+   [ libtool gettext zlib bzip2 flac libvorbis exiv2 libgsf rpm
+   ] ++ stdenv.lib.optionals gstreamerSupport
+          ([ gst_all_1.gstreamer ] ++ gstPlugins gst_all_1)
+     ++ stdenv.lib.optionals gtkSupport [ glib gtk3 ]
+     ++ stdenv.lib.optionals videoSupport [ ffmpeg_3 libmpeg2 ];
+
+  configureFlags = [
+    "--disable-ltdl-install"
+    "--with-ltdl-include=${libtool}/include"
+    "--with-ltdl-lib=${libtool.lib}/lib"
+    "--enable-xpdf"
+  ];
+
+  # Checks need to be run after "make install", otherwise plug-ins are not in
+  # the search path, etc.
+  # FIXME: Tests currently fail and the test framework appears to be deeply
+  # broken anyway.
+  doCheck = false;
+  #postInstall = "make check";
+
+  meta = {
+    description = "Simple library for keyword extraction";
+
+    longDescription =
+      '' GNU libextractor is a library used to extract meta-data from files
+         of arbitrary type.  It is designed to use helper-libraries to perform
+         the actual extraction, and to be trivially extendable by linking
+         against external extractors for additional file types.
+
+         The goal is to provide developers of file-sharing networks or
+         WWW-indexing bots with a universal library to obtain simple keywords
+         to match against queries.  libextractor contains a shell-command
+         extract that, similar to the well-known file command, can extract
+         meta-data from a file and print the results to stdout.
+
+         Currently, libextractor supports the following formats: HTML, PDF,
+         PS, OLE2 (DOC, XLS, PPT), OpenOffice (sxw), StarOffice (sdw), DVI,
+         MAN, FLAC, MP3 (ID3v1 and ID3v2), NSF(E) (NES music), SID (C64
+         music), OGG, WAV, EXIV2, JPEG, GIF, PNG, TIFF, DEB, RPM, TAR(.GZ),
+         ZIP, ELF, S3M (Scream Tracker 3), XM (eXtended Module), IT (Impulse
+         Tracker), FLV, REAL, RIFF (AVI), MPEG, QT and ASF.  Also, various
+         additional MIME types are detected.
+      '';
+
+    license = stdenv.lib.licenses.gpl2Plus;
+
+    maintainers = [ ];
+    platforms = stdenv.lib.platforms.linux;
+  };
+}
diff --git a/nixpkgs/pkgs/development/libraries/libextractor/fix-gcc8-build.patch b/nixpkgs/pkgs/development/libraries/libextractor/fix-gcc8-build.patch
new file mode 100644
index 000000000000..e04d09be8998
--- /dev/null
+++ b/nixpkgs/pkgs/development/libraries/libextractor/fix-gcc8-build.patch
@@ -0,0 +1,16 @@
+diff --git a/src/plugins/ole2_extractor.c b/src/plugins/ole2_extractor.c
+index 072ffc5..a105840 100644
+--- a/src/plugins/ole2_extractor.c
++++ b/src/plugins/ole2_extractor.c
+@@ -345,9 +345,8 @@ process_star_office (GsfInput *src,
+     gsf_input_read (src, size, (unsigned char*) buf);
+     if ( (buf[0] != 0x0F) ||
+ 	 (buf[1] != 0x0) ||
+-	 (0 != strncmp (&buf[2],
+-			"SfxDocumentInfo",
+-			strlen ("SfxDocumentInfo"))) ||
++	 (0 != memcmp (&buf[2], /* prefix only: next byte is 0x0B, not NUL */
++			"SfxDocumentInfo", sizeof ("SfxDocumentInfo") - 1)) ||
+ 	 (buf[0x11] != 0x0B) ||
+ 	 (buf[0x13] != 0x00) || /* pw protected! */
+ 	 (buf[0x12] != 0x00) )
diff --git a/nixpkgs/pkgs/development/libraries/libextractor/gst-hardcode-plugins.patch b/nixpkgs/pkgs/development/libraries/libextractor/gst-hardcode-plugins.patch
new file mode 100644
index 000000000000..73ecf5f252f6
--- /dev/null
+++ b/nixpkgs/pkgs/development/libraries/libextractor/gst-hardcode-plugins.patch
@@ -0,0 +1,11 @@
+--- a/src/plugins/gstreamer_extractor.c
++++ b/src/plugins/gstreamer_extractor.c
+@@ -2215,6 +2215,7 @@ void __attribute__ ((constructor))
+ gstreamer_init ()
+ {
+   gst_init (NULL, NULL);
++  @load_gst_plugins@ /* placeholder: Nix substitutes one gst_registry_scan_path() call per plug-in */
+   g_log_set_default_handler (&log_handler, NULL);
+   g_log_set_handler (NULL, G_LOG_LEVEL_MASK | G_LOG_FLAG_FATAL | G_LOG_FLAG_RECURSION,
+                     &log_handler, NULL);
+