about summary refs log tree commit diff
path: root/nixpkgs/pkgs/development/libraries/arrow-cpp/default.nix
diff options
context:
space:
mode:
Diffstat (limited to 'nixpkgs/pkgs/development/libraries/arrow-cpp/default.nix')
-rw-r--r--nixpkgs/pkgs/development/libraries/arrow-cpp/default.nix149
1 files changed, 149 insertions, 0 deletions
diff --git a/nixpkgs/pkgs/development/libraries/arrow-cpp/default.nix b/nixpkgs/pkgs/development/libraries/arrow-cpp/default.nix
new file mode 100644
index 000000000000..8a9074ccb904
--- /dev/null
+++ b/nixpkgs/pkgs/development/libraries/arrow-cpp/default.nix
@@ -0,0 +1,149 @@
+{ stdenv, lib, fetchurl, fetchFromGitHub, fetchpatch, fixDarwinDylibNames
+, autoconf, boost, brotli, cmake, flatbuffers, gflags, glog, gtest, lz4
+, perl, python3, rapidjson, re2, snappy, thrift, utf8proc, which, zlib, zstd
+, enableShared ? !stdenv.hostPlatform.isStatic
+}:
+
+let
+  arrow-testing = fetchFromGitHub {
+    owner = "apache";
+    repo = "arrow-testing";
+    rev = "d6c4deb22c4b4e9e3247a2f291046e3c671ad235";
+    sha256 = "0cwhnqijam632zp07j98i8ym967wz6kd35fim1msv88x2rhqky1i";
+  };
+
+  parquet-testing = fetchFromGitHub {
+    owner = "apache";
+    repo = "parquet-testing";
+    rev = "e31fe1a02c9e9f271e4bfb8002d403c52f1ef8eb";
+    sha256 = "02f51dvx8w5mw0bx3hn70hkn55mn1m65kzdps1ifvga9hghpy0sh";
+  };
+
+in stdenv.mkDerivation rec {
+  pname = "arrow-cpp";
+  version = "3.0.0";
+
+  src = fetchurl {
+    url =
+      "mirror://apache/arrow/arrow-${version}/apache-arrow-${version}.tar.gz";
+    sha256 = "0yp2b02wrc3s50zd56fmpz4nhhbihp0zw329v4zizaipwlxwrhkk";
+  };
+  sourceRoot = "apache-arrow-${version}/cpp";
+
+  ARROW_JEMALLOC_URL = fetchurl {
+    # From
+    # ./cpp/cmake_modules/ThirdpartyToolchain.cmake
+    # ./cpp/thirdparty/versions.txt
+    url =
+      "https://github.com/jemalloc/jemalloc/releases/download/5.2.1/jemalloc-5.2.1.tar.bz2";
+    sha256 = "1xl7z0vwbn5iycg7amka9jd6hxd8nmfk7nahi4p9w2bnw9f0wcrl";
+  };
+
+  ARROW_MIMALLOC_URL = fetchurl {
+    # From
+    # ./cpp/cmake_modules/ThirdpartyToolchain.cmake
+    # ./cpp/thirdparty/versions.txt
+    url =
+      "https://github.com/microsoft/mimalloc/archive/v1.6.4.tar.gz";
+    sha256 = "1b8av0974q70alcmaw5cwzbn6n9blnpmj721ik1qwmbbwwd6nqgs";
+  };
+
+  patches = [
+    # patch to fix python-test
+    ./darwin.patch
+  ];
+
+  nativeBuildInputs = [
+    cmake
+    autoconf # for vendored jemalloc
+    flatbuffers
+  ] ++ lib.optional stdenv.isDarwin fixDarwinDylibNames;
+  buildInputs = [
+    boost
+    brotli
+    flatbuffers
+    gflags
+    glog
+    gtest
+    lz4
+    rapidjson
+    re2
+    snappy
+    thrift
+    utf8proc
+    zlib
+    zstd
+  ] ++ lib.optionals enableShared [
+    python3.pkgs.python
+    python3.pkgs.numpy
+  ];
+
+  preConfigure = ''
+    patchShebangs build-support/
+  '';
+
+  cmakeFlags = [
+    "-DCMAKE_FIND_PACKAGE_PREFER_CONFIG=ON"
+    "-DARROW_BUILD_SHARED=${if enableShared then "ON" else "OFF"}"
+    "-DARROW_BUILD_STATIC=${if enableShared then "OFF" else "ON"}"
+    "-DARROW_BUILD_TESTS=ON"
+    "-DARROW_VERBOSE_THIRDPARTY_BUILD=ON"
+    "-DARROW_DEPENDENCY_SOURCE=SYSTEM"
+    "-DARROW_DEPENDENCY_USE_SHARED=${if enableShared then "ON" else "OFF"}"
+    "-DARROW_PLASMA=ON"
+    # Disable Python for static mode because openblas is currently broken there.
+    "-DARROW_PYTHON=${if enableShared then "ON" else "OFF"}"
+    "-DARROW_USE_GLOG=ON"
+    "-DARROW_WITH_BROTLI=ON"
+    "-DARROW_WITH_LZ4=ON"
+    "-DARROW_WITH_SNAPPY=ON"
+    "-DARROW_WITH_UTF8PROC=ON"
+    "-DARROW_WITH_ZLIB=ON"
+    "-DARROW_WITH_ZSTD=ON"
+    "-DARROW_MIMALLOC=ON"
+    # Parquet options:
+    "-DARROW_PARQUET=ON"
+    "-DPARQUET_BUILD_EXECUTABLES=ON"
+  ] ++ lib.optionals (!enableShared) [
+    "-DARROW_TEST_LINKAGE=static"
+  ] ++ lib.optionals stdenv.isDarwin [
+    "-DCMAKE_SKIP_BUILD_RPATH=OFF" # needed for tests
+    "-DCMAKE_INSTALL_RPATH=@loader_path/../lib" # needed for tools executables
+  ] ++ lib.optional (!stdenv.isx86_64) "-DARROW_USE_SIMD=OFF";
+
+  doInstallCheck = true;
+  ARROW_TEST_DATA =
+    if doInstallCheck then "${arrow-testing}/data" else null;
+  PARQUET_TEST_DATA =
+    if doInstallCheck then "${parquet-testing}/data" else null;
+  GTEST_FILTER =
+    if doInstallCheck then let
+      # Upstream Issue: https://issues.apache.org/jira/browse/ARROW-11398
+      filteredTests = lib.optionals stdenv.hostPlatform.isAarch64 [
+        "TestFilterKernelWithNumeric/3.CompareArrayAndFilterRandomNumeric"
+        "TestFilterKernelWithNumeric/7.CompareArrayAndFilterRandomNumeric"
+        "TestCompareKernel.PrimitiveRandomTests"
+      ];
+    in "-${builtins.concatStringsSep ":" filteredTests}" else null;
+  installCheckInputs = [ perl which ];
+  installCheckPhase =
+  let
+    excludedTests = lib.optionals stdenv.isDarwin [
+      # Some plasma tests need to be patched to use a shorter AF_UNIX socket
+      # path on Darwin. See https://github.com/NixOS/nix/pull/1085
+      "plasma-external-store-tests"
+      "plasma-client-tests"
+    ];
+  in ''
+    ctest -L unittest -V \
+      --exclude-regex '^(${builtins.concatStringsSep "|" excludedTests})$'
+  '';
+
+  meta = {
+    description = "A  cross-language development platform for in-memory data";
+    homepage = "https://arrow.apache.org/";
+    license = lib.licenses.asl20;
+    platforms = lib.platforms.unix;
+    maintainers = with lib.maintainers; [ tobim veprbl ];
+  };
+}