about summary refs log tree commit diff
path: root/nixpkgs/pkgs/by-name/ar/arrow-cpp
diff options
context:
space:
mode:
Diffstat (limited to 'nixpkgs/pkgs/by-name/ar/arrow-cpp')
-rw-r--r-- nixpkgs/pkgs/by-name/ar/arrow-cpp/package.nix 294
1 files changed, 294 insertions, 0 deletions
diff --git a/nixpkgs/pkgs/by-name/ar/arrow-cpp/package.nix b/nixpkgs/pkgs/by-name/ar/arrow-cpp/package.nix
new file mode 100644
index 000000000000..23a2432c824e
--- /dev/null
+++ b/nixpkgs/pkgs/by-name/ar/arrow-cpp/package.nix
@@ -0,0 +1,294 @@
+{ stdenv # Package inputs and feature toggles for the Apache Arrow C++ build.
+, lib
+, fetchurl
+, fetchFromGitHub
+, fixDarwinDylibNames
+, autoconf
+, aws-sdk-cpp
+, aws-sdk-cpp-arrow ? aws-sdk-cpp.override { # aws-sdk-cpp with its `apis` argument overridden to the list below
+    apis = [
+      "cognito-identity"
+      "config"
+      "identity-management"
+      "s3"
+      "sts"
+      "transfer"
+    ];
+  } # consumed by buildInputs and AWSSDK_CORE_HEADER_FILE when enableS3 is set
+, boost
+, brotli
+, bzip2
+, c-ares # NOTE(review): not referenced anywhere else in this file
+, cmake
+, crc32c
+, curl
+, flatbuffers
+, gflags
+, glog
+, google-cloud-cpp
+, grpc
+, gtest
+, libbacktrace
+, lz4
+, minio
+, ninja
+, nlohmann_json
+, openssl
+, perl
+, protobuf
+, python3
+, rapidjson
+, re2
+, snappy
+, sqlite
+, thrift
+, tzdata
+, utf8proc
+, which
+, zlib
+, zstd
+, testers # supplies testMetaPkgConfig for passthru.tests
+, enableShared ? !stdenv.hostPlatform.isStatic # selects ARROW_BUILD_SHARED vs ARROW_BUILD_STATIC
+, enableFlight ? true # toggles ARROW_FLIGHT, ARROW_FLIGHT_SQL and ARROW_FLIGHT_TESTING
+, enableJemalloc ? !stdenv.isDarwin # toggles ARROW_JEMALLOC; defaults to off on Darwin
+, enableS3 ? true # toggles ARROW_S3 and the aws-sdk-cpp-arrow input
+, enableGcs ? !stdenv.isDarwin # toggles ARROW_GCS; defaults to off on Darwin
+}:
+
+assert lib.asserts.assertMsg # evaluation aborts with the message below when S3 is built for Darwin against Boost 1.69.x
+  (!(enableS3 && stdenv.isDarwin) || lib.versionOlder boost.version "1.69" || lib.versionAtLeast boost.version "1.70")
+  "S3 on Darwin requires Boost != 1.69";
+
+let
+  arrow-testing = fetchFromGitHub { # apache/arrow-testing at a fixed commit; its data/ directory becomes ARROW_TEST_DATA
+    name = "arrow-testing";
+    owner = "apache";
+    repo = "arrow-testing";
+    rev = "ad82a736c170e97b7c8c035ebd8a801c17eec170";
+    hash = "sha256-wN0dam0ZXOAJ+D8bGDMhsdaV3llI9LsiCXwqW9mR3gQ=";
+  };
+
+  parquet-testing = fetchFromGitHub { # apache/parquet-testing at a fixed commit; its data/ directory becomes PARQUET_TEST_DATA
+    name = "parquet-testing";
+    owner = "apache";
+    repo = "parquet-testing";
+    rev = "d69d979223e883faef9dc6fe3cf573087243c28a";
+    hash = "sha256-CUckfNjfDW05crWigzMP5b9UynviXKGZUlIr754OoGU=";
+  };
+
+in
+stdenv.mkDerivation (finalAttrs: {
+  pname = "arrow-cpp";
+  version = "15.0.0"; # also interpolated into the tarball URL and sourceRoot via finalAttrs.version
+
+  src = fetchurl { # full Apache Arrow release tarball, fetched through the mirror://apache scheme
+    url = "mirror://apache/arrow/arrow-${finalAttrs.version}/apache-arrow-${finalAttrs.version}.tar.gz";
+    hash = "sha256-Ad0/cOhdm1uTPsksDbik71BKUQX3jS2GIuhCeftFwl0=";
+  };
+
+  sourceRoot = "apache-arrow-${finalAttrs.version}/cpp"; # only the cpp/ subdirectory of the tarball is built
+
+  # The versions of the vendored sources pinned below are all taken from
+  # https://github.com/apache/arrow/blob/apache-arrow-${version}/cpp/thirdparty/versions.txt
+
+  # jemalloc: arrow uses a custom prefix to prevent default allocator symbol
+  # collisions as well as custom build flags
+  ${if enableJemalloc then "ARROW_JEMALLOC_URL" else null} = fetchurl { # a null attribute name omits this attribute when enableJemalloc is false
+    url = "https://github.com/jemalloc/jemalloc/releases/download/5.3.0/jemalloc-5.3.0.tar.bz2";
+    hash = "sha256-LbgtHnEZ3z5xt2QCGbbf6EeJvAU3mDw7esT3GJrs/qo=";
+  };
+
+  # mimalloc: arrow uses custom build flags for mimalloc
+  ARROW_MIMALLOC_URL = fetchFromGitHub { # set unconditionally; cmakeFlags always passes ARROW_MIMALLOC=ON
+    owner = "microsoft";
+    repo = "mimalloc";
+    rev = "v2.0.6";
+    hash = "sha256-u2ITXABBN/dwU+mCIbL3tN1f4c17aBuSdNTV+Adtohc=";
+  };
+
+  ARROW_XSIMD_URL = fetchFromGitHub { # xsimd source at tag 9.0.1; cmakeFlags sets xsimd_SOURCE=AUTO for this dependency
+    owner = "xtensor-stack";
+    repo = "xsimd";
+    rev = "9.0.1";
+    hash = "sha256-onALN6agtrHWigtFlCeefD9CiRZI4Y690XTzy2UDnrk=";
+  };
+
+  ARROW_SUBSTRAIT_URL = fetchFromGitHub { # substrait source at tag v0.27.0; cmakeFlags always passes ARROW_SUBSTRAIT=ON
+    owner = "substrait-io";
+    repo = "substrait";
+    rev = "v0.27.0";
+    hash = "sha256-wptEAXembah04pzqAz6UHeUxp+jMf6Lh/IdyuIhy/a8=";
+  };
+
+  nativeBuildInputs = [ # tools executed during the build
+    cmake
+    ninja
+    autoconf # for vendored jemalloc
+    flatbuffers # also listed in buildInputs; NOTE(review): presumably needed here for its compiler binary — confirm
+  ] ++ lib.optional stdenv.isDarwin fixDarwinDylibNames; # appended only on Darwin
+  buildInputs = [ # libraries that are always part of the build
+    boost
+    brotli
+    bzip2
+    flatbuffers
+    gflags
+    glog
+    gtest
+    libbacktrace
+    lz4
+    nlohmann_json # alternative JSON parser to rapidjson
+    protobuf # substrait requires protobuf
+    rapidjson
+    re2
+    snappy
+    thrift
+    utf8proc
+    zlib
+    zstd
+  ] ++ lib.optionals enableFlight [ # extra inputs when Flight is enabled
+    grpc
+    openssl
+    protobuf # already present in the unconditional list above
+    sqlite
+  ] ++ lib.optionals enableS3 [ aws-sdk-cpp-arrow openssl ] # extra inputs when ARROW_S3 is enabled
+  ++ lib.optionals enableGcs [ # extra inputs when ARROW_GCS is enabled
+    crc32c
+    curl
+    google-cloud-cpp
+    grpc # also added by the enableFlight list
+    nlohmann_json # already present in the unconditional list above
+  ];
+
+  preConfigure = ''
+    patchShebangs build-support/
+    substituteInPlace "src/arrow/vendored/datetime/tz.cpp" \
+      --replace-fail 'discover_tz_dir();' '"${tzdata}/share/zoneinfo";'
+  ''; # pins the vendored tz code to tzdata's zoneinfo directory; --replace-fail aborts the build if the pattern no longer matches instead of silently skipping the patch
+
+  cmakeFlags = [ # options handed to Arrow's CMake configure step
+    "-DCMAKE_FIND_PACKAGE_PREFER_CONFIG=ON"
+    "-DARROW_BUILD_SHARED=${if enableShared then "ON" else "OFF"}" # exactly one of SHARED/STATIC is ON,
+    "-DARROW_BUILD_STATIC=${if enableShared then "OFF" else "ON"}" # selected by enableShared
+    "-DARROW_BUILD_TESTS=ON" # the tests are run through ctest in installCheckPhase
+    "-DARROW_BUILD_INTEGRATION=ON"
+    "-DARROW_BUILD_UTILITIES=ON"
+    "-DARROW_EXTRA_ERROR_CONTEXT=ON"
+    "-DARROW_VERBOSE_THIRDPARTY_BUILD=ON"
+    "-DARROW_DEPENDENCY_SOURCE=SYSTEM"
+    "-Dxsimd_SOURCE=AUTO" # NOTE(review): presumably a per-dependency override of ARROW_DEPENDENCY_SOURCE, paired with ARROW_XSIMD_URL — confirm
+    "-DARROW_DEPENDENCY_USE_SHARED=${if enableShared then "ON" else "OFF"}"
+    "-DARROW_COMPUTE=ON"
+    "-DARROW_CSV=ON"
+    "-DARROW_DATASET=ON"
+    "-DARROW_FILESYSTEM=ON"
+    "-DARROW_FLIGHT_SQL=${if enableFlight then "ON" else "OFF"}"
+    "-DARROW_HDFS=ON"
+    "-DARROW_IPC=ON"
+    "-DARROW_JEMALLOC=${if enableJemalloc then "ON" else "OFF"}" # matches the conditional ARROW_JEMALLOC_URL attribute
+    "-DARROW_JSON=ON"
+    "-DARROW_USE_GLOG=ON"
+    "-DARROW_WITH_BACKTRACE=ON"
+    "-DARROW_WITH_BROTLI=ON"
+    "-DARROW_WITH_BZ2=ON"
+    "-DARROW_WITH_LZ4=ON"
+    "-DARROW_WITH_NLOHMANN_JSON=ON"
+    "-DARROW_WITH_SNAPPY=ON"
+    "-DARROW_WITH_UTF8PROC=ON"
+    "-DARROW_WITH_ZLIB=ON"
+    "-DARROW_WITH_ZSTD=ON"
+    "-DARROW_MIMALLOC=ON"
+    "-DARROW_SUBSTRAIT=ON"
+    "-DARROW_FLIGHT=${if enableFlight then "ON" else "OFF"}"
+    "-DARROW_FLIGHT_TESTING=${if enableFlight then "ON" else "OFF"}"
+    "-DARROW_S3=${if enableS3 then "ON" else "OFF"}"
+    "-DARROW_GCS=${if enableGcs then "ON" else "OFF"}"
+    # Parquet options:
+    "-DARROW_PARQUET=ON"
+    "-DPARQUET_BUILD_EXECUTABLES=ON"
+    "-DPARQUET_REQUIRE_ENCRYPTION=ON"
+  ] ++ lib.optionals (!enableShared) [ # static-only builds
+    "-DARROW_TEST_LINKAGE=static"
+  ] ++ lib.optionals stdenv.isDarwin [
+    "-DCMAKE_INSTALL_RPATH=@loader_path/../lib" # needed for tools executables
+  ] ++ lib.optionals (!stdenv.isx86_64) [ "-DARROW_USE_SIMD=OFF" ] # NOTE(review): Arrow's current build docs list ARROW_SIMD_LEVEL / ARROW_RUNTIME_SIMD_LEVEL — confirm ARROW_USE_SIMD still has any effect
+  ++ lib.optionals enableS3 [ "-DAWSSDK_CORE_HEADER_FILE=${aws-sdk-cpp-arrow}/include/aws/core/Aws.h" ]; # Aws.h from the overridden SDK package
+
+  doInstallCheck = true; # enables installCheckPhase, which runs the unit tests through ctest
+  ARROW_TEST_DATA = lib.optionalString finalAttrs.doInstallCheck "${arrow-testing}/data"; # empty string when install checks are disabled
+  PARQUET_TEST_DATA = lib.optionalString finalAttrs.doInstallCheck "${parquet-testing}/data"; # empty string when install checks are disabled
+  GTEST_FILTER =
+    let
+      # Tests left out of the gtest run. Upstream issue: https://issues.apache.org/jira/browse/ARROW-11398
+      skippedGtests = lib.optionals stdenv.hostPlatform.isAarch64 [
+        "TestFilterKernelWithNumeric/3.CompareArrayAndFilterRandomNumeric"
+        "TestFilterKernelWithNumeric/7.CompareArrayAndFilterRandomNumeric"
+        "TestCompareKernel.PrimitiveRandomTests"
+      ] ++ lib.optionals enableS3 [ # S3 tests, skipped whenever S3 support is built
+        "S3OptionsTest.FromUri"
+        "S3RegionResolutionTest.NonExistentBucket"
+        "S3RegionResolutionTest.PublicBucket"
+        "S3RegionResolutionTest.RestrictedBucket"
+        "TestMinioServer.Connect"
+        "TestS3FS.*"
+        "TestS3FSGeneric.*"
+      ] ++ lib.optionals stdenv.isDarwin [
+        # TODO: overdue — this was to be revisited at 12.0.0 (the package is now at 15.0.0) or when
+        # https://github.com/apache/arrow/commit/295c6644ca6b67c95a662410b2c7faea0920c989
+        # is available, see
+        # https://github.com/apache/arrow/pull/15288#discussion_r1071244661
+        "ExecPlanExecution.StressSourceSinkStopped"
+      ];
+    in
+    lib.optionalString finalAttrs.doInstallCheck ("-" + builtins.concatStringsSep ":" skippedGtests); # the leading "-" makes the ":"-separated patterns exclusions
+
+  __darwinAllowLocalNetworking = true; # NOTE(review): presumably so tests can use local sockets inside the Darwin sandbox — confirm
+
+  nativeInstallCheckInputs = [ perl which sqlite ] # tools made available while installCheckPhase runs
+    ++ lib.optional enableS3 minio
+    ++ lib.optional enableFlight python3;
+
+  installCheckPhase =
+    let
+      # ctest entries that are not run, each with its reason
+      skippedCtests = [
+        "arrow-flight-test" # flaky
+        "arrow-gcsfs-test" # requires networking
+        "arrow-flight-integration-test" # requires networking
+      ];
+      skipRegex = "^(" + builtins.concatStringsSep "|" skippedCtests + ")$"; # anchored alternation, so only exact names match
+    in
+    ''
+      runHook preInstallCheck
+
+      ctest -L unittest --exclude-regex '${skipRegex}'
+
+      runHook postInstallCheck
+    '';
+
+  meta = { # package metadata
+    description = "A cross-language development platform for in-memory data";
+    homepage = "https://arrow.apache.org/docs/cpp/";
+    license = lib.licenses.asl20;
+    platforms = lib.platforms.unix;
+    maintainers = [ lib.maintainers.tobim lib.maintainers.veprbl lib.maintainers.cpcloud ];
+    pkgConfigModules = [ # pkg-config module names declared for this package; see passthru.tests.pkg-config
+      "arrow"
+      "arrow-acero"
+      "arrow-compute"
+      "arrow-csv"
+      "arrow-dataset"
+      "arrow-filesystem"
+      "arrow-flight"
+      "arrow-flight-sql"
+      "arrow-flight-testing"
+      "arrow-json"
+      "arrow-substrait"
+      "arrow-testing"
+      "parquet"
+    ];
+  };
+  passthru = { # extra attributes attached to the resulting package
+    inherit enableFlight enableJemalloc enableS3 enableGcs; # re-exports the feature toggles this package was built with
+    tests.pkg-config = testers.testMetaPkgConfig finalAttrs.finalPackage; # pkg-config test built from the final package
+  };
+})