path: root/nixpkgs/pkgs/applications/networking/cluster/spark/default.nix
{ lib
, stdenv
, fetchzip
, makeWrapper
, python3
, python310
, hadoop
, RSupport ? true
, R
, nixosTests
}:

let
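  # Generic builder: each release at the bottom of the file supplies its own
  # pname/version/hash and may swap the Python used for PySpark (pysparkPython)
  # or extend the package meta via extraMeta.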
  spark = { pname, version, hash, extraMeta ? {}, pysparkPython ? python3 }:
    stdenv.mkDerivation (finalAttrs: {
      inherit pname version hash hadoop R pysparkPython;
      inherit (finalAttrs.hadoop) jdk;
      src = fetchzip {
        url = with finalAttrs; "mirror://apache/spark/${pname}-${version}/${pname}-${version}-bin-without-hadoop.tgz";
        inherit (finalAttrs) hash;
      };
      nativeBuildInputs = [ makeWrapper ];
      buildInputs = with finalAttrs; [ jdk pysparkPython ]
        ++ lib.optional RSupport finalAttrs.R;

      installPhase = ''
        runHook preInstall

        mkdir -p "$out/opt"
        mv * $out/
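
        # Wrap every executable except find-spark-home so it sees JAVA_HOME,
        # a Hadoop-derived SPARK_DIST_CLASSPATH, and python/R on PATH.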
        for n in $(find $out/bin -type f -executable ! -name "find-spark-home"); do
          wrapProgram "$n" --set JAVA_HOME "${finalAttrs.jdk}" \
            --run "[ -z $SPARK_DIST_CLASSPATH ] && export SPARK_DIST_CLASSPATH=$(${finalAttrs.hadoop}/bin/hadoop classpath)" \
            ${lib.optionalString RSupport ''--set SPARKR_R_SHELL "${finalAttrs.R}/bin/R"''} \
            --prefix PATH : "${
              lib.makeBinPath (
                [ finalAttrs.pysparkPython ] ++
                (lib.optionals RSupport [ finalAttrs.R ])
              )}"
        done
        ln -s ${finalAttrs.hadoop} "$out/opt/hadoop"
        ${lib.optionalString RSupport ''ln -s ${finalAttrs.R} "$out/opt/R"''}

        runHook postInstall
      '';

      passthru = {
        tests = nixosTests.spark.default.passthru.override {
          sparkPackage = finalAttrs.finalPackage;
        };
        # Add python packages to PYSPARK_PYTHON
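        # e.g. spark_3_5.withPythonPackages (ps: [ ps.numpy ps.pandas ])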
        withPythonPackages = f: finalAttrs.finalPackage.overrideAttrs (old: {
          pysparkPython = old.pysparkPython.withPackages f;
        });
      };

      meta = {
        description = "Apache Spark is a fast and general engine for large-scale data processing";
        homepage = "https://spark.apache.org/";
        sourceProvenance = with lib.sourceTypes; [ binaryBytecode ];
        license = lib.licenses.asl20;
        platforms = lib.platforms.all;
        maintainers = with lib.maintainers; [ thoughtpolice offline kamilchm illustris ];
      } // extraMeta;
    });
in
{
  spark_3_5 = spark rec {
    pname = "spark";
    version = "3.5.0";
    hash = "sha256-f+a4a23aOM0GCDoZlZ7WNXs0Olzyh3yMtO8ZmEoYvZ4=";
  };
  spark_3_4 = spark rec {
    pname = "spark";
    version = "3.4.2";
    hash = "sha256-qr0tRuzzEcarJznrQYkaQzGqI7tugp/XJpoZxL7tJwk=";
  };
  spark_3_3 = spark rec {
    pname = "spark";
    version = "3.3.3";
    hash = "sha256-YtHxRYTwrwSle3UpFjRSwKcnLFj2m9/zLBENH/HVzuM=";
    pysparkPython = python310;
  };
}
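
# Minimal usage sketch (hypothetical consumer code, not part of this file).
# It assumes the set above is wired up with callPackage, as nixpkgs does:
#
#   sparkPackages = callPackage ./default.nix { };
#   spark = sparkPackages.spark_3_5.withPythonPackages (ps: [ ps.numpy ]);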