about summary refs log tree commit diff
path: root/nixpkgs/pkgs/applications/networking/cluster/spark/default.nix
diff options
context:
space:
mode:
Diffstat (limited to 'nixpkgs/pkgs/applications/networking/cluster/spark/default.nix')
-rw-r--r--nixpkgs/pkgs/applications/networking/cluster/spark/default.nix56
1 files changed, 56 insertions, 0 deletions
diff --git a/nixpkgs/pkgs/applications/networking/cluster/spark/default.nix b/nixpkgs/pkgs/applications/networking/cluster/spark/default.nix
new file mode 100644
index 000000000000..76230b8e1003
--- /dev/null
+++ b/nixpkgs/pkgs/applications/networking/cluster/spark/default.nix
@@ -0,0 +1,56 @@
+{ lib, stdenv, fetchzip, makeWrapper, jre, pythonPackages, coreutils, hadoop
+, RSupport? true, R
+}:
+
+with lib;
+
+stdenv.mkDerivation rec {
+  # Installs the prebuilt "bin-without-hadoop" Spark release fetched below; R is only pulled in when RSupport is true.
+  pname = "spark";
+  version = "2.4.4";
+
+  src = fetchzip {
+    url    = "mirror://apache/spark/${pname}-${version}/${pname}-${version}-bin-without-hadoop.tgz";
+    sha256 = "1a9w5k0207fysgpxx6db3a00fs5hdc2ncx99x4ccy2s0v5ndc66g";
+  };
+
+  nativeBuildInputs = [ makeWrapper ];
+  buildInputs = [ jre pythonPackages.python pythonPackages.numpy ]
+    ++ optional RSupport R;
+  # Name of the directory under $out/lib that receives the unpacked distribution.
+  untarDir = "${pname}-${version}-bin-without-hadoop";
+  installPhase = ''
+    mkdir -p $out/{lib/${untarDir}/conf,bin,/share/java}
+    mv * $out/lib/${untarDir}
+    # Derive log4j.properties from the shipped template, lowering console logging from INFO to WARN.
+    sed -e 's/INFO, console/WARN, console/' < \
+       $out/lib/${untarDir}/conf/log4j.properties.template > \
+       $out/lib/${untarDir}/conf/log4j.properties
+    # spark-env.sh is generated now: plain $VAR and $(...) expand at build time, \$VAR (PYTHONPATH, PATH) stays a run-time reference.
+    cat > $out/lib/${untarDir}/conf/spark-env.sh <<- EOF
+    export JAVA_HOME="${jre}"
+    export SPARK_HOME="$out/lib/${untarDir}"
+    export SPARK_DIST_CLASSPATH=$(${hadoop}/bin/hadoop classpath)
+    export PYSPARK_PYTHON="${pythonPackages.python}/bin/${pythonPackages.python.executable}"
+    export PYTHONPATH="\$PYTHONPATH:$PYTHONPATH"
+    ${optionalString RSupport
+      ''export SPARKR_R_SHELL="${R}/bin/R"
+        export PATH="\$PATH:${R}/bin"''}
+    EOF
+    # Wrap every extension-less file in the distribution's bin/ into $out/bin, and pin its dirname calls to coreutils.
+    for n in $(find $out/lib/${untarDir}/bin -type f ! -name "*.*"); do
+      makeWrapper "$n" "$out/bin/$(basename $n)"
+      substituteInPlace "$n" --replace dirname ${coreutils.out}/bin/dirname
+    done
+    ln -s $out/lib/${untarDir}/lib/spark-assembly-*.jar $out/share/java
+  '';
+
+  meta = {
+    description      = "Apache Spark is a fast and general engine for large-scale data processing";
+    homepage         = "http://spark.apache.org";
+    license          = lib.licenses.asl20;
+    platforms        = lib.platforms.all;
+    maintainers      = with maintainers; [ thoughtpolice offline kamilchm ];
+    repositories.git = "git://git.apache.org/spark.git";
+  };
+}