diff options
Diffstat (limited to 'nixpkgs/pkgs/applications/networking/cluster/spark/default.nix')
-rw-r--r-- | nixpkgs/pkgs/applications/networking/cluster/spark/default.nix | 56 |
1 files changed, 56 insertions, 0 deletions
# nixpkgs/pkgs/applications/networking/cluster/spark/default.nix
# (added as a new file, mode 100644)
#
# Packages the prebuilt "bin-without-hadoop" Apache Spark distribution.
#
# Arguments:
#   jre, hadoop     - Java runtime and Hadoop package referenced from the
#                     generated conf/spark-env.sh.
#   pythonPackages  - supplies the Python interpreter (PYSPARK_PYTHON) and numpy.
#   coreutils       - provides the absolute `dirname` patched into the launchers.
#   RSupport, R     - when RSupport is true (the default), R is added to
#                     buildInputs and exported to SparkR via spark-env.sh.
{ lib, stdenv, fetchzip, makeWrapper, jre, pythonPackages, coreutils, hadoop
, RSupport ? true, R
}:

with lib;

stdenv.mkDerivation rec {

  pname = "spark";
  version = "2.4.4";

  src = fetchzip {
    url    = "mirror://apache/spark/${pname}-${version}/${pname}-${version}-bin-without-hadoop.tgz";
    sha256 = "1a9w5k0207fysgpxx6db3a00fs5hdc2ncx99x4ccy2s0v5ndc66g";
  };

  nativeBuildInputs = [ makeWrapper ];
  buildInputs = [ jre pythonPackages.python pythonPackages.numpy ]
    ++ optional RSupport R;

  # Name of the directory under $out/lib that receives the unpacked distribution.
  untarDir = "${pname}-${version}-bin-without-hadoop";

  # Install steps:
  #  1. Move the unpacked distribution to $out/lib/${untarDir}.
  #  2. Derive conf/log4j.properties from the shipped template, rewriting
  #     "INFO, console" to "WARN, console".
  #  3. Write conf/spark-env.sh. The heredoc delimiter is unquoted, so plain
  #     $VAR / $(...) are expanded while the phase runs (the Hadoop classpath
  #     and the build-time PYTHONPATH are baked in), whereas \$VAR is written
  #     out literally and resolved only when spark-env.sh is sourced.
  #     PATH must use the escaped form so the caller's PATH is extended rather
  #     than replaced by the build environment's PATH, and the entry added
  #     must be R's bin directory, not the R executable itself.
  #  4. Wrap every extension-less file of the distribution's bin/ into
  #     $out/bin and replace `dirname` in those scripts with the absolute
  #     coreutils path.
  #  5. Link the assembly jar into $out/share/java.
  #     NOTE(review): if the glob matches nothing, ln creates a dangling link
  #     literally named "spark-assembly-*.jar" - confirm this distribution
  #     still ships lib/spark-assembly-*.jar.
  installPhase = ''
    mkdir -p $out/{lib/${untarDir}/conf,bin,/share/java}
    mv * $out/lib/${untarDir}

    sed -e 's/INFO, console/WARN, console/' < \
       $out/lib/${untarDir}/conf/log4j.properties.template > \
       $out/lib/${untarDir}/conf/log4j.properties

    cat > $out/lib/${untarDir}/conf/spark-env.sh <<- EOF
    export JAVA_HOME="${jre}"
    export SPARK_HOME="$out/lib/${untarDir}"
    export SPARK_DIST_CLASSPATH=$(${hadoop}/bin/hadoop classpath)
    export PYSPARK_PYTHON="${pythonPackages.python}/bin/${pythonPackages.python.executable}"
    export PYTHONPATH="\$PYTHONPATH:$PYTHONPATH"
    ${optionalString RSupport ''
      export SPARKR_R_SHELL="${R}/bin/R"
      export PATH="\$PATH:${R}/bin"''}
    EOF

    for n in $(find $out/lib/${untarDir}/bin -type f ! -name "*.*"); do
      makeWrapper "$n" "$out/bin/$(basename $n)"
      substituteInPlace "$n" --replace dirname ${coreutils.out}/bin/dirname
    done
    ln -s $out/lib/${untarDir}/lib/spark-assembly-*.jar $out/share/java
  '';

  meta = {
    description      = "Apache Spark is a fast and general engine for large-scale data processing";
    homepage         = "http://spark.apache.org";
    license          = lib.licenses.asl20;
    platforms        = lib.platforms.all;
    maintainers      = with maintainers; [ thoughtpolice offline kamilchm ];
    repositories.git = "git://git.apache.org/spark.git";
  };
}