diff options
author | illustris <rharikrishnan95@gmail.com> | 2021-10-21 02:01:12 +0530 |
---|---|---|
committer | Raphael Megzari <raphael@megzari.com> | 2021-10-25 16:30:19 +0900 |
commit | 91bb2b7016de43dfc08fde834d135954369737dc (patch) | |
tree | 67fde8755cbdf8bb26997e05adea5e6888182e13 /nixos | |
parent | ee1fd49ebe2f48ce73a4904a986ebf73b4ade3b7 (diff) | |
download | nixlib-91bb2b7016de43dfc08fde834d135954369737dc.tar nixlib-91bb2b7016de43dfc08fde834d135954369737dc.tar.gz nixlib-91bb2b7016de43dfc08fde834d135954369737dc.tar.bz2 nixlib-91bb2b7016de43dfc08fde834d135954369737dc.tar.lz nixlib-91bb2b7016de43dfc08fde834d135954369737dc.tar.xz nixlib-91bb2b7016de43dfc08fde834d135954369737dc.tar.zst nixlib-91bb2b7016de43dfc08fde834d135954369737dc.zip |
nixos/hadoop: fix yarn, add more service configuration options
Diffstat (limited to 'nixos')
-rw-r--r-- | nixos/modules/services/cluster/hadoop/conf.nix | 34 | ||||
-rw-r--r-- | nixos/modules/services/cluster/hadoop/default.nix | 80 | ||||
-rw-r--r-- | nixos/modules/services/cluster/hadoop/hdfs.nix | 81 | ||||
-rw-r--r-- | nixos/modules/services/cluster/hadoop/yarn.nix | 107 |
4 files changed, 240 insertions, 62 deletions
diff --git a/nixos/modules/services/cluster/hadoop/conf.nix b/nixos/modules/services/cluster/hadoop/conf.nix
index 38db10406b9a..69472408cabe 100644
--- a/nixos/modules/services/cluster/hadoop/conf.nix
+++ b/nixos/modules/services/cluster/hadoop/conf.nix
@@ -1,4 +1,4 @@
-{ hadoop, pkgs }:
+{ cfg, pkgs, lib }:
 let
   propertyXml = name: value: ''
     <property>
@@ -13,19 +13,31 @@ let
       ${builtins.concatStringsSep "\n" (pkgs.lib.mapAttrsToList propertyXml properties)}
     </configuration>
   '';
+  cfgLine = name: value: ''
+    ${name}=${builtins.toString value}
+  '';
+  cfgFile = fileName: properties: pkgs.writeTextDir fileName ''
+    # generated by NixOS
+    ${builtins.concatStringsSep "" (pkgs.lib.mapAttrsToList cfgLine properties)}
+  '';
   userFunctions = ''
     hadoop_verify_logdir() {
       echo Skipping verification of log directory
     }
   '';
+  hadoopEnv = ''
+    export HADOOP_LOG_DIR=/tmp/hadoop/$USER
+  '';
 in
-pkgs.buildEnv {
-  name = "hadoop-conf";
-  paths = [
-    (siteXml "core-site.xml" hadoop.coreSite)
-    (siteXml "hdfs-site.xml" hadoop.hdfsSite)
-    (siteXml "mapred-site.xml" hadoop.mapredSite)
-    (siteXml "yarn-site.xml" hadoop.yarnSite)
-    (pkgs.writeTextDir "hadoop-user-functions.sh" userFunctions)
-  ];
-}
+pkgs.runCommand "hadoop-conf" {} ''
+  mkdir -p $out/
+  cp ${siteXml "core-site.xml" cfg.coreSite}/* $out/
+  cp ${siteXml "hdfs-site.xml" cfg.hdfsSite}/* $out/
+  cp ${siteXml "mapred-site.xml" cfg.mapredSite}/* $out/
+  cp ${siteXml "yarn-site.xml" cfg.yarnSite}/* $out/
+  cp ${cfgFile "container-executor.cfg" cfg.containerExecutorCfg}/* $out/
+  cp ${pkgs.writeTextDir "hadoop-user-functions.sh" userFunctions}/* $out/
+  cp ${pkgs.writeTextDir "hadoop-env.sh" hadoopEnv}/* $out/
+  cp ${cfg.log4jProperties} $out/log4j.properties
+  ${lib.concatMapStringsSep "\n" (dir: "cp -r ${dir}/* $out/") cfg.extraConfDirs}
+''
diff --git a/nixos/modules/services/cluster/hadoop/default.nix b/nixos/modules/services/cluster/hadoop/default.nix
index a165f619dc0c..da3e47b95d4d 100644
--- a/nixos/modules/services/cluster/hadoop/default.nix
+++ b/nixos/modules/services/cluster/hadoop/default.nix
@@ -1,5 +1,7 @@
 { config, lib, pkgs, ...}:
-
+let
+  cfg = config.services.hadoop;
+in
 with lib;
 {
   imports = [ ./yarn.nix ./hdfs.nix ];
@@ -17,7 +19,9 @@ with lib;
     };
 
     hdfsSite = mkOption {
-      default = {};
+      default = {
+        "dfs.namenode.rpc-bind-host" = "0.0.0.0";
+      };
       type = types.attrsOf types.anything;
       example = literalExpression ''
         {
@@ -28,27 +32,81 @@ with lib;
     };
 
     mapredSite = mkOption {
-      default = {};
+      default = {
+        "mapreduce.framework.name" = "yarn";
+        "yarn.app.mapreduce.am.env" = "HADOOP_MAPRED_HOME=${cfg.package}/lib/${cfg.package.untarDir}";
+        "mapreduce.map.env" = "HADOOP_MAPRED_HOME=${cfg.package}/lib/${cfg.package.untarDir}";
+        "mapreduce.reduce.env" = "HADOOP_MAPRED_HOME=${cfg.package}/lib/${cfg.package.untarDir}";
+      };
       type = types.attrsOf types.anything;
       example = literalExpression ''
-        {
-          "mapreduce.map.cpu.vcores" = "1";
+        options.services.hadoop.mapredSite.default // {
+          "mapreduce.map.java.opts" = "-Xmx900m -XX:+UseParallelGC";
         }
       '';
       description = "Hadoop mapred-site.xml definition";
     };
 
     yarnSite = mkOption {
-      default = {};
+      default = {
+        "yarn.nodemanager.admin-env" = "PATH=$PATH";
+        "yarn.nodemanager.aux-services" = "mapreduce_shuffle";
+        "yarn.nodemanager.aux-services.mapreduce_shuffle.class" = "org.apache.hadoop.mapred.ShuffleHandler";
+        "yarn.nodemanager.bind-host" = "0.0.0.0";
+        "yarn.nodemanager.container-executor.class" = "org.apache.hadoop.yarn.server.nodemanager.LinuxContainerExecutor";
+        "yarn.nodemanager.env-whitelist" = "JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_HOME,LANG,TZ";
+        "yarn.nodemanager.linux-container-executor.group" = "hadoop";
+        "yarn.nodemanager.linux-container-executor.path" = "/run/wrappers/yarn-nodemanager/bin/container-executor";
+        "yarn.nodemanager.log-dirs" = "/var/log/hadoop/yarn/nodemanager";
+        "yarn.resourcemanager.bind-host" = "0.0.0.0";
+        "yarn.resourcemanager.scheduler.class" = "org.apache.hadoop.yarn.server.resourcemanager.scheduler.fifo.FifoScheduler";
+      };
       type = types.attrsOf types.anything;
       example = literalExpression ''
-        {
-          "yarn.resourcemanager.ha.id" = "resourcemanager1";
+        options.services.hadoop.yarnSite.default // {
+          "yarn.resourcemanager.hostname" = "''${config.networking.hostName}";
         }
       '';
       description = "Hadoop yarn-site.xml definition";
     };
 
+    log4jProperties = mkOption {
+      default = "${cfg.package}/lib/${cfg.package.untarDir}/etc/hadoop/log4j.properties";
+      type = types.path;
+      example = literalExpression ''
+        "''${pkgs.hadoop}/lib/''${pkgs.hadoop.untarDir}/etc/hadoop/log4j.properties";
+      '';
+      description = "log4j.properties file added to HADOOP_CONF_DIR";
+    };
+
+    containerExecutorCfg = mkOption {
+      default = {
+        # must be the same as yarn.nodemanager.linux-container-executor.group in yarnSite
+        "yarn.nodemanager.linux-container-executor.group"="hadoop";
+        "min.user.id"=1000;
+        "feature.terminal.enabled"=1;
+      };
+      type = types.attrsOf types.anything;
+      example = literalExpression ''
+        options.services.hadoop.containerExecutorCfg.default // {
+          "feature.terminal.enabled" = 0;
+        }
+      '';
+      description = "Yarn container-executor.cfg definition";
+    };
+
+    extraConfDirs = mkOption {
+      default = [];
+      type = types.listOf types.path;
+      example = literalExpression ''
+        [
+          ./extraHDFSConfs
+          ./extraYARNConfs
+        ]
+      '';
+      description = "Directories containing additional config files to be added to HADOOP_CONF_DIR";
+    };
+
     package = mkOption {
       type = types.package;
       default = pkgs.hadoop;
@@ -64,6 +122,12 @@ with lib;
       users.groups.hadoop = {
         gid = config.ids.gids.hadoop;
       };
+      environment = {
+        systemPackages = [ cfg.package ];
+        etc."hadoop-conf".source = let
+          hadoopConf = "${import ./conf.nix { inherit cfg pkgs lib; }}/";
+        in "${hadoopConf}";
+      };
     })
 
   ];
diff --git a/nixos/modules/services/cluster/hadoop/hdfs.nix b/nixos/modules/services/cluster/hadoop/hdfs.nix
index 4f4b0a92108f..e347b682b902 100644
--- a/nixos/modules/services/cluster/hadoop/hdfs.nix
+++ b/nixos/modules/services/cluster/hadoop/hdfs.nix
@@ -1,24 +1,54 @@
 { config, lib, pkgs, ...}:
+with lib;
 let
   cfg = config.services.hadoop;
-  hadoopConf = import ./conf.nix { hadoop = cfg; pkgs = pkgs; };
+  hadoopConf = "${import ./conf.nix { inherit cfg pkgs lib; }}/";
+  restartIfChanged = mkOption {
+    type = types.bool;
+    description = ''
+      Automatically restart the service on config change.
+      This can be set to false to defer restarts on clusters running critical applications.
+      Please consider the security implications of inadvertently running an older version,
+      and the possibility of unexpected behavior caused by inconsistent versions across a cluster when disabling this option.
+    '';
+    default = false;
+  };
 in
-with lib;
 {
   options.services.hadoop.hdfs = {
-    namenode.enabled = mkOption {
-      type = types.bool;
-      default = false;
-      description = ''
-        Whether to run the Hadoop YARN NameNode
-      '';
+    namenode = {
+      enabled = mkOption {
+        type = types.bool;
+        default = false;
+        description = ''
+          Whether to run the HDFS NameNode
+        '';
+      };
+      inherit restartIfChanged;
+      openFirewall = mkOption {
+        type = types.bool;
+        default = true;
+        description = ''
+          Open firewall ports for namenode
+        '';
+      };
     };
-    datanode.enabled = mkOption {
-      type = types.bool;
-      default = false;
-      description = ''
-        Whether to run the Hadoop YARN DataNode
-      '';
+    datanode = {
+      enabled = mkOption {
+        type = types.bool;
+        default = false;
+        description = ''
+          Whether to run the HDFS DataNode
+        '';
+      };
+      inherit restartIfChanged;
+      openFirewall = mkOption {
+        type = types.bool;
+        default = true;
+        description = ''
+          Open firewall ports for datanode
+        '';
+      };
     };
   };
 
@@ -27,10 +57,7 @@ with lib;
       systemd.services.hdfs-namenode = {
         description = "Hadoop HDFS NameNode";
         wantedBy = [ "multi-user.target" ];
-
-        environment = {
-          HADOOP_HOME = "${cfg.package}";
-        };
+        inherit (cfg.hdfs.namenode) restartIfChanged;
 
         preStart = ''
           ${cfg.package}/bin/hdfs --config ${hadoopConf} namenode -format -nonInteractive || true
@@ -40,24 +67,34 @@ with lib;
           User = "hdfs";
           SyslogIdentifier = "hdfs-namenode";
           ExecStart = "${cfg.package}/bin/hdfs --config ${hadoopConf} namenode";
+          Restart = "always";
         };
       };
+
+      networking.firewall.allowedTCPPorts = (mkIf cfg.hdfs.namenode.openFirewall [
+        9870 # namenode.http-address
+        8020 # namenode.rpc-address
+      ]);
     })
     (mkIf cfg.hdfs.datanode.enabled {
       systemd.services.hdfs-datanode = {
         description = "Hadoop HDFS DataNode";
         wantedBy = [ "multi-user.target" ];
-
-        environment = {
-          HADOOP_HOME = "${cfg.package}";
-        };
+        inherit (cfg.hdfs.datanode) restartIfChanged;
 
         serviceConfig = {
           User = "hdfs";
           SyslogIdentifier = "hdfs-datanode";
           ExecStart = "${cfg.package}/bin/hdfs --config ${hadoopConf} datanode";
+          Restart = "always";
         };
       };
+
+      networking.firewall.allowedTCPPorts = (mkIf cfg.hdfs.datanode.openFirewall [
+        9864 # datanode.http.address
+        9866 # datanode.address
+        9867 # datanode.ipc.address
+      ]);
     })
     (mkIf (
         cfg.hdfs.namenode.enabled || cfg.hdfs.datanode.enabled
diff --git a/nixos/modules/services/cluster/hadoop/yarn.nix b/nixos/modules/services/cluster/hadoop/yarn.nix
index c92020637e47..0086a53e3b74 100644
--- a/nixos/modules/services/cluster/hadoop/yarn.nix
+++ b/nixos/modules/services/cluster/hadoop/yarn.nix
@@ -1,24 +1,62 @@
 { config, lib, pkgs, ...}:
+with lib;
 let
   cfg = config.services.hadoop;
-  hadoopConf = import ./conf.nix { hadoop = cfg; pkgs = pkgs; };
+  hadoopConf = "${import ./conf.nix { inherit cfg pkgs lib; }}/";
+  restartIfChanged = mkOption {
+    type = types.bool;
+    description = ''
+      Automatically restart the service on config change.
+      This can be set to false to defer restarts on clusters running critical applications.
+      Please consider the security implications of inadvertently running an older version,
+      and the possibility of unexpected behavior caused by inconsistent versions across a cluster when disabling this option.
+    '';
+    default = false;
+  };
 in
-with lib;
 {
   options.services.hadoop.yarn = {
-    resourcemanager.enabled = mkOption {
-      type = types.bool;
-      default = false;
-      description = ''
-        Whether to run the Hadoop YARN ResourceManager
-      '';
+    resourcemanager = {
+      enabled = mkOption {
+        type = types.bool;
+        default = false;
+        description = ''
+          Whether to run the Hadoop YARN ResourceManager
+        '';
+      };
+      inherit restartIfChanged;
+      openFirewall = mkOption {
+        type = types.bool;
+        default = true;
+        description = ''
+          Open firewall ports for resourcemanager
+        '';
+      };
     };
-    nodemanager.enabled = mkOption {
-      type = types.bool;
-      default = false;
-      description = ''
-        Whether to run the Hadoop YARN NodeManager
-      '';
+    nodemanager = {
+      enabled = mkOption {
+        type = types.bool;
+        default = false;
+        description = ''
+          Whether to run the Hadoop YARN NodeManager
+        '';
+      };
+      inherit restartIfChanged;
+      addBinBash = mkOption {
+        type = types.bool;
+        default = true;
+        description = ''
+          Add /bin/bash. This is needed by the linux container executor's launch script.
+        '';
+      };
+      openFirewall = mkOption {
+        type = types.bool;
+        default = true;
+        description = ''
+          Open firewall ports for nodemanager.
+          Because containers can listen on any ephemeral port, TCP ports 1024–65535 will be opened.
+        '';
+      };
     };
   };
 
@@ -38,36 +76,63 @@ with lib;
       systemd.services.yarn-resourcemanager = {
         description = "Hadoop YARN ResourceManager";
         wantedBy = [ "multi-user.target" ];
-
-        environment = {
-          HADOOP_HOME = "${cfg.package}";
-        };
+        inherit (cfg.yarn.resourcemanager) restartIfChanged;
 
         serviceConfig = {
           User = "yarn";
           SyslogIdentifier = "yarn-resourcemanager";
           ExecStart = "${cfg.package}/bin/yarn --config ${hadoopConf} " +
                       " resourcemanager";
+          Restart = "always";
         };
       };
+      networking.firewall.allowedTCPPorts = (mkIf cfg.yarn.resourcemanager.openFirewall [
+        8088 # resourcemanager.webapp.address
+        8030 # resourcemanager.scheduler.address
+        8031 # resourcemanager.resource-tracker.address
+        8032 # resourcemanager.address
+      ]);
     })
 
     (mkIf cfg.yarn.nodemanager.enabled {
+      # Needed because yarn hardcodes /bin/bash in container start scripts
+      # These scripts can't be patched, they are generated at runtime
+      systemd.tmpfiles.rules = [
+        (mkIf cfg.yarn.nodemanager.addBinBash "L /bin/bash - - - - /run/current-system/sw/bin/bash")
+      ];
+
       systemd.services.yarn-nodemanager = {
         description = "Hadoop YARN NodeManager";
         wantedBy = [ "multi-user.target" ];
+        inherit (cfg.yarn.nodemanager) restartIfChanged;
 
-        environment = {
-          HADOOP_HOME = "${cfg.package}";
-        };
+        preStart = ''
+          # create log dir
+          mkdir -p /var/log/hadoop/yarn/nodemanager
+          chown yarn:hadoop /var/log/hadoop/yarn/nodemanager
+
+          # set up setuid container executor binary
+          rm -rf /run/wrappers/yarn-nodemanager/ || true
+          mkdir -p /run/wrappers/yarn-nodemanager/{bin,etc/hadoop}
+          cp ${cfg.package}/lib/${cfg.package.untarDir}/bin/container-executor /run/wrappers/yarn-nodemanager/bin/
+          chgrp hadoop /run/wrappers/yarn-nodemanager/bin/container-executor
+          chmod 6050 /run/wrappers/yarn-nodemanager/bin/container-executor
+          cp ${hadoopConf}/container-executor.cfg /run/wrappers/yarn-nodemanager/etc/hadoop/
+        '';
 
         serviceConfig = {
           User = "yarn";
           SyslogIdentifier = "yarn-nodemanager";
+          PermissionsStartOnly = true;
           ExecStart = "${cfg.package}/bin/yarn --config ${hadoopConf} " +
                       " nodemanager";
+          Restart = "always";
         };
       };
+
+      networking.firewall.allowedTCPPortRanges = [
+        (mkIf (cfg.yarn.nodemanager.openFirewall) {from = 1024; to = 65535;})
+      ];
     })
 
   ];