diff options
Diffstat (limited to 'nixos/modules/services/monitoring')
6 files changed, 708 insertions, 3 deletions
diff --git a/nixos/modules/services/monitoring/cadvisor.nix b/nixos/modules/services/monitoring/cadvisor.nix index a67df158be47..8ae8b12056ce 100644 --- a/nixos/modules/services/monitoring/cadvisor.nix +++ b/nixos/modules/services/monitoring/cadvisor.nix @@ -90,6 +90,7 @@ in { ${optionalString cfg.storageDriverSecure "-storage_driver_secure"} ''} ''; + TimeoutStartSec=300; }; }; diff --git a/nixos/modules/services/monitoring/prometheus/alertmanager.nix b/nixos/modules/services/monitoring/prometheus/alertmanager.nix new file mode 100644 index 000000000000..a9c0ce4ed6cb --- /dev/null +++ b/nixos/modules/services/monitoring/prometheus/alertmanager.nix @@ -0,0 +1,116 @@ +{ config, pkgs, lib, ... }: + +with lib; + +let + cfg = config.services.prometheus.alertmanager; + mkConfigFile = pkgs.writeText "alertmanager.yml" (builtins.toJSON cfg.configuration); +in { + options = { + services.prometheus.alertmanager = { + enable = mkEnableOption "Prometheus Alertmanager"; + + user = mkOption { + type = types.str; + default = "nobody"; + description = '' + User name under which Alertmanager shall be run. + ''; + }; + + group = mkOption { + type = types.str; + default = "nogroup"; + description = '' + Group under which Alertmanager shall be run. + ''; + }; + + configuration = mkOption { + type = types.attrs; + default = {}; + description = '' + Alertmanager configuration as nix attribute set. + ''; + }; + + logFormat = mkOption { + type = types.nullOr types.str; + default = null; + description = '' + If set use a syslog logger or JSON logging. + ''; + }; + + logLevel = mkOption { + type = types.enum ["debug" "info" "warn" "error" "fatal"]; + default = "warn"; + description = '' + Only log messages with the given severity or above. + ''; + }; + + webExternalUrl = mkOption { + type = types.nullOr types.str; + default = null; + description = '' + The URL under which Alertmanager is externally reachable (for example, if Alertmanager is served via a reverse proxy). + Used for generating relative and absolute links back to Alertmanager itself. + If the URL has a path portion, it will be used to prefix all HTTP endoints served by Alertmanager. + If omitted, relevant URL components will be derived automatically. + ''; + }; + + listenAddress = mkOption { + type = types.nullOr types.str; + default = null; + description = '' + Address to listen on for the web interface and API. + ''; + }; + + port = mkOption { + type = types.int; + default = 9093; + description = '' + Port to listen on for the web interface and API. + ''; + }; + + openFirewall = mkOption { + type = types.bool; + default = false; + description = '' + Open port in firewall for incoming connections. + ''; + }; + }; + }; + + + config = mkIf cfg.enable { + networking.firewall.allowedTCPPorts = optional cfg.openFirewall cfg.port; + + systemd.services.alertmanager = { + wantedBy = [ "multi-user.target" ]; + after = [ "network.target" ]; + script = '' + ${pkgs.prometheus-alertmanager.bin}/bin/alertmanager \ + -config.file ${mkConfigFile} \ + -web.listen-address ${cfg.listenAddress}:${toString cfg.port} \ + -log.level ${cfg.logLevel} \ + ${optionalString (cfg.webExternalUrl != null) ''-web.external-url ${cfg.webExternalUrl} \''} + ${optionalString (cfg.logFormat != null) "-log.format ${cfg.logFormat}"} + ''; + + serviceConfig = { + User = cfg.user; + Group = cfg.group; + Restart = "always"; + PrivateTmp = true; + WorkingDirectory = "/tmp"; + ExecReload = "${pkgs.coreutils}/bin/kill -HUP $MAINPID"; + }; + }; + }; +} diff --git a/nixos/modules/services/monitoring/prometheus/default.nix b/nixos/modules/services/monitoring/prometheus/default.nix new file mode 100644 index 000000000000..e6817ee227ab --- /dev/null +++ b/nixos/modules/services/monitoring/prometheus/default.nix @@ -0,0 +1,445 @@ +{ config, pkgs, lib, ... }: + +with lib; + +let + cfg = config.services.prometheus; + promUser = "prometheus"; + promGroup = "prometheus"; + + # Get a submodule without any embedded metadata: + _filter = x: filterAttrs (k: v: k != "_module") x; + + # Pretty-print JSON to a file + writePrettyJSON = name: x: + pkgs.runCommand name { } '' + echo '${builtins.toJSON x}' | ${pkgs.jq}/bin/jq . > $out + ''; + + # This becomes the main config file + promConfig = { + global = cfg.globalConfig; + rule_files = cfg.ruleFiles ++ [ + (pkgs.writeText "prometheus.rules" (concatStringsSep "\n" cfg.rules)) + ]; + scrape_configs = cfg.scrapeConfigs; + }; + + cmdlineArgs = cfg.extraFlags ++ [ + "-storage.local.path=${cfg.dataDir}/metrics" + "-config.file=${writePrettyJSON "prometheus.yml" promConfig}" + "-web.listen-address=${cfg.listenAddress}" + "-alertmanager.notification-queue-capacity=${toString cfg.alertmanagerNotificationQueueCapacity}" + "-alertmanager.timeout=${toString cfg.alertmanagerTimeout}s" + (optionalString (cfg.alertmanagerURL != []) "-alertmanager.url=${concatStringsSep "," cfg.alertmanagerURL}") + ]; + + promTypes.globalConfig = types.submodule { + options = { + scrape_interval = mkOption { + type = types.str; + default = "1m"; + description = '' + How frequently to scrape targets by default. + ''; + }; + + scrape_timeout = mkOption { + type = types.str; + default = "10s"; + description = '' + How long until a scrape request times out. + ''; + }; + + evaluation_interval = mkOption { + type = types.str; + default = "1m"; + description = '' + How frequently to evaluate rules by default. + ''; + }; + + labels = mkOption { + type = types.attrsOf types.str; + default = {}; + description = '' + The labels to add to any timeseries that this Prometheus instance + scrapes. + ''; + }; + }; + }; + + promTypes.scrape_config = types.submodule { + options = { + job_name = mkOption { + type = types.str; + description = '' + The job name assigned to scraped metrics by default. + ''; + }; + scrape_interval = mkOption { + type = types.nullOr types.str; + default = null; + description = '' + How frequently to scrape targets from this job. Defaults to the + globally configured default. + ''; + }; + scrape_timeout = mkOption { + type = types.nullOr types.str; + default = null; + description = '' + Per-target timeout when scraping this job. Defaults to the + globally configured default. + ''; + }; + metrics_path = mkOption { + type = types.str; + default = "/metrics"; + description = '' + The HTTP resource path on which to fetch metrics from targets. + ''; + }; + scheme = mkOption { + type = types.enum ["http" "https"]; + default = "http"; + description = '' + The URL scheme with which to fetch metrics from targets. + ''; + }; + basic_auth = mkOption { + type = types.nullOr (types.submodule { + options = { + username = mkOption { + type = types.str; + description = '' + HTTP username + ''; + }; + password = mkOption { + type = types.str; + description = '' + HTTP password + ''; + }; + }; + }); + default = null; + description = '' + Optional http login credentials for metrics scraping. + ''; + }; + dns_sd_configs = mkOption { + type = types.listOf promTypes.dns_sd_config; + default = []; + apply = x: map _filter x; + description = '' + List of DNS service discovery configurations. + ''; + }; + consul_sd_configs = mkOption { + type = types.listOf promTypes.consul_sd_config; + default = []; + apply = x: map _filter x; + description = '' + List of Consul service discovery configurations. + ''; + }; + file_sd_configs = mkOption { + type = types.listOf promTypes.file_sd_config; + default = []; + apply = x: map _filter x; + description = '' + List of file service discovery configurations. + ''; + }; + static_configs = mkOption { + type = types.listOf promTypes.static_config; + default = []; + apply = x: map _filter x; + description = '' + List of labeled target groups for this job. + ''; + }; + relabel_configs = mkOption { + type = types.listOf promTypes.relabel_config; + default = []; + apply = x: map _filter x; + description = '' + List of relabel configurations. + ''; + }; + }; + }; + + promTypes.static_config = types.submodule { + options = { + targets = mkOption { + type = types.listOf types.str; + description = '' + The targets specified by the target group. + ''; + }; + labels = mkOption { + type = types.attrsOf types.str; + description = '' + Labels assigned to all metrics scraped from the targets. + ''; + }; + }; + }; + + promTypes.dns_sd_config = types.submodule { + options = { + names = mkOption { + type = types.listOf types.str; + description = '' + A list of DNS SRV record names to be queried. + ''; + }; + refresh_interval = mkOption { + type = types.str; + default = "30s"; + description = '' + The time after which the provided names are refreshed. + ''; + }; + }; + }; + + promTypes.consul_sd_config = types.submodule { + options = { + server = mkOption { + type = types.str; + description = "Consul server to query."; + }; + token = mkOption { + type = types.nullOr types.str; + description = "Consul token"; + }; + datacenter = mkOption { + type = types.nullOr types.str; + description = "Consul datacenter"; + }; + scheme = mkOption { + type = types.nullOr types.str; + description = "Consul scheme"; + }; + username = mkOption { + type = types.nullOr types.str; + description = "Consul username"; + }; + password = mkOption { + type = types.nullOr types.str; + description = "Consul password"; + }; + + services = mkOption { + type = types.listOf types.str; + description = '' + A list of services for which targets are retrieved. + ''; + }; + tag_separator = mkOption { + type = types.str; + default = ","; + description = '' + The string by which Consul tags are joined into the tag label. + ''; + }; + }; + }; + + promTypes.file_sd_config = types.submodule { + options = { + files = mkOption { + type = types.listOf types.str; + description = '' + Patterns for files from which target groups are extracted. Refer + to the Prometheus documentation for permitted filename patterns + and formats. + + ''; + }; + refresh_interval = mkOption { + type = types.str; + default = "30s"; + description = '' + Refresh interval to re-read the files. + ''; + }; + }; + }; + + promTypes.relabel_config = types.submodule { + options = { + source_labels = mkOption { + type = types.listOf types.str; + description = '' + The source labels select values from existing labels. Their content + is concatenated using the configured separator and matched against + the configured regular expression. + ''; + }; + separator = mkOption { + type = types.str; + default = ";"; + description = '' + Separator placed between concatenated source label values. + ''; + }; + target_label = mkOption { + type = types.nullOr types.str; + default = null; + description = '' + Label to which the resulting value is written in a replace action. + It is mandatory for replace actions. + ''; + }; + regex = mkOption { + type = types.str; + description = '' + Regular expression against which the extracted value is matched. + ''; + }; + replacement = mkOption { + type = types.str; + default = ""; + description = '' + Replacement value against which a regex replace is performed if the + regular expression matches. + ''; + }; + action = mkOption { + type = types.enum ["replace" "keep" "drop"]; + description = '' + Action to perform based on regex matching. + ''; + }; + }; + }; + +in { + options = { + services.prometheus = { + + enable = mkOption { + type = types.bool; + default = false; + description = '' + Enable the Prometheus monitoring daemon. + ''; + }; + + listenAddress = mkOption { + type = types.str; + default = "0.0.0.0:9090"; + description = '' + Address to listen on for the web interface, API, and telemetry. + ''; + }; + + dataDir = mkOption { + type = types.path; + default = "/var/lib/prometheus"; + description = '' + Directory to store Prometheus metrics data. + ''; + }; + + extraFlags = mkOption { + type = types.listOf types.str; + default = []; + description = '' + Extra commandline options when launching Prometheus. + ''; + }; + + globalConfig = mkOption { + type = promTypes.globalConfig; + default = {}; + apply = _filter; + description = '' + Parameters that are valid in all configuration contexts. They + also serve as defaults for other configuration sections + ''; + }; + + rules = mkOption { + type = types.listOf types.str; + default = []; + description = '' + Alerting and/or Recording rules to evaluate at runtime. + ''; + }; + + ruleFiles = mkOption { + type = types.listOf types.path; + default = []; + description = '' + Any additional rules files to include in this configuration. + ''; + }; + + scrapeConfigs = mkOption { + type = types.listOf promTypes.scrape_config; + default = []; + apply = x: map _filter x; + description = '' + A list of scrape configurations. + ''; + }; + + alertmanagerURL = mkOption { + type = types.listOf types.str; + default = []; + description = '' + List of Alertmanager URLs to send notifications to. + ''; + }; + + alertmanagerNotificationQueueCapacity = mkOption { + type = types.int; + default = 10000; + description = '' + The capacity of the queue for pending alert manager notifications. + ''; + }; + + alertmanagerTimeout = mkOption { + type = types.int; + default = 10; + description = '' + Alert manager HTTP API timeout (in seconds). + ''; + }; + }; + }; + + config = mkIf cfg.enable { + users.extraGroups.${promGroup}.gid = config.ids.gids.prometheus; + users.extraUsers.${promUser} = { + description = "Prometheus daemon user"; + uid = config.ids.uids.prometheus; + group = promGroup; + home = cfg.dataDir; + createHome = true; + }; + systemd.services.prometheus = { + wantedBy = [ "multi-user.target" ]; + after = [ "network.target" ]; + script = '' + #!/bin/sh + exec ${pkgs.prometheus}/bin/prometheus \ + ${concatStringsSep " \\\n " cmdlineArgs} + ''; + serviceConfig = { + User = promUser; + Restart = "always"; + WorkingDirectory = cfg.dataDir; + }; + }; + }; +} diff --git a/nixos/modules/services/monitoring/prometheus/node-exporter.nix b/nixos/modules/services/monitoring/prometheus/node-exporter.nix new file mode 100644 index 000000000000..52dc14effc45 --- /dev/null +++ b/nixos/modules/services/monitoring/prometheus/node-exporter.nix @@ -0,0 +1,71 @@ +{ config, pkgs, lib, ... }: + +with lib; + +let + cfg = config.services.prometheus.nodeExporter; + cmdlineArgs = cfg.extraFlags ++ [ + "-web.listen-address=${cfg.listenAddress}" + ]; +in { + options = { + services.prometheus.nodeExporter = { + enable = mkEnableOption "prometheus node exporter"; + + port = mkOption { + type = types.int; + default = 9100; + description = '' + Port to listen on. + ''; + }; + + listenAddress = mkOption { + type = types.string; + default = "0.0.0.0"; + description = '' + Address to listen on. + ''; + }; + + enabledCollectors = mkOption { + type = types.listOf types.string; + default = []; + example = ''[ "systemd" ]''; + description = '' + Collectors to enable, additionally to the defaults. + ''; + }; + + extraFlags = mkOption { + type = types.listOf types.str; + default = []; + description = '' + Extra commandline options when launching the node exporter. + ''; + }; + }; + }; + + config = mkIf cfg.enable { + systemd.services.prometheus-node-exporter = { + description = "Prometheus exporter for machine metrics"; + unitConfig.Documentation = "https://github.com/prometheus/node_exporter"; + wantedBy = [ "multi-user.target" ]; + script = '' + exec ${pkgs.prometheus-node-exporter}/bin/node_exporter \ + ${optionalString (cfg.enabledCollectors != []) + ''-collectors.enabled ${concatStringsSep "," cfg.enabledCollectors}''} \ + -web.listen-address ${cfg.listenAddress}:${toString cfg.port} \ + ${concatStringsSep " \\\n " cfg.extraFlags} + ''; + serviceConfig = { + User = "nobody"; + Restart = "always"; + PrivateTmp = true; + WorkingDirectory = /tmp; + ExecReload = "${pkgs.coreutils}/bin/kill -HUP $MAINPID"; + }; + }; + }; +} diff --git a/nixos/modules/services/monitoring/telegraf.nix b/nixos/modules/services/monitoring/telegraf.nix new file mode 100644 index 000000000000..49dc9d8143e6 --- /dev/null +++ b/nixos/modules/services/monitoring/telegraf.nix @@ -0,0 +1,71 @@ +{ config, lib, pkgs, ... }: + +with lib; + +let + cfg = config.services.telegraf; + + configFile = pkgs.runCommand "config.toml" { + buildInputs = [ pkgs.remarshal ]; + } '' + remarshal -if json -of toml \ + < ${pkgs.writeText "config.json" (builtins.toJSON cfg.extraConfig)} \ + > $out + ''; +in { + ###### interface + options = { + services.telegraf = { + enable = mkEnableOption "telegraf server"; + + package = mkOption { + default = pkgs.telegraf; + defaultText = "pkgs.telegraf"; + description = "Which telegraf derivation to use"; + type = types.package; + }; + + extraConfig = mkOption { + default = {}; + description = "Extra configuration options for telegraf"; + type = types.attrs; + example = { + outputs = { + influxdb = { + urls = ["http://localhost:8086"]; + database = "telegraf"; + }; + }; + inputs = { + statsd = { + service_address = ":8125"; + delete_timings = true; + }; + }; + }; + }; + }; + }; + + + ###### implementation + config = mkIf config.services.telegraf.enable { + systemd.services.telegraf = { + description = "Telegraf Agent"; + wantedBy = [ "multi-user.target" ]; + after = [ "network-online.target" ]; + serviceConfig = { + ExecStart=''${cfg.package}/bin/telegraf -config "${configFile}"''; + ExecReload="${pkgs.coreutils}/bin/kill -HUP $MAINPID"; + User = "telegraf"; + Restart = "on-failure"; + }; + }; + + users.extraUsers = [{ + name = "telegraf"; + uid = config.ids.uids.telegraf; + description = "telegraf daemon user"; + }]; + }; +} diff --git a/nixos/modules/services/monitoring/ups.nix b/nixos/modules/services/monitoring/ups.nix index 5f80d547dbcb..febf0c95f5bd 100644 --- a/nixos/modules/services/monitoring/ups.nix +++ b/nixos/modules/services/monitoring/ups.nix @@ -182,7 +182,8 @@ in systemd.services.upsmon = { description = "Uninterruptible Power Supplies (Monitor)"; - wantedBy = [ "ip-up.target" ]; + after = [ "network.target" ]; + wantedBy = [ "multi-user.target" ]; serviceConfig.Type = "forking"; script = "${pkgs.nut}/sbin/upsmon"; environment.NUT_CONFPATH = "/etc/nut/"; @@ -191,8 +192,8 @@ in systemd.services.upsd = { description = "Uninterruptible Power Supplies (Daemon)"; + after = [ "network.target" "upsmon.service" ]; wantedBy = [ "multi-user.target" ]; - after = [ "network-interfaces.target" "upsmon.service" ]; serviceConfig.Type = "forking"; # TODO: replace 'root' by another username. script = "${pkgs.nut}/sbin/upsd -u root"; @@ -202,8 +203,8 @@ in systemd.services.upsdrv = { description = "Uninterruptible Power Supplies (Register all UPS)"; - wantedBy = [ "multi-user.target" ]; after = [ "upsd.service" ]; + wantedBy = [ "multi-user.target" ]; # TODO: replace 'root' by another username. script = ''${pkgs.nut}/bin/upsdrvctl -u root start''; serviceConfig = { |