about summary refs log tree commit diff
path: root/nixos/tests/kthxbye.nix
blob: 5ca0917ec8e794f40c600675610749d97b2d8a6e (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
import ./make-test-python.nix ({ lib, pkgs, ... }:
{
  # Test name handed to make-test-python.nix.
  name = "kthxbye";

  # Maintainer list for this test, resolved from lib.maintainers.
  meta.maintainers = with lib.maintainers; [ nukaduka ];

  # The single machine of this test.  Prometheus, Alertmanager and kthxbye are
  # all enabled on it, and every address configured below is a localhost one.
  nodes.server = { ... }:
    let
      # Interval string shared by the Prometheus, Alertmanager and kthxbye
      # timing options below.
      tick = "5s";

      # Rule file content: one group ("test") holding one alert, `node_up`,
      # with the expression `up != 0`.
      alertRules = ''
        groups:
          - name: test
            rules:
              - alert: node_up
                expr: up != 0
                for: 5s
                labels:
                  severity: bottom of the barrel
                annotations:
                  summary: node is fine
      '';
    in
    {
      # The test script invokes `amtool`; presumably this package is what
      # provides it on the system PATH.
      environment.systemPackages = [ pkgs.prometheus-alertmanager ];

      services.prometheus.enable = true;

      services.prometheus.globalConfig = {
        scrape_interval = tick;
        scrape_timeout = tick;
        evaluation_interval = tick;
      };

      # One scrape job against localhost:9090 (presumably Prometheus itself).
      services.prometheus.scrapeConfigs = [{
        job_name = "prometheus";
        scrape_interval = tick;
        static_configs = [{ targets = [ "localhost:9090" ]; }];
      }];

      services.prometheus.rules = [ alertRules ];

      # Alertmanager endpoint Prometheus is pointed at.
      services.prometheus.alertmanagers = [{
        static_configs = [{ targets = [ "localhost:9093" ]; }];
      }];

      services.prometheus.alertmanager = {
        enable = true;
        openFirewall = true;
        configuration = {
          # Every alert goes to the "test" receiver.
          route = {
            receiver = "test";
            group_wait = tick;
            group_interval = tick;
            group_by = [ "..." ];
          };
          # Webhook target for the "test" receiver; nothing in this file
          # configures a service on port 1234.
          receivers = [{
            name = "test";
            webhook_configs = [{ url = "http://localhost:1234"; }];
          }];
        };
      };

      # The service under test.
      services.kthxbye = {
        enable = true;
        openFirewall = true;
        extendIfExpiringIn = "30s";
        logJSON = true;
        maxDuration = "15m";
        interval = tick;
      };
    };

  # Python script executed by the NixOS test driver.
  #
  # Flow: bring up all services, wait until the `node_up` alert is visible in
  # Alertmanager, create a 20s silence for it whose comment starts with "ACK!",
  # then wait past the original expiry and verify the silence is still listed.
  #
  # Readiness is established by polling (open ports, alert visible) rather
  # than by fixed sleeps, so the outcome does not depend on how fast the VM
  # happens to be.  The alert is awaited before the silence is created because
  # kthxbye is documented to extend only silences that match firing alerts
  # (see the kthxbye README); without that, a slow first rule evaluation could
  # let the silence expire before it ever became eligible for extension.
  testScript = ''
    with subtest("start the server"):
      start_all()
      server.wait_for_unit("prometheus.service")
      server.wait_for_unit("alertmanager.service")
      server.wait_for_unit("kthxbye.service")

      # an active unit is not necessarily listening yet, so wait for the sockets
      server.wait_for_open_port(9090)
      server.wait_for_open_port(9093)
      server.systemctl("restart kthxbye.service") # make sure kthxbye comes up after alertmanager
      server.wait_for_unit("kthxbye.service")

    with subtest("wait until the node_up alert has reached alertmanager"):
      server.wait_until_succeeds("amtool --alertmanager.url 'http://localhost:9093' alert query alertname=node_up | grep 'node_up'")

    with subtest("set up test silence which expires in 20s"):
      server.succeed('amtool --alertmanager.url "http://localhost:9093" silence add alertname="node_up" -a "nixosTest" -d "20s" -c "ACK! this server is fine!!"')

    with subtest("wait for 21 seconds and check if the silence is still active"):
      # this sleep is intentional: it must outlast the silence's original 20s duration
      server.sleep(21)
      # status output is only for the test log; its result is not checked
      server.systemctl("status kthxbye.service")
      server.succeed("amtool --alertmanager.url 'http://localhost:9093' silence | grep 'ACK'")
  '';
})