about summary refs log tree commit diff
path: root/doc
diff options
context:
space:
mode:
author nicoo <nicoo@mur.at> 2023-12-18 18:37:16 +0000
committer nicoo <nicoo@mur.at> 2023-12-18 20:28:51 +0000
commit b1a96bbbf22c2e375080773a366f6e9dbf7cd3ba (patch)
tree badf5a892b0ef412218c3ecee2595ffb8ddc9624 /doc
parent 55520e0602239cf57799cc5eebcac2712b5728eb (diff)
download nixlib-b1a96bbbf22c2e375080773a366f6e9dbf7cd3ba.tar
nixlib-b1a96bbbf22c2e375080773a366f6e9dbf7cd3ba.tar.gz
nixlib-b1a96bbbf22c2e375080773a366f6e9dbf7cd3ba.tar.bz2
nixlib-b1a96bbbf22c2e375080773a366f6e9dbf7cd3ba.tar.lz
nixlib-b1a96bbbf22c2e375080773a366f6e9dbf7cd3ba.tar.xz
nixlib-b1a96bbbf22c2e375080773a366f6e9dbf7cd3ba.tar.zst
nixlib-b1a96bbbf22c2e375080773a366f6e9dbf7cd3ba.zip
doc: Add test for broken links in `manpage-urls.json`
Diffstat (limited to 'doc')
-rw-r--r-- doc/default.nix 22
-rwxr-xr-x doc/tests/manpage-urls.py 107
2 files changed, 129 insertions, 0 deletions
diff --git a/doc/default.nix b/doc/default.nix
index 8f5def70a6e7..26aae9efa573 100644
--- a/doc/default.nix
+++ b/doc/default.nix
@@ -149,4 +149,26 @@ in pkgs.stdenv.mkDerivation {
     echo "doc manual $dest ${common.indexPath}" >> $out/nix-support/hydra-build-products
     echo "doc manual $dest nixpkgs-manual.epub" >> $out/nix-support/hydra-build-products
   '';
+
+  passthru.tests.manpage-urls = with pkgs; testers.invalidateFetcherByDrvHash  # test: look for broken links in ./manpage-urls.json
+    ({ name ? "manual_check-manpage-urls"
+     , script  # Python checker, run with the interpreter built below
+     , urlsFile  # JSON file passed to the script as its only argument
+     }: runCommand name {
+      nativeBuildInputs = [
+        cacert  # presumably provides the CA bundle for the script's HTTPS requests — confirm
+        (python3.withPackages (p: with p; [  # same package set as the script's own nix-shell shebang
+          aiohttp
+          rich
+          structlog
+        ]))
+      ];
+      outputHash = "sha256-47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU=";  # SHA-256 of an empty file: on success the builder only runs `touch $out`
+    } ''
+      python3 ${script} ${urlsFile}
+      touch $out
+    '') {
+      script = ./tests/manpage-urls.py;
+      urlsFile = ./manpage-urls.json;
+    };
 }
diff --git a/doc/tests/manpage-urls.py b/doc/tests/manpage-urls.py
new file mode 100755
index 000000000000..e5242892b7fb
--- /dev/null
+++ b/doc/tests/manpage-urls.py
@@ -0,0 +1,161 @@
+#! /usr/bin/env nix-shell
+#! nix-shell -i "python3 -I" -p "python3.withPackages(p: with p; [ aiohttp rich structlog ])"
+
+"""Check the manpage URLs linked in the nixpkgs manual for broken links.
+
+Reads a JSON object mapping manpage names to URLs, sends a HEAD request to
+each URL, and exits with a non-zero status if any of them answers 404.
+"""
+
+from argparse import ArgumentParser, ArgumentTypeError
+from collections import defaultdict
+from enum import IntEnum
+from http import HTTPStatus
+from pathlib import Path
+import asyncio, json, logging, os
+
+import aiohttp, structlog
+from structlog.contextvars import bound_contextvars as log_context
+
+
+# Mirror of the stdlib logging levels, usable as an argparse `choices` list.
+LogLevel = IntEnum('LogLevel', {
+    lvl: getattr(logging, lvl)
+    for lvl in ('DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL')
+})
+# Render members by bare name (e.g. `INFO`) wherever they are str()-ed.
+LogLevel.__str__ = lambda self: self.name
+
+
+# Statuses that get their own tally; anything else is reported as unexpected.
+EXPECTED_STATUS = frozenset((
+    HTTPStatus.OK, HTTPStatus.FOUND,
+    HTTPStatus.NOT_FOUND,
+))
+
+# Defined at module level so check() and main() work when this file is
+# imported; structlog loggers bind lazily, so the configuration applied under
+# `__main__` below still takes effect.
+logger = structlog.getLogger("check-manpage-urls.py")
+
+
+async def check(session, manpage: str, url: str) -> int:
+    """Send a HEAD request for one manpage URL and log how the server answered.
+
+    Returns the response status: an `HTTPStatus` member when the code is one
+    `http.HTTPStatus` knows, otherwise the raw integer.
+    """
+    with log_context(manpage=manpage, url=url):
+        logger.debug("Checking")
+        async with session.head(url) as resp:
+            try:
+                st = HTTPStatus(resp.status)
+            except ValueError:
+                # Non-standard codes are not members of HTTPStatus; keep the
+                # raw integer instead of aborting the whole run.
+                st = resp.status
+
+            match st:
+                case HTTPStatus.OK | HTTPStatus.FOUND:
+                    logger.debug("OK!")
+                case HTTPStatus.NOT_FOUND:
+                    logger.error("Broken link!")
+                case _ if st < 400:
+                    logger.info("Unexpected code", status=st)
+                case _:
+                    logger.warning("Unexpected error", status=st)
+
+            return st
+
+
+async def main(urls_path):
+    """Check every URL listed in `urls_path` and tally the response statuses.
+
+    `urls_path` is a `Path` to a JSON object mapping manpage names to URLs.
+    Returns a mapping from each status seen to the number of URLs that
+    answered with it; statuses never seen read as 0.
+    """
+    logger.info(f"Parsing {urls_path}")
+    with urls_path.open(encoding="utf-8") as urls_file:
+        urls = json.load(urls_file)
+
+    count = defaultdict(int)
+
+    logger.info(f"Checking URLs from {urls_path}")
+    async with aiohttp.ClientSession() as session:
+        # Schedule all checks concurrently and tally each one as it finishes.
+        for status in asyncio.as_completed([
+            check(session, manpage, url)
+            for manpage, url in urls.items()
+        ]):
+            count[await status] += 1
+
+    ok = count[HTTPStatus.OK] + count[HTTPStatus.FOUND]
+    broken = count[HTTPStatus.NOT_FOUND]
+    unknown = sum(c for st, c in count.items() if st not in EXPECTED_STATUS)
+    logger.info(f"Done: {broken} broken links, "
+                f"{ok} correct links, and {unknown} unexpected status")
+
+    return count
+
+
+def _log_level(name: str) -> LogLevel:
+    """Convert a level name (any letter case) to a `LogLevel` for argparse."""
+    try:
+        return LogLevel[name.upper()]
+    except KeyError:
+        # argparse reports ArgumentTypeError as a usage error; a KeyError
+        # would escape as a traceback instead.
+        valid = ', '.join(LogLevel.__members__)
+        raise ArgumentTypeError(
+            f"invalid log level {name!r} (choose from {valid})"
+        ) from None
+
+
+def parse_args(args=None):
+    """Parse the command line (`args`, or `sys.argv[1:]` when it is `None`).
+
+    The log level defaults to the `LOG_LEVEL` environment variable, or to
+    `INFO` when that is unset.
+    """
+    parser = ArgumentParser(
+        prog = 'check-manpage-urls',
+        description = 'Check the validity of the manpage URLs linked in the nixpkgs manual',
+    )
+    parser.add_argument(
+        '-l', '--log-level',
+        default = os.getenv('LOG_LEVEL', 'INFO'),
+        type = _log_level,
+        choices = list(LogLevel),
+    )
+    parser.add_argument(
+        'file',
+        type = Path,
+        nargs = '?',
+    )
+
+    return parser.parse_args(args)
+
+
+if __name__ == "__main__":
+    import sys
+
+    args = parse_args()
+
+    structlog.configure(
+        wrapper_class=structlog.make_filtering_bound_logger(args.log_level),
+    )
+
+    urls_path = args.file
+    if urls_path is None:
+        # This script lives in <repo>/doc/tests/, so the repository root is
+        # three directories up, not four; resolve() also makes this work
+        # when __file__ is a relative path.
+        REPO_ROOT = Path(__file__).resolve().parents[2]
+        logger.info(f"Assuming we are in a nixpkgs repo rooted at {REPO_ROOT}")
+
+        urls_path = REPO_ROOT / 'doc' / 'manpage-urls.json'
+
+    count = asyncio.run(main(urls_path))
+
+    sys.exit(0 if count[HTTPStatus.NOT_FOUND] == 0 else 1)