diff options
author | nicoo <nicoo@mur.at> | 2023-12-18 18:37:16 +0000 |
---|---|---|
committer | nicoo <nicoo@mur.at> | 2023-12-18 20:28:51 +0000 |
commit | b1a96bbbf22c2e375080773a366f6e9dbf7cd3ba (patch) | |
tree | badf5a892b0ef412218c3ecee2595ffb8ddc9624 /doc | |
parent | 55520e0602239cf57799cc5eebcac2712b5728eb (diff) | |
download | nixlib-b1a96bbbf22c2e375080773a366f6e9dbf7cd3ba.tar nixlib-b1a96bbbf22c2e375080773a366f6e9dbf7cd3ba.tar.gz nixlib-b1a96bbbf22c2e375080773a366f6e9dbf7cd3ba.tar.bz2 nixlib-b1a96bbbf22c2e375080773a366f6e9dbf7cd3ba.tar.lz nixlib-b1a96bbbf22c2e375080773a366f6e9dbf7cd3ba.tar.xz nixlib-b1a96bbbf22c2e375080773a366f6e9dbf7cd3ba.tar.zst nixlib-b1a96bbbf22c2e375080773a366f6e9dbf7cd3ba.zip |
doc: Add test for broken links in `manpage-urls.json`
Diffstat (limited to 'doc')
-rw-r--r-- | doc/default.nix | 22 | ||||
-rwxr-xr-x | doc/tests/manpage-urls.py | 107 |
2 files changed, 129 insertions, 0 deletions
diff --git a/doc/default.nix b/doc/default.nix
index 8f5def70a6e7..26aae9efa573 100644
--- a/doc/default.nix
+++ b/doc/default.nix
@@ -149,4 +149,26 @@ in pkgs.stdenv.mkDerivation {
     echo "doc manual $dest ${common.indexPath}" >> $out/nix-support/hydra-build-products
     echo "doc manual $dest nixpkgs-manual.epub" >> $out/nix-support/hydra-build-products
   '';
+
+  passthru.tests.manpage-urls = with pkgs; testers.invalidateFetcherByDrvHash
+    ({ name ? "manual_check-manpage-urls"
+     , script
+     , urlsFile
+     }: runCommand name {
+      nativeBuildInputs = [
+        cacert
+        (python3.withPackages (p: with p; [
+          aiohttp
+          rich
+          structlog
+        ]))
+      ];
+      outputHash = "sha256-47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU="; # Empty output
+    } ''
+      python3 ${script} ${urlsFile}
+      touch $out
+    '') {
+      script = ./tests/manpage-urls.py;
+      urlsFile = ./manpage-urls.json;
+    };
 }
diff --git a/doc/tests/manpage-urls.py b/doc/tests/manpage-urls.py
new file mode 100755
index 000000000000..e5242892b7fb
--- /dev/null
+++ b/doc/tests/manpage-urls.py
@@ -0,0 +1,107 @@
+#! /usr/bin/env nix-shell
+#! nix-shell -i "python3 -I" -p "python3.withPackages(p: with p; [ aiohttp rich structlog ])"
+
+from argparse import ArgumentParser
+from collections import defaultdict
+from enum import IntEnum
+from http import HTTPStatus
+from pathlib import Path
+import asyncio, json, logging
+
+import aiohttp, structlog
+from structlog.contextvars import bound_contextvars as log_context
+
+
+LogLevel = IntEnum('LogLevel', {
+    lvl: getattr(logging, lvl)
+    for lvl in ('DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL')
+})
+LogLevel.__str__ = lambda self: self.name
+
+
+EXPECTED_STATUS=frozenset((
+    HTTPStatus.OK, HTTPStatus.FOUND,
+    HTTPStatus.NOT_FOUND,
+))
+
+async def check(session, manpage: str, url: str) -> HTTPStatus:
+    with log_context(manpage=manpage, url=url):
+        logger.debug("Checking")
+        async with session.head(url) as resp:
+            st = HTTPStatus(resp.status)
+            match st:
+                case HTTPStatus.OK | HTTPStatus.FOUND:
+                    logger.debug("OK!")
+                case HTTPStatus.NOT_FOUND:
+                    logger.error("Broken link!")
+                case _ if st < 400:
+                    logger.info("Unexpected code", status=st)
+                case _ if 400 <= st < 600:
+                    logger.warn("Unexpected error", status=st)
+
+            return st
+
+async def main(urls_path):
+    logger.info(f"Parsing {urls_path}")
+    with urls_path.open() as urls_file:
+        urls = json.load(urls_file)
+
+    count = defaultdict(lambda: 0)
+
+    logger.info(f"Checking URLs from {urls_path}")
+    async with aiohttp.ClientSession() as session:
+        for status in asyncio.as_completed([
+            check(session, manpage, url)
+            for manpage, url in urls.items()
+        ]):
+            count[await status]+=1
+
+    ok = count[HTTPStatus.OK] + count[HTTPStatus.FOUND]
+    broken = count[HTTPStatus.NOT_FOUND]
+    unknown = sum(c for st, c in count.items() if st not in EXPECTED_STATUS)
+    logger.info(f"Done: {broken} broken links, "
+                f"{ok} correct links, and {unknown} unexpected status")
+
+    return count
+
+
+def parse_args(args=None):
+    parser = ArgumentParser(
+        prog = 'check-manpage-urls',
+        description = 'Check the validity of the manpage URLs linked in the nixpkgs manual',
+    )
+    parser.add_argument(
+        '-l', '--log-level',
+        default = os.getenv('LOG_LEVEL', 'INFO'),
+        type = lambda s: LogLevel[s],
+        choices = list(LogLevel),
+    )
+    parser.add_argument(
+        'file',
+        type = Path,
+        nargs = '?',
+    )
+
+    return parser.parse_args(args)
+
+
+if __name__ == "__main__":
+    import os, sys
+
+    args = parse_args()
+
+    structlog.configure(
+        wrapper_class=structlog.make_filtering_bound_logger(args.log_level),
+    )
+    logger = structlog.getLogger("check-manpage-urls.py")
+
+    urls_path = args.file
+    if urls_path is None:
+        REPO_ROOT = Path(__file__).parent.parent.parent.parent
+        logger.info(f"Assuming we are in a nixpkgs repo rooted at {REPO_ROOT}")
+
+        urls_path = REPO_ROOT / 'doc' / 'manpage-urls.json'
+
+    count = asyncio.run(main(urls_path))
+
+    sys.exit(0 if count[HTTPStatus.NOT_FOUND] == 0 else 1)