diff options
author | Peter Hoeg <peter@speartail.com> | 2016-05-26 17:20:30 +0800 |
---|---|---|
committer | Peter Hoeg <peter@speartail.com> | 2016-05-27 15:07:05 +0800 |
commit | c1ffbd8ee887f7dfb17c2a5d9249929e3672b935 (patch) | |
tree | c00b8acd7907780bc185eb10cb7536037bf05789 | |
parent | c3bb94e7d13b11222ad741c643ea8ce459344ed2 (diff) | |
download | nixlib-c1ffbd8ee887f7dfb17c2a5d9249929e3672b935.tar nixlib-c1ffbd8ee887f7dfb17c2a5d9249929e3672b935.tar.gz nixlib-c1ffbd8ee887f7dfb17c2a5d9249929e3672b935.tar.bz2 nixlib-c1ffbd8ee887f7dfb17c2a5d9249929e3672b935.tar.lz nixlib-c1ffbd8ee887f7dfb17c2a5d9249929e3672b935.tar.xz nixlib-c1ffbd8ee887f7dfb17c2a5d9249929e3672b935.tar.zst nixlib-c1ffbd8ee887f7dfb17c2a5d9249929e3672b935.zip |
linkchecker: init at 9.3
A few changes from upstream: 1) the executable is patched to support a --no-robots flag to ignore robots.txt; 2) the GUI doesn't work (for now), so this is CLI only
-rw-r--r-- | pkgs/tools/networking/linkchecker/add-no-robots-flag.patch | 60 | ||||
-rw-r--r-- | pkgs/tools/networking/linkchecker/default.nix | 30 | ||||
-rw-r--r-- | pkgs/top-level/all-packages.nix | 2 |
3 files changed, 92 insertions, 0 deletions
diff --git a/pkgs/tools/networking/linkchecker/add-no-robots-flag.patch b/pkgs/tools/networking/linkchecker/add-no-robots-flag.patch new file mode 100644 index 000000000000..270ef2c02e1e --- /dev/null +++ b/pkgs/tools/networking/linkchecker/add-no-robots-flag.patch @@ -0,0 +1,60 @@ +diff --git a/linkcheck/checker/httpurl.py b/linkcheck/checker/httpurl.py +index 6f207b6..161619c 100644 +--- a/linkcheck/checker/httpurl.py ++++ b/linkcheck/checker/httpurl.py +@@ -75,7 +75,7 @@ def allows_robots (self, url): + @return: True if access is granted, otherwise False + @rtype: bool + """ +- return self.aggregate.robots_txt.allows_url(self) ++ return not self.aggregate.config['robotstxt'] or self.aggregate.robots_txt.allows_url(self) + + def content_allows_robots (self): + """ +diff --git a/linkcheck/configuration/__init__.py b/linkcheck/configuration/__init__.py +index fc2c148..234fa05 100644 +--- a/linkcheck/configuration/__init__.py ++++ b/linkcheck/configuration/__init__.py +@@ -163,6 +163,7 @@ def __init__ (self): + ## checking options + self["allowedschemes"] = [] + self['cookiefile'] = None ++ self['robotstxt'] = True + self["debugmemory"] = False + self["localwebroot"] = None + self["maxfilesizeparse"] = 1*1024*1024 +diff --git a/linkcheck/configuration/confparse.py b/linkcheck/configuration/confparse.py +index 67751ed..845fa95 100644 +--- a/linkcheck/configuration/confparse.py ++++ b/linkcheck/configuration/confparse.py +@@ -149,6 +149,7 @@ def read_checking_config (self): + self.get(section, 'allowedschemes').split(',')] + self.read_boolean_option(section, "debugmemory") + self.read_string_option(section, "cookiefile") ++ self.read_boolean_option(section, "robotstxt") + self.read_string_option(section, "localwebroot") + try: + self.read_boolean_option(section, "sslverify") +diff --git a/linkchecker b/linkchecker +index 199532c..9e91fa5 100755 +--- a/linkchecker ++++ b/linkchecker +@@ -321,6 +321,9 @@ group.add_argument("--cookiefile", dest="cookiefile", 
metavar="FILENAME", + help=_( + """Read a file with initial cookie data. The cookie data format is + explained below.""")) ++# const because store_false doesn't detect absent flags ++group.add_argument("--no-robots", action="store_const", const=False, ++ dest="norobotstxt", help=_("Disable robots.txt checks")) + group.add_argument("--check-extern", action="store_true", + dest="checkextern", help=_("""Check external URLs.""")) + group.add_argument("--ignore-url", action="append", metavar="REGEX", +@@ -431,6 +434,8 @@ if options.externstrict: + if options.extern: + pats = [linkcheck.get_link_pat(arg) for arg in options.extern] + config["externlinks"].extend(pats) ++if options.norobotstxt is not None: ++ config['robotstxt'] = options.norobotstxt + if options.checkextern: + config["checkextern"] = True + elif not config["checkextern"]: diff --git a/pkgs/tools/networking/linkchecker/default.nix b/pkgs/tools/networking/linkchecker/default.nix new file mode 100644 index 000000000000..79566f129019 --- /dev/null +++ b/pkgs/tools/networking/linkchecker/default.nix @@ -0,0 +1,30 @@ +{ stdenv, lib, fetchurl, python2Packages }: + +python2Packages.buildPythonApplication rec { + name = "LinkChecker-${version}"; + version = "9.3"; + + # LinkChecker 9.3 only works with requests 2.9.x + propagatedBuildInputs = with python2Packages ; [ requests2 ]; + + src = fetchurl { + url = "mirror://pypi/L/LinkChecker/${name}.tar.gz"; + sha256 = "0v8pavf0bx33xnz1kwflv0r7lxxwj7vg3syxhy2wzza0wh6sc2pf"; + }; + + # upstream refuses to support ignoring robots.txt + patches = [ + ./add-no-robots-flag.patch + ]; + + postInstall = '' + rm $out/bin/linkchecker-gui + ''; + + meta = { + description = "Check websites for broken links"; + homepage = "https://wummel.github.io/linkchecker/"; + license = lib.licenses.gpl2; + maintainers = with lib.maintainers; [ peterhoeg ]; + }; +} diff --git a/pkgs/top-level/all-packages.nix b/pkgs/top-level/all-packages.nix index 302c806413cf..58fe3f129577 100644 --- 
a/pkgs/top-level/all-packages.nix +++ b/pkgs/top-level/all-packages.nix @@ -16911,6 +16911,8 @@ in golden-cheetah = qt55.callPackage ../applications/misc/golden-cheetah {}; + linkchecker = callPackage ../tools/networking/linkchecker { }; + tomb = callPackage ../os-specific/linux/tomb {}; imatix_gsl = callPackage ../development/tools/imatix_gsl {}; |