author     Peter Hoeg <peter@speartail.com>          2016-05-26 17:20:30 +0800
committer  Peter Hoeg <peter@speartail.com>          2016-05-27 15:07:05 +0800
commit     c1ffbd8ee887f7dfb17c2a5d9249929e3672b935 (patch)
tree       c00b8acd7907780bc185eb10cb7536037bf05789
parent     c3bb94e7d13b11222ad741c643ea8ce459344ed2 (diff)
linkchecker: init at 9.3
A few changes from upstream:

 1) the executable is patched to support a --no-robots flag to ignore
    robots.txt (a brief usage sketch follows after this list)

 2) the GUI doesn't work (for now), so this is CLI only
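
For context, a minimal usage sketch of the new flag (not part of the commit).
It assumes the patched linkchecker executable is on PATH and uses
https://example.com purely as a placeholder URL.

    # Hedged sketch: call the patched CLI with --no-robots so robots.txt
    # is ignored during the crawl; the URL and an installed binary are
    # assumptions, not taken from the commit.
    import subprocess

    subprocess.run(["linkchecker", "--no-robots", "https://example.com"],
                   check=True)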
-rw-r--r--  pkgs/tools/networking/linkchecker/add-no-robots-flag.patch | 60
-rw-r--r--  pkgs/tools/networking/linkchecker/default.nix              | 30
-rw-r--r--  pkgs/top-level/all-packages.nix                            |  2
3 files changed, 92 insertions(+), 0 deletions(-)
diff --git a/pkgs/tools/networking/linkchecker/add-no-robots-flag.patch b/pkgs/tools/networking/linkchecker/add-no-robots-flag.patch
new file mode 100644
index 000000000000..270ef2c02e1e
--- /dev/null
+++ b/pkgs/tools/networking/linkchecker/add-no-robots-flag.patch
@@ -0,0 +1,60 @@
+diff --git a/linkcheck/checker/httpurl.py b/linkcheck/checker/httpurl.py
+index 6f207b6..161619c 100644
+--- a/linkcheck/checker/httpurl.py
++++ b/linkcheck/checker/httpurl.py
+@@ -75,7 +75,7 @@ def allows_robots (self, url):
+         @return: True if access is granted, otherwise False
+         @rtype: bool
+         """
+-        return self.aggregate.robots_txt.allows_url(self)
++        return not self.aggregate.config['robotstxt'] or self.aggregate.robots_txt.allows_url(self)
+ 
+     def content_allows_robots (self):
+         """
+diff --git a/linkcheck/configuration/__init__.py b/linkcheck/configuration/__init__.py
+index fc2c148..234fa05 100644
+--- a/linkcheck/configuration/__init__.py
++++ b/linkcheck/configuration/__init__.py
+@@ -163,6 +163,7 @@ def __init__ (self):
+         ## checking options
+         self["allowedschemes"] = []
+         self['cookiefile'] = None
++        self['robotstxt'] = True
+         self["debugmemory"] = False
+         self["localwebroot"] = None
+         self["maxfilesizeparse"] = 1*1024*1024
+diff --git a/linkcheck/configuration/confparse.py b/linkcheck/configuration/confparse.py
+index 67751ed..845fa95 100644
+--- a/linkcheck/configuration/confparse.py
++++ b/linkcheck/configuration/confparse.py
+@@ -149,6 +149,7 @@ def read_checking_config (self):
+                  self.get(section, 'allowedschemes').split(',')]
+         self.read_boolean_option(section, "debugmemory")
+         self.read_string_option(section, "cookiefile")
++        self.read_boolean_option(section, "robotstxt")
+         self.read_string_option(section, "localwebroot")
+         try:
+             self.read_boolean_option(section, "sslverify")
+diff --git a/linkchecker b/linkchecker
+index 199532c..9e91fa5 100755
+--- a/linkchecker
++++ b/linkchecker
+@@ -321,6 +321,9 @@ group.add_argument("--cookiefile", dest="cookiefile", metavar="FILENAME",
+                  help=_(
+ """Read a file with initial cookie data. The cookie data format is
+ explained below."""))
++# const because store_false doesn't detect absent flags
++group.add_argument("--no-robots", action="store_const", const=False,
++                   dest="norobotstxt", help=_("Disable robots.txt checks"))
+ group.add_argument("--check-extern", action="store_true",
+                  dest="checkextern", help=_("""Check external URLs."""))
+ group.add_argument("--ignore-url", action="append", metavar="REGEX",
+@@ -431,6 +434,8 @@ if options.externstrict:
+ if options.extern:
+     pats = [linkcheck.get_link_pat(arg) for arg in options.extern]
+     config["externlinks"].extend(pats)
++if options.norobotstxt is not None:
++    config['robotstxt'] = options.norobotstxt
+ if options.checkextern:
+     config["checkextern"] = True
+ elif not config["checkextern"]:
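
The "const because store_false doesn't detect absent flags" comment above
captures the main design choice in this patch: with action="store_const" and
no default, an absent --no-robots leaves options.norobotstxt as None, so the
wrapper script can distinguish "flag not given" (keep whatever the config file
says) from "flag given" (force robots.txt checks off). A self-contained sketch
of that behaviour, not taken from the commit:

    # Sketch using plain argparse, outside LinkChecker, to show why
    # store_const is preferred over store_false here.
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--no-robots", action="store_const", const=False,
                        dest="norobotstxt", help="Disable robots.txt checks")

    absent  = parser.parse_args([])               # flag not passed
    present = parser.parse_args(["--no-robots"])  # flag passed

    print(absent.norobotstxt)    # None  -> config['robotstxt'] left untouched
    print(present.norobotstxt)   # False -> config['robotstxt'] overridden

    # With action="store_false" the default would be True, so an absent flag
    # would look identical to an explicit value and would always clobber the
    # setting read from the configuration file.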
diff --git a/pkgs/tools/networking/linkchecker/default.nix b/pkgs/tools/networking/linkchecker/default.nix
new file mode 100644
index 000000000000..79566f129019
--- /dev/null
+++ b/pkgs/tools/networking/linkchecker/default.nix
@@ -0,0 +1,30 @@
+{ stdenv, lib, fetchurl, python2Packages }:
+
+python2Packages.buildPythonApplication rec {
+  name = "LinkChecker-${version}";
+  version = "9.3";
+
+  # LinkChecker 9.3 only works with requests 2.9.x
+  propagatedBuildInputs = with python2Packages; [ requests2 ];
+
+  src = fetchurl {
+    url = "mirror://pypi/L/LinkChecker/${name}.tar.gz";
+    sha256 = "0v8pavf0bx33xnz1kwflv0r7lxxwj7vg3syxhy2wzza0wh6sc2pf";
+  };
+
+  # upstream refuses to support ignoring robots.txt
+  patches = [
+    ./add-no-robots-flag.patch
+  ];
+
+  postInstall = ''
+    rm $out/bin/linkchecker-gui
+  '';
+
+  meta = {
+    description = "Check websites for broken links";
+    homepage = "https://wummel.github.io/linkchecker/";
+    license = lib.licenses.gpl2;
+    maintainers = with lib.maintainers; [ peterhoeg ];
+  };
+}
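
The derivation pins requests2 because, as noted in the comment above,
LinkChecker 9.3 only works with requests 2.9.x. A hypothetical runtime check
of that constraint (the check itself is not part of the commit):

    # Hypothetical sketch: verify the installed requests version matches
    # what LinkChecker 9.3 expects, per the comment in default.nix.
    import requests

    major, minor = (int(x) for x in requests.__version__.split(".")[:2])
    assert (major, minor) == (2, 9), \
        "LinkChecker 9.3 expects requests 2.9.x, found %s" % requests.__version__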
diff --git a/pkgs/top-level/all-packages.nix b/pkgs/top-level/all-packages.nix
index 302c806413cf..58fe3f129577 100644
--- a/pkgs/top-level/all-packages.nix
+++ b/pkgs/top-level/all-packages.nix
@@ -16911,6 +16911,8 @@ in
 
   golden-cheetah = qt55.callPackage ../applications/misc/golden-cheetah {};
 
+  linkchecker = callPackage ../tools/networking/linkchecker { };
+
   tomb = callPackage ../os-specific/linux/tomb {};
 
   imatix_gsl = callPackage ../development/tools/imatix_gsl {};