3 files changed, 71 insertions, 32 deletions
diff --git a/pkgs/development/python-modules/scrapy/default.nix b/pkgs/development/python-modules/scrapy/default.nix
new file mode 100644
index 000000000000..8f3b2ef74b21
--- /dev/null
+++ b/pkgs/development/python-modules/scrapy/default.nix
@@ -0,0 +1,38 @@
+{ buildPythonPackage, fetchurl, glibcLocales, mock, pytest, botocore,
+  testfixtures, pillow, six, twisted, w3lib, lxml, queuelib, pyopenssl,
+  service-identity, parsel, pydispatcher, cssselect, lib }:
+buildPythonPackage rec {
+    name = "Scrapy-${version}";
+    version = "1.3.1";
+
+    buildInputs = [ glibcLocales mock pytest botocore testfixtures pillow ];  # build/test-time only; pytest is invoked in checkPhase
+    propagatedBuildInputs = [
+      six twisted w3lib lxml cssselect queuelib pyopenssl service-identity parsel pydispatcher
+    ];
+
+    # Upstream assumes a pip install, where copying the project templates
+    # together with their permission bits is harmless. In Nix the installed
+    # templates are owned by root and read-only, so copies that keep those
+    # permissions cannot be edited by scrapy; the patch copies contents only.
+    patches = [ ./permissions-fix.patch ];
+
+    LC_ALL="en_US.UTF-8";  # presumably required by the test suite; glibcLocales in buildInputs supplies the locale data
+
+    checkPhase = ''
+      py.test --ignore=tests/test_linkextractors_deprecated.py --ignore=tests/test_proxy_connect.py
+      # The ignored tests require mitmproxy, which depends on protobuf, but it's disabled on Python3
+    '';
+
+    src = fetchurl {
+      url = "mirror://pypi/S/Scrapy/${name}.tar.gz";
+      sha256 = "0s5qkxwfq842maxjd2j82ldp4dyb70kla3z5rr56z0p7ig53cbvk";
+    };
+
+    meta = with lib; {
+      description = "A fast high-level web crawling and web scraping framework, used to crawl websites and extract structured data from their pages";
+      homepage = "http://scrapy.org/";
+      license = licenses.bsd3;
+      maintainers = with maintainers; [ drewkett ];
+      platforms = platforms.linux;
+    };
+}
diff --git a/pkgs/development/python-modules/scrapy/permissions-fix.patch b/pkgs/development/python-modules/scrapy/permissions-fix.patch
new file mode 100644
index 000000000000..5ea5269c799e
--- /dev/null
+++ b/pkgs/development/python-modules/scrapy/permissions-fix.patch
@@ -0,0 +1,28 @@
+diff --git a/scrapy/commands/startproject.py b/scrapy/commands/startproject.py
+index 5941066..89f8edb 100644
+--- a/scrapy/commands/startproject.py
++++ b/scrapy/commands/startproject.py
+@@ -4,7 +4,7 @@ import os
+ import string
+ from importlib import import_module
+ from os.path import join, exists, abspath
+-from shutil import ignore_patterns, move, copy2, copystat
++from shutil import ignore_patterns, move, copyfile, copystat
+ 
+ import scrapy
+ from scrapy.commands import ScrapyCommand
+@@ -76,8 +76,7 @@ class Command(ScrapyCommand):
+             if os.path.isdir(srcname):
+                 self._copytree(srcname, dstname)
+             else:
+-                copy2(srcname, dstname)
+-        copystat(src, dst)
++                copyfile(srcname, dstname)
+ 
+     def run(self, args, opts):
+         if len(args) not in (1, 2):
+@@ -118,4 +117,3 @@ class Command(ScrapyCommand):
+         _templates_base_dir = self.settings['TEMPLATES_DIR'] or \
+             join(scrapy.__path__[0], 'templates')
+         return join(_templates_base_dir, 'project')
+-    
diff --git a/pkgs/top-level/python-packages.nix b/pkgs/top-level/python-packages.nix
index 6567158c185c..03f6d7ce07eb 100644
--- a/pkgs/top-level/python-packages.nix
+++ b/pkgs/top-level/python-packages.nix
@@ -18146,11 +18146,11 @@ in {
 
   parsel = buildPythonPackage rec {
     name = "parsel-${version}";
-    version = "1.0.3";
+    version = "1.1.0";
 
     src = pkgs.fetchurl {
       url = "mirror://pypi/p/parsel/${name}.tar.gz";
-      sha256 = "9c12c370feda864c2f541cecce9bfb3a2a682c6c59c097a852e7b040dc6b8431";
+      sha256 = "0a34d1c0bj1fzb5dk5744m2ag6v3b8glk4xp0amqxdan9ldbcd97";
     };
 
     buildInputs = with self; [ pytest pytestrunner ];
@@ -30937,13 +30937,13 @@ EOF
 
   w3lib = buildPythonPackage rec {
     name = "w3lib-${version}";
-    version = "1.14.2";
+    version = "1.17.0";
 
     buildInputs = with self ; [ six pytest ];
 
     src = pkgs.fetchurl {
       url = "mirror://pypi/w/w3lib/${name}.tar.gz";
-      sha256 = "bd87eae62d208eef70869951abf05e96a8ee559714074a485168de4c5b190004";
+      sha256 = "0vshh300ay5wn5hwl9qcb32m71pz5s6miy0if56vm4nggy159inq";
     };
 
     meta = {
@@ -31002,35 +31002,8 @@ EOF
     };
   };
 
-  scrapy = buildPythonPackage rec {
-    name = "Scrapy-${version}";
-    version = "1.1.2";
-
-    buildInputs = with self; [ pkgs.glibcLocales mock pytest botocore testfixtures pillow ];
-    propagatedBuildInputs = with self; [
-      six twisted w3lib lxml cssselect queuelib pyopenssl service-identity parsel pydispatcher
-    ];
-
-    LC_ALL="en_US.UTF-8";
-
-    checkPhase = ''
-      py.test --ignore=tests/test_linkextractors_deprecated.py --ignore=tests/test_proxy_connect.py
-      # The ignored tests require mitmproxy, which depends on protobuf, but it's disabled on Python3
-    '';
+  scrapy = callPackage ../development/python-modules/scrapy { };
 
-    src = pkgs.fetchurl {
-      url = "mirror://pypi/S/Scrapy/${name}.tar.gz";
-      sha256 = "a0a8c7bccbd598d2731ec9f267b8efbd8fb99767f826f8f2924a5610707a03d4";
-    };
-
-    meta = {
-      description = "A fast high-level web crawling and web scraping framework, used to crawl websites and extract structured data from their pages";
-      homepage = "http://scrapy.org/";
-      license = licenses.bsd3;
-      maintainers = with maintainers; [ drewkett ];
-      platforms = platforms.linux;
-    };
-  };
   pandocfilters = buildPythonPackage rec{
     version = "1.4.1";
     pname = "pandocfilters";