about summary refs log tree commit diff
path: root/nixpkgs/pkgs/development/python-modules/datasets/default.nix
diff options
context:
space:
mode:
Diffstat (limited to 'nixpkgs/pkgs/development/python-modules/datasets/default.nix')
-rw-r--r-- nixpkgs/pkgs/development/python-modules/datasets/default.nix 78
1 files changed, 78 insertions, 0 deletions
diff --git a/nixpkgs/pkgs/development/python-modules/datasets/default.nix b/nixpkgs/pkgs/development/python-modules/datasets/default.nix
new file mode 100644
index 000000000000..0505ea7e13cc
--- /dev/null
+++ b/nixpkgs/pkgs/development/python-modules/datasets/default.nix
@@ -0,0 +1,78 @@
+# Nix packaging of Hugging Face `datasets`. `fetchpatch` and `importlib-metadata`
+# are unused but kept as arguments so existing `.override` calls stay valid.
+{ lib
+, aiohttp
+, buildPythonPackage
+, dill
+, fetchFromGitHub
+, fetchpatch
+, fsspec
+, huggingface-hub
+, importlib-metadata
+, multiprocess
+, numpy
+, packaging
+, pandas
+, pyarrow
+, pythonOlder
+, requests
+, responses
+, tqdm
+, xxhash
+}:
+
+buildPythonPackage rec {
+  pname = "datasets";
+  version = "2.15.0";
+  format = "setuptools";
+
+  # Python < 3.8 is rejected here, so no importlib-metadata backport is propagated.
+  disabled = pythonOlder "3.8";
+
+  src = fetchFromGitHub {
+    owner = "huggingface";
+    repo = pname;
+    rev = "refs/tags/${version}";
+    hash = "sha256-Q8cSgupfj6xKD0bYgL6bvYBwdYDdNaiWEWWUrRvwc4g=";
+  };
+
+  # Comment out the pyarrow_hotfix import (the mitigation for the pyarrow < 14.0.1
+  # vulnerability); pyarrow-hotfix is not among the dependencies provided here.
+  postPatch = ''
+    substituteInPlace src/datasets/features/features.py \
+      --replace "import pyarrow_hotfix" "#import pyarrow_hotfix"
+  '';
+
+  propagatedBuildInputs = [
+    aiohttp
+    dill
+    fsspec
+    huggingface-hub
+    multiprocess
+    numpy
+    packaging
+    pandas
+    pyarrow
+    requests
+    responses
+    tqdm
+    xxhash
+  ];
+
+  # Tests require pervasive internet access
+  doCheck = false;
+
+  # Module import will attempt to create a cache directory
+  postFixup = "export HF_MODULES_CACHE=$TMPDIR";
+
+  pythonImportsCheck = [ "datasets" ];
+
+  meta = with lib; {
+    description = "Open-access datasets and evaluation metrics for natural language processing";
+    homepage = "https://github.com/huggingface/datasets";
+    changelog = "https://github.com/huggingface/datasets/releases/tag/${version}";
+    license = licenses.asl20;
+    platforms = platforms.unix;
+    maintainers = with maintainers; [ ];
+  };
+}