about summary refs log tree commit diff
path: root/nixpkgs/pkgs/development/python-modules/dataprep-ml/default.nix
diff options
context:
space:
mode:
Diffstat (limited to 'nixpkgs/pkgs/development/python-modules/dataprep-ml/default.nix')
-rw-r--r--nixpkgs/pkgs/development/python-modules/dataprep-ml/default.nix77
1 files changed, 77 insertions, 0 deletions
diff --git a/nixpkgs/pkgs/development/python-modules/dataprep-ml/default.nix b/nixpkgs/pkgs/development/python-modules/dataprep-ml/default.nix
new file mode 100644
index 000000000000..3eeac2e85b9b
--- /dev/null
+++ b/nixpkgs/pkgs/development/python-modules/dataprep-ml/default.nix
@@ -0,0 +1,77 @@
+{ lib
+, buildPythonPackage
+, fetchPypi
+, pythonOlder
+, poetry-core
+, numpy
+, pandas
+, pydateinfer
+, python-dateutil
+, scipy
+, type-infer
+, dataclasses-json
+, colorlog
+, pydantic
+, nltk-data
+, symlinkJoin
+}:
+let
+  testNltkData = symlinkJoin {
+    name = "nltk-test-data";
+    paths = [ nltk-data.punkt nltk-data.stopwords ];
+  };
+in
+buildPythonPackage rec {
+  pname = "dataprep-ml";
+  version = "0.0.18";
+  pyproject = true;
+
+  disabled = pythonOlder "3.8";
+
+  # using PyPI as github repo does not contain tags or release branches
+  src = fetchPypi {
+    pname = "dataprep_ml";
+    inherit version;
+    hash = "sha256-nIqyRwv62j8x5Fy7ILMLWxw6yJmkkNRE1zyUlfvRYTI=";
+  };
+
+  nativeBuildInputs = [
+    poetry-core
+  ];
+
+  propagatedBuildInputs = [
+    numpy
+    pandas
+    pydateinfer
+    python-dateutil
+    scipy
+    type-infer
+    dataclasses-json
+    colorlog
+    pydantic
+  ];
+
+  # PyPI tarball has no tests
+  doCheck = false;
+
+  # Package import requires NLTK data to be downloaded
+  # It is the only way to set NLTK_DATA environment variable,
+  # so that it is available in pythonImportsCheck
+  env.NLTK_DATA = testNltkData;
+  pythonImportsCheck = [
+    "dataprep_ml"
+    "dataprep_ml.cleaners"
+    "dataprep_ml.helpers"
+    "dataprep_ml.imputers"
+    "dataprep_ml.insights"
+    "dataprep_ml.recommenders"
+    "dataprep_ml.splitters"
+  ];
+
+  meta = with lib; {
+    description = "Data utilities for Machine Learning pipelines";
+    homepage = "https://github.com/mindsdb/dataprep_ml";
+    license = licenses.gpl3Only;
+    maintainers = with maintainers; [ mbalatsko ];
+  };
+}