diff options
Diffstat (limited to 'nixpkgs/pkgs/development/python-modules/transformers/default.nix')
-rw-r--r-- | nixpkgs/pkgs/development/python-modules/transformers/default.nix | 73 |
1 files changed, 11 insertions, 62 deletions
diff --git a/nixpkgs/pkgs/development/python-modules/transformers/default.nix b/nixpkgs/pkgs/development/python-modules/transformers/default.nix index bd9040668c08..594204754408 100644 --- a/nixpkgs/pkgs/development/python-modules/transformers/default.nix +++ b/nixpkgs/pkgs/development/python-modules/transformers/default.nix @@ -1,32 +1,28 @@ { buildPythonPackage -, stdenv +, lib, stdenv , fetchFromGitHub -, isPy39 +, pythonOlder , cookiecutter , filelock +, importlib-metadata , regex , requests , numpy -, pandas -, parameterized , protobuf , sacremoses -, timeout-decorator , tokenizers , tqdm -, pytestCheckHook }: buildPythonPackage rec { pname = "transformers"; - version = "4.1.1"; - disabled = isPy39; + version = "4.2.2"; src = fetchFromGitHub { owner = "huggingface"; repo = pname; rev = "v${version}"; - sha256 = "1l1gxdsakjmzsgggypq45pnwm87brhlccjfzafs43460pz0wbd6k"; + hash = "sha256-sBMCzEgYX6HQbzoEIYnmMdpYecCCsQjTdl2mO1Veu9M="; }; propagatedBuildInputs = [ @@ -39,65 +35,18 @@ buildPythonPackage rec { sacremoses tokenizers tqdm - ]; + ] ++ lib.optionals (pythonOlder "3.8") [ importlib-metadata ]; - checkInputs = [ - pandas - parameterized - pytestCheckHook - timeout-decorator - ]; + # Many tests require internet access. + doCheck = false; postPatch = '' - substituteInPlace setup.py \ - --replace "tokenizers == 0.9.4" "tokenizers" + sed -ri 's/tokenizers==[0-9.]+/tokenizers/g' setup.py ''; - preCheck = '' - export HOME="$TMPDIR" + pythonImportsCheck = [ "transformers" ]; - # This test requires the `datasets` module to download test - # data. However, since we cannot download in the Nix sandbox - # and `dataset` is an optional dependency for transformers - # itself, we will just remove the tests files that import - # `dataset`. - rm tests/test_retrieval_rag.py - rm tests/test_trainer.py - ''; - - # We have to run from the main directory for the tests. However, - # letting pytest discover tests leads to errors. - pytestFlagsArray = [ "tests" ]; - - # Disable tests that require network access. - disabledTests = [ - "BlenderbotSmallTokenizerTest" - "Blenderbot3BTokenizerTests" - "GetFromCacheTests" - "TokenizationTest" - "TestTokenizationBart" - "test_all_tokenizers" - "test_batch_encoding_is_fast" - "test_batch_encoding_pickle" - "test_batch_encoding_word_to_tokens" - "test_config_from_model_shortcut" - "test_config_model_type_from_model_identifier" - "test_from_pretrained_use_fast_toggle" - "test_hf_api" - "test_outputs_can_be_shorter" - "test_outputs_not_longer_than_maxlen" - "test_padding_accepts_tensors" - "test_pretokenized_tokenizers" - "test_tokenizer_equivalence_en_de" - "test_tokenizer_from_model_type" - "test_tokenizer_from_model_type" - "test_tokenizer_from_pretrained" - "test_tokenizer_from_tokenizer_class" - "test_tokenizer_identifier_with_correct_config" - "test_tokenizer_identifier_non_existent" - ]; - - meta = with stdenv.lib; { + meta = with lib; { homepage = "https://github.com/huggingface/transformers"; description = "State-of-the-art Natural Language Processing for TensorFlow 2.0 and PyTorch"; changelog = "https://github.com/huggingface/transformers/releases/tag/v${version}"; |