{ lib , buildPythonPackage , pythonRelaxDepsHook , fetchFromGitHub , attrdict , beautifulsoup4 , cython , fire , fonttools , lmdb , lxml , numpy , opencv4 , openpyxl , pdf2docx , pillow , premailer , pyclipper , pymupdf , python-docx , rapidfuzz , scikit-image , shapely , tqdm , paddlepaddle , lanms-neo , polygon3 }: let version = "2.7.0.1"; in buildPythonPackage { pname = "paddleocr"; inherit version; format = "setuptools"; src = fetchFromGitHub { owner = "PaddlePaddle"; repo = "PaddleOCR"; rev = "254786752a2659e184822b4b2de5637a05236590"; hash = "sha256-M/Fpk9swX9Gds7o5poM9Iv6LOhKoZNbe0Wv9JNMPOU0="; }; patches = [ # The `ppocr.data.imaug` re-exports the `IaaAugment` and `CopyPaste` # classes. These classes depend on the `imgaug` package which is # unmaintained and has been removed from nixpkgs. # # The image OCR feature of PaddleOCR doesn't use these classes though, so # they work even after stripping the the `IaaAugment` and `CopyPaste` # exports. It probably breaks some of the OCR model creation tooling that # PaddleOCR provides, however. ./remove-import-imaug.patch ]; nativeBuildInputs = [ pythonRelaxDepsHook ]; # trying to relax only pymupdf makes the whole build fail pythonRelaxDeps = true; pythonRemoveDeps = [ "imgaug" "visualdl" "opencv-python" "opencv-contrib-python" ]; propagatedBuildInputs = [ attrdict beautifulsoup4 cython fire fonttools lmdb lxml numpy opencv4 openpyxl pdf2docx pillow premailer pyclipper pymupdf python-docx rapidfuzz scikit-image shapely tqdm paddlepaddle lanms-neo polygon3 ]; # TODO: The tests depend, among possibly other things, on `cudatoolkit`. # But Cudatoolkit fails to install. # preCheck = "export HOME=$TMPDIR"; # nativeCheckInputs = with pkgs; [ which cudatoolkit ]; doCheck = false; meta = with lib; { homepage = "https://github.com/PaddlePaddle/PaddleOCR"; license = licenses.asl20; description = "Multilingual OCR toolkits based on PaddlePaddle"; longDescription = '' PaddleOCR aims to create multilingual, awesome, leading, and practical OCR tools that help users train better models and apply them into practice. ''; changelog = "https://github.com/PaddlePaddle/PaddleOCR/releases/tag/v${version}"; maintainers = with maintainers; [ happysalada ]; platforms = [ "x86_64-linux" "x86_64-darwin" "aarch64-darwin" ]; }; }