about summary refs log tree commit diff
path: root/nixpkgs/pkgs/applications/science/biology/gatk/default.nix
blob: 43e5382b5d50be5e9f36fc0e893fb2dbeda14eb8 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
# Packages the prebuilt GATK release archive: the two bundled jars and the
# `gatk` launcher are copied into $out/bin, and the launcher is wrapped so
# that the binaries of `jre` are on its PATH.
{ lib, stdenv, fetchzip, jre, makeWrapper, python3 }:

stdenv.mkDerivation rec {
  pname = "gatk";
  version = "4.4.0.0";
  src = fetchzip {
    url = "https://github.com/broadinstitute/gatk/releases/download/${version}/gatk-${version}.zip";
    # SRI-format hashes belong in `hash`; `sha256` is the legacy attribute.
    hash = "sha256-svOtIS6gz9nwVgVmVQbk9z6Ufyobpn6bFbZY4zurvUI=";
  };

  # makeWrapper provides `wrapProgram`, used in postFixup below.
  nativeBuildInputs = [ makeWrapper ];
  # NOTE(review): python3 is presumably here so the launcher's shebang can be
  # patched to a store path during fixup — confirm against the upstream script.
  buildInputs = [ python3 ];

  # fetchzip already produces an extracted tree, so files are installed
  # directly from $src and the unpack phase is skipped.
  dontUnpack = true;

  installPhase = ''
    runHook preInstall

    mkdir -p $out/bin
    install -m755 -D $src/gatk-package-${version}-local.jar $out/bin/
    install -m755 -D $src/gatk-package-${version}-spark.jar $out/bin/
    install -m755 -D $src/gatk $out/bin/

    runHook postInstall
  '';

  # Make the `jre` binaries reachable from the launcher without requiring
  # them in the user's environment.
  postFixup = ''
    wrapProgram $out/bin/gatk --prefix PATH : ${lib.makeBinPath [ jre ]}
  '';

  meta = with lib; {
    homepage = "https://gatk.broadinstitute.org/hc/en-us";
    description = "Wide variety of tools with a primary focus on variant discovery and genotyping";
    license = licenses.asl20;
    # The release ships prebuilt jars rather than being built from source.
    sourceProvenance = with lib.sourceTypes; [ binaryBytecode ];
    maintainers = with maintainers; [ apraga ];
    mainProgram = "gatk";
    longDescription = ''
      The GATK is the industry standard for identifying SNPs and indels in germline
      DNA and RNAseq data. Its scope is now expanding to include somatic short variant
      calling, and to tackle copy number (CNV) and structural variation (SV). In
      addition to the variant callers themselves, the GATK also includes many
      utilities to perform related tasks such as processing and quality control of
      high-throughput sequencing data, and bundles the popular Picard toolkit.

      These tools were primarily designed to process exomes and whole genomes
      generated with Illumina sequencing technology, but they can be adapted to handle
      a variety of other technologies and experimental designs. And although it was
      originally developed for human genetics, the GATK has since evolved to handle
      genome data from any organism, with any level of ploidy.
    '';
  };
}