about summary refs log tree commit diff
path: root/nixpkgs/pkgs/tools/security/semgrep
diff options
context:
space:
mode:
Diffstat (limited to 'nixpkgs/pkgs/tools/security/semgrep')
-rw-r--r-- nixpkgs/pkgs/tools/security/semgrep/common.nix | 55
-rw-r--r-- nixpkgs/pkgs/tools/security/semgrep/default.nix | 145
-rw-r--r-- nixpkgs/pkgs/tools/security/semgrep/semgrep-core.nix | 53
-rwxr-xr-x nixpkgs/pkgs/tools/security/semgrep/update.sh | 146
4 files changed, 399 insertions, 0 deletions
diff --git a/nixpkgs/pkgs/tools/security/semgrep/common.nix b/nixpkgs/pkgs/tools/security/semgrep/common.nix
new file mode 100644
index 000000000000..3436d4267964
--- /dev/null
+++ b/nixpkgs/pkgs/tools/security/semgrep/common.nix
@@ -0,0 +1,55 @@
+{ lib }:
+
+rec {
+  version = "1.66.2";
+
+  srcHash = "sha256-xonZzZsAkAPMVINGEA10CvQ1diYgHBowNsR2pk4tYr8=";
+
+  # submodules the build depends on
+  # each one is pinned and fetched individually so that we:
+  #   1. skip the many submodules that are of no use here
+  #   2. stay away from fetchSubmodules, which is prone to impurities
+  submodules = {
+    "cli/src/semgrep/semgrep_interfaces" = {
+      owner = "semgrep";
+      repo = "semgrep-interfaces";
+      rev = "215a54782174de84f97188632b4a37e35ba0f827";
+      hash = "sha256-Q8E5LkC/NV0wvt9ZwhkoPGjPlDavVHHMnX0sVNK3dAM=";
+    };
+  };
+
+  # semgrep-core is taken pre-built: the ocaml build is complex and relies on
+  # the opam package manager at some point.
+  # The binary is pulled out of the python wheel (one entry per Nix system
+  # below), as r2c no longer release a built binary on github releases.
+  core = {
+    x86_64-linux = {
+      platform = "any";
+      hash = "sha256-f/RcuJyd8y2bMclMxZ1BdNTVixhjLz0UxSKGZm+H8yI=";
+    };
+    x86_64-darwin = {
+      platform = "macosx_10_14_x86_64";
+      hash = "sha256-4H9PT41lPydMFl51O2CgeMQiTE66fZ8RP26CVT7Y7Ok=";
+    };
+    aarch64-darwin = {
+      platform = "macosx_11_0_arm64";
+      hash = "sha256-WxQ0ohojzhWmPo208xN98F5GwbNzQuxCjSwP7h3rBGA=";
+    };
+  };
+
+  meta = {
+    homepage = "https://semgrep.dev/";
+    downloadPage = "https://github.com/semgrep/semgrep/";
+    changelog = "https://github.com/semgrep/semgrep/blob/v${version}/CHANGELOG.md";
+    description = "Lightweight static analysis for many languages";
+    longDescription = ''
+      Semgrep is a fast, open-source, static analysis tool for finding bugs and
+      enforcing code standards at editor, commit, and CI time. Semgrep analyzes
+      code locally on your computer or in your build environment: code is never
+      uploaded. Its rules look like the code you already write; no abstract
+      syntax trees, regex wrestling, or painful DSLs.
+    '';
+    license = lib.licenses.lgpl21Plus;
+    maintainers = with lib.maintainers; [ jk ambroisie ];
+  };
+}
diff --git a/nixpkgs/pkgs/tools/security/semgrep/default.nix b/nixpkgs/pkgs/tools/security/semgrep/default.nix
new file mode 100644
index 000000000000..6b62ab80e7ea
--- /dev/null
+++ b/nixpkgs/pkgs/tools/security/semgrep/default.nix
@@ -0,0 +1,145 @@
+{ lib
+, fetchFromGitHub
+, fetchpatch
+, semgrep-core
+, buildPythonApplication
+, pythonPackages
+, pythonRelaxDepsHook
+
+, pytestCheckHook
+, git
+}:
+
+# quick smoke test of a finished build:
+# ./result/bin/semgrep scan --metrics=off --config 'r/generic.unicode.security.bidi.contains-bidirectional-characters'
+
+let
+  common = import ./common.nix { inherit lib; };
+  semgrepBinPath = lib.makeBinPath [ semgrep-core ];
+in
+buildPythonApplication rec {
+  pname = "semgrep";
+  version = common.version;
+  src = fetchFromGitHub {
+    owner = "semgrep";
+    repo = "semgrep";
+    rev = "v${version}";
+    hash = common.srcHash;
+  };
+
+  # only a handful of the (very many) upstream submodules are needed, so each
+  # required placeholder directory is swapped for its separately fetched source
+  postPatch =
+    let
+      linkSubmodule = path: submodule: ''
+        # substitute ${path}
+        # remove git submodule placeholder
+        rm -r ${path}
+        # link submodule
+        ln -s ${submodule}/ ${path}
+      '';
+    in
+    lib.concatStringsSep "\n" (lib.mapAttrsToList linkSubmodule passthru.submodulesSubset) + ''
+      cd cli
+    '';
+
+  nativeBuildInputs = [ pythonRelaxDepsHook ];
+  # make cli/setup.py skip copying semgrep-core into the result: one shared
+  # copy of semgrep-core is used instead, which also avoids an issue where the
+  # binary is copied without keeping its executable bit
+  SEMGREP_SKIP_BIN = true;
+
+  pythonRelaxDeps = [
+    "boltons"
+    "glom"
+  ];
+
+  propagatedBuildInputs = with pythonPackages; [
+    attrs
+    boltons
+    colorama
+    click
+    click-option-group
+    glom
+    requests
+    rich
+    ruamel-yaml
+    tqdm
+    packaging
+    jsonschema
+    wcmatch
+    peewee
+    defusedxml
+    urllib3
+    typing-extensions
+    python-lsp-jsonrpc
+    tomli
+  ];
+
+  doCheck = true;
+
+  nativeCheckInputs = [ git pytestCheckHook ] ++ (with pythonPackages; [
+    flaky
+    pytest-snapshot
+    pytest-mock
+    pytest-freezegun
+    types-freezegun
+  ]);
+
+  disabledTestPaths = [
+    "tests/default/e2e"
+    "tests/default/e2e-pro"
+    "tests/default/e2e-pysemgrep"
+  ];
+
+  disabledTests = [
+    # needs network access
+    "test_send"
+    # needs network access
+    "test_parse_exclude_rules_auto"
+    # many of its child tests download files over the network
+    "TestConfigLoaderForProducts"
+    # does not start the flaky plugin correctly
+    "test_debug_performance"
+  ];
+
+  preCheck = ''
+    # tests need a home directory
+    export HOME="$(mktemp -d)"
+
+    # tests need access to `semgrep-core`
+    export OLD_PATH="$PATH"
+    export PATH="$PATH:${semgrepBinPath}"
+
+    # we're in cli
+    # replace old semgrep with wrapped one
+    rm ./bin/semgrep
+    ln -s $out/bin/semgrep ./bin/semgrep
+  '';
+
+  postCheck = ''
+    export PATH="$OLD_PATH"
+    unset OLD_PATH
+  '';
+
+  # cli/setup.py is told not to find and copy semgrep-core into the result,
+  # so the wrapper has to put it on the PATH instead
+  preFixup = ''
+    makeWrapperArgs+=(--prefix PATH : ${semgrepBinPath})
+  '';
+
+  postInstall = ''
+    chmod +x $out/bin/{,py}semgrep
+  '';
+
+  passthru = {
+    inherit common;
+    submodulesSubset = lib.mapAttrs (_: fetchFromGitHub) common.submodules;
+    updateScript = ./update.sh;
+  };
+
+  meta = common.meta // {
+    description = "${common.meta.description} - cli";
+    platforms = semgrep-core.meta.platforms;
+  };
+}
diff --git a/nixpkgs/pkgs/tools/security/semgrep/semgrep-core.nix b/nixpkgs/pkgs/tools/security/semgrep/semgrep-core.nix
new file mode 100644
index 000000000000..8986cba7d880
--- /dev/null
+++ b/nixpkgs/pkgs/tools/security/semgrep/semgrep-core.nix
@@ -0,0 +1,53 @@
+{ lib, stdenvNoCC, fetchPypi, unzip }:
+
+let
+  common = import ./common.nix { inherit lib; };
+  wheelTag = "cp38.cp39.cp310.cp311.py37.py38.py39.py310.py311";
+in
+stdenvNoCC.mkDerivation rec {
+  pname = "semgrep-core";
+  version = common.version;
+  # use the pre-built semgrep-core: the ocaml build is complex and relies on
+  # the opam package manager at some point; the binary is pulled out of the
+  # python wheel as r2c no longer release a built binary on github releases
+  src =
+    let
+      inherit (stdenvNoCC.hostPlatform) system;
+      data = common.core.${system} or (throw "Unsupported system: ${system}");
+    in
+    fetchPypi {
+      pname = "semgrep";
+      inherit version;
+      format = "wheel";
+      dist = wheelTag;
+      python = wheelTag;
+      inherit (data) platform hash;
+    };
+
+  nativeBuildInputs = [ unzip ];
+
+  # the unpacker from unzip's setup-hook (_tryUnzip) doesn't recognise .whl
+  # and stops with "do not know how to unpack source archive",
+  # so the wheel is unzipped by hand
+  unpackPhase = ''
+    runHook preUnpack
+    LANG=en_US.UTF-8 unzip -qq "$src"
+    runHook postUnpack
+  '';
+
+  dontConfigure = true;
+  dontBuild = true;
+
+  installPhase = ''
+    runHook preInstall
+    install -Dm 755 -t $out/bin semgrep-${version}.data/purelib/semgrep/bin/semgrep-core
+    runHook postInstall
+  '';
+
+  meta = common.meta // {
+    description = "${common.meta.description} - core binary";
+    mainProgram = "semgrep-core";
+    sourceProvenance = [ lib.sourceTypes.binaryNativeCode ];
+    platforms = builtins.attrNames common.core;
+  };
+}
diff --git a/nixpkgs/pkgs/tools/security/semgrep/update.sh b/nixpkgs/pkgs/tools/security/semgrep/update.sh
new file mode 100755
index 000000000000..67b720154b0b
--- /dev/null
+++ b/nixpkgs/pkgs/tools/security/semgrep/update.sh
@@ -0,0 +1,146 @@
+#!/usr/bin/env nix-shell
+#!nix-shell -i bash -p curl git gnused jq nix-prefetch
+
+set -euxo pipefail
+
+# provide a github token so you don't get rate limited
+# if you use gh cli you can use:
+#     `export GITHUB_TOKEN="$(gh auth token)"`
+# or just set your token by hand:
+#     `read -s -p "Enter your token: " GITHUB_TOKEN; export GITHUB_TOKEN`
+#     (we use read so it doesn't show in our shell history and in secret mode so the token you paste isn't visible)
+if [ -z "${GITHUB_TOKEN:-}" ]; then
+    echo "no GITHUB_TOKEN provided - you could meet API request limiting" >&2
+fi
+
+ROOT="$(dirname "$(readlink -f "$0")")"
+NIXPKGS_ROOT="$ROOT/../../../.."
+
+COMMON_FILE="$ROOT/common.nix"
+
+instantiateClean() { # evaluate attribute path $1 and print the text between its first pair of double quotes
+    nix-instantiate --eval --strict -A "$1" | cut -d '"' -f 2
+}
+
+# get latest version
+NEW_VERSION=$(
+  curl -s -L -H \
+    "Accept: application/vnd.github.v3+json" \
+    ${GITHUB_TOKEN:+ -H "Authorization: bearer $GITHUB_TOKEN"} \
+    https://api.github.com/repos/semgrep/semgrep/releases/latest \
+  | jq -r '.tag_name'
+)
+# trim v prefix
+NEW_VERSION="${NEW_VERSION:1}"
+OLD_VERSION="$(instantiateClean semgrep.passthru.common.version)"
+
+if [[ "$OLD_VERSION" == "$NEW_VERSION" ]]; then
+    echo "Already up to date"
+    exit
+fi
+
+replace() { # replace every literal occurrence of $1 with $2 in file $3 (regex and sed metacharacters are escaped)
+    sed -i "s@$(sed 's/[][\\.*^$@]/\\&/g' <<<"$1")@$(sed 's/[\\&@]/\\&/g' <<<"$2")@g" "$3"
+}
+
+fetchgithub() { # print the hash nix-build reports on its "got:" line for attr $1; that build is expected to fail
+    # the pipeline status is deliberately ignored (nix-build fails by design); the output is validated instead
+    local got="$(nix-build -A "$1" 2>&1 >/dev/null | grep "got:" | cut -d':' -f2 | sed 's| ||g' || true)"
+    echo "${got:?fetchgithub: nix-build reported no hash for $1}"
+}
+
+fetch_arch() {
+  VERSION=$1
+  PLATFORM=$2
+  nix-prefetch "{ fetchPypi }:
+fetchPypi rec {
+  pname = \"semgrep\";
+  version = \"$VERSION\";
+  format = \"wheel\";
+  dist = python;
+  python = \"cp38.cp39.cp310.cp311.py37.py38.py39.py310.py311\";
+  platform = \"$PLATFORM\";
+}
+"
+}
+
+# bump the version first so that semgrep.src refers to the new tag
+replace "${OLD_VERSION}" "${NEW_VERSION}" "${COMMON_FILE}"
+
+echo "Updating src"
+OLD_HASH="$(instantiateClean semgrep.passthru.common.srcHash)"
+echo "Old hash $OLD_HASH"
+# swap in a placeholder hash, then read the real one from the fetch output
+TMP_HASH="sha256-AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA="
+replace "${OLD_HASH}" "${TMP_HASH}" "${COMMON_FILE}"
+NEW_HASH="$(fetchgithub semgrep.src)"
+echo "New hash $NEW_HASH"
+replace "${TMP_HASH}" "${NEW_HASH}" "${COMMON_FILE}"
+echo "Updated src"
+
+
+# Refresh the semgrep-core wheel hash stored in $COMMON_FILE for Nix system $1.
+update_core_platform() {
+    # declared apart from the assignments so that a failing $(...) still trips `set -e`
+    local system=$1 platform old_hash new_hash
+    echo "Updating core src $system"
+    platform="$(instantiateClean "semgrep.passthru.common.core.$system.platform")"
+    old_hash="$(instantiateClean "semgrep.passthru.common.core.$system.hash")"
+    echo "Old core hash $old_hash"
+    new_hash="$(fetch_arch "$NEW_VERSION" "$platform")"
+    echo "New core hash $new_hash"
+    replace "$old_hash" "$new_hash" "$COMMON_FILE"
+
+    echo "Updated core src $system"
+}
+
+update_core_platform "x86_64-linux"
+update_core_platform "x86_64-darwin"
+update_core_platform "aarch64-darwin"
+
+TMPDIR="$(mktemp -d)"
+trap 'rm -rf "$TMPDIR"' EXIT # remove the clone even when a later step aborts the script under `set -e`
+# shallow clone to check submodule commits, don't actually need the submodules
+git clone https://github.com/semgrep/semgrep "$TMPDIR/semgrep" --depth 1 --branch "v$NEW_VERSION"
+
+get_submodule_commit() {
+    OLD_PWD=$PWD
+    (
+        cd "$TMPDIR/semgrep"
+        git ls-tree --object-only HEAD "$1"
+        cd "$OLD_PWD"
+    )
+}
+
+# walk over every submodule listed in common.nix and refresh its rev and hash
+nix-instantiate --eval --strict --json -E "with import $NIXPKGS_ROOT {}; builtins.attrNames semgrep.passthru.common.submodules" \
+| jq -r '.[]' \
+| while read -r submodule; do
+    echo "Updating $submodule"
+    old_rev=$(instantiateClean semgrep.passthru.common.submodules."$submodule".rev)
+    echo "Old commit $old_rev"
+    old_hash=$(instantiateClean semgrep.passthru.common.submodules."$submodule".hash)
+    echo "Old hash $old_hash"
+
+    new_rev=$(get_submodule_commit "$submodule")
+    echo "New commit $new_rev"
+
+    # nothing to do when the release still pins the same commit
+    if [[ "$old_rev" == "$new_rev" ]]; then
+      echo "$submodule already up to date"
+      continue
+    fi
+    # placeholder hash: the real one is read from the fetch output below
+    tmp_hash="sha256-AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA="
+    replace "$old_rev" "$new_rev" "$COMMON_FILE"
+    replace "$old_hash" "$tmp_hash" "$COMMON_FILE"
+    new_hash="$(fetchgithub semgrep.passthru.submodulesSubset."$submodule")"
+    echo "New hash $new_hash"
+    replace "$tmp_hash" "$new_hash" "$COMMON_FILE"
+
+    echo "Updated $submodule"
+done
+
+rm -rf "$TMPDIR"
+echo "Finished"
+