diff options
Diffstat (limited to 'nixpkgs/pkgs/tools/security/semgrep')
-rw-r--r-- | nixpkgs/pkgs/tools/security/semgrep/common.nix | 55 | ||||
-rw-r--r-- | nixpkgs/pkgs/tools/security/semgrep/default.nix | 145 | ||||
-rw-r--r-- | nixpkgs/pkgs/tools/security/semgrep/semgrep-core.nix | 53 | ||||
-rwxr-xr-x | nixpkgs/pkgs/tools/security/semgrep/update.sh | 146 |
4 files changed, 399 insertions, 0 deletions
diff --git a/nixpkgs/pkgs/tools/security/semgrep/common.nix b/nixpkgs/pkgs/tools/security/semgrep/common.nix
new file mode 100644
index 000000000000..3436d4267964
--- /dev/null
+++ b/nixpkgs/pkgs/tools/security/semgrep/common.nix
@@ -0,0 +1,55 @@
+{ lib }:
+
+rec {
+  version = "1.66.2";
+
+  srcHash = "sha256-xonZzZsAkAPMVINGEA10CvQ1diYgHBowNsR2pk4tYr8=";
+
+  # submodule dependencies
+  # these are fetched so we:
+  # 1. don't fetch the many submodules we don't need
+  # 2. avoid fetchSubmodules since it's prone to impurities
+  submodules = {
+    "cli/src/semgrep/semgrep_interfaces" = {
+      owner = "semgrep";
+      repo = "semgrep-interfaces";
+      rev = "215a54782174de84f97188632b4a37e35ba0f827";
+      hash = "sha256-Q8E5LkC/NV0wvt9ZwhkoPGjPlDavVHHMnX0sVNK3dAM=";
+    };
+  };
+
+  # fetch pre-built semgrep-core since the ocaml build is complex and relies on
+  # the opam package manager at some point
+  # pulling it out of the python wheel as r2c no longer release a built binary
+  # on github releases
+  core = {
+    x86_64-linux = {
+      platform = "any";
+      hash = "sha256-f/RcuJyd8y2bMclMxZ1BdNTVixhjLz0UxSKGZm+H8yI=";
+    };
+    x86_64-darwin = {
+      platform = "macosx_10_14_x86_64";
+      hash = "sha256-4H9PT41lPydMFl51O2CgeMQiTE66fZ8RP26CVT7Y7Ok=";
+    };
+    aarch64-darwin = {
+      platform = "macosx_11_0_arm64";
+      hash = "sha256-WxQ0ohojzhWmPo208xN98F5GwbNzQuxCjSwP7h3rBGA=";
+    };
+  };
+
+  meta = with lib; {
+    homepage = "https://semgrep.dev/";
+    downloadPage = "https://github.com/semgrep/semgrep/";
+    changelog = "https://github.com/semgrep/semgrep/blob/v${version}/CHANGELOG.md";
+    description = "Lightweight static analysis for many languages";
+    longDescription = ''
+      Semgrep is a fast, open-source, static analysis tool for finding bugs and
+      enforcing code standards at editor, commit, and CI time. Semgrep analyzes
+      code locally on your computer or in your build environment: code is never
+      uploaded. Its rules look like the code you already write; no abstract
+      syntax trees, regex wrestling, or painful DSLs.
+    '';
+    license = licenses.lgpl21Plus;
+    maintainers = with maintainers; [ jk ambroisie ];
+  };
+}
diff --git a/nixpkgs/pkgs/tools/security/semgrep/default.nix b/nixpkgs/pkgs/tools/security/semgrep/default.nix
new file mode 100644
index 000000000000..6b62ab80e7ea
--- /dev/null
+++ b/nixpkgs/pkgs/tools/security/semgrep/default.nix
@@ -0,0 +1,145 @@
+{ lib
+, fetchFromGitHub
+, fetchpatch
+, semgrep-core
+, buildPythonApplication
+, pythonPackages
+, pythonRelaxDepsHook
+
+, pytestCheckHook
+, git
+}:
+
+# testing locally post build:
+# ./result/bin/semgrep scan --metrics=off --config 'r/generic.unicode.security.bidi.contains-bidirectional-characters'
+
+let
+  common = import ./common.nix { inherit lib; };
+  semgrepBinPath = lib.makeBinPath [ semgrep-core ];
+in
+buildPythonApplication rec {
+  pname = "semgrep";
+  inherit (common) version;
+  src = fetchFromGitHub {
+    owner = "semgrep";
+    repo = "semgrep";
+    rev = "v${version}";
+    hash = common.srcHash;
+  };
+
+  # prepare a subset of the submodules as we only need a handful
+  # and there are many many submodules total
+  postPatch = (lib.concatStringsSep "\n" (lib.mapAttrsToList
+    (
+      path: submodule: ''
+        # substitute ${path}
+        # remove git submodule placeholder
+        rm -r ${path}
+        # link submodule
+        ln -s ${submodule}/ ${path}
+      ''
+    )
+    passthru.submodulesSubset)) + ''
+    cd cli
+  '';
+
+  nativeBuildInputs = [ pythonRelaxDepsHook ];
+  # tell cli/setup.py to not copy semgrep-core into the result
+  # this means we can share a copy of semgrep-core and avoid an issue where it
+  # copies the binary but doesn't retain the executable bit
+  SEMGREP_SKIP_BIN = true;
+
+  pythonRelaxDeps = [
+    "boltons"
+    "glom"
+  ];
+
+  propagatedBuildInputs = with pythonPackages; [
+    attrs
+    boltons
+    colorama
+    click
+    click-option-group
+    glom
+    requests
+    rich
+    ruamel-yaml
+    tqdm
+    packaging
+    jsonschema
+    wcmatch
+    peewee
+    defusedxml
+    urllib3
+    typing-extensions
+    python-lsp-jsonrpc
+    tomli
+  ];
+
+  doCheck = true;
+
+  nativeCheckInputs = [ git pytestCheckHook ] ++ (with pythonPackages; [
+    flaky
+    pytest-snapshot
+    pytest-mock
+    pytest-freezegun
+    types-freezegun
+  ]);
+
+  disabledTestPaths = [
+    "tests/default/e2e"
+    "tests/default/e2e-pro"
+    "tests/default/e2e-pysemgrep"
+  ];
+
+  disabledTests = [
+    # requires networking
+    "test_send"
+    # requires networking
+    "test_parse_exclude_rules_auto"
+    # many child tests require networking to download files
+    "TestConfigLoaderForProducts"
+    # doesn't start flaky plugin correctly
+    "test_debug_performance"
+  ];
+
+  preCheck = ''
+    # tests need a home directory
+    export HOME="$(mktemp -d)"
+
+    # tests need access to `semgrep-core`
+    export OLD_PATH="$PATH"
+    export PATH="$PATH:${semgrepBinPath}"
+
+    # we're in cli
+    # replace old semgrep with wrapped one
+    rm ./bin/semgrep
+    ln -s $out/bin/semgrep ./bin/semgrep
+  '';
+
+  postCheck = ''
+    export PATH="$OLD_PATH"
+    unset OLD_PATH
+  '';
+
+  # since we stop cli/setup.py from finding semgrep-core and copying it into
+  # the result we need to provide it on the PATH
+  preFixup = ''
+    makeWrapperArgs+=(--prefix PATH : ${semgrepBinPath})
+  '';
+
+  postInstall = ''
+    chmod +x $out/bin/{,py}semgrep
+  '';
+
+  passthru = {
+    inherit common;
+    submodulesSubset = lib.mapAttrs (k: args: fetchFromGitHub args) common.submodules;
+    updateScript = ./update.sh;
+  };
+
+  meta = common.meta // {
+    description = common.meta.description + " - cli";
+    inherit (semgrep-core.meta) platforms;
+  };
+}
diff --git a/nixpkgs/pkgs/tools/security/semgrep/semgrep-core.nix b/nixpkgs/pkgs/tools/security/semgrep/semgrep-core.nix
new file mode 100644
index 000000000000..8986cba7d880
--- /dev/null
+++ b/nixpkgs/pkgs/tools/security/semgrep/semgrep-core.nix
@@ -0,0 +1,53 @@
+{ lib, stdenvNoCC, fetchPypi, unzip }:
+
+let
+  common = import ./common.nix { inherit lib; };
+in
+stdenvNoCC.mkDerivation rec {
+  pname = "semgrep-core";
+  inherit (common) version;
+  # fetch pre-built semgrep-core since the ocaml build is complex and relies on
+  # the opam package manager at some point
+  # pulling it out of the python wheel as r2c no longer release a built binary
+  # on github releases
+  src =
+    let
+      inherit (stdenvNoCC.hostPlatform) system;
+      data = common.core.${system} or (throw "Unsupported system: ${system}");
+    in
+    fetchPypi rec {
+      pname = "semgrep";
+      inherit version;
+      format = "wheel";
+      dist = python;
+      python = "cp38.cp39.cp310.cp311.py37.py38.py39.py310.py311";
+      inherit (data) platform hash;
+    };
+
+  nativeBuildInputs = [ unzip ];
+
+  # _tryUnzip from unzip's setup-hook doesn't recognise .whl
+  # "do not know how to unpack source archive"
+  # perform unpack by hand
+  unpackPhase = ''
+    runHook preUnpack
+    LANG=en_US.UTF-8 unzip -qq "$src"
+    runHook postUnpack
+  '';
+
+  dontConfigure = true;
+  dontBuild = true;
+
+  installPhase = ''
+    runHook preInstall
+    install -Dm 755 -t $out/bin semgrep-${version}.data/purelib/semgrep/bin/semgrep-core
+    runHook postInstall
+  '';
+
+  meta = common.meta // {
+    description = common.meta.description + " - core binary";
+    mainProgram = "semgrep-core";
+    sourceProvenance = with lib.sourceTypes; [ binaryNativeCode ];
+    platforms = lib.attrNames common.core;
+  };
+}
diff --git a/nixpkgs/pkgs/tools/security/semgrep/update.sh b/nixpkgs/pkgs/tools/security/semgrep/update.sh
new file mode 100755
index 000000000000..67b720154b0b
--- /dev/null
+++ b/nixpkgs/pkgs/tools/security/semgrep/update.sh
@@ -0,0 +1,146 @@
+#!/usr/bin/env nix-shell
+#!nix-shell -i bash -p curl gnused jq nix-prefetch
+
+set -euxo pipefail
+
+# provide a github token so you don't get rate limited
+# if you use gh cli you can use:
+# `export GITHUB_TOKEN="$(cat ~/.config/gh/config.yml | yq '.hosts."github.com".oauth_token' -r)"`
+# or just set your token by hand:
+# `read -s -p "Enter your token: " GITHUB_TOKEN; export GITHUB_TOKEN`
+# (we use read so it doesn't show in our shell history and in secret mode so the token you paste isn't visible)
+if [ -z "${GITHUB_TOKEN:-}" ]; then
+    echo "no GITHUB_TOKEN provided - you could meet API request limiting" >&2
+fi
+
+ROOT="$(dirname "$(readlink -f "$0")")"
+NIXPKGS_ROOT="$ROOT/../../../.."
+
+COMMON_FILE="$ROOT/common.nix"
+
+instantiateClean() {
+    nix-instantiate -A "$1" --eval --strict | cut -d\" -f2
+}
+
+# get latest version
+NEW_VERSION=$(
+  curl -s -L -H \
+    "Accept: application/vnd.github.v3+json" \
+    ${GITHUB_TOKEN:+ -H "Authorization: bearer $GITHUB_TOKEN"} \
+    https://api.github.com/repos/semgrep/semgrep/releases/latest \
+  | jq -r '.tag_name'
+)
+# trim v prefix
+NEW_VERSION="${NEW_VERSION:1}"
+OLD_VERSION="$(instantiateClean semgrep.passthru.common.version)"
+
+if [[ "$OLD_VERSION" == "$NEW_VERSION" ]]; then
+    echo "Already up to date"
+    exit
+fi
+
+replace() {
+    sed -i "s@$1@$2@g" "$3"
+}
+
+fetchgithub() {
+    set +eo pipefail
+    nix-build -A "$1" 2>&1 >/dev/null | grep "got:" | cut -d':' -f2 | sed 's| ||g'
+    set -eo pipefail
+}
+
+fetch_arch() {
+  VERSION=$1
+  PLATFORM=$2
+  nix-prefetch "{ fetchPypi }:
+fetchPypi rec {
+  pname = \"semgrep\";
+  version = \"$VERSION\";
+  format = \"wheel\";
+  dist = python;
+  python = \"cp38.cp39.cp310.cp311.py37.py38.py39.py310.py311\";
+  platform = \"$PLATFORM\";
+}
+"
+}
+
+replace "$OLD_VERSION" "$NEW_VERSION" "$COMMON_FILE"
+
+echo "Updating src"
+
+OLD_HASH="$(instantiateClean semgrep.passthru.common.srcHash)"
+echo "Old hash $OLD_HASH"
+TMP_HASH="sha256-AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA="
+replace "$OLD_HASH" "$TMP_HASH" "$COMMON_FILE"
+NEW_HASH="$(fetchgithub semgrep.src)"
+echo "New hash $NEW_HASH"
+replace "$TMP_HASH" "$NEW_HASH" "$COMMON_FILE"
+
+echo "Updated src"
+
+
+update_core_platform() {
+  SYSTEM=$1
+  echo "Updating core src $SYSTEM"
+
+  PLATFORM="$(instantiateClean "semgrep.passthru.common.core.$SYSTEM.platform")"
+
+  OLD_HASH="$(instantiateClean "semgrep.passthru.common.core.$SYSTEM.hash")"
+  echo "Old core hash $OLD_HASH"
+  NEW_HASH="$(fetch_arch "$NEW_VERSION" "$PLATFORM")"
+  echo "New core hash $NEW_HASH"
+  replace "$OLD_HASH" "$NEW_HASH" "$COMMON_FILE"
+
+  echo "Updated core src $SYSTEM"
+}
+
+update_core_platform "x86_64-linux"
+update_core_platform "x86_64-darwin"
+update_core_platform "aarch64-darwin"
+
+OLD_PWD=$PWD
+TMPDIR="$(mktemp -d)"
+# shallow clone to check submodule commits, don't actually need the submodules
+git clone https://github.com/semgrep/semgrep "$TMPDIR/semgrep" --depth 1 --branch "v$NEW_VERSION"
+
+get_submodule_commit() {
+  OLD_PWD=$PWD
+  (
+    cd "$TMPDIR/semgrep"
+    git ls-tree --object-only HEAD "$1"
+    cd "$OLD_PWD"
+  )
+}
+
+# loop through submodules
+nix-instantiate -E "with import $NIXPKGS_ROOT {}; builtins.attrNames semgrep.passthru.common.submodules" --eval --strict --json \
+| jq '.[]' -r \
+| while read -r SUBMODULE; do
+  echo "Updating $SUBMODULE"
+  OLD_REV=$(instantiateClean semgrep.passthru.common.submodules."$SUBMODULE".rev)
+  echo "Old commit $OLD_REV"
+  OLD_HASH=$(instantiateClean semgrep.passthru.common.submodules."$SUBMODULE".hash)
+  echo "Old hash $OLD_HASH"
+
+  NEW_REV=$(get_submodule_commit "$SUBMODULE")
+  echo "New commit $NEW_REV"
+
+  if [[ "$OLD_REV" == "$NEW_REV" ]]; then
+    echo "$SUBMODULE already up to date"
+    continue
+  fi
+
+  TMP_HASH="sha256-AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA="
+  replace "$OLD_REV" "$NEW_REV" "$COMMON_FILE"
+  replace "$OLD_HASH" "$TMP_HASH" "$COMMON_FILE"
+  NEW_HASH="$(fetchgithub semgrep.passthru.submodulesSubset."$SUBMODULE")"
+  echo "New hash $NEW_HASH"
+  replace "$TMP_HASH" "$NEW_HASH" "$COMMON_FILE"
+
+  echo "Updated $SUBMODULE"
+done
+
+rm -rf "$TMPDIR"
+
+echo "Finished"
+