From 53614cf1a7037a7cf355366880c44433674f19af Mon Sep 17 00:00:00 2001
From: Bjørn Forsman
Date: Sat, 1 Nov 2014 16:42:03 +0100
Subject: nix-prefetch-git: fix determinism with leaveDotGit

Add more files to the delete list:
* .git/FETCH_HEAD
* .git/ORIG_HEAD
* .git/refs/remotes/origin/HEAD
* .git/config

Further, remove all remote branches, remove tags not reachable from the
given 'rev', do a full repack and then garbage collect unreferenced
objects.

According to my testing, the result is fully deterministic. As in "any
change done to the upstream repo, ahead of 'rev', will not affect the
hash of the resulting 'clone'". Even changing the clone URL will not
change the output hash, because .git/config is removed. A new version
of git can of course change store format, but that's unavoidable.

For big repositories, the repack operation may be a bit heavy. But as
far as I can see there is no cheaper way to determinism.
---
 pkgs/build-support/fetchgit/nix-prefetch-git | 51 ++++++++++++++++++++++++++--
 1 file changed, 48 insertions(+), 3 deletions(-)

(limited to 'pkgs/build-support')

diff --git a/pkgs/build-support/fetchgit/nix-prefetch-git b/pkgs/build-support/fetchgit/nix-prefetch-git
index 0a01794f4665..c4f9a1bf916a 100755
--- a/pkgs/build-support/fetchgit/nix-prefetch-git
+++ b/pkgs/build-support/fetchgit/nix-prefetch-git
@@ -199,6 +199,51 @@ clone(){
     cd $top
 }
 
+# Strip everything from the checkout in "$1" that depends on when or from
+# where it was cloned, so that the resulting .git directory is determined by
+# the checked-out revision alone.  Removes all remote branches, removes tags
+# not reachable from HEAD, does a full repack and then garbage collects
+# unreferenced objects.
+#
+#   $1 - path to the working tree whose .git directory is to be cleaned
+make_deterministic_repo(){
+    local repo="$1"
+
+    # run in sub-shell to not touch current working directory
+    (
+    # Bail out if we cannot enter the repository: the relative 'rm -rf' below
+    # must never run in the caller's directory.
+    cd "$repo" || exit 1
+
+    # Remove files that contain timestamps or otherwise have non-deterministic
+    # properties.
+    rm -rf .git/logs/ .git/hooks/ .git/index .git/FETCH_HEAD .git/ORIG_HEAD \
+        .git/refs/remotes/origin/HEAD .git/config
+
+    # Remove all remote branches.
+    git branch -r | while read -r branch; do
+        git branch -rD "$branch" >&2
+    done
+
+    # Remove tags not reachable from HEAD.  Testing ancestry per tag (rather
+    # than listing 'git tag --contains HEAD') also catches tags on unrelated
+    # branches, and keeps every tag that points exactly at HEAD.
+    git tag | while read -r tag; do
+        if ! git merge-base --is-ancestor "refs/tags/$tag" HEAD; then
+            git tag -d "$tag" >&2
+        fi
+    done
+
+    # Do a full repack, for determinism.
+    # Repack does not add unreferenced objects to a pack file.
+    git repack -A -d -f
+
+    # Garbage collect unreferenced objects.
+    git gc --prune=all
+    )
+}
+
+
 clone_user_rev() {
     local dir="$1"
     local url="$2"
@@ -227,9 +272,9 @@ clone_user_rev() {
         echo "removing \`.git'..." >&2
         find $dir -name .git\* | xargs rm -rf
     else
-        # The logs and index contain timestamps, and the hooks contain
-        # the nix path of git's bash
-        find $dir -name .git | xargs -I {} rm -rf {}/logs {}/index {}/hooks
+        find $dir -name .git | while read gitdir; do
+            make_deterministic_repo "$(readlink -f "$gitdir/..")"
+        done
     fi
 }
 
-- 
cgit 1.4.1