about summary refs log tree commit diff
path: root/pkgs/build-support/fetchgit/nix-prefetch-git
diff options
context:
space:
mode:
author Bjørn Forsman <bjorn.forsman@gmail.com> 2014-11-01 16:42:03 +0100
committer Bjørn Forsman <bjorn.forsman@gmail.com> 2014-11-02 13:15:33 +0100
commit 53614cf1a7037a7cf355366880c44433674f19af (patch)
tree f689c48ea3038333ebe4d91dfeb0bff2f478f2e2 /pkgs/build-support/fetchgit/nix-prefetch-git
parent 415f41bf6888e32f9359c5e32f59c40855bf74f0 (diff)
download nixlib-53614cf1a7037a7cf355366880c44433674f19af.tar
nixlib-53614cf1a7037a7cf355366880c44433674f19af.tar.gz
nixlib-53614cf1a7037a7cf355366880c44433674f19af.tar.bz2
nixlib-53614cf1a7037a7cf355366880c44433674f19af.tar.lz
nixlib-53614cf1a7037a7cf355366880c44433674f19af.tar.xz
nixlib-53614cf1a7037a7cf355366880c44433674f19af.tar.zst
nixlib-53614cf1a7037a7cf355366880c44433674f19af.zip
nix-prefetch-git: fix determinism with leaveDotGit
Add more files to the delete list:

 * .git/FETCH_HEAD
 * .git/ORIG_HEAD
 * .git/refs/remotes/origin/HEAD
 * .git/config

Further, remove all remote branches, remove tags not reachable from the
given 'rev', do a full repack and then garbage collect unreferenced
objects.

According to my testing, the result is fully deterministic. As in "any
change done to the upstream repo, ahead of 'rev', will not affect the
hash of the resulting 'clone'". Even changing the clone URL will not
change the output hash, because .git/config is removed.

A new version of git can of course change store format, but that's
unavoidable.

For big repositories, the repack operation may be a bit heavy. But as
far as I can see there is no cheaper way to determinism.
Diffstat (limited to 'pkgs/build-support/fetchgit/nix-prefetch-git')
-rwxr-xr-x pkgs/build-support/fetchgit/nix-prefetch-git 43
1 files changed, 40 insertions, 3 deletions
diff --git a/pkgs/build-support/fetchgit/nix-prefetch-git b/pkgs/build-support/fetchgit/nix-prefetch-git
index 0a01794f4665..c4f9a1bf916a 100755
--- a/pkgs/build-support/fetchgit/nix-prefetch-git
+++ b/pkgs/build-support/fetchgit/nix-prefetch-git
@@ -199,6 +199,43 @@ clone(){
     cd $top
 }
 
+# Make the git checkout in directory $1 deterministic: drop volatile files,
+# remote branches and tags ahead of HEAD, then fully repack and prune.
+make_deterministic_repo(){
+    local repo="$1"
+
+    # run in sub-shell to not touch current working directory
+    (
+    cd "$repo" || exit 1 # bail out so the rm below cannot hit another dir
+    # Remove files that contain timestamps or otherwise have non-deterministic
+    # properties.
+    rm -rf .git/logs/ .git/hooks/ .git/index .git/FETCH_HEAD .git/ORIG_HEAD \
+        .git/refs/remotes/origin/HEAD .git/config
+
+    # Remove all remote branches (read trims git's leading indentation).
+    git branch -r | while read -r branch; do
+        git branch -rD "$branch" >&2
+    done
+
+    # Remove tags that contain HEAD but sit on a later commit. Every tag that
+    # resolves to HEAD itself is kept, even when several tags point there.
+    head_rev=$(git rev-parse HEAD)
+    git tag --contains HEAD | while read -r tag; do
+        if [ "$(git rev-parse "refs/tags/$tag^{commit}")" != "$head_rev" ]; then
+            git tag -d "$tag" >&2
+        fi
+    done
+
+    # Do a full repack, for determinism.
+    # Repack does not add unreferenced objects to a pack file.
+    git repack -A -d -f
+
+    # Garbage collect unreferenced objects.
+    git gc --prune=all
+    )
+}
+
+
 clone_user_rev() {
     local dir="$1"
     local url="$2"
@@ -227,9 +264,9 @@ clone_user_rev() {
         echo "removing \`.git'..." >&2
         find $dir -name .git\* | xargs rm -rf
     else
-        # The logs and index contain timestamps, and the hooks contain
-        # the nix path of git's bash
-        find $dir -name .git | xargs -I {} rm -rf {}/logs {}/index {}/hooks
+        find $dir -name .git | while read gitdir; do
+            make_deterministic_repo "$(readlink -f "$gitdir/..")"
+        done
     fi
 }