# Source: pkgs/build-support/rust/fetch-cargo-deps
# (blob 4fbc53d1039e06a7ecdf0f09e71f97b2ddb8db34)

# Private helper: strip a cargo git database down to exactly the given revs
# and repack it so that its on-disk contents are reproducible.
#
# Arguments:
#   $1     - path to the bare git database (e.g. $CARGO_HOME/git/db/<name>)
#   $2...  - revisions that must stay reachable
#
# The body is a subshell, so the 'cd' and the exported GIT_DIR do not leak
# into the caller.
_fetchCargoDeps_normalizeGitDb() (
    # The following code was adapted from nix-prefetch-git

    cd "$1" || exit 1
    shift

    export GIT_DIR=.

    # Remove all remote branches
    git branch -r | while read -r branch; do
        git branch -rD "$branch" >&2
    done

    # Remove all tags
    git tag | while read -r tag; do
        git tag -d "$tag" >&2
    done

    # Remove all local branches
    branchrefs=()
    eval "$(git for-each-ref --shell --format='branchrefs+=(%(refname))' refs/heads/)"

    for branchref in "${branchrefs[@]}"; do
        git update-ref -d "$branchref" >&2
    done

    # Create one ad-hoc branch per rev we need, so that every checked-out
    # revision stays reachable after the garbage collection below.
    for rev in "$@"; do
        [[ -n "$rev" ]] || continue
        echo "Creating git branch b_$rev $rev"
        git branch "b_$rev" "$rev"
    done

    # Remove files that have timestamps or otherwise have non-deterministic
    # properties.
    rm -rf logs/ hooks/ index FETCH_HEAD ORIG_HEAD refs/remotes/origin/HEAD config

    # Do a full repack. Must run single-threaded, or else we lose determinism.
    git config pack.threads 1
    git repack -A -d -f
    rm -f config

    # Garbage collect unreferenced objects.
    # NOTE(review): 'config' (and with it pack.threads=1) is already gone
    # here; confirm that gc cannot reintroduce non-determinism.
    git gc --prune=all
)

# Fetch all cargo dependencies of a project into a directory whose contents
# are meant to be deterministic.
#
# Arguments:
#   $1 - project source directory; must contain a Cargo.lock
#   $2 - output directory; must not exist yet (it is created with 'mkdir')
#
# Globals:
#   rustRegistry     (read)     local copy of the crates.io registry index
#   cargoUpdateHook  (read)     shell snippet eval'ed just before 'cargo fetch'
#   src, out         (written)  resolved absolute paths of $1 and $2
#   CARGO_HOME       (exported) set to $out
#
# Side effects: changes the working directory to $src, rewrites Cargo.lock
# there and moves it into $out. Terminates the shell with status 1 when
# Cargo.lock is missing.
#
# 'substituteInPlace' is not defined here; presumably it comes from the
# calling environment (Nix stdenv) -- confirm.
fetchCargoDeps() {
    # 'src' and 'out' are intentionally left global, as callers may rely on them.
    src=$(realpath "$1")
    out=$(realpath "$2")

    echo "Fetching $src to $out"

    mkdir "$out"

    # Configure cargo to fetch from a local copy of the crates.io registry

    echo "Using rust registry from $rustRegistry"

    cat <<EOF > "$out/config"
[registry]
index = "file://$rustRegistry"
EOF

    export CARGO_HOME=$out
    cd "$src" || exit 1

    if [[ ! -f Cargo.lock ]]; then
        {
            echo
            echo "ERROR: The Cargo.lock file doesn't exist"
            echo
            echo "Cargo.lock is needed to make sure that depsSha256 doesn't change"
            echo "when the registry is updated."
            echo
        } >&2

        exit 1
    fi

    # We need to do the following string replacement so that 'cargo fetch'
    # doesn't ignore the versions specified in Cargo.lock
    substituteInPlace Cargo.lock \
        --replace "registry+https://github.com/rust-lang/crates.io-index" \
                  "registry+file://$rustRegistry"

    # Do any possible 'cargo update -p <pkgName> --precise <version>' ad-hoc updates
    # NOTE(review): this evaluates arbitrary shell code; it is assumed to come
    # from the (trusted) package definition.
    eval "$cargoUpdateHook"

    # Do the fetch
    cargo fetch --verbose

    # Now that we have fetched everything, let's make the output deterministic

    # Cargo uses the following directory structure for fetched data, where
    # $indexHash is a hash of the registry index URL:
    #
    #
    # /config:
    #
    #     Cargo config file. We'll delete this because it's not deterministic,
    #     and instead recreate it just before running 'cargo build'.
    #
    # /registry/cache/$indexHash/:
    #
    #     This is where tarballs of registry package dependencies are kept
    #     We'll need to keep this, but make sure $indexHash is a fixed name.
    #
    # /registry/index/$indexHash/:
    #
    #     A copy of the registry index is kept here. We can delete this, and
    #     instead, just before running 'cargo build', we'll symlink this
    #     directory to our static copy of the registry in the Nix store.
    #
    # /registry/src/$indexHash/{pkgName-pkgVersion}/:
    #
    #     Here cargo keeps extracted sources of the cached tarballs.
    #     We'll just delete this because cargo will re-populate them from the
    #     tarballs.
    #
    # /git/db/{domain-hash}/:
    #
    #     Here cargo keeps the `.git` directories of git dependencies.
    #     We'll need to keep these, but make them deterministic.
    #
    # /git/checkouts/{domain-hash}/{branchName}/:
    #
    #     Here cargo keeps checked-out sources of the git dependencies.
    #     We can delete this, because cargo will re-populate them from the above
    #     `.git` directories.
    #
    # Let's start

    # Remove cargo config file, which points to the ever-changing registry
    rm "$out/config"

    # Save the Cargo.lock file into the output, so that we don't have to do another
    # 'cargo update' during the build (which would try to access the network) for
    # any ad-hoc package updates (through $cargoUpdateHook).
    #
    # We need to replace the rustRegistry URL with something deterministic.
    # Since the URL won't actually be accessed anymore, it's fine to use /dev/null.

    substituteInPlace Cargo.lock \
        --replace "registry+file://$rustRegistry" \
                  "registry+file:///dev/null"
    mv Cargo.lock "$out/"


    # Let's replace $indexHash with something more deterministic
    mv "$out"/registry/cache/* "$out/registry/cache/HASH"

    # The registry index changes all the time, so it's not deterministic
    # We'll symlink it before running 'cargo build'
    rm -rf "${out:?}"/registry/index/*

    # Make git DBs deterministic
    # TODO: test with git submodules
    if [[ -d "$out/git/checkouts" ]]; then
        (
            cd "$out/git/checkouts" || exit 1

            for name in *; do
                # An empty checkouts directory leaves the glob unexpanded.
                [[ -d "$out/git/checkouts/$name" ]] || continue

                # Collect the HEAD revision of every checkout of this db.
                revs=()
                while IFS= read -r gitDir; do
                    # [gitDir = "./xxx/yyy/.git"] => [checkout = "./xxx/yyy"]
                    checkout=${gitDir%/.git}
                    rev=$(cd "$out/git/checkouts/$name/$checkout" && git rev-parse HEAD)
                    revs+=("$rev")
                done < <(cd "$out/git/checkouts/$name" && find . -type d -name .git -print)

                echo "List of revs to keep for git db $name: ${revs[*]}"

                _fetchCargoDeps_normalizeGitDb "$out/git/db/$name" "${revs[@]}"
            done
        )
    fi

    # Remove unneeded outputs ('rm -rf' is a no-op for missing paths)
    rm -rf "$out/registry/src" "$out/git/checkouts"

    # XXX: provide some debugging output to find out why we are seeing
    # sporadic hash mismatches
    find "$out" ! -type f
    find "$out" -type f -exec sha256sum {} +
}