summary refs log tree commit diff
path: root/pkgs/build-support
diff options
context:
space:
mode:
authorDaiderd Jordan <daiderd@gmail.com>2018-10-01 19:41:36 +0200
committerDaiderd Jordan <daiderd@gmail.com>2018-10-01 19:42:07 +0200
commit1383c08f2c086cb0b90c8624ebab4ca1182e3384 (patch)
treecce12f1295915db0a6861f148715764ad64ce0c9 /pkgs/build-support
parent9c4b11e9a060de2175aef4d36881f97812ab17fe (diff)
parent83fd9785f65b8fd4cbaa146270f90b2a0d74b5f3 (diff)
downloadnixlib-1383c08f2c086cb0b90c8624ebab4ca1182e3384.tar
nixlib-1383c08f2c086cb0b90c8624ebab4ca1182e3384.tar.gz
nixlib-1383c08f2c086cb0b90c8624ebab4ca1182e3384.tar.bz2
nixlib-1383c08f2c086cb0b90c8624ebab4ca1182e3384.tar.lz
nixlib-1383c08f2c086cb0b90c8624ebab4ca1182e3384.tar.xz
nixlib-1383c08f2c086cb0b90c8624ebab4ca1182e3384.tar.zst
nixlib-1383c08f2c086cb0b90c8624ebab4ca1182e3384.zip
Merge branch 'master' into staging-next
Diffstat (limited to 'pkgs/build-support')
-rw-r--r--pkgs/build-support/docker/default.nix197
-rw-r--r--pkgs/build-support/docker/examples.nix7
-rwxr-xr-xpkgs/build-support/docker/store-path-to-layer.sh24
-rw-r--r--pkgs/build-support/references-by-popularity/closure-graph.py520
-rw-r--r--pkgs/build-support/references-by-popularity/default.nix15
5 files changed, 753 insertions, 10 deletions
diff --git a/pkgs/build-support/docker/default.nix b/pkgs/build-support/docker/default.nix
index 0cee1dd2916f..2d6d6c1fc91d 100644
--- a/pkgs/build-support/docker/default.nix
+++ b/pkgs/build-support/docker/default.nix
@@ -1,4 +1,5 @@
 {
+  symlinkJoin,
   coreutils,
   docker,
   e2fsprogs,
@@ -9,7 +10,7 @@
   lib,
   pkgs,
   pigz,
-  nixUnstable,
+  nix,
   perl,
   runCommand,
   rsync,
@@ -19,6 +20,7 @@
   utillinux,
   vmTools,
   writeReferencesToFile,
+  referencesByPopularity,
   writeScript,
   writeText,
 }:
@@ -77,7 +79,9 @@ rec {
     ln -sT ${docker.src}/components/engine/pkg/tarsum src/github.com/docker/docker/pkg/tarsum
     go build
 
-    cp tarsum $out
+    mkdir -p $out/bin
+
+    cp tarsum $out/bin/
   '';
 
   # buildEnv creates symlinks to dirs, which is hard to edit inside the overlay VM
@@ -258,7 +262,7 @@ rec {
     '';
 
   nixRegistration = contents: runCommand "nix-registration" {
-    buildInputs = [ nixUnstable perl ];
+    buildInputs = [ nix perl ];
     # For obtaining the closure of `contents'.
     exportReferencesGraph =
       let contentsList = if builtins.isList contents then contents else [ contents ];
@@ -270,6 +274,81 @@ rec {
       perl ${pkgs.pathsFromGraph} closure-* > $out/storePaths
     '';
 
+  # Create $maxLayers worth of Docker Layers, one layer per store path
+  # unless there are more paths than $maxLayers. In that case, create
+  # $maxLayers-1 for the most popular layers, and smush the remainaing
+  # store paths in to one final layer.
+  mkManyPureLayers = {
+    name,
+    # Files to add to the layer.
+    closure,
+    configJson,
+    # Docker has a 42-layer maximum, we pick 24 to ensure there is plenty
+    # of room for extension
+    maxLayers ? 24
+  }:
+    runCommand "${name}-granular-docker-layers" {
+      inherit maxLayers;
+      paths = referencesByPopularity closure;
+      buildInputs = [ jshon rsync tarsum ];
+      enableParallelBuilding = true;
+    }
+    ''
+      # Delete impurities for store path layers, so they don't get
+      # shared and taint other projects.
+      cat ${configJson} \
+        | jshon -d config \
+        | jshon -s "1970-01-01T00:00:01Z" -i created > generic.json
+
+      # WARNING!
+      # The following code is fiddly w.r.t. ensuring every layer is
+      # created, and that no paths are missed. If you change the
+      # following head and tail call lines, double-check that your
+      # code behaves properly when the number of layers equals:
+      #      maxLayers-1, maxLayers, and maxLayers+1
+      head -n $((maxLayers - 1)) $paths | cat -n | xargs -P$NIX_BUILD_CORES -n2 ${./store-path-to-layer.sh}
+      if [ $(cat $paths | wc -l) -ge $maxLayers ]; then
+        tail -n+$maxLayers $paths | xargs ${./store-path-to-layer.sh} $maxLayers
+      fi
+
+      echo "Finished building layer '$name'"
+
+      mv ./layers $out
+    '';
+
+  # Create a "Customisation" layer which adds symlinks at the root of
+  # the image to the root paths of the closure. Also add the config
+  # data like what command to run and the environment to run it in.
+  mkCustomisationLayer = {
+    name,
+    # Files to add to the layer.
+    contents,
+    baseJson,
+    uid ? 0, gid ? 0,
+  }:
+    runCommand "${name}-customisation-layer" {
+      buildInputs = [ jshon rsync tarsum ];
+    }
+    ''
+      cp -r ${contents}/ ./layer
+
+      # Tar up the layer and throw it into 'layer.tar'.
+      echo "Packing layer..."
+      mkdir $out
+      tar -C layer --sort=name --mtime="@$SOURCE_DATE_EPOCH" --owner=${toString uid} --group=${toString gid} -cf $out/layer.tar .
+
+      # Compute a checksum of the tarball.
+      echo "Computing layer checksum..."
+      tarhash=$(tarsum < $out/layer.tar)
+
+      # Add a 'checksum' field to the JSON, with the value set to the
+      # checksum of the tarball.
+      cat ${baseJson} | jshon -s "$tarhash" -i checksum > $out/json
+
+      # Indicate to docker that we're using schema version 1.0.
+      echo -n "1.0" > $out/VERSION
+    '';
+
   # Create a "layer" (set of files).
   mkPureLayer = {
     # Name of the layer
@@ -287,7 +366,7 @@ rec {
   }:
     runCommand "docker-layer-${name}" {
       inherit baseJson contents extraCommands;
-      buildInputs = [ jshon rsync ];
+      buildInputs = [ jshon rsync tarsum ];
     }
     ''
       mkdir layer
@@ -314,11 +393,11 @@ rec {
 
       # Compute a checksum of the tarball.
       echo "Computing layer checksum..."
-      tarsum=$(${tarsum} < $out/layer.tar)
+      tarhash=$(tarsum < $out/layer.tar)
 
       # Add a 'checksum' field to the JSON, with the value set to the
       # checksum of the tarball.
-      cat ${baseJson} | jshon -s "$tarsum" -i checksum > $out/json
+      cat ${baseJson} | jshon -s "$tarhash" -i checksum > $out/json
 
       # Indicate to docker that we're using schema version 1.0.
       echo -n "1.0" > $out/VERSION
@@ -402,8 +481,8 @@ rec {
 
         # Compute the tar checksum and add it to the output json.
         echo "Computing checksum..."
-        ts=$(${tarsum} < $out/layer.tar)
-        cat ${baseJson} | jshon -s "$ts" -i checksum > $out/json
+        tarhash=$(${tarsum}/bin/tarsum < $out/layer.tar)
+        cat ${baseJson} | jshon -s "$tarhash" -i checksum > $out/json
         # Indicate to docker that we're using schema version 1.0.
         echo -n "1.0" > $out/VERSION
 
@@ -411,6 +490,104 @@ rec {
       '';
     };
 
+  buildLayeredImage = {
+    # Image Name
+    name,
+    # Image tag, the Nix's output hash will be used if null
+    tag ? null,
+    # Files to put on the image (a nix store path or list of paths).
+    contents ? [],
+    # Docker config; e.g. what command to run on the container.
+    config ? {},
+    # Time of creation of the image. Passing "now" will make the
+    # created date be the time of building.
+    created ? "1970-01-01T00:00:01Z",
+    # Docker's lowest maximum layer limit is 42-layers for an old
+    # version of the AUFS graph driver. We pick 24 to ensure there is
+    # plenty of room for extension. I believe the actual maximum is
+    # 128.
+    maxLayers ? 24
+  }:
+    let
+      uid = 0;
+      gid = 0;
+      baseName = baseNameOf name;
+      contentsEnv = symlinkJoin { name = "bulk-layers"; paths = (if builtins.isList contents then contents else [ contents ]); };
+
+      configJson = let
+          pure = writeText "${baseName}-config.json" (builtins.toJSON {
+            inherit created config;
+            architecture = "amd64";
+            os = "linux";
+          });
+          impure = runCommand "${baseName}-standard-dynamic-date.json"
+            { buildInputs = [ jq ]; }
+            ''
+               jq ".created = \"$(TZ=utc date --iso-8601="seconds")\"" ${pure} > $out
+            '';
+        in if created == "now" then impure else pure;
+
+      bulkLayers = mkManyPureLayers {
+          name = baseName;
+          closure = writeText "closure" "${contentsEnv} ${configJson}";
+          # One layer will be taken up by the customisationLayer, so
+          # take up one less.
+          maxLayers = maxLayers - 1;
+          inherit configJson;
+        };
+      customisationLayer = mkCustomisationLayer {
+          name = baseName;
+          contents = contentsEnv;
+          baseJson = configJson;
+          inherit uid gid;
+        };
+      result = runCommand "docker-image-${baseName}.tar.gz" {
+        buildInputs = [ jshon pigz coreutils findutils jq ];
+        # Image name and tag must be lowercase
+        imageName = lib.toLower name;
+        imageTag = if tag == null then "" else lib.toLower tag;
+        baseJson = configJson;
+      } ''
+        ${lib.optionalString (tag == null) ''
+          outName="$(basename "$out")"
+          outHash=$(echo "$outName" | cut -d - -f 1)
+
+          imageTag=$outHash
+        ''}
+
+        find ${bulkLayers} -mindepth 1 -maxdepth 1 | sort -t/ -k5 -n > layer-list
+        echo ${customisationLayer} >> layer-list
+
+        mkdir image
+        imageJson=$(cat ${configJson} | jq ". + {\"rootfs\": {\"diff_ids\": [], \"type\": \"layers\"}}")
+        manifestJson=$(jq -n "[{\"RepoTags\":[\"$imageName:$imageTag\"]}]")
+        for layer in $(cat layer-list); do
+          layerChecksum=$(sha256sum $layer/layer.tar | cut -d ' ' -f1)
+          layerID=$(sha256sum "$layer/json" | cut -d ' ' -f 1)
+          ln -s "$layer" "./image/$layerID"
+
+          manifestJson=$(echo "$manifestJson" | jq ".[0].Layers |= [\"$layerID/layer.tar\"] + .")
+          imageJson=$(echo "$imageJson" | jq ".history |= [{\"created\": \"$(jq -r .created ${configJson})\"}] + .")
+          imageJson=$(echo "$imageJson" | jq ".rootfs.diff_ids |= [\"sha256:$layerChecksum\"] + .")
+        done
+        imageJsonChecksum=$(echo "$imageJson" | sha256sum | cut -d ' ' -f1)
+        echo "$imageJson" > "image/$imageJsonChecksum.json"
+        manifestJson=$(echo "$manifestJson" | jq ".[0].Config = \"$imageJsonChecksum.json\"")
+        echo "$manifestJson" > image/manifest.json
+
+        jshon -n object \
+          -n object -s "$layerID" -i "$imageTag" \
+          -i "$imageName" > image/repositories
+
+        echo "Cooking the image..."
+        tar -C image --dereference --hard-dereference --sort=name --mtime="@$SOURCE_DATE_EPOCH" --owner=0 --group=0  --mode=a-w --xform s:'^./':: -c . | pigz -nT > $out
+
+        echo "Finished."
+      '';
+
+    in
+    result;
+
   # 1. extract the base image
   # 2. create the layer
   # 3. add layer deps to the layer itself, diffing with the base image
@@ -626,7 +803,7 @@ rec {
         echo "         be better to only have one layer that contains a nix store."
         # This requires Nix 1.12 or higher
         export NIX_REMOTE=local?root=$PWD
-        ${nixUnstable}/bin/nix-store --load-db < ${nixRegistration contents}/db.dump
+        ${nix}/bin/nix-store --load-db < ${nixRegistration contents}/db.dump
 
         # We fill the store in order to run the 'verify' command that
         # generates hash and size of output paths.
@@ -637,7 +814,7 @@ rec {
         storePaths=$(cat ${nixRegistration contents}/storePaths)
         echo "Copying everything to /nix/store (will take a while)..."
         cp -prd $storePaths nix/store/
-        ${nixUnstable}/bin/nix-store --verify --check-contents
+        ${nix}/bin/nix-store --verify --check-contents
 
         mkdir -p nix/var/nix/gcroots/docker/
         for i in ${lib.concatStringsSep " " contents}; do
diff --git a/pkgs/build-support/docker/examples.nix b/pkgs/build-support/docker/examples.nix
index 822e0dbb31f2..003e7429a81b 100644
--- a/pkgs/build-support/docker/examples.nix
+++ b/pkgs/build-support/docker/examples.nix
@@ -150,4 +150,11 @@ rec {
     contents = [ pkgs.coreutils ];
     created = "now";
   };
+
+  # 10. Create a layered image
+  layered-image = pkgs.dockerTools.buildLayeredImage {
+    name = "layered-image";
+    tag = "latest";
+    config.Cmd = [ "${pkgs.hello}/bin/hello" ];
+  };
 }
diff --git a/pkgs/build-support/docker/store-path-to-layer.sh b/pkgs/build-support/docker/store-path-to-layer.sh
new file mode 100755
index 000000000000..ff814c1f6130
--- /dev/null
+++ b/pkgs/build-support/docker/store-path-to-layer.sh
@@ -0,0 +1,24 @@
+#!/bin/sh
+
+set -eu
+
+layerNumber=$1
+shift
+
+layerPath="./layers/$layerNumber"
+echo "Creating layer #$layerNumber for $@"
+
+mkdir -p "$layerPath"
+tar -rpf "$layerPath/layer.tar" --hard-dereference --sort=name \
+    --mtime="@$SOURCE_DATE_EPOCH" \
+    --owner=0 --group=0 "$@"
+
+# Compute a checksum of the tarball.
+tarhash=$(tarsum < $layerPath/layer.tar)
+
+# Add a 'checksum' field to the JSON, with the value set to the
+# checksum of the tarball.
+cat ./generic.json | jshon -s "$tarhash" -i checksum > $layerPath/json
+
+# Indicate to docker that we're using schema version 1.0.
+echo -n "1.0" > $layerPath/VERSION
diff --git a/pkgs/build-support/references-by-popularity/closure-graph.py b/pkgs/build-support/references-by-popularity/closure-graph.py
new file mode 100644
index 000000000000..d67a5dfcf140
--- /dev/null
+++ b/pkgs/build-support/references-by-popularity/closure-graph.py
@@ -0,0 +1,520 @@
+# IMPORTANT: Making changes?
+#
+# Validate your changes with python3 ./closure-graph.py --test
+
+
+# Using a simple algorithm, convert the references to a path in to a
+# sorted list of dependent paths based on how often they're referenced
+# and how deep in the tree they live. Equally-"popular" paths are then
+# sorted by name.
+#
+# The existing writeReferencesToFile prints the paths in a simple
+# ascii-based sorting of the paths.
+#
+# Sorting the paths by graph improves the chances that the difference
+# between two builds appear near the end of the list, instead of near
+# the beginning. This makes a difference for Nix builds which export a
+# closure for another program to consume, if that program implements its
+# own level of binary diffing.
+#
+# For an example, Docker Images. If each store path is a separate layer
+# then Docker Images can be very efficiently transfered between systems,
+# and we get very good cache reuse between images built with the same
+# version of Nixpkgs. However, since Docker only reliably supports a
+# small number of layers (42) it is important to pick the individual
+# layers carefully. By storing very popular store paths in the first 40
+# layers, we improve the chances that the next Docker image will share
+# many of those layers.*
+#
+# Given the dependency tree:
+#
+#     A - B - C - D -\
+#      \   \   \      \
+#       \   \   \      \
+#        \   \ - E ---- F
+#         \- G
+#
+# Nodes which have multiple references are duplicated:
+#
+#     A - B - C - D - F
+#      \   \   \
+#       \   \   \- E - F
+#        \   \
+#         \   \- E - F
+#          \
+#           \- G
+#
+# Each leaf node is now replaced by a counter defaulted to 1:
+#
+#     A - B - C - D - (F:1)
+#      \   \   \
+#       \   \   \- E - (F:1)
+#        \   \
+#         \   \- E - (F:1)
+#          \
+#           \- (G:1)
+#
+# Then each leaf counter is merged with its parent node, replacing the
+# parent node with a counter of 1, and each existing counter being
+# incremented by 1. That is to say `- D - (F:1)` becomes `- (D:1, F:2)`:
+#
+#     A - B - C - (D:1, F:2)
+#      \   \   \
+#       \   \   \- (E:1, F:2)
+#        \   \
+#         \   \- (E:1, F:2)
+#          \
+#           \- (G:1)
+#
+# Then each leaf counter is merged with its parent node again, merging
+# any counters, then incrementing each:
+#
+#     A - B - (C:1, D:2, E:2, F:5)
+#      \   \
+#       \   \- (E:1, F:2)
+#        \
+#         \- (G:1)
+#
+# And again:
+#
+#     A - (B:1, C:2, D:3, E:4, F:8)
+#      \
+#       \- (G:1)
+#
+# And again:
+#
+#     (A:1, B:2, C:3, D:4, E:5, F:9, G:2)
+#
+# and then paths have the following "popularity":
+#
+#     A     1
+#     B     2
+#     C     3
+#     D     4
+#     E     5
+#     F     9
+#     G     2
+#
+# and the popularity contest would result in the paths being printed as:
+#
+#     F
+#     E
+#     D
+#     C
+#     B
+#     G
+#     A
+#
+# * Note: People who have used a Dockerfile before assume Docker's
+# Layers are inherently ordered. However, this is not true -- Docker
+# layers are content-addressable and are not explicitly layered until
+# they are composed in to an Image.
+
+import sys
+import json
+import unittest
+
+from pprint import pprint
+from collections import defaultdict
+
+# Find paths in the original dataset which are never referenced by
+# any other paths
+def find_roots(closures):
+    roots = [];
+
+    for closure in closures:
+        path = closure['path']
+        if not any_refer_to(path, closures):
+            roots.append(path)
+
+    return roots
+
+class TestFindRoots(unittest.TestCase):
+    def test_find_roots(self):
+        self.assertCountEqual(
+            find_roots([
+                {
+                    "path": "/nix/store/foo",
+                    "references": [
+                        "/nix/store/foo",
+                        "/nix/store/bar"
+                    ]
+                },
+                {
+                    "path": "/nix/store/bar",
+                    "references": [
+                        "/nix/store/bar",
+                        "/nix/store/tux"
+                    ]
+                },
+                {
+                    "path": "/nix/store/hello",
+                    "references": [
+                    ]
+                }
+            ]),
+            ["/nix/store/foo", "/nix/store/hello"]
+        )
+
+
+def any_refer_to(path, closures):
+    for closure in closures:
+        if path != closure['path']:
+            if path in closure['references']:
+                return True
+    return False
+
+class TestAnyReferTo(unittest.TestCase):
+    def test_has_references(self):
+        self.assertTrue(
+            any_refer_to(
+                "/nix/store/bar",
+                [
+                    {
+                        "path": "/nix/store/foo",
+                        "references": [
+                            "/nix/store/bar"
+                        ]
+                    },
+                ]
+            ),
+        )
+    def test_no_references(self):
+        self.assertFalse(
+            any_refer_to(
+                "/nix/store/foo",
+                [
+                    {
+                        "path": "/nix/store/foo",
+                        "references": [
+                            "/nix/store/foo",
+                            "/nix/store/bar"
+                        ]
+                    },
+                ]
+            ),
+        )
+
+def all_paths(closures):
+    paths = []
+    for closure in closures:
+        paths.append(closure['path'])
+        paths.extend(closure['references'])
+    paths.sort()
+    return list(set(paths))
+
+
+class TestAllPaths(unittest.TestCase):
+    def test_returns_all_paths(self):
+        self.assertCountEqual(
+            all_paths([
+                {
+                    "path": "/nix/store/foo",
+                    "references": [
+                        "/nix/store/foo",
+                        "/nix/store/bar"
+                    ]
+                },
+                {
+                    "path": "/nix/store/bar",
+                    "references": [
+                        "/nix/store/bar",
+                        "/nix/store/tux"
+                    ]
+                },
+                {
+                    "path": "/nix/store/hello",
+                    "references": [
+                    ]
+                }
+            ]),
+            ["/nix/store/foo", "/nix/store/bar", "/nix/store/hello", "/nix/store/tux",]
+        )
+    def test_no_references(self):
+        self.assertFalse(
+            any_refer_to(
+                "/nix/store/foo",
+                [
+                    {
+                        "path": "/nix/store/foo",
+                        "references": [
+                            "/nix/store/foo",
+                            "/nix/store/bar"
+                        ]
+                    },
+                ]
+            ),
+        )
+
+# Convert:
+#
+# [
+#    { path: /nix/store/foo, references: [ /nix/store/foo, /nix/store/bar, /nix/store/baz ] },
+#    { path: /nix/store/bar, references: [ /nix/store/bar, /nix/store/baz ] },
+#    { path: /nix/store/baz, references: [ /nix/store/baz, /nix/store/tux ] },
+#    { path: /nix/store/tux, references: [ /nix/store/tux ] }
+#  ]
+#
+# To:
+#    {
+#      /nix/store/foo: [ /nix/store/bar, /nix/store/baz ],
+#      /nix/store/bar: [ /nix/store/baz ],
+#      /nix/store/baz: [ /nix/store/tux ] },
+#      /nix/store/tux: [ ]
+#    }
+#
+# Note that it drops self-references to avoid loops.
+def make_lookup(closures):
+    lookup = {}
+
+    for closure in closures:
+        # paths often self-refer
+        nonreferential_paths = [ref for ref in closure['references'] if ref != closure['path']]
+        lookup[closure['path']] = nonreferential_paths
+
+    return lookup
+
+class TestMakeLookup(unittest.TestCase):
+    def test_returns_lookp(self):
+        self.assertDictEqual(
+            make_lookup([
+                {
+                    "path": "/nix/store/foo",
+                    "references": [
+                        "/nix/store/foo",
+                        "/nix/store/bar"
+                    ]
+                },
+                {
+                    "path": "/nix/store/bar",
+                    "references": [
+                        "/nix/store/bar",
+                        "/nix/store/tux"
+                    ]
+                },
+                {
+                    "path": "/nix/store/hello",
+                    "references": [
+                    ]
+                }
+            ]),
+            {
+                "/nix/store/foo": [ "/nix/store/bar" ],
+                "/nix/store/bar": [ "/nix/store/tux" ],
+                "/nix/store/hello": [ ],
+            }
+        )
+
+# Convert:
+#
+# /nix/store/foo with
+#  {
+#    /nix/store/foo: [ /nix/store/bar, /nix/store/baz ],
+#    /nix/store/bar: [ /nix/store/baz ],
+#    /nix/store/baz: [ /nix/store/tux ] },
+#    /nix/store/tux: [ ]
+#  }
+#
+# To:
+#
+# {
+#   /nix/store/bar: {
+#                    /nix/store/baz: {
+#                                     /nix/store/tux: {}
+#                    }
+#   },
+#   /nix/store/baz: {
+#                   /nix/store/tux: {}
+#   }
+# }
+def make_graph_segment_from_root(root, lookup):
+    children = {}
+    for ref in lookup[root]:
+        children[ref] = make_graph_segment_from_root(ref, lookup)
+    return children
+
+class TestMakeGraphSegmentFromRoot(unittest.TestCase):
+    def test_returns_graph(self):
+        self.assertDictEqual(
+            make_graph_segment_from_root("/nix/store/foo", {
+                "/nix/store/foo": [ "/nix/store/bar" ],
+                "/nix/store/bar": [ "/nix/store/tux" ],
+                "/nix/store/tux": [ ],
+                "/nix/store/hello": [ ],
+            }),
+            {
+                "/nix/store/bar": {
+                    "/nix/store/tux": {}
+                }
+            }
+        )
+    def test_returns_graph_tiny(self):
+        self.assertDictEqual(
+            make_graph_segment_from_root("/nix/store/tux", {
+                "/nix/store/foo": [ "/nix/store/bar" ],
+                "/nix/store/bar": [ "/nix/store/tux" ],
+                "/nix/store/tux": [ ],
+            }),
+            {}
+        )
+
+# Convert a graph segment in to a popularity-counted dictionary:
+#
+# From:
+# {
+#    /nix/store/foo: {
+#                      /nix/store/bar: {
+#                                        /nix/store/baz: {
+#                                                           /nix/store/tux: {}
+#                                        }
+#                      }
+#                      /nix/store/baz: {
+#                                         /nix/store/tux: {}
+#                      }
+#    }
+# }
+#
+# to:
+# [
+#   /nix/store/foo: 1
+#   /nix/store/bar: 2
+#   /nix/store/baz: 4
+#   /nix/store/tux: 6
+# ]
+def graph_popularity_contest(full_graph):
+    popularity = defaultdict(int)
+    for path, subgraph in full_graph.items():
+        popularity[path] += 1
+        subcontest = graph_popularity_contest(subgraph)
+        for subpath, subpopularity in subcontest.items():
+            popularity[subpath] += subpopularity + 1
+
+    return popularity
+
+class TestGraphPopularityContest(unittest.TestCase):
+    def test_counts_popularity(self):
+        self.assertDictEqual(
+            graph_popularity_contest({
+                "/nix/store/foo": {
+                    "/nix/store/bar": {
+                        "/nix/store/baz": {
+                            "/nix/store/tux": {}
+                        }
+                    },
+                    "/nix/store/baz": {
+                        "/nix/store/tux": {}
+                    }
+                }
+            }),
+            {
+                   "/nix/store/foo": 1,
+                   "/nix/store/bar": 2,
+                   "/nix/store/baz": 4,
+                   "/nix/store/tux": 6,
+            }
+        )
+
+# Emit a list of packages by popularity, most first:
+#
+# From:
+# [
+#   /nix/store/foo: 1
+#   /nix/store/bar: 1
+#   /nix/store/baz: 2
+#   /nix/store/tux: 2
+# ]
+#
+# To:
+# [ /nix/store/baz /nix/store/tux /nix/store/bar /nix/store/foo ]
+def order_by_popularity(paths):
+    paths_by_popularity = defaultdict(list)
+    popularities = []
+    for path, popularity in paths.items():
+        popularities.append(popularity)
+        paths_by_popularity[popularity].append(path)
+
+    popularities = list(set(popularities))
+    popularities.sort()
+
+    flat_ordered = []
+    for popularity in popularities:
+        paths = paths_by_popularity[popularity]
+        paths.sort(key=package_name)
+
+        flat_ordered.extend(reversed(paths))
+    return list(reversed(flat_ordered))
+
+
+class TestOrderByPopularity(unittest.TestCase):
+    def test_returns_in_order(self):
+        self.assertEqual(
+            order_by_popularity({
+                   "/nix/store/foo": 1,
+                   "/nix/store/bar": 1,
+                   "/nix/store/baz": 2,
+                   "/nix/store/tux": 2,
+            }),
+            [
+                "/nix/store/baz",
+                "/nix/store/tux",
+                "/nix/store/bar",
+                "/nix/store/foo"
+            ]
+        )
+
+def package_name(path):
+    parts = path.split('-')
+    start = parts.pop(0)
+    # don't throw away any data, so the order is always the same.
+    # even in cases where only the hash at the start has changed.
+    parts.append(start)
+    return '-'.join(parts)
+
+def main():
+    filename = sys.argv[1]
+    key = sys.argv[2]
+
+    with open(filename) as f:
+        data = json.load(f)
+
+    # Data comes in as:
+    # [
+    #    { path: /nix/store/foo, references: [ /nix/store/foo, /nix/store/bar, /nix/store/baz ] },
+    #    { path: /nix/store/bar, references: [ /nix/store/bar, /nix/store/baz ] },
+    #    { path: /nix/store/baz, references: [ /nix/store/baz, /nix/store/tux ] },
+    #    { path: /nix/store/tux, references: [ /nix/store/tux ] }
+    #  ]
+    #
+    # and we want to get out a list of paths ordered by how universally,
+    # important they are, ie: tux is referenced by every path, transitively
+    # so it should be #1
+    #
+    # [
+    #   /nix/store/tux,
+    #   /nix/store/baz,
+    #   /nix/store/bar,
+    #   /nix/store/foo,
+    # ]
+    graph = data[key]
+
+    roots = find_roots(graph);
+    lookup = make_lookup(graph)
+
+    full_graph = {}
+    for root in roots:
+        full_graph[root] = make_graph_segment_from_root(root, lookup)
+
+    ordered = order_by_popularity(graph_popularity_contest(full_graph))
+    missing = []
+    for path in all_paths(graph):
+        if path not in ordered:
+            missing.append(path)
+
+    ordered.extend(missing)
+    print("\n".join(ordered))
+
+if "--test" in sys.argv:
+    # Don't pass --test otherwise unittest gets mad
+    unittest.main(argv = [f for f in sys.argv if f != "--test" ])
+else:
+    main()
diff --git a/pkgs/build-support/references-by-popularity/default.nix b/pkgs/build-support/references-by-popularity/default.nix
new file mode 100644
index 000000000000..4cae2dcf3ca9
--- /dev/null
+++ b/pkgs/build-support/references-by-popularity/default.nix
@@ -0,0 +1,15 @@
+{ runCommand, python3, coreutils }:
+# Write the references of `path' to a file, in order of how "popular" each
+# reference is. Nix 2 only.
+path: runCommand "closure-paths"
+{
+  exportReferencesGraph.graph = path;
+  __structuredAttrs = true;
+  PATH = "${coreutils}/bin:${python3}/bin";
+  builder = builtins.toFile "builder"
+    ''
+      . .attrs.sh
+      python3 ${./closure-graph.py} .attrs.json graph > ''${outputs[out]}
+    '';
+  }
+  ""