From 0aeba64fb26e4defa0842a942757144659c6e29f Mon Sep 17 00:00:00 2001 From: Arnout Engelen Date: Sun, 28 Feb 2021 19:03:50 +0100 Subject: squashfs: use -no-hardlinks for reproducible squashfs images (#114454) The nix store may contain hardlinks: derivations may output them directly, or users may be using store optimization which automatically hardlinks identical files in the nix store. The presence of these links is intended to be a 'transparent' optimization. However, when creating a squashfs image, the image will be different depending on whether hard links were present on the filesystem, leading to reproducibility problems. By passing '-no-hardlinks' to mksquashfs the files are stored as duplicates in the squashfs image. Since squashfs has support for duplicate files this does not lead to a larger image. For more details see https://github.com/NixOS/nixpkgs/issues/114331 --- .../0001-Mksquashfs-add-no-hardlinks-option.patch | 76 ++++++++++++++++++++++ pkgs/tools/filesystems/squashfs/default.nix | 4 ++ 2 files changed, 80 insertions(+) create mode 100644 pkgs/tools/filesystems/squashfs/0001-Mksquashfs-add-no-hardlinks-option.patch (limited to 'pkgs/tools/filesystems/squashfs') diff --git a/pkgs/tools/filesystems/squashfs/0001-Mksquashfs-add-no-hardlinks-option.patch b/pkgs/tools/filesystems/squashfs/0001-Mksquashfs-add-no-hardlinks-option.patch new file mode 100644 index 000000000000..0d6804a647b2 --- /dev/null +++ b/pkgs/tools/filesystems/squashfs/0001-Mksquashfs-add-no-hardlinks-option.patch @@ -0,0 +1,76 @@ +From d925c9a11ee2e88ac8aac03f51892746f2bcf8cd Mon Sep 17 00:00:00 2001 +From: Phillip Lougher +Date: Thu, 25 Feb 2021 23:12:10 +0000 +Subject: [PATCH] Mksquashfs: add -no-hardlinks option + +Normally Mksquashfs will detect hardlinks (multiple files with the +same inode) and hardlink them in the Squashfs image. + +But often hardlinks are used in the original filesystem +to save space, when files are discovered to be duplicate.
+In this special case the only reason the files are +hardlinked is to save space, and where the filesystem +doesn't handle duplicate files (different inode, same +data). + +Squashfs does handle duplicate files, and so add +an option to ignore hardlinks and instead +store them as duplicates. + +Signed-off-by: Phillip Lougher +--- + squashfs-tools/mksquashfs.c | 16 +++++++++++----- + 1 file changed, 11 insertions(+), 5 deletions(-) + +diff --git a/squashfs-tools/mksquashfs.c b/squashfs-tools/mksquashfs.c +index a45b77f..d4dc359 100644 +--- a/squashfs-tools/mksquashfs.c ++++ b/squashfs-tools/mksquashfs.c +@@ -312,6 +312,9 @@ struct dir_info *root_dir; + FILE *log_fd; + int logging=FALSE; + ++/* Should Mksquashfs detect hardlinked files? */ ++int no_hardlinks = FALSE; ++ + static char *read_from_disk(long long start, unsigned int avail_bytes); + void add_old_root_entry(char *name, squashfs_inode inode, int inode_number, + int type); +@@ -3093,11 +3096,11 @@ struct inode_info *lookup_inode3(struct stat *buf, int pseudo, int id, + + /* + * Look-up inode in hash table, if it already exists we have a +- * hard-link, so increment the nlink count and return it. +- * Don't do the look-up for directories because we don't hard-link +- * directories. ++ * hardlink, so increment the nlink count and return it. ++ * Don't do the look-up for directories because Unix/Linux doesn't ++ * allow hard-links to directories. 
+ */ +- if ((buf->st_mode & S_IFMT) != S_IFDIR) { ++ if ((buf->st_mode & S_IFMT) != S_IFDIR && !no_hardlinks) { + for(inode = inode_info[ino_hash]; inode; inode = inode->next) { + if(memcmp(buf, &inode->buf, sizeof(struct stat)) == 0) { + inode->nlink ++; +@@ -5447,7 +5450,9 @@ int main(int argc, char *argv[]) + comp = lookup_compressor(COMP_DEFAULT); + + for(i = source + 2; i < argc; i++) { +- if(strcmp(argv[i], "-mkfs-time") == 0 || ++ if(strcmp(argv[i], "-no-hardlinks") == 0) ++ no_hardlinks = TRUE; ++ else if(strcmp(argv[i], "-mkfs-time") == 0 || + strcmp(argv[i], "-fstime") == 0) { + if((++i == argc) || !parse_num_unsigned(argv[i], &mkfs_time)) { + ERROR("%s: %s missing or invalid time value\n", argv[0], argv[i - 1]); +@@ -5893,6 +5898,7 @@ printOptions: + "files larger than block size\n"); + ERROR("-no-duplicates\t\tdo not perform duplicate " + "checking\n"); ++ ERROR("-no-hardlinks\t\tdo not hardlink files, instead store duplicates\n"); + ERROR("-all-root\t\tmake all files owned by root\n"); + ERROR("-root-mode <mode>\tset root directory permissions to octal <mode>\n"); + ERROR("-force-uid <uid>\tset all file uids to <uid>\n"); +-- +2.30.0 + diff --git a/pkgs/tools/filesystems/squashfs/default.nix b/pkgs/tools/filesystems/squashfs/default.nix index c833cfe10ada..7e55bcf6e0e4 100644 --- a/pkgs/tools/filesystems/squashfs/default.nix +++ b/pkgs/tools/filesystems/squashfs/default.nix @@ -19,6 +19,10 @@ stdenv.mkDerivation { # This patch adds an option to pad filesystems (increasing size) in # exchange for better chunking / binary diff calculation. ./4k-align.patch + # Add -no-hardlinks option. This is a rebased version of + # c37bb4da4a5fa8c1cf114237ba364692dd522262, can be removed + # when upgrading to the next version after 4.4 + ./0001-Mksquashfs-add-no-hardlinks-option.patch ] ++ lib.optional stdenv.isDarwin ./darwin.patch; buildInputs = [ zlib xz zstd lz4 ]; -- cgit 1.4.1