From f82a1686e6840c396a141ff122e2f39d64fb6d4b Mon Sep 17 00:00:00 2001 From: Silvan Mosberger Date: Wed, 8 Nov 2023 05:16:01 +0100 Subject: lib.fileset: Split out internal test helper --- lib/fileset/tests.sh | 68 ++++++++++++++++++++++++++++++++-------------------- 1 file changed, 42 insertions(+), 26 deletions(-) (limited to 'lib') diff --git a/lib/fileset/tests.sh b/lib/fileset/tests.sh index 529f23ae8871..ead6d4c49569 100755 --- a/lib/fileset/tests.sh +++ b/lib/fileset/tests.sh @@ -224,6 +224,43 @@ withFileMonitor() { fi } + +# Create the tree structure declared in the tree variable, usage: +# +# tree=( +# [a/b] = # Declare that file a/b should exist +# [c/a] = # Declare that file c/a should exist +# [c/d/]= # Declare that directory c/d/ should exist +# ) +# createTree +declare -A tree +createTree() { + # Track which paths need to be created + local -a dirsToCreate=() + local -a filesToCreate=() + for p in "${!tree[@]}"; do + # If keys end with a `/` we treat them as directories, otherwise files + if [[ "$p" =~ /$ ]]; then + dirsToCreate+=("$p") + else + filesToCreate+=("$p") + fi + done + + # Create all the necessary paths. + # This is done with only a fixed number of processes, + # in order to not be too slow + # Though this does mean we're a bit limited with how many files can be created + if (( ${#dirsToCreate[@]} != 0 )); then + mkdir -p "${dirsToCreate[@]}" + fi + if (( ${#filesToCreate[@]} != 0 )); then + readarray -d '' -t parentsToCreate < <(dirname -z "${filesToCreate[@]}") + mkdir -p "${parentsToCreate[@]}" + touch "${filesToCreate[@]}" + fi +} + # Check whether a file set includes/excludes declared paths as expected, usage: # # tree=( @@ -232,34 +269,26 @@ withFileMonitor() { # [c/d/]= # Declare that directory c/d/ should exist and expect it to be excluded in the store path # ) # checkFileset './a' # Pass the fileset as the argument -declare -A tree checkFileset() { # New subshell so that we can have a separate trap handler, see `trap` below local fileset=$1 + # Create the tree + createTree + # Process the tree into separate arrays for included paths, excluded paths and excluded files. local -a included=() local -a excluded=() local -a excludedFiles=() - # Track which paths need to be created - local -a dirsToCreate=() - local -a filesToCreate=() for p in "${!tree[@]}"; do - # If keys end with a `/` we treat them as directories, otherwise files - if [[ "$p" =~ /$ ]]; then - dirsToCreate+=("$p") - isFile= - else - filesToCreate+=("$p") - isFile=1 - fi case "${tree[$p]}" in 1) included+=("$p") ;; 0) excluded+=("$p") - if [[ -n "$isFile" ]]; then + # If keys end with a `/` we treat them as directories, otherwise files + if [[ ! "$p" =~ /$ ]]; then excludedFiles+=("$p") fi ;; @@ -268,19 +297,6 @@ checkFileset() { esac done - # Create all the necessary paths. - # This is done with only a fixed number of processes, - # in order to not be too slow - # Though this does mean we're a bit limited with how many files can be created - if (( ${#dirsToCreate[@]} != 0 )); then - mkdir -p "${dirsToCreate[@]}" - fi - if (( ${#filesToCreate[@]} != 0 )); then - readarray -d '' -t parentsToCreate < <(dirname -z "${filesToCreate[@]}") - mkdir -p "${parentsToCreate[@]}" - touch "${filesToCreate[@]}" - fi - expression="toSource { root = ./.; fileset = $fileset; }" # We don't have lambda's in bash unfortunately, -- cgit 1.4.1 From 73493584a74cddb22d785942a2215c3dab12c29e Mon Sep 17 00:00:00 2001 From: Silvan Mosberger Date: Wed, 18 Oct 2023 00:16:50 +0200 Subject: lib.fileset.fromSource: init --- lib/fileset/default.nix | 71 ++++++++++++++++ lib/fileset/internal.nix | 53 ++++++++++++ lib/fileset/tests.sh | 213 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 337 insertions(+) (limited to 'lib') diff --git a/lib/fileset/default.nix b/lib/fileset/default.nix index 7bd701670386..53edc9e77097 100644 --- a/lib/fileset/default.nix +++ b/lib/fileset/default.nix @@ -3,8 +3,10 @@ let inherit (import ./internal.nix { inherit lib; }) _coerce + _singleton _coerceMany _toSourceFilter + _fromSourceFilter _unionMany _printFileset _intersection @@ -187,6 +189,75 @@ If a directory does not recursively contain any file, it is omitted from the sto filter = sourceFilter; }; + /* + Create a file set with the same files as a `lib.sources`-based value. + This does not import any of the files into the store. + + This can be used to gradually migrate from `lib.sources`-based filtering to `lib.fileset`. + + A file set can be turned back into a source using [`toSource`](#function-library-lib.fileset.toSource). + + :::{.note} + File sets cannot represent empty directories. + Turning the result of this function back into a source using `toSource` will therefore not preserve empty directories. + ::: + + Type: + fromSource :: SourceLike -> FileSet + + Example: + # There's no cleanSource-like function for file sets yet, + # but we can just convert cleanSource to a file set and use it that way + toSource { + root = ./.; + fileset = fromSource (lib.sources.cleanSource ./.); + } + + # Keeping a previous sourceByRegex (which could be migrated to `lib.fileset.unions`), + # but removing a subdirectory using file set functions + difference + (fromSource (lib.sources.sourceByRegex ./. [ + "^README\.md$" + # This regex includes everything in ./doc + "^doc(/.*)?$" + ]) + ./doc/generated + + # Use cleanSource, but limit it to only include ./Makefile and files under ./src + intersection + (fromSource (lib.sources.cleanSource ./.)) + (unions [ + ./Makefile + ./src + ]); + */ + fromSource = source: + let + # This function uses `._isLibCleanSourceWith`, `.origSrc` and `.filter`, + # which are technically internal to lib.sources, + # but we'll allow this since both libraries are in the same code base + # and this function is a bridge between them. + isFiltered = source ? _isLibCleanSourceWith; + path = if isFiltered then source.origSrc else source; + in + # We can only support sources created from paths + if ! isPath path then + if isStringLike path then + throw '' + lib.fileset.fromSource: The source origin of the argument is a string-like value ("${toString path}"), but it should be a path instead. + Sources created from paths in strings cannot be turned into file sets, use `lib.sources` or derivations instead.'' + else + throw '' + lib.fileset.fromSource: The source origin of the argument is of type ${typeOf path}, but it should be a path instead.'' + else if ! pathExists path then + throw '' + lib.fileset.fromSource: The source origin (${toString path}) of the argument does not exist.'' + else if isFiltered then + _fromSourceFilter path source.filter + else + # If there's no filter, no need to run the expensive conversion, all subpaths will be included + _singleton path; + /* The file set containing all files that are in either of two given file sets. This is the same as [`unions`](#function-library-lib.fileset.unions), diff --git a/lib/fileset/internal.nix b/lib/fileset/internal.nix index 9892172955c3..031262520785 100644 --- a/lib/fileset/internal.nix +++ b/lib/fileset/internal.nix @@ -461,6 +461,59 @@ rec { else nonEmpty; + # Turn a builtins.filterSource-based source filter on a root path into a file set + # containing only files included by the filter. + # The filter is lazily called as necessary to determine whether paths are included + # Type: Path -> (String -> String -> Bool) -> fileset + _fromSourceFilter = root: sourceFilter: + let + # During the recursion we need to track both: + # - The path value such that we can safely call `readDir` on it + # - The path string value such that we can correctly call the `filter` with it + # + # While we could just recurse with the path value, + # this would then require converting it to a path string for every path, + # which is a fairly expensive operation + + # Create a file set from a directory entry + fromDirEntry = path: pathString: type: + # The filter needs to run on the path as a string + if ! sourceFilter pathString type then + null + else if type == "directory" then + fromDir path pathString + else + type; + + # Create a file set from a directory + fromDir = path: pathString: + mapAttrs + # This looks a bit funny, but we need both the path-based and the path string-based values + (name: fromDirEntry (path + "/${name}") (pathString + "/${name}")) + # We need to readDir on the path value, because reading on a path string + # would be unspecified if there are multiple filesystem roots + (readDir path); + + rootPathType = pathType root; + + # We need to convert the path to a string to imitate what builtins.path calls the filter function with. + # We don't want to rely on `toString` for this though because it's not very well defined, see ../path/README.md + # So instead we use `lib.path.splitRoot` to safely deconstruct the path into its filesystem root and subpath + # We don't need the filesystem root though, builtins.path doesn't expose that in any way to the filter. + # So we only need the components, which we then turn into a string as one would expect. + rootString = "/" + concatStringsSep "/" (components (splitRoot root).subpath); + in + if rootPathType == "directory" then + # We imitate builtins.path not calling the filter on the root path + _create root (fromDir root rootString) + else + # Direct files are always included by builtins.path without calling the filter + # But we need to lift up the base path to its parent to satisfy the base path invariant + _create (dirOf root) + { + ${baseNameOf root} = rootPathType; + }; + # Transforms the filesetTree of a file set to a shorter base path, e.g. # _shortenTreeBase [ "foo" ] (_create /foo/bar null) # => { bar = null; } diff --git a/lib/fileset/tests.sh b/lib/fileset/tests.sh index ead6d4c49569..13f02bb656b9 100755 --- a/lib/fileset/tests.sh +++ b/lib/fileset/tests.sh @@ -1,5 +1,7 @@ #!/usr/bin/env bash # shellcheck disable=SC2016 +# shellcheck disable=SC2317 +# shellcheck disable=SC2192 # Tests lib.fileset # Run: @@ -839,6 +841,217 @@ touch 0 "${filesToCreate[@]}" expectTrace 'unions (mapAttrsToList (n: _: ./. + "/${n}") (removeAttrs (builtins.readDir ./.) [ "0" ]))' "$expectedTrace" rm -rf -- * +## lib.fileset.fromSource + +# Check error messages +expectFailure 'fromSource null' 'lib.fileset.fromSource: The source origin of the argument is of type null, but it should be a path instead.' + +expectFailure 'fromSource (lib.cleanSource "")' 'lib.fileset.fromSource: The source origin of the argument is a string-like value \(""\), but it should be a path instead. +\s*Sources created from paths in strings cannot be turned into file sets, use `lib.sources` or derivations instead.' + +expectFailure 'fromSource (lib.cleanSource null)' 'lib.fileset.fromSource: The source origin of the argument is of type null, but it should be a path instead.' + +# fromSource on a path works and is the same as coercing that path +mkdir a +touch a/b c +expectEqual 'trace (fromSource ./.) null' 'trace ./. null' +rm -rf -- * + +# Check that converting to a file set doesn't read the included files +mkdir a +touch a/b +run() { + expectEqual "trace (fromSource (lib.cleanSourceWith { src = ./a; })) null" "builtins.trace \"$work/a (all files in directory)\" null" + rm a/b +} +withFileMonitor run a/b +rm -rf -- * + +# Check that converting to a file set doesn't read entries for directories that are filtered out +mkdir -p a/b +touch a/b/c +run() { + expectEqual "trace (fromSource (lib.cleanSourceWith { + src = ./a; + filter = pathString: type: false; + })) null" "builtins.trace \"(empty)\" null" + rm a/b/c + rmdir a/b +} +withFileMonitor run a/b +rm -rf -- * + +# The filter is not needed on empty directories +expectEqual 'trace (fromSource (lib.cleanSourceWith { + src = ./.; + filter = abort "filter should not be needed"; +})) null' 'trace _emptyWithoutBase null' + +# Single files also work +touch a b +expectEqual 'trace (fromSource (cleanSourceWith { src = ./a; })) null' 'trace ./a null' +rm -rf -- * + +# For a tree assigning each subpath true/false, +# check whether a source filter with those results includes the same files +# as a file set created using fromSource. Usage: +# +# tree=( +# [a]=1 # ./a is a file and the filter should return true for it +# [b/]=0 # ./b is a directory and the filter should return false for it +# ) +# checkSource +checkSource() { + createTree + + # Serialise the tree as JSON (there's only minimal savings with jq, + # and we don't need to handle escapes) + { + echo "{" + first=1 + for p in "${!tree[@]}"; do + if [[ -z "$first" ]]; then + echo "," + else + first= + fi + echo "\"$p\":" + case "${tree[$p]}" in + 1) + echo "true" + ;; + 0) + echo "false" + ;; + *) + die "Unsupported tree value: ${tree[$p]}" + esac + done + echo "}" + } > "$tmp/tree.json" + + # An expression to create a source value with a filter matching the tree + sourceExpr=' + let + tree = importJSON '"$tmp"'/tree.json; + in + cleanSourceWith { + src = ./.; + filter = + pathString: type: + let + stripped = removePrefix (toString ./. + "/") pathString; + key = stripped + optionalString (type == "directory") "/"; + in + tree.${key} or + (throw "tree key ${key} missing"); + } + ' + + filesetExpr=' + toSource { + root = ./.; + fileset = fromSource ('"$sourceExpr"'); + } + ' + + # Turn both into store paths + sourceStorePath=$(expectStorePath "$sourceExpr") + filesetStorePath=$(expectStorePath "$filesetExpr") + + # Loop through each path in the tree + while IFS= read -r -d $'\0' subpath; do + if [[ ! -e "$sourceStorePath"/"$subpath" ]]; then + # If it's not in the source store path, it's also not in the file set store path + if [[ -e "$filesetStorePath"/"$subpath" ]]; then + die "The store path $sourceStorePath created by $expr doesn't contain $subpath, but the corresponding store path $filesetStorePath created via fromSource does contain $subpath" + fi + elif [[ -z "$(find "$sourceStorePath"/"$subpath" -type f)" ]]; then + # If it's an empty directory in the source store path, it shouldn't be in the file set store path + if [[ -e "$filesetStorePath"/"$subpath" ]]; then + die "The store path $sourceStorePath created by $expr contains the path $subpath without any files, but the corresponding store path $filesetStorePath created via fromSource didn't omit it" + fi + else + # If it's non-empty directory or a file, it should be in the file set store path + if [[ ! -e "$filesetStorePath"/"$subpath" ]]; then + die "The store path $sourceStorePath created by $expr contains the non-empty path $subpath, but the corresponding store path $filesetStorePath created via fromSource doesn't include it" + fi + fi + done < <(find . -mindepth 1 -print0) + + rm -rf -- * +} + +# Check whether the filter is evaluated correctly +tree=( + [a]= + [b/]= + [b/c]= + [b/d]= + [e/]= + [e/e/]= +) +# We fill out the above tree values with all possible combinations of 0 and 1 +# Then check whether a filter based on those return values gets turned into the corresponding file set +for i in $(seq 0 $((2 ** ${#tree[@]} - 1 ))); do + for p in "${!tree[@]}"; do + tree[$p]=$(( i % 2 )) + (( i /= 2 )) || true + done + checkSource +done + +# The filter is called with the same arguments in the same order +mkdir a e +touch a/b a/c d e +expectEqual ' + trace (fromSource (cleanSourceWith { + src = ./.; + filter = pathString: type: builtins.trace "${pathString} ${toString type}" true; + })) null +' ' + builtins.seq (cleanSourceWith { + src = ./.; + filter = pathString: type: builtins.trace "${pathString} ${toString type}" true; + }).outPath + builtins.trace "'"$work"' (all files in directory)" + null +' +rm -rf -- * + +# Test that if a directory is not included, the filter isn't called on its contents +mkdir a b +touch a/c b/d +expectEqual 'trace (fromSource (cleanSourceWith { + src = ./.; + filter = pathString: type: + if pathString == toString ./a then + false + else if pathString == toString ./b then + true + else if pathString == toString ./b/d then + true + else + abort "This filter should not be called with path ${pathString}"; +})) null' 'trace (_create ./. { b = "directory"; }) null' +rm -rf -- * + +# The filter is called lazily: +# If a later say intersection removes a part of the tree, the filter won't run on it +mkdir a d +touch a/{b,c} d/e +expectEqual 'trace (intersection ./a (fromSource (lib.cleanSourceWith { + src = ./.; + filter = pathString: type: + if pathString == toString ./a || pathString == toString ./a/b then + true + else if pathString == toString ./a/c then + false + else + abort "filter should not be called on ${pathString}"; +}))) null' 'trace ./a/b null' +rm -rf -- * + # TODO: Once we have combinators and a property testing library, derive property tests from https://en.wikipedia.org/wiki/Algebra_of_sets echo >&2 tests ok -- cgit 1.4.1 From e07e80e8419f35d596712ecfff57259f0230a33c Mon Sep 17 00:00:00 2001 From: Silvan Mosberger Date: Wed, 8 Nov 2023 21:44:44 +0100 Subject: lib.fileset.toSource: Mention fromSource in errors --- lib/fileset/README.md | 1 - lib/fileset/default.nix | 7 ++++++- lib/fileset/internal.nix | 7 ++++++- lib/fileset/tests.sh | 7 +++++++ 4 files changed, 19 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/fileset/README.md b/lib/fileset/README.md index ebe13f08fdef..d7398438826e 100644 --- a/lib/fileset/README.md +++ b/lib/fileset/README.md @@ -241,5 +241,4 @@ Here's a list of places in the library that need to be updated in the future: - > The file set library is currently somewhat limited but is being expanded to include more functions over time. in [the manual](../../doc/functions/fileset.section.md) -- If/Once a function to convert `lib.sources` values into file sets exists, the `_coerce` and `toSource` functions should be updated to mention that function in the error when such a value is passed - If/Once a function exists that can optionally include a path depending on whether it exists, the error message for the path not existing in `_coerce` should mention the new function diff --git a/lib/fileset/default.nix b/lib/fileset/default.nix index 53edc9e77097..640d0b49d3ea 100644 --- a/lib/fileset/default.nix +++ b/lib/fileset/default.nix @@ -153,7 +153,12 @@ If a directory does not recursively contain any file, it is omitted from the sto sourceFilter = _toSourceFilter fileset; in if ! isPath root then - if isStringLike root then + if root ? _isLibCleanSourceWith then + throw '' + lib.fileset.toSource: `root` is a `lib.sources`-based value, but it should be a path instead. + To use a `lib.sources`-based value, convert it to a file set using `lib.fileset.fromSource` and pass it as `fileset`. + Note that this only works for sources created from paths.'' + else if isStringLike root then throw '' lib.fileset.toSource: `root` ("${toString root}") is a string-like value, but it should be a path instead. Paths in strings are not supported by `lib.fileset`, use `lib.sources` or derivations instead.'' diff --git a/lib/fileset/internal.nix b/lib/fileset/internal.nix index 031262520785..45115d5a8a3d 100644 --- a/lib/fileset/internal.nix +++ b/lib/fileset/internal.nix @@ -170,7 +170,12 @@ rec { else value else if ! isPath value then - if isStringLike value then + if value ? _isLibCleanSourceWith then + throw '' + ${context} is a `lib.sources`-based value, but it should be a file set or a path instead. + To convert a `lib.sources`-based value to a file set you can use `lib.fileset.fromSource`. + Note that this only works for sources created from paths.'' + else if isStringLike value then throw '' ${context} ("${toString value}") is a string-like value, but it should be a file set or a path instead. Paths represented as strings are not supported by `lib.fileset`, use `lib.sources` or derivations instead.'' diff --git a/lib/fileset/tests.sh b/lib/fileset/tests.sh index 13f02bb656b9..617794524824 100755 --- a/lib/fileset/tests.sh +++ b/lib/fileset/tests.sh @@ -339,6 +339,10 @@ checkFileset() { expectFailure 'toSource { root = "/nix/store/foobar"; fileset = ./.; }' 'lib.fileset.toSource: `root` \("/nix/store/foobar"\) is a string-like value, but it should be a path instead. \s*Paths in strings are not supported by `lib.fileset`, use `lib.sources` or derivations instead.' +expectFailure 'toSource { root = cleanSourceWith { src = ./.; }; fileset = ./.; }' 'lib.fileset.toSource: `root` is a `lib.sources`-based value, but it should be a path instead. +\s*To use a `lib.sources`-based value, convert it to a file set using `lib.fileset.fromSource` and pass it as `fileset`. +\s*Note that this only works for sources created from paths.' + # Only paths are accepted as `root` expectFailure 'toSource { root = 10; fileset = ./.; }' 'lib.fileset.toSource: `root` is of type int, but it should be a path instead.' @@ -376,6 +380,9 @@ rm -rf * expectFailure 'toSource { root = ./.; fileset = 10; }' 'lib.fileset.toSource: `fileset` is of type int, but it should be a file set or a path instead.' expectFailure 'toSource { root = ./.; fileset = "/some/path"; }' 'lib.fileset.toSource: `fileset` \("/some/path"\) is a string-like value, but it should be a file set or a path instead. \s*Paths represented as strings are not supported by `lib.fileset`, use `lib.sources` or derivations instead.' +expectFailure 'toSource { root = ./.; fileset = cleanSourceWith { src = ./.; }; }' 'lib.fileset.toSource: `fileset` is a `lib.sources`-based value, but it should be a file set or a path instead. +\s*To convert a `lib.sources`-based value to a file set you can use `lib.fileset.fromSource`. +\s*Note that this only works for sources created from paths.' # Path coercion errors for non-existent paths expectFailure 'toSource { root = ./.; fileset = ./a; }' 'lib.fileset.toSource: `fileset` \('"$work"'/a\) does not exist.' -- cgit 1.4.1