diff options
Diffstat (limited to 'nixpkgs/nixos/modules/virtualisation/includes-to-excludes.py')
-rw-r--r-- | nixpkgs/nixos/modules/virtualisation/includes-to-excludes.py | 86 |
1 files changed, 86 insertions, 0 deletions
diff --git a/nixpkgs/nixos/modules/virtualisation/includes-to-excludes.py b/nixpkgs/nixos/modules/virtualisation/includes-to-excludes.py new file mode 100644 index 000000000000..05ef9c0f23b9 --- /dev/null +++ b/nixpkgs/nixos/modules/virtualisation/includes-to-excludes.py @@ -0,0 +1,86 @@ + +# Convert a list of strings to a regex that matches everything but those strings +# ... and it had to be a POSIX regex; no negative lookahead :( +# This is a workaround for erofs supporting only exclude regex, not an include list + +import sys +import re +from collections import defaultdict + +# We can configure this script to match in different ways if we need to. +# The regex got too long for the argument list, so we had to truncate the +# hashes and use MATCH_STRING_PREFIX. That's less accurate, and might pick up some +# garbage like .lock files, but only if the sandbox doesn't hide those. Even +# then it should be harmless. + +# Produce the negation of ^a$ +MATCH_EXACTLY = ".+" +# Produce the negation of ^a +MATCH_STRING_PREFIX = "//X" # //X should be epsilon regex instead. Not supported?? +# Produce the negation of ^a/? +MATCH_SUBPATHS = "[^/].*$" + +# match_end = MATCH_SUBPATHS +match_end = MATCH_STRING_PREFIX +# match_end = MATCH_EXACTLY + +def chars_to_inverted_class(letters): + assert len(letters) > 0 + letters = list(letters) + + s = "[^" + + if "]" in letters: + s += "]" + letters.remove("]") + + final = "" + if "-" in letters: + final = "-" + letters.remove("-") + + s += "".join(letters) + + s += final + + s += "]" + + return s + +# There's probably at least one bug in here, but it seems to works well enough +# for filtering store paths. +def strings_to_inverted_regex(strings): + s = "(" + + # Match anything that starts with the wrong character + + chars = defaultdict(list) + + for item in strings: + if item != "": + chars[item[0]].append(item[1:]) + + if len(chars) == 0: + s += match_end + else: + s += chars_to_inverted_class(chars) + + # Now match anything that starts with the right char, but then goes wrong + + for char, sub in chars.items(): + s += "|(" + re.escape(char) + strings_to_inverted_regex(sub) + ")" + + s += ")" + return s + +if __name__ == "__main__": + stdin_lines = [] + for line in sys.stdin: + if line.strip() != "": + stdin_lines.append(line.strip()) + + print("^" + strings_to_inverted_regex(stdin_lines)) + +# Test: +# (echo foo; echo fo/; echo foo/; echo foo/ba/r; echo b; echo az; echo az/; echo az/a; echo ab; echo ab/a; echo ab/; echo abc; echo abcde; echo abb; echo ac; echo b) | grep -vE "$((echo ab; echo az; echo foo;) | python includes-to-excludes.py | tee /dev/stderr )" +# should print ab, az, foo and their subpaths |