1 files changed, 86 insertions, 0 deletions
diff --git a/nixpkgs/nixos/modules/virtualisation/includes-to-excludes.py b/nixpkgs/nixos/modules/virtualisation/includes-to-excludes.py
new file mode 100644
index 000000000000..05ef9c0f23b9
--- /dev/null
+++ b/nixpkgs/nixos/modules/virtualisation/includes-to-excludes.py
@@ -0,0 +1,86 @@
+
+# Convert a list of strings to a regex that matches everything but those strings
+# ... and it had to be a POSIX regex; no negative lookahead :(
+# This is a workaround for erofs supporting only exclude regex, not an include list
+
+import sys
+import re
+from collections import defaultdict
+
+# We can configure this script to match in different ways if we need to.
+# The regex got too long for the argument list, so we had to truncate the
+# hashes and use MATCH_STRING_PREFIX. That's less accurate, and might pick up some
+# garbage like .lock files, but only if the sandbox doesn't hide those. Even
+# then it should be harmless.
+
+# Produce the negation of ^a$
+MATCH_EXACTLY = ".+"
+# Produce the negation of ^a
+MATCH_STRING_PREFIX = "//X" # //X should be epsilon regex instead. Not supported??
+# Produce the negation of ^a/?
+MATCH_SUBPATHS = "[^/].*$"
+
+# match_end = MATCH_SUBPATHS
+match_end = MATCH_STRING_PREFIX
+# match_end = MATCH_EXACTLY
+
+def chars_to_inverted_class(letters):
+    assert len(letters) > 0
+    letters = list(letters)
+
+    s = "[^"
+
+    if "]" in letters:
+        s += "]"
+        letters.remove("]")
+
+    final = ""
+    if "-" in letters:
+        final = "-"
+        letters.remove("-")
+
+    s += "".join(letters)
+
+    s += final
+
+    s += "]"
+
+    return s
+
+# There's probably at least one bug in here, but it seems to works well enough
+# for filtering store paths.
+def strings_to_inverted_regex(strings):
+    s = "("
+
+    # Match anything that starts with the wrong character
+
+    chars = defaultdict(list)
+
+    for item in strings:
+        if item != "":
+            chars[item[0]].append(item[1:])
+
+    if len(chars) == 0:
+        s += match_end
+    else:
+        s += chars_to_inverted_class(chars)
+
+    # Now match anything that starts with the right char, but then goes wrong
+
+    for char, sub in chars.items():
+        s += "|(" + re.escape(char) + strings_to_inverted_regex(sub) + ")"
+
+    s += ")"
+    return s
+
+if __name__ == "__main__":
+    stdin_lines = []
+    for line in sys.stdin:
+        if line.strip() != "":
+            stdin_lines.append(line.strip())
+
+    print("^" + strings_to_inverted_regex(stdin_lines))
+
+# Test:
+# (echo foo; echo fo/; echo foo/; echo foo/ba/r; echo b; echo az; echo az/; echo az/a; echo ab; echo ab/a; echo ab/; echo abc; echo abcde; echo abb; echo ac; echo b) | grep -vE "$((echo ab; echo az; echo foo;) | python includes-to-excludes.py | tee /dev/stderr )"
+# should print ab, az, foo and their subpaths