diff options
Diffstat (limited to 'nixpkgs/lib/strings.nix')
-rw-r--r-- | nixpkgs/lib/strings.nix | 202 |
1 files changed, 197 insertions, 5 deletions
diff --git a/nixpkgs/lib/strings.nix b/nixpkgs/lib/strings.nix index b2fd495e4c84..295d98900e99 100644 --- a/nixpkgs/lib/strings.nix +++ b/nixpkgs/lib/strings.nix @@ -17,6 +17,7 @@ rec { head isInt isList + isAttrs isString match parseDrvName @@ -253,10 +254,7 @@ rec { => false */ hasInfix = infix: content: - let - drop = x: substring 1 (stringLength x) x; - in hasPrefix infix content - || content != "" && hasInfix infix (drop content); + builtins.match ".*${escapeRegex infix}.*" "${content}" != null; /* Convert a string to a list of characters (i.e. singleton strings). This allows you to, e.g., map a function over each character. However, @@ -327,6 +325,66 @@ rec { */ escapeShellArgs = concatMapStringsSep " " escapeShellArg; + /* Test whether the given name is a valid POSIX shell variable name. + + Type: string -> bool + + Example: + isValidPosixName "foo_bar000" + => true + isValidPosixName "0-bad.jpg" + => false + */ + isValidPosixName = name: match "[a-zA-Z_][a-zA-Z0-9_]*" name != null; + + /* Translate a Nix value into a shell variable declaration, with proper escaping. + + The value can be a string (mapped to a regular variable), a list of strings + (mapped to a Bash-style array) or an attribute set of strings (mapped to a + Bash-style associative array). Note that "string" includes string-coercible + values like paths or derivations. + + Strings are translated into POSIX sh-compatible code; lists and attribute sets + assume a shell that understands Bash syntax (e.g. Bash or ZSH). + + Type: string -> (string | listOf string | attrsOf string) -> string + + Example: + '' + ${toShellVar "foo" "some string"} + [[ "$foo" == "some string" ]] + '' + */ + toShellVar = name: value: + lib.throwIfNot (isValidPosixName name) "toShellVar: ${name} is not a valid shell variable name" ( + if isAttrs value && ! isCoercibleToString value then + "declare -A ${name}=(${ + concatStringsSep " " (lib.mapAttrsToList (n: v: + "[${escapeShellArg n}]=${escapeShellArg v}" + ) value) + })" + else if isList value then + "declare -a ${name}=(${escapeShellArgs value})" + else + "${name}=${escapeShellArg value}" + ); + + /* Translate an attribute set into corresponding shell variable declarations + using `toShellVar`. + + Type: attrsOf (string | listOf string | attrsOf string) -> string + + Example: + let + foo = "value"; + bar = foo; + in '' + ${toShellVars { inherit foo bar; }} + [[ "$foo" == "$bar" ]] + '' + */ + toShellVars = vars: concatStringsSep "\n" (lib.mapAttrsToList toShellVar vars); + /* Turn a string into a Nix expression representing that string Type: string -> string @@ -756,7 +814,14 @@ rec { sanitizeDerivationName pkgs.hello => "-nix-store-2g75chlbpxlrqn15zlby2dfh8hr9qwbk-hello-2.10" */ - sanitizeDerivationName = string: lib.pipe string [ + sanitizeDerivationName = + let okRegex = match "[[:alnum:]+_?=-][[:alnum:]+._?=-]*"; + in + string: + # First detect the common case of already valid strings, to speed those up + if stringLength string <= 207 && okRegex string != null + then unsafeDiscardStringContext string + else lib.pipe string [ # Get rid of string context. This is safe under the assumption that the # resulting string is only used as a derivation name unsafeDiscardStringContext @@ -774,4 +839,131 @@ rec { (x: if stringLength x == 0 then "unknown" else x) ]; + /* Computes the Levenshtein distance between two strings. + Complexity O(n*m) where n and m are the lengths of the strings. + Algorithm adjusted from https://stackoverflow.com/a/9750974/6605742 + + Type: levenshtein :: string -> string -> int + + Example: + levenshtein "foo" "foo" + => 0 + levenshtein "book" "hook" + => 1 + levenshtein "hello" "Heyo" + => 3 + */ + levenshtein = a: b: let + # Two dimensional array with dimensions (stringLength a + 1, stringLength b + 1) + arr = lib.genList (i: + lib.genList (j: + dist i j + ) (stringLength b + 1) + ) (stringLength a + 1); + d = x: y: lib.elemAt (lib.elemAt arr x) y; + dist = i: j: + let c = if substring (i - 1) 1 a == substring (j - 1) 1 b + then 0 else 1; + in + if j == 0 then i + else if i == 0 then j + else lib.min + ( lib.min (d (i - 1) j + 1) (d i (j - 1) + 1)) + ( d (i - 1) (j - 1) + c ); + in d (stringLength a) (stringLength b); + + /* Returns the length of the prefix common to both strings. + */ + commonPrefixLength = a: b: + let + m = lib.min (stringLength a) (stringLength b); + go = i: if i >= m then m else if substring i 1 a == substring i 1 b then go (i + 1) else i; + in go 0; + + /* Returns the length of the suffix common to both strings. + */ + commonSuffixLength = a: b: + let + m = lib.min (stringLength a) (stringLength b); + go = i: if i >= m then m else if substring (stringLength a - i - 1) 1 a == substring (stringLength b - i - 1) 1 b then go (i + 1) else i; + in go 0; + + /* Returns whether the levenshtein distance between two strings is at most some value + Complexity is O(min(n,m)) for k <= 2 and O(n*m) otherwise + + Type: levenshteinAtMost :: int -> string -> string -> bool + + Example: + levenshteinAtMost 0 "foo" "foo" + => true + levenshteinAtMost 1 "foo" "boa" + => false + levenshteinAtMost 2 "foo" "boa" + => true + levenshteinAtMost 2 "This is a sentence" "this is a sentense." + => false + levenshteinAtMost 3 "This is a sentence" "this is a sentense." + => true + + */ + levenshteinAtMost = let + infixDifferAtMost1 = x: y: stringLength x <= 1 && stringLength y <= 1; + + # This function takes two strings stripped by their common pre and suffix, + # and returns whether they differ by at most two by Levenshtein distance. + # Because of this stripping, if they do indeed differ by at most two edits, + # we know that those edits were (if at all) done at the start or the end, + # while the middle has to have stayed the same. This fact is used in the + # implementation. + infixDifferAtMost2 = x: y: + let + xlen = stringLength x; + ylen = stringLength y; + # This function is only called with |x| >= |y| and |x| - |y| <= 2, so + # diff is one of 0, 1 or 2 + diff = xlen - ylen; + + # Infix of x and y, stripped by the left and right most character + xinfix = substring 1 (xlen - 2) x; + yinfix = substring 1 (ylen - 2) y; + + # x and y but a character deleted at the left or right + xdelr = substring 0 (xlen - 1) x; + xdell = substring 1 (xlen - 1) x; + ydelr = substring 0 (ylen - 1) y; + ydell = substring 1 (ylen - 1) y; + in + # A length difference of 2 can only be gotten with 2 delete edits, + # which have to have happened at the start and end of x + # Example: "abcdef" -> "bcde" + if diff == 2 then xinfix == y + # A length difference of 1 can only be gotten with a deletion on the + # right and a replacement on the left or vice versa. + # Example: "abcdef" -> "bcdez" or "zbcde" + else if diff == 1 then xinfix == ydelr || xinfix == ydell + # No length difference can either happen through replacements on both + # sides, or a deletion on the left and an insertion on the right or + # vice versa + # Example: "abcdef" -> "zbcdez" or "bcdefz" or "zabcde" + else xinfix == yinfix || xdelr == ydell || xdell == ydelr; + + in k: if k <= 0 then a: b: a == b else + let f = a: b: + let + alen = stringLength a; + blen = stringLength b; + prelen = commonPrefixLength a b; + suflen = commonSuffixLength a b; + presuflen = prelen + suflen; + ainfix = substring prelen (alen - presuflen) a; + binfix = substring prelen (blen - presuflen) b; + in + # Make a be the bigger string + if alen < blen then f b a + # If a has over k more characters than b, even with k deletes on a, b can't be reached + else if alen - blen > k then false + else if k == 1 then infixDifferAtMost1 ainfix binfix + else if k == 2 then infixDifferAtMost2 ainfix binfix + else levenshtein ainfix binfix <= k; + in f; } |