diff options
Diffstat (limited to 'nixpkgs-overlays/nixpkgs-mozilla/lib/parseTOML.nix')
-rw-r--r-- | nixpkgs-overlays/nixpkgs-mozilla/lib/parseTOML.nix | 210 |
1 file changed, 210 insertions, 0 deletions
with builtins;

# A minimal TOML parser written in pure Nix.
#
# Layout: three chained `let … in` scopes — (1) a tokenizer, (2) section-header
# parsing helpers, (3) the parser proper — followed by the exported attrset.

# --- 1. Tokenizer -----------------------------------------------------------
let
  layout_pat = "[ \n]+";
  layout_pat_opt = "[ \n]*";
  # Matches one token: `=`, a `[[table]]` header, a `[table]` header,
  # a bare key, or a double-quoted string.
  token_pat = ''=|[[][[][a-zA-Z0-9_."*-]+[]][]]|[[][a-zA-Z0-9_."*-]+[]]|[a-zA-Z0-9_-]+|"[^"]*"''; #"

  # Tokenizer for Nix < 1.12, which lacks builtins.split.
  #
  # builtins.match must match the ENTIRE string and C++ std::regex cannot
  # capture inside repeated groups, so we pre-build patterns that capture
  # 2^k tokens at a time and fall back to smaller k when a pattern stops
  # matching the remaining input.
  tokenizer_1_11 = str:
    let
      tokenizer_rec = len: prevTokens: patterns: str:
        let
          pattern = head patterns;
          layoutAndTokens = match pattern str;
          # First capture is the consumed prefix (layout + tokens); the rest
          # are the individual tokens. Laziness keeps these safe when the
          # match failed.
          matchLength = stringLength (head layoutAndTokens);
          tokens = prevTokens ++ tail layoutAndTokens;
        in
          if layoutAndTokens == null then
            # This pattern no longer fits the remaining input.
            if tail patterns == [] then prevTokens
            # Retry with the next pattern, which captures half as many tokens.
            else tokenizer_rec len prevTokens (tail patterns) str
          else tokenizer_rec len tokens patterns (substring matchLength len str);

      avgTokenSize = 100;
      ceilLog2 = v:
        let inner = n: i: if i < v then inner (n + 1) (i * 2) else n; in
        inner 1 1;

      # Build the list of patterns, largest (2^depth tokens) first, each
      # wrapped so it anchors at the start and tolerates a trailing `.*`.
      generatePatterns = str:
        let
          depth = ceilLog2 (stringLength str / avgTokenSize);
          inner = depth:
            if depth == 0 then [ "(${token_pat})" ]
            else
              let next = inner (depth - 1); in
              [ "${head next}${layout_pat}${head next}" ] ++ next;
        in
          map (pat: "(${layout_pat_opt}${pat}).*" ) (inner depth);

    in
      tokenizer_rec (stringLength str) [] (generatePatterns str) str;

  # Tokenizer for Nix >= 1.12: builtins.split does the heavy lifting.
  tokenizer_1_12 = str:
    let
      # split interleaves unmatched text (strings) with captures (lists).
      layoutTokenList = split "(${token_pat})" str;
      isLayout = s: match layout_pat_opt s != null;
      filterLayout = list:
        filter (s:
          if isString s then
            if isLayout s then false
            else throw "Error: Unexpected token: '${s}'"
          else true) list;
      # Each capture arrives as a singleton list; unwrap it.
      removeTokenWrapper = list:
        map (x: assert tail x == []; head x) list;
    in
      removeTokenWrapper (filterLayout layoutTokenList);

  # Pick the implementation supported by the running Nix.
  tokenizer =
    if builtins ? split
    then tokenizer_1_12
    else tokenizer_1_11;
in

# --- 2. Section-header parsing ----------------------------------------------
let
  # Strip the surrounding quotes. Escape sequences are deliberately ignored
  # for now.
  unescapeString = str:
    assert match ''"[^"]*"'' str != null; #"
    substring 1 (stringLength str - 2) str;

  # One path component inside a TOML section name: bare or quoted.
  ident_pat = ''[a-zA-Z0-9_-]+|"[^"]*"''; #"

  # Drop `wrapLen` characters of bracket on each side of `token`.
  removeBraces = token: wrapLen:
    substring wrapLen (stringLength token - 2 * wrapLen) token;

  # Pre-1.12 dotted-path matcher; capped at 11 identifiers by construction.
  matchPathFun_1_11 = token:
    let
      # header_pat applied to "a.b.c" yields [ "a" ".b" "b" ".c" "c" ];
      # the dot-prefixed duplicates are filtered out below.
      header_pat =
        foldl' (pat: n: "(${ident_pat})([.]${pat})?")
          "(${ident_pat})" (genList (n: 0) 10);
      matchPath = match header_pat token;
      filterDot = filter (s: substring 0 1 s != ".") matchPath;
    in
      filterDot;

  # 1.12+ dotted-path matcher: keep only the captures produced by split.
  matchPathFun_1_12 = token:
    map (e: head e)
      (filter (s: isList s)
        (split "(${ident_pat})" token));

  matchPathFun =
    if builtins ? split
    then matchPathFun_1_12
    else matchPathFun_1_11;

  # Turn a `[a.b]` / `[["c".d]]` header token into a list of path components,
  # unquoting quoted components. `wrapLen` is 1 or 2 bracket pairs.
  headerToPath = token: wrapLen:
    let
      token' = removeBraces token wrapLen;
      matchPath = matchPathFun token';
      path =
        map (s:
          if substring 0 1 s != ''"'' then s #"
          else unescapeString s
        ) matchPath;
    in
      assert matchPath != null;
      # assert trace "Path: ${token'}; match as ${toString path}" true;
      path;
in

# --- 3. Parser: rebuild the equivalent attribute set -------------------------
let
  tokenToValue = token:
    if token == "true" then true
    else if token == "false" then false
    else unescapeString token;

  # Fold state: idx tracks position inside a `key = value` triple,
  # path/isList/elem describe the section being filled, output accumulates
  # one attrset per closed section.
  parserInitState = {
    idx = 0;
    path = [];
    isList = false;
    output = [];
    elem = {};
  };

  # Imported from the nixpkgs library.
  setAttrByPath = attrPath: value:
    if attrPath == [] then value
    else listToAttrs
      [ { name = head attrPath; value = setAttrByPath (tail attrPath) value; } ];

  # Flush the in-progress section into `output`, wrapping it in a list when
  # it came from a `[[...]]` header.
  closeSection = state:
    state // {
      output = state.output ++ [ (setAttrByPath state.path (
        if state.isList then [ state.elem ]
        else state.elem
      )) ];
    };

  # State machine consuming one token at a time:
  #   idx 0 → expect a header or a key, idx 1 → expect `=`, idx 2 → expect
  #   a value, then back to idx 0.
  readToken = state: token:
    # assert trace "Read '${token}'" true;
    if state.idx == 0 then
      if substring 0 2 token == "[[" then
        (closeSection state) // {
          path = headerToPath token 2;
          isList = true;
          elem = {};
        }
      else if substring 0 1 token == "[" then
        (closeSection state) // {
          path = headerToPath token 1;
          isList = false;
          elem = {};
        }
      else
        assert match "[a-zA-Z0-9_-]+" token != null;
        state // { idx = 1; name = token; }
    else if state.idx == 1 then
      assert token == "=";
      state // { idx = 2; }
    else
      assert state.idx == 2;
      state // {
        idx = 0;
        elem = state.elem // {
          "${state.name}" = tokenToValue token;
        };
      };

  # Fold the token stream, then close the final (still open) section.
  parser = str:
    closeSection (foldl' readToken parserInitState (tokenizer str));

  fromTOML = toml:
    let
      sections = (parser toml).output;
      # Inlined from the nixpkgs library: deep-merge the per-section attrsets,
      # concatenating `[[...]]` lists and recursing into nested sets.
      zipAttrs = sets:
        listToAttrs (map (n: {
          name = n;
          value =
            let v = catAttrs n sets; in
            # assert trace "Visiting ${n}" true;
            if tail v == [] then head v
            else if isList (head v) then concatLists v
            else if isAttrs (head v) then zipAttrs v
            else throw "cannot merge sections";
        }) (concatLists (map attrNames sets)));
    in
      zipAttrs sections;
in

{
  # Smoke-test attributes; evaluating them reads a local file / fetches a URL.
  testing = fromTOML (builtins.readFile ./channel-rust-nightly.toml);
  testing_url = fromTOML (builtins.readFile (builtins.fetchurl
    https://static.rust-lang.org/dist/channel-rust-nightly.toml));
  inherit fromTOML;
}