From 5e94fe5693e4846502c79b47168e7ba26aa85dd1 Mon Sep 17 00:00:00 2001 From: Philipp Otterbein Date: Tue, 5 Aug 2025 21:59:59 +0200 Subject: [PATCH] speedup derivation parsing --- src/libstore/derivations.cc | 213 ++++++++++-------- .../include/nix/store/parsed-derivations.hh | 4 +- src/libstore/parsed-derivations.cc | 6 +- 3 files changed, 124 insertions(+), 99 deletions(-) diff --git a/src/libstore/derivations.cc b/src/libstore/derivations.cc index 30048c522..1afc343d7 100644 --- a/src/libstore/derivations.cc +++ b/src/libstore/derivations.cc @@ -15,6 +15,8 @@ namespace nix { +using namespace std::literals::string_view_literals; + std::optional DerivationOutput::path(const StoreDirConfig & store, std::string_view drvName, OutputNameView outputName) const { @@ -183,36 +185,60 @@ static void expect(StringViewStream & str, std::string_view s) str.remaining.remove_prefix(s.size()); } +static void expect(StringViewStream & str, char c) +{ + if (str.remaining.empty() || str.remaining[0] != c) + throw FormatError("expected string '%1%'", c); + str.remaining.remove_prefix(1); +} + /* Read a C-style string from stream `str'. */ static BackedStringView parseString(StringViewStream & str) { - expect(str, "\""); - auto c = str.remaining.begin(), end = str.remaining.end(); - bool escaped = false; - for (; c != end && *c != '"'; c++) { - if (*c == '\\') { - c++; - if (c == end) - throw FormatError("unterminated string in derivation"); - escaped = true; + expect(str, '"'); + size_t start = 0; + size_t end = str.remaining.size(); + const auto data = str.remaining.data(); + while (start < end) { + auto idx = str.remaining.find('"', start); + if (idx == std::string_view::npos) { + break; } + size_t pos = idx; + for (; pos > 0 && data[pos - 1] == '\\'; pos--) + ; + if ((idx - pos) % 2 == 0) { // even number of backslashes + end = idx; + break; + } + start = idx + 1; } - const auto contentLen = c - str.remaining.begin(); - const auto content = str.remaining.substr(0, contentLen); - str.remaining.remove_prefix(contentLen + 1); + start = 0; + const auto content = str.remaining.substr(start, end); + str.remaining.remove_prefix(end + 1); - if (!escaped) + auto nextBackslash = content.find('\\', start); + if (nextBackslash == std::string_view::npos) { return content; + } std::string res; - res.reserve(content.size()); - for (c = content.begin(), end = content.end(); c != end; c++) - if (*c == '\\') { - c++; - res += escapes[*c]; - } else - res += *c; + res.reserve(end); + do { + if (nextBackslash == end - 1) { + throw FormatError("unterminated string in derivation"); + } + if (nextBackslash > start) { + res.append(&data[start], nextBackslash - start); + } + res.push_back(escapes[data[nextBackslash + 1]]); + start = nextBackslash + 2; + nextBackslash = content.find('\\', start); + } while (nextBackslash != std::string_view::npos); + if (end > start) { + res.append(&data[start], end - start); + } return res; } @@ -245,7 +271,7 @@ static bool endOfList(StringViewStream & str) static StringSet parseStrings(StringViewStream & str, bool arePaths) { StringSet res; - expect(str, "["); + expect(str, '['); while (!endOfList(str)) res.insert((arePaths ? parsePath(str) : parseString(str)).toOwned()); return res; @@ -258,20 +284,20 @@ static DerivationOutput parseDerivationOutput( std::string_view hashS, const ExperimentalFeatureSettings & xpSettings) { - if (hashAlgoStr != "") { + if (!hashAlgoStr.empty()) { ContentAddressMethod method = ContentAddressMethod::parsePrefix(hashAlgoStr); if (method == ContentAddressMethod::Raw::Text) xpSettings.require(Xp::DynamicDerivations); const auto hashAlgo = parseHashAlgo(hashAlgoStr); - if (hashS == "impure") { + if (hashS == "impure"sv) { xpSettings.require(Xp::ImpureDerivations); - if (pathS != "") + if (!pathS.empty()) throw FormatError("impure derivation output should not specify output path"); return DerivationOutput::Impure{ .method = std::move(method), .hashAlgo = std::move(hashAlgo), }; - } else if (hashS != "") { + } else if (!hashS.empty()) { validatePath(pathS); auto hash = Hash::parseNonSRIUnprefixed(hashS, hashAlgo); return DerivationOutput::CAFixed{ @@ -283,7 +309,7 @@ static DerivationOutput parseDerivationOutput( }; } else { xpSettings.require(Xp::CaDerivations); - if (pathS != "") + if (!pathS.empty()) throw FormatError("content-addressing derivation output should not specify output path"); return DerivationOutput::CAFloating{ .method = std::move(method), @@ -291,7 +317,7 @@ static DerivationOutput parseDerivationOutput( }; } } else { - if (pathS == "") { + if (pathS.empty()) { return DerivationOutput::Deferred{}; } validatePath(pathS); @@ -306,13 +332,13 @@ static DerivationOutput parseDerivationOutput( StringViewStream & str, const ExperimentalFeatureSettings & xpSettings = experimentalFeatureSettings) { - expect(str, ","); + expect(str, ','); const auto pathS = parseString(str); - expect(str, ","); + expect(str, ','); const auto hashAlgo = parseString(str); - expect(str, ","); + expect(str, ','); const auto hash = parseString(str); - expect(str, ")"); + expect(str, ')'); return parseDerivationOutput(store, *pathS, *hashAlgo, *hash, xpSettings); } @@ -353,17 +379,17 @@ parseDerivedPathMapNode(const StoreDirConfig & store, StringViewStream & str, De parseNonDynamic(); break; case '(': - expect(str, "("); + expect(str, '('); node.value = parseStrings(str, false); - expect(str, ",["); + expect(str, ",["sv); while (!endOfList(str)) { - expect(str, "("); + expect(str, '('); auto outputName = parseString(str).toOwned(); - expect(str, ","); + expect(str, ','); node.childMap.insert_or_assign(outputName, parseDerivedPathMapNode(store, str, version)); - expect(str, ")"); + expect(str, ')'); } - expect(str, ")"); + expect(str, ')'); break; default: throw FormatError("invalid inputDrvs entry in derivation"); @@ -386,24 +412,24 @@ Derivation parseDerivation( drv.name = name; StringViewStream str{s}; - expect(str, "D"); + expect(str, 'D'); DerivationATermVersion version; switch (str.peek()) { case 'e': - expect(str, "erive("); + expect(str, "erive("sv); version = DerivationATermVersion::Traditional; break; case 'r': { - expect(str, "rvWithVersion("); + expect(str, "rvWithVersion("sv); auto versionS = parseString(str); - if (*versionS == "xp-dyn-drv") { + if (*versionS == "xp-dyn-drv"sv) { // Only version we have so far version = DerivationATermVersion::DynamicDerivations; xpSettings.require(Xp::DynamicDerivations); } else { throw FormatError("Unknown derivation ATerm format version '%s'", *versionS); } - expect(str, ","); + expect(str, ','); break; } default: @@ -411,50 +437,53 @@ Derivation parseDerivation( } /* Parse the list of outputs. */ - expect(str, "["); + expect(str, '['); while (!endOfList(str)) { - expect(str, "("); + expect(str, '('); std::string id = parseString(str).toOwned(); auto output = parseDerivationOutput(store, str, xpSettings); drv.outputs.emplace(std::move(id), std::move(output)); } /* Parse the list of input derivations. */ - expect(str, ",["); + expect(str, ",["sv); while (!endOfList(str)) { - expect(str, "("); + expect(str, '('); auto drvPath = parsePath(str); - expect(str, ","); + expect(str, ','); drv.inputDrvs.map.insert_or_assign( store.parseStorePath(*drvPath), parseDerivedPathMapNode(store, str, version)); - expect(str, ")"); + expect(str, ')'); } - expect(str, ","); + expect(str, ','); drv.inputSrcs = store.parseStorePathSet(parseStrings(str, true)); - expect(str, ","); + expect(str, ','); drv.platform = parseString(str).toOwned(); - expect(str, ","); + expect(str, ','); drv.builder = parseString(str).toOwned(); /* Parse the builder arguments. */ - expect(str, ",["); + expect(str, ",["sv); while (!endOfList(str)) drv.args.push_back(parseString(str).toOwned()); /* Parse the environment variables. */ - expect(str, ",["); + expect(str, ",["sv); while (!endOfList(str)) { - expect(str, "("); + expect(str, '('); auto name = parseString(str).toOwned(); - expect(str, ","); - auto value = parseString(str).toOwned(); - expect(str, ")"); - drv.env.insert_or_assign(std::move(name), std::move(value)); + expect(str, ','); + auto value = parseString(str); + if (name == StructuredAttrs::envVarName) { + drv.structuredAttrs = StructuredAttrs::parse(*std::move(value)); + } else { + drv.env.insert_or_assign(std::move(name), std::move(value).toOwned()); + } + expect(str, ')'); } - drv.structuredAttrs = StructuredAttrs::tryExtract(drv.env); - expect(str, ")"); + expect(str, ')'); return drv; } @@ -537,9 +566,9 @@ static void unparseDerivedPathMapNode( if (node.childMap.empty()) { printUnquotedStrings(s, node.value.begin(), node.value.end()); } else { - s += "("; + s += '('; printUnquotedStrings(s, node.value.begin(), node.value.end()); - s += ",["; + s += ",["sv; bool first = true; for (auto & [outputName, childNode] : node.childMap) { if (first) @@ -551,7 +580,7 @@ static void unparseDerivedPathMapNode( unparseDerivedPathMapNode(store, s, childNode); s += ')'; } - s += "])"; + s += "])"sv; } } @@ -583,16 +612,16 @@ std::string Derivation::unparse( newer form only if we need it, which we do for `Xp::DynamicDerivations`. */ if (hasDynamicDrvDep(*this)) { - s += "DrvWithVersion("; + s += "DrvWithVersion("sv; // Only version we have so far - printUnquotedString(s, "xp-dyn-drv"); - s += ","; + printUnquotedString(s, "xp-dyn-drv"sv); + s += ','; } else { - s += "Derive("; + s += "Derive("sv; } bool first = true; - s += "["; + s += '['; for (auto & i : outputs) { if (first) first = false; @@ -604,15 +633,15 @@ std::string Derivation::unparse( overloaded{ [&](const DerivationOutput::InputAddressed & doi) { s += ','; - printUnquotedString(s, maskOutputs ? "" : store.printStorePath(doi.path)); + printUnquotedString(s, maskOutputs ? ""sv : store.printStorePath(doi.path)); s += ','; - printUnquotedString(s, ""); + printUnquotedString(s, {}); s += ','; - printUnquotedString(s, ""); + printUnquotedString(s, {}); }, [&](const DerivationOutput::CAFixed & dof) { s += ','; - printUnquotedString(s, maskOutputs ? "" : store.printStorePath(dof.path(store, name, i.first))); + printUnquotedString(s, maskOutputs ? ""sv : store.printStorePath(dof.path(store, name, i.first))); s += ','; printUnquotedString(s, dof.ca.printMethodAlgo()); s += ','; @@ -620,34 +649,34 @@ std::string Derivation::unparse( }, [&](const DerivationOutput::CAFloating & dof) { s += ','; - printUnquotedString(s, ""); + printUnquotedString(s, {}); s += ','; printUnquotedString(s, std::string{dof.method.renderPrefix()} + printHashAlgo(dof.hashAlgo)); s += ','; - printUnquotedString(s, ""); + printUnquotedString(s, {}); }, [&](const DerivationOutput::Deferred &) { s += ','; - printUnquotedString(s, ""); + printUnquotedString(s, {}); s += ','; - printUnquotedString(s, ""); + printUnquotedString(s, {}); s += ','; - printUnquotedString(s, ""); + printUnquotedString(s, {}); }, [&](const DerivationOutput::Impure & doi) { // FIXME s += ','; - printUnquotedString(s, ""); + printUnquotedString(s, {}); s += ','; printUnquotedString(s, std::string{doi.method.renderPrefix()} + printHashAlgo(doi.hashAlgo)); s += ','; - printUnquotedString(s, "impure"); + printUnquotedString(s, "impure"sv); }}, i.second.raw); s += ')'; } - s += "],["; + s += "],["sv; first = true; if (actualInputs) { for (auto & [drvHashModulo, childMap] : *actualInputs) { @@ -673,7 +702,7 @@ std::string Derivation::unparse( } } - s += "],"; + s += "],"sv; auto paths = store.printStorePathSet(inputSrcs); // FIXME: slow printUnquotedStrings(s, paths.begin(), paths.end()); @@ -684,7 +713,7 @@ std::string Derivation::unparse( s += ','; printStrings(s, args.begin(), args.end()); - s += ",["; + s += ",["sv; first = true; auto unparseEnv = [&](const StringPairs atermEnv) { @@ -696,7 +725,7 @@ std::string Derivation::unparse( s += '('; printString(s, i.first); s += ','; - printString(s, maskOutputs && outputs.count(i.first) ? "" : i.second); + printString(s, maskOutputs && outputs.count(i.first) ? ""sv : i.second); s += ')'; } }; @@ -710,7 +739,7 @@ std::string Derivation::unparse( unparseEnv(env); } - s += "])"; + s += "])"sv; return s; } @@ -724,8 +753,8 @@ bool isDerivation(std::string_view fileName) std::string outputPathName(std::string_view drvName, OutputNameView outputName) { std::string res{drvName}; - if (outputName != "out") { - res += "-"; + if (outputName != "out"sv) { + res += '-'; res += outputName; } return res; @@ -772,7 +801,7 @@ DerivationType BasicDerivation::type() const if (fixedCAOutputs.size() > 1) // FIXME: Experimental feature? throw Error("only one fixed output is allowed for now"); - if (*fixedCAOutputs.begin() != "out") + if (*fixedCAOutputs.begin() != "out"sv) throw Error("single fixed output must be named \"out\""); return DerivationType::ContentAddressed{ .sandboxed = false, @@ -1046,13 +1075,9 @@ void BasicDerivation::applyRewrites(const StringMap & rewrites) if (structuredAttrs) { // TODO rewrite the JSON AST properly, rather than dump parse round trip. - auto [k, jsonS] = structuredAttrs->unparse(); - jsonS = rewriteStrings(jsonS, rewrites); - StringPairs newEnv; - newEnv.insert(std::pair{k, std::move(jsonS)}); - auto newStructuredAttrs = StructuredAttrs::tryExtract(newEnv); - assert(newStructuredAttrs); - structuredAttrs = std::move(*newStructuredAttrs); + auto [_, jsonS] = structuredAttrs->unparse(); + jsonS = rewriteStrings(std::move(jsonS), rewrites); + structuredAttrs = StructuredAttrs::parse(jsonS); } } diff --git a/src/libstore/include/nix/store/parsed-derivations.hh b/src/libstore/include/nix/store/parsed-derivations.hh index 2638a3500..edef1b2d2 100644 --- a/src/libstore/include/nix/store/parsed-derivations.hh +++ b/src/libstore/include/nix/store/parsed-derivations.hh @@ -16,6 +16,8 @@ typedef std::map DerivationOutputs; struct StructuredAttrs { + static constexpr std::string_view envVarName{"__json"}; + nlohmann::json structuredAttrs; bool operator==(const StructuredAttrs &) const = default; @@ -23,7 +25,7 @@ struct StructuredAttrs /** * Unconditionally parse from a JSON string. Used by `tryExtract`. */ - static StructuredAttrs parse(const std::string & encoded); + static StructuredAttrs parse(std::string_view encoded); /** * Like `tryParse`, but removes the env var which encoded the structured diff --git a/src/libstore/parsed-derivations.cc b/src/libstore/parsed-derivations.cc index 797230e97..1006bbc0a 100644 --- a/src/libstore/parsed-derivations.cc +++ b/src/libstore/parsed-derivations.cc @@ -8,16 +8,14 @@ namespace nix { -static constexpr std::string_view envVarName = "__json"; - -StructuredAttrs StructuredAttrs::parse(const std::string & encoded) +StructuredAttrs StructuredAttrs::parse(std::string_view encoded) { try { return StructuredAttrs{ .structuredAttrs = nlohmann::json::parse(encoded), }; } catch (std::exception & e) { - throw Error("cannot process __json attribute: %s", e.what()); + throw Error("cannot process %s attribute: %s", envVarName, e.what()); } }