From 723c47550e9059737133cca2ecb39b6bbf56fb56 Mon Sep 17 00:00:00 2001 From: Kamil Monicz Date: Thu, 18 Dec 2025 03:07:34 +0000 Subject: [PATCH 1/3] libexpr-tests: add nix-expr-benchmarks Provides focused microbenchmarks for expression evaluation hot paths (dynamic attrs, getDerivations attr scanning, and repeated builtins.match). --- src/libexpr-tests/bench-main.cc | 14 ++++++ src/libexpr-tests/dynamic-attrs-bench.cc | 55 ++++++++++++++++++++ src/libexpr-tests/get-drvs-bench.cc | 64 ++++++++++++++++++++++++ src/libexpr-tests/meson.build | 30 +++++++++++ src/libexpr-tests/meson.options | 9 ++++ src/libexpr-tests/package.nix | 2 +- src/libexpr-tests/regex-cache-bench.cc | 45 +++++++++++++++++ 7 files changed, 218 insertions(+), 1 deletion(-) create mode 100644 src/libexpr-tests/bench-main.cc create mode 100644 src/libexpr-tests/dynamic-attrs-bench.cc create mode 100644 src/libexpr-tests/get-drvs-bench.cc create mode 100644 src/libexpr-tests/meson.options create mode 100644 src/libexpr-tests/regex-cache-bench.cc diff --git a/src/libexpr-tests/bench-main.cc b/src/libexpr-tests/bench-main.cc new file mode 100644 index 000000000..13dca6b4e --- /dev/null +++ b/src/libexpr-tests/bench-main.cc @@ -0,0 +1,14 @@ +#include <benchmark/benchmark.h> + +#include "nix/expr/eval-gc.hh" +#include "nix/store/globals.hh" + +int main(int argc, char ** argv) +{ + nix::initLibStore(false); + nix::initGC(); + + ::benchmark::Initialize(&argc, argv); + ::benchmark::RunSpecifiedBenchmarks(); + return 0; +} diff --git a/src/libexpr-tests/dynamic-attrs-bench.cc b/src/libexpr-tests/dynamic-attrs-bench.cc new file mode 100644 index 000000000..553d5f9a8 --- /dev/null +++ b/src/libexpr-tests/dynamic-attrs-bench.cc @@ -0,0 +1,55 @@ +#include <benchmark/benchmark.h> + +#include "nix/expr/eval.hh" +#include "nix/expr/eval-settings.hh" +#include "nix/fetchers/fetch-settings.hh" +#include "nix/store/store-open.hh" + +using namespace nix; + +static std::string mkDynamicAttrsExpr(size_t attrCount) +{ + std::string res; + res.reserve(attrCount * 24); + 
res += "{ "; + for (size_t i = 0; i < attrCount; ++i) { + res += "${\"a"; + res += std::to_string(i); + res += "\"} = "; + res += std::to_string(i); + res += "; "; + } + res += "}"; + return res; +} + +static void BM_EvalDynamicAttrs(benchmark::State & state) +{ + const auto attrCount = static_cast<size_t>(state.range(0)); + const auto exprStr = mkDynamicAttrsExpr(attrCount); + + for (auto _ : state) { + state.PauseTiming(); + + auto store = openStore("dummy://"); + fetchers::Settings fetchSettings{}; + bool readOnlyMode = true; + EvalSettings evalSettings{readOnlyMode}; + evalSettings.nixPath = {}; + + EvalState st({}, store, fetchSettings, evalSettings, nullptr); + Expr * expr = st.parseExprFromString(exprStr, st.rootPath(CanonPath::root)); + + Value v; + + state.ResumeTiming(); + + st.eval(expr, v); + st.forceValue(v, noPos); + benchmark::DoNotOptimize(v); + } + + state.SetItemsProcessed(state.iterations() * attrCount); +} + +BENCHMARK(BM_EvalDynamicAttrs)->Arg(100)->Arg(500)->Arg(2'000); diff --git a/src/libexpr-tests/get-drvs-bench.cc b/src/libexpr-tests/get-drvs-bench.cc new file mode 100644 index 000000000..c6a6fc32c --- /dev/null +++ b/src/libexpr-tests/get-drvs-bench.cc @@ -0,0 +1,64 @@ +#include <benchmark/benchmark.h> + +#include "nix/expr/get-drvs.hh" +#include "nix/expr/eval-settings.hh" +#include "nix/fetchers/fetch-settings.hh" +#include "nix/store/store-open.hh" +#include "nix/util/fmt.hh" + +using namespace nix; + +namespace { + +struct GetDerivationsEnv +{ + ref store = openStore("dummy://"); + fetchers::Settings fetchSettings{}; + bool readOnlyMode = true; + EvalSettings evalSettings{readOnlyMode}; + EvalState state; + + Bindings * autoArgs = nullptr; + Value attrsValue; + + explicit GetDerivationsEnv(size_t attrCount) + : evalSettings([&]() { + EvalSettings settings{readOnlyMode}; + settings.nixPath = {}; + return settings; + }()) + , state({}, store, fetchSettings, evalSettings, nullptr) + { + autoArgs = state.buildBindings(0).finish(); + + auto attrs = 
state.buildBindings(attrCount); + + for (size_t i = 0; i < attrCount; ++i) { + auto name = fmt("pkg%|1$06d|", i); + auto sym = state.symbols.create(name); + auto & v = attrs.alloc(sym); + v.mkInt(i); + } + + attrsValue.mkAttrs(attrs.finish()); + } +}; + +} // namespace + +static void BM_GetDerivationsAttrScan(benchmark::State & state) +{ + const auto attrCount = static_cast<size_t>(state.range(0)); + GetDerivationsEnv env(attrCount); + + for (auto _ : state) { + PackageInfos drvs; + getDerivations( + env.state, env.attrsValue, /*pathPrefix=*/"", *env.autoArgs, drvs, /*ignoreAssertionFailures=*/true); + benchmark::DoNotOptimize(drvs.size()); + } + + state.SetItemsProcessed(state.iterations() * attrCount); +} + +BENCHMARK(BM_GetDerivationsAttrScan)->Arg(1'000)->Arg(5'000)->Arg(10'000); diff --git a/src/libexpr-tests/meson.build b/src/libexpr-tests/meson.build index c5dafe0de..c5b72851d 100644 --- a/src/libexpr-tests/meson.build +++ b/src/libexpr-tests/meson.build @@ -87,3 +87,33 @@ test( }, protocol : 'gtest', ) + +# Build benchmarks if enabled +if get_option('benchmarks') + gbenchmark = dependency('benchmark', required : true) + + benchmark_sources = files( + 'bench-main.cc', + 'dynamic-attrs-bench.cc', + 'get-drvs-bench.cc', + 'regex-cache-bench.cc', + ) + + benchmark_exe = executable( + 'nix-expr-benchmarks', + benchmark_sources, + config_priv_h, + dependencies : deps_private_subproject + deps_private + deps_other + [ + gbenchmark, + ], + include_directories : include_dirs, + link_args : linker_export_flags, + install : true, + cpp_pch : do_pch ? 
[ 'pch/precompiled-headers.hh' ] : [], + ) + + benchmark( + 'nix-expr-benchmarks', + benchmark_exe, + ) +endif diff --git a/src/libexpr-tests/meson.options b/src/libexpr-tests/meson.options new file mode 100644 index 000000000..2b3c1af60 --- /dev/null +++ b/src/libexpr-tests/meson.options @@ -0,0 +1,9 @@ +# vim: filetype=meson + +option( + 'benchmarks', + type : 'boolean', + value : false, + description : 'Build benchmarks (requires gbenchmark)', + yield : true, +) diff --git a/src/libexpr-tests/package.nix b/src/libexpr-tests/package.nix index 51d52e935..3af1f52d3 100644 --- a/src/libexpr-tests/package.nix +++ b/src/libexpr-tests/package.nix @@ -33,7 +33,7 @@ mkMesonExecutable (finalAttrs: { ../../.version ./.version ./meson.build - # ./meson.options + ./meson.options (fileset.fileFilter (file: file.hasExt "cc") ./.) (fileset.fileFilter (file: file.hasExt "hh") ./.) ]; diff --git a/src/libexpr-tests/regex-cache-bench.cc b/src/libexpr-tests/regex-cache-bench.cc new file mode 100644 index 000000000..36a350e2e --- /dev/null +++ b/src/libexpr-tests/regex-cache-bench.cc @@ -0,0 +1,45 @@ +#include <benchmark/benchmark.h> + +#include "nix/expr/eval.hh" +#include "nix/expr/eval-settings.hh" +#include "nix/fetchers/fetch-settings.hh" +#include "nix/store/store-open.hh" + +using namespace nix; + +static void BM_EvalManyBuiltinsMatchSameRegex(benchmark::State & state) +{ + static constexpr int iterations = 5'000; + + static constexpr std::string_view exprStr = + "builtins.foldl' " + "(acc: _: acc + builtins.length (builtins.match \"a\" \"a\")) " + "0 " + "(builtins.genList (x: x) " + "5000)"; + + for (auto _ : state) { + state.PauseTiming(); + + auto store = openStore("dummy://"); + fetchers::Settings fetchSettings{}; + bool readOnlyMode = true; + EvalSettings evalSettings{readOnlyMode}; + evalSettings.nixPath = {}; + + EvalState st({}, store, fetchSettings, evalSettings, nullptr); + Expr * expr = st.parseExprFromString(std::string(exprStr), st.rootPath(CanonPath::root)); + + Value v; + + 
state.ResumeTiming(); + + st.eval(expr, v); + st.forceValue(v, noPos); + benchmark::DoNotOptimize(v); + } + + state.SetItemsProcessed(state.iterations() * iterations); +} + +BENCHMARK(BM_EvalManyBuiltinsMatchSameRegex); From 0c8751d3f42b467556eba4a9662d0ca539e533ca Mon Sep 17 00:00:00 2001 From: Kamil Monicz Date: Thu, 18 Dec 2025 03:11:38 +0000 Subject: [PATCH 2/3] libexpr: avoid std::regex copies on RegexCache hits - RegexCache::get() returned std::regex by value, copying the compiled regex on every cache hit. - Store the compiled regex behind std::shared_ptr and return the shared pointer instead, so callers reuse the same compiled object. - BM_EvalManyBuiltinsMatchSameRegex_mean improved about 8% --- src/libexpr/primops.cc | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/src/libexpr/primops.cc b/src/libexpr/primops.cc index 9ac65cecd..98b4c1296 100644 --- a/src/libexpr/primops.cc +++ b/src/libexpr/primops.cc @@ -4709,21 +4709,28 @@ static RegisterPrimOp primop_convertHash({ struct RegexCache { - boost::concurrent_flat_map> cache; - - std::regex get(std::string_view re) + struct Entry { - std::regex regex; - /* No std::regex constructor overload from std::string_view, but can be constructed - from a pointer + size or an iterator range. 
ref regex; + + Entry(const char * s, size_t count) + : regex(make_ref(s, count, std::regex::extended)) + { + } + }; + + boost::concurrent_flat_map> cache; + + ref get(std::string_view re) + { + std::optional> regex; cache.try_emplace_and_cvisit( re, /*s=*/re.data(), /*count=*/re.size(), - std::regex::extended, - [&regex](const auto & kv) { regex = kv.second; }, - [&regex](const auto & kv) { regex = kv.second; }); - return regex; + [&regex](const auto & kv) { regex = kv.second.regex; }, + [&regex](const auto & kv) { regex = kv.second.regex; }); + return *regex; } }; @@ -4745,7 +4752,7 @@ void prim_match(EvalState & state, const PosIdx pos, Value ** args, Value & v) state.forceString(*args[1], context, pos, "while evaluating the second argument passed to builtins.match"); std::cmatch match; - if (!std::regex_match(str.begin(), str.end(), match, regex)) { + if (!std::regex_match(str.begin(), str.end(), match, *regex)) { v.mkNull(); return; } @@ -4818,7 +4825,7 @@ void prim_split(EvalState & state, const PosIdx pos, Value ** args, Value & v) const auto str = state.forceString(*args[1], context, pos, "while evaluating the second argument passed to builtins.split"); - auto begin = std::cregex_iterator(str.begin(), str.end(), regex); + auto begin = std::cregex_iterator(str.begin(), str.end(), *regex); auto end = std::cregex_iterator(); // Any matches results are surrounded by non-matching results. From 048d0b67817a903b9fb4fe89139853eae57b7858 Mon Sep 17 00:00:00 2001 From: Kamil Monicz Date: Thu, 18 Dec 2025 03:26:35 +0000 Subject: [PATCH 3/3] libexpr: avoid regex engine in getDerivations attr filtering MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - getDerivations() filters attribute names with std::regex_match, which runs the regex engine for every attribute visited during nixpkgs scanning. 
- BM_GetDerivationsAttrScan/10000_mean: 3.338 ms → 1.506 ms (≈ -54.9%) --- src/libexpr/get-drvs.cc | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/src/libexpr/get-drvs.cc b/src/libexpr/get-drvs.cc index c4a2b00af..a15f1085d 100644 --- a/src/libexpr/get-drvs.cc +++ b/src/libexpr/get-drvs.cc @@ -367,7 +367,26 @@ static std::string addToPath(const std::string & s1, std::string_view s2) return s1.empty() ? std::string(s2) : s1 + "." + s2; } -static std::regex attrRegex("[A-Za-z_][A-Za-z0-9-_+]*"); +static bool isAttrPathComponent(std::string_view symbol) +{ + if (symbol.empty()) + return false; + + /* [A-Za-z_] */ + unsigned char first = symbol[0]; + if (!((first >= 'A' && first <= 'Z') || (first >= 'a' && first <= 'z') || first == '_')) + return false; + + /* [A-Za-z0-9-_+]* */ + for (unsigned char c : symbol.substr(1)) { + if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || c == '-' || c == '_' + || c == '+') + continue; + return false; + } + + return true; +} static void getDerivations( EvalState & state, @@ -400,7 +419,7 @@ static void getDerivations( std::string_view symbol{state.symbols[i->name]}; try { debug("evaluating attribute '%1%'", symbol); - if (!std::regex_match(symbol.begin(), symbol.end(), attrRegex)) + if (!isAttrPathComponent(symbol)) continue; std::string pathPrefix2 = addToPath(pathPrefix, symbol); if (combineChannels)