From 9d2100a165bcdbadf23e56791e462c5531c68639 Mon Sep 17 00:00:00 2001 From: Kamil Monicz Date: Thu, 18 Dec 2025 03:34:32 +0000 Subject: [PATCH 1/4] libstore-tests: benchmark registerValidPaths on derivations - Add a focused nix-store-benchmarks benchmark that registers many derivation paths into a temporary local store root --- src/libstore-tests/meson.build | 1 + .../register-valid-paths-bench.cc | 79 +++++++++++++++++++ 2 files changed, 80 insertions(+) create mode 100644 src/libstore-tests/register-valid-paths-bench.cc diff --git a/src/libstore-tests/meson.build b/src/libstore-tests/meson.build index 123bccb90..2d12b14d3 100644 --- a/src/libstore-tests/meson.build +++ b/src/libstore-tests/meson.build @@ -124,6 +124,7 @@ if get_option('benchmarks') 'bench-main.cc', 'derivation-parser-bench.cc', 'ref-scan-bench.cc', + 'register-valid-paths-bench.cc', ) benchmark_exe = executable( diff --git a/src/libstore-tests/register-valid-paths-bench.cc b/src/libstore-tests/register-valid-paths-bench.cc new file mode 100644 index 000000000..bd8cb113b --- /dev/null +++ b/src/libstore-tests/register-valid-paths-bench.cc @@ -0,0 +1,79 @@ +#include + +#include "nix/store/derivations.hh" +#include "nix/store/local-store.hh" +#include "nix/store/store-open.hh" +#include "nix/util/file-system.hh" +#include "nix/util/hash.hh" +#include "nix/util/tests/test-data.hh" + +#ifndef _WIN32 + +# include +# include + +using namespace nix; + +static void BM_RegisterValidPathsDerivations(benchmark::State & state) +{ + const int derivationCount = state.range(0); + + for (auto _ : state) { + state.PauseTiming(); + + auto tmpRoot = createTempDir(); + auto realStoreDir = tmpRoot / "nix/store"; + std::filesystem::create_directories(realStoreDir); + + std::shared_ptr store = openStore(fmt("local?root=%s", tmpRoot.string())); + auto localStore = std::dynamic_pointer_cast(store); + if (!localStore) + throw Error("expected local store"); + + ValidPathInfos infos; + for (int i = 0; i < derivationCount; ++i) { + std::string drvName = fmt("register-valid-paths-bench-%d", i); + auto drvPath = StorePath::random(drvName + ".drv"); + + Derivation drv; + drv.name = drvName; + drv.outputs.emplace("out", DerivationOutput{DerivationOutput::Deferred{}}); + drv.platform = "x86_64-linux"; + drv.builder = "foo"; + drv.env["out"] = ""; + drv.fillInOutputPaths(*localStore); + + auto drvContents = drv.unparse(*localStore, /*maskOutputs=*/false); + + /* Create an on-disk store object without registering it + in the SQLite DB. LocalFSStore::getFSAccessor(path, false) + allows reading store objects based on their filesystem + presence alone. */ + std::ofstream out(realStoreDir / std::string(drvPath.to_string()), std::ios::binary); + out.write(drvContents.data(), drvContents.size()); + if (!out) + throw SysError("writing derivation to store"); + + ValidPathInfo info{drvPath, UnkeyedValidPathInfo(*localStore, Hash::dummy)}; + info.narSize = drvContents.size(); + + infos.emplace(drvPath, std::move(info)); + } + + state.ResumeTiming(); + + localStore->registerValidPaths(infos); + + state.PauseTiming(); + localStore.reset(); + store.reset(); + std::filesystem::remove_all(tmpRoot); + state.ResumeTiming(); + } + + state.SetItemsProcessed(state.iterations() * derivationCount); +} + +BENCHMARK(BM_RegisterValidPathsDerivations)->Arg(10)->Arg(50)->Arg(200); + +#endif From cccfa385e66c5b010a89db16a8e3b39b978ceeea Mon Sep 17 00:00:00 2001 From: Kamil Monicz Date: Thu, 18 Dec 2025 03:46:44 +0000 Subject: [PATCH 2/4] libstore: reuse parsed derivations in registerValidPaths MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - LocalStore::registerValidPaths() parsed derivations twice: once in addValidPath() and again when calling checkInvariants(), despite already having loaded the derivation. - Plumb the parsed Derivation out of addValidPath() and reuse it for the invariant check pass, falling back to re-parsing only when a derivation wasn’t newly registered in this call. - BM_RegisterValidPathsDerivations/200_mean runs 32% faster --- src/libstore/include/nix/store/local-store.hh | 3 ++- src/libstore/local-store.cc | 27 ++++++++++++++----- 2 files changed, 22 insertions(+), 8 deletions(-) diff --git a/src/libstore/include/nix/store/local-store.hh b/src/libstore/include/nix/store/local-store.hh index 7d93d7045..0dd0498a2 100644 --- a/src/libstore/include/nix/store/local-store.hh +++ b/src/libstore/include/nix/store/local-store.hh @@ -420,7 +420,8 @@ private: uint64_t queryValidPathId(State & state, const StorePath & path); - uint64_t addValidPath(State & state, const ValidPathInfo & info, bool checkOutputs = true); + uint64_t + addValidPath(State & state, const ValidPathInfo & info, bool checkOutputs = true, Derivation * drv = nullptr); void invalidatePath(State & state, const StorePath & path); diff --git a/src/libstore/local-store.cc b/src/libstore/local-store.cc index c5fd85fff..2a5c65be2 100644 --- a/src/libstore/local-store.cc +++ b/src/libstore/local-store.cc @@ -647,7 +647,7 @@ void LocalStore::cacheDrvOutputMapping( [&]() { state.stmts->AddDerivationOutput.use()(deriver)(outputName) (printStorePath(output)).exec(); }); } -uint64_t LocalStore::addValidPath(State & state, const ValidPathInfo & info, bool checkOutputs) +uint64_t LocalStore::addValidPath(State & state, const ValidPathInfo & info, bool checkOutputs, Derivation * drv) { if (info.ca.has_value() && !info.isContentAddressed(*this)) throw Error( @@ -668,7 +668,7 @@ uint64_t LocalStore::addValidPath(State & state, const ValidPathInfo & info, boo efficiently query whether a path is an output of some derivation. */ if (info.path.isDerivation()) { - auto drv = readInvalidDerivation(info.path); + auto parsedDrv = readInvalidDerivation(info.path); /* Verify that the output paths in the derivation are correct (i.e., follow the scheme for computing output paths from @@ -676,14 +676,17 @@ uint64_t LocalStore::addValidPath(State & state, const ValidPathInfo & info, boo DB transaction is rolled back, so the path validity registration above is undone. */ if (checkOutputs) - drv.checkInvariants(*this, info.path); + parsedDrv.checkInvariants(*this, info.path); - for (auto & i : drv.outputsAndOptPaths(*this)) { + for (auto & i : parsedDrv.outputsAndOptPaths(*this)) { /* Floating CA derivations have indeterminate output paths until they are built, so don't register anything in that case */ if (i.second.second) cacheDrvOutputMapping(state, id, i.first, *i.second.second); } + + if (drv) + *drv = std::move(parsedDrv); } pathInfoCache->lock()->upsert(info.path, PathInfoCacheValue{.value = std::make_shared(info)}); @@ -924,12 +927,19 @@ void LocalStore::registerValidPaths(const ValidPathInfos & infos) SQLiteTxn txn(state->db); StorePathSet paths; + std::map derivations; + for (auto & [_, i] : infos) { assert(i.narHash.algo == HashAlgorithm::SHA256); if (isValidPath_(*state, i.path)) updatePathInfo(*state, i); - else + else if (i.path.isDerivation()) { + Derivation drv; + addValidPath(*state, i, false, &drv); + derivations.emplace(i.path, std::move(drv)); + } else { addValidPath(*state, i, false); + } paths.insert(i.path); } @@ -944,8 +954,11 @@ void LocalStore::registerValidPaths(const ValidPathInfos & infos) not be valid yet. */ for (auto & [_, i] : infos) if (i.path.isDerivation()) { - // FIXME: inefficient; we already loaded the derivation in addValidPath(). - readInvalidDerivation(i.path).checkInvariants(*this, i.path); + if (auto drv = derivations.find(i.path); drv != derivations.end()) { + drv->second.checkInvariants(*this, i.path); + } else { + readInvalidDerivation(i.path).checkInvariants(*this, i.path); + } } /* Do a topological sort of the paths. This will throw an From 1f739961e5639635246ae9fd5491adb0a6c02cf6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Thalheim?= Date: Thu, 18 Dec 2025 09:28:12 +0100 Subject: [PATCH 3/4] libstore: simplify registerValidPaths by removing redundant checkInvariants loop The separate checkInvariants loop after addValidPath was added in 2014 (d210cdc43) to work around an assertion failure: nix-store: derivations.cc:242: Assertion 'store.isValidPath(i->first)' failed. At that time, hashDerivationModulo() contained assert(store.isValidPath(...)) which required input derivations to be registered as valid in the database before computing their hash. The workaround was to: 1. Call addValidPath with checkOutputs=false 2. Add all references to the database 3. Run checkInvariants in a separate loop after paths were valid In 2020 (bccff827d), the isValidPath assertion was removed to fix a deadlock in IFD through the daemon (issue #4235). The fix changed hashDerivationModulo to use readInvalidDerivation, which reads directly from the filesystem without requiring database validity. This made the separate checkInvariants loop unnecessary, but nobody noticed the code could be simplified. The comment "We can't do this in addValidPath() above, because the references might not be valid yet" became stale. Now we simply call addValidPath() with the default checkOutputs=true, which runs checkInvariants internally using the already-parsed derivation. This commit eliminates the separate loop over derivations. --- src/libstore/include/nix/store/local-store.hh | 3 +- src/libstore/local-store.cc | 31 +++---------------- 2 files changed, 5 insertions(+), 29 deletions(-) diff --git a/src/libstore/include/nix/store/local-store.hh b/src/libstore/include/nix/store/local-store.hh index 0dd0498a2..b5de22095 100644 --- a/src/libstore/include/nix/store/local-store.hh +++ b/src/libstore/include/nix/store/local-store.hh @@ -420,8 +420,7 @@ private: uint64_t queryValidPathId(State & state, const StorePath & path); - uint64_t - addValidPath(State & state, const ValidPathInfo & info, bool checkOutputs = true, Derivation * drv = nullptr); + uint64_t addValidPath(State & state, const ValidPathInfo & info); void invalidatePath(State & state, const StorePath & path); diff --git a/src/libstore/local-store.cc b/src/libstore/local-store.cc index 2a5c65be2..70ce6b4f2 100644 --- a/src/libstore/local-store.cc +++ b/src/libstore/local-store.cc @@ -647,7 +647,7 @@ void LocalStore::cacheDrvOutputMapping( [&]() { state.stmts->AddDerivationOutput.use()(deriver)(outputName) (printStorePath(output)).exec(); }); } -uint64_t LocalStore::addValidPath(State & state, const ValidPathInfo & info, bool checkOutputs, Derivation * drv) +uint64_t LocalStore::addValidPath(State & state, const ValidPathInfo & info) { if (info.ca.has_value() && !info.isContentAddressed(*this)) throw Error( @@ -675,8 +675,7 @@ uint64_t LocalStore::addValidPath(State & state, const ValidPathInfo & info, boo derivations). Note that if this throws an error, then the DB transaction is rolled back, so the path validity registration above is undone. */ - if (checkOutputs) - parsedDrv.checkInvariants(*this, info.path); + parsedDrv.checkInvariants(*this, info.path); for (auto & i : parsedDrv.outputsAndOptPaths(*this)) { /* Floating CA derivations have indeterminate output paths until @@ -684,9 +683,6 @@ uint64_t LocalStore::addValidPath(State & state, const ValidPathInfo & info, boo if (i.second.second) cacheDrvOutputMapping(state, id, i.first, *i.second.second); } - - if (drv) - *drv = std::move(parsedDrv); } pathInfoCache->lock()->upsert(info.path, PathInfoCacheValue{.value = std::make_shared(info)}); @@ -927,19 +923,12 @@ void LocalStore::registerValidPaths(const ValidPathInfos & infos) SQLiteTxn txn(state->db); StorePathSet paths; - std::map derivations; - for (auto & [_, i] : infos) { assert(i.narHash.algo == HashAlgorithm::SHA256); if (isValidPath_(*state, i.path)) updatePathInfo(*state, i); - else if (i.path.isDerivation()) { - Derivation drv; - addValidPath(*state, i, false, &drv); - derivations.emplace(i.path, std::move(drv)); - } else { - addValidPath(*state, i, false); - } + else + addValidPath(*state, i); paths.insert(i.path); } @@ -949,18 +938,6 @@ void LocalStore::registerValidPaths(const ValidPathInfos & infos) state->stmts->AddReference.use()(referrer)(queryValidPathId(*state, j)).exec(); } - /* Check that the derivation outputs are correct. We can't do - this in addValidPath() above, because the references might - not be valid yet. */ - for (auto & [_, i] : infos) - if (i.path.isDerivation()) { - if (auto drv = derivations.find(i.path); drv != derivations.end()) { - drv->second.checkInvariants(*this, i.path); - } else { - readInvalidDerivation(i.path).checkInvariants(*this, i.path); - } - } - /* Do a topological sort of the paths. This will throw an error if a cycle is detected and roll back the transaction. Cycles can only occur when a derivation From 994324fedaa7295e2449c8a881236c75278715d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Thalheim?= Date: Thu, 18 Dec 2025 09:46:03 +0100 Subject: [PATCH 4/4] libstore-tests: reduce registerValidPaths benchmark to single test case Testing with 10 derivations is sufficient to verify performance characteristics. The larger test cases (50, 200) don't provide additional insight and slow down the benchmark unnecessarily. --- src/libstore-tests/register-valid-paths-bench.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libstore-tests/register-valid-paths-bench.cc b/src/libstore-tests/register-valid-paths-bench.cc index bd8cb113b..1d7958183 100644 --- a/src/libstore-tests/register-valid-paths-bench.cc +++ b/src/libstore-tests/register-valid-paths-bench.cc @@ -74,6 +74,6 @@ static void BM_RegisterValidPathsDerivations(benchmark::State & state) state.SetItemsProcessed(state.iterations() * derivationCount); } -BENCHMARK(BM_RegisterValidPathsDerivations)->Arg(10)->Arg(50)->Arg(200); +BENCHMARK(BM_RegisterValidPathsDerivations)->Arg(10); #endif