From 20df0e5fa7820bfc6cafaac39ca29f33542fe146 Mon Sep 17 00:00:00 2001
From: Sergei Zimmerman
Date: Mon, 4 Aug 2025 02:11:14 +0300
Subject: [PATCH] libstore-tests: Add reference scanning benchmark

This benchmark should provide a relatively realistic
scenario for reference scanning.

As shown by the following results, reference scanning code is already
plenty fast and is definitely not a bottleneck:

```
BM_RefScanSinkRandom/10000          1672 ns         1682 ns       413354 bytes_per_second=5.53691Gi/s
BM_RefScanSinkRandom/100000        11217 ns        11124 ns        64341 bytes_per_second=8.37231Gi/s
BM_RefScanSinkRandom/1000000      205745 ns       204421 ns         3360 bytes_per_second=4.55591Gi/s
BM_RefScanSinkRandom/5000000     1208407 ns      1201046 ns          597 bytes_per_second=3.87713Gi/s
BM_RefScanSinkRandom/10000000    2534397 ns      2523344 ns          273 bytes_per_second=3.69083Gi/s
```

(Measurements on Ryzen 5900X via `nix build --file ci/gha/tests componentTests.nix-store-tests-run -L`)
---
 src/libstore-tests/meson.build       |   1 +
 src/libstore-tests/ref-scan-bench.cc | 120 +++++++++++++++++++++++++++
 2 files changed, 121 insertions(+)
 create mode 100644 src/libstore-tests/ref-scan-bench.cc

diff --git a/src/libstore-tests/meson.build b/src/libstore-tests/meson.build
index dfb936fef..bba991388 100644
--- a/src/libstore-tests/meson.build
+++ b/src/libstore-tests/meson.build
@@ -113,6 +113,7 @@ if get_option('benchmarks')
   benchmark_sources = files(
     'bench-main.cc',
     'derivation-parser-bench.cc',
+    'ref-scan-bench.cc',
   )
 
   benchmark_exe = executable(
diff --git a/src/libstore-tests/ref-scan-bench.cc b/src/libstore-tests/ref-scan-bench.cc
new file mode 100644
index 000000000..8219c05bf
--- /dev/null
+++ b/src/libstore-tests/ref-scan-bench.cc
@@ -0,0 +1,120 @@
+#include "nix/util/references.hh"
+#include "nix/store/path.hh"
+
+#include <benchmark/benchmark.h>
+
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <iterator>
+#include <limits>
+#include <random>
+#include <string>
+#include <string_view>
+
+using namespace nix;
+
+/**
+ * Write the hash part of a random store path to `outIter`:
+ * `StorePath::HashLen` characters drawn uniformly from `nix32Chars`.
+ */
+template<typename OIt>
+static void randomReference(std::mt19937 & urng, OIt outIter)
+{
+    auto dist = std::uniform_int_distribution<std::size_t>(0, nix32Chars.size() - 1);
+    std::generate_n(outIter, StorePath::HashLen, [&]() { return nix32Chars[dist(urng)]; });
+}
+
+/**
+ * Generate a random byte sequence of exactly `size` bytes with
+ * interleaved references (hash parts of store paths).
+ *
+ * A reference is only emitted when it fits completely, so every entry
+ * added to `hashes` occurs in the returned string in full.
+ *
+ * @param urng source of randomness
+ * @param size length of the generated sequence in bytes
+ * @param charWeight relative frequency of a byte not belonging to a reference (hash part of the store path)
+ * @param hashes set that every generated reference is inserted into
+ */
+static std::string
+randomBytesWithReferences(std::mt19937 & urng, std::size_t size, double charWeight, StringSet & hashes)
+{
+    std::string res;
+    res.reserve(size);
+
+    /* NOTE: std::uniform_int_distribution isn't guaranteed to be implemented for char. */
+    auto charGen = [&,
+                    charDist = std::uniform_int_distribution<int>{
+                        std::numeric_limits<char>::min(),
+                        std::numeric_limits<char>::max(),
+                    }]() mutable { res.push_back(static_cast<char>(charDist(urng))); };
+
+    auto refGen = [&]() {
+        std::string ref;
+        randomReference(urng, std::back_inserter(ref));
+        hashes.insert(ref);
+        res += ref;
+    };
+
+    /* Outcome 0 emits a whole reference, outcome 1 a single byte. The byte
+       weight is scaled by the reference length so that `charWeight` is a
+       ratio of bytes rather than of draws. */
+    std::discrete_distribution<int> genDist{1.0, StorePath::HashLen * charWeight};
+
+    while (res.size() < size) {
+        /* A reference that got cut off at the end would still be recorded in
+           `hashes` without being present in the output, so only emit one
+           when there is room for all of it. */
+        bool refFits = size - res.size() >= StorePath::HashLen;
+        if (genDist(urng) == 0 && refFits)
+            refGen();
+        else
+            charGen();
+    }
+
+    return res;
+}
+
+/**
+ * Benchmark reference scanning: feed random data with embedded references
+ * to a RefScanSink in fixed-size chunks. `state.range()` is the input size
+ * in bytes.
+ */
+static void BM_RefScanSinkRandom(benchmark::State & state)
+{
+    auto size = state.range();
+    /* std::size_t so that it matches std::string_view's size type. */
+    constexpr std::size_t chunkSize = 4199;
+
+    std::mt19937 urng(0);
+    StringSet hashes;
+    auto bytes = randomBytesWithReferences(urng, size, /*charWeight=*/100.0, hashes);
+    assert(!hashes.empty());
+
+    for (auto _ : state) {
+        /* Setting up a fresh sink is not part of the measurement. */
+        state.PauseTiming();
+        RefScanSink sink{StringSet(hashes)};
+        state.ResumeTiming();
+
+        auto data = std::string_view(bytes);
+        while (!data.empty()) {
+            /* substr() clamps the length to what is left of the input. */
+            auto chunk = data.substr(0, chunkSize);
+            data.remove_prefix(chunk.size());
+            sink(chunk);
+        }
+
+        benchmark::DoNotOptimize(sink.getResult());
+        /* Verifying the result is not part of the measurement either. */
+        state.PauseTiming();
+        assert(sink.getResult() == hashes);
+        state.ResumeTiming();
+    }
+
+    /* Every iteration scans the whole input exactly once. */
+    state.SetBytesProcessed(state.iterations() * size);
+}
+
+BENCHMARK(BM_RefScanSinkRandom)->Arg(10'000)->Arg(100'000)->Arg(1'000'000)->Arg(5'000'000)->Arg(10'000'000);