1
1
Fork 0
mirror of https://github.com/NixOS/nix.git synced 2025-11-14 22:42:41 +01:00

libstore-tests: Add reference scanning benchmark

This benchmark should provide a relatively realistic
scenario for reference scanning.

As shown by the following results, reference scanning code
is already plenty fast and is definitely not a bottleneck:

```
BM_RefScanSinkRandom/10000               1672 ns         1682 ns       413354 bytes_per_second=5.53691Gi/s
BM_RefScanSinkRandom/100000             11217 ns        11124 ns        64341 bytes_per_second=8.37231Gi/s
BM_RefScanSinkRandom/1000000           205745 ns       204421 ns         3360 bytes_per_second=4.55591Gi/s
BM_RefScanSinkRandom/5000000          1208407 ns      1201046 ns          597 bytes_per_second=3.87713Gi/s
BM_RefScanSinkRandom/10000000         2534397 ns      2523344 ns          273 bytes_per_second=3.69083Gi/s
```

(Measurements on Ryzen 5900X via `nix build --file ci/gha/tests componentTests.nix-store-tests-run -L`)
This commit is contained in:
Sergei Zimmerman 2025-08-04 02:11:14 +03:00
parent 51a32e4645
commit 20df0e5fa7
No known key found for this signature in database
2 changed files with 93 additions and 0 deletions

View file

@@ -113,6 +113,7 @@ if get_option('benchmarks')
benchmark_sources = files(
'bench-main.cc',
'derivation-parser-bench.cc',
'ref-scan-bench.cc',
)
benchmark_exe = executable(

View file

@@ -0,0 +1,92 @@
#include "nix/util/references.hh"
#include "nix/store/path.hh"

#include <benchmark/benchmark.h>

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <iterator>
#include <limits>
#include <random>
#include <string>
#include <string_view>
using namespace nix;
/**
 * Write one random store-path hash part to @p outIter: exactly
 * StorePath::HashLen characters drawn uniformly from nix32Chars.
 *
 * @param urng seeded Mersenne Twister used as the randomness source
 * @param outIter output iterator receiving the generated characters
 */
template<typename OIt>
static void randomReference(std::mt19937 & urng, OIt outIter)
{
    auto dist = std::uniform_int_distribution<std::size_t>(0, nix32Chars.size() - 1);
    /* NOTE: the original code drew and discarded one value from `dist` here;
       that dead call served no purpose and has been removed. */
    std::generate_n(outIter, StorePath::HashLen, [&]() { return nix32Chars[dist(urng)]; });
}
/**
 * Generate a random byte sequence with interleaved store-path references
 * (hash parts), approximating a realistic scan input.
 *
 * @param urng seeded random number generator; output is deterministic for a fixed seed
 * @param size exact length in bytes of the returned string
 * @param charWeight relative frequency of a byte not belonging to a reference (hash part of the store path)
 * @param hashes out-parameter: every generated reference hash is inserted here
 * @return a string of exactly `size` bytes
 *
 * NOTE(review): if the last item generated is a reference that overshoots
 * `size`, the final resize() truncates it while its hash remains in `hashes`;
 * a scanner would then not find that hash in the data. This apparently does
 * not occur for the seeds/sizes used by the benchmark below — confirm if
 * reusing with other parameters.
 */
static std::string
randomBytesWithReferences(std::mt19937 & urng, std::size_t size, double charWeight, StringSet & hashes)
{
    std::string res;
    res.reserve(size);
    /* NOTE: std::uniform_int_distribution isn't guaranteed to be implemented for char. */
    auto charGen = [&,
                    charDist = std::uniform_int_distribution<int>{
                        std::numeric_limits<char>::min(),
                        std::numeric_limits<char>::max(),
                    }]() mutable { res.push_back(charDist(urng)); };
    // Appends one full random reference and records its hash.
    auto refGen = [&]() {
        std::string ref;
        randomReference(urng, std::back_inserter(ref));
        hashes.insert(ref);
        res += ref;
    };
    /* Index 0 selects a reference, index 1 a plain byte. Weighting the byte
       side by HashLen * charWeight makes `charWeight` the per-byte (not
       per-item) frequency ratio, since a reference emits HashLen bytes. */
    std::discrete_distribution<std::size_t> genDist{1.0, StorePath::HashLen * charWeight};
    while (res.size() < size) {
        auto c = genDist(urng);
        if (c == 0)
            refGen();
        else
            charGen();
    }
    res.resize(size); // trim any overshoot from a trailing reference
    return res;
}
/**
 * Benchmark RefScanSink on a pseudo-random byte stream containing
 * interleaved store-path hash parts, fed to the sink in fixed-size chunks.
 *
 * Sink construction and the result-equality check run outside the timed
 * region, so only the scanning work itself is measured.
 */
static void BM_RefScanSinkRandom(benchmark::State & state)
{
    const auto totalSize = state.range();
    // Deliberately not a multiple of the input size, so chunk boundaries
    // fall at arbitrary offsets (including inside references).
    const std::size_t chunkSize = 4199;

    std::mt19937 urng(0); // fixed seed: identical input across runs
    StringSet hashes;
    auto bytes = randomBytesWithReferences(urng, totalSize, /*charWeight=*/100.0, hashes);
    assert(hashes.size() > 0);

    std::size_t processed = 0;
    for (auto _ : state) {
        state.PauseTiming();
        RefScanSink sink{StringSet(hashes)};
        state.ResumeTiming();

        std::string_view remaining{bytes};
        while (!remaining.empty()) {
            auto piece = remaining.substr(0, std::min(chunkSize, remaining.size()));
            remaining.remove_prefix(piece.size());
            sink(piece);
            processed += piece.size();
        }
        benchmark::DoNotOptimize(sink.getResult());

        state.PauseTiming();
        // Sanity check (debug builds only): every generated hash was found.
        assert(sink.getResult() == hashes);
        state.ResumeTiming();
    }
    state.SetBytesProcessed(processed);
}
// Register the benchmark over input sizes from 10 KB to 10 MB.
BENCHMARK(BM_RefScanSinkRandom)->Arg(10'000)->Arg(100'000)->Arg(1'000'000)->Arg(5'000'000)->Arg(10'000'000);