diff --git a/src/libstore-tests/meson.build b/src/libstore-tests/meson.build
index dfb936fef..bba991388 100644
--- a/src/libstore-tests/meson.build
+++ b/src/libstore-tests/meson.build
@@ -113,6 +113,7 @@ if get_option('benchmarks')
   benchmark_sources = files(
     'bench-main.cc',
     'derivation-parser-bench.cc',
+    'ref-scan-bench.cc',
   )
 
   benchmark_exe = executable(
diff --git a/src/libstore-tests/ref-scan-bench.cc b/src/libstore-tests/ref-scan-bench.cc
new file mode 100644
index 000000000..8219c05bf
--- /dev/null
+++ b/src/libstore-tests/ref-scan-bench.cc
@@ -0,0 +1,123 @@
+#include "nix/util/references.hh"
+#include "nix/store/path.hh"
+
+#include <benchmark/benchmark.h>
+
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <iterator>
+#include <limits>
+#include <random>
+#include <string>
+#include <string_view>
+
+using namespace nix;
+
+/**
+ * Write one random store path hash part to `outIter`.
+ *
+ * Emits `StorePath::HashLen` characters, each drawn uniformly from `nix32Chars`.
+ *
+ * @param urng random engine that is advanced by the call
+ * @param outIter output iterator receiving the generated characters
+ */
+template<typename OIt>
+static void randomReference(std::mt19937 & urng, OIt outIter)
+{
+    auto dist = std::uniform_int_distribution<std::size_t>(0, nix32Chars.size() - 1);
+    std::generate_n(outIter, StorePath::HashLen, [&]() { return nix32Chars[dist(urng)]; });
+}
+
+/**
+ * Generate a random byte sequence with interleaved references.
+ *
+ * @param urng random engine that is advanced by the call
+ * @param size exact length of the returned string
+ * @param charWeight relative frequency of a byte not belonging to a reference (hash part of the store path)
+ * @param hashes receives every generated reference that is contained in full in the result
+ * @return `size` random bytes with references embedded in between
+ */
+static std::string
+randomBytesWithReferences(std::mt19937 & urng, std::size_t size, double charWeight, StringSet & hashes)
+{
+    std::string res;
+    /* The last reference may overshoot `size` before it is cut off below. */
+    res.reserve(size + StorePath::HashLen);
+
+    /* NOTE: std::uniform_int_distribution isn't guaranteed to be implemented for char,
+       so draw an int restricted to the range of char and convert it. */
+    auto charGen = [&,
+                    charDist = std::uniform_int_distribution<int>{
+                        std::numeric_limits<char>::min(),
+                        std::numeric_limits<char>::max(),
+                    }]() mutable { res.push_back(static_cast<char>(charDist(urng))); };
+
+    auto refGen = [&]() {
+        std::string ref;
+        randomReference(urng, std::back_inserter(ref));
+        /* A reference that gets truncated by the final resize() can never be found by a
+           scanner, so only record the ones that fit completely. */
+        if (res.size() + ref.size() <= size)
+            hashes.insert(ref);
+        res += ref;
+    };
+
+    /* Index 0 picks a whole reference, index 1 a single byte. A reference contributes
+       StorePath::HashLen bytes per draw, so the byte weight is scaled by the same factor
+       to make `charWeight` a ratio of bytes. */
+    std::discrete_distribution<int> genDist{1.0, StorePath::HashLen * charWeight};
+
+    while (res.size() < size) {
+        if (genDist(urng) == 0)
+            refGen();
+        else
+            charGen();
+    }
+
+    res.resize(size);
+    return res;
+}
+
+/**
+ * Benchmark reference scanning: feed random data with embedded references to a
+ * RefScanSink in fixed-size chunks and check that exactly those references are found.
+ */
+static void BM_RefScanSinkRandom(benchmark::State & state)
+{
+    const auto size = static_cast<std::size_t>(state.range());
+    /* NOTE(review): presumably chosen so that references straddle chunk boundaries. */
+    constexpr std::size_t chunkSize = 4199;
+
+    std::mt19937 urng(0);
+    StringSet hashes;
+    auto bytes = randomBytesWithReferences(urng, size, /*charWeight=*/100.0, hashes);
+    assert(hashes.size() > 0);
+
+    std::size_t processed = 0;
+
+    for (auto _ : state) {
+        /* Constructing the sink copies the hash set; keep that out of the measurement. */
+        state.PauseTiming();
+        RefScanSink sink{StringSet(hashes)};
+        state.ResumeTiming();
+
+        auto data = std::string_view(bytes);
+        while (!data.empty()) {
+            auto chunk = data.substr(0, std::min(chunkSize, data.size()));
+            data.remove_prefix(chunk.size());
+            sink(chunk);
+            processed += chunk.size();
+        }
+
+        benchmark::DoNotOptimize(sink.getResult());
+        /* Verify the result outside of the timed region. */
+        state.PauseTiming();
+        assert(sink.getResult() == hashes);
+        state.ResumeTiming();
+    }
+
+    state.SetBytesProcessed(processed);
+}
+
+BENCHMARK(BM_RefScanSinkRandom)->Arg(10'000)->Arg(100'000)->Arg(1'000'000)->Arg(5'000'000)->Arg(10'000'000);