From e1c9bc0ef61628e2cfa2438a38638fbfdea7ffb8 Mon Sep 17 00:00:00 2001 From: Sergei Zimmerman Date: Sun, 31 Aug 2025 00:48:37 +0300 Subject: [PATCH] libstore: Get rid of allocations in printString, allocate 2K bytes on the stack MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Looking at perf: 0.21 │ push %rbp 0.99 │ mov %rsp,%rbp │ push %r15 0.25 │ push %r14 │ push %r13 0.49 │ push %r12 0.66 │ push %rbx 1.23 │ lea -0x10000(%rsp),%r11 0.23 │ 15: sub $0x1000,%rsp 1.01 │ orq $0x0,(%rsp) 59.12 │ cmp %r11,%rsp 0.27 │ ↑ jne 15 Seems like 64K is too much to have on the stack for each invocation, considering that only a minuscule number of allocations are actually larger than 4K. There's actually no good reason this function should use so much stack space. Or use small_string at all. Everything can be done in small chunks that don't require any memory allocations and use up 2K bytes on the stack. This patch also adds a microbenchmark for tracking the unparsing performance. Here are the results for this change: (Before) BM_UnparseRealDerivationFile/hello 7275 ns 7247 ns 96093 bytes_per_second=232.136Mi/s BM_UnparseRealDerivationFile/firefox 40538 ns 40376 ns 17327 bytes_per_second=378.534Mi/s (After) BM_UnparseRealDerivationFile/hello 3228 ns 3218 ns 215671 bytes_per_second=522.775Mi/s BM_UnparseRealDerivationFile/firefox 39724 ns 39584 ns 17617 bytes_per_second=386.101Mi/s This translates into nice evaluation performance improvements (compared to 18c3d2348f59032f1c630e6a232fe3637efb8200): Benchmark 1: GC_INITIAL_HEAP_SIZE=8G old-nix/bin/nix-instantiate ../nixpkgs -A nixosTests.gnome --readonly-mode Time (mean ± σ): 3.111 s ± 0.021 s [User: 2.513 s, System: 0.580 s] Range (min … max): 3.083 s … 3.143 s 10 runs Benchmark 2: GC_INITIAL_HEAP_SIZE=8G result/bin/nix-instantiate ../nixpkgs -A nixosTests.gnome --readonly-mode Time (mean ± σ): 3.037 s ± 0.038 s [User: 2.461 s, System: 0.558 s] Range (min … max): 2.960 s … 3.086 s 10 runs --- src/libstore-tests/derivation-parser-bench.cc | 29 +++++++++++ src/libstore/derivations.cc | 49 ++++++++++--------- 2 files changed, 56 insertions(+), 22 deletions(-) diff --git a/src/libstore-tests/derivation-parser-bench.cc b/src/libstore-tests/derivation-parser-bench.cc index ef698b205..61c9807a6 100644 --- a/src/libstore-tests/derivation-parser-bench.cc +++ b/src/libstore-tests/derivation-parser-bench.cc @@ -28,6 +28,27 @@ static void BM_ParseRealDerivationFile(benchmark::State & state, const std::stri state.SetBytesProcessed(state.iterations() * content.size()); } +// Benchmark unparsing real derivation files +static void BM_UnparseRealDerivationFile(benchmark::State & state, const std::string & filename) +{ + // Read the file once + std::ifstream file(filename); + std::stringstream buffer; + buffer << file.rdbuf(); + std::string content = buffer.str(); + + auto store = openStore("dummy://"); + ExperimentalFeatureSettings xpSettings; + auto drv = parseDerivation(*store, std::string(content), "test", xpSettings); + + for (auto _ : state) { + auto unparsed = drv.unparse(*store, /*maskOutputs=*/false); + benchmark::DoNotOptimize(unparsed); + assert(unparsed.size() == content.size()); + } + state.SetBytesProcessed(state.iterations() * content.size()); +} + // Register benchmarks for actual test derivation files if they exist BENCHMARK_CAPTURE( BM_ParseRealDerivationFile, @@ -37,3 +58,11 @@ BENCHMARK_CAPTURE( BM_ParseRealDerivationFile, firefox, getEnvNonEmpty("_NIX_TEST_UNIT_DATA").value_or(NIX_UNIT_TEST_DATA) + "/derivation/firefox.drv"); +BENCHMARK_CAPTURE( + BM_UnparseRealDerivationFile, + hello, + getEnvNonEmpty("_NIX_TEST_UNIT_DATA").value_or(NIX_UNIT_TEST_DATA) + "/derivation/hello.drv"); +BENCHMARK_CAPTURE( + BM_UnparseRealDerivationFile, + firefox, + getEnvNonEmpty("_NIX_TEST_UNIT_DATA").value_or(NIX_UNIT_TEST_DATA) + "/derivation/firefox.drv"); diff --git a/src/libstore/derivations.cc b/src/libstore/derivations.cc index 1afc343d7..a1831efc6 100644 --- a/src/libstore/derivations.cc +++ b/src/libstore/derivations.cc @@ -498,28 +498,33 @@ Derivation parseDerivation( */ static void printString(std::string & res, std::string_view s) { - boost::container::small_vector buffer; - buffer.reserve(s.size() * 2 + 2); - char * buf = buffer.data(); - char * p = buf; - *p++ = '"'; - for (auto c : s) - if (c == '\"' || c == '\\') { - *p++ = '\\'; - *p++ = c; - } else if (c == '\n') { - *p++ = '\\'; - *p++ = 'n'; - } else if (c == '\r') { - *p++ = '\\'; - *p++ = 'r'; - } else if (c == '\t') { - *p++ = '\\'; - *p++ = 't'; - } else - *p++ = c; - *p++ = '"'; - res.append(buf, p - buf); + res.reserve(res.size() + s.size() * 2 + 2); + res += '"'; + static constexpr auto chunkSize = 1024; + std::array buffer; + while (!s.empty()) { + auto chunk = s.substr(0, /*n=*/chunkSize); + s.remove_prefix(chunk.size()); + char * buf = buffer.data(); + char * p = buf; + for (auto c : chunk) + if (c == '\"' || c == '\\') { + *p++ = '\\'; + *p++ = c; + } else if (c == '\n') { + *p++ = '\\'; + *p++ = 'n'; + } else if (c == '\r') { + *p++ = '\\'; + *p++ = 'r'; + } else if (c == '\t') { + *p++ = '\\'; + *p++ = 't'; + } else + *p++ = c; + res.append(buf, p - buf); + } + res += '"'; } static void printUnquotedString(std::string & res, std::string_view s)