From ef8dc34bd00f8e8936f8b0cfd1efe7bb202a4971 Mon Sep 17 00:00:00 2001 From: Sergei Zimmerman Date: Thu, 15 May 2025 23:07:25 +0000 Subject: [PATCH] libexpr: Actually cache line information in PosTable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previous code had a sneaky bug due to which no caching actually happened: ```cpp auto linesForInput = (*lines)[origin->offset]; ``` That should have been: ```cpp auto & linesForInput = (*lines)[origin->offset]; ``` See [1]. Now that it also makes sense to make the cache bound in side in order not to memoize all the sources without freeing any memory. The default cache size has been chosen somewhat arbitrarily to be ~64k origins. For reference, 25.05 nixpkgs has ~50k .nix files. Simple benchmark: ```nix let pkgs = import { }; in builtins.foldl' (acc: el: acc + el.line) 0 ( builtins.genList (x: builtins.unsafeGetAttrPos "gcc" pkgs) 10000 ) ``` (After) ``` $ hyperfine "result/bin/nix eval -f ./test.nix" Benchmark 1: result/bin/nix eval -f ./test.nix Time (mean ± σ): 292.7 ms ± 3.9 ms [User: 131.0 ms, System: 120.5 ms] Range (min … max): 288.1 ms … 300.5 ms 10 runs ``` (Before) ``` hyperfine "nix eval -f ./test.nix" Benchmark 1: nix eval -f ./test.nix Time (mean ± σ): 666.7 ms ± 6.4 ms [User: 428.3 ms, System: 191.2 ms] Range (min … max): 659.7 ms … 681.3 ms 10 runs ``` If the origin happens to be a `all-packages.nix` or similar in size then the difference is much more dramatic. [1]: https://www.github.com/lix-project/lix/commit/22e3f0e9875082be7f4eec8e3caeb134a7f1c05f (cherry picked from commit 5ea81f5b8f36f351981f53d27e85c6e4733c2b3e) --- src/libutil/include/nix/util/pos-table.hh | 18 ++++++++++- src/libutil/pos-table.cc | 38 ++++++++++++++++------- 2 files changed, 43 insertions(+), 13 deletions(-) diff --git a/src/libutil/include/nix/util/pos-table.hh b/src/libutil/include/nix/util/pos-table.hh index 8ef7bd893..d944b1353 100644 --- a/src/libutil/include/nix/util/pos-table.hh +++ b/src/libutil/include/nix/util/pos-table.hh @@ -4,6 +4,7 @@ #include #include +#include "nix/util/lru-cache.hh" #include "nix/util/pos-idx.hh" #include "nix/util/position.hh" #include "nix/util/sync.hh" @@ -37,10 +38,20 @@ public: }; private: + /** + * Vector of byte offsets (in the virtual input buffer) of initial line character's position. + * Sorted by construction. Binary search over it allows for efficient translation of arbitrary + * byte offsets in the virtual input buffer to its line + column position. + */ using Lines = std::vector; + /** + * Cache from byte offset in the virtual buffer of Origins -> @ref Lines in that origin. + */ + using LinesCache = LRUCache; std::map origins; - mutable Sync> lines; + + mutable Sync linesCache; const Origin * resolve(PosIdx p) const { @@ -56,6 +67,11 @@ private: } public: + PosTable(std::size_t linesCacheCapacity = 65536) + : linesCache(linesCacheCapacity) + { + } + Origin addOrigin(Pos::Origin origin, size_t size) { uint32_t offset = 0; diff --git a/src/libutil/pos-table.cc b/src/libutil/pos-table.cc index cdee4adb0..e24aff4b1 100644 --- a/src/libutil/pos-table.cc +++ b/src/libutil/pos-table.cc @@ -15,21 +15,35 @@ Pos PosTable::operator[](PosIdx p) const const auto offset = origin->offsetOf(p); Pos result{0, 0, origin->origin}; - auto lines = this->lines.lock(); - auto linesForInput = (*lines)[origin->offset]; + auto linesCache = this->linesCache.lock(); - if (linesForInput.empty()) { - auto source = result.getSource().value_or(""); - const char * begin = source.data(); - for (Pos::LinesIterator it(source), end; it != end; it++) - linesForInput.push_back(it->data() - begin); - if (linesForInput.empty()) - linesForInput.push_back(0); + /* Try the origin's line cache */ + const auto * linesForInput = linesCache->getOrNullptr(origin->offset); + + auto fillCacheForOrigin = [](std::string_view content) { + auto contentLines = Lines(); + + const char * begin = content.data(); + for (Pos::LinesIterator it(content), end; it != end; it++) + contentLines.push_back(it->data() - begin); + if (contentLines.empty()) + contentLines.push_back(0); + + return contentLines; + }; + + /* Calculate line offsets and fill the cache */ + if (!linesForInput) { + auto originContent = result.getSource().value_or(""); + linesCache->upsert(origin->offset, fillCacheForOrigin(originContent)); + linesForInput = linesCache->getOrNullptr(origin->offset); } - // as above: the first line starts at byte 0 and is always present - auto lineStartOffset = std::prev(std::upper_bound(linesForInput.begin(), linesForInput.end(), offset)); - result.line = 1 + (lineStartOffset - linesForInput.begin()); + assert(linesForInput); + + // as above: the first line starts at byte 0 and is always present + auto lineStartOffset = std::prev(std::upper_bound(linesForInput->begin(), linesForInput->end(), offset)); + result.line = 1 + (lineStartOffset - linesForInput->begin()); result.column = 1 + (offset - *lineStartOffset); return result; }