diff --git a/src/libutil/include/nix/util/lru-cache.hh b/src/libutil/include/nix/util/lru-cache.hh index 80ec6f8c4..23cfa91e1 100644 --- a/src/libutil/include/nix/util/lru-cache.hh +++ b/src/libutil/include/nix/util/lru-cache.hh @@ -33,6 +33,18 @@ private: Data data; LRU lru; + /** + * Move this item to the back of the LRU list. + */ + void promote(LRU::iterator it) + { + /* Think of std::list iterators as stable pointers to the list node, + * which never get invalidated. Thus, we can reuse the same lru list + * element and just splice it to the back of the list without the need + * to update its value in the key -> list iterator map. */ + lru.splice(/*pos=*/lru.end(), /*other=*/lru, it); + } + public: LRUCache(size_t capacity) @@ -83,7 +95,9 @@ public: /** * Look up an item in the cache. If it exists, it becomes the most * recently used item. - * */ + * + * @returns corresponding cache entry, std::nullopt if it's not in the cache + */ template std::optional get(const K & key) { @@ -91,20 +105,30 @@ public: if (i == data.end()) return {}; - /** - * Move this item to the back of the LRU list. - * - * Think of std::list iterators as stable pointers to the list node, - * which never get invalidated. Thus, we can reuse the same lru list - * element and just splice it to the back of the list without the need - * to update its value in the key -> list iterator map. - */ auto & [it, value] = i->second; - lru.splice(/*pos=*/lru.end(), /*other=*/lru, it.it); - + promote(it.it); return value; } + /** + * Look up an item in the cache. If it exists, it becomes the most + * recently used item. + * + * @returns mutable pointer to the corresponding cache entry, nullptr if + * it's not in the cache + */ + template + Value * getOrNullptr(const K & key) + { + auto i = data.find(key); + if (i == data.end()) + return nullptr; + + auto & [it, value] = i->second; + promote(it.it); + return &value; + } + size_t size() const noexcept { return data.size(); diff --git a/src/libutil/include/nix/util/pos-table.hh b/src/libutil/include/nix/util/pos-table.hh index 8ef7bd893..d944b1353 100644 --- a/src/libutil/include/nix/util/pos-table.hh +++ b/src/libutil/include/nix/util/pos-table.hh @@ -4,6 +4,7 @@ #include #include +#include "nix/util/lru-cache.hh" #include "nix/util/pos-idx.hh" #include "nix/util/position.hh" #include "nix/util/sync.hh" @@ -37,10 +38,20 @@ public: }; private: + /** + * Vector of byte offsets (in the virtual input buffer) of initial line character's position. + * Sorted by construction. Binary search over it allows for efficient translation of arbitrary + * byte offsets in the virtual input buffer to its line + column position. + */ using Lines = std::vector; + /** + * Cache from byte offset in the virtual buffer of Origins -> @ref Lines in that origin. + */ + using LinesCache = LRUCache; std::map origins; - mutable Sync> lines; + + mutable Sync linesCache; const Origin * resolve(PosIdx p) const { @@ -56,6 +67,11 @@ private: } public: + PosTable(std::size_t linesCacheCapacity = 65536) + : linesCache(linesCacheCapacity) + { + } + Origin addOrigin(Pos::Origin origin, size_t size) { uint32_t offset = 0; diff --git a/src/libutil/pos-table.cc b/src/libutil/pos-table.cc index cdee4adb0..e24aff4b1 100644 --- a/src/libutil/pos-table.cc +++ b/src/libutil/pos-table.cc @@ -15,21 +15,35 @@ Pos PosTable::operator[](PosIdx p) const const auto offset = origin->offsetOf(p); Pos result{0, 0, origin->origin}; - auto lines = this->lines.lock(); - auto linesForInput = (*lines)[origin->offset]; + auto linesCache = this->linesCache.lock(); - if (linesForInput.empty()) { - auto source = result.getSource().value_or(""); - const char * begin = source.data(); - for (Pos::LinesIterator it(source), end; it != end; it++) - linesForInput.push_back(it->data() - begin); - if (linesForInput.empty()) - linesForInput.push_back(0); + /* Try the origin's line cache */ + const auto * linesForInput = linesCache->getOrNullptr(origin->offset); + + auto fillCacheForOrigin = [](std::string_view content) { + auto contentLines = Lines(); + + const char * begin = content.data(); + for (Pos::LinesIterator it(content), end; it != end; it++) + contentLines.push_back(it->data() - begin); + if (contentLines.empty()) + contentLines.push_back(0); + + return contentLines; + }; + + /* Calculate line offsets and fill the cache */ + if (!linesForInput) { + auto originContent = result.getSource().value_or(""); + linesCache->upsert(origin->offset, fillCacheForOrigin(originContent)); + linesForInput = linesCache->getOrNullptr(origin->offset); } - // as above: the first line starts at byte 0 and is always present - auto lineStartOffset = std::prev(std::upper_bound(linesForInput.begin(), linesForInput.end(), offset)); - result.line = 1 + (lineStartOffset - linesForInput.begin()); + assert(linesForInput); + + // as above: the first line starts at byte 0 and is always present + auto lineStartOffset = std::prev(std::upper_bound(linesForInput->begin(), linesForInput->end(), offset)); + result.line = 1 + (lineStartOffset - linesForInput->begin()); result.column = 1 + (offset - *lineStartOffset); return result; }