From fd86343b89093684929f72ae7f6a4964ef2bc516 Mon Sep 17 00:00:00 2001 From: Sergei Zimmerman Date: Thu, 13 Mar 2025 12:55:39 +0000 Subject: [PATCH 1/4] {libutil,libexpr}: Move pos-idx,pos-table code to libutil All of this code doesn't actually depend on anything from libexpr. Because Pos is so tigtly coupled with Error, it makes sense to have in the same library. (cherry picked from commit a53b184e63114ec390e3a1b1f7cd45b8a012ab04) --- maintainers/flake-module.nix | 1 - src/libexpr/meson.build | 2 -- src/libexpr/nixexpr.cc | 35 ------------------------- src/libutil/meson.build | 3 +++ src/{libexpr => libutil}/pos-idx.hh | 1 + src/libutil/pos-table.cc | 37 +++++++++++++++++++++++++++ src/{libexpr => libutil}/pos-table.hh | 10 +++++--- 7 files changed, 48 insertions(+), 41 deletions(-) rename src/{libexpr => libutil}/pos-idx.hh (98%) create mode 100644 src/libutil/pos-table.cc rename src/{libexpr => libutil}/pos-table.hh (94%) diff --git a/maintainers/flake-module.nix b/maintainers/flake-module.nix index 5dfc93e58..e01d38e9e 100644 --- a/maintainers/flake-module.nix +++ b/maintainers/flake-module.nix @@ -77,7 +77,6 @@ ''^src/libexpr/nixexpr\.cc$'' ''^src/libexpr/nixexpr\.hh$'' ''^src/libexpr/parser-state\.hh$'' - ''^src/libexpr/pos-table\.hh$'' ''^src/libexpr/primops\.cc$'' ''^src/libexpr/primops\.hh$'' ''^src/libexpr/primops/context\.cc$'' diff --git a/src/libexpr/meson.build b/src/libexpr/meson.build index 4d8a38b43..7b5557bd1 100644 --- a/src/libexpr/meson.build +++ b/src/libexpr/meson.build @@ -175,8 +175,6 @@ headers = [config_h] + files( # internal: 'lexer-helpers.hh', 'nixexpr.hh', 'parser-state.hh', - 'pos-idx.hh', - 'pos-table.hh', 'primops.hh', 'print-ambiguous.hh', 'print-options.hh', diff --git a/src/libexpr/nixexpr.cc b/src/libexpr/nixexpr.cc index 063ff0753..5312b8d30 100644 --- a/src/libexpr/nixexpr.cc +++ b/src/libexpr/nixexpr.cc @@ -601,41 +601,6 @@ void ExprLambda::setDocComment(DocComment docComment) { } }; - - -/* Position table. */ - -Pos PosTable::operator[](PosIdx p) const -{ - auto origin = resolve(p); - if (!origin) - return {}; - - const auto offset = origin->offsetOf(p); - - Pos result{0, 0, origin->origin}; - auto lines = this->lines.lock(); - auto linesForInput = (*lines)[origin->offset]; - - if (linesForInput.empty()) { - auto source = result.getSource().value_or(""); - const char * begin = source.data(); - for (Pos::LinesIterator it(source), end; it != end; it++) - linesForInput.push_back(it->data() - begin); - if (linesForInput.empty()) - linesForInput.push_back(0); - } - // as above: the first line starts at byte 0 and is always present - auto lineStartOffset = std::prev( - std::upper_bound(linesForInput.begin(), linesForInput.end(), offset)); - - result.line = 1 + (lineStartOffset - linesForInput.begin()); - result.column = 1 + (offset - *lineStartOffset); - return result; -} - - - /* Symbol table. */ size_t SymbolTable::totalSize() const diff --git a/src/libutil/meson.build b/src/libutil/meson.build index a6dc86394..dfe2bf9e6 100644 --- a/src/libutil/meson.build +++ b/src/libutil/meson.build @@ -149,6 +149,7 @@ sources = files( 'logging.cc', 'memory-source-accessor.cc', 'position.cc', + 'pos-table.cc', 'posix-source-accessor.cc', 'references.cc', 'serialise.cc', @@ -214,6 +215,8 @@ headers = [config_h] + files( 'muxable-pipe.hh', 'os-string.hh', 'pool.hh', + 'pos-idx.hh', + 'pos-table.hh', 'position.hh', 'posix-source-accessor.hh', 'processes.hh', diff --git a/src/libexpr/pos-idx.hh b/src/libutil/pos-idx.hh similarity index 98% rename from src/libexpr/pos-idx.hh rename to src/libutil/pos-idx.hh index 2faa6b7fe..c1749ba69 100644 --- a/src/libexpr/pos-idx.hh +++ b/src/libutil/pos-idx.hh @@ -1,4 +1,5 @@ #pragma once +///@file #include #include diff --git a/src/libutil/pos-table.cc b/src/libutil/pos-table.cc new file mode 100644 index 000000000..8178beb90 --- /dev/null +++ b/src/libutil/pos-table.cc @@ -0,0 +1,37 @@ +#include "pos-table.hh" + +#include + +namespace nix { + +/* Position table. */ + +Pos PosTable::operator[](PosIdx p) const +{ + auto origin = resolve(p); + if (!origin) + return {}; + + const auto offset = origin->offsetOf(p); + + Pos result{0, 0, origin->origin}; + auto lines = this->lines.lock(); + auto linesForInput = (*lines)[origin->offset]; + + if (linesForInput.empty()) { + auto source = result.getSource().value_or(""); + const char * begin = source.data(); + for (Pos::LinesIterator it(source), end; it != end; it++) + linesForInput.push_back(it->data() - begin); + if (linesForInput.empty()) + linesForInput.push_back(0); + } + // as above: the first line starts at byte 0 and is always present + auto lineStartOffset = std::prev(std::upper_bound(linesForInput.begin(), linesForInput.end(), offset)); + + result.line = 1 + (lineStartOffset - linesForInput.begin()); + result.column = 1 + (offset - *lineStartOffset); + return result; +} + +} diff --git a/src/libexpr/pos-table.hh b/src/libutil/pos-table.hh similarity index 94% rename from src/libexpr/pos-table.hh rename to src/libutil/pos-table.hh index ba2b91cf3..673cf62ae 100644 --- a/src/libexpr/pos-table.hh +++ b/src/libutil/pos-table.hh @@ -1,4 +1,5 @@ #pragma once +///@file #include #include @@ -18,9 +19,12 @@ public: private: uint32_t offset; - Origin(Pos::Origin origin, uint32_t offset, size_t size): - offset(offset), origin(origin), size(size) - {} + Origin(Pos::Origin origin, uint32_t offset, size_t size) + : offset(offset) + , origin(origin) + , size(size) + { + } public: const Pos::Origin origin; From 7eaf8ec028a5277486b38d3b2bb20b81e9084a99 Mon Sep 17 00:00:00 2001 From: Sergei Zimmerman Date: Thu, 13 Mar 2025 12:55:42 +0000 Subject: [PATCH 2/4] libutil: Document hacks and problems around Pos class This should provide context for follow-up commits in the patch series. (cherry picked from commit bf12aedf2edb10feb4605ebcde395e3b418ec58a) --- src/libutil/error.hh | 8 ++++++++ src/libutil/pos-table.hh | 11 +++++++++++ src/libutil/position.hh | 1 + 3 files changed, 20 insertions(+) diff --git a/src/libutil/error.hh b/src/libutil/error.hh index 58d902622..04fa18e35 100644 --- a/src/libutil/error.hh +++ b/src/libutil/error.hh @@ -50,6 +50,14 @@ struct LinesOfCode { std::optional nextLineOfCode; }; +/* NOTE: position.hh recursively depends on source-path.hh -> source-accessor.hh + -> hash.hh -> config.hh -> experimental-features.hh -> error.hh -> Pos. + There are other such cycles. + Thus, Pos has to be an incomplete type in this header. But since ErrorInfo/Trace + have to refer to Pos, they have to use pointer indirection via std::shared_ptr + to break the recursive header dependency. + FIXME: Untangle this mess. Should there be AbstractPos as there used to be before + 4feb7d9f71? */ struct Pos; void printCodeLines(std::ostream & out, diff --git a/src/libutil/pos-table.hh b/src/libutil/pos-table.hh index 673cf62ae..a6fe09d79 100644 --- a/src/libutil/pos-table.hh +++ b/src/libutil/pos-table.hh @@ -76,6 +76,17 @@ public: return PosIdx(1 + origin.offset + offset); } + /** + * Convert a byte-offset PosIdx into a Pos with line/column information. + * + * @param p Byte offset into the virtual concatenation of all parsed contents + * @return Position + * + * @warning Very expensive to call, as this has to read the entire source + * into memory each time. Call this only if absolutely necessary. Prefer + * to keep PosIdx around instead of needlessly converting it into Pos by + * using this lookup method. + */ Pos operator[](PosIdx p) const; Pos::Origin originOf(PosIdx p) const diff --git a/src/libutil/position.hh b/src/libutil/position.hh index 25217069c..2ac68d15a 100644 --- a/src/libutil/position.hh +++ b/src/libutil/position.hh @@ -50,6 +50,7 @@ struct Pos explicit operator bool() const { return line > 0; } + /* TODO: Why std::shared_ptr and not std::shared_ptr? */ operator std::shared_ptr() const; /** From b7c8d5246e28d104c9d20bd44a29de674abc269b Mon Sep 17 00:00:00 2001 From: Sergei Zimmerman Date: Thu, 13 Mar 2025 12:55:45 +0000 Subject: [PATCH 3/4] libutil: Fix Pos::getSourcePath Previous implementation didn't actually check if std::get_if returned a nullptr: std::optional getSourcePath() const { return *std::get_if(&origin); } (cherry picked from commit 50123f2a566bd9157ef6ed64d95799473e5d8670) --- src/libutil/position.cc | 7 +++++++ src/libutil/position.hh | 4 +--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/src/libutil/position.cc b/src/libutil/position.cc index 946f167b6..275985c8c 100644 --- a/src/libutil/position.cc +++ b/src/libutil/position.cc @@ -66,6 +66,13 @@ std::optional Pos::getSource() const }, origin); } +std::optional Pos::getSourcePath() const +{ + if (auto * path = std::get_if(&origin)) + return *path; + return std::nullopt; +} + void Pos::print(std::ostream & out, bool showOrigin) const { if (showOrigin) { diff --git a/src/libutil/position.hh b/src/libutil/position.hh index 2ac68d15a..07e261c4c 100644 --- a/src/libutil/position.hh +++ b/src/libutil/position.hh @@ -70,9 +70,7 @@ struct Pos /** * Get the SourcePath, if the source was loaded from a file. */ - std::optional getSourcePath() const { - return *std::get_if(&origin); - } + std::optional getSourcePath() const; struct LinesIterator { using difference_type = size_t; From 0823a0c6e15d5f7be13cc9827e79dbb2a43d3677 Mon Sep 17 00:00:00 2001 From: Sergei Zimmerman Date: Thu, 13 Mar 2025 16:24:30 +0000 Subject: [PATCH 4/4] {libexpr,libcmd}: Make debugger significantly faster MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The underlying issue is that debugger code path was calling PosTable::operator[] in each eval method. This has become incredibly expensive since 5d9fdab3de. While we are it it, I've reworked the code to not use std::shared_ptr where it really isn't necessary. As I've documented in previous commits, this is actually more a workaround for recursive header dependencies now and is only necessary in `error.hh` code. Some ad-hoc benchmarking: After this commit: ``` Benchmark 1: nix eval nixpkgs#hello --impure --ignore-try --no-eval-cache --debugger Time (mean ± σ): 784.2 ms ± 7.1 ms [User: 561.4 ms, System: 147.7 ms] Range (min … max): 773.5 ms … 792.6 ms 10 runs ``` On master 3604c7c51: ``` Benchmark 1: nix eval nixpkgs#hello --impure --ignore-try --no-eval-cache --debugger Time (mean ± σ): 22.914 s ± 0.178 s [User: 18.524 s, System: 4.151 s] Range (min … max): 22.738 s … 23.290 s 10 runs ``` (cherry picked from commit adbd08399c1817bc4dc5a1a3a32b160eaed49c6f) --- src/libcmd/repl.cc | 11 ++++------ src/libexpr/eval-error.cc | 2 +- src/libexpr/eval.cc | 44 +++++++++++++++++++++------------------ src/libexpr/eval.hh | 19 ++++++++++++++++- 4 files changed, 47 insertions(+), 29 deletions(-) diff --git a/src/libcmd/repl.cc b/src/libcmd/repl.cc index f292f06bb..1e984201c 100644 --- a/src/libcmd/repl.cc +++ b/src/libcmd/repl.cc @@ -138,16 +138,13 @@ static std::ostream & showDebugTrace(std::ostream & out, const PosTable & positi out << ANSI_RED "error: " << ANSI_NORMAL; out << dt.hint.str() << "\n"; - // prefer direct pos, but if noPos then try the expr. - auto pos = dt.pos - ? dt.pos - : positions[dt.expr.getPos() ? dt.expr.getPos() : noPos]; + auto pos = dt.getPos(positions); if (pos) { - out << *pos; - if (auto loc = pos->getCodeLines()) { + out << pos; + if (auto loc = pos.getCodeLines()) { out << "\n"; - printCodeLines(out, "", *pos, *loc); + printCodeLines(out, "", pos, *loc); out << "\n"; } } diff --git a/src/libexpr/eval-error.cc b/src/libexpr/eval-error.cc index cdb0b4772..b9742d3ea 100644 --- a/src/libexpr/eval-error.cc +++ b/src/libexpr/eval-error.cc @@ -45,7 +45,7 @@ EvalErrorBuilder & EvalErrorBuilder::withFrame(const Env & env, const Expr // TODO: check compatibility with nested debugger calls. // TODO: What side-effects?? error.state.debugTraces.push_front(DebugTrace{ - .pos = error.state.positions[expr.getPos()], + .pos = expr.getPos(), .expr = expr, .env = env, .hint = HintFmt("Fake frame for debugging purposes"), diff --git a/src/libexpr/eval.cc b/src/libexpr/eval.cc index d27e07f01..caf591258 100644 --- a/src/libexpr/eval.cc +++ b/src/libexpr/eval.cc @@ -734,18 +734,26 @@ void EvalState::runDebugRepl(const Error * error, const Env & env, const Expr & if (!debugRepl || inDebugger) return; - auto dts = - error && expr.getPos() - ? std::make_unique( - *this, - DebugTrace { - .pos = error->info().pos ? error->info().pos : positions[expr.getPos()], + auto dts = [&]() -> std::unique_ptr { + if (error && expr.getPos()) { + auto trace = DebugTrace{ + .pos = [&]() -> std::variant { + if (error->info().pos) { + if (auto * pos = error->info().pos.get()) + return *pos; + return noPos; + } + return expr.getPos(); + }(), .expr = expr, .env = env, .hint = error->info().msg, - .isError = true - }) - : nullptr; + .isError = true}; + + return std::make_unique(*this, std::move(trace)); + } + return nullptr; + }(); if (error) { @@ -790,7 +798,7 @@ static std::unique_ptr makeDebugTraceStacker( EvalState & state, Expr & expr, Env & env, - std::shared_ptr && pos, + std::variant pos, const Args & ... formatArgs) { return std::make_unique(state, @@ -1067,7 +1075,7 @@ void EvalState::evalFile(const SourcePath & path, Value & v, bool mustBeTrivial) *this, *e, this->baseEnv, - e->getPos() ? std::make_shared(positions[e->getPos()]) : nullptr, + e->getPos(), "while evaluating the file '%1%':", resolvedPath.to_string()) : nullptr; @@ -1293,9 +1301,7 @@ void ExprLet::eval(EvalState & state, Env & env, Value & v) state, *this, env2, - getPos() - ? std::make_shared(state.positions[getPos()]) - : nullptr, + getPos(), "while evaluating a '%1%' expression", "let" ) @@ -1364,7 +1370,7 @@ void ExprSelect::eval(EvalState & state, Env & env, Value & v) state, *this, env, - state.positions[getPos()], + getPos(), "while evaluating the attribute '%1%'", showAttrPath(state, env, attrPath)) : nullptr; @@ -1565,7 +1571,7 @@ void EvalState::callFunction(Value & fun, size_t nrArgs, Value * * args, Value & try { auto dts = debugRepl ? makeDebugTraceStacker( - *this, *lambda.body, env2, positions[lambda.pos], + *this, *lambda.body, env2, lambda.pos, "while calling %s", lambda.name ? concatStrings("'", symbols[lambda.name], "'") @@ -1704,9 +1710,7 @@ void ExprCall::eval(EvalState & state, Env & env, Value & v) state, *this, env, - getPos() - ? std::make_shared(state.positions[getPos()]) - : nullptr, + getPos(), "while calling a function" ) : nullptr; @@ -2087,7 +2091,7 @@ void EvalState::forceValueDeep(Value & v) try { // If the value is a thunk, we're evaling. Otherwise no trace necessary. auto dts = debugRepl && i.value->isThunk() - ? makeDebugTraceStacker(*this, *i.value->payload.thunk.expr, *i.value->payload.thunk.env, positions[i.pos], + ? makeDebugTraceStacker(*this, *i.value->payload.thunk.expr, *i.value->payload.thunk.env, i.pos, "while evaluating the attribute '%1%'", symbols[i.name]) : nullptr; diff --git a/src/libexpr/eval.hh b/src/libexpr/eval.hh index a616b10b4..6bd2d0cf0 100644 --- a/src/libexpr/eval.hh +++ b/src/libexpr/eval.hh @@ -171,11 +171,28 @@ struct RegexCache; std::shared_ptr makeRegexCache(); struct DebugTrace { - std::shared_ptr pos; + /* WARNING: Converting PosIdx -> Pos should be done with extra care. This is + due to the fact that operator[] of PosTable is incredibly expensive. */ + std::variant pos; const Expr & expr; const Env & env; HintFmt hint; bool isError; + + Pos getPos(const PosTable & table) const + { + return std::visit( + overloaded{ + [&](PosIdx idx) { + // Prefer direct pos, but if noPos then try the expr. + if (!idx) + idx = expr.getPos(); + return table[idx]; + }, + [&](Pos pos) { return pos; }, + }, + pos); + } }; class EvalState : public std::enable_shared_from_this