From eab467ecfb829182548276df7d56a4d1c525057a Mon Sep 17 00:00:00 2001 From: Taeer Bar-Yam Date: Fri, 19 Sep 2025 14:06:15 -0400 Subject: [PATCH] libexpr: introduce arena to hold ExprString strings 1. Saves 24-32 bytes per string (size of std::string) 2. Saves additional bytes by not over-allocating strings (in total we save ~1% memory) 3. Sets us up to perform a similar transformation on the other Expr subclasses 4. Makes ExprString trivially moveable (before the string data might move, causing the Value's pointer to become invalid). This is important so we can put ExprStrings in an std::vector and refer to them by index We have introduced a string copy in ParserState::stripIndentation(). This could be removed by pre-allocating the right sized string in the arena, but this adds complexity and doesn't seem to improve performance, so for now we've left the copy in. --- src/libexpr/eval.cc | 3 +- src/libexpr/include/nix/expr/eval.hh | 5 ++ src/libexpr/include/nix/expr/nixexpr.hh | 31 ++++++-- src/libexpr/include/nix/expr/parser-state.hh | 3 +- src/libexpr/nixexpr.cc | 2 +- src/libexpr/parser.y | 74 +++++++++++--------- 6 files changed, 79 insertions(+), 39 deletions(-) diff --git a/src/libexpr/eval.cc b/src/libexpr/eval.cc index 6cf902e35..2df373520 100644 --- a/src/libexpr/eval.cc +++ b/src/libexpr/eval.cc @@ -3217,7 +3217,8 @@ Expr * EvalState::parse( docComments = &it->second; } - auto result = parseExprFromBuf(text, length, origin, basePath, symbols, settings, positions, *docComments, rootFS); + auto result = parseExprFromBuf( + text, length, origin, basePath, mem.exprs.alloc, symbols, settings, positions, *docComments, rootFS); result->bindVars(*this, staticEnv); diff --git a/src/libexpr/include/nix/expr/eval.hh b/src/libexpr/include/nix/expr/eval.hh index f61dab3a8..2601d8de8 100644 --- a/src/libexpr/include/nix/expr/eval.hh +++ b/src/libexpr/include/nix/expr/eval.hh @@ -355,6 +355,11 @@ public: return stats; } + /** + * Storage for the AST nodes + */ + Exprs exprs; + private: Statistics stats; }; diff --git a/src/libexpr/include/nix/expr/nixexpr.hh b/src/libexpr/include/nix/expr/nixexpr.hh index e0203c732..747a8e4b2 100644 --- a/src/libexpr/include/nix/expr/nixexpr.hh +++ b/src/libexpr/include/nix/expr/nixexpr.hh @@ -3,6 +3,7 @@ #include #include +#include #include "nix/expr/gc-small-vector.hh" #include "nix/expr/value.hh" @@ -84,6 +85,13 @@ std::string showAttrPath(const SymbolTable & symbols, const AttrPath & attrPath) using UpdateQueue = SmallTemporaryValueVector; +class Exprs +{ + std::pmr::monotonic_buffer_resource buffer; +public: + std::pmr::polymorphic_allocator alloc{&buffer}; +}; + /* Abstract syntax of Nix expressions. */ struct Expr @@ -173,13 +181,28 @@ struct ExprFloat : Expr struct ExprString : Expr { - std::string s; Value v; - ExprString(std::string && s) - : s(std::move(s)) + /** + * This is only for strings already allocated in our polymorphic allocator, + * or that live at least that long (e.g. c++ string literals) + */ + ExprString(const char * s) { - v.mkStringNoCopy(this->s.data()); + v.mkStringNoCopy(s); + }; + + ExprString(std::pmr::polymorphic_allocator & alloc, std::string_view sv) + { + auto len = sv.length(); + if (len == 0) { + v.mkStringNoCopy(""); + return; + } + char * s = alloc.allocate(len + 1); + sv.copy(s, len); + s[len] = '\0'; + v.mkStringNoCopy(s); }; Value * maybeThunk(EvalState & state, Env & env) override; diff --git a/src/libexpr/include/nix/expr/parser-state.hh b/src/libexpr/include/nix/expr/parser-state.hh index e689678de..758bedd97 100644 --- a/src/libexpr/include/nix/expr/parser-state.hh +++ b/src/libexpr/include/nix/expr/parser-state.hh @@ -82,6 +82,7 @@ struct LexerState struct ParserState { const LexerState & lexerState; + std::pmr::polymorphic_allocator & alloc; SymbolTable & symbols; PosTable & positions; Expr * result; @@ -327,7 +328,7 @@ ParserState::stripIndentation(const PosIdx pos, std::vectoremplace_back(i->first, new ExprString(std::move(s2))); + es2->emplace_back(i->first, new ExprString(alloc, s2)); } }; for (; i != es.end(); ++i, --n) { diff --git a/src/libexpr/nixexpr.cc b/src/libexpr/nixexpr.cc index 43e85cb16..a2980af6b 100644 --- a/src/libexpr/nixexpr.cc +++ b/src/libexpr/nixexpr.cc @@ -40,7 +40,7 @@ void ExprFloat::show(const SymbolTable & symbols, std::ostream & str) const void ExprString::show(const SymbolTable & symbols, std::ostream & str) const { - printLiteralString(str, s); + printLiteralString(str, v.string_view()); } void ExprPath::show(const SymbolTable & symbols, std::ostream & str) const diff --git a/src/libexpr/parser.y b/src/libexpr/parser.y index 89da001ef..515e08e62 100644 --- a/src/libexpr/parser.y +++ b/src/libexpr/parser.y @@ -64,6 +64,7 @@ Expr * parseExprFromBuf( size_t length, Pos::Origin origin, const SourcePath & basePath, + std::pmr::polymorphic_allocator & alloc, SymbolTable & symbols, const EvalSettings & settings, PosTable & positions, @@ -134,6 +135,7 @@ static Expr * makeCall(PosIdx pos, Expr * fn, Expr * arg) { std::vector * attrNames; std::vector> * inheritAttrs; std::vector> * string_parts; + std::variant * to_be_string; std::vector>> * ind_string_parts; } @@ -148,7 +150,8 @@ static Expr * makeCall(PosIdx pos, Expr * fn, Expr * arg) { %type attrs %type string_parts_interpolated %type ind_string_parts -%type path_start string_parts string_attr +%type path_start +%type string_parts string_attr %type attr %token ID %token STR IND_STR @@ -303,7 +306,13 @@ expr_simple } | INT_LIT { $$ = new ExprInt($1); } | FLOAT_LIT { $$ = new ExprFloat($1); } - | '"' string_parts '"' { $$ = $2; } + | '"' string_parts '"' { + std::visit(overloaded{ + [&](std::string_view str) { $$ = new ExprString(state->alloc, str); }, + [&](Expr * expr) { $$ = expr; }}, + *$2); + delete $2; + } | IND_STRING_OPEN ind_string_parts IND_STRING_CLOSE { $$ = state->stripIndentation(CUR_POS, std::move(*$2)); delete $2; @@ -314,11 +323,11 @@ expr_simple $$ = new ExprConcatStrings(CUR_POS, false, $2); } | SPATH { - std::string path($1.p + 1, $1.l - 2); + std::string_view path($1.p + 1, $1.l - 2); $$ = new ExprCall(CUR_POS, new ExprVar(state->s.findFile), {new ExprVar(state->s.nixPath), - new ExprString(std::move(path))}); + new ExprString(state->alloc, path)}); } | URI { static bool noURLLiterals = experimentalFeatureSettings.isEnabled(Xp::NoUrlLiterals); @@ -327,7 +336,7 @@ expr_simple .msg = HintFmt("URL literals are disabled"), .pos = state->positions[CUR_POS] }); - $$ = new ExprString(std::string($1)); + $$ = new ExprString(state->alloc, $1); } | '(' expr ')' { $$ = $2; } /* Let expressions `let {..., body = ...}' are just desugared @@ -344,19 +353,19 @@ expr_simple ; string_parts - : STR { $$ = new ExprString(std::string($1)); } - | string_parts_interpolated { $$ = new ExprConcatStrings(CUR_POS, true, $1); } - | { $$ = new ExprString(""); } + : STR { $$ = new std::variant($1); } + | string_parts_interpolated { $$ = new std::variant(new ExprConcatStrings(CUR_POS, true, $1)); } + | { $$ = new std::variant(std::string_view()); } ; string_parts_interpolated : string_parts_interpolated STR - { $$ = $1; $1->emplace_back(state->at(@2), new ExprString(std::string($2))); } + { $$ = $1; $1->emplace_back(state->at(@2), new ExprString(state->alloc, $2)); } | string_parts_interpolated DOLLAR_CURLY expr '}' { $$ = $1; $1->emplace_back(state->at(@2), $3); } | DOLLAR_CURLY expr '}' { $$ = new std::vector>; $$->emplace_back(state->at(@1), $2); } | STR DOLLAR_CURLY expr '}' { $$ = new std::vector>; - $$->emplace_back(state->at(@1), new ExprString(std::string($1))); + $$->emplace_back(state->at(@1), new ExprString(state->alloc, $1)); $$->emplace_back(state->at(@2), $3); } ; @@ -454,15 +463,16 @@ attrs : attrs attr { $$ = $1; $1->emplace_back(AttrName(state->symbols.create($2)), state->at(@2)); } | attrs string_attr { $$ = $1; - ExprString * str = dynamic_cast($2); - if (str) { - $$->emplace_back(AttrName(state->symbols.create(str->s)), state->at(@2)); - delete str; - } else - throw ParseError({ - .msg = HintFmt("dynamic attributes not allowed in inherit"), - .pos = state->positions[state->at(@2)] - }); + std::visit(overloaded { + [&](std::string_view str) { $$->emplace_back(AttrName(state->symbols.create(str)), state->at(@2)); }, + [&](Expr * expr) { + throw ParseError({ + .msg = HintFmt("dynamic attributes not allowed in inherit"), + .pos = state->positions[state->at(@2)] + }); + } + }, *$2); + delete $2; } | { $$ = new std::vector>; } ; @@ -471,22 +481,20 @@ attrpath : attrpath '.' attr { $$ = $1; $1->push_back(AttrName(state->symbols.create($3))); } | attrpath '.' string_attr { $$ = $1; - ExprString * str = dynamic_cast($3); - if (str) { - $$->push_back(AttrName(state->symbols.create(str->s))); - delete str; - } else - $$->push_back(AttrName($3)); + std::visit(overloaded { + [&](std::string_view str) { $$->push_back(AttrName(state->symbols.create(str))); }, + [&](Expr * expr) { $$->push_back(AttrName(expr)); } + }, *$3); + delete $3; } | attr { $$ = new std::vector; $$->push_back(AttrName(state->symbols.create($1))); } | string_attr { $$ = new std::vector; - ExprString *str = dynamic_cast($1); - if (str) { - $$->push_back(AttrName(state->symbols.create(str->s))); - delete str; - } else - $$->push_back(AttrName($1)); + std::visit(overloaded { + [&](std::string_view str) { $$->push_back(AttrName(state->symbols.create(str))); }, + [&](Expr * expr) { $$->push_back(AttrName(expr)); } + }, *$1); + delete $1; } ; @@ -497,7 +505,7 @@ attr string_attr : '"' string_parts '"' { $$ = $2; } - | DOLLAR_CURLY expr '}' { $$ = $2; } + | DOLLAR_CURLY expr '}' { $$ = new std::variant($2); } ; expr_list @@ -537,6 +545,7 @@ Expr * parseExprFromBuf( size_t length, Pos::Origin origin, const SourcePath & basePath, + std::pmr::polymorphic_allocator & alloc, SymbolTable & symbols, const EvalSettings & settings, PosTable & positions, @@ -551,6 +560,7 @@ Expr * parseExprFromBuf( }; ParserState state { .lexerState = lexerState, + .alloc = alloc, .symbols = symbols, .positions = positions, .basePath = basePath,