1
1
Fork 0
mirror of https://github.com/NixOS/nix.git synced 2025-11-09 20:16:03 +01:00

libexpr: introduce arena to hold ExprString strings

1. Saves 24-32 bytes per string (size of std::string)
2. Saves additional bytes by not over-allocating strings (in total we
save ~1% memory)
3. Sets us up to perform a similar transformation on the other Expr
subclasses
4. Makes ExprString trivially movable (previously, moving it could move
the string data, leaving the Value's pointer invalid). This is important
so we can put ExprStrings in a std::vector and refer to them by index

This introduces a string copy in ParserState::stripIndentation().
The copy could be removed by pre-allocating a right-sized string in the
arena, but that adds complexity and doesn't seem to improve performance,
so for now we've left the copy in.
This commit is contained in:
Taeer Bar-Yam 2025-09-19 14:06:15 -04:00
parent c43ea09b9b
commit eab467ecfb
6 changed files with 79 additions and 39 deletions

View file

@ -3217,7 +3217,8 @@ Expr * EvalState::parse(
docComments = &it->second; docComments = &it->second;
} }
auto result = parseExprFromBuf(text, length, origin, basePath, symbols, settings, positions, *docComments, rootFS); auto result = parseExprFromBuf(
text, length, origin, basePath, mem.exprs.alloc, symbols, settings, positions, *docComments, rootFS);
result->bindVars(*this, staticEnv); result->bindVars(*this, staticEnv);

View file

@ -355,6 +355,11 @@ public:
return stats; return stats;
} }
/**
* Storage for the AST nodes
*/
Exprs exprs;
private: private:
Statistics stats; Statistics stats;
}; };

View file

@ -3,6 +3,7 @@
#include <map> #include <map>
#include <vector> #include <vector>
#include <memory_resource>
#include "nix/expr/gc-small-vector.hh" #include "nix/expr/gc-small-vector.hh"
#include "nix/expr/value.hh" #include "nix/expr/value.hh"
@ -84,6 +85,13 @@ std::string showAttrPath(const SymbolTable & symbols, const AttrPath & attrPath)
using UpdateQueue = SmallTemporaryValueVector<conservativeStackReservation>; using UpdateQueue = SmallTemporaryValueVector<conservativeStackReservation>;
/**
 * Arena-style storage used while building the AST.
 *
 * Owns a `std::pmr::monotonic_buffer_resource` and exposes a char
 * allocator bound to it. A monotonic resource only hands memory back when
 * the resource itself is released or destroyed, so anything obtained
 * through `alloc` stays valid for as long as this object does.
 */
class Exprs
{
std::pmr::monotonic_buffer_resource buffer;
public:
// Holds the address of `buffer`; declared after it so the resource is
// constructed first.
std::pmr::polymorphic_allocator<char> alloc{&buffer};
};
/* Abstract syntax of Nix expressions. */ /* Abstract syntax of Nix expressions. */
struct Expr struct Expr
@ -173,13 +181,28 @@ struct ExprFloat : Expr
struct ExprString : Expr struct ExprString : Expr
{ {
std::string s;
Value v; Value v;
ExprString(std::string && s) /**
: s(std::move(s)) * This is only for strings already allocated in our polymorphic allocator,
* or that live at least that long (e.g. c++ string literals)
*/
ExprString(const char * s)
{ {
v.mkStringNoCopy(this->s.data()); v.mkStringNoCopy(s);
};
/**
 * Build a string expression from `sv` by copying its bytes into storage
 * obtained from `alloc` and appending a NUL terminator.
 *
 * @param alloc allocator that supplies the storage for the copy
 * @param sv    contents of the string; it is copied by length, so it does
 *              not need to be NUL-terminated
 */
ExprString(std::pmr::polymorphic_allocator<char> & alloc, std::string_view sv)
{
auto len = sv.length();
// Empty strings point at a string literal instead of taking space from
// the allocator.
if (len == 0) {
v.mkStringNoCopy("");
return;
}
// One extra byte for the terminator written below.
char * s = alloc.allocate(len + 1);
sv.copy(s, len);
s[len] = '\0';
// NOTE(review): mkStringNoCopy presumably stores the pointer as-is, so the
// buffer must outlive `v` — confirm against Value.
v.mkStringNoCopy(s);
};
Value * maybeThunk(EvalState & state, Env & env) override; Value * maybeThunk(EvalState & state, Env & env) override;

View file

@ -82,6 +82,7 @@ struct LexerState
struct ParserState struct ParserState
{ {
const LexerState & lexerState; const LexerState & lexerState;
std::pmr::polymorphic_allocator<char> & alloc;
SymbolTable & symbols; SymbolTable & symbols;
PosTable & positions; PosTable & positions;
Expr * result; Expr * result;
@ -327,7 +328,7 @@ ParserState::stripIndentation(const PosIdx pos, std::vector<std::pair<PosIdx, st
// Ignore empty strings for a minor optimisation and AST simplification // Ignore empty strings for a minor optimisation and AST simplification
if (s2 != "") { if (s2 != "") {
es2->emplace_back(i->first, new ExprString(std::move(s2))); es2->emplace_back(i->first, new ExprString(alloc, s2));
} }
}; };
for (; i != es.end(); ++i, --n) { for (; i != es.end(); ++i, --n) {

View file

@ -40,7 +40,7 @@ void ExprFloat::show(const SymbolTable & symbols, std::ostream & str) const
void ExprString::show(const SymbolTable & symbols, std::ostream & str) const void ExprString::show(const SymbolTable & symbols, std::ostream & str) const
{ {
printLiteralString(str, s); printLiteralString(str, v.string_view());
} }
void ExprPath::show(const SymbolTable & symbols, std::ostream & str) const void ExprPath::show(const SymbolTable & symbols, std::ostream & str) const

View file

@ -64,6 +64,7 @@ Expr * parseExprFromBuf(
size_t length, size_t length,
Pos::Origin origin, Pos::Origin origin,
const SourcePath & basePath, const SourcePath & basePath,
std::pmr::polymorphic_allocator<char> & alloc,
SymbolTable & symbols, SymbolTable & symbols,
const EvalSettings & settings, const EvalSettings & settings,
PosTable & positions, PosTable & positions,
@ -134,6 +135,7 @@ static Expr * makeCall(PosIdx pos, Expr * fn, Expr * arg) {
std::vector<nix::AttrName> * attrNames; std::vector<nix::AttrName> * attrNames;
std::vector<std::pair<nix::AttrName, nix::PosIdx>> * inheritAttrs; std::vector<std::pair<nix::AttrName, nix::PosIdx>> * inheritAttrs;
std::vector<std::pair<nix::PosIdx, nix::Expr *>> * string_parts; std::vector<std::pair<nix::PosIdx, nix::Expr *>> * string_parts;
std::variant<nix::Expr *, std::string_view> * to_be_string;
std::vector<std::pair<nix::PosIdx, std::variant<nix::Expr *, nix::StringToken>>> * ind_string_parts; std::vector<std::pair<nix::PosIdx, std::variant<nix::Expr *, nix::StringToken>>> * ind_string_parts;
} }
@ -148,7 +150,8 @@ static Expr * makeCall(PosIdx pos, Expr * fn, Expr * arg) {
%type <inheritAttrs> attrs %type <inheritAttrs> attrs
%type <string_parts> string_parts_interpolated %type <string_parts> string_parts_interpolated
%type <ind_string_parts> ind_string_parts %type <ind_string_parts> ind_string_parts
%type <e> path_start string_parts string_attr %type <e> path_start
%type <to_be_string> string_parts string_attr
%type <id> attr %type <id> attr
%token <id> ID %token <id> ID
%token <str> STR IND_STR %token <str> STR IND_STR
@ -303,7 +306,13 @@ expr_simple
} }
| INT_LIT { $$ = new ExprInt($1); } | INT_LIT { $$ = new ExprInt($1); }
| FLOAT_LIT { $$ = new ExprFloat($1); } | FLOAT_LIT { $$ = new ExprFloat($1); }
| '"' string_parts '"' { $$ = $2; } | '"' string_parts '"' {
std::visit(overloaded{
[&](std::string_view str) { $$ = new ExprString(state->alloc, str); },
[&](Expr * expr) { $$ = expr; }},
*$2);
delete $2;
}
| IND_STRING_OPEN ind_string_parts IND_STRING_CLOSE { | IND_STRING_OPEN ind_string_parts IND_STRING_CLOSE {
$$ = state->stripIndentation(CUR_POS, std::move(*$2)); $$ = state->stripIndentation(CUR_POS, std::move(*$2));
delete $2; delete $2;
@ -314,11 +323,11 @@ expr_simple
$$ = new ExprConcatStrings(CUR_POS, false, $2); $$ = new ExprConcatStrings(CUR_POS, false, $2);
} }
| SPATH { | SPATH {
std::string path($1.p + 1, $1.l - 2); std::string_view path($1.p + 1, $1.l - 2);
$$ = new ExprCall(CUR_POS, $$ = new ExprCall(CUR_POS,
new ExprVar(state->s.findFile), new ExprVar(state->s.findFile),
{new ExprVar(state->s.nixPath), {new ExprVar(state->s.nixPath),
new ExprString(std::move(path))}); new ExprString(state->alloc, path)});
} }
| URI { | URI {
static bool noURLLiterals = experimentalFeatureSettings.isEnabled(Xp::NoUrlLiterals); static bool noURLLiterals = experimentalFeatureSettings.isEnabled(Xp::NoUrlLiterals);
@ -327,7 +336,7 @@ expr_simple
.msg = HintFmt("URL literals are disabled"), .msg = HintFmt("URL literals are disabled"),
.pos = state->positions[CUR_POS] .pos = state->positions[CUR_POS]
}); });
$$ = new ExprString(std::string($1)); $$ = new ExprString(state->alloc, $1);
} }
| '(' expr ')' { $$ = $2; } | '(' expr ')' { $$ = $2; }
/* Let expressions `let {..., body = ...}' are just desugared /* Let expressions `let {..., body = ...}' are just desugared
@ -344,19 +353,19 @@ expr_simple
; ;
string_parts string_parts
: STR { $$ = new ExprString(std::string($1)); } : STR { $$ = new std::variant<Expr *, std::string_view>($1); }
| string_parts_interpolated { $$ = new ExprConcatStrings(CUR_POS, true, $1); } | string_parts_interpolated { $$ = new std::variant<Expr *, std::string_view>(new ExprConcatStrings(CUR_POS, true, $1)); }
| { $$ = new ExprString(""); } | { $$ = new std::variant<Expr *, std::string_view>(std::string_view()); }
; ;
string_parts_interpolated string_parts_interpolated
: string_parts_interpolated STR : string_parts_interpolated STR
{ $$ = $1; $1->emplace_back(state->at(@2), new ExprString(std::string($2))); } { $$ = $1; $1->emplace_back(state->at(@2), new ExprString(state->alloc, $2)); }
| string_parts_interpolated DOLLAR_CURLY expr '}' { $$ = $1; $1->emplace_back(state->at(@2), $3); } | string_parts_interpolated DOLLAR_CURLY expr '}' { $$ = $1; $1->emplace_back(state->at(@2), $3); }
| DOLLAR_CURLY expr '}' { $$ = new std::vector<std::pair<PosIdx, Expr *>>; $$->emplace_back(state->at(@1), $2); } | DOLLAR_CURLY expr '}' { $$ = new std::vector<std::pair<PosIdx, Expr *>>; $$->emplace_back(state->at(@1), $2); }
| STR DOLLAR_CURLY expr '}' { | STR DOLLAR_CURLY expr '}' {
$$ = new std::vector<std::pair<PosIdx, Expr *>>; $$ = new std::vector<std::pair<PosIdx, Expr *>>;
$$->emplace_back(state->at(@1), new ExprString(std::string($1))); $$->emplace_back(state->at(@1), new ExprString(state->alloc, $1));
$$->emplace_back(state->at(@2), $3); $$->emplace_back(state->at(@2), $3);
} }
; ;
@ -454,16 +463,17 @@ attrs
: attrs attr { $$ = $1; $1->emplace_back(AttrName(state->symbols.create($2)), state->at(@2)); } : attrs attr { $$ = $1; $1->emplace_back(AttrName(state->symbols.create($2)), state->at(@2)); }
| attrs string_attr | attrs string_attr
{ $$ = $1; { $$ = $1;
ExprString * str = dynamic_cast<ExprString *>($2); std::visit(overloaded {
if (str) { [&](std::string_view str) { $$->emplace_back(AttrName(state->symbols.create(str)), state->at(@2)); },
$$->emplace_back(AttrName(state->symbols.create(str->s)), state->at(@2)); [&](Expr * expr) {
delete str;
} else
throw ParseError({ throw ParseError({
.msg = HintFmt("dynamic attributes not allowed in inherit"), .msg = HintFmt("dynamic attributes not allowed in inherit"),
.pos = state->positions[state->at(@2)] .pos = state->positions[state->at(@2)]
}); });
} }
}, *$2);
delete $2;
}
| { $$ = new std::vector<std::pair<AttrName, PosIdx>>; } | { $$ = new std::vector<std::pair<AttrName, PosIdx>>; }
; ;
@ -471,22 +481,20 @@ attrpath
: attrpath '.' attr { $$ = $1; $1->push_back(AttrName(state->symbols.create($3))); } : attrpath '.' attr { $$ = $1; $1->push_back(AttrName(state->symbols.create($3))); }
| attrpath '.' string_attr | attrpath '.' string_attr
{ $$ = $1; { $$ = $1;
ExprString * str = dynamic_cast<ExprString *>($3); std::visit(overloaded {
if (str) { [&](std::string_view str) { $$->push_back(AttrName(state->symbols.create(str))); },
$$->push_back(AttrName(state->symbols.create(str->s))); [&](Expr * expr) { $$->push_back(AttrName(expr)); }
delete str; }, *$3);
} else delete $3;
$$->push_back(AttrName($3));
} }
| attr { $$ = new std::vector<AttrName>; $$->push_back(AttrName(state->symbols.create($1))); } | attr { $$ = new std::vector<AttrName>; $$->push_back(AttrName(state->symbols.create($1))); }
| string_attr | string_attr
{ $$ = new std::vector<AttrName>; { $$ = new std::vector<AttrName>;
ExprString *str = dynamic_cast<ExprString *>($1); std::visit(overloaded {
if (str) { [&](std::string_view str) { $$->push_back(AttrName(state->symbols.create(str))); },
$$->push_back(AttrName(state->symbols.create(str->s))); [&](Expr * expr) { $$->push_back(AttrName(expr)); }
delete str; }, *$1);
} else delete $1;
$$->push_back(AttrName($1));
} }
; ;
@ -497,7 +505,7 @@ attr
string_attr string_attr
: '"' string_parts '"' { $$ = $2; } : '"' string_parts '"' { $$ = $2; }
| DOLLAR_CURLY expr '}' { $$ = $2; } | DOLLAR_CURLY expr '}' { $$ = new std::variant<Expr *, std::string_view>($2); }
; ;
expr_list expr_list
@ -537,6 +545,7 @@ Expr * parseExprFromBuf(
size_t length, size_t length,
Pos::Origin origin, Pos::Origin origin,
const SourcePath & basePath, const SourcePath & basePath,
std::pmr::polymorphic_allocator<char> & alloc,
SymbolTable & symbols, SymbolTable & symbols,
const EvalSettings & settings, const EvalSettings & settings,
PosTable & positions, PosTable & positions,
@ -551,6 +560,7 @@ Expr * parseExprFromBuf(
}; };
ParserState state { ParserState state {
.lexerState = lexerState, .lexerState = lexerState,
.alloc = alloc,
.symbols = symbols, .symbols = symbols,
.positions = positions, .positions = positions,
.basePath = basePath, .basePath = basePath,