1
1
Fork 0
mirror of https://github.com/NixOS/nix.git synced 2025-11-08 19:46:02 +01:00

libexpr: Structural sharing of attrsets

This changes the implementation of Bindings to allow
for a more space-efficient implementation of attribute
set merges. This is accomplished by "layering" over the "base" Bindings.
The top "layer" is naturally the right-hand-side of the update operator //.

Such an implementation leads to significantly better memory usage on
something like nixpkgs:

nix-env --query --available --out-path --file ../nixpkgs --eval-system x86_64-linux > /dev/null

Comparison against 2b0fd88324 for x86_64-linux on nixpkgs f06c7c3b6f5074dbffcf02542fb86af3a5526afa:

| metric                 | mean_before     | mean_after      | mean_diff       | mean_%_change | p_value | t_stat  |
| -                      | -               | -               | -               | -             | -       | -       |
| cpuTime                | 21.1520         | 21.3414         | 0.1894          | 0.7784        | 0.3190  | 1.0219  |
| envs.bytes             | 461451951.6190  | 461451951.6190  | -               | -             | -       | -       |
| envs.elements          | 34344544.8571   | 34344544.8571   | -               | -             | -       | -       |
| envs.number            | 23336949.0952   | 23336949.0952   | -               | -             | -       | -       |
| gc.cycles              | 7.5238          | 7.2857          | -0.2381         | -4.6825       | 0.0565  | -2.0244 |
| gc.heapSize            | 1777848124.9524 | 1252162023.6190 | -525686101.3333 | -29.9472      | 0.0000  | -8.7041 |
| gc.totalBytes          | 3102787383.6190 | 2498431578.6667 | -604355804.9524 | -19.7704      | 0.0000  | -9.3502 |
| list.bytes             | 59928225.9048   | 59928225.9048   | -               | -             | -       | -       |
| list.concats           | 1240028.2857    | 1240028.2857    | -               | -             | -       | -       |
| list.elements          | 7491028.2381    | 7491028.2381    | -               | -             | -       | -       |
| nrAvoided              | 28165342.2381   | 28165342.2381   | -               | -             | -       | -       |
| nrExprs                | 1577412.9524    | 1577412.9524    | -               | -             | -       | -       |
| nrFunctionCalls        | 20970743.4286   | 20970743.4286   | -               | -             | -       | -       |
| nrLookups              | 10867306.0952   | 10867306.0952   | -               | -             | -       | -       |
| nrOpUpdateValuesCopied | 61206062.0000   | 25748169.5238   | -35457892.4762  | -58.8145      | 0.0000  | -8.9189 |
| nrOpUpdates            | 2167097.4286    | 2167097.4286    | -               | -             | -       | -       |
| nrPrimOpCalls          | 12337423.4286   | 12337423.4286   | -               | -             | -       | -       |
| nrThunks               | 29361806.7619   | 29361806.7619   | -               | -             | -       | -       |
| sets.bytes             | 1393822818.6667 | 897587655.2381  | -496235163.4286 | -36.7168      | 0.0000  | -9.1115 |
| sets.elements          | 84504465.3333   | 48270845.9524   | -36233619.3810  | -43.8698      | 0.0000  | -8.9181 |
| sets.number            | 5218921.6667    | 5218921.6667    | -               | -             | -       | -       |
| sizes.Attr             | 16.0000         | 16.0000         | -               | -             | -       | -       |
| sizes.Bindings         | 8.0000          | 24.0000         | 16.0000         | 200.0000      | -       | inf     |
| sizes.Env              | 8.0000          | 8.0000          | -               | -             | -       | -       |
| sizes.Value            | 16.0000         | 16.0000         | -               | -             | -       | -       |
| symbols.bytes          | 1368494.0952    | 1368494.0952    | -               | -             | -       | -       |
| symbols.number         | 109147.1905     | 109147.1905     | -               | -             | -       | -       |
| time.cpu               | 21.1520         | 21.3414         | 0.1894          | 0.7784        | 0.3190  | 1.0219  |
| time.gc                | 1.6011          | 0.8508          | -0.7503         | -37.1507      | 0.0017  | -3.6328 |
| time.gcFraction        | 0.0849          | 0.0399          | -0.0450         | -37.4504      | 0.0035  | -3.3116 |
| values.bytes           | 615968144.7619  | 615968144.7619  | -               | -             | -       | -       |
| values.number          | 38498009.0476   | 38498009.0476   | -               | -             | -       | -       |

Overall this does slow down the evaluator slightly (no more than ~10% in most cases),
but this seems like a very decent tradeoff for shaving off 33% of memory usage.
This commit is contained in:
Sergei Zimmerman 2025-09-15 22:16:17 +03:00
parent 3eb223f4bb
commit 6138bc3de3
No known key found for this signature in database
7 changed files with 424 additions and 63 deletions

View file

@ -371,13 +371,24 @@ bool nix_has_attr_byname(nix_c_context * context, const nix_value * value, EvalS
NIXC_CATCH_ERRS_RES(false);
}
nix_value * nix_get_attr_byidx(
nix_c_context * context, const nix_value * value, EvalState * state, unsigned int i, const char ** name)
static void collapse_attrset_layer_chain_if_needed(nix::Value & v, EvalState * state)
{
auto & attrs = *v.attrs();
if (attrs.isLayered()) {
auto bindings = state->state.buildBindings(attrs.size());
std::ranges::copy(attrs, std::back_inserter(bindings));
v.mkAttrs(bindings);
}
}
nix_value *
nix_get_attr_byidx(nix_c_context * context, nix_value * value, EvalState * state, unsigned int i, const char ** name)
{
if (context)
context->last_err_code = NIX_OK;
try {
auto & v = check_value_in(value);
collapse_attrset_layer_chain_if_needed(v, state);
const nix::Attr & a = (*v.attrs())[i];
*name = state->state.symbols[a.name].c_str();
nix_gc_incref(nullptr, a.value);
@ -387,13 +398,13 @@ nix_value * nix_get_attr_byidx(
NIXC_CATCH_ERRS_NULL
}
const char *
nix_get_attr_name_byidx(nix_c_context * context, const nix_value * value, EvalState * state, unsigned int i)
const char * nix_get_attr_name_byidx(nix_c_context * context, nix_value * value, EvalState * state, unsigned int i)
{
if (context)
context->last_err_code = NIX_OK;
try {
auto & v = check_value_in(value);
collapse_attrset_layer_chain_if_needed(v, state);
const nix::Attr & a = (*v.attrs())[i];
return state->state.symbols[a.name].c_str();
}

View file

@ -297,8 +297,8 @@ bool nix_has_attr_byname(nix_c_context * context, const nix_value * value, EvalS
* @param[out] name will store a pointer to the attribute name
* @return value, NULL in case of errors
*/
nix_value * nix_get_attr_byidx(
nix_c_context * context, const nix_value * value, EvalState * state, unsigned int i, const char ** name);
nix_value *
nix_get_attr_byidx(nix_c_context * context, nix_value * value, EvalState * state, unsigned int i, const char ** name);
/** @brief Get an attribute name by index in the sorted bindings
*
@ -311,8 +311,7 @@ nix_value * nix_get_attr_byidx(
* @param[in] i attribute index
* @return name, NULL in case of errors
*/
const char *
nix_get_attr_name_byidx(nix_c_context * context, const nix_value * value, EvalState * state, unsigned int i);
const char * nix_get_attr_name_byidx(nix_c_context * context, nix_value * value, EvalState * state, unsigned int i);
/**@}*/
/** @name Initializers

View file

@ -437,4 +437,31 @@ TEST_F(nix_api_expr_test, nix_value_call_multi_no_args)
assert_ctx_ok();
ASSERT_EQ(3, rInt);
}
TEST_F(nix_api_expr_test, nix_expr_attrset_update)
{
nix_expr_eval_from_string(ctx, state, "{ a = 0; b = 2; } // { a = 1; b = 3; } // { a = 2; }", ".", value);
assert_ctx_ok();
ASSERT_EQ(nix_get_attrs_size(ctx, value), 2);
assert_ctx_ok();
std::array<std::pair<std::string_view, nix_value *>, 2> values;
for (unsigned int i = 0; i < 2; ++i) {
const char * name;
values[i].second = nix_get_attr_byidx(ctx, value, state, i, &name);
assert_ctx_ok();
values[i].first = name;
}
std::sort(values.begin(), values.end(), [](const auto & lhs, const auto & rhs) { return lhs.first < rhs.first; });
nix_value * a = values[0].second;
ASSERT_EQ("a", values[0].first);
ASSERT_EQ(nix_get_int(ctx, a), 2);
assert_ctx_ok();
nix_value * b = values[1].second;
ASSERT_EQ("b", values[1].first);
ASSERT_EQ(nix_get_int(ctx, b), 3);
assert_ctx_ok();
}
} // namespace nixC

View file

@ -14,7 +14,7 @@ Bindings * EvalState::allocBindings(size_t capacity)
{
if (capacity == 0)
return &Bindings::emptyBindings;
if (capacity > std::numeric_limits<Bindings::size_t>::max())
if (capacity > std::numeric_limits<Bindings::size_type>::max())
throw Error("attribute set of size %d is too big", capacity);
nrAttrsets++;
nrAttrsInAttrsets += capacity;
@ -35,7 +35,7 @@ Value & BindingsBuilder::alloc(std::string_view name, PosIdx pos)
void Bindings::sort()
{
std::sort(attrs, attrs + size_);
std::sort(attrs, attrs + numAttrs);
}
Value & Value::mkAttrs(BindingsBuilder & bindings)

View file

@ -1873,37 +1873,71 @@ void ExprOpUpdate::eval(EvalState & state, Env & env, Value & v)
state.nrOpUpdates++;
if (v1.attrs()->size() == 0) {
const Bindings & bindings1 = *v1.attrs();
if (bindings1.empty()) {
v = v2;
return;
}
if (v2.attrs()->size() == 0) {
const Bindings & bindings2 = *v2.attrs();
if (bindings2.empty()) {
v = v1;
return;
}
auto attrs = state.buildBindings(v1.attrs()->size() + v2.attrs()->size());
/* Simple heuristic for determining whether attrs2 should be "layered" on top of
attrs1 instead of copying to a new Bindings. */
bool shouldLayer = [&]() -> bool {
if (bindings1.isLayerListFull())
return false;
if (bindings2.size() > state.settings.bindingsUpdateLayerRhsSizeThreshold)
return false;
return true;
}();
if (shouldLayer) {
auto attrs = state.buildBindings(bindings2.size());
attrs.layerOnTopOf(bindings1);
std::ranges::copy(bindings2, std::back_inserter(attrs));
v.mkAttrs(attrs.alreadySorted());
state.nrOpUpdateValuesCopied += bindings2.size();
return;
}
auto attrs = state.buildBindings(bindings1.size() + bindings2.size());
/* Merge the sets, preferring values from the second set. Make
sure to keep the resulting vector in sorted order. */
auto i = v1.attrs()->begin();
auto j = v2.attrs()->begin();
auto i = bindings1.begin();
auto j = bindings2.begin();
while (i != v1.attrs()->end() && j != v2.attrs()->end()) {
while (i != bindings1.end() && j != bindings2.end()) {
if (i->name == j->name) {
attrs.insert(*j);
++i;
++j;
} else if (i->name < j->name)
attrs.insert(*i++);
else
attrs.insert(*j++);
} else if (i->name < j->name) {
attrs.insert(*i);
++i;
} else {
attrs.insert(*j);
++j;
}
}
while (i != v1.attrs()->end())
attrs.insert(*i++);
while (j != v2.attrs()->end())
attrs.insert(*j++);
while (i != bindings1.end()) {
attrs.insert(*i);
++i;
}
while (j != bindings2.end()) {
attrs.insert(*j);
++j;
}
v.mkAttrs(attrs.alreadySorted());

View file

@ -4,9 +4,12 @@
#include "nix/expr/nixexpr.hh"
#include "nix/expr/symbol-table.hh"
#include <boost/container/static_vector.hpp>
#include <algorithm>
#include <functional>
#include <concepts>
#include <ranges>
#include <optional>
namespace nix {
@ -48,11 +51,18 @@ static_assert(
* by its size and its capacity, the capacity being the number of Attr
* elements allocated after this structure, while the size corresponds to
* the number of elements already inserted in this structure.
*
* Bindings can be efficiently `//`-composed into an intrusive linked list of "layers"
* that saves on copies and allocations. Each lookup (@see Bindings::get) traverses
* this linked list until a matching attribute is found (thus overlays earlier in
* the list take precedence). For iteration over the whole Bindings, an on-the-fly
* k-way merge is performed by Bindings::iterator class.
*/
class Bindings
{
public:
typedef uint32_t size_t;
using size_type = uint32_t;
PosIdx pos;
/**
@ -62,7 +72,32 @@ public:
static Bindings emptyBindings;
private:
size_t size_ = 0;
/**
* Number of attributes in the attrs FAM (Flexible Array Member).
*/
size_type numAttrs = 0;
/**
* Number of attributes with unique names in the layer chain.
*
* This is the *real* user-facing size of bindings, whereas @ref numAttrs is
* an implementation detail of the data structure.
*/
size_type numAttrsInChain = 0;
/**
* Length of the layers list.
*/
uint32_t numLayers = 1;
/**
* Bindings that this attrset is "layered" on top of.
*/
const Bindings * baseLayer = nullptr;
/**
* Flexible array member of attributes.
*/
Attr attrs[0];
Bindings() = default;
@ -71,15 +106,22 @@ private:
Bindings & operator=(const Bindings &) = delete;
Bindings & operator=(Bindings &&) = delete;
friend class BindingsBuilder;
/**
* Maximum length of the Bindings layer chains.
*/
static constexpr unsigned maxLayers = 8;
public:
size_t size() const
size_type size() const
{
return size_;
return numAttrsInChain;
}
bool empty() const
{
return !size_;
return size() == 0;
}
class iterator
@ -94,77 +136,276 @@ public:
friend class Bindings;
private:
pointer ptr = nullptr;
explicit iterator(pointer ptr)
: ptr(ptr)
struct BindingsCursor
{
/**
* Attr that the cursor currently points to.
*/
pointer current;
/**
* One past the end pointer to the contiguous buffer of Attrs.
*/
pointer end;
/**
* Priority of the value. Lesser values have more priority (i.e. they override
* attributes that appear later in the linked list of Bindings).
*/
uint32_t priority;
pointer operator->() const noexcept
{
return current;
}
reference get() const noexcept
{
return *current;
}
bool empty() const noexcept
{
return current == end;
}
void increment() noexcept
{
++current;
}
void consume(Symbol name) noexcept
{
while (!empty() && current->name <= name)
++current;
}
GENERATE_CMP(BindingsCursor, me->current->name, me->priority)
};
using QueueStorageType = boost::container::static_vector<BindingsCursor, maxLayers>;
/**
* Comparator implementing the override priority / name ordering
* for BindingsCursor.
*/
static constexpr auto comp = std::greater<BindingsCursor>();
/**
* A priority queue used to implement an on-the-fly k-way merge.
*/
QueueStorageType cursorHeap;
/**
* The attribute the iterator currently points to.
*/
pointer current = nullptr;
/**
* Whether iterating over a single attribute and not a merge chain.
*/
bool doMerge = true;
void push(BindingsCursor cursor) noexcept
{
cursorHeap.push_back(cursor);
std::ranges::make_heap(cursorHeap, comp);
}
[[nodiscard]] BindingsCursor pop() noexcept
{
std::ranges::pop_heap(cursorHeap, comp);
auto cursor = cursorHeap.back();
cursorHeap.pop_back();
return cursor;
}
iterator & finished() noexcept
{
current = nullptr;
return *this;
}
void next(BindingsCursor cursor) noexcept
{
current = &cursor.get();
cursor.increment();
if (!cursor.empty())
push(cursor);
}
std::optional<BindingsCursor> consumeAllUntilCurrentName() noexcept
{
auto cursor = pop();
Symbol lastHandledName = current->name;
while (cursor->name <= lastHandledName) {
cursor.consume(lastHandledName);
if (!cursor.empty())
push(cursor);
if (cursorHeap.empty())
return std::nullopt;
cursor = pop();
}
return cursor;
}
explicit iterator(const Bindings & attrs) noexcept
: doMerge(attrs.baseLayer)
{
auto pushBindings = [this, priority = unsigned{0}](const Bindings & layer) mutable {
auto first = layer.attrs;
push(
BindingsCursor{
.current = first,
.end = first + layer.numAttrs,
.priority = priority++,
});
};
if (!doMerge) {
if (attrs.empty())
return;
current = attrs.attrs;
pushBindings(attrs);
return;
}
const Bindings * layer = &attrs;
while (layer) {
if (layer->numAttrs != 0)
pushBindings(*layer);
layer = layer->baseLayer;
}
if (cursorHeap.empty())
return;
next(pop());
}
public:
iterator() = default;
reference operator*() const
reference operator*() const noexcept
{
return *ptr;
return *current;
}
const value_type * operator->() const
pointer operator->() const noexcept
{
return ptr;
return current;
}
iterator & operator++()
iterator & operator++() noexcept
{
++ptr;
if (!doMerge) {
++current;
if (current == cursorHeap.front().end)
return finished();
return *this;
}
if (cursorHeap.empty())
return finished();
auto cursor = consumeAllUntilCurrentName();
if (!cursor)
return finished();
next(*cursor);
return *this;
}
iterator operator++(int)
iterator operator++(int) noexcept
{
pointer tmp = ptr;
iterator tmp = *this;
++*this;
return iterator(tmp);
return tmp;
}
bool operator==(const iterator & rhs) const = default;
bool operator==(const iterator & rhs) const noexcept
{
return current == rhs.current;
}
};
using const_iterator = iterator;
void push_back(const Attr & attr)
{
attrs[size_++] = attr;
attrs[numAttrs++] = attr;
numAttrsInChain = numAttrs;
}
const Attr * get(Symbol name) const
/**
* Get attribute by name or nullptr if no such attribute exists.
*/
const Attr * get(Symbol name) const noexcept
{
Attr key(name, 0);
auto first = attrs;
auto last = attrs + size_;
const Attr * i = std::lower_bound(first, last, key);
if (i != last && i->name == name)
return i;
auto getInChunk = [key = Attr{name, nullptr}](const Bindings & chunk) -> const Attr * {
auto first = chunk.attrs;
auto last = first + chunk.numAttrs;
const Attr * i = std::lower_bound(first, last, key);
if (i != last && i->name == key.name)
return i;
return nullptr;
};
const Bindings * currentChunk = this;
while (currentChunk) {
const Attr * maybeAttr = getInChunk(*currentChunk);
if (maybeAttr)
return maybeAttr;
currentChunk = currentChunk->baseLayer;
}
return nullptr;
}
/**
* Check if the layer chain is full.
*/
bool isLayerListFull() const noexcept
{
return numLayers == Bindings::maxLayers;
}
/**
* Test if the length of the linked list of layers is greater than 1.
*/
bool isLayered() const noexcept
{
return numLayers > 1;
}
const_iterator begin() const
{
return const_iterator(attrs);
return const_iterator(*this);
}
const_iterator end() const
{
return const_iterator(attrs + size_);
return const_iterator();
}
Attr & operator[](size_t pos)
Attr & operator[](size_type pos)
{
if (isLayered()) [[unlikely]]
unreachable();
return attrs[pos];
}
const Attr & operator[](size_t pos) const
const Attr & operator[](size_type pos) const
{
if (isLayered()) [[unlikely]]
unreachable();
return attrs[pos];
}
@ -176,10 +417,9 @@ public:
std::vector<const Attr *> lexicographicOrder(const SymbolTable & symbols) const
{
std::vector<const Attr *> res;
res.reserve(size_);
for (size_t n = 0; n < size_; n++)
res.emplace_back(&attrs[n]);
std::sort(res.begin(), res.end(), [&](const Attr * a, const Attr * b) {
res.reserve(size());
std::ranges::transform(*this, std::back_inserter(res), [](const Attr & a) { return &a; });
std::ranges::sort(res, [&](const Attr * a, const Attr * b) {
std::string_view sa = symbols[a->name], sb = symbols[b->name];
return sa < sb;
});
@ -202,11 +442,11 @@ class BindingsBuilder final
public:
// needed by std::back_inserter
using value_type = Attr;
using size_type = Bindings::size_t;
using size_type = Bindings::size_type;
private:
Bindings * bindings;
Bindings::size_t capacity_;
Bindings::size_type capacity_;
friend class EvalState;
@ -217,6 +457,19 @@ private:
{
}
bool hasBaseLayer() const noexcept
{
return bindings->baseLayer;
}
void finishSizeIfNecessary()
{
if (hasBaseLayer())
/* NOTE: Do not use std::ranges::distance, since Bindings is a sized
range, but we are calculating this size here. */
bindings->numAttrsInChain = std::distance(bindings->begin(), bindings->end());
}
public:
std::reference_wrapper<EvalState> state;
@ -232,10 +485,26 @@ public:
void push_back(const Attr & attr)
{
assert(bindings->size() < capacity_);
assert(bindings->numAttrs < capacity_);
bindings->push_back(attr);
}
/**
* "Layer" the newly constructured Bindings on top of another attribute set.
*
* This effectively performs an attribute set merge, while giving preference
* to attributes from the newly constructed Bindings in case of duplicate attribute
* names.
*
* This operation amortizes the need to copy over all attributes and allows
* for efficient implementation of attribute set merges (ExprOpUpdate::eval).
*/
void layerOnTopOf(const Bindings & base) noexcept
{
bindings->baseLayer = &base;
bindings->numLayers = base.numLayers + 1;
}
Value & alloc(Symbol name, PosIdx pos = noPos);
Value & alloc(std::string_view name, PosIdx pos = noPos);
@ -243,11 +512,13 @@ public:
Bindings * finish()
{
bindings->sort();
finishSizeIfNecessary();
return bindings;
}
Bindings * alreadySorted()
{
finishSizeIfNecessary();
return bindings;
}

View file

@ -342,6 +342,25 @@ struct EvalSettings : Config
This is useful for improving code readability and making path literals
more explicit.
)"};
Setting<unsigned> bindingsUpdateLayerRhsSizeThreshold{
this,
sizeof(void *) == 4 ? 8192 : 16,
"eval-attrset-update-layer-rhs-threshold",
R"(
Tunes the maximum size of an attribute set that, when used
as a right operand in an [attribute set update expression](@docroot@/language/operators.md#update),
uses a more space-efficient linked-list representation of attribute sets.
Setting this to larger values generally leads to less memory allocations,
but may lead to worse evaluation performance.
A value of `0` disables this optimization completely.
This is an advanced performance tuning option and typically should not be changed.
The default value is chosen to balance performance and memory usage. On 32 bit systems
where memory is scarce, the default is a large value to reduce the amount of allocations.
)"};
};
/**