From 074e0678bd78a6c7b5bb443c8391f6fac24905bf Mon Sep 17 00:00:00 2001
From: regnat
Date: Wed, 2 Jun 2021 14:52:09 +0200
Subject: [PATCH] Add an (always empty) eval cache to the attr sets

---
 src/libexpr/attr-set.hh   |   2 +
 src/libexpr/context.cc    |  22 ++
 src/libexpr/context.hh    |  19 ++
 src/libexpr/eval.cc       |  12 --
 src/libexpr/eval.hh       |   5 +-
 src/libexpr/tree-cache.cc | 433 ++++++++++++++++++++++++++++++++++++++
 src/libexpr/tree-cache.hh | 173 +++++++++++++++
 7 files changed, 650 insertions(+), 16 deletions(-)
 create mode 100644 src/libexpr/context.cc
 create mode 100644 src/libexpr/context.hh
 create mode 100644 src/libexpr/tree-cache.cc
 create mode 100644 src/libexpr/tree-cache.hh

diff --git a/src/libexpr/attr-set.hh b/src/libexpr/attr-set.hh
index 1da8d91df..7793e53b5 100644
--- a/src/libexpr/attr-set.hh
+++ b/src/libexpr/attr-set.hh
@@ -2,6 +2,7 @@
 
 #include "nixexpr.hh"
 #include "symbol-table.hh"
+#include "tree-cache.hh"
 
 #include <algorithm>
 #include <optional>
@@ -36,6 +37,7 @@ class Bindings
 public:
     typedef uint32_t size_t;
     Pos *pos;
+    std::shared_ptr<tree_cache::Cursor> eval_cache;
 
 private:
     size_t size_, capacity_;
diff --git a/src/libexpr/context.cc b/src/libexpr/context.cc
new file mode 100644
index 000000000..0ca96f8ae
--- /dev/null
+++ b/src/libexpr/context.cc
@@ -0,0 +1,22 @@
+#include "context.hh"
+
+namespace nix {
+
+/* Decode a context string ‘!<name>!<path>’ into a pair <path,
+   name>. */
+std::pair<string, string> decodeContext(std::string_view s)
+{
+    if (s.at(0) == '!') {
+        size_t index = s.find("!", 1);
+        return {std::string(s.substr(index + 1)), std::string(s.substr(1, index - 1))};
+    } else
+        return {s.at(0) == '/' ? std::string(s) : std::string(s.substr(1)), ""};
+}
+
+/* Inverse of `decodeContext` for the ‘!<name>!<path>’ form. */
+std::string encodeContext(std::string_view name, std::string_view path)
+{
+    return "!" + std::string(name) + "!" + std::string(path);
+}
+
+}
diff --git a/src/libexpr/context.hh b/src/libexpr/context.hh
new file mode 100644
index 000000000..65f12de93
--- /dev/null
+++ b/src/libexpr/context.hh
@@ -0,0 +1,19 @@
+#pragma once
+
+#include "util.hh"
+
+#include <string>
+#include <string_view>
+#include <utility>
+
+namespace nix {
+
+/* Decode a context string ‘!<name>!<path>’ into a pair <path,
+   name>. */
+std::pair<string, string> decodeContext(std::string_view s);
+
+/* Encode a (name, path) pair as the context string ‘!<name>!<path>’,
+   i.e. the form that `decodeContext` maps back to <path, name>. */
+std::string encodeContext(std::string_view name, std::string_view path);
+
+}
diff --git a/src/libexpr/eval.cc b/src/libexpr/eval.cc
index ef9f8efca..951cce60b 100644
--- a/src/libexpr/eval.cc
+++ b/src/libexpr/eval.cc
@@ -1699,18 +1699,6 @@ string EvalState::forceString(Value & v, const Pos & pos)
 }
 
 
-/* Decode a context string ‘!<name>!<path>’ into a pair <path,
-   name>. */
-std::pair<string, string> decodeContext(std::string_view s)
-{
-    if (s.at(0) == '!') {
-        size_t index = s.find("!", 1);
-        return {std::string(s.substr(index + 1)), std::string(s.substr(1, index - 1))};
-    } else
-        return {s.at(0) == '/' ? std::string(s) : std::string(s.substr(1)), ""};
-}
-
-
 void copyContext(const Value & v, PathSet & context)
 {
     if (v.string.context)
diff --git a/src/libexpr/eval.hh b/src/libexpr/eval.hh
index e3eaed6d3..8b89b1ee5 100644
--- a/src/libexpr/eval.hh
+++ b/src/libexpr/eval.hh
@@ -1,6 +1,7 @@
 #pragma once
 
 #include "attr-set.hh"
+#include "context.hh"
 #include "value.hh"
 #include "nixexpr.hh"
 #include "symbol-table.hh"
@@ -349,10 +350,6 @@ private:
 string showType(ValueType type);
 string showType(const Value & v);
 
-/* Decode a context string ‘!<name>!<path>’ into a pair <path,
-   name>. */
-std::pair<string, string> decodeContext(std::string_view s);
-
 /* If `path' refers to a directory, then append "/default.nix". */
 Path resolveExprPath(Path path);
 
diff --git a/src/libexpr/tree-cache.cc b/src/libexpr/tree-cache.cc
new file mode 100644
index 000000000..08fd9e59a
--- /dev/null
+++ b/src/libexpr/tree-cache.cc
@@ -0,0 +1,433 @@
+#include "tree-cache.hh"
+#include "sqlite.hh"
+#include "store-api.hh"
+#include "context.hh"
+
+#include <atomic>
+#include <cstdio>
+#include <cstdlib>
+
+namespace nix::tree_cache {
+
+static const char * schema = R"sql(
+create table if not exists Attributes (
+    id integer primary key autoincrement not null,
+    parent integer not null,
+    name text,
+    type integer not null,
+    value text,
+    context text,
+    unique (parent, name)
+);
+
+create index if not exists IndexByParent on Attributes(parent, name);
+)sql";
+
+/* Separator between the elements of a serialized string context. It is
+   shared by the writer (`RawValue::serializeContext`) and the reader
+   (`AttrDb::getValue`) so that both always agree on the format. */
+static const std::string contextSeparator = " ";
+
+/* Render a double with 17 significant digits, which is enough for the
+   text to parse back to the exact same value. */
+static std::string serializeDouble(double d)
+{
+    char buf[64];
+    std::snprintf(buf, sizeof(buf), "%.17g", d);
+    return buf;
+}
+
+/* Parse the text of a `Double` row; the whole string must be a number. */
+static double parseDouble(const std::string & s)
+{
+    char * end = nullptr;
+    double res = std::strtod(s.c_str(), &end);
+    if (s.empty() || end != s.c_str() + s.size())
+        throw Error("invalid floating-point value '%s' in evaluation cache", s);
+    return res;
+}
+
+struct AttrDb
+{
+    /* Set once an SQLite operation has failed; `doSQLite` then turns into a
+       no-op and the pending transaction is not committed on destruction. */
+    std::atomic_bool failed{false};
+
+    struct State
+    {
+        SQLite db;
+        SQLiteStmt insertAttribute;
+        SQLiteStmt updateAttribute;
+        SQLiteStmt insertAttributeWithContext;
+        SQLiteStmt queryAttribute;
+        SQLiteStmt queryAttributes;
+        std::unique_ptr<SQLiteTxn> txn;
+    };
+
+    std::unique_ptr<Sync<State>> _state;
+
+    /* Open (creating it if needed) the database named after `fingerprint`
+       in the user cache directory, and start a transaction on it. */
+    AttrDb(const Hash & fingerprint)
+        : _state(std::make_unique<Sync<State>>())
+    {
+        auto state(_state->lock());
+
+        Path cacheDir = getCacheDir() + "/nix/eval-cache-v3";
+        createDirs(cacheDir);
+
+        Path dbPath = cacheDir + "/" + fingerprint.to_string(Base16, false) + ".sqlite";
+
+        state->db = SQLite(dbPath);
+        state->db.isCache();
+        state->db.exec(schema);
+
+        state->insertAttribute.create(state->db,
+            "insert into Attributes(parent, name, type, value) values (?, ?, ?, ?)");
+
+        state->updateAttribute.create(state->db,
+            "update Attributes set type = ?, value = ?, context = ? where id = ?");
+
+        state->insertAttributeWithContext.create(state->db,
+            "insert into Attributes(parent, name, type, value, context) values (?, ?, ?, ?, ?)");
+
+        state->queryAttribute.create(state->db,
+            "select id, type, value, context from Attributes where parent = ? and name = ?");
+
+        state->queryAttributes.create(state->db,
+            "select name from Attributes where parent = ?");
+
+        state->txn = std::make_unique<SQLiteTxn>(state->db);
+    }
+
+    ~AttrDb()
+    {
+        try {
+            auto state(_state->lock());
+            if (!failed)
+                state->txn->commit();
+            state->txn.reset();
+        } catch (...) {
+            ignoreException();
+        }
+    }
+
+    /* Run `fun`, returning its result. If the db has already failed, or
+       if `fun` throws an `SQLiteError`, return 0 instead (and remember the
+       failure). */
+    template<typename F>
+    AttrId doSQLite(F && fun)
+    {
+        if (failed) return 0;
+        try {
+            return fun();
+        } catch (SQLiteError &) {
+            ignoreException();
+            failed = true;
+            return 0;
+        }
+    }
+
+    /**
+     * Store a leaf of the tree in the db
+     */
+    AttrId addEntry(
+        const AttrKey & key,
+        const AttrValue & value)
+    {
+        return doSQLite([&]()
+        {
+            auto state(_state->lock());
+            auto rawValue = RawValue::fromVariant(value);
+
+            state->insertAttributeWithContext.use()
+                (key.first)
+                (key.second)
+                (rawValue.type)
+                (rawValue.value.value_or(""), rawValue.value.has_value())
+                (rawValue.serializeContext())
+                .exec();
+            AttrId rowId = state->db.getLastInsertedRowId();
+            assert(rowId);
+            return rowId;
+        });
+    }
+
+    /* Row id of the entry stored under `key`, if there is one. */
+    std::optional<AttrId> getId(const AttrKey& key)
+    {
+        auto state(_state->lock());
+
+        auto queryAttribute(state->queryAttribute.use()(key.first)(key.second));
+        if (!queryAttribute.next()) return std::nullopt;
+
+        // Column 0 is the row id, not an `AttrType`
+        return static_cast<AttrId>(queryAttribute.getInt(0));
+    }
+
+    /* Overwrite the entry stored under `key` if it exists, insert a new
+       one otherwise. Returns the row id of the entry. */
+    AttrId setOrUpdate(const AttrKey& key, const AttrValue& value)
+    {
+        debug("cache: miss for the attribute %s", key.second);
+        if (auto existingId = getId(key)) {
+            setValue(*existingId, value);
+            return *existingId;
+        }
+        return addEntry(key, value);
+    }
+
+    /* Replace the type, value and context of the row `id`. */
+    void setValue(const AttrId & id, const AttrValue & value)
+    {
+        auto state(_state->lock());
+        auto rawValue = RawValue::fromVariant(value);
+
+        state->updateAttribute.use()
+            (rawValue.type)
+            (rawValue.value.value_or(""), rawValue.value.has_value())
+            (rawValue.serializeContext())
+            (id)
+            .exec();
+    }
+
+    /* Look up the entry stored under `key` and decode it. Returns the row
+       id together with the value, or nothing if there is no such row. */
+    std::optional<std::pair<AttrId, AttrValue>> getValue(AttrKey key)
+    {
+        auto state(_state->lock());
+
+        auto queryAttribute(state->queryAttribute.use()(key.first)(key.second));
+        if (!queryAttribute.next()) return {};
+
+        auto rowId = static_cast<AttrId>(queryAttribute.getInt(0));
+        auto type = static_cast<AttrType>(queryAttribute.getInt(1));
+
+        switch (type) {
+            case AttrType::Attrs: {
+                return {{rowId, attributeSet_t()}};
+            }
+            case AttrType::String: {
+                std::vector<std::pair<Path, std::string>> context;
+                if (!queryAttribute.isNull(3))
+                    for (auto & s : tokenizeString<std::vector<std::string>>(queryAttribute.getStr(3), contextSeparator))
+                        context.push_back(decodeContext(s));
+                return {{rowId, string_t{queryAttribute.getStr(2), context}}};
+            }
+            case AttrType::Bool:
+                return {{rowId, wrapped_basetype<bool>{queryAttribute.getInt(2) != 0}}};
+            case AttrType::Int:
+                return {{rowId, wrapped_basetype<int64_t>{queryAttribute.getInt(2)}}};
+            case AttrType::Double:
+                // The value is stored as text, `getInt` would truncate it
+                return {{rowId, wrapped_basetype<double>{parseDouble(queryAttribute.getStr(2))}}};
+            case AttrType::Unknown:
+                return {{rowId, unknown_t{}}};
+            case AttrType::Thunk:
+                return {{rowId, thunk_t{}}};
+            case AttrType::Missing:
+                return {{rowId, missing_t{key.second}}};
+            case AttrType::Failed:
+                return {{rowId, failed_t{queryAttribute.getStr(2)}}};
+            default:
+                throw Error("unexpected type in evaluation cache");
+        }
+    }
+
+    /* Names of all the rows whose parent is `parentId`. */
+    std::vector<std::string> getChildren(AttrId parentId)
+    {
+        std::vector<std::string> res;
+        auto state(_state->lock());
+
+        auto queryAttributes(state->queryAttributes.use()(parentId));
+
+        while (queryAttributes.next())
+            res.push_back(queryAttributes.getStr(0));
+
+        return res;
+    }
+};
+
+Cache::Cache(const Hash & useCache,
+        SymbolTable & symbols)
+    : db(std::make_shared<AttrDb>(useCache))
+    , symbols(symbols)
+    , rootSymbol(symbols.create(""))
+{
+}
+
+std::shared_ptr<Cache> Cache::tryCreate(const Hash & useCache, SymbolTable & symbols)
+{
+    try {
+        return std::make_shared<Cache>(useCache, symbols);
+    } catch (SQLiteError &) {
+        ignoreException();
+        return nullptr;
+    }
+}
+
+void Cache::commit()
+{
+    if (db) {
+        debug("Saving the cache");
+        auto state(db->_state->lock());
+        if (state->txn->active) {
+            state->txn->commit();
+            state->txn.reset();
+            state->txn = std::make_unique<SQLiteTxn>(state->db);
+        }
+    }
+}
+
+Cursor::Ref Cache::getRoot()
+{
+    return new Cursor(ref(shared_from_this()), std::nullopt, thunk_t{});
+}
+
+Cursor::Cursor(
+        ref<Cache> root,
+        const Parent & parent,
+        const AttrValue& value
+        )
+    : root(root)
+    , parentId(parent ? std::optional<AttrId>{parent->first.cachedValue.first} : std::nullopt)
+    , label(parent ? parent->second : root->rootSymbol)
+    , cachedValue({root->db->setOrUpdate(getKey(), value), value})
+{
+}
+
+Cursor::Cursor(
+        ref<Cache> root,
+        const Parent & parent,
+        const AttrId & id,
+        const AttrValue & value
+        )
+    : root(root)
+    , parentId(parent ? std::optional<AttrId>{parent->first.cachedValue.first} : std::nullopt)
+    , label(parent ? parent->second : root->rootSymbol)
+    , cachedValue({id, value})
+{
+}
+
+
+AttrKey Cursor::getKey()
+{
+    if (!parentId)
+        return {0, root->rootSymbol};
+    return {*parentId, label};
+}
+
+AttrValue Cursor::getCachedValue()
+{
+    return cachedValue.second;
+}
+
+void Cursor::setValue(const AttrValue & v)
+{
+    root->db->setValue(cachedValue.first, v);
+    cachedValue.second = v;
+}
+
+Cursor::Ref Cursor::addChild(const Symbol & attrPath, const AttrValue & v)
+{
+    Parent parent = {{*this, attrPath}};
+    auto childCursor = new Cursor(
+        root,
+        parent,
+        v
+    );
+    return childCursor;
+}
+
+std::vector<std::string> Cursor::getChildren()
+{
+    return root->db->getChildren(cachedValue.first);
+}
+
+std::optional<std::vector<std::string>> Cursor::getChildrenAtPath(const std::vector<Symbol> & attrPath)
+{
+    auto cursorAtPath = findAlongAttrPath(attrPath);
+    if (cursorAtPath)
+        return cursorAtPath->getChildren();
+    return std::nullopt;
+}
+
+Cursor::Ref Cursor::maybeGetAttr(const Symbol & name)
+{
+    auto rawAttr = root->db->getValue({cachedValue.first, name});
+    if (rawAttr) {
+        Parent parent = {{*this, name}};
+        debug("cache: hit for the attribute %s", name);
+        return new Cursor (
+            root, parent, rawAttr->first,
+            rawAttr->second);
+    }
+    if (std::holds_alternative<attributeSet_t>(cachedValue.second)) {
+        // If the parent is an attribute set but we're not present in the db,
+        // then we're not a member of this attribute set. So mark as missing
+        return addChild(name, missing_t{name});
+    }
+    return nullptr;
+}
+
+Cursor::Ref Cursor::findAlongAttrPath(const std::vector<Symbol> & attrPath)
+{
+    auto currentCursor = this;
+    for (auto & currentAccessor : attrPath) {
+        currentCursor = currentCursor->maybeGetAttr(currentAccessor);
+        if (!currentCursor)
+            break;
+        if (std::holds_alternative<missing_t>(currentCursor->cachedValue.second))
+            break;
+        if (std::holds_alternative<failed_t>(currentCursor->cachedValue.second))
+            break;
+    }
+    return currentCursor;
+}
+
+const RawValue RawValue::fromVariant(const AttrValue & value)
+{
+    RawValue res;
+    std::visit(overloaded{
+      [&](const attributeSet_t &) { res.type = AttrType::Attrs; },
+      [&](const string_t & x) {
+        res.type = AttrType::String;
+        res.value = x.first;
+        res.context = x.second;
+      },
+      [&](const wrapped_basetype<bool> & x) {
+        res.type = AttrType::Bool;
+        res.value = x.value ? "1" : "0";
+      },
+      [&](const wrapped_basetype<int64_t> & x) {
+        res.type = AttrType::Int;
+        res.value = std::to_string(x.value);
+      },
+      [&](const wrapped_basetype<double> & x) {
+        res.type = AttrType::Double;
+        res.value = serializeDouble(x.value);
+      },
+      [&](const unknown_t &) { res.type = AttrType::Unknown; },
+      [&](const missing_t &) { res.type = AttrType::Missing; },
+      [&](const thunk_t &) { res.type = AttrType::Thunk; },
+      [&](const failed_t & x) {
+        res.type = AttrType::Failed;
+        res.value = x.error;
+      }
+    }, value);
+    return res;
+}
+
+std::string RawValue::serializeContext() const
+{
+    std::string res;
+    for (auto & [path, name] : context) {
+        if (!res.empty())
+            res += contextSeparator;
+        res += encodeContext(name, path);
+    }
+    return res;
+}
+
+}
diff --git a/src/libexpr/tree-cache.hh b/src/libexpr/tree-cache.hh
new file mode 100644
index 000000000..8f40fb15b
--- /dev/null
+++ b/src/libexpr/tree-cache.hh
@@ -0,0 +1,173 @@
+/**
+ * caching for a tree-like data structure (like Nix values)
+ *
+ * The cache is an sqlite db whose rows are the nodes of the tree, with a
+ * pointer to their parent (except for the root of course)
+ */
+
+#pragma once
+
+#include "sync.hh"
+#include "hash.hh"
+#include "symbol-table.hh"
+
+#include <cstdint>
+#include <memory>
+#include <optional>
+#include <string>
+#include <utility>
+#include <variant>
+#include <vector>
+
+namespace nix::tree_cache {
+
+struct AttrDb;
+class Cursor;
+
+class Cache : public std::enable_shared_from_this<Cache>
+{
+private:
+    friend class Cursor;
+
+    /**
+     * The database holding the cache
+     */
+    std::shared_ptr<AttrDb> db;
+
+    SymbolTable & symbols;
+
+    /**
+     * Distinguished symbol indicating the root of the tree
+     */
+    const Symbol rootSymbol;
+
+public:
+
+    Cache(
+        const Hash & useCache,
+        SymbolTable & symbols
+    );
+
+    /**
+     * Like the constructor, but return a null pointer instead of
+     * propagating an `SQLiteError`
+     */
+    static std::shared_ptr<Cache> tryCreate(const Hash & useCache, SymbolTable & symbols);
+
+    /**
+     * Get a cursor on the root of the tree ((re)setting its entry in
+     * the database to a thunk)
+     */
+    Cursor * getRoot();
+
+    /**
+     * Flush the cache to disk
+     */
+    void commit();
+};
+
+enum AttrType {
+    Unknown = 0,
+    Attrs = 1,
+    String = 2,
+    Bool = 3,
+    Int = 4,
+    Double = 5,
+    Thunk = 6,
+    Missing = 7, // Missing fields of attribute sets
+    Failed = 8,
+};
+
+struct attributeSet_t {};
+struct unknown_t {};
+struct thunk_t {};
+struct failed_t { string error; };
+struct missing_t { Symbol attrName; };
+
+// Putting several different primitive types in an `std::variant` partially
+// breaks the `std::visit(overloaded{...` hackery because of the implicit cast
+// from one to another which breaks the exhaustiveness check.
+// So we wrap them in a trivial class just to force the disambiguation
+template<typename T>
+struct wrapped_basetype{ T value; };
+
+typedef uint64_t AttrId;
+
+typedef std::pair<AttrId, Symbol> AttrKey;
+typedef std::pair<std::string, std::vector<std::pair<Path, std::string>>> string_t;
+
+typedef std::variant<
+    attributeSet_t,
+    string_t,
+    unknown_t,
+    thunk_t,
+    missing_t,
+    failed_t,
+    wrapped_basetype<bool>,
+    wrapped_basetype<int64_t>,
+    wrapped_basetype<double>
+> AttrValue;
+
+/**
+ * Flat representation of an `AttrValue`, as stored in a row of the
+ * database
+ */
+struct RawValue {
+    AttrType type;
+    std::optional<std::string> value;
+    std::vector<std::pair<Path, std::string>> context;
+
+    std::string serializeContext() const;
+
+    static const RawValue fromVariant(const AttrValue&);
+    AttrValue toVariant() const;
+};
+
+/**
+ * View inside the cache.
+ *
+ * A `Cursor` represents a node in the cached tree (be it a leaf or not)
+ */
+class Cursor : public std::enable_shared_from_this<Cursor>
+{
+    /**
+     * The overall cache of which this cursor is a view
+     */
+    ref<Cache> root;
+
+    typedef std::optional<std::pair<Cursor&, Symbol>> Parent;
+
+    std::optional<AttrId> parentId;
+    Symbol label;
+
+    std::pair<AttrId, AttrValue> cachedValue;
+
+    /**
+     * Get the identifier for this node in the database
+     */
+    AttrKey getKey();
+
+public:
+
+    using Ref = Cursor*;
+
+    // Create a new cache entry
+    Cursor(ref<Cache> root, const Parent & parent, const AttrValue&);
+    // Build a cursor from an existing cache entry
+    Cursor(ref<Cache> root, const Parent & parent, const AttrId& id, const AttrValue&);
+
+    AttrValue getCachedValue();
+
+    void setValue(const AttrValue & v);
+
+    Ref addChild(const Symbol & attrPath, const AttrValue & v);
+
+    Ref findAlongAttrPath(const std::vector<Symbol> & attrPath);
+    Ref maybeGetAttr(const Symbol & attrPath);
+
+
+    std::vector<std::string> getChildren();
+    std::optional<std::vector<std::string>> getChildrenAtPath(const std::vector<Symbol> & attrPath);
+};
+
+}