1
1
Fork 0
mirror of https://github.com/NixOS/nix.git synced 2025-11-09 03:56:01 +01:00

refactor(libstore): add BGL-based dependency graph for path analysis

Introduces a reusable directed graph template built on Boost Graph Library
(BGL) to provide graph operations for store path dependency analysis. This
will be used by `nix why-depends` and future cycle detection.
This commit is contained in:
Bernardo Meurer Costa 2025-10-28 02:48:55 +00:00
parent 70176ed317
commit 501c928c50
No known key found for this signature in database
7 changed files with 519 additions and 0 deletions

View file

@ -0,0 +1,97 @@
#include "nix/store/dependency-graph-impl.hh"
#include <gtest/gtest.h>
namespace nix {
/**
 * Adding edges creates both endpoints as nodes, and the edge is visible
 * through the successor query.
 */
TEST(DependencyGraph, BasicAddEdge)
{
    FilePathGraph depGraph;
    depGraph.addEdge("a", "b");
    depGraph.addEdge("b", "c");

    EXPECT_TRUE(depGraph.hasNode("a"));
    EXPECT_TRUE(depGraph.hasNode("b"));
    EXPECT_TRUE(depGraph.hasNode("c"));
    EXPECT_FALSE(depGraph.hasNode("d"));

    // Verify edges using high-level API
    auto successors = depGraph.getSuccessors("a");
    // Fatal assertion: indexing below would read out of bounds if the
    // successor list were unexpectedly empty.
    ASSERT_EQ(successors.size(), 1u);
    EXPECT_EQ(successors[0], "b");
}
/**
 * Depth-first traversal of a diamond towards a target.
 *
 * Graph: a -> b -> d and a -> c -> d, target d.
 * Distances to d are b = 1, c = 1, a = 2. The successors of a node are
 * tried in order of increasing distance to the target, and the walk is
 * depth-first, so d is reached underneath the first successor of a
 * before the second successor is entered.
 */
TEST(DependencyGraph, DfsTraversalOrder)
{
    FilePathGraph depGraph;
    depGraph.addEdge("a", "b");
    depGraph.addEdge("a", "c");
    depGraph.addEdge("b", "d");
    depGraph.addEdge("c", "d");

    std::vector<std::string> visitedNodes;
    std::vector<std::pair<std::string, std::string>> visitedEdges;

    depGraph.dfsFromTarget(
        "a",
        "d",
        // Never prune a subtree.
        [&](const std::string & node, size_t /* depth */) {
            visitedNodes.push_back(node);
            return true;
        },
        [&](const std::string & from, const std::string & to, bool /* isLast */, size_t /* depth */) {
            visitedEdges.emplace_back(from, to);
        },
        // Never stop early, so every path from a is walked.
        [](const std::string &) { return false; });

    // Guard the indexing below: without it a short traversal would be an
    // out-of-bounds read instead of a test failure.
    ASSERT_GE(visitedNodes.size(), 3u);
    EXPECT_EQ(visitedNodes[0], "a");

    // b and c are both at distance 1 from d; the test does not depend on
    // which of them is entered first, only on d following immediately.
    EXPECT_TRUE(
        (visitedNodes[1] == "b" && visitedNodes[2] == "d") || (visitedNodes[1] == "c" && visitedNodes[2] == "d"));

    // With no pruning and no early stop each of the four edges is
    // reported exactly once, starting with an edge out of the start node
    // that leads to the first node entered after it.
    ASSERT_EQ(visitedEdges.size(), 4u);
    EXPECT_EQ(visitedEdges[0].first, "a");
    EXPECT_EQ(visitedEdges[0].second, visitedNodes[1]);
}
/**
 * A node with two outgoing edges reports exactly those two successors.
 * The order of the returned list is not checked.
 */
TEST(DependencyGraph, GetSuccessors)
{
    FilePathGraph graph;
    graph.addEdge("a", "b");
    graph.addEdge("a", "c");

    const auto children = graph.getSuccessors("a");

    EXPECT_EQ(children.size(), 2u);
    for (const char * expected : {"b", "c"}) {
        EXPECT_TRUE(std::ranges::contains(children, expected));
    }
}
/**
 * Every endpoint mentioned in an edge is listed exactly once, even when
 * it appears in several edges ("bar" is both a target and a source).
 */
TEST(DependencyGraph, GetAllNodes)
{
    FilePathGraph graph;
    graph.addEdge("foo", "bar");
    graph.addEdge("bar", "baz");

    const auto allNodes = graph.getAllNodes();

    EXPECT_EQ(allNodes.size(), 3u);
    for (const char * expected : {"foo", "bar", "baz"}) {
        EXPECT_TRUE(std::ranges::contains(allNodes, expected));
    }
}
/**
 * Querying the successors of a node that was never added is reported
 * as a nix::Error.
 */
TEST(DependencyGraph, ThrowsOnMissingNode)
{
    FilePathGraph graph;
    graph.addEdge("a", "b");

    // The cast silences [[nodiscard]] on getSuccessors(); only the throw matters.
    EXPECT_THROW((void) graph.getSuccessors("nonexistent"), nix::Error);
}
/**
 * A default-constructed graph contains no nodes at all.
 */
TEST(DependencyGraph, EmptyGraph)
{
    FilePathGraph graph;

    EXPECT_FALSE(graph.hasNode("anything"));
    EXPECT_EQ(graph.numVertices(), 0u);
    EXPECT_TRUE(graph.getAllNodes().empty());
}
} // namespace nix

View file

@ -56,6 +56,7 @@ subdir('nix-meson-build-support/common')
sources = files( sources = files(
'common-protocol.cc', 'common-protocol.cc',
'content-address.cc', 'content-address.cc',
'dependency-graph.cc',
'derivation-advanced-attrs.cc', 'derivation-advanced-attrs.cc',
'derivation.cc', 'derivation.cc',
'derived-path.cc', 'derived-path.cc',

View file

@ -0,0 +1,13 @@
#include "nix/store/dependency-graph.hh"
#include "nix/store/dependency-graph-impl.hh"
#include <string>
namespace nix {
// Explicit instantiations for common types.
//
// These are the definitions matching the `extern template class`
// declarations in dependency-graph.hh, so translation units that only
// include that header link against the code emitted here.
// An explicit instantiation of a class template does not instantiate its
// member templates, so dfsFromTarget is not covered by these lines.
template class DependencyGraph<StorePath>;
template class DependencyGraph<std::string>;
template class DependencyGraph<StorePath, FileListEdgeProperty>;
} // namespace nix

View file

@ -0,0 +1,232 @@
#pragma once
/**
* @file
*
* Template implementations (as opposed to mere declarations).
*
* This file is an example of the "impl.hh" pattern. See the
* contributing guide.
*
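* One only needs to include this when instantiating DependencyGraph
* with NodeId / EdgeProperty combinations beyond the pre-instantiated
* ones (StorePath, std::string, and StorePath with FileListEdgeProperty),
* or when calling the member template dfsFromTarget, which is defined
* in this file.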
*/
#include "nix/store/dependency-graph.hh"
#include "nix/store/store-api.hh"
#include "nix/util/error.hh"
#include <boost/graph/graph_traits.hpp>
#include <boost/graph/reverse_graph.hpp>
#include <boost/graph/properties.hpp>
#include <algorithm>
#include <ranges>
namespace nix {
/**
 * Build the graph for a set of store paths.
 *
 * Every path in `closure` becomes a node, and for every reference that
 * `store.queryPathInfo()` reports for it an edge `path -> reference` is
 * added (creating the reference's node if necessary).
 *
 * NOTE(review): if a path's references can contain the path itself, this
 * produces a self-loop; confirm that callers of dfsFromTarget cope with it.
 *
 * @param store Store queried for the references of each path
 * @param closure Store paths to include
 */
template<GraphNodeId NodeId, typename EdgeProperty>
DependencyGraph<NodeId, EdgeProperty>::DependencyGraph(Store & store, const StorePathSet & closure)
    requires std::same_as<NodeId, StorePath>
{
    for (auto & path : closure) {
        // Register the path even when it has no references; otherwise a
        // leaf of the closure would be missing from the graph and
        // hasNode()/getSuccessors() would fail for it.
        addOrGetVertex(path);

        // Keep the path info handle alive for the whole loop: iterating
        // `store.queryPathInfo(path)->references` directly would leave the
        // range bound to a member of an object whose owning temporary has
        // already been destroyed (before C++23 range-for lifetime rules).
        auto info = store.queryPathInfo(path);
        for (auto & ref : info->references) {
            addEdge(path, ref);
        }
    }
}
/**
 * Return the vertex for `id`, creating it on first use.
 *
 * A newly created vertex stores a copy of `id` in its bundled property
 * and is recorded in the id -> vertex index.
 */
template<GraphNodeId NodeId, typename EdgeProperty>
typename DependencyGraph<NodeId, EdgeProperty>::vertex_descriptor
DependencyGraph<NodeId, EdgeProperty>::addOrGetVertex(const NodeId & id)
{
    if (auto known = getVertex(id)) {
        return *known;
    }

    // Unknown id: allocate a vertex and remember the mapping.
    const auto created = boost::add_vertex(VertexProperty{std::make_optional(id)}, graph);
    nodeToVertex.emplace(id, created);
    return created;
}
/**
 * Add the edge `from -> to`, creating either endpoint if it is missing.
 *
 * Adding an edge that already exists leaves the graph unchanged, so the
 * call is idempotent and never produces parallel edges.
 */
template<GraphNodeId NodeId, typename EdgeProperty>
void DependencyGraph<NodeId, EdgeProperty>::addEdge(const NodeId & from, const NodeId & to)
{
    const auto source = addOrGetVertex(from);
    const auto dest = addOrGetVertex(to);

    // boost::edge() returns (descriptor, exists); only the flag matters here.
    const bool alreadyPresent = boost::edge(source, dest, graph).second;
    if (alreadyPresent) {
        return;
    }

    boost::add_edge(source, dest, graph);
}
/**
 * Add the edge `from -> to` carrying `prop`, creating either endpoint if
 * it is missing.
 *
 * When the edge already exists its property is updated instead of adding
 * a parallel edge:
 * - FileListEdgeProperty: the files of `prop` are inserted into the
 *   existing set (duplicates collapse);
 * - any other property type: the stored value is replaced by `prop`.
 */
template<GraphNodeId NodeId, typename EdgeProperty>
void DependencyGraph<NodeId, EdgeProperty>::addEdge(const NodeId & from, const NodeId & to, const EdgeProperty & prop)
    requires(!std::same_as<EdgeProperty, boost::no_property>)
{
    const auto source = addOrGetVertex(from);
    const auto dest = addOrGetVertex(to);

    const auto lookup = boost::edge(source, dest, graph);
    if (!lookup.second) {
        // First time this edge is seen: store the property as given.
        boost::add_edge(source, dest, prop, graph);
        return;
    }

    // Edge exists: fold the new property into the stored one.
    auto & stored = graph[lookup.first];
    if constexpr (std::same_as<EdgeProperty, FileListEdgeProperty>) {
        stored.files.insert(prop.files.begin(), prop.files.end());
    } else {
        stored = prop;
    }
}
/**
 * Look up the vertex for `id`.
 *
 * @return the vertex descriptor, or std::nullopt if `id` is not a node
 */
template<GraphNodeId NodeId, typename EdgeProperty>
std::optional<typename DependencyGraph<NodeId, EdgeProperty>::vertex_descriptor>
DependencyGraph<NodeId, EdgeProperty>::getVertex(const NodeId & id) const
{
    if (auto found = nodeToVertex.find(id); found != nodeToVertex.end()) {
        return found->second;
    }
    return std::nullopt;
}
/**
 * Map a vertex descriptor back to the node id stored in its bundled
 * property.
 *
 * The optional is dereferenced without a check; vertices created through
 * addOrGetVertex always carry an id.
 */
template<GraphNodeId NodeId, typename EdgeProperty>
const NodeId & DependencyGraph<NodeId, EdgeProperty>::getNodeId(vertex_descriptor v) const
{
    const VertexProperty & bundle = graph[v];
    return *bundle.id;
}
/**
 * Whether `id` is a node of the graph.
 */
template<GraphNodeId NodeId, typename EdgeProperty>
bool DependencyGraph<NodeId, EdgeProperty>::hasNode(const NodeId & id) const
{
    return nodeToVertex.find(id) != nodeToVertex.end();
}
/**
 * Look up the vertex for `id`, failing loudly if it is unknown.
 *
 * @throws Error if `id` is not a node of the graph
 */
template<GraphNodeId NodeId, typename EdgeProperty>
typename DependencyGraph<NodeId, EdgeProperty>::vertex_descriptor
DependencyGraph<NodeId, EdgeProperty>::getVertexOrThrow(const NodeId & id) const
{
    if (auto vertex = getVertex(id)) {
        return *vertex;
    }

    // Note: NodeId is not included as it may not be formattable in all instantiations
    throw Error("node not found in graph");
}
/**
 * Depth-first walk from `start`, preferring successors that are closer
 * to `target`.
 *
 * First the unit-weight shortest distance from every vertex to `target`
 * is computed (Dijkstra on the reversed graph, recomputed on every call).
 * Then the graph is walked depth-first from `start`: at each node the
 * successors that can reach `target` are sorted by (distance, NodeId) and
 * visited in that order; successors that cannot reach `target` are
 * skipped entirely.
 *
 * The walk itself keeps no visited set. Nodes reachable along several
 * paths are visited once per path, and on a graph with cycles (including
 * self-loops) termination relies on `visitNode` returning false or
 * `shouldStop` returning true.
 *
 * @param start Node to start from; it is passed to `visitNode` before it
 *        is looked up in the graph
 * @param target Node the distances are measured to
 * @param visitNode (node, depth) -> bool; false skips the node's subtree
 * @param visitEdge (from, to, isLast, depth) -> void; `depth` is the depth
 *        of `from`, `isLast` marks the final successor of `from`
 * @param shouldStop (node) -> bool; true aborts the whole traversal
 * @throws Error if `target`, or a node whose successors are expanded, is
 *         not in the graph
 */
template<GraphNodeId NodeId, typename EdgeProperty>
template<typename NodeVisitor, typename EdgeVisitor, typename StopPredicate>
void DependencyGraph<NodeId, EdgeProperty>::dfsFromTarget(
    const NodeId & start,
    const NodeId & target,
    NodeVisitor && visitNode,
    EdgeVisitor && visitEdge,
    StopPredicate && shouldStop) const
{
    // Sentinel distance for vertices that cannot reach the target.
    constexpr size_t unreachable = std::numeric_limits<size_t>::max();

    // Distances are local to this call; index == vertex descriptor.
    const auto targetVertex = getVertexOrThrow(target);
    std::vector<size_t> distToTarget(boost::num_vertices(graph), unreachable);
    distToTarget[targetVertex] = 0;

    // Searching the reversed graph from the target follows incoming edges,
    // which yields "distance to target" for every vertex.
    auto incoming = boost::make_reverse_graph(graph);
    using ReversedEdge = typename boost::graph_traits<decltype(incoming)>::edge_descriptor;

    // Every edge weighs 1, so the distance is simply the hop count.
    auto unitWeights = boost::make_constant_property<ReversedEdge>(1);
    auto distanceMap =
        boost::make_iterator_property_map(distToTarget.begin(), boost::get(boost::vertex_index, incoming));

    boost::dijkstra_shortest_paths(
        incoming, targetVertex, boost::weight_map(unitWeights).distance_map(distanceMap));

    // Recursive walk. Returns true when the traversal must stop entirely.
    std::function<bool(const NodeId &, size_t)> walk = [&](const NodeId & current, size_t depth) -> bool {
        // The visitor may veto descending into this node.
        if (!visitNode(current, depth)) {
            return false;
        }

        // The stop predicate aborts the whole traversal, not just this branch.
        if (shouldStop(current)) {
            return true;
        }

        // Collect (distance, successor) for every successor that can reach
        // the target, then order them closest-first (ties by NodeId).
        std::vector<std::pair<size_t, NodeId>> ranked;
        for (const auto & next : getSuccessors(current)) {
            const size_t dist = distToTarget[getVertexOrThrow(next)];
            if (dist != unreachable) {
                ranked.emplace_back(dist, next);
            }
        }
        std::ranges::sort(ranked);

        // Report each edge, then descend.
        const size_t count = ranked.size();
        size_t seen = 0;
        for (const auto & entry : ranked) {
            const NodeId & next = entry.second;
            const bool isLast = (++seen == count);
            visitEdge(current, next, isLast, depth);
            if (walk(next, depth + 1)) {
                return true; // Propagate stop signal
            }
        }

        return false; // Keep going in the caller
    };

    walk(start, 0);
}
/**
 * Node ids at the far end of the outgoing edges of `node`, in the order
 * the graph enumerates its adjacent vertices.
 *
 * @throws Error if `node` is not in the graph
 */
template<GraphNodeId NodeId, typename EdgeProperty>
std::vector<NodeId> DependencyGraph<NodeId, EdgeProperty>::getSuccessors(const NodeId & node) const
{
    const auto vertex = getVertexOrThrow(node);

    std::vector<NodeId> result;
    for (auto [it, last] = boost::adjacent_vertices(vertex, graph); it != last; ++it) {
        result.push_back(getNodeId(*it));
    }
    return result;
}
/**
 * Copy of the property stored on the edge `from -> to`.
 *
 * @return the property, or std::nullopt if both nodes exist but there is
 *         no edge between them
 * @throws Error if `from` or `to` is not in the graph
 */
template<GraphNodeId NodeId, typename EdgeProperty>
std::optional<EdgeProperty>
DependencyGraph<NodeId, EdgeProperty>::getEdgeProperty(const NodeId & from, const NodeId & to) const
    requires(!std::same_as<EdgeProperty, boost::no_property>)
{
    const auto source = getVertexOrThrow(from);
    const auto dest = getVertexOrThrow(to);

    // boost::edge() returns (descriptor, exists).
    const auto lookup = boost::edge(source, dest, graph);
    if (lookup.second) {
        return graph[lookup.first];
    }
    return std::nullopt;
}
/**
 * All node ids currently in the graph, in the iteration order of the
 * id -> vertex map (ascending NodeId).
 */
template<GraphNodeId NodeId, typename EdgeProperty>
std::vector<NodeId> DependencyGraph<NodeId, EdgeProperty>::getAllNodes() const
{
    std::vector<NodeId> ids;
    ids.reserve(nodeToVertex.size());
    for (const auto & entry : nodeToVertex) {
        ids.push_back(entry.first);
    }
    return ids;
}
} // namespace nix

View file

@ -0,0 +1,173 @@
#pragma once
///@file
#include "nix/store/path.hh"
#include "nix/util/canon-path.hh"
#include <boost/graph/adjacency_list.hpp>
#include <boost/graph/dijkstra_shortest_paths.hpp>
#include <boost/graph/reverse_graph.hpp>
#include <map>
#include <set>
#include <vector>
#include <optional>
#include <concepts>
namespace nix {
class Store;
/**
* Concept for types usable as graph node IDs.
*
* - std::copyable: DependencyGraph stores a copy of each id in its vertex
*   property and in its id -> vertex map, and returns ids by value.
* - std::totally_ordered: ids are used as keys of a std::map, and
*   dfsFromTarget sorts (distance, id) pairs.
*/
template<typename T>
concept GraphNodeId = std::copyable<T> && std::totally_ordered<T>;
/**
* Directed graph for dependency analysis using Boost Graph Library.
*
* The graph is fully mutable - edges can be added at any time.
* Query methods compute what they need on-demand without caching.
*
* **Edge Semantics:**
* - addEdge() is idempotent - calling twice with same nodes is safe
*   and never creates a parallel edge
* - For FileListEdgeProperty: files are automatically merged and deduplicated
* - For other property types: later addEdge() calls overwrite earlier values
*
* Member function definitions live in dependency-graph-impl.hh.
*
* @tparam NodeId Node identifier type (e.g., StorePath, std::string)
* @tparam EdgeProperty Optional edge metadata type
*/
template<GraphNodeId NodeId, typename EdgeProperty = boost::no_property>
class DependencyGraph
{
public:
/**
* Bundled vertex property. Uses optional for default constructibility.
*/
struct VertexProperty
{
// Id of the node this vertex represents; set when the vertex is created.
std::optional<NodeId> id;
};
/**
* BGL adjacency_list: bidirectional, vector storage.
*
* Bidirectional storage is what allows dfsFromTarget to search the
* reversed graph (following incoming edges).
*/
using Graph = boost::adjacency_list<boost::vecS, boost::vecS, boost::bidirectionalS, VertexProperty, EdgeProperty>;
using vertex_descriptor = typename boost::graph_traits<Graph>::vertex_descriptor;
using edge_descriptor = typename boost::graph_traits<Graph>::edge_descriptor;
private:
// The underlying BGL graph.
Graph graph;
// Index from node id to its vertex; the reverse mapping is the
// VertexProperty::id stored on each vertex.
std::map<NodeId, vertex_descriptor> nodeToVertex;
// Internal helpers
// Returns the vertex for `id`, creating it if it does not exist yet.
vertex_descriptor addOrGetVertex(const NodeId & id);
// Returns the vertex for `id`, or std::nullopt if `id` is not a node.
std::optional<vertex_descriptor> getVertex(const NodeId & id) const;
// Returns the id stored on vertex `v`.
const NodeId & getNodeId(vertex_descriptor v) const;
// Like getVertex(), but throws Error if `id` is not a node.
vertex_descriptor getVertexOrThrow(const NodeId & id) const;
public:
/**
* Create an empty graph.
*/
DependencyGraph() = default;
/**
* Build graph from Store closure (StorePath graphs only).
*
* For every path in `closure`, an edge is added from the path to each
* reference reported by `store.queryPathInfo()` for it.
*
* @param store Store to query for references
* @param closure Store paths to include
*/
DependencyGraph(Store & store, const StorePathSet & closure)
requires std::same_as<NodeId, StorePath>;
/**
* Add edge, creating vertices if needed.
* Does nothing if the edge already exists.
*/
void addEdge(const NodeId & from, const NodeId & to);
/**
* Add edge with property. If edge exists, merges properties:
* - FileListEdgeProperty: files are merged and automatically deduplicated
* - Other properties: later value overwrites earlier value
*/
void addEdge(const NodeId & from, const NodeId & to, const EdgeProperty & prop)
requires(!std::same_as<EdgeProperty, boost::no_property>);
/**
* Whether `id` is a node of the graph.
*/
[[nodiscard]] bool hasNode(const NodeId & id) const;
/**
* DFS traversal with distance-based successor ordering.
* Successors visited in order of increasing distance to target;
* successors that cannot reach the target are skipped.
* Distances to the target are recomputed on every call (nothing is
* cached), with every edge counting as one hop.
*
* The traversal keeps no visited set: a node reachable along several
* paths is visited once per path, and on a graph with cycles the
* traversal only terminates if `visitNode` returns false or
* `shouldStop` returns true at some point.
*
* Example traversal from A to D on the graph A->B->D, A->C->D:
*
* A (dist=2)
* B (dist=1)
* D (dist=0) [target]
* C (dist=1)
* D (dist=0)
*
* Callbacks invoked (with a shouldStop that returns true on D):
* visitNode(A, depth=0) -> true
* visitEdge(A, B, isLast=false, depth=0)
* visitNode(B, depth=1) -> true
* visitEdge(B, D, isLast=true, depth=1)
* visitNode(D, depth=2) -> true
* shouldStop(D) -> true [stops traversal]
*
* @param start Starting node for traversal
* @param target Target node (used for distance-based sorting)
* @param visitNode Called when entering node: (node, depth) -> bool. Return false to skip subtree.
* @param visitEdge Called for each edge: (from, to, isLastEdge, depth) -> void. `depth` is the depth of `from`.
* @param shouldStop Called after visiting node: (node) -> bool. Return true to stop entire traversal.
* @throws Error if `target`, or a node whose successors are expanded, is not in the graph.
*/
template<typename NodeVisitor, typename EdgeVisitor, typename StopPredicate>
void dfsFromTarget(
const NodeId & start,
const NodeId & target,
NodeVisitor && visitNode,
EdgeVisitor && visitEdge,
StopPredicate && shouldStop) const;
/**
* Get successor nodes (outgoing edges).
*
* @throws Error if `node` is not in the graph.
*/
[[nodiscard]] std::vector<NodeId> getSuccessors(const NodeId & node) const;
/**
* Get edge property. Returns nullopt if edge doesn't exist.
*
* @throws Error if `from` or `to` is not in the graph.
*/
[[nodiscard]] std::optional<EdgeProperty> getEdgeProperty(const NodeId & from, const NodeId & to) const
requires(!std::same_as<EdgeProperty, boost::no_property>);
/**
* Get the ids of all nodes, in ascending NodeId order.
*/
[[nodiscard]] std::vector<NodeId> getAllNodes() const;
/**
* Number of vertices in the underlying graph.
*/
[[nodiscard]] size_t numVertices() const
{
return boost::num_vertices(graph);
}
};
/**
* Edge property storing which files created a dependency.
* Files are stored in a std::set, guaranteeing:
* - Automatic deduplication
* - Deterministic ordering (sorted by CanonPath comparison)
*
* DependencyGraph::addEdge() treats this type specially: adding an edge
* that already exists inserts the new files into the existing set
* instead of replacing it.
*/
struct FileListEdgeProperty
{
// Files recorded for this edge.
std::set<CanonPath> files;
};
// Convenience typedefs for the pre-instantiated graph types:
// - StorePathGraph: store paths as nodes, no edge metadata
// - FilePathGraph: plain strings as nodes, no edge metadata
// - StorePathGraphWithFiles: store paths as nodes, each edge carrying a
//   FileListEdgeProperty
using StorePathGraph = DependencyGraph<StorePath>;
using FilePathGraph = DependencyGraph<std::string>;
using StorePathGraphWithFiles = DependencyGraph<StorePath, FileListEdgeProperty>;
// Provided by src/libstore/dependency-graph.cc
// Declaring these `extern` suppresses implicit instantiation in including
// translation units; the matching explicit instantiations are in that
// source file. Keep both lists in sync.
extern template class DependencyGraph<StorePath>;
extern template class DependencyGraph<std::string>;
extern template class DependencyGraph<StorePath, FileListEdgeProperty>;
} // namespace nix

View file

@ -31,6 +31,8 @@ headers = [ config_pub_h ] + files(
'common-ssh-store-config.hh', 'common-ssh-store-config.hh',
'content-address.hh', 'content-address.hh',
'daemon.hh', 'daemon.hh',
'dependency-graph-impl.hh',
'dependency-graph.hh',
'derivation-options.hh', 'derivation-options.hh',
'derivations.hh', 'derivations.hh',
'derived-path-map.hh', 'derived-path-map.hh',

View file

@ -277,6 +277,7 @@ sources = files(
'common-ssh-store-config.cc', 'common-ssh-store-config.cc',
'content-address.cc', 'content-address.cc',
'daemon.cc', 'daemon.cc',
'dependency-graph.cc',
'derivation-options.cc', 'derivation-options.cc',
'derivations.cc', 'derivations.cc',
'derived-path-map.cc', 'derived-path-map.cc',