1
1
Fork 0
mirror of https://github.com/NixOS/nix.git synced 2025-11-08 19:46:02 +01:00
This commit is contained in:
Bernardo Meurer 2025-11-08 18:32:22 +01:00 committed by GitHub
commit 01a5363e1e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
12 changed files with 990 additions and 225 deletions

View file

@ -0,0 +1,157 @@
#include "nix/store/dependency-graph-impl.hh"
#include <gtest/gtest.h>
namespace nix {
TEST(DependencyGraph, BasicAddEdge)
{
FilePathGraph depGraph;
depGraph.addEdge("a", "b");
depGraph.addEdge("b", "c");
EXPECT_TRUE(depGraph.hasNode("a"));
EXPECT_TRUE(depGraph.hasNode("b"));
EXPECT_TRUE(depGraph.hasNode("c"));
EXPECT_FALSE(depGraph.hasNode("d"));
// Verify edges using high-level API
auto successors = depGraph.getSuccessors("a");
EXPECT_EQ(successors.size(), 1);
EXPECT_EQ(successors[0], "b");
}
TEST(DependencyGraph, DfsTraversalOrder)
{
// Build a graph: A->B->D, A->C->D
// Successors should be visited in distance order (B and C before recursing)
FilePathGraph depGraph;
depGraph.addEdge("a", "b");
depGraph.addEdge("a", "c");
depGraph.addEdge("b", "d");
depGraph.addEdge("c", "d");
std::vector<std::string> visitedNodes;
std::vector<std::pair<std::string, std::string>> visitedEdges;
depGraph.dfsFromTarget(
"a",
"d",
[&](const std::string & node, size_t depth) {
visitedNodes.push_back(node);
return true;
},
[&](const std::string & from, const std::string & to, bool isLast, size_t depth) {
visitedEdges.emplace_back(from, to);
},
[](const std::string &) { return false; });
EXPECT_EQ(visitedNodes[0], "a");
// B and C both at distance 1, could be in either order
EXPECT_TRUE(
(visitedNodes[1] == "b" && visitedNodes[2] == "d") || (visitedNodes[1] == "c" && visitedNodes[2] == "d"));
}
TEST(DependencyGraph, GetSuccessors)
{
FilePathGraph depGraph;
depGraph.addEdge("a", "b");
depGraph.addEdge("a", "c");
auto successors = depGraph.getSuccessors("a");
EXPECT_EQ(successors.size(), 2);
EXPECT_TRUE(std::ranges::contains(successors, "b"));
EXPECT_TRUE(std::ranges::contains(successors, "c"));
}
TEST(DependencyGraph, GetAllNodes)
{
FilePathGraph depGraph;
depGraph.addEdge("foo", "bar");
depGraph.addEdge("bar", "baz");
auto nodes = depGraph.getAllNodes();
EXPECT_EQ(nodes.size(), 3);
EXPECT_TRUE(std::ranges::contains(nodes, "foo"));
EXPECT_TRUE(std::ranges::contains(nodes, "bar"));
EXPECT_TRUE(std::ranges::contains(nodes, "baz"));
}
TEST(DependencyGraph, ThrowsOnMissingNode)
{
FilePathGraph depGraph;
depGraph.addEdge("a", "b");
EXPECT_THROW(depGraph.getSuccessors("nonexistent"), nix::Error);
}
TEST(DependencyGraph, EmptyGraph)
{
FilePathGraph depGraph;
EXPECT_FALSE(depGraph.hasNode("anything"));
EXPECT_EQ(depGraph.numVertices(), 0);
EXPECT_EQ(depGraph.getAllNodes().size(), 0);
}
/**
* Parameters for cycle detection tests
*/
struct FindCyclesParams
{
std::string description;
std::vector<std::pair<std::string, std::string>> inputEdges;
std::vector<std::vector<std::string>> expectedCycles;
};
class FindCyclesTest : public ::testing::TestWithParam<FindCyclesParams>
{};
namespace {
bool compareCycles(const std::vector<std::string> & a, const std::vector<std::string> & b)
{
if (a.size() != b.size())
return a.size() < b.size();
return std::lexicographical_compare(a.begin(), a.end(), b.begin(), b.end());
}
} // namespace
TEST_P(FindCyclesTest, FindCycles)
{
const auto & params = GetParam();
FilePathGraph depGraph;
for (const auto & [from, to] : params.inputEdges) {
depGraph.addEdge(from, to);
}
auto actualCycles = depGraph.findCycles();
EXPECT_EQ(actualCycles.size(), params.expectedCycles.size());
std::ranges::sort(actualCycles, compareCycles);
auto expectedCycles = params.expectedCycles;
std::ranges::sort(expectedCycles, compareCycles);
EXPECT_EQ(actualCycles, expectedCycles);
}
INSTANTIATE_TEST_CASE_P(
FindCycles,
FindCyclesTest,
::testing::Values(
FindCyclesParams{"empty input", {}, {}},
FindCyclesParams{"single edge no cycle", {{"a", "b"}}, {}},
FindCyclesParams{"simple cycle", {{"a", "b"}, {"b", "a"}}, {{"a", "b", "a"}}},
FindCyclesParams{"three node cycle", {{"a", "b"}, {"b", "c"}, {"c", "a"}}, {{"a", "b", "c", "a"}}},
FindCyclesParams{
"four node cycle", {{"a", "b"}, {"b", "c"}, {"c", "d"}, {"d", "a"}}, {{"a", "b", "c", "d", "a"}}},
FindCyclesParams{
"multiple disjoint cycles",
{{"a", "b"}, {"b", "a"}, {"c", "d"}, {"d", "c"}},
{{"a", "b", "a"}, {"c", "d", "c"}}},
FindCyclesParams{"cycle with extra edges", {{"a", "b"}, {"b", "a"}, {"c", "d"}}, {{"a", "b", "a"}}},
FindCyclesParams{"self-loop", {{"a", "a"}}, {{"a", "a"}}},
FindCyclesParams{"chain no cycle", {{"a", "b"}, {"b", "c"}, {"c", "d"}}, {}},
FindCyclesParams{"cycle with tail", {{"x", "a"}, {"a", "b"}, {"b", "c"}, {"c", "a"}}, {{"a", "b", "c", "a"}}}));
} // namespace nix

View file

@ -57,6 +57,7 @@ sources = files(
'build-result.cc',
'common-protocol.cc',
'content-address.cc',
'dependency-graph.cc',
'derivation-advanced-attrs.cc',
'derivation.cc',
'derived-path.cc',

View file

@ -32,7 +32,7 @@ TEST_P(RewriteTest, IdentityRewriteIsIdentity)
auto rewriter = RewritingSink(param.rewrites, rewritten);
rewriter(param.originalString);
rewriter.flush();
ASSERT_EQ(rewritten.s, param.finalString);
EXPECT_EQ(rewritten.s, param.finalString);
}
INSTANTIATE_TEST_CASE_P(
@ -52,14 +52,14 @@ TEST(references, scan)
RefScanSink scanner(StringSet{hash1});
auto s = "foobar";
scanner(s);
ASSERT_EQ(scanner.getResult(), StringSet{});
EXPECT_EQ(scanner.getResult(), StringSet{});
}
{
RefScanSink scanner(StringSet{hash1});
auto s = "foobar" + hash1 + "xyzzy";
scanner(s);
ASSERT_EQ(scanner.getResult(), StringSet{hash1});
EXPECT_EQ(scanner.getResult(), StringSet{hash1});
}
{
@ -69,7 +69,7 @@ TEST(references, scan)
scanner(((std::string_view) s).substr(10, 5));
scanner(((std::string_view) s).substr(15, 5));
scanner(((std::string_view) s).substr(20));
ASSERT_EQ(scanner.getResult(), StringSet({hash1, hash2}));
EXPECT_EQ(scanner.getResult(), StringSet({hash1, hash2}));
}
{
@ -77,7 +77,7 @@ TEST(references, scan)
auto s = "foobar" + hash1 + "xyzzy" + hash2;
for (auto & i : s)
scanner(std::string(1, i));
ASSERT_EQ(scanner.getResult(), StringSet({hash1, hash2}));
EXPECT_EQ(scanner.getResult(), StringSet({hash1, hash2}));
}
}
@ -161,7 +161,7 @@ TEST(references, scanForReferencesDeep)
{
CanonPath f1Path("/file1.txt");
auto it = foundRefs.find(f1Path);
ASSERT_TRUE(it != foundRefs.end());
EXPECT_TRUE(it != foundRefs.end());
EXPECT_EQ(it->second.size(), 1);
EXPECT_TRUE(it->second.count(path1));
}
@ -170,7 +170,7 @@ TEST(references, scanForReferencesDeep)
{
CanonPath f2Path("/file2.txt");
auto it = foundRefs.find(f2Path);
ASSERT_TRUE(it != foundRefs.end());
EXPECT_TRUE(it != foundRefs.end());
EXPECT_EQ(it->second.size(), 2);
EXPECT_TRUE(it->second.count(path2));
EXPECT_TRUE(it->second.count(path3));
@ -186,7 +186,7 @@ TEST(references, scanForReferencesDeep)
{
CanonPath f4Path("/subdir/file4.txt");
auto it = foundRefs.find(f4Path);
ASSERT_TRUE(it != foundRefs.end());
EXPECT_TRUE(it != foundRefs.end());
EXPECT_EQ(it->second.size(), 1);
EXPECT_TRUE(it->second.count(path1));
}
@ -195,7 +195,7 @@ TEST(references, scanForReferencesDeep)
{
CanonPath linkPath("/link1");
auto it = foundRefs.find(linkPath);
ASSERT_TRUE(it != foundRefs.end());
EXPECT_TRUE(it != foundRefs.end());
EXPECT_EQ(it->second.size(), 1);
EXPECT_TRUE(it->second.count(path2));
}

View file

@ -0,0 +1,10 @@
#include "nix/store/dependency-graph-impl.hh"
namespace nix {
// Explicit instantiations for common types
template class DependencyGraph<StorePath>;
template class DependencyGraph<std::string>;
template class DependencyGraph<StorePath, FileListEdgeProperty>;
} // namespace nix

View file

@ -0,0 +1,287 @@
#pragma once
/**
* @file
*
* Template implementations (as opposed to mere declarations).
*
* This file is an example of the "impl.hh" pattern. See the
* contributing guide.
*
* One only needs to include this when instantiating DependencyGraph
* with custom NodeId or EdgeProperty types beyond the pre-instantiated
* common types (StorePath, std::string).
*/
#include "nix/store/dependency-graph.hh"
#include "nix/store/store-api.hh"
#include "nix/util/error.hh"
#include <boost/graph/graph_traits.hpp>
#include <boost/graph/depth_first_search.hpp>
#include <boost/graph/reverse_graph.hpp>
#include <boost/graph/properties.hpp>
#include <algorithm>
#include <ranges>
namespace nix {
template<GraphNodeId NodeId, typename EdgeProperty>
DependencyGraph<NodeId, EdgeProperty>::DependencyGraph(Store & store, const StorePathSet & closure)
requires std::same_as<NodeId, StorePath>
{
for (auto & path : closure) {
for (auto & ref : store.queryPathInfo(path)->references) {
addEdge(path, ref);
}
}
}
template<GraphNodeId NodeId, typename EdgeProperty>
typename DependencyGraph<NodeId, EdgeProperty>::vertex_descriptor
DependencyGraph<NodeId, EdgeProperty>::addOrGetVertex(const NodeId & id)
{
auto it = nodeToVertex.find(id);
if (it != nodeToVertex.end()) {
return it->second;
}
auto v = boost::add_vertex(VertexProperty{std::make_optional(id)}, graph);
nodeToVertex[id] = v;
return v;
}
template<GraphNodeId NodeId, typename EdgeProperty>
void DependencyGraph<NodeId, EdgeProperty>::addEdge(const NodeId & from, const NodeId & to)
{
auto vFrom = addOrGetVertex(from);
auto vTo = addOrGetVertex(to);
boost::add_edge(vFrom, vTo, graph);
}
template<GraphNodeId NodeId, typename EdgeProperty>
void DependencyGraph<NodeId, EdgeProperty>::addEdge(const NodeId & from, const NodeId & to, const EdgeProperty & prop)
requires(!std::same_as<EdgeProperty, boost::no_property>)
{
auto vFrom = addOrGetVertex(from);
auto vTo = addOrGetVertex(to);
auto [existingEdge, found] = boost::edge(vFrom, vTo, graph);
if (found) {
if constexpr (std::same_as<EdgeProperty, FileListEdgeProperty>) {
auto & edgeFiles = graph[existingEdge].files;
edgeFiles.insert(edgeFiles.end(), prop.files.begin(), prop.files.end());
}
} else {
boost::add_edge(vFrom, vTo, prop, graph);
}
}
template<GraphNodeId NodeId, typename EdgeProperty>
std::optional<typename DependencyGraph<NodeId, EdgeProperty>::vertex_descriptor>
DependencyGraph<NodeId, EdgeProperty>::getVertex(const NodeId & id) const
{
auto it = nodeToVertex.find(id);
if (it == nodeToVertex.end()) {
return std::nullopt;
}
return it->second;
}
template<GraphNodeId NodeId, typename EdgeProperty>
const NodeId & DependencyGraph<NodeId, EdgeProperty>::getNodeId(vertex_descriptor v) const
{
return *graph[v].id;
}
template<GraphNodeId NodeId, typename EdgeProperty>
bool DependencyGraph<NodeId, EdgeProperty>::hasNode(const NodeId & id) const
{
return nodeToVertex.contains(id);
}
template<GraphNodeId NodeId, typename EdgeProperty>
typename DependencyGraph<NodeId, EdgeProperty>::vertex_descriptor
DependencyGraph<NodeId, EdgeProperty>::getVertexOrThrow(const NodeId & id) const
{
auto opt = getVertex(id);
if (!opt.has_value()) {
throw Error("node not found in graph");
}
return *opt;
}
template<GraphNodeId NodeId, typename EdgeProperty>
void DependencyGraph<NodeId, EdgeProperty>::computeDistancesFrom(const NodeId & target) const
{
// Check if already computed for this target (idempotent)
if (cachedDistances.has_value() && distanceTarget.has_value() && *distanceTarget == target) {
return;
}
auto targetVertex = getVertexOrThrow(target);
size_t n = boost::num_vertices(graph);
std::vector<size_t> distances(n, std::numeric_limits<size_t>::max());
distances[targetVertex] = 0;
// Use reverse_graph to follow incoming edges
auto reversedGraph = boost::make_reverse_graph(graph);
// Create uniform weight map (all edges have weight 1)
auto weightMap =
boost::make_constant_property<typename boost::graph_traits<decltype(reversedGraph)>::edge_descriptor>(1);
// Run Dijkstra on reversed graph with uniform weights
boost::dijkstra_shortest_paths(
reversedGraph,
targetVertex,
boost::weight_map(weightMap).distance_map(
boost::make_iterator_property_map(distances.begin(), boost::get(boost::vertex_index, reversedGraph))));
cachedDistances = std::move(distances);
distanceTarget = target;
}
template<GraphNodeId NodeId, typename EdgeProperty>
template<typename NodeVisitor, typename EdgeVisitor, typename StopPredicate>
void DependencyGraph<NodeId, EdgeProperty>::dfsFromTarget(
const NodeId & start,
const NodeId & target,
NodeVisitor && visitNode,
EdgeVisitor && visitEdge,
StopPredicate && shouldStop) const
{
computeDistancesFrom(target);
std::function<bool(const NodeId &, size_t)> dfs = [&](const NodeId & node, size_t depth) -> bool {
// Visit node - if returns false, skip this subtree
if (!visitNode(node, depth)) {
return false;
}
// Check if we should stop the entire traversal
if (shouldStop(node)) {
return true; // Signal to stop
}
// Get and sort successors by distance
auto successors = getSuccessors(node);
auto sortedSuccessors = successors | std::views::transform([&](const auto & ref) -> std::pair<size_t, NodeId> {
auto v = getVertexOrThrow(ref);
return {(*cachedDistances)[v], ref};
})
| std::views::filter([](const auto & p) {
// Filter unreachable nodes
return p.first != std::numeric_limits<size_t>::max();
})
| std::ranges::to<std::vector>();
std::ranges::sort(sortedSuccessors);
// Visit each edge and recurse
for (size_t i = 0; i < sortedSuccessors.size(); ++i) {
const auto & [dist, successor] = sortedSuccessors[i];
bool isLast = (i == sortedSuccessors.size() - 1);
visitEdge(node, successor, isLast, depth);
if (dfs(successor, depth + 1)) {
return true; // Propagate stop signal
}
}
return false; // Continue traversal
};
dfs(start, 0);
}
template<GraphNodeId NodeId, typename EdgeProperty>
std::vector<NodeId> DependencyGraph<NodeId, EdgeProperty>::getSuccessors(const NodeId & node) const
{
auto v = getVertexOrThrow(node);
auto [adjBegin, adjEnd] = boost::adjacent_vertices(v, graph);
return std::ranges::subrange(adjBegin, adjEnd) | std::views::transform([&](auto v) { return getNodeId(v); })
| std::ranges::to<std::vector>();
}
template<GraphNodeId NodeId, typename EdgeProperty>
std::optional<EdgeProperty>
DependencyGraph<NodeId, EdgeProperty>::getEdgeProperty(const NodeId & from, const NodeId & to) const
requires(!std::same_as<EdgeProperty, boost::no_property>)
{
auto vFrom = getVertexOrThrow(from);
auto vTo = getVertexOrThrow(to);
auto [edge, found] = boost::edge(vFrom, vTo, graph);
if (!found) {
return std::nullopt;
}
return graph[edge];
}
template<GraphNodeId NodeId, typename EdgeProperty>
std::vector<std::vector<NodeId>> DependencyGraph<NodeId, EdgeProperty>::findCycles() const
{
using vertex_descriptor = typename boost::graph_traits<Graph>::vertex_descriptor;
using edge_descriptor = typename boost::graph_traits<Graph>::edge_descriptor;
std::vector<std::vector<vertex_descriptor>> cycleDescriptors;
std::vector<vertex_descriptor> dfsPath;
// Custom DFS visitor to detect back edges and extract cycles
class CycleFinder : public boost::default_dfs_visitor
{
public:
std::vector<std::vector<vertex_descriptor>> & cycles;
std::vector<vertex_descriptor> & dfsPath;
CycleFinder(std::vector<std::vector<vertex_descriptor>> & cycles, std::vector<vertex_descriptor> & dfsPath)
: cycles(cycles)
, dfsPath(dfsPath)
{
}
void discover_vertex(vertex_descriptor v, const Graph & g)
{
dfsPath.push_back(v);
}
void finish_vertex(vertex_descriptor v, const Graph & g)
{
if (!dfsPath.empty() && dfsPath.back() == v) {
dfsPath.pop_back();
}
}
void back_edge(edge_descriptor e, const Graph & g)
{
auto target = boost::target(e, g);
auto cycleStart = std::ranges::find(dfsPath, target);
std::vector<vertex_descriptor> cycle(cycleStart, dfsPath.end());
cycle.push_back(target);
cycles.push_back(std::move(cycle));
}
};
CycleFinder visitor(cycleDescriptors, dfsPath);
boost::depth_first_search(graph, boost::visitor(visitor));
// Convert vertex_descriptors to NodeIds using ranges
return cycleDescriptors | std::views::transform([&](const auto & cycleVerts) {
return cycleVerts | std::views::transform([&](auto v) { return getNodeId(v); })
| std::ranges::to<std::vector<NodeId>>();
})
| std::ranges::to<std::vector>();
}
template<GraphNodeId NodeId, typename EdgeProperty>
std::vector<NodeId> DependencyGraph<NodeId, EdgeProperty>::getAllNodes() const
{
return nodeToVertex | std::views::keys | std::ranges::to<std::vector>();
}
} // namespace nix

View file

@ -0,0 +1,166 @@
#pragma once
///@file
#include "nix/store/path.hh"
#include "nix/util/canon-path.hh"
#include <boost/graph/adjacency_list.hpp>
#include <boost/graph/depth_first_search.hpp>
#include <boost/graph/dijkstra_shortest_paths.hpp>
#include <boost/graph/reverse_graph.hpp>
#include <map>
#include <vector>
#include <optional>
#include <concepts>
namespace nix {
class Store;
/**
* Concept for types usable as graph node IDs.
*/
template<typename T>
concept GraphNodeId = std::copyable<T> && std::totally_ordered<T>;
/**
* Directed graph for dependency analysis using Boost Graph Library.
*
* @tparam NodeId Node identifier type (e.g., StorePath, std::string)
* @tparam EdgeProperty Optional edge metadata type
*/
template<GraphNodeId NodeId, typename EdgeProperty = boost::no_property>
class DependencyGraph
{
public:
/**
* Bundled vertex property. Uses optional for default constructibility.
*/
struct VertexProperty
{
std::optional<NodeId> id;
};
/**
* BGL adjacency_list: bidirectional, vector storage.
*/
using Graph = boost::adjacency_list<boost::vecS, boost::vecS, boost::bidirectionalS, VertexProperty, EdgeProperty>;
using vertex_descriptor = typename boost::graph_traits<Graph>::vertex_descriptor;
using edge_descriptor = typename boost::graph_traits<Graph>::edge_descriptor;
private:
Graph graph;
std::map<NodeId, vertex_descriptor> nodeToVertex;
// Cached algorithm results
mutable std::optional<std::vector<size_t>> cachedDistances;
mutable std::optional<NodeId> distanceTarget;
// Internal helpers
vertex_descriptor addOrGetVertex(const NodeId & id);
std::optional<vertex_descriptor> getVertex(const NodeId & id) const;
const NodeId & getNodeId(vertex_descriptor v) const;
vertex_descriptor getVertexOrThrow(const NodeId & id) const;
void computeDistancesFrom(const NodeId & target) const;
public:
DependencyGraph() = default;
/**
* Build graph from Store closure (StorePath graphs only).
*
* @param store Store to query for references
* @param closure Store paths to include
*/
DependencyGraph(Store & store, const StorePathSet & closure)
requires std::same_as<NodeId, StorePath>;
/**
* Add edge, creating vertices if needed.
*/
void addEdge(const NodeId & from, const NodeId & to);
/**
* Add edge with property. Merges property if edge exists.
*/
void addEdge(const NodeId & from, const NodeId & to, const EdgeProperty & prop)
requires(!std::same_as<EdgeProperty, boost::no_property>);
bool hasNode(const NodeId & id) const;
/**
* DFS traversal with distance-based successor ordering.
* Successors visited in order of increasing distance to target.
* Automatically computes distances if needed (lazy).
*
* Example traversal from A to D:
*
* A (dist=3)
* B (dist=2)
* D (dist=0) [target]
* C (dist=2)
* D (dist=0)
*
* Callbacks invoked:
* visitNode(A, depth=0) -> true
* visitEdge(A, B, isLast=false, depth=0)
* visitNode(B, depth=1) -> true
* visitEdge(B, D, isLast=true, depth=1)
* visitNode(D, depth=2) -> true
* shouldStop(D) -> true [stops traversal]
*
* @param start Starting node for traversal
* @param target Target node (used for distance-based sorting)
* @param visitNode Called when entering node: (node, depth) -> bool. Return false to skip subtree.
* @param visitEdge Called for each edge: (from, to, isLastEdge, depth) -> void
* @param shouldStop Called after visiting node: (node) -> bool. Return true to stop entire traversal.
*/
template<typename NodeVisitor, typename EdgeVisitor, typename StopPredicate>
void dfsFromTarget(
const NodeId & start,
const NodeId & target,
NodeVisitor && visitNode,
EdgeVisitor && visitEdge,
StopPredicate && shouldStop) const;
/**
* Get successor nodes (outgoing edges).
*/
std::vector<NodeId> getSuccessors(const NodeId & node) const;
/**
* Get edge property. Returns nullopt if edge doesn't exist.
*/
std::optional<EdgeProperty> getEdgeProperty(const NodeId & from, const NodeId & to) const
requires(!std::same_as<EdgeProperty, boost::no_property>);
std::vector<NodeId> getAllNodes() const;
/**
* Find all cycles in the graph using DFS.
* Returns vector of cycles, each represented as a path that starts and ends at the same node.
*/
std::vector<std::vector<NodeId>> findCycles() const;
size_t numVertices() const
{
return boost::num_vertices(graph);
}
};
/**
* Edge property storing which files created a dependency.
*/
struct FileListEdgeProperty
{
std::vector<CanonPath> files;
};
// Convenience typedefs
using StorePathGraph = DependencyGraph<StorePath>;
using FilePathGraph = DependencyGraph<std::string>;
using StorePathGraphWithFiles = DependencyGraph<StorePath, FileListEdgeProperty>;
} // namespace nix

View file

@ -31,6 +31,8 @@ headers = [ config_pub_h ] + files(
'common-ssh-store-config.hh',
'content-address.hh',
'daemon.hh',
'dependency-graph-impl.hh',
'dependency-graph.hh',
'derivation-options.hh',
'derivations.hh',
'derived-path-map.hh',

View file

@ -3,10 +3,12 @@
#include "nix/store/references.hh"
#include "nix/store/path.hh"
#include "nix/store/dependency-graph.hh"
#include "nix/util/source-accessor.hh"
#include <functional>
#include <vector>
#include <map>
namespace nix {
@ -59,7 +61,7 @@ void scanForReferencesDeep(
SourceAccessor & accessor,
const CanonPath & rootPath,
const StorePathSet & refs,
std::function<void(FileRefScanResult)> callback);
std::function<void(const FileRefScanResult &)> callback);
/**
* Scan a store path tree and return which references appear in which files.
@ -78,4 +80,25 @@ void scanForReferencesDeep(
std::map<CanonPath, StorePathSet>
scanForReferencesDeep(SourceAccessor & accessor, const CanonPath & rootPath, const StorePathSet & refs);
/**
* Build a StorePath-level dependency graph from file scanning.
*
* This scans the given path for references and builds a graph where:
* - Nodes are StorePaths
* - Edges represent dependencies between StorePaths
* - Edge properties store the files that created each dependency
*
* This unified approach allows both cycle detection and why-depends to share
* the same graph-building logic while maintaining file-level information for
* detailed error messages embedded directly in the graph.
*
* @param accessor Source accessor to read the tree
* @param rootPath Root path to scan
* @param rootStorePath The StorePath that rootPath belongs to
* @param refs Set of store paths to search for
* @return StorePathGraphWithFiles where edge properties contain file lists
*/
DependencyGraph<StorePath, FileListEdgeProperty> buildStorePathGraphFromScan(
SourceAccessor & accessor, const CanonPath & rootPath, const StorePath & rootStorePath, const StorePathSet & refs);
} // namespace nix

View file

@ -277,6 +277,7 @@ sources = files(
'common-ssh-store-config.cc',
'content-address.cc',
'daemon.cc',
'dependency-graph.cc',
'derivation-options.cc',
'derivations.cc',
'derived-path-map.cc',

View file

@ -1,4 +1,5 @@
#include "nix/store/path-references.hh"
#include "nix/store/dependency-graph.hh"
#include "nix/util/hash.hh"
#include "nix/util/archive.hh"
#include "nix/util/source-accessor.hh"
@ -62,7 +63,7 @@ void scanForReferencesDeep(
SourceAccessor & accessor,
const CanonPath & rootPath,
const StorePathSet & refs,
std::function<void(FileRefScanResult)> callback)
std::function<void(const FileRefScanResult &)> callback)
{
// Recursive tree walker
auto walk = [&](this auto & self, const CanonPath & path) -> void {
@ -137,11 +138,35 @@ scanForReferencesDeep(SourceAccessor & accessor, const CanonPath & rootPath, con
{
std::map<CanonPath, StorePathSet> results;
scanForReferencesDeep(accessor, rootPath, refs, [&](FileRefScanResult result) {
results[std::move(result.filePath)] = std::move(result.foundRefs);
scanForReferencesDeep(accessor, rootPath, refs, [&](const FileRefScanResult & result) {
results[result.filePath] = result.foundRefs;
});
return results;
}
DependencyGraph<StorePath, FileListEdgeProperty> buildStorePathGraphFromScan(
SourceAccessor & accessor, const CanonPath & rootPath, const StorePath & rootStorePath, const StorePathSet & refs)
{
DependencyGraph<StorePath, FileListEdgeProperty> graph;
scanForReferencesDeep(accessor, rootPath, refs, [&](const FileRefScanResult & result) {
// All files in this scan belong to rootStorePath
for (const auto & foundRef : result.foundRefs) {
// Add StorePath -> StorePath edge with file metadata
FileListEdgeProperty edgeProp;
edgeProp.files.push_back(result.filePath);
graph.addEdge(rootStorePath, foundRef, std::move(edgeProp));
debug(
"buildStorePathGraphFromScan: %s (in %s) → %s",
rootStorePath.to_string(),
result.filePath.abs(),
foundRef.to_string());
}
});
return graph;
}
} // namespace nix

View file

@ -4,7 +4,10 @@
#include "nix/util/processes.hh"
#include "nix/store/builtins.hh"
#include "nix/store/path-references.hh"
#include "nix/store/dependency-graph.hh"
#include "nix/util/finally.hh"
#include <ranges>
#include "nix/util/util.hh"
#include "nix/util/archive.hh"
#include "nix/util/git.hh"
@ -62,6 +65,103 @@ struct NotDeterministic : BuildError
}
};
/**
* Information about an output path for cycle analysis.
*/
struct OutputPathInfo
{
std::string outputName; ///< Name of the output (e.g., "out", "dev")
StorePath storePath; ///< The StorePath of this output
Path actualPath; ///< Actual filesystem path where the output is located
};
/**
* Helper to analyze cycles and throw a detailed error.
*
* This function always throws - either the detailed cycle error or re-throws
* the original exception if no cycles are found.
*
* @param drvName The formatted name of the derivation being built
* @param referenceablePaths Set of paths to search for in the outputs
* @param getOutputPaths Callback returning output information to scan
*/
[[noreturn]] static void analyzeCyclesAndThrow(
std::string_view drvName,
const StorePathSet & referenceablePaths,
std::function<std::vector<OutputPathInfo>()> getOutputPaths)
{
debug("cycle detected, analyzing for detailed error report");
// Scan all outputs and build dependency graph
auto accessor = getFSSourceAccessor();
DependencyGraph<StorePath, FileListEdgeProperty> depGraph;
for (auto & output : getOutputPaths()) {
debug("scanning output '%s' at path '%s' for cycles", output.outputName, output.actualPath);
auto outputGraph =
buildStorePathGraphFromScan(*accessor, CanonPath(output.actualPath), output.storePath, referenceablePaths);
// Merge into combined graph
for (auto & node : outputGraph.getAllNodes()) {
for (auto & successor : outputGraph.getSuccessors(node)) {
auto edgeProp = outputGraph.getEdgeProperty(node, successor);
if (edgeProp) {
depGraph.addEdge(node, successor, *edgeProp);
}
}
}
}
// Find cycles in the combined graph
auto cycles = depGraph.findCycles();
if (cycles.empty()) {
debug("no detailed cycles found, re-throwing original error");
throw;
}
debug("found %lu cycles", cycles.size());
// Build detailed error message with file annotations
std::string cycleDetails = fmt("Detailed cycle analysis found %d cycle path(s):", cycles.size());
for (const auto & [idx, cycle] : std::views::enumerate(cycles)) {
cycleDetails += fmt("\n\nCycle %d:", idx + 1);
for (const auto & window : cycle | std::views::slide(2)) {
const StorePath & from = window[0];
const StorePath & to = window[1];
cycleDetails += fmt("\n → %s", from.to_string());
// Add file annotations
auto edgeProp = depGraph.getEdgeProperty(from, to);
if (edgeProp && !edgeProp->files.empty()) {
for (const auto & file : edgeProp->files) {
cycleDetails += fmt("\n (via %s)", file.abs());
}
}
}
// Close the cycle
if (!cycle.empty()) {
cycleDetails += fmt("\n → %s", cycle.back().to_string());
}
}
cycleDetails +=
fmt("\n\nThis means there are circular references between output files.\n"
"The build cannot proceed because the outputs reference each other.");
// Add hint with temp paths for debugging
if (settings.keepFailed || verbosity >= lvlDebug) {
cycleDetails += "\n\nNote: Temporary build outputs are preserved for inspection.";
}
throw BuildError(
BuildResult::Failure::OutputRejected, "cycle detected in build of '%s': %s", drvName, cycleDetails);
}
/**
* This class represents the state for building locally.
*
@ -1473,43 +1573,62 @@ SingleDrvOutputs DerivationBuilderImpl::registerOutputs()
outputStats.insert_or_assign(outputName, std::move(st));
}
auto sortedOutputNames = topoSort(
outputsToSort,
{[&](const std::string & name) {
auto orifu = get(outputReferencesIfUnregistered, name);
if (!orifu)
throw BuildError(
BuildResult::Failure::OutputRejected,
"no output reference for '%s' in build of '%s'",
name,
store.printStorePath(drvPath));
return std::visit(
overloaded{
/* Since we'll use the already installed versions of these, we
can treat them as leaves and ignore any references they
have. */
[&](const AlreadyRegistered &) { return StringSet{}; },
[&](const PerhapsNeedToRegister & refs) {
StringSet referencedOutputs;
/* FIXME build inverted map up front so no quadratic waste here */
for (auto & r : refs.refs)
for (auto & [o, p] : scratchOutputs)
if (r == p)
referencedOutputs.insert(o);
return referencedOutputs;
},
},
*orifu);
}},
{[&](const std::string & path, const std::string & parent) {
// TODO with more -vvvv also show the temporary paths for manual inspection.
return BuildError(
BuildResult::Failure::OutputRejected,
"cycle detected in build of '%s' in the references of output '%s' from output '%s'",
store.printStorePath(drvPath),
path,
parent);
}});
auto sortedOutputNames = [&]() {
try {
return topoSort(
outputsToSort,
{[&](const std::string & name) {
auto orifu = get(outputReferencesIfUnregistered, name);
if (!orifu)
throw BuildError(
BuildResult::Failure::OutputRejected,
"no output reference for '%s' in build of '%s'",
name,
store.printStorePath(drvPath));
return std::visit(
overloaded{
/* Since we'll use the already installed versions of these, we
can treat them as leaves and ignore any references they
have. */
[&](const AlreadyRegistered &) { return StringSet{}; },
[&](const PerhapsNeedToRegister & refs) {
StringSet referencedOutputs;
/* FIXME build inverted map up front so no quadratic waste here */
for (auto & r : refs.refs)
for (auto & [o, p] : scratchOutputs)
if (r == p)
referencedOutputs.insert(o);
return referencedOutputs;
},
},
*orifu);
}},
{[&](const std::string & path, const std::string & parent) {
// TODO with more -vvvv also show the temporary paths for manual inspection.
return BuildError(
BuildResult::Failure::OutputRejected,
"cycle detected in build of '%s' in the references of output '%s' from output '%s'",
store.printStorePath(drvPath),
path,
parent);
}});
} catch (std::exception & e) {
analyzeCyclesAndThrow(store.printStorePath(drvPath), referenceablePaths, [&]() {
std::vector<OutputPathInfo> outputPaths;
for (auto & [outputName, _] : drv.outputs) {
auto scratchOutput = get(scratchOutputs, outputName);
if (scratchOutput) {
outputPaths.push_back(
OutputPathInfo{
.outputName = outputName,
.storePath = *scratchOutput,
.actualPath = realPathInSandbox(store.printStorePath(*scratchOutput))});
}
}
return outputPaths;
});
}
}();
std::reverse(sortedOutputNames.begin(), sortedOutputNames.end());
@ -1848,12 +1967,24 @@ SingleDrvOutputs DerivationBuilderImpl::registerOutputs()
/* Register each output path as valid, and register the sets of
paths referenced by each of them. If there are cycles in the
outputs, this will fail. */
{
try {
ValidPathInfos infos2;
for (auto & [outputName, newInfo] : infos) {
infos2.insert_or_assign(newInfo.path, newInfo);
}
store.registerValidPaths(infos2);
} catch (BuildError & e) {
analyzeCyclesAndThrow(store.printStorePath(drvPath), referenceablePaths, [&]() {
std::vector<OutputPathInfo> outputPaths;
for (auto & [outputName, newInfo] : infos) {
outputPaths.push_back(
OutputPathInfo{
.outputName = outputName,
.storePath = newInfo.path,
.actualPath = store.toRealPath(store.printStorePath(newInfo.path))});
}
return outputPaths;
});
}
/* If we made it this far, we are sure the output matches the

View file

@ -1,10 +1,11 @@
#include "nix/cmd/command.hh"
#include "nix/store/store-api.hh"
#include "nix/store/path-references.hh"
#include "nix/store/dependency-graph-impl.hh"
#include "nix/util/source-accessor.hh"
#include "nix/main/shared.hh"
#include <queue>
#include <ranges>
using namespace nix;
@ -21,6 +22,63 @@ static std::string filterPrintable(const std::string & s)
return res;
}
/**
* Find and format hash references in scanned files with context.
*
* @param accessor Source accessor for the store path
* @param refPaths Store paths to search for
* @param dependencyPathHash Hash of the dependency (for coloring)
* @return Map from hash string to list of formatted hit strings showing context
*/
static std::map<std::string, Strings>
findHashContexts(SourceAccessor & accessor, const StorePathSet & refPaths, std::string_view dependencyPathHash)
{
std::map<std::string, Strings> hits;
auto getColour = [&](const std::string & hash) { return hash == dependencyPathHash ? ANSI_GREEN : ANSI_BLUE; };
scanForReferencesDeep(accessor, CanonPath::root, refPaths, [&](const FileRefScanResult & result) {
std::string p2 =
result.filePath.isRoot() ? std::string(result.filePath.abs()) : std::string(result.filePath.rel());
auto st = accessor.lstat(result.filePath);
if (st.type == SourceAccessor::Type::tRegular) {
auto contents = accessor.readFile(result.filePath);
// For each reference found in this file, extract context
for (const auto & foundRef : result.foundRefs) {
std::string hash(foundRef.hashPart());
auto pos = contents.find(hash);
if (pos != std::string::npos) {
size_t margin = 32;
auto pos2 = pos >= margin ? pos - margin : 0;
hits[hash].emplace_back(
fmt("%s: …%s…",
p2,
hilite(
filterPrintable(std::string(contents, pos2, pos - pos2 + hash.size() + margin)),
pos - pos2,
StorePath::HashLen,
getColour(hash))));
}
}
} else if (st.type == SourceAccessor::Type::tSymlink) {
auto target = accessor.readLink(result.filePath);
// For each reference found in this symlink, show it
for (const auto & foundRef : result.foundRefs) {
std::string hash(foundRef.hashPart());
auto pos = target.find(hash);
if (pos != std::string::npos)
hits[hash].emplace_back(
fmt("%s -> %s", p2, hilite(target, pos, StorePath::HashLen, getColour(hash))));
}
}
});
return hits;
}
struct CmdWhyDepends : SourceExprCommand, MixOperateOnOptions
{
std::string _package, _dependency;
@ -109,187 +167,91 @@ struct CmdWhyDepends : SourceExprCommand, MixOperateOnOptions
auto dependencyPath = *optDependencyPath;
auto dependencyPathHash = dependencyPath.hashPart();
auto const inf = std::numeric_limits<size_t>::max();
struct Node
{
StorePath path;
StorePathSet refs;
StorePathSet rrefs;
size_t dist = inf;
Node * prev = nullptr;
bool queued = false;
bool visited = false;
};
std::map<StorePath, Node> graph;
for (auto & path : closure)
graph.emplace(
path,
Node{
.path = path,
.refs = store->queryPathInfo(path)->references,
.dist = path == dependencyPath ? 0 : inf});
// Transpose the graph.
for (auto & node : graph)
for (auto & ref : node.second.refs)
graph.find(ref)->second.rrefs.insert(node.first);
/* Run Dijkstra's shortest path algorithm to get the distance
of every path in the closure to 'dependency'. */
std::priority_queue<Node *> queue;
queue.push(&graph.at(dependencyPath));
while (!queue.empty()) {
auto & node = *queue.top();
queue.pop();
for (auto & rref : node.rrefs) {
auto & node2 = graph.at(rref);
auto dist = node.dist + 1;
if (dist < node2.dist) {
node2.dist = dist;
node2.prev = &node;
if (!node2.queued) {
node2.queued = true;
queue.push(&node2);
}
}
}
}
// Build dependency graph from closure using store metadata
StorePathGraph depGraph(*store, closure);
/* Print the subgraph of nodes that have 'dependency' in their
closure (i.e., that have a non-infinite distance to
'dependency'). Print every edge on a path between `package`
and `dependency`. */
std::function<void(Node &, const std::string &, const std::string &)> printNode;
struct BailOut
{};
printNode = [&](Node & node, const std::string & firstPad, const std::string & tailPad) {
assert(node.dist != inf);
if (precise) {
logger->cout(
"%s%s%s%s" ANSI_NORMAL,
firstPad,
node.visited ? "\e[38;5;244m" : "",
firstPad != "" ? "" : "",
store->printStorePath(node.path));
}
if (node.path == dependencyPath && !all && packagePath != dependencyPath)
throw BailOut();
if (node.visited)
return;
if (precise)
node.visited = true;
/* Sort the references by distance to `dependency` to
ensure that the shortest path is printed first. */
std::multimap<size_t, Node *> refs;
StorePathSet refPaths;
for (auto & ref : node.refs) {
if (ref == node.path && packagePath != dependencyPath)
continue;
auto & node2 = graph.at(ref);
if (node2.dist == inf)
continue;
refs.emplace(node2.dist, &node2);
refPaths.insert(node2.path);
}
/* For each reference, find the files and symlinks that
contain the reference. */
std::map<std::string, Strings> hits;
auto accessor = store->requireStoreObjectAccessor(node.path);
auto getColour = [&](const std::string & hash) {
return hash == dependencyPathHash ? ANSI_GREEN : ANSI_BLUE;
};
if (precise) {
// Use scanForReferencesDeep to find files containing references
scanForReferencesDeep(*accessor, CanonPath::root, refPaths, [&](FileRefScanResult result) {
auto p2 = result.filePath.isRoot() ? result.filePath.abs() : result.filePath.rel();
auto st = accessor->lstat(result.filePath);
if (st.type == SourceAccessor::Type::tRegular) {
auto contents = accessor->readFile(result.filePath);
// For each reference found in this file, extract context
for (auto & foundRef : result.foundRefs) {
std::string hash(foundRef.hashPart());
auto pos = contents.find(hash);
if (pos != std::string::npos) {
size_t margin = 32;
auto pos2 = pos >= margin ? pos - margin : 0;
hits[hash].emplace_back(fmt(
"%s: …%s…",
p2,
hilite(
filterPrintable(std::string(contents, pos2, pos - pos2 + hash.size() + margin)),
pos - pos2,
StorePath::HashLen,
getColour(hash))));
}
}
} else if (st.type == SourceAccessor::Type::tSymlink) {
auto target = accessor->readLink(result.filePath);
// For each reference found in this symlink, show it
for (auto & foundRef : result.foundRefs) {
std::string hash(foundRef.hashPart());
auto pos = target.find(hash);
if (pos != std::string::npos)
hits[hash].emplace_back(
fmt("%s -> %s", p2, hilite(target, pos, StorePath::HashLen, getColour(hash))));
}
}
});
}
for (auto & ref : refs) {
std::string hash(ref.second->path.hashPart());
bool last = all ? ref == *refs.rbegin() : true;
for (auto & hit : hits[hash]) {
bool first = hit == *hits[hash].begin();
logger->cout(
"%s%s%s", tailPad, (first ? (last ? treeLast : treeConn) : (last ? treeNull : treeLine)), hit);
if (!all)
break;
}
if (!precise) {
logger->cout(
"%s%s%s%s" ANSI_NORMAL,
firstPad,
ref.second->visited ? "\e[38;5;244m" : "",
last ? treeLast : treeConn,
store->printStorePath(ref.second->path));
node.visited = true;
}
printNode(*ref.second, tailPad + (last ? treeNull : treeLine), tailPad + (last ? treeNull : treeLine));
}
};
RunPager pager;
try {
if (!precise) {
logger->cout("%s", store->printStorePath(graph.at(packagePath).path));
}
printNode(graph.at(packagePath), "", "");
} catch (BailOut &) {
if (!precise) {
logger->cout("%s", store->printStorePath(packagePath));
}
std::set<StorePath> visited;
std::vector<std::string> padStack = {""};
depGraph.dfsFromTarget(
packagePath,
dependencyPath,
// Visit node callback
[&](const StorePath & node, size_t depth) -> bool {
std::string currentPad = padStack[depth];
if (precise) {
logger->cout(
"%s%s%s%s" ANSI_NORMAL,
currentPad,
visited.contains(node) ? "\e[38;5;244m" : "",
currentPad != "" ? "" : "",
store->printStorePath(node));
}
if (visited.contains(node)) {
return false; // Skip subtree
}
if (precise) {
visited.insert(node);
}
return true; // Continue with this node's children
},
// Visit edge callback
[&](const StorePath & from, const StorePath & to, bool isLast, size_t depth) {
std::string tailPad = padStack[depth];
// In non-all mode, we only traverse one path, so everything is "last"
bool effectivelyLast = !all || isLast;
if (precise) {
auto accessor = store->requireStoreObjectAccessor(from);
auto hits = findHashContexts(*accessor, {to}, dependencyPathHash);
std::string hash(to.hashPart());
auto & hashHits = hits[hash];
for (auto & hit : hashHits) {
bool first = hit == *hashHits.begin();
logger->cout(
"%s%s%s",
tailPad,
(first ? (effectivelyLast ? treeLast : treeConn) : (effectivelyLast ? treeNull : treeLine)),
hit);
if (!all)
break;
}
} else {
std::string currentPad = padStack[depth];
logger->cout(
"%s%s%s%s" ANSI_NORMAL,
currentPad,
visited.contains(to) ? "\e[38;5;244m" : "",
effectivelyLast ? treeLast : treeConn,
store->printStorePath(to));
visited.insert(from);
}
// Update padding for next level
if (padStack.size() == depth + 1) {
padStack.push_back(tailPad + (effectivelyLast ? treeNull : treeLine));
} else {
padStack[depth + 1] = tailPad + (effectivelyLast ? treeNull : treeLine);
}
},
// Stop condition
[&](const StorePath & node) { return node == dependencyPath && !all && packagePath != dependencyPath; });
}
};