diff --git a/src/libstore-tests/dependency-graph.cc b/src/libstore-tests/dependency-graph.cc new file mode 100644 index 000000000..eecaf14e4 --- /dev/null +++ b/src/libstore-tests/dependency-graph.cc @@ -0,0 +1,157 @@ +#include "nix/store/dependency-graph-impl.hh" + +#include + +namespace nix { + +TEST(DependencyGraph, BasicAddEdge) +{ + FilePathGraph depGraph; + depGraph.addEdge("a", "b"); + depGraph.addEdge("b", "c"); + + EXPECT_TRUE(depGraph.hasNode("a")); + EXPECT_TRUE(depGraph.hasNode("b")); + EXPECT_TRUE(depGraph.hasNode("c")); + EXPECT_FALSE(depGraph.hasNode("d")); + + // Verify edges using high-level API + auto successors = depGraph.getSuccessors("a"); + EXPECT_EQ(successors.size(), 1); + EXPECT_EQ(successors[0], "b"); +} + +TEST(DependencyGraph, DfsTraversalOrder) +{ + // Build a graph: A->B->D, A->C->D + // Successors should be visited in distance order (B and C before recursing) + FilePathGraph depGraph; + depGraph.addEdge("a", "b"); + depGraph.addEdge("a", "c"); + depGraph.addEdge("b", "d"); + depGraph.addEdge("c", "d"); + + std::vector visitedNodes; + std::vector> visitedEdges; + + depGraph.dfsFromTarget( + "a", + "d", + [&](const std::string & node, size_t depth) { + visitedNodes.push_back(node); + return true; + }, + [&](const std::string & from, const std::string & to, bool isLast, size_t depth) { + visitedEdges.emplace_back(from, to); + }, + [](const std::string &) { return false; }); + + EXPECT_EQ(visitedNodes[0], "a"); + // B and C both at distance 1, could be in either order + EXPECT_TRUE( + (visitedNodes[1] == "b" && visitedNodes[2] == "d") || (visitedNodes[1] == "c" && visitedNodes[2] == "d")); +} + +TEST(DependencyGraph, GetSuccessors) +{ + FilePathGraph depGraph; + depGraph.addEdge("a", "b"); + depGraph.addEdge("a", "c"); + + auto successors = depGraph.getSuccessors("a"); + EXPECT_EQ(successors.size(), 2); + EXPECT_TRUE(std::ranges::contains(successors, "b")); + EXPECT_TRUE(std::ranges::contains(successors, "c")); +} + +TEST(DependencyGraph, GetAllNodes) +{ + FilePathGraph depGraph; + depGraph.addEdge("foo", "bar"); + depGraph.addEdge("bar", "baz"); + + auto nodes = depGraph.getAllNodes(); + EXPECT_EQ(nodes.size(), 3); + EXPECT_TRUE(std::ranges::contains(nodes, "foo")); + EXPECT_TRUE(std::ranges::contains(nodes, "bar")); + EXPECT_TRUE(std::ranges::contains(nodes, "baz")); +} + +TEST(DependencyGraph, ThrowsOnMissingNode) +{ + FilePathGraph depGraph; + depGraph.addEdge("a", "b"); + + EXPECT_THROW(depGraph.getSuccessors("nonexistent"), nix::Error); +} + +TEST(DependencyGraph, EmptyGraph) +{ + FilePathGraph depGraph; + + EXPECT_FALSE(depGraph.hasNode("anything")); + EXPECT_EQ(depGraph.numVertices(), 0); + EXPECT_EQ(depGraph.getAllNodes().size(), 0); +} + +/** + * Parameters for cycle detection tests + */ +struct FindCyclesParams +{ + std::string description; + std::vector> inputEdges; + std::vector> expectedCycles; +}; + +class FindCyclesTest : public ::testing::TestWithParam +{}; + +namespace { +bool compareCycles(const std::vector & a, const std::vector & b) +{ + if (a.size() != b.size()) + return a.size() < b.size(); + return std::lexicographical_compare(a.begin(), a.end(), b.begin(), b.end()); +} +} // namespace + +TEST_P(FindCyclesTest, FindCycles) +{ + const auto & params = GetParam(); + + FilePathGraph depGraph; + for (const auto & [from, to] : params.inputEdges) { + depGraph.addEdge(from, to); + } + + auto actualCycles = depGraph.findCycles(); + EXPECT_EQ(actualCycles.size(), params.expectedCycles.size()); + + std::ranges::sort(actualCycles, compareCycles); + auto expectedCycles = params.expectedCycles; + std::ranges::sort(expectedCycles, compareCycles); + + EXPECT_EQ(actualCycles, expectedCycles); +} + +INSTANTIATE_TEST_CASE_P( + FindCycles, + FindCyclesTest, + ::testing::Values( + FindCyclesParams{"empty input", {}, {}}, + FindCyclesParams{"single edge no cycle", {{"a", "b"}}, {}}, + FindCyclesParams{"simple cycle", {{"a", "b"}, {"b", "a"}}, {{"a", "b", "a"}}}, + FindCyclesParams{"three node cycle", {{"a", "b"}, {"b", "c"}, {"c", "a"}}, {{"a", "b", "c", "a"}}}, + FindCyclesParams{ + "four node cycle", {{"a", "b"}, {"b", "c"}, {"c", "d"}, {"d", "a"}}, {{"a", "b", "c", "d", "a"}}}, + FindCyclesParams{ + "multiple disjoint cycles", + {{"a", "b"}, {"b", "a"}, {"c", "d"}, {"d", "c"}}, + {{"a", "b", "a"}, {"c", "d", "c"}}}, + FindCyclesParams{"cycle with extra edges", {{"a", "b"}, {"b", "a"}, {"c", "d"}}, {{"a", "b", "a"}}}, + FindCyclesParams{"self-loop", {{"a", "a"}}, {{"a", "a"}}}, + FindCyclesParams{"chain no cycle", {{"a", "b"}, {"b", "c"}, {"c", "d"}}, {}}, + FindCyclesParams{"cycle with tail", {{"x", "a"}, {"a", "b"}, {"b", "c"}, {"c", "a"}}, {{"a", "b", "c", "a"}}})); + +} // namespace nix diff --git a/src/libstore-tests/meson.build b/src/libstore-tests/meson.build index f76df8bcb..3a7b25a4c 100644 --- a/src/libstore-tests/meson.build +++ b/src/libstore-tests/meson.build @@ -57,6 +57,7 @@ sources = files( 'build-result.cc', 'common-protocol.cc', 'content-address.cc', + 'dependency-graph.cc', 'derivation-advanced-attrs.cc', 'derivation.cc', 'derived-path.cc', diff --git a/src/libstore-tests/references.cc b/src/libstore-tests/references.cc index 9cecd573e..09184f61a 100644 --- a/src/libstore-tests/references.cc +++ b/src/libstore-tests/references.cc @@ -32,7 +32,7 @@ TEST_P(RewriteTest, IdentityRewriteIsIdentity) auto rewriter = RewritingSink(param.rewrites, rewritten); rewriter(param.originalString); rewriter.flush(); - ASSERT_EQ(rewritten.s, param.finalString); + EXPECT_EQ(rewritten.s, param.finalString); } INSTANTIATE_TEST_CASE_P( @@ -52,14 +52,14 @@ TEST(references, scan) RefScanSink scanner(StringSet{hash1}); auto s = "foobar"; scanner(s); - ASSERT_EQ(scanner.getResult(), StringSet{}); + EXPECT_EQ(scanner.getResult(), StringSet{}); } { RefScanSink scanner(StringSet{hash1}); auto s = "foobar" + hash1 + "xyzzy"; scanner(s); - ASSERT_EQ(scanner.getResult(), StringSet{hash1}); + EXPECT_EQ(scanner.getResult(), StringSet{hash1}); } { @@ -69,7 +69,7 @@ TEST(references, scan) scanner(((std::string_view) s).substr(10, 5)); scanner(((std::string_view) s).substr(15, 5)); scanner(((std::string_view) s).substr(20)); - ASSERT_EQ(scanner.getResult(), StringSet({hash1, hash2})); + EXPECT_EQ(scanner.getResult(), StringSet({hash1, hash2})); } { @@ -77,7 +77,7 @@ TEST(references, scan) auto s = "foobar" + hash1 + "xyzzy" + hash2; for (auto & i : s) scanner(std::string(1, i)); - ASSERT_EQ(scanner.getResult(), StringSet({hash1, hash2})); + EXPECT_EQ(scanner.getResult(), StringSet({hash1, hash2})); } } @@ -161,7 +161,7 @@ TEST(references, scanForReferencesDeep) { CanonPath f1Path("/file1.txt"); auto it = foundRefs.find(f1Path); - ASSERT_TRUE(it != foundRefs.end()); + EXPECT_TRUE(it != foundRefs.end()); EXPECT_EQ(it->second.size(), 1); EXPECT_TRUE(it->second.count(path1)); } @@ -170,7 +170,7 @@ TEST(references, scanForReferencesDeep) { CanonPath f2Path("/file2.txt"); auto it = foundRefs.find(f2Path); - ASSERT_TRUE(it != foundRefs.end()); + EXPECT_TRUE(it != foundRefs.end()); EXPECT_EQ(it->second.size(), 2); EXPECT_TRUE(it->second.count(path2)); EXPECT_TRUE(it->second.count(path3)); @@ -186,7 +186,7 @@ TEST(references, scanForReferencesDeep) { CanonPath f4Path("/subdir/file4.txt"); auto it = foundRefs.find(f4Path); - ASSERT_TRUE(it != foundRefs.end()); + EXPECT_TRUE(it != foundRefs.end()); EXPECT_EQ(it->second.size(), 1); EXPECT_TRUE(it->second.count(path1)); } @@ -195,7 +195,7 @@ TEST(references, scanForReferencesDeep) { CanonPath linkPath("/link1"); auto it = foundRefs.find(linkPath); - ASSERT_TRUE(it != foundRefs.end()); + EXPECT_TRUE(it != foundRefs.end()); EXPECT_EQ(it->second.size(), 1); EXPECT_TRUE(it->second.count(path2)); } diff --git a/src/libstore/dependency-graph.cc b/src/libstore/dependency-graph.cc new file mode 100644 index 000000000..13c1fa7d9 --- /dev/null +++ b/src/libstore/dependency-graph.cc @@ -0,0 +1,10 @@ +#include "nix/store/dependency-graph-impl.hh" + +namespace nix { + +// Explicit instantiations for common types +template class DependencyGraph; +template class DependencyGraph; +template class DependencyGraph; + +} // namespace nix diff --git a/src/libstore/include/nix/store/dependency-graph-impl.hh b/src/libstore/include/nix/store/dependency-graph-impl.hh new file mode 100644 index 000000000..4e94db29a --- /dev/null +++ b/src/libstore/include/nix/store/dependency-graph-impl.hh @@ -0,0 +1,287 @@ +#pragma once +/** + * @file + * + * Template implementations (as opposed to mere declarations). + * + * This file is an example of the "impl.hh" pattern. See the + * contributing guide. + * + * One only needs to include this when instantiating DependencyGraph + * with custom NodeId or EdgeProperty types beyond the pre-instantiated + * common types (StorePath, std::string). + */ + +#include "nix/store/dependency-graph.hh" +#include "nix/store/store-api.hh" +#include "nix/util/error.hh" + +#include +#include +#include +#include + +#include +#include + +namespace nix { + +template +DependencyGraph::DependencyGraph(Store & store, const StorePathSet & closure) + requires std::same_as +{ + for (auto & path : closure) { + for (auto & ref : store.queryPathInfo(path)->references) { + addEdge(path, ref); + } + } +} + +template +typename DependencyGraph::vertex_descriptor +DependencyGraph::addOrGetVertex(const NodeId & id) +{ + auto it = nodeToVertex.find(id); + if (it != nodeToVertex.end()) { + return it->second; + } + + auto v = boost::add_vertex(VertexProperty{std::make_optional(id)}, graph); + nodeToVertex[id] = v; + return v; +} + +template +void DependencyGraph::addEdge(const NodeId & from, const NodeId & to) +{ + auto vFrom = addOrGetVertex(from); + auto vTo = addOrGetVertex(to); + boost::add_edge(vFrom, vTo, graph); +} + +template +void DependencyGraph::addEdge(const NodeId & from, const NodeId & to, const EdgeProperty & prop) + requires(!std::same_as) +{ + auto vFrom = addOrGetVertex(from); + auto vTo = addOrGetVertex(to); + + auto [existingEdge, found] = boost::edge(vFrom, vTo, graph); + if (found) { + if constexpr (std::same_as) { + auto & edgeFiles = graph[existingEdge].files; + edgeFiles.insert(edgeFiles.end(), prop.files.begin(), prop.files.end()); + } + } else { + boost::add_edge(vFrom, vTo, prop, graph); + } +} + +template +std::optional::vertex_descriptor> +DependencyGraph::getVertex(const NodeId & id) const +{ + auto it = nodeToVertex.find(id); + if (it == nodeToVertex.end()) { + return std::nullopt; + } + return it->second; +} + +template +const NodeId & DependencyGraph::getNodeId(vertex_descriptor v) const +{ + return *graph[v].id; +} + +template +bool DependencyGraph::hasNode(const NodeId & id) const +{ + return nodeToVertex.contains(id); +} + +template +typename DependencyGraph::vertex_descriptor +DependencyGraph::getVertexOrThrow(const NodeId & id) const +{ + auto opt = getVertex(id); + if (!opt.has_value()) { + throw Error("node not found in graph"); + } + return *opt; +} + +template +void DependencyGraph::computeDistancesFrom(const NodeId & target) const +{ + // Check if already computed for this target (idempotent) + if (cachedDistances.has_value() && distanceTarget.has_value() && *distanceTarget == target) { + return; + } + + auto targetVertex = getVertexOrThrow(target); + size_t n = boost::num_vertices(graph); + + std::vector distances(n, std::numeric_limits::max()); + distances[targetVertex] = 0; + + // Use reverse_graph to follow incoming edges + auto reversedGraph = boost::make_reverse_graph(graph); + + // Create uniform weight map (all edges have weight 1) + auto weightMap = + boost::make_constant_property::edge_descriptor>(1); + + // Run Dijkstra on reversed graph with uniform weights + boost::dijkstra_shortest_paths( + reversedGraph, + targetVertex, + boost::weight_map(weightMap).distance_map( + boost::make_iterator_property_map(distances.begin(), boost::get(boost::vertex_index, reversedGraph)))); + + cachedDistances = std::move(distances); + distanceTarget = target; +} + +template +template +void DependencyGraph::dfsFromTarget( + const NodeId & start, + const NodeId & target, + NodeVisitor && visitNode, + EdgeVisitor && visitEdge, + StopPredicate && shouldStop) const +{ + computeDistancesFrom(target); + + std::function dfs = [&](const NodeId & node, size_t depth) -> bool { + // Visit node - if returns false, skip this subtree + if (!visitNode(node, depth)) { + return false; + } + + // Check if we should stop the entire traversal + if (shouldStop(node)) { + return true; // Signal to stop + } + + // Get and sort successors by distance + auto successors = getSuccessors(node); + auto sortedSuccessors = successors | std::views::transform([&](const auto & ref) -> std::pair { + auto v = getVertexOrThrow(ref); + return {(*cachedDistances)[v], ref}; + }) + | std::views::filter([](const auto & p) { + // Filter unreachable nodes + return p.first != std::numeric_limits::max(); + }) + | std::ranges::to(); + + std::ranges::sort(sortedSuccessors); + + // Visit each edge and recurse + for (size_t i = 0; i < sortedSuccessors.size(); ++i) { + const auto & [dist, successor] = sortedSuccessors[i]; + bool isLast = (i == sortedSuccessors.size() - 1); + + visitEdge(node, successor, isLast, depth); + + if (dfs(successor, depth + 1)) { + return true; // Propagate stop signal + } + } + + return false; // Continue traversal + }; + + dfs(start, 0); +} + +template +std::vector DependencyGraph::getSuccessors(const NodeId & node) const +{ + auto v = getVertexOrThrow(node); + auto [adjBegin, adjEnd] = boost::adjacent_vertices(v, graph); + + return std::ranges::subrange(adjBegin, adjEnd) | std::views::transform([&](auto v) { return getNodeId(v); }) + | std::ranges::to(); +} + +template +std::optional +DependencyGraph::getEdgeProperty(const NodeId & from, const NodeId & to) const + requires(!std::same_as) +{ + auto vFrom = getVertexOrThrow(from); + auto vTo = getVertexOrThrow(to); + + auto [edge, found] = boost::edge(vFrom, vTo, graph); + if (!found) { + return std::nullopt; + } + + return graph[edge]; +} + +template +std::vector> DependencyGraph::findCycles() const +{ + using vertex_descriptor = typename boost::graph_traits::vertex_descriptor; + using edge_descriptor = typename boost::graph_traits::edge_descriptor; + + std::vector> cycleDescriptors; + std::vector dfsPath; + + // Custom DFS visitor to detect back edges and extract cycles + class CycleFinder : public boost::default_dfs_visitor + { + public: + std::vector> & cycles; + std::vector & dfsPath; + + CycleFinder(std::vector> & cycles, std::vector & dfsPath) + : cycles(cycles) + , dfsPath(dfsPath) + { + } + + void discover_vertex(vertex_descriptor v, const Graph & g) + { + dfsPath.push_back(v); + } + + void finish_vertex(vertex_descriptor v, const Graph & g) + { + if (!dfsPath.empty() && dfsPath.back() == v) { + dfsPath.pop_back(); + } + } + + void back_edge(edge_descriptor e, const Graph & g) + { + auto target = boost::target(e, g); + auto cycleStart = std::ranges::find(dfsPath, target); + std::vector cycle(cycleStart, dfsPath.end()); + cycle.push_back(target); + cycles.push_back(std::move(cycle)); + } + }; + + CycleFinder visitor(cycleDescriptors, dfsPath); + boost::depth_first_search(graph, boost::visitor(visitor)); + + // Convert vertex_descriptors to NodeIds using ranges + return cycleDescriptors | std::views::transform([&](const auto & cycleVerts) { + return cycleVerts | std::views::transform([&](auto v) { return getNodeId(v); }) + | std::ranges::to>(); + }) + | std::ranges::to(); +} + +template +std::vector DependencyGraph::getAllNodes() const +{ + return nodeToVertex | std::views::keys | std::ranges::to(); +} + +} // namespace nix diff --git a/src/libstore/include/nix/store/dependency-graph.hh b/src/libstore/include/nix/store/dependency-graph.hh new file mode 100644 index 000000000..e32be07ce --- /dev/null +++ b/src/libstore/include/nix/store/dependency-graph.hh @@ -0,0 +1,166 @@ +#pragma once +///@file + +#include "nix/store/path.hh" +#include "nix/util/canon-path.hh" + +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace nix { + +class Store; + +/** + * Concept for types usable as graph node IDs. + */ +template +concept GraphNodeId = std::copyable && std::totally_ordered; + +/** + * Directed graph for dependency analysis using Boost Graph Library. + * + * @tparam NodeId Node identifier type (e.g., StorePath, std::string) + * @tparam EdgeProperty Optional edge metadata type + */ +template +class DependencyGraph +{ +public: + /** + * Bundled vertex property. Uses optional for default constructibility. + */ + struct VertexProperty + { + std::optional id; + }; + + /** + * BGL adjacency_list: bidirectional, vector storage. + */ + using Graph = boost::adjacency_list; + + using vertex_descriptor = typename boost::graph_traits::vertex_descriptor; + using edge_descriptor = typename boost::graph_traits::edge_descriptor; + +private: + Graph graph; + std::map nodeToVertex; + + // Cached algorithm results + mutable std::optional> cachedDistances; + mutable std::optional distanceTarget; + + // Internal helpers + vertex_descriptor addOrGetVertex(const NodeId & id); + std::optional getVertex(const NodeId & id) const; + const NodeId & getNodeId(vertex_descriptor v) const; + vertex_descriptor getVertexOrThrow(const NodeId & id) const; + void computeDistancesFrom(const NodeId & target) const; + +public: + DependencyGraph() = default; + + /** + * Build graph from Store closure (StorePath graphs only). + * + * @param store Store to query for references + * @param closure Store paths to include + */ + DependencyGraph(Store & store, const StorePathSet & closure) + requires std::same_as; + + /** + * Add edge, creating vertices if needed. + */ + void addEdge(const NodeId & from, const NodeId & to); + + /** + * Add edge with property. Merges property if edge exists. + */ + void addEdge(const NodeId & from, const NodeId & to, const EdgeProperty & prop) + requires(!std::same_as); + + bool hasNode(const NodeId & id) const; + + /** + * DFS traversal with distance-based successor ordering. + * Successors visited in order of increasing distance to target. + * Automatically computes distances if needed (lazy). + * + * Example traversal from A to D: + * + * A (dist=3) + * ├─→ B (dist=2) + * │ └─→ D (dist=0) [target] + * └─→ C (dist=2) + * └─→ D (dist=0) + * + * Callbacks invoked: + * visitNode(A, depth=0) -> true + * visitEdge(A, B, isLast=false, depth=0) + * visitNode(B, depth=1) -> true + * visitEdge(B, D, isLast=true, depth=1) + * visitNode(D, depth=2) -> true + * shouldStop(D) -> true [stops traversal] + * + * @param start Starting node for traversal + * @param target Target node (used for distance-based sorting) + * @param visitNode Called when entering node: (node, depth) -> bool. Return false to skip subtree. + * @param visitEdge Called for each edge: (from, to, isLastEdge, depth) -> void + * @param shouldStop Called after visiting node: (node) -> bool. Return true to stop entire traversal. + */ + template + void dfsFromTarget( + const NodeId & start, + const NodeId & target, + NodeVisitor && visitNode, + EdgeVisitor && visitEdge, + StopPredicate && shouldStop) const; + + /** + * Get successor nodes (outgoing edges). + */ + std::vector getSuccessors(const NodeId & node) const; + + /** + * Get edge property. Returns nullopt if edge doesn't exist. + */ + std::optional getEdgeProperty(const NodeId & from, const NodeId & to) const + requires(!std::same_as); + + std::vector getAllNodes() const; + + /** + * Find all cycles in the graph using DFS. + * Returns vector of cycles, each represented as a path that starts and ends at the same node. + */ + std::vector> findCycles() const; + + size_t numVertices() const + { + return boost::num_vertices(graph); + } +}; + +/** + * Edge property storing which files created a dependency. + */ +struct FileListEdgeProperty +{ + std::vector files; +}; + +// Convenience typedefs +using StorePathGraph = DependencyGraph; +using FilePathGraph = DependencyGraph; +using StorePathGraphWithFiles = DependencyGraph; + +} // namespace nix diff --git a/src/libstore/include/nix/store/meson.build b/src/libstore/include/nix/store/meson.build index 5d6626ff8..6e4431da7 100644 --- a/src/libstore/include/nix/store/meson.build +++ b/src/libstore/include/nix/store/meson.build @@ -31,6 +31,8 @@ headers = [ config_pub_h ] + files( 'common-ssh-store-config.hh', 'content-address.hh', 'daemon.hh', + 'dependency-graph-impl.hh', + 'dependency-graph.hh', 'derivation-options.hh', 'derivations.hh', 'derived-path-map.hh', diff --git a/src/libstore/include/nix/store/path-references.hh b/src/libstore/include/nix/store/path-references.hh index 6aa506da4..cfaa76f3b 100644 --- a/src/libstore/include/nix/store/path-references.hh +++ b/src/libstore/include/nix/store/path-references.hh @@ -3,10 +3,12 @@ #include "nix/store/references.hh" #include "nix/store/path.hh" +#include "nix/store/dependency-graph.hh" #include "nix/util/source-accessor.hh" #include #include +#include namespace nix { @@ -59,7 +61,7 @@ void scanForReferencesDeep( SourceAccessor & accessor, const CanonPath & rootPath, const StorePathSet & refs, - std::function callback); + std::function callback); /** * Scan a store path tree and return which references appear in which files. @@ -78,4 +80,25 @@ void scanForReferencesDeep( std::map scanForReferencesDeep(SourceAccessor & accessor, const CanonPath & rootPath, const StorePathSet & refs); +/** + * Build a StorePath-level dependency graph from file scanning. + * + * This scans the given path for references and builds a graph where: + * - Nodes are StorePaths + * - Edges represent dependencies between StorePaths + * - Edge properties store the files that created each dependency + * + * This unified approach allows both cycle detection and why-depends to share + * the same graph-building logic while maintaining file-level information for + * detailed error messages embedded directly in the graph. + * + * @param accessor Source accessor to read the tree + * @param rootPath Root path to scan + * @param rootStorePath The StorePath that rootPath belongs to + * @param refs Set of store paths to search for + * @return StorePathGraphWithFiles where edge properties contain file lists + */ +DependencyGraph buildStorePathGraphFromScan( + SourceAccessor & accessor, const CanonPath & rootPath, const StorePath & rootStorePath, const StorePathSet & refs); + } // namespace nix diff --git a/src/libstore/meson.build b/src/libstore/meson.build index d1b3666cc..bdc1c6295 100644 --- a/src/libstore/meson.build +++ b/src/libstore/meson.build @@ -277,6 +277,7 @@ sources = files( 'common-ssh-store-config.cc', 'content-address.cc', 'daemon.cc', + 'dependency-graph.cc', 'derivation-options.cc', 'derivations.cc', 'derived-path-map.cc', diff --git a/src/libstore/path-references.cc b/src/libstore/path-references.cc index 3d783bbe4..60cdcfb80 100644 --- a/src/libstore/path-references.cc +++ b/src/libstore/path-references.cc @@ -1,4 +1,5 @@ #include "nix/store/path-references.hh" +#include "nix/store/dependency-graph.hh" #include "nix/util/hash.hh" #include "nix/util/archive.hh" #include "nix/util/source-accessor.hh" @@ -62,7 +63,7 @@ void scanForReferencesDeep( SourceAccessor & accessor, const CanonPath & rootPath, const StorePathSet & refs, - std::function callback) + std::function callback) { // Recursive tree walker auto walk = [&](this auto & self, const CanonPath & path) -> void { @@ -137,11 +138,35 @@ scanForReferencesDeep(SourceAccessor & accessor, const CanonPath & rootPath, con { std::map results; - scanForReferencesDeep(accessor, rootPath, refs, [&](FileRefScanResult result) { - results[std::move(result.filePath)] = std::move(result.foundRefs); + scanForReferencesDeep(accessor, rootPath, refs, [&](const FileRefScanResult & result) { + results[result.filePath] = result.foundRefs; }); return results; } +DependencyGraph buildStorePathGraphFromScan( + SourceAccessor & accessor, const CanonPath & rootPath, const StorePath & rootStorePath, const StorePathSet & refs) +{ + DependencyGraph graph; + + scanForReferencesDeep(accessor, rootPath, refs, [&](const FileRefScanResult & result) { + // All files in this scan belong to rootStorePath + for (const auto & foundRef : result.foundRefs) { + // Add StorePath -> StorePath edge with file metadata + FileListEdgeProperty edgeProp; + edgeProp.files.push_back(result.filePath); + graph.addEdge(rootStorePath, foundRef, std::move(edgeProp)); + + debug( + "buildStorePathGraphFromScan: %s (in %s) → %s", + rootStorePath.to_string(), + result.filePath.abs(), + foundRef.to_string()); + } + }); + + return graph; +} + } // namespace nix diff --git a/src/libstore/unix/build/derivation-builder.cc b/src/libstore/unix/build/derivation-builder.cc index c2ef730dc..a0d840c1c 100644 --- a/src/libstore/unix/build/derivation-builder.cc +++ b/src/libstore/unix/build/derivation-builder.cc @@ -4,7 +4,10 @@ #include "nix/util/processes.hh" #include "nix/store/builtins.hh" #include "nix/store/path-references.hh" +#include "nix/store/dependency-graph.hh" #include "nix/util/finally.hh" + +#include #include "nix/util/util.hh" #include "nix/util/archive.hh" #include "nix/util/git.hh" @@ -62,6 +65,103 @@ struct NotDeterministic : BuildError } }; +/** + * Information about an output path for cycle analysis. + */ +struct OutputPathInfo +{ + std::string outputName; ///< Name of the output (e.g., "out", "dev") + StorePath storePath; ///< The StorePath of this output + Path actualPath; ///< Actual filesystem path where the output is located +}; + +/** + * Helper to analyze cycles and throw a detailed error. + * + * This function always throws - either the detailed cycle error or re-throws + * the original exception if no cycles are found. + * + * @param drvName The formatted name of the derivation being built + * @param referenceablePaths Set of paths to search for in the outputs + * @param getOutputPaths Callback returning output information to scan + */ +[[noreturn]] static void analyzeCyclesAndThrow( + std::string_view drvName, + const StorePathSet & referenceablePaths, + std::function()> getOutputPaths) +{ + debug("cycle detected, analyzing for detailed error report"); + + // Scan all outputs and build dependency graph + auto accessor = getFSSourceAccessor(); + DependencyGraph depGraph; + + for (auto & output : getOutputPaths()) { + debug("scanning output '%s' at path '%s' for cycles", output.outputName, output.actualPath); + auto outputGraph = + buildStorePathGraphFromScan(*accessor, CanonPath(output.actualPath), output.storePath, referenceablePaths); + + // Merge into combined graph + for (auto & node : outputGraph.getAllNodes()) { + for (auto & successor : outputGraph.getSuccessors(node)) { + auto edgeProp = outputGraph.getEdgeProperty(node, successor); + if (edgeProp) { + depGraph.addEdge(node, successor, *edgeProp); + } + } + } + } + + // Find cycles in the combined graph + auto cycles = depGraph.findCycles(); + + if (cycles.empty()) { + debug("no detailed cycles found, re-throwing original error"); + throw; + } + + debug("found %lu cycles", cycles.size()); + + // Build detailed error message with file annotations + std::string cycleDetails = fmt("Detailed cycle analysis found %d cycle path(s):", cycles.size()); + + for (const auto & [idx, cycle] : std::views::enumerate(cycles)) { + cycleDetails += fmt("\n\nCycle %d:", idx + 1); + + for (const auto & window : cycle | std::views::slide(2)) { + const StorePath & from = window[0]; + const StorePath & to = window[1]; + + cycleDetails += fmt("\n → %s", from.to_string()); + + // Add file annotations + auto edgeProp = depGraph.getEdgeProperty(from, to); + if (edgeProp && !edgeProp->files.empty()) { + for (const auto & file : edgeProp->files) { + cycleDetails += fmt("\n (via %s)", file.abs()); + } + } + } + + // Close the cycle + if (!cycle.empty()) { + cycleDetails += fmt("\n → %s", cycle.back().to_string()); + } + } + + cycleDetails += + fmt("\n\nThis means there are circular references between output files.\n" + "The build cannot proceed because the outputs reference each other."); + + // Add hint with temp paths for debugging + if (settings.keepFailed || verbosity >= lvlDebug) { + cycleDetails += "\n\nNote: Temporary build outputs are preserved for inspection."; + } + + throw BuildError( + BuildResult::Failure::OutputRejected, "cycle detected in build of '%s': %s", drvName, cycleDetails); +} + /** * This class represents the state for building locally. * @@ -1473,43 +1573,62 @@ SingleDrvOutputs DerivationBuilderImpl::registerOutputs() outputStats.insert_or_assign(outputName, std::move(st)); } - auto sortedOutputNames = topoSort( - outputsToSort, - {[&](const std::string & name) { - auto orifu = get(outputReferencesIfUnregistered, name); - if (!orifu) - throw BuildError( - BuildResult::Failure::OutputRejected, - "no output reference for '%s' in build of '%s'", - name, - store.printStorePath(drvPath)); - return std::visit( - overloaded{ - /* Since we'll use the already installed versions of these, we - can treat them as leaves and ignore any references they - have. */ - [&](const AlreadyRegistered &) { return StringSet{}; }, - [&](const PerhapsNeedToRegister & refs) { - StringSet referencedOutputs; - /* FIXME build inverted map up front so no quadratic waste here */ - for (auto & r : refs.refs) - for (auto & [o, p] : scratchOutputs) - if (r == p) - referencedOutputs.insert(o); - return referencedOutputs; - }, - }, - *orifu); - }}, - {[&](const std::string & path, const std::string & parent) { - // TODO with more -vvvv also show the temporary paths for manual inspection. - return BuildError( - BuildResult::Failure::OutputRejected, - "cycle detected in build of '%s' in the references of output '%s' from output '%s'", - store.printStorePath(drvPath), - path, - parent); - }}); + auto sortedOutputNames = [&]() { + try { + return topoSort( + outputsToSort, + {[&](const std::string & name) { + auto orifu = get(outputReferencesIfUnregistered, name); + if (!orifu) + throw BuildError( + BuildResult::Failure::OutputRejected, + "no output reference for '%s' in build of '%s'", + name, + store.printStorePath(drvPath)); + return std::visit( + overloaded{ + /* Since we'll use the already installed versions of these, we + can treat them as leaves and ignore any references they + have. */ + [&](const AlreadyRegistered &) { return StringSet{}; }, + [&](const PerhapsNeedToRegister & refs) { + StringSet referencedOutputs; + /* FIXME build inverted map up front so no quadratic waste here */ + for (auto & r : refs.refs) + for (auto & [o, p] : scratchOutputs) + if (r == p) + referencedOutputs.insert(o); + return referencedOutputs; + }, + }, + *orifu); + }}, + {[&](const std::string & path, const std::string & parent) { + // TODO with more -vvvv also show the temporary paths for manual inspection. + return BuildError( + BuildResult::Failure::OutputRejected, + "cycle detected in build of '%s' in the references of output '%s' from output '%s'", + store.printStorePath(drvPath), + path, + parent); + }}); + } catch (std::exception & e) { + analyzeCyclesAndThrow(store.printStorePath(drvPath), referenceablePaths, [&]() { + std::vector outputPaths; + for (auto & [outputName, _] : drv.outputs) { + auto scratchOutput = get(scratchOutputs, outputName); + if (scratchOutput) { + outputPaths.push_back( + OutputPathInfo{ + .outputName = outputName, + .storePath = *scratchOutput, + .actualPath = realPathInSandbox(store.printStorePath(*scratchOutput))}); + } + } + return outputPaths; + }); + } + }(); std::reverse(sortedOutputNames.begin(), sortedOutputNames.end()); @@ -1848,12 +1967,24 @@ SingleDrvOutputs DerivationBuilderImpl::registerOutputs() /* Register each output path as valid, and register the sets of paths referenced by each of them. If there are cycles in the outputs, this will fail. */ - { + try { ValidPathInfos infos2; for (auto & [outputName, newInfo] : infos) { infos2.insert_or_assign(newInfo.path, newInfo); } store.registerValidPaths(infos2); + } catch (BuildError & e) { + analyzeCyclesAndThrow(store.printStorePath(drvPath), referenceablePaths, [&]() { + std::vector outputPaths; + for (auto & [outputName, newInfo] : infos) { + outputPaths.push_back( + OutputPathInfo{ + .outputName = outputName, + .storePath = newInfo.path, + .actualPath = store.toRealPath(store.printStorePath(newInfo.path))}); + } + return outputPaths; + }); } /* If we made it this far, we are sure the output matches the diff --git a/src/nix/why-depends.cc b/src/nix/why-depends.cc index 29da9e953..465aae2ef 100644 --- a/src/nix/why-depends.cc +++ b/src/nix/why-depends.cc @@ -1,10 +1,11 @@ #include "nix/cmd/command.hh" #include "nix/store/store-api.hh" #include "nix/store/path-references.hh" +#include "nix/store/dependency-graph-impl.hh" #include "nix/util/source-accessor.hh" #include "nix/main/shared.hh" -#include +#include using namespace nix; @@ -21,6 +22,63 @@ static std::string filterPrintable(const std::string & s) return res; } +/** + * Find and format hash references in scanned files with context. + * + * @param accessor Source accessor for the store path + * @param refPaths Store paths to search for + * @param dependencyPathHash Hash of the dependency (for coloring) + * @return Map from hash string to list of formatted hit strings showing context + */ +static std::map +findHashContexts(SourceAccessor & accessor, const StorePathSet & refPaths, std::string_view dependencyPathHash) +{ + std::map hits; + + auto getColour = [&](const std::string & hash) { return hash == dependencyPathHash ? ANSI_GREEN : ANSI_BLUE; }; + + scanForReferencesDeep(accessor, CanonPath::root, refPaths, [&](const FileRefScanResult & result) { + std::string p2 = + result.filePath.isRoot() ? std::string(result.filePath.abs()) : std::string(result.filePath.rel()); + auto st = accessor.lstat(result.filePath); + + if (st.type == SourceAccessor::Type::tRegular) { + auto contents = accessor.readFile(result.filePath); + + // For each reference found in this file, extract context + for (const auto & foundRef : result.foundRefs) { + std::string hash(foundRef.hashPart()); + auto pos = contents.find(hash); + if (pos != std::string::npos) { + size_t margin = 32; + auto pos2 = pos >= margin ? pos - margin : 0; + hits[hash].emplace_back( + fmt("%s: …%s…", + p2, + hilite( + filterPrintable(std::string(contents, pos2, pos - pos2 + hash.size() + margin)), + pos - pos2, + StorePath::HashLen, + getColour(hash)))); + } + } + } else if (st.type == SourceAccessor::Type::tSymlink) { + auto target = accessor.readLink(result.filePath); + + // For each reference found in this symlink, show it + for (const auto & foundRef : result.foundRefs) { + std::string hash(foundRef.hashPart()); + auto pos = target.find(hash); + if (pos != std::string::npos) + hits[hash].emplace_back( + fmt("%s -> %s", p2, hilite(target, pos, StorePath::HashLen, getColour(hash)))); + } + } + }); + + return hits; +} + struct CmdWhyDepends : SourceExprCommand, MixOperateOnOptions { std::string _package, _dependency; @@ -109,187 +167,91 @@ struct CmdWhyDepends : SourceExprCommand, MixOperateOnOptions auto dependencyPath = *optDependencyPath; auto dependencyPathHash = dependencyPath.hashPart(); - auto const inf = std::numeric_limits::max(); - - struct Node - { - StorePath path; - StorePathSet refs; - StorePathSet rrefs; - size_t dist = inf; - Node * prev = nullptr; - bool queued = false; - bool visited = false; - }; - - std::map graph; - - for (auto & path : closure) - graph.emplace( - path, - Node{ - .path = path, - .refs = store->queryPathInfo(path)->references, - .dist = path == dependencyPath ? 0 : inf}); - - // Transpose the graph. - for (auto & node : graph) - for (auto & ref : node.second.refs) - graph.find(ref)->second.rrefs.insert(node.first); - - /* Run Dijkstra's shortest path algorithm to get the distance - of every path in the closure to 'dependency'. */ - std::priority_queue queue; - - queue.push(&graph.at(dependencyPath)); - - while (!queue.empty()) { - auto & node = *queue.top(); - queue.pop(); - - for (auto & rref : node.rrefs) { - auto & node2 = graph.at(rref); - auto dist = node.dist + 1; - if (dist < node2.dist) { - node2.dist = dist; - node2.prev = &node; - if (!node2.queued) { - node2.queued = true; - queue.push(&node2); - } - } - } - } + // Build dependency graph from closure using store metadata + StorePathGraph depGraph(*store, closure); /* Print the subgraph of nodes that have 'dependency' in their closure (i.e., that have a non-infinite distance to 'dependency'). Print every edge on a path between `package` and `dependency`. */ - std::function printNode; - - struct BailOut - {}; - - printNode = [&](Node & node, const std::string & firstPad, const std::string & tailPad) { - assert(node.dist != inf); - if (precise) { - logger->cout( - "%s%s%s%s" ANSI_NORMAL, - firstPad, - node.visited ? "\e[38;5;244m" : "", - firstPad != "" ? "→ " : "", - store->printStorePath(node.path)); - } - - if (node.path == dependencyPath && !all && packagePath != dependencyPath) - throw BailOut(); - - if (node.visited) - return; - if (precise) - node.visited = true; - - /* Sort the references by distance to `dependency` to - ensure that the shortest path is printed first. */ - std::multimap refs; - StorePathSet refPaths; - - for (auto & ref : node.refs) { - if (ref == node.path && packagePath != dependencyPath) - continue; - auto & node2 = graph.at(ref); - if (node2.dist == inf) - continue; - refs.emplace(node2.dist, &node2); - refPaths.insert(node2.path); - } - - /* For each reference, find the files and symlinks that - contain the reference. */ - std::map hits; - - auto accessor = store->requireStoreObjectAccessor(node.path); - - auto getColour = [&](const std::string & hash) { - return hash == dependencyPathHash ? ANSI_GREEN : ANSI_BLUE; - }; - - if (precise) { - // Use scanForReferencesDeep to find files containing references - scanForReferencesDeep(*accessor, CanonPath::root, refPaths, [&](FileRefScanResult result) { - auto p2 = result.filePath.isRoot() ? result.filePath.abs() : result.filePath.rel(); - auto st = accessor->lstat(result.filePath); - - if (st.type == SourceAccessor::Type::tRegular) { - auto contents = accessor->readFile(result.filePath); - - // For each reference found in this file, extract context - for (auto & foundRef : result.foundRefs) { - std::string hash(foundRef.hashPart()); - auto pos = contents.find(hash); - if (pos != std::string::npos) { - size_t margin = 32; - auto pos2 = pos >= margin ? pos - margin : 0; - hits[hash].emplace_back(fmt( - "%s: …%s…", - p2, - hilite( - filterPrintable(std::string(contents, pos2, pos - pos2 + hash.size() + margin)), - pos - pos2, - StorePath::HashLen, - getColour(hash)))); - } - } - } else if (st.type == SourceAccessor::Type::tSymlink) { - auto target = accessor->readLink(result.filePath); - - // For each reference found in this symlink, show it - for (auto & foundRef : result.foundRefs) { - std::string hash(foundRef.hashPart()); - auto pos = target.find(hash); - if (pos != std::string::npos) - hits[hash].emplace_back( - fmt("%s -> %s", p2, hilite(target, pos, StorePath::HashLen, getColour(hash)))); - } - } - }); - } - - for (auto & ref : refs) { - std::string hash(ref.second->path.hashPart()); - - bool last = all ? ref == *refs.rbegin() : true; - - for (auto & hit : hits[hash]) { - bool first = hit == *hits[hash].begin(); - logger->cout( - "%s%s%s", tailPad, (first ? (last ? treeLast : treeConn) : (last ? treeNull : treeLine)), hit); - if (!all) - break; - } - - if (!precise) { - logger->cout( - "%s%s%s%s" ANSI_NORMAL, - firstPad, - ref.second->visited ? "\e[38;5;244m" : "", - last ? treeLast : treeConn, - store->printStorePath(ref.second->path)); - node.visited = true; - } - - printNode(*ref.second, tailPad + (last ? treeNull : treeLine), tailPad + (last ? treeNull : treeLine)); - } - }; RunPager pager; - try { - if (!precise) { - logger->cout("%s", store->printStorePath(graph.at(packagePath).path)); - } - printNode(graph.at(packagePath), "", ""); - } catch (BailOut &) { + + if (!precise) { + logger->cout("%s", store->printStorePath(packagePath)); } + + std::set visited; + std::vector padStack = {""}; + + depGraph.dfsFromTarget( + packagePath, + dependencyPath, + // Visit node callback + [&](const StorePath & node, size_t depth) -> bool { + std::string currentPad = padStack[depth]; + + if (precise) { + logger->cout( + "%s%s%s%s" ANSI_NORMAL, + currentPad, + visited.contains(node) ? "\e[38;5;244m" : "", + currentPad != "" ? "→ " : "", + store->printStorePath(node)); + } + + if (visited.contains(node)) { + return false; // Skip subtree + } + if (precise) { + visited.insert(node); + } + + return true; // Continue with this node's children + }, + // Visit edge callback + [&](const StorePath & from, const StorePath & to, bool isLast, size_t depth) { + std::string tailPad = padStack[depth]; + + // In non-all mode, we only traverse one path, so everything is "last" + bool effectivelyLast = !all || isLast; + + if (precise) { + auto accessor = store->requireStoreObjectAccessor(from); + auto hits = findHashContexts(*accessor, {to}, dependencyPathHash); + std::string hash(to.hashPart()); + auto & hashHits = hits[hash]; + + for (auto & hit : hashHits) { + bool first = hit == *hashHits.begin(); + logger->cout( + "%s%s%s", + tailPad, + (first ? (effectivelyLast ? treeLast : treeConn) : (effectivelyLast ? treeNull : treeLine)), + hit); + if (!all) + break; + } + } else { + std::string currentPad = padStack[depth]; + logger->cout( + "%s%s%s%s" ANSI_NORMAL, + currentPad, + visited.contains(to) ? "\e[38;5;244m" : "", + effectivelyLast ? treeLast : treeConn, + store->printStorePath(to)); + visited.insert(from); + } + + // Update padding for next level + if (padStack.size() == depth + 1) { + padStack.push_back(tailPad + (effectivelyLast ? treeNull : treeLine)); + } else { + padStack[depth + 1] = tailPad + (effectivelyLast ? treeNull : treeLine); + } + }, + // Stop condition + [&](const StorePath & node) { return node == dependencyPath && !all && packagePath != dependencyPath; }); } };