From 182ae393d1574f4f97d9f42e8f7397896fffbeab Mon Sep 17 00:00:00 2001 From: Bernardo Meurer Costa Date: Mon, 3 Nov 2025 22:04:12 +0000 Subject: [PATCH] refactor(libutil/topo-sort): return variant instead of throwing The variant has on the left-hand side the topologically sorted vector and the right-hand side is a pair showing the path and its parent that represent a cycle in the graph making the sort impossible. This change prepares for enhanced cycle error messages that can provide more context about the cycle. The variant approach allows callers to handle cycles more flexibly, enabling better error reporting that shows the full cycle path and which files are involved. Adapted from Lix commit f7871fcb5. Change-Id: I70a987f470437df8beb3b1cc203ff88701d0aa1b Co-Authored-By: Maximilian Bosch --- src/libstore/local-store.cc | 29 +- src/libstore/misc.cc | 35 +- src/libstore/unix/build/derivation-builder.cc | 73 ++-- src/libutil-tests/meson.build | 1 + src/libutil-tests/topo-sort.cc | 318 ++++++++++++++++++ src/libutil/include/nix/util/topo-sort.hh | 48 ++- 6 files changed, 427 insertions(+), 77 deletions(-) create mode 100644 src/libutil-tests/topo-sort.cc diff --git a/src/libstore/local-store.cc b/src/libstore/local-store.cc index c6aeaf0d2..2c4d546f8 100644 --- a/src/libstore/local-store.cc +++ b/src/libstore/local-store.cc @@ -989,19 +989,22 @@ void LocalStore::registerValidPaths(const ValidPathInfos & infos) error if a cycle is detected and roll back the transaction. Cycles can only occur when a derivation has multiple outputs. */ - topoSort( - paths, - {[&](const StorePath & path) { - auto i = infos.find(path); - return i == infos.end() ? StorePathSet() : i->second.references; - }}, - {[&](const StorePath & path, const StorePath & parent) { - return BuildError( - BuildResult::Failure::OutputRejected, - "cycle detected in the references of '%s' from '%s'", - printStorePath(path), - printStorePath(parent)); - }}); + auto topoSortResult = topoSort(paths, {[&](const StorePath & path) { + auto i = infos.find(path); + return i == infos.end() ? StorePathSet() : i->second.references; + }}); + + std::visit( + overloaded{ + [&](const Cycle & cycle) { + throw BuildError( + BuildResult::Failure::OutputRejected, + "cycle detected in the references of '%s' from '%s'", + printStorePath(cycle.path), + printStorePath(cycle.parent)); + }, + [](auto &) { /* Success, continue */ }}, + topoSortResult); txn.commit(); }); diff --git a/src/libstore/misc.cc b/src/libstore/misc.cc index 8b2a7287e..34a369810 100644 --- a/src/libstore/misc.cc +++ b/src/libstore/misc.cc @@ -311,22 +311,25 @@ MissingPaths Store::queryMissing(const std::vector & targets) StorePaths Store::topoSortPaths(const StorePathSet & paths) { - return topoSort( - paths, - {[&](const StorePath & path) { - try { - return queryPathInfo(path)->references; - } catch (InvalidPath &) { - return StorePathSet(); - } - }}, - {[&](const StorePath & path, const StorePath & parent) { - return BuildError( - BuildResult::Failure::OutputRejected, - "cycle detected in the references of '%s' from '%s'", - printStorePath(path), - printStorePath(parent)); - }}); + auto result = topoSort(paths, {[&](const StorePath & path) { + try { + return queryPathInfo(path)->references; + } catch (InvalidPath &) { + return StorePathSet(); + } + }}); + + return std::visit( + overloaded{ + [&](const Cycle & cycle) -> StorePaths { + throw BuildError( + BuildResult::Failure::OutputRejected, + "cycle detected in the references of '%s' from '%s'", + printStorePath(cycle.path), + printStorePath(cycle.parent)); + }, + [](const auto & sorted) { return sorted; }}, + result); } std::map diff --git a/src/libstore/unix/build/derivation-builder.cc b/src/libstore/unix/build/derivation-builder.cc index c2ef730dc..6eedc5fa2 100644 --- a/src/libstore/unix/build/derivation-builder.cc +++ b/src/libstore/unix/build/derivation-builder.cc @@ -1473,43 +1473,46 @@ SingleDrvOutputs DerivationBuilderImpl::registerOutputs() outputStats.insert_or_assign(outputName, std::move(st)); } - auto sortedOutputNames = topoSort( - outputsToSort, - {[&](const std::string & name) { - auto orifu = get(outputReferencesIfUnregistered, name); - if (!orifu) + auto topoSortResult = topoSort(outputsToSort, {[&](const std::string & name) { + auto orifu = get(outputReferencesIfUnregistered, name); + if (!orifu) + throw BuildError( + BuildResult::Failure::OutputRejected, + "no output reference for '%s' in build of '%s'", + name, + store.printStorePath(drvPath)); + return std::visit( + overloaded{ + /* Since we'll use the already installed versions of these, we + can treat them as leaves and ignore any references they + have. */ + [&](const AlreadyRegistered &) { return StringSet{}; }, + [&](const PerhapsNeedToRegister & refs) { + StringSet referencedOutputs; + /* FIXME build inverted map up front so no quadratic waste here */ + for (auto & r : refs.refs) + for (auto & [o, p] : scratchOutputs) + if (r == p) + referencedOutputs.insert(o); + return referencedOutputs; + }, + }, + *orifu); + }}); + + auto sortedOutputNames = std::visit( + overloaded{ + [&](Cycle & cycle) -> std::vector { + // TODO with more -vvvv also show the temporary paths for manual inspection. throw BuildError( BuildResult::Failure::OutputRejected, - "no output reference for '%s' in build of '%s'", - name, - store.printStorePath(drvPath)); - return std::visit( - overloaded{ - /* Since we'll use the already installed versions of these, we - can treat them as leaves and ignore any references they - have. */ - [&](const AlreadyRegistered &) { return StringSet{}; }, - [&](const PerhapsNeedToRegister & refs) { - StringSet referencedOutputs; - /* FIXME build inverted map up front so no quadratic waste here */ - for (auto & r : refs.refs) - for (auto & [o, p] : scratchOutputs) - if (r == p) - referencedOutputs.insert(o); - return referencedOutputs; - }, - }, - *orifu); - }}, - {[&](const std::string & path, const std::string & parent) { - // TODO with more -vvvv also show the temporary paths for manual inspection. - return BuildError( - BuildResult::Failure::OutputRejected, - "cycle detected in build of '%s' in the references of output '%s' from output '%s'", - store.printStorePath(drvPath), - path, - parent); - }}); + "cycle detected in build of '%s' in the references of output '%s' from output '%s'", + store.printStorePath(drvPath), + cycle.path, + cycle.parent); + }, + [](auto & sorted) { return sorted; }}, + topoSortResult); std::reverse(sortedOutputNames.begin(), sortedOutputNames.end()); diff --git a/src/libutil-tests/meson.build b/src/libutil-tests/meson.build index c75f4d90a..2772ab060 100644 --- a/src/libutil-tests/meson.build +++ b/src/libutil-tests/meson.build @@ -74,6 +74,7 @@ sources = files( 'strings.cc', 'suggestions.cc', 'terminal.cc', + 'topo-sort.cc', 'url.cc', 'util.cc', 'xml-writer.cc', diff --git a/src/libutil-tests/topo-sort.cc b/src/libutil-tests/topo-sort.cc new file mode 100644 index 000000000..91030247e --- /dev/null +++ b/src/libutil-tests/topo-sort.cc @@ -0,0 +1,318 @@ +#include +#include +#include +#include +#include + +#include + +#include "nix/util/topo-sort.hh" +#include "nix/util/util.hh" + +namespace nix { + +/** + * Helper function to create a graph and run topoSort + */ +TopoSortResult +runTopoSort(const std::set & nodes, const std::map> & edges) +{ + return topoSort( + nodes, + std::function(const std::string &)>( + [&](const std::string & node) -> std::set { + auto it = edges.find(node); + return it != edges.end() ? it->second : std::set{}; + })); +} + +/** + * Helper to check if a sorted result respects dependencies + * + * @note `topoSort` returns results in REVERSE topological order (see + * line 61 of topo-sort.hh). This means dependents come BEFORE their + * dependencies in the output. + * + * In the edges std::map, if parent -> child, it means parent depends on + * child, so parent must come BEFORE child in the output from topoSort. + */ +bool isValidTopologicalOrder( + const std::vector & sorted, const std::map> & edges) +{ + std::map position; + for (size_t i = 0; i < sorted.size(); ++i) { + position[sorted[i]] = i; + } + + // For each edge parent -> children, parent depends on children + // topoSort reverses the output, so parent comes BEFORE children + for (const auto & [parent, children] : edges) { + for (const auto & child : children) { + if (position.count(parent) && position.count(child)) { + // parent should come before child (have a smaller index) + if (position[parent] > position[child]) { + return false; + } + } + } + } + return true; +} + +// ============================================================================ +// Parametrized Tests for Topological Sort +// ============================================================================ + +struct ExpectSuccess +{ + std::optional> order; // std::nullopt = any valid order is acceptable +}; + +struct ExpectCycle +{ + std::set involvedNodes; +}; + +using ExpectedResult = std::variant; + +struct TopoSortCase +{ + std::string name; + std::set nodes; + std::map> edges; + ExpectedResult expected; +}; + +class TopoSortTest : public ::testing::TestWithParam +{}; + +TEST_P(TopoSortTest, ProducesCorrectResult) +{ + const auto & testCase = GetParam(); + auto result = runTopoSort(testCase.nodes, testCase.edges); + + std::visit( + overloaded{ + [&](const ExpectSuccess & expect) { + // Success case + ASSERT_TRUE(holds_alternative>(result)) + << "Expected successful sort for: " << testCase.name; + + auto sorted = get>(result); + ASSERT_EQ(sorted.size(), testCase.nodes.size()) + << "Sorted output should contain all nodes for: " << testCase.name; + + ASSERT_TRUE(isValidTopologicalOrder(sorted, testCase.edges)) + << "Invalid topological order for: " << testCase.name; + + if (expect.order) { + ASSERT_EQ(sorted, *expect.order) << "Expected specific order for: " << testCase.name; + } + }, + [&](const ExpectCycle & expect) { + // Cycle detection case + ASSERT_TRUE(holds_alternative>(result)) + << "Expected cycle detection for: " << testCase.name; + + auto cycle = get>(result); + + // Verify that the cycle involves expected nodes + ASSERT_TRUE(expect.involvedNodes.count(cycle.path) > 0) + << "Cycle path '" << cycle.path << "' not in expected cycle nodes for: " << testCase.name; + ASSERT_TRUE(expect.involvedNodes.count(cycle.parent) > 0) + << "Cycle parent '" << cycle.parent << "' not in expected cycle nodes for: " << testCase.name; + + // Verify that there's actually an edge in the cycle + auto it = testCase.edges.find(cycle.parent); + ASSERT_TRUE(it != testCase.edges.end()) << "Parent node should have edges for: " << testCase.name; + ASSERT_TRUE(it->second.count(cycle.path) > 0) + << "Should be an edge from parent to path for: " << testCase.name; + }}, + testCase.expected); +} + +INSTANTIATE_TEST_SUITE_P( + TopoSort, + TopoSortTest, + ::testing::Values( + // Success cases + TopoSortCase{ + .name = "EmptySet", + .nodes = {}, + .edges = {}, + .expected = ExpectSuccess{.order = std::vector{}}, + }, + TopoSortCase{ + .name = "SingleNode", + .nodes = {"A"}, + .edges = {}, + .expected = ExpectSuccess{.order = std::vector{"A"}}, + }, + TopoSortCase{ + .name = "TwoIndependentNodes", + .nodes = {"A", "B"}, + .edges = {}, + // Order between independent nodes is unspecified + .expected = ExpectSuccess{.order = std::nullopt}, + }, + TopoSortCase{ + .name = "SimpleChain", + .nodes = {"A", "B", "C"}, + .edges{ + {"A", {"B"}}, + {"B", {"C"}}, + }, + .expected = ExpectSuccess{.order = std::vector{"A", "B", "C"}}, + }, + TopoSortCase{ + .name = "SimpleDag", + .nodes = {"A", "B", "C", "D"}, + .edges{ + {"A", {"B", "C"}}, + {"B", {"D"}}, + {"C", {"D"}}, + }, + .expected = ExpectSuccess{.order = std::nullopt}, + }, + TopoSortCase{ + .name = "DiamondDependency", + .nodes = {"A", "B", "C", "D"}, + .edges{ + {"A", {"B", "C"}}, + {"B", {"D"}}, + {"C", {"D"}}, + }, + .expected = ExpectSuccess{.order = std::nullopt}, + }, + TopoSortCase{ + .name = "DisconnectedComponents", + .nodes = {"A", "B", "C", "D"}, + .edges{ + {"A", {"B"}}, + {"C", {"D"}}, + }, + .expected = ExpectSuccess{.order = std::nullopt}, + }, + TopoSortCase{ + .name = "NodeWithNoReferences", + .nodes = {"A", "B", "C"}, + .edges{ + {"A", {"B"}}, + // C has no dependencies + }, + .expected = ExpectSuccess{.order = std::nullopt}, + }, + TopoSortCase{ + .name = "MissingReferences", + .nodes = {"A", "B"}, + .edges{ + // Z doesn't exist in nodes, should be ignored + {"A", {"B", "Z"}}, + }, + .expected = ExpectSuccess{.order = std::vector{"A", "B"}}, + }, + TopoSortCase{ + .name = "ComplexDag", + .nodes = {"A", "B", "C", "D", "E", "F", "G", "H"}, + .edges{ + {"A", {"B", "C", "D"}}, + {"B", {"E", "F"}}, + {"C", {"E", "F"}}, + {"D", {"G"}}, + {"E", {"H"}}, + {"F", {"H"}}, + {"G", {"H"}}, + }, + .expected = ExpectSuccess{.order = std::nullopt}, + }, + TopoSortCase{ + .name = "LongChain", + .nodes = {"A", "B", "C", "D", "E", "F", "G", "H"}, + .edges{ + {"A", {"B"}}, + {"B", {"C"}}, + {"C", {"D"}}, + {"D", {"E"}}, + {"E", {"F"}}, + {"F", {"G"}}, + {"G", {"H"}}, + }, + .expected = ExpectSuccess{.order = std::vector{"A", "B", "C", "D", "E", "F", "G", "H"}}, + }, + TopoSortCase{ + .name = "SelfLoopIgnored", + .nodes = {"A"}, + .edges{ + // Self-reference should be ignored per line 41 of topo-sort.hh + {"A", {"A"}}, + }, + .expected = ExpectSuccess{.order = std::vector{"A"}}, + }, + TopoSortCase{ + .name = "SelfLoopInChainIgnored", + .nodes = {"A", "B", "C"}, + .edges{ + // B has self-reference that should be ignored + {"A", {"B"}}, + {"B", {"B", "C"}}, + }, + .expected = ExpectSuccess{.order = std::vector{"A", "B", "C"}}, + }, + // Cycle detection cases + TopoSortCase{ + .name = "TwoNodeCycle", + .nodes = {"A", "B"}, + .edges{ + {"A", {"B"}}, + {"B", {"A"}}, + }, + .expected = ExpectCycle{.involvedNodes = {"A", "B"}}, + }, + TopoSortCase{ + .name = "ThreeNodeCycle", + .nodes = {"A", "B", "C"}, + .edges{ + {"A", {"B"}}, + {"B", {"C"}}, + {"C", {"A"}}, + }, + .expected = ExpectCycle{.involvedNodes = {"A", "B", "C"}}, + }, + TopoSortCase{ + .name = "CycleInLargerGraph", + .nodes = {"A", "B", "C", "D"}, + .edges{ + {"A", {"B"}}, + {"B", {"C"}}, + {"C", {"A"}}, + {"D", {"A"}}, + }, + .expected = ExpectCycle{.involvedNodes = {"A", "B", "C"}}, + }, + TopoSortCase{ + .name = "MultipleCycles", + .nodes = {"A", "B", "C", "D"}, + .edges{ + {"A", {"B"}}, + {"B", {"A"}}, + {"C", {"D"}}, + {"D", {"C"}}, + }, + // Either cycle is valid + .expected = ExpectCycle{.involvedNodes = {"A", "B", "C", "D"}}, + }, + TopoSortCase{ + .name = "ComplexCycleWithBranches", + .nodes = {"A", "B", "C", "D", "E"}, + .edges{ + // Cycle: B -> D -> E -> B + {"A", {"B", "C"}}, + {"B", {"D"}}, + {"C", {"D"}}, + {"D", {"E"}}, + {"E", {"B"}}, + }, + .expected = ExpectCycle{.involvedNodes = {"B", "D", "E"}}, + })); + +} // namespace nix diff --git a/src/libutil/include/nix/util/topo-sort.hh b/src/libutil/include/nix/util/topo-sort.hh index aaf5dff16..285c34316 100644 --- a/src/libutil/include/nix/util/topo-sort.hh +++ b/src/libutil/include/nix/util/topo-sort.hh @@ -2,39 +2,61 @@ ///@file #include "nix/util/error.hh" +#include namespace nix { +template +struct Cycle +{ + T path; + T parent; +}; + +template +using TopoSortResult = std::variant, Cycle>; + template -std::vector topoSort( - std::set items, - std::function(const T &)> getChildren, - std::function makeCycleError) +TopoSortResult topoSort(std::set items, std::function(const T &)> getChildren) { std::vector sorted; decltype(items) visited, parents; - auto dfsVisit = [&](this auto & dfsVisit, const T & path, const T * parent) { - if (parents.count(path)) - throw makeCycleError(path, *parent); + std::function>(const T & path, const T * parent)> dfsVisit; - if (!visited.insert(path).second) - return; + dfsVisit = [&](const T & path, const T * parent) -> std::optional> { + if (parents.count(path)) { + return Cycle{path, *parent}; + } + + if (!visited.insert(path).second) { + return std::nullopt; + } parents.insert(path); auto references = getChildren(path); for (auto & i : references) /* Don't traverse into items that don't exist in our starting set. */ - if (i != path && items.count(i)) - dfsVisit(i, &path); + if (i != path && items.count(i)) { + auto result = dfsVisit(i, &path); + if (result.has_value()) { + return result; + } + } sorted.push_back(path); parents.erase(path); + + return std::nullopt; }; - for (auto & i : items) - dfsVisit(i, nullptr); + for (auto & i : items) { + auto cycle = dfsVisit(i, nullptr); + if (cycle.has_value()) { + return *cycle; + } + } std::reverse(sorted.begin(), sorted.end());