diff --git a/src/libstore/local-store.cc b/src/libstore/local-store.cc index c6aeaf0d2..2c4d546f8 100644 --- a/src/libstore/local-store.cc +++ b/src/libstore/local-store.cc @@ -989,19 +989,22 @@ void LocalStore::registerValidPaths(const ValidPathInfos & infos) error if a cycle is detected and roll back the transaction. Cycles can only occur when a derivation has multiple outputs. */ - topoSort( - paths, - {[&](const StorePath & path) { - auto i = infos.find(path); - return i == infos.end() ? StorePathSet() : i->second.references; - }}, - {[&](const StorePath & path, const StorePath & parent) { - return BuildError( - BuildResult::Failure::OutputRejected, - "cycle detected in the references of '%s' from '%s'", - printStorePath(path), - printStorePath(parent)); - }}); + auto topoSortResult = topoSort(paths, {[&](const StorePath & path) { + auto i = infos.find(path); + return i == infos.end() ? StorePathSet() : i->second.references; + }}); + + std::visit( + overloaded{ + [&](const Cycle & cycle) { + throw BuildError( + BuildResult::Failure::OutputRejected, + "cycle detected in the references of '%s' from '%s'", + printStorePath(cycle.path), + printStorePath(cycle.parent)); + }, + [](auto &) { /* Success, continue */ }}, + topoSortResult); txn.commit(); }); diff --git a/src/libstore/misc.cc b/src/libstore/misc.cc index 8b2a7287e..34a369810 100644 --- a/src/libstore/misc.cc +++ b/src/libstore/misc.cc @@ -311,22 +311,25 @@ MissingPaths Store::queryMissing(const std::vector & targets) StorePaths Store::topoSortPaths(const StorePathSet & paths) { - return topoSort( - paths, - {[&](const StorePath & path) { - try { - return queryPathInfo(path)->references; - } catch (InvalidPath &) { - return StorePathSet(); - } - }}, - {[&](const StorePath & path, const StorePath & parent) { - return BuildError( - BuildResult::Failure::OutputRejected, - "cycle detected in the references of '%s' from '%s'", - printStorePath(path), - printStorePath(parent)); - }}); + auto result = topoSort(paths, {[&](const StorePath & path) { + try { + return queryPathInfo(path)->references; + } catch (InvalidPath &) { + return StorePathSet(); + } + }}); + + return std::visit( + overloaded{ + [&](const Cycle & cycle) -> StorePaths { + throw BuildError( + BuildResult::Failure::OutputRejected, + "cycle detected in the references of '%s' from '%s'", + printStorePath(cycle.path), + printStorePath(cycle.parent)); + }, + [](const auto & sorted) { return sorted; }}, + result); } std::map diff --git a/src/libstore/unix/build/derivation-builder.cc b/src/libstore/unix/build/derivation-builder.cc index c2ef730dc..6eedc5fa2 100644 --- a/src/libstore/unix/build/derivation-builder.cc +++ b/src/libstore/unix/build/derivation-builder.cc @@ -1473,43 +1473,46 @@ SingleDrvOutputs DerivationBuilderImpl::registerOutputs() outputStats.insert_or_assign(outputName, std::move(st)); } - auto sortedOutputNames = topoSort( - outputsToSort, - {[&](const std::string & name) { - auto orifu = get(outputReferencesIfUnregistered, name); - if (!orifu) + auto topoSortResult = topoSort(outputsToSort, {[&](const std::string & name) { + auto orifu = get(outputReferencesIfUnregistered, name); + if (!orifu) + throw BuildError( + BuildResult::Failure::OutputRejected, + "no output reference for '%s' in build of '%s'", + name, + store.printStorePath(drvPath)); + return std::visit( + overloaded{ + /* Since we'll use the already installed versions of these, we + can treat them as leaves and ignore any references they + have. */ + [&](const AlreadyRegistered &) { return StringSet{}; }, + [&](const PerhapsNeedToRegister & refs) { + StringSet referencedOutputs; + /* FIXME build inverted map up front so no quadratic waste here */ + for (auto & r : refs.refs) + for (auto & [o, p] : scratchOutputs) + if (r == p) + referencedOutputs.insert(o); + return referencedOutputs; + }, + }, + *orifu); + }}); + + auto sortedOutputNames = std::visit( + overloaded{ + [&](Cycle & cycle) -> std::vector { + // TODO with more -vvvv also show the temporary paths for manual inspection. throw BuildError( BuildResult::Failure::OutputRejected, - "no output reference for '%s' in build of '%s'", - name, - store.printStorePath(drvPath)); - return std::visit( - overloaded{ - /* Since we'll use the already installed versions of these, we - can treat them as leaves and ignore any references they - have. */ - [&](const AlreadyRegistered &) { return StringSet{}; }, - [&](const PerhapsNeedToRegister & refs) { - StringSet referencedOutputs; - /* FIXME build inverted map up front so no quadratic waste here */ - for (auto & r : refs.refs) - for (auto & [o, p] : scratchOutputs) - if (r == p) - referencedOutputs.insert(o); - return referencedOutputs; - }, - }, - *orifu); - }}, - {[&](const std::string & path, const std::string & parent) { - // TODO with more -vvvv also show the temporary paths for manual inspection. - return BuildError( - BuildResult::Failure::OutputRejected, - "cycle detected in build of '%s' in the references of output '%s' from output '%s'", - store.printStorePath(drvPath), - path, - parent); - }}); + "cycle detected in build of '%s' in the references of output '%s' from output '%s'", + store.printStorePath(drvPath), + cycle.path, + cycle.parent); + }, + [](auto & sorted) { return sorted; }}, + topoSortResult); std::reverse(sortedOutputNames.begin(), sortedOutputNames.end()); diff --git a/src/libutil-tests/meson.build b/src/libutil-tests/meson.build index c75f4d90a..2772ab060 100644 --- a/src/libutil-tests/meson.build +++ b/src/libutil-tests/meson.build @@ -74,6 +74,7 @@ sources = files( 'strings.cc', 'suggestions.cc', 'terminal.cc', + 'topo-sort.cc', 'url.cc', 'util.cc', 'xml-writer.cc', diff --git a/src/libutil-tests/topo-sort.cc b/src/libutil-tests/topo-sort.cc new file mode 100644 index 000000000..91030247e --- /dev/null +++ b/src/libutil-tests/topo-sort.cc @@ -0,0 +1,318 @@ +#include +#include +#include +#include +#include + +#include + +#include "nix/util/topo-sort.hh" +#include "nix/util/util.hh" + +namespace nix { + +/** + * Helper function to create a graph and run topoSort + */ +TopoSortResult +runTopoSort(const std::set & nodes, const std::map> & edges) +{ + return topoSort( + nodes, + std::function(const std::string &)>( + [&](const std::string & node) -> std::set { + auto it = edges.find(node); + return it != edges.end() ? it->second : std::set{}; + })); +} + +/** + * Helper to check if a sorted result respects dependencies + * + * @note `topoSort` returns results in REVERSE topological order (see + * line 61 of topo-sort.hh). This means dependents come BEFORE their + * dependencies in the output. + * + * In the edges std::map, if parent -> child, it means parent depends on + * child, so parent must come BEFORE child in the output from topoSort. + */ +bool isValidTopologicalOrder( + const std::vector & sorted, const std::map> & edges) +{ + std::map position; + for (size_t i = 0; i < sorted.size(); ++i) { + position[sorted[i]] = i; + } + + // For each edge parent -> children, parent depends on children + // topoSort reverses the output, so parent comes BEFORE children + for (const auto & [parent, children] : edges) { + for (const auto & child : children) { + if (position.count(parent) && position.count(child)) { + // parent should come before child (have a smaller index) + if (position[parent] > position[child]) { + return false; + } + } + } + } + return true; +} + +// ============================================================================ +// Parametrized Tests for Topological Sort +// ============================================================================ + +struct ExpectSuccess +{ + std::optional> order; // std::nullopt = any valid order is acceptable +}; + +struct ExpectCycle +{ + std::set involvedNodes; +}; + +using ExpectedResult = std::variant; + +struct TopoSortCase +{ + std::string name; + std::set nodes; + std::map> edges; + ExpectedResult expected; +}; + +class TopoSortTest : public ::testing::TestWithParam +{}; + +TEST_P(TopoSortTest, ProducesCorrectResult) +{ + const auto & testCase = GetParam(); + auto result = runTopoSort(testCase.nodes, testCase.edges); + + std::visit( + overloaded{ + [&](const ExpectSuccess & expect) { + // Success case + ASSERT_TRUE(holds_alternative>(result)) + << "Expected successful sort for: " << testCase.name; + + auto sorted = get>(result); + ASSERT_EQ(sorted.size(), testCase.nodes.size()) + << "Sorted output should contain all nodes for: " << testCase.name; + + ASSERT_TRUE(isValidTopologicalOrder(sorted, testCase.edges)) + << "Invalid topological order for: " << testCase.name; + + if (expect.order) { + ASSERT_EQ(sorted, *expect.order) << "Expected specific order for: " << testCase.name; + } + }, + [&](const ExpectCycle & expect) { + // Cycle detection case + ASSERT_TRUE(holds_alternative>(result)) + << "Expected cycle detection for: " << testCase.name; + + auto cycle = get>(result); + + // Verify that the cycle involves expected nodes + ASSERT_TRUE(expect.involvedNodes.count(cycle.path) > 0) + << "Cycle path '" << cycle.path << "' not in expected cycle nodes for: " << testCase.name; + ASSERT_TRUE(expect.involvedNodes.count(cycle.parent) > 0) + << "Cycle parent '" << cycle.parent << "' not in expected cycle nodes for: " << testCase.name; + + // Verify that there's actually an edge in the cycle + auto it = testCase.edges.find(cycle.parent); + ASSERT_TRUE(it != testCase.edges.end()) << "Parent node should have edges for: " << testCase.name; + ASSERT_TRUE(it->second.count(cycle.path) > 0) + << "Should be an edge from parent to path for: " << testCase.name; + }}, + testCase.expected); +} + +INSTANTIATE_TEST_SUITE_P( + TopoSort, + TopoSortTest, + ::testing::Values( + // Success cases + TopoSortCase{ + .name = "EmptySet", + .nodes = {}, + .edges = {}, + .expected = ExpectSuccess{.order = std::vector{}}, + }, + TopoSortCase{ + .name = "SingleNode", + .nodes = {"A"}, + .edges = {}, + .expected = ExpectSuccess{.order = std::vector{"A"}}, + }, + TopoSortCase{ + .name = "TwoIndependentNodes", + .nodes = {"A", "B"}, + .edges = {}, + // Order between independent nodes is unspecified + .expected = ExpectSuccess{.order = std::nullopt}, + }, + TopoSortCase{ + .name = "SimpleChain", + .nodes = {"A", "B", "C"}, + .edges{ + {"A", {"B"}}, + {"B", {"C"}}, + }, + .expected = ExpectSuccess{.order = std::vector{"A", "B", "C"}}, + }, + TopoSortCase{ + .name = "SimpleDag", + .nodes = {"A", "B", "C", "D"}, + .edges{ + {"A", {"B", "C"}}, + {"B", {"D"}}, + {"C", {"D"}}, + }, + .expected = ExpectSuccess{.order = std::nullopt}, + }, + TopoSortCase{ + .name = "DiamondDependency", + .nodes = {"A", "B", "C", "D"}, + .edges{ + {"A", {"B", "C"}}, + {"B", {"D"}}, + {"C", {"D"}}, + }, + .expected = ExpectSuccess{.order = std::nullopt}, + }, + TopoSortCase{ + .name = "DisconnectedComponents", + .nodes = {"A", "B", "C", "D"}, + .edges{ + {"A", {"B"}}, + {"C", {"D"}}, + }, + .expected = ExpectSuccess{.order = std::nullopt}, + }, + TopoSortCase{ + .name = "NodeWithNoReferences", + .nodes = {"A", "B", "C"}, + .edges{ + {"A", {"B"}}, + // C has no dependencies + }, + .expected = ExpectSuccess{.order = std::nullopt}, + }, + TopoSortCase{ + .name = "MissingReferences", + .nodes = {"A", "B"}, + .edges{ + // Z doesn't exist in nodes, should be ignored + {"A", {"B", "Z"}}, + }, + .expected = ExpectSuccess{.order = std::vector{"A", "B"}}, + }, + TopoSortCase{ + .name = "ComplexDag", + .nodes = {"A", "B", "C", "D", "E", "F", "G", "H"}, + .edges{ + {"A", {"B", "C", "D"}}, + {"B", {"E", "F"}}, + {"C", {"E", "F"}}, + {"D", {"G"}}, + {"E", {"H"}}, + {"F", {"H"}}, + {"G", {"H"}}, + }, + .expected = ExpectSuccess{.order = std::nullopt}, + }, + TopoSortCase{ + .name = "LongChain", + .nodes = {"A", "B", "C", "D", "E", "F", "G", "H"}, + .edges{ + {"A", {"B"}}, + {"B", {"C"}}, + {"C", {"D"}}, + {"D", {"E"}}, + {"E", {"F"}}, + {"F", {"G"}}, + {"G", {"H"}}, + }, + .expected = ExpectSuccess{.order = std::vector{"A", "B", "C", "D", "E", "F", "G", "H"}}, + }, + TopoSortCase{ + .name = "SelfLoopIgnored", + .nodes = {"A"}, + .edges{ + // Self-reference should be ignored per line 41 of topo-sort.hh + {"A", {"A"}}, + }, + .expected = ExpectSuccess{.order = std::vector{"A"}}, + }, + TopoSortCase{ + .name = "SelfLoopInChainIgnored", + .nodes = {"A", "B", "C"}, + .edges{ + // B has self-reference that should be ignored + {"A", {"B"}}, + {"B", {"B", "C"}}, + }, + .expected = ExpectSuccess{.order = std::vector{"A", "B", "C"}}, + }, + // Cycle detection cases + TopoSortCase{ + .name = "TwoNodeCycle", + .nodes = {"A", "B"}, + .edges{ + {"A", {"B"}}, + {"B", {"A"}}, + }, + .expected = ExpectCycle{.involvedNodes = {"A", "B"}}, + }, + TopoSortCase{ + .name = "ThreeNodeCycle", + .nodes = {"A", "B", "C"}, + .edges{ + {"A", {"B"}}, + {"B", {"C"}}, + {"C", {"A"}}, + }, + .expected = ExpectCycle{.involvedNodes = {"A", "B", "C"}}, + }, + TopoSortCase{ + .name = "CycleInLargerGraph", + .nodes = {"A", "B", "C", "D"}, + .edges{ + {"A", {"B"}}, + {"B", {"C"}}, + {"C", {"A"}}, + {"D", {"A"}}, + }, + .expected = ExpectCycle{.involvedNodes = {"A", "B", "C"}}, + }, + TopoSortCase{ + .name = "MultipleCycles", + .nodes = {"A", "B", "C", "D"}, + .edges{ + {"A", {"B"}}, + {"B", {"A"}}, + {"C", {"D"}}, + {"D", {"C"}}, + }, + // Either cycle is valid + .expected = ExpectCycle{.involvedNodes = {"A", "B", "C", "D"}}, + }, + TopoSortCase{ + .name = "ComplexCycleWithBranches", + .nodes = {"A", "B", "C", "D", "E"}, + .edges{ + // Cycle: B -> D -> E -> B + {"A", {"B", "C"}}, + {"B", {"D"}}, + {"C", {"D"}}, + {"D", {"E"}}, + {"E", {"B"}}, + }, + .expected = ExpectCycle{.involvedNodes = {"B", "D", "E"}}, + })); + +} // namespace nix diff --git a/src/libutil/include/nix/util/topo-sort.hh b/src/libutil/include/nix/util/topo-sort.hh index aaf5dff16..285c34316 100644 --- a/src/libutil/include/nix/util/topo-sort.hh +++ b/src/libutil/include/nix/util/topo-sort.hh @@ -2,39 +2,61 @@ ///@file #include "nix/util/error.hh" +#include namespace nix { +template +struct Cycle +{ + T path; + T parent; +}; + +template +using TopoSortResult = std::variant, Cycle>; + template -std::vector topoSort( - std::set items, - std::function(const T &)> getChildren, - std::function makeCycleError) +TopoSortResult topoSort(std::set items, std::function(const T &)> getChildren) { std::vector sorted; decltype(items) visited, parents; - auto dfsVisit = [&](this auto & dfsVisit, const T & path, const T * parent) { - if (parents.count(path)) - throw makeCycleError(path, *parent); + std::function>(const T & path, const T * parent)> dfsVisit; - if (!visited.insert(path).second) - return; + dfsVisit = [&](const T & path, const T * parent) -> std::optional> { + if (parents.count(path)) { + return Cycle{path, *parent}; + } + + if (!visited.insert(path).second) { + return std::nullopt; + } parents.insert(path); auto references = getChildren(path); for (auto & i : references) /* Don't traverse into items that don't exist in our starting set. */ - if (i != path && items.count(i)) - dfsVisit(i, &path); + if (i != path && items.count(i)) { + auto result = dfsVisit(i, &path); + if (result.has_value()) { + return result; + } + } sorted.push_back(path); parents.erase(path); + + return std::nullopt; }; - for (auto & i : items) - dfsVisit(i, nullptr); + for (auto & i : items) { + auto cycle = dfsVisit(i, nullptr); + if (cycle.has_value()) { + return *cycle; + } + } std::reverse(sorted.begin(), sorted.end());