1
1
Fork 0
mirror of https://github.com/NixOS/nix.git synced 2025-11-13 14:02:42 +01:00

Merge pull request #14479 from lovesegfault/topo-sort-handle-cycles

refactor(libutil/topo-sort): return variant instead of throwing
This commit is contained in:
John Ericson 2025-11-10 20:50:17 +00:00 committed by GitHub
commit 750306234d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 427 additions and 77 deletions

View file

@ -989,19 +989,22 @@ void LocalStore::registerValidPaths(const ValidPathInfos & infos)
error if a cycle is detected and roll back the error if a cycle is detected and roll back the
transaction. Cycles can only occur when a derivation transaction. Cycles can only occur when a derivation
has multiple outputs. */ has multiple outputs. */
topoSort( auto topoSortResult = topoSort(paths, {[&](const StorePath & path) {
paths,
{[&](const StorePath & path) {
auto i = infos.find(path); auto i = infos.find(path);
return i == infos.end() ? StorePathSet() : i->second.references; return i == infos.end() ? StorePathSet() : i->second.references;
}}, }});
{[&](const StorePath & path, const StorePath & parent) {
return BuildError( std::visit(
overloaded{
[&](const Cycle<StorePath> & cycle) {
throw BuildError(
BuildResult::Failure::OutputRejected, BuildResult::Failure::OutputRejected,
"cycle detected in the references of '%s' from '%s'", "cycle detected in the references of '%s' from '%s'",
printStorePath(path), printStorePath(cycle.path),
printStorePath(parent)); printStorePath(cycle.parent));
}}); },
[](auto &) { /* Success, continue */ }},
topoSortResult);
txn.commit(); txn.commit();
}); });

View file

@ -311,22 +311,25 @@ MissingPaths Store::queryMissing(const std::vector<DerivedPath> & targets)
StorePaths Store::topoSortPaths(const StorePathSet & paths) StorePaths Store::topoSortPaths(const StorePathSet & paths)
{ {
return topoSort( auto result = topoSort(paths, {[&](const StorePath & path) {
paths,
{[&](const StorePath & path) {
try { try {
return queryPathInfo(path)->references; return queryPathInfo(path)->references;
} catch (InvalidPath &) { } catch (InvalidPath &) {
return StorePathSet(); return StorePathSet();
} }
}}, }});
{[&](const StorePath & path, const StorePath & parent) {
return BuildError( return std::visit(
overloaded{
[&](const Cycle<StorePath> & cycle) -> StorePaths {
throw BuildError(
BuildResult::Failure::OutputRejected, BuildResult::Failure::OutputRejected,
"cycle detected in the references of '%s' from '%s'", "cycle detected in the references of '%s' from '%s'",
printStorePath(path), printStorePath(cycle.path),
printStorePath(parent)); printStorePath(cycle.parent));
}}); },
[](const auto & sorted) { return sorted; }},
result);
} }
std::map<DrvOutput, StorePath> std::map<DrvOutput, StorePath>

View file

@ -1470,9 +1470,7 @@ SingleDrvOutputs DerivationBuilderImpl::registerOutputs()
outputStats.insert_or_assign(outputName, std::move(st)); outputStats.insert_or_assign(outputName, std::move(st));
} }
auto sortedOutputNames = topoSort( auto topoSortResult = topoSort(outputsToSort, {[&](const std::string & name) {
outputsToSort,
{[&](const std::string & name) {
auto orifu = get(outputReferencesIfUnregistered, name); auto orifu = get(outputReferencesIfUnregistered, name);
if (!orifu) if (!orifu)
throw BuildError( throw BuildError(
@ -1497,16 +1495,21 @@ SingleDrvOutputs DerivationBuilderImpl::registerOutputs()
}, },
}, },
*orifu); *orifu);
}}, }});
{[&](const std::string & path, const std::string & parent) {
auto sortedOutputNames = std::visit(
overloaded{
[&](Cycle<std::string> & cycle) -> std::vector<std::string> {
// TODO with more -vvvv also show the temporary paths for manual inspection. // TODO with more -vvvv also show the temporary paths for manual inspection.
return BuildError( throw BuildError(
BuildResult::Failure::OutputRejected, BuildResult::Failure::OutputRejected,
"cycle detected in build of '%s' in the references of output '%s' from output '%s'", "cycle detected in build of '%s' in the references of output '%s' from output '%s'",
store.printStorePath(drvPath), store.printStorePath(drvPath),
path, cycle.path,
parent); cycle.parent);
}}); },
[](auto & sorted) { return sorted; }},
topoSortResult);
std::reverse(sortedOutputNames.begin(), sortedOutputNames.end()); std::reverse(sortedOutputNames.begin(), sortedOutputNames.end());

View file

@ -74,6 +74,7 @@ sources = files(
'strings.cc', 'strings.cc',
'suggestions.cc', 'suggestions.cc',
'terminal.cc', 'terminal.cc',
'topo-sort.cc',
'url.cc', 'url.cc',
'util.cc', 'util.cc',
'xml-writer.cc', 'xml-writer.cc',

View file

@ -0,0 +1,318 @@
#include <map>
#include <set>
#include <string>
#include <vector>
#include <algorithm>
#include <gtest/gtest.h>
#include "nix/util/topo-sort.hh"
#include "nix/util/util.hh"
namespace nix {
/**
* Helper function to create a graph and run topoSort
*/
TopoSortResult<std::string>
runTopoSort(const std::set<std::string> & nodes, const std::map<std::string, std::set<std::string>> & edges)
{
return topoSort(
nodes,
std::function<std::set<std::string>(const std::string &)>(
[&](const std::string & node) -> std::set<std::string> {
auto it = edges.find(node);
return it != edges.end() ? it->second : std::set<std::string>{};
}));
}
/**
* Helper to check if a sorted result respects dependencies
*
* @note `topoSort` returns results in REVERSE topological order (see
* line 61 of topo-sort.hh). This means dependents come BEFORE their
* dependencies in the output.
*
* In the edges std::map, if parent -> child, it means parent depends on
* child, so parent must come BEFORE child in the output from topoSort.
*/
bool isValidTopologicalOrder(
const std::vector<std::string> & sorted, const std::map<std::string, std::set<std::string>> & edges)
{
std::map<std::string, size_t> position;
for (size_t i = 0; i < sorted.size(); ++i) {
position[sorted[i]] = i;
}
// For each edge parent -> children, parent depends on children
// topoSort reverses the output, so parent comes BEFORE children
for (const auto & [parent, children] : edges) {
for (const auto & child : children) {
if (position.count(parent) && position.count(child)) {
// parent should come before child (have a smaller index)
if (position[parent] > position[child]) {
return false;
}
}
}
}
return true;
}
// ============================================================================
// Parametrized Tests for Topological Sort
// ============================================================================
struct ExpectSuccess
{
std::optional<std::vector<std::string>> order; // std::nullopt = any valid order is acceptable
};
struct ExpectCycle
{
std::set<std::string> involvedNodes;
};
using ExpectedResult = std::variant<ExpectSuccess, ExpectCycle>;
struct TopoSortCase
{
std::string name;
std::set<std::string> nodes;
std::map<std::string, std::set<std::string>> edges;
ExpectedResult expected;
};
class TopoSortTest : public ::testing::TestWithParam<TopoSortCase>
{};
TEST_P(TopoSortTest, ProducesCorrectResult)
{
const auto & testCase = GetParam();
auto result = runTopoSort(testCase.nodes, testCase.edges);
std::visit(
overloaded{
[&](const ExpectSuccess & expect) {
// Success case
ASSERT_TRUE(holds_alternative<std::vector<std::string>>(result))
<< "Expected successful sort for: " << testCase.name;
auto sorted = get<std::vector<std::string>>(result);
ASSERT_EQ(sorted.size(), testCase.nodes.size())
<< "Sorted output should contain all nodes for: " << testCase.name;
ASSERT_TRUE(isValidTopologicalOrder(sorted, testCase.edges))
<< "Invalid topological order for: " << testCase.name;
if (expect.order) {
ASSERT_EQ(sorted, *expect.order) << "Expected specific order for: " << testCase.name;
}
},
[&](const ExpectCycle & expect) {
// Cycle detection case
ASSERT_TRUE(holds_alternative<Cycle<std::string>>(result))
<< "Expected cycle detection for: " << testCase.name;
auto cycle = get<Cycle<std::string>>(result);
// Verify that the cycle involves expected nodes
ASSERT_TRUE(expect.involvedNodes.count(cycle.path) > 0)
<< "Cycle path '" << cycle.path << "' not in expected cycle nodes for: " << testCase.name;
ASSERT_TRUE(expect.involvedNodes.count(cycle.parent) > 0)
<< "Cycle parent '" << cycle.parent << "' not in expected cycle nodes for: " << testCase.name;
// Verify that there's actually an edge in the cycle
auto it = testCase.edges.find(cycle.parent);
ASSERT_TRUE(it != testCase.edges.end()) << "Parent node should have edges for: " << testCase.name;
ASSERT_TRUE(it->second.count(cycle.path) > 0)
<< "Should be an edge from parent to path for: " << testCase.name;
}},
testCase.expected);
}
INSTANTIATE_TEST_SUITE_P(
TopoSort,
TopoSortTest,
::testing::Values(
// Success cases
TopoSortCase{
.name = "EmptySet",
.nodes = {},
.edges = {},
.expected = ExpectSuccess{.order = std::vector<std::string>{}},
},
TopoSortCase{
.name = "SingleNode",
.nodes = {"A"},
.edges = {},
.expected = ExpectSuccess{.order = std::vector<std::string>{"A"}},
},
TopoSortCase{
.name = "TwoIndependentNodes",
.nodes = {"A", "B"},
.edges = {},
// Order between independent nodes is unspecified
.expected = ExpectSuccess{.order = std::nullopt},
},
TopoSortCase{
.name = "SimpleChain",
.nodes = {"A", "B", "C"},
.edges{
{"A", {"B"}},
{"B", {"C"}},
},
.expected = ExpectSuccess{.order = std::vector<std::string>{"A", "B", "C"}},
},
TopoSortCase{
.name = "SimpleDag",
.nodes = {"A", "B", "C", "D"},
.edges{
{"A", {"B", "C"}},
{"B", {"D"}},
{"C", {"D"}},
},
.expected = ExpectSuccess{.order = std::nullopt},
},
TopoSortCase{
.name = "DiamondDependency",
.nodes = {"A", "B", "C", "D"},
.edges{
{"A", {"B", "C"}},
{"B", {"D"}},
{"C", {"D"}},
},
.expected = ExpectSuccess{.order = std::nullopt},
},
TopoSortCase{
.name = "DisconnectedComponents",
.nodes = {"A", "B", "C", "D"},
.edges{
{"A", {"B"}},
{"C", {"D"}},
},
.expected = ExpectSuccess{.order = std::nullopt},
},
TopoSortCase{
.name = "NodeWithNoReferences",
.nodes = {"A", "B", "C"},
.edges{
{"A", {"B"}},
// C has no dependencies
},
.expected = ExpectSuccess{.order = std::nullopt},
},
TopoSortCase{
.name = "MissingReferences",
.nodes = {"A", "B"},
.edges{
// Z doesn't exist in nodes, should be ignored
{"A", {"B", "Z"}},
},
.expected = ExpectSuccess{.order = std::vector<std::string>{"A", "B"}},
},
TopoSortCase{
.name = "ComplexDag",
.nodes = {"A", "B", "C", "D", "E", "F", "G", "H"},
.edges{
{"A", {"B", "C", "D"}},
{"B", {"E", "F"}},
{"C", {"E", "F"}},
{"D", {"G"}},
{"E", {"H"}},
{"F", {"H"}},
{"G", {"H"}},
},
.expected = ExpectSuccess{.order = std::nullopt},
},
TopoSortCase{
.name = "LongChain",
.nodes = {"A", "B", "C", "D", "E", "F", "G", "H"},
.edges{
{"A", {"B"}},
{"B", {"C"}},
{"C", {"D"}},
{"D", {"E"}},
{"E", {"F"}},
{"F", {"G"}},
{"G", {"H"}},
},
.expected = ExpectSuccess{.order = std::vector<std::string>{"A", "B", "C", "D", "E", "F", "G", "H"}},
},
TopoSortCase{
.name = "SelfLoopIgnored",
.nodes = {"A"},
.edges{
// Self-reference should be ignored per line 41 of topo-sort.hh
{"A", {"A"}},
},
.expected = ExpectSuccess{.order = std::vector<std::string>{"A"}},
},
TopoSortCase{
.name = "SelfLoopInChainIgnored",
.nodes = {"A", "B", "C"},
.edges{
// B has self-reference that should be ignored
{"A", {"B"}},
{"B", {"B", "C"}},
},
.expected = ExpectSuccess{.order = std::vector<std::string>{"A", "B", "C"}},
},
// Cycle detection cases
TopoSortCase{
.name = "TwoNodeCycle",
.nodes = {"A", "B"},
.edges{
{"A", {"B"}},
{"B", {"A"}},
},
.expected = ExpectCycle{.involvedNodes = {"A", "B"}},
},
TopoSortCase{
.name = "ThreeNodeCycle",
.nodes = {"A", "B", "C"},
.edges{
{"A", {"B"}},
{"B", {"C"}},
{"C", {"A"}},
},
.expected = ExpectCycle{.involvedNodes = {"A", "B", "C"}},
},
TopoSortCase{
.name = "CycleInLargerGraph",
.nodes = {"A", "B", "C", "D"},
.edges{
{"A", {"B"}},
{"B", {"C"}},
{"C", {"A"}},
{"D", {"A"}},
},
.expected = ExpectCycle{.involvedNodes = {"A", "B", "C"}},
},
TopoSortCase{
.name = "MultipleCycles",
.nodes = {"A", "B", "C", "D"},
.edges{
{"A", {"B"}},
{"B", {"A"}},
{"C", {"D"}},
{"D", {"C"}},
},
// Either cycle is valid
.expected = ExpectCycle{.involvedNodes = {"A", "B", "C", "D"}},
},
TopoSortCase{
.name = "ComplexCycleWithBranches",
.nodes = {"A", "B", "C", "D", "E"},
.edges{
// Cycle: B -> D -> E -> B
{"A", {"B", "C"}},
{"B", {"D"}},
{"C", {"D"}},
{"D", {"E"}},
{"E", {"B"}},
},
.expected = ExpectCycle{.involvedNodes = {"B", "D", "E"}},
}));
} // namespace nix

View file

@ -2,39 +2,61 @@
///@file ///@file
#include "nix/util/error.hh" #include "nix/util/error.hh"
#include <variant>
namespace nix { namespace nix {
template<typename T>
struct Cycle
{
T path;
T parent;
};
template<typename T>
using TopoSortResult = std::variant<std::vector<T>, Cycle<T>>;
template<typename T, typename Compare> template<typename T, typename Compare>
std::vector<T> topoSort( TopoSortResult<T> topoSort(std::set<T, Compare> items, std::function<std::set<T, Compare>(const T &)> getChildren)
std::set<T, Compare> items,
std::function<std::set<T, Compare>(const T &)> getChildren,
std::function<Error(const T &, const T &)> makeCycleError)
{ {
std::vector<T> sorted; std::vector<T> sorted;
decltype(items) visited, parents; decltype(items) visited, parents;
auto dfsVisit = [&](this auto & dfsVisit, const T & path, const T * parent) { std::function<std::optional<Cycle<T>>(const T & path, const T * parent)> dfsVisit;
if (parents.count(path))
throw makeCycleError(path, *parent);
if (!visited.insert(path).second) dfsVisit = [&](const T & path, const T * parent) -> std::optional<Cycle<T>> {
return; if (parents.count(path)) {
return Cycle{path, *parent};
}
if (!visited.insert(path).second) {
return std::nullopt;
}
parents.insert(path); parents.insert(path);
auto references = getChildren(path); auto references = getChildren(path);
for (auto & i : references) for (auto & i : references)
/* Don't traverse into items that don't exist in our starting set. */ /* Don't traverse into items that don't exist in our starting set. */
if (i != path && items.count(i)) if (i != path && items.count(i)) {
dfsVisit(i, &path); auto result = dfsVisit(i, &path);
if (result.has_value()) {
return result;
}
}
sorted.push_back(path); sorted.push_back(path);
parents.erase(path); parents.erase(path);
return std::nullopt;
}; };
for (auto & i : items) for (auto & i : items) {
dfsVisit(i, nullptr); auto cycle = dfsVisit(i, nullptr);
if (cycle.has_value()) {
return *cycle;
}
}
std::reverse(sorted.begin(), sorted.end()); std::reverse(sorted.begin(), sorted.end());