mirror of
https://github.com/NixOS/nix.git
synced 2025-11-09 03:56:01 +01:00
Merge a9aaf0ed1d into 479b6b73a9
This commit is contained in:
commit
98eb0ecadc
9 changed files with 656 additions and 42 deletions
146
src/libstore-tests/find-cycles.cc
Normal file
146
src/libstore-tests/find-cycles.cc
Normal file
|
|
@ -0,0 +1,146 @@
|
||||||
|
#include "nix/store/build/find-cycles.hh"
|
||||||
|
|
||||||
|
#include <gtest/gtest.h>
|
||||||
|
#include <algorithm>
|
||||||
|
|
||||||
|
namespace nix {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Parameters for transformEdgesToMultiedges tests
|
||||||
|
*/
|
||||||
|
struct TransformEdgesParams
|
||||||
|
{
|
||||||
|
std::string description;
|
||||||
|
std::vector<std::vector<std::string>> inputEdges;
|
||||||
|
std::vector<std::vector<std::string>> expectedOutput;
|
||||||
|
|
||||||
|
friend std::ostream & operator<<(std::ostream & os, const TransformEdgesParams & params)
|
||||||
|
{
|
||||||
|
os << "Test: " << params.description << "\n";
|
||||||
|
os << "Input edges (" << params.inputEdges.size() << "):\n";
|
||||||
|
for (const auto & edge : params.inputEdges) {
|
||||||
|
os << " ";
|
||||||
|
for (size_t i = 0; i < edge.size(); ++i) {
|
||||||
|
if (i > 0)
|
||||||
|
os << " -> ";
|
||||||
|
os << edge[i];
|
||||||
|
}
|
||||||
|
os << "\n";
|
||||||
|
}
|
||||||
|
os << "Expected output (" << params.expectedOutput.size() << "):\n";
|
||||||
|
for (const auto & multiedge : params.expectedOutput) {
|
||||||
|
os << " ";
|
||||||
|
for (size_t i = 0; i < multiedge.size(); ++i) {
|
||||||
|
if (i > 0)
|
||||||
|
os << " -> ";
|
||||||
|
os << multiedge[i];
|
||||||
|
}
|
||||||
|
os << "\n";
|
||||||
|
}
|
||||||
|
return os;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
class TransformEdgesToMultiedgesTest : public ::testing::TestWithParam<TransformEdgesParams>
|
||||||
|
{};
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
// Helper to convert vector<vector<string>> to StoreCycleEdgeVec
|
||||||
|
StoreCycleEdgeVec toStoreCycleEdgeVec(const std::vector<std::vector<std::string>> & edges)
|
||||||
|
{
|
||||||
|
StoreCycleEdgeVec result;
|
||||||
|
result.reserve(edges.size());
|
||||||
|
for (const auto & edge : edges) {
|
||||||
|
result.emplace_back(edge.begin(), edge.end());
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Comparator for sorting multiedges deterministically
|
||||||
|
bool compareMultiedges(const StoreCycleEdge & a, const StoreCycleEdge & b)
|
||||||
|
{
|
||||||
|
if (a.size() != b.size())
|
||||||
|
return a.size() < b.size();
|
||||||
|
return std::lexicographical_compare(a.begin(), a.end(), b.begin(), b.end());
|
||||||
|
}
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
TEST_P(TransformEdgesToMultiedgesTest, TransformEdges)
|
||||||
|
{
|
||||||
|
const auto & params = GetParam();
|
||||||
|
|
||||||
|
auto inputEdges = toStoreCycleEdgeVec(params.inputEdges);
|
||||||
|
StoreCycleEdgeVec actualOutput;
|
||||||
|
transformEdgesToMultiedges(inputEdges, actualOutput);
|
||||||
|
|
||||||
|
auto expectedOutput = toStoreCycleEdgeVec(params.expectedOutput);
|
||||||
|
|
||||||
|
ASSERT_EQ(actualOutput.size(), expectedOutput.size()) << "Number of multiedges doesn't match expected";
|
||||||
|
|
||||||
|
// Sort both for comparison (order may vary, but content should match)
|
||||||
|
std::sort(actualOutput.begin(), actualOutput.end(), compareMultiedges);
|
||||||
|
std::sort(expectedOutput.begin(), expectedOutput.end(), compareMultiedges);
|
||||||
|
|
||||||
|
// Compare each multiedge
|
||||||
|
EXPECT_EQ(actualOutput, expectedOutput);
|
||||||
|
}
|
||||||
|
|
||||||
|
INSTANTIATE_TEST_CASE_P(
|
||||||
|
FindCycles,
|
||||||
|
TransformEdgesToMultiedgesTest,
|
||||||
|
::testing::Values(
|
||||||
|
// Empty input
|
||||||
|
TransformEdgesParams{"empty input", {}, {}},
|
||||||
|
|
||||||
|
// Single edge - no joining possible
|
||||||
|
TransformEdgesParams{"single edge", {{"a", "b"}}, {{"a", "b"}}},
|
||||||
|
|
||||||
|
// Two edges that connect (append case: A->B, B->C becomes A->B->C)
|
||||||
|
TransformEdgesParams{"two edges connecting via append", {{"a", "b"}, {"b", "c"}}, {{"a", "b", "c"}}},
|
||||||
|
|
||||||
|
// Two edges that connect (prepend case: B->C, A->B becomes A->B->C)
|
||||||
|
TransformEdgesParams{"two edges connecting via prepend", {{"b", "c"}, {"a", "b"}}, {{"a", "b", "c"}}},
|
||||||
|
|
||||||
|
// Complete cycle (A->B, B->C, C->A becomes A->B->C->A)
|
||||||
|
TransformEdgesParams{"complete cycle", {{"a", "b"}, {"b", "c"}, {"c", "a"}}, {{"a", "b", "c", "a"}}},
|
||||||
|
|
||||||
|
// Two disjoint edges - no joining
|
||||||
|
TransformEdgesParams{"disjoint edges", {{"a", "b"}, {"c", "d"}}, {{"a", "b"}, {"c", "d"}}},
|
||||||
|
|
||||||
|
// Chain of multiple edges (A->B, B->C, C->D, D->E)
|
||||||
|
TransformEdgesParams{
|
||||||
|
"chain of edges", {{"a", "b"}, {"b", "c"}, {"c", "d"}, {"d", "e"}}, {{"a", "b", "c", "d", "e"}}},
|
||||||
|
|
||||||
|
// Multiple disjoint cycles
|
||||||
|
TransformEdgesParams{
|
||||||
|
"multiple disjoint cycles",
|
||||||
|
{{"a", "b"}, {"b", "a"}, {"c", "d"}, {"d", "c"}},
|
||||||
|
{{"a", "b", "a"}, {"c", "d", "c"}}},
|
||||||
|
|
||||||
|
// Complex graph requiring multiple merge passes
|
||||||
|
// First pass: (A->B, B->C) -> (A->B->C)
|
||||||
|
// Then: (A->B->C, C->D) -> (A->B->C->D)
|
||||||
|
// Then: (D->A, A->B->C->D) -> (A->B->C->D->A)
|
||||||
|
TransformEdgesParams{
|
||||||
|
"complex requiring multiple passes",
|
||||||
|
{{"a", "b"}, {"b", "c"}, {"c", "d"}, {"d", "a"}},
|
||||||
|
{{"a", "b", "c", "d", "a"}}},
|
||||||
|
|
||||||
|
// Y-shaped graph (A->B, B->C, B->D)
|
||||||
|
// B->C and B->D can't connect, but A->B can prepend to B->C and A->B can prepend to B->D
|
||||||
|
// However, once A->B joins with B->C to form A->B->C, the original A->B is consumed
|
||||||
|
// So we should get A->B->C and B->D (or A->B->D and B->C depending on order)
|
||||||
|
TransformEdgesParams{"Y-shaped graph", {{"a", "b"}, {"b", "c"}, {"b", "d"}}, {{"a", "b", "c"}, {"b", "d"}}},
|
||||||
|
|
||||||
|
// Edge with longer path (multi-hop edge)
|
||||||
|
TransformEdgesParams{
|
||||||
|
"edge with multiple hops", {{"a", "x", "y", "b"}, {"b", "c"}}, {{"a", "x", "y", "b", "c"}}},
|
||||||
|
|
||||||
|
// Self-loop edge
|
||||||
|
TransformEdgesParams{"self-loop", {{"a", "a"}}, {{"a", "a"}}},
|
||||||
|
|
||||||
|
// Reverse order joining (tests prepend logic thoroughly)
|
||||||
|
TransformEdgesParams{
|
||||||
|
"reverse order joining", {{"d", "e"}, {"c", "d"}, {"b", "c"}, {"a", "b"}}, {{"a", "b", "c", "d", "e"}}}));
|
||||||
|
|
||||||
|
} // namespace nix
|
||||||
|
|
@ -62,6 +62,7 @@ sources = files(
|
||||||
'derived-path.cc',
|
'derived-path.cc',
|
||||||
'downstream-placeholder.cc',
|
'downstream-placeholder.cc',
|
||||||
'dummy-store.cc',
|
'dummy-store.cc',
|
||||||
|
'find-cycles.cc',
|
||||||
'http-binary-cache-store.cc',
|
'http-binary-cache-store.cc',
|
||||||
'legacy-ssh-store.cc',
|
'legacy-ssh-store.cc',
|
||||||
'local-binary-cache-store.cc',
|
'local-binary-cache-store.cc',
|
||||||
|
|
|
||||||
222
src/libstore/build/find-cycles.cc
Normal file
222
src/libstore/build/find-cycles.cc
Normal file
|
|
@ -0,0 +1,222 @@
|
||||||
|
#include "nix/store/build/find-cycles.hh"
|
||||||
|
|
||||||
|
#include "nix/store/store-api.hh"
|
||||||
|
#include "nix/util/source-accessor.hh"
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
#include <filesystem>
|
||||||
|
#include <map>
|
||||||
|
|
||||||
|
namespace nix {
|
||||||
|
|
||||||
|
CycleEdgeScanSink::CycleEdgeScanSink(StringSet && hashes, std::string storeDir)
|
||||||
|
: RefScanSink(std::move(hashes))
|
||||||
|
, storeDir(std::move(storeDir))
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
void CycleEdgeScanSink::setCurrentPath(const std::string & path)
|
||||||
|
{
|
||||||
|
currentFilePath = path;
|
||||||
|
// Clear tracking for new file
|
||||||
|
recordedForCurrentFile.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
void CycleEdgeScanSink::operator()(std::string_view data)
|
||||||
|
{
|
||||||
|
// Call parent's operator() to do the actual hash searching
|
||||||
|
// This reuses all the proven buffer boundary handling logic
|
||||||
|
RefScanSink::operator()(data);
|
||||||
|
|
||||||
|
// Check which hashes have been found and not yet recorded for this file
|
||||||
|
// getResult() returns the set of ALL hashes found so far
|
||||||
|
for (const auto & hash : getResult()) {
|
||||||
|
if (recordedForCurrentFile.insert(hash).second) {
|
||||||
|
// This hash was just found and not yet recorded for current file
|
||||||
|
// Create an edge from current file to the target
|
||||||
|
auto targetPath = storeDir + hash;
|
||||||
|
|
||||||
|
edges.push_back({currentFilePath, targetPath});
|
||||||
|
|
||||||
|
debug("found cycle edge: %s → %s (hash: %s)", currentFilePath, targetPath, hash);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
StoreCycleEdgeVec && CycleEdgeScanSink::getEdges()
|
||||||
|
{
|
||||||
|
return std::move(edges);
|
||||||
|
}
|
||||||
|
|
||||||
|
void scanForCycleEdges(const Path & path, const StorePathSet & refs, StoreCycleEdgeVec & edges)
|
||||||
|
{
|
||||||
|
StringSet hashes;
|
||||||
|
|
||||||
|
// Extract the store directory from the path
|
||||||
|
// Example: /run/user/1000/nix-test/store/abc-foo -> /run/user/1000/nix-test/store/
|
||||||
|
auto storePrefixPath = std::filesystem::path(path);
|
||||||
|
storePrefixPath.remove_filename();
|
||||||
|
std::string storePrefix = storePrefixPath.string();
|
||||||
|
|
||||||
|
debug("scanForCycleEdges: storePrefixPath = %s", storePrefixPath.string());
|
||||||
|
debug("scanForCycleEdges: storePrefix = %s", storePrefix);
|
||||||
|
|
||||||
|
// Collect hashes to search for
|
||||||
|
for (auto & i : refs) {
|
||||||
|
hashes.insert(std::string(i.hashPart()));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create sink that reuses RefScanSink's hash-finding logic
|
||||||
|
CycleEdgeScanSink sink(std::move(hashes), storePrefix);
|
||||||
|
|
||||||
|
// Get filesystem accessor and walk the tree
|
||||||
|
auto accessor = getFSSourceAccessor();
|
||||||
|
walkAndScanPath(*accessor, CanonPath(path), path, sink);
|
||||||
|
|
||||||
|
// Extract the found edges
|
||||||
|
edges = sink.getEdges();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Recursively walk filesystem and stream files into the sink.
|
||||||
|
* This reuses RefScanSink's hash-finding logic instead of reimplementing it.
|
||||||
|
*/
|
||||||
|
void walkAndScanPath(
|
||||||
|
SourceAccessor & accessor, const CanonPath & path, const std::string & displayPath, CycleEdgeScanSink & sink)
|
||||||
|
{
|
||||||
|
auto stat = accessor.lstat(path);
|
||||||
|
|
||||||
|
debug("walkAndScanPath: scanning path = %s", displayPath);
|
||||||
|
|
||||||
|
switch (stat.type) {
|
||||||
|
case SourceAccessor::tRegular: {
|
||||||
|
// Handle regular files - stream contents into sink
|
||||||
|
sink.setCurrentPath(displayPath);
|
||||||
|
accessor.readFile(path, sink);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
case SourceAccessor::tDirectory: {
|
||||||
|
// Handle directories - recursively scan contents
|
||||||
|
auto entries = accessor.readDirectory(path);
|
||||||
|
for (const auto & [name, entryType] : entries) {
|
||||||
|
auto childPath = path / name;
|
||||||
|
auto childDisplayPath = displayPath + "/" + name;
|
||||||
|
debug("walkAndScanPath: recursing into %s", childDisplayPath);
|
||||||
|
walkAndScanPath(accessor, childPath, childDisplayPath, sink);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
case SourceAccessor::tSymlink: {
|
||||||
|
// Handle symlinks - stream link target into sink
|
||||||
|
auto linkTarget = accessor.readLink(path);
|
||||||
|
|
||||||
|
debug("walkAndScanPath: scanning symlink %s -> %s", displayPath, linkTarget);
|
||||||
|
|
||||||
|
sink.setCurrentPath(displayPath);
|
||||||
|
sink(std::string_view(linkTarget));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
case SourceAccessor::tChar:
|
||||||
|
case SourceAccessor::tBlock:
|
||||||
|
case SourceAccessor::tSocket:
|
||||||
|
case SourceAccessor::tFifo:
|
||||||
|
case SourceAccessor::tUnknown:
|
||||||
|
default:
|
||||||
|
throw Error("file '%1%' has an unsupported type", displayPath);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void transformEdgesToMultiedges(StoreCycleEdgeVec & edges, StoreCycleEdgeVec & multiedges)
|
||||||
|
{
|
||||||
|
debug("transformEdgesToMultiedges: processing %lu edges", edges.size());
|
||||||
|
|
||||||
|
// Maps to track path endpoints for efficient joining
|
||||||
|
// Key: node name, Value: index into multiedges vector
|
||||||
|
std::map<std::string, size_t> pathStartingAt; // Maps start node -> path index
|
||||||
|
std::map<std::string, size_t> pathEndingAt; // Maps end node -> path index
|
||||||
|
|
||||||
|
for (auto & edge : edges) {
|
||||||
|
if (edge.empty())
|
||||||
|
continue;
|
||||||
|
|
||||||
|
const std::string & edgeStart = edge.front();
|
||||||
|
const std::string & edgeEnd = edge.back();
|
||||||
|
|
||||||
|
// Check if this edge can connect to existing paths
|
||||||
|
auto startIt = pathEndingAt.find(edgeStart);
|
||||||
|
auto endIt = pathStartingAt.find(edgeEnd);
|
||||||
|
|
||||||
|
bool canPrepend = (startIt != pathEndingAt.end());
|
||||||
|
bool canAppend = (endIt != pathStartingAt.end());
|
||||||
|
|
||||||
|
if (canPrepend && canAppend && startIt->second == endIt->second) {
|
||||||
|
// Edge connects a path to itself - append it to form a cycle
|
||||||
|
size_t pathIdx = startIt->second;
|
||||||
|
auto & path = multiedges[pathIdx];
|
||||||
|
// Append all but first element of edge (first element is duplicate)
|
||||||
|
path.insert(path.end(), std::next(edge.begin()), edge.end());
|
||||||
|
// Update the end point (start point stays the same for a cycle)
|
||||||
|
pathEndingAt.erase(startIt);
|
||||||
|
pathEndingAt[edgeEnd] = pathIdx;
|
||||||
|
} else if (canPrepend && canAppend) {
|
||||||
|
// Edge joins two different paths - merge them
|
||||||
|
size_t prependIdx = startIt->second;
|
||||||
|
size_t appendIdx = endIt->second;
|
||||||
|
auto & prependPath = multiedges[prependIdx];
|
||||||
|
auto & appendPath = multiedges[appendIdx];
|
||||||
|
|
||||||
|
// Save endpoint before modifying appendPath
|
||||||
|
const std::string appendPathEnd = appendPath.back();
|
||||||
|
const std::string appendPathStart = appendPath.front();
|
||||||
|
|
||||||
|
// Append edge (without first element) to prependPath
|
||||||
|
prependPath.insert(prependPath.end(), std::next(edge.begin()), edge.end());
|
||||||
|
// Append appendPath (without first element) to prependPath
|
||||||
|
prependPath.insert(prependPath.end(), std::next(appendPath.begin()), appendPath.end());
|
||||||
|
|
||||||
|
// Update maps: prependPath now ends where appendPath ended
|
||||||
|
pathEndingAt.erase(startIt);
|
||||||
|
pathEndingAt[appendPathEnd] = prependIdx;
|
||||||
|
pathStartingAt.erase(appendPathStart);
|
||||||
|
|
||||||
|
// Mark appendPath for removal by clearing it
|
||||||
|
appendPath.clear();
|
||||||
|
} else if (canPrepend) {
|
||||||
|
// Edge extends an existing path at its end
|
||||||
|
size_t pathIdx = startIt->second;
|
||||||
|
auto & path = multiedges[pathIdx];
|
||||||
|
// Append all but first element of edge (first element is duplicate)
|
||||||
|
path.insert(path.end(), std::next(edge.begin()), edge.end());
|
||||||
|
// Update the end point
|
||||||
|
pathEndingAt.erase(startIt);
|
||||||
|
pathEndingAt[edgeEnd] = pathIdx;
|
||||||
|
} else if (canAppend) {
|
||||||
|
// Edge extends an existing path at its start
|
||||||
|
size_t pathIdx = endIt->second;
|
||||||
|
auto & path = multiedges[pathIdx];
|
||||||
|
// Prepend all but last element of edge (last element is duplicate)
|
||||||
|
path.insert(path.begin(), edge.begin(), std::prev(edge.end()));
|
||||||
|
// Update the start point
|
||||||
|
pathStartingAt.erase(endIt);
|
||||||
|
pathStartingAt[edgeStart] = pathIdx;
|
||||||
|
} else {
|
||||||
|
// Edge doesn't connect to anything - start a new path
|
||||||
|
size_t newIdx = multiedges.size();
|
||||||
|
multiedges.push_back(edge);
|
||||||
|
pathStartingAt[edgeStart] = newIdx;
|
||||||
|
pathEndingAt[edgeEnd] = newIdx;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Remove empty paths (those that were merged into others)
|
||||||
|
multiedges.erase(
|
||||||
|
std::remove_if(multiedges.begin(), multiedges.end(), [](const StoreCycleEdge & p) { return p.empty(); }),
|
||||||
|
multiedges.end());
|
||||||
|
|
||||||
|
debug("transformEdgesToMultiedges: result has %lu multiedges", multiedges.size());
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace nix
|
||||||
109
src/libstore/include/nix/store/build/find-cycles.hh
Normal file
109
src/libstore/include/nix/store/build/find-cycles.hh
Normal file
|
|
@ -0,0 +1,109 @@
|
||||||
|
#pragma once
|
||||||
|
///@file
|
||||||
|
|
||||||
|
#include "nix/store/store-api.hh"
|
||||||
|
#include "nix/store/references.hh"
|
||||||
|
#include "nix/util/types.hh"
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
#include <deque>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
namespace nix {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Represents a cycle edge as a sequence of file paths.
|
||||||
|
* Uses deque to allow efficient prepend/append when joining edges.
|
||||||
|
*
|
||||||
|
* Example: {"/nix/store/abc-foo/file1", "/nix/store/def-bar/file2"}
|
||||||
|
* represents a reference from file1 to file2.
|
||||||
|
*/
|
||||||
|
using StoreCycleEdge = std::deque<std::string>;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A collection of cycle edges found during scanning.
|
||||||
|
*/
|
||||||
|
using StoreCycleEdgeVec = std::vector<StoreCycleEdge>;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A sink that extends RefScanSink to track file paths where references are found.
|
||||||
|
*
|
||||||
|
* This reuses the existing reference scanning logic from RefScanSink, but adds
|
||||||
|
* tracking of which file contains which reference. This is essential for providing
|
||||||
|
* detailed cycle error messages.
|
||||||
|
*/
|
||||||
|
class CycleEdgeScanSink : public RefScanSink
|
||||||
|
{
|
||||||
|
std::string currentFilePath;
|
||||||
|
std::string storeDir;
|
||||||
|
StoreCycleEdgeVec edges;
|
||||||
|
|
||||||
|
// Track hashes we've already recorded for current file
|
||||||
|
// to avoid duplicates
|
||||||
|
StringSet recordedForCurrentFile;
|
||||||
|
|
||||||
|
public:
|
||||||
|
|
||||||
|
CycleEdgeScanSink(StringSet && hashes, std::string storeDir);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Set the current file path being scanned.
|
||||||
|
* Must be called before processing each file.
|
||||||
|
*/
|
||||||
|
void setCurrentPath(const std::string & path);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Override to intercept when hashes are found and record the file location.
|
||||||
|
*/
|
||||||
|
void operator()(std::string_view data) override;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the accumulated cycle edges.
|
||||||
|
*/
|
||||||
|
StoreCycleEdgeVec && getEdges();
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Scan output paths to find cycle edges with detailed file paths.
|
||||||
|
*
|
||||||
|
* This is the second pass of cycle detection. The first pass (scanForReferences)
|
||||||
|
* detects that a cycle exists. This function provides detailed information about
|
||||||
|
* where the cycles occur in the actual file system.
|
||||||
|
*
|
||||||
|
* @param path The store path to scan (e.g., an output directory)
|
||||||
|
* @param refs The set of potentially referenced store paths
|
||||||
|
* @param edges Output parameter that accumulates found cycle edges
|
||||||
|
*/
|
||||||
|
void scanForCycleEdges(const Path & path, const StorePathSet & refs, StoreCycleEdgeVec & edges);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Recursively walk filesystem tree and scan each file for hash references.
|
||||||
|
*
|
||||||
|
* This function walks the file system tree, streaming file contents into
|
||||||
|
* the provided sink which performs the actual hash detection. This reuses
|
||||||
|
* the existing RefScanSink infrastructure for robustness.
|
||||||
|
*
|
||||||
|
* @param accessor Source accessor for reading files
|
||||||
|
* @param path Current path being scanned
|
||||||
|
* @param displayPath Physical path for error messages
|
||||||
|
* @param sink The CycleEdgeScanSink that will detect and record hash references
|
||||||
|
*/
|
||||||
|
void walkAndScanPath(
|
||||||
|
SourceAccessor & accessor, const CanonPath & path, const std::string & displayPath, CycleEdgeScanSink & sink);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Transform individual edges into connected multi-edges (paths).
|
||||||
|
*
|
||||||
|
* Takes a list of edges like [A→B, B→C, C→A] and connects them into
|
||||||
|
* longer paths like [A→B→C→A]. This makes it easier to visualize the
|
||||||
|
* actual cycle paths.
|
||||||
|
*
|
||||||
|
* Uses hashmaps to track path endpoints, enabling O(n) joining of edges
|
||||||
|
* where n is the number of input edges.
|
||||||
|
*
|
||||||
|
* @param edges Input edges to transform
|
||||||
|
* @param multiedges Output parameter with connected paths
|
||||||
|
*/
|
||||||
|
void transformEdgesToMultiedges(StoreCycleEdgeVec & edges, StoreCycleEdgeVec & multiedges);
|
||||||
|
|
||||||
|
} // namespace nix
|
||||||
|
|
@ -21,6 +21,7 @@ headers = [ config_pub_h ] + files(
|
||||||
'build/derivation-resolution-goal.hh',
|
'build/derivation-resolution-goal.hh',
|
||||||
'build/derivation-trampoline-goal.hh',
|
'build/derivation-trampoline-goal.hh',
|
||||||
'build/drv-output-substitution-goal.hh',
|
'build/drv-output-substitution-goal.hh',
|
||||||
|
'build/find-cycles.hh',
|
||||||
'build/goal.hh',
|
'build/goal.hh',
|
||||||
'build/substitution-goal.hh',
|
'build/substitution-goal.hh',
|
||||||
'build/worker.hh',
|
'build/worker.hh',
|
||||||
|
|
|
||||||
|
|
@ -267,6 +267,7 @@ sources = files(
|
||||||
'build/derivation-trampoline-goal.cc',
|
'build/derivation-trampoline-goal.cc',
|
||||||
'build/drv-output-substitution-goal.cc',
|
'build/drv-output-substitution-goal.cc',
|
||||||
'build/entry-points.cc',
|
'build/entry-points.cc',
|
||||||
|
'build/find-cycles.cc',
|
||||||
'build/goal.cc',
|
'build/goal.cc',
|
||||||
'build/substitution-goal.cc',
|
'build/substitution-goal.cc',
|
||||||
'build/worker.cc',
|
'build/worker.cc',
|
||||||
|
|
|
||||||
|
|
@ -45,6 +45,7 @@
|
||||||
|
|
||||||
#include "store-config-private.hh"
|
#include "store-config-private.hh"
|
||||||
#include "build/derivation-check.hh"
|
#include "build/derivation-check.hh"
|
||||||
|
#include "nix/store/build/find-cycles.hh"
|
||||||
|
|
||||||
#if NIX_WITH_AWS_AUTH
|
#if NIX_WITH_AWS_AUTH
|
||||||
# include "nix/store/aws-creds.hh"
|
# include "nix/store/aws-creds.hh"
|
||||||
|
|
@ -1473,43 +1474,100 @@ SingleDrvOutputs DerivationBuilderImpl::registerOutputs()
|
||||||
outputStats.insert_or_assign(outputName, std::move(st));
|
outputStats.insert_or_assign(outputName, std::move(st));
|
||||||
}
|
}
|
||||||
|
|
||||||
auto sortedOutputNames = topoSort(
|
std::vector<std::string> sortedOutputNames;
|
||||||
outputsToSort,
|
|
||||||
{[&](const std::string & name) {
|
try {
|
||||||
auto orifu = get(outputReferencesIfUnregistered, name);
|
sortedOutputNames = topoSort(
|
||||||
if (!orifu)
|
outputsToSort,
|
||||||
throw BuildError(
|
{[&](const std::string & name) {
|
||||||
BuildResult::Failure::OutputRejected,
|
auto orifu = get(outputReferencesIfUnregistered, name);
|
||||||
"no output reference for '%s' in build of '%s'",
|
if (!orifu)
|
||||||
name,
|
throw BuildError(
|
||||||
store.printStorePath(drvPath));
|
BuildResult::Failure::OutputRejected,
|
||||||
return std::visit(
|
"no output reference for '%s' in build of '%s'",
|
||||||
overloaded{
|
name,
|
||||||
/* Since we'll use the already installed versions of these, we
|
store.printStorePath(drvPath));
|
||||||
can treat them as leaves and ignore any references they
|
return std::visit(
|
||||||
have. */
|
overloaded{
|
||||||
[&](const AlreadyRegistered &) { return StringSet{}; },
|
/* Since we'll use the already installed versions of these, we
|
||||||
[&](const PerhapsNeedToRegister & refs) {
|
can treat them as leaves and ignore any references they
|
||||||
StringSet referencedOutputs;
|
have. */
|
||||||
/* FIXME build inverted map up front so no quadratic waste here */
|
[&](const AlreadyRegistered &) { return StringSet{}; },
|
||||||
for (auto & r : refs.refs)
|
[&](const PerhapsNeedToRegister & refs) {
|
||||||
for (auto & [o, p] : scratchOutputs)
|
StringSet referencedOutputs;
|
||||||
if (r == p)
|
/* FIXME build inverted map up front so no quadratic waste here */
|
||||||
referencedOutputs.insert(o);
|
for (auto & r : refs.refs)
|
||||||
return referencedOutputs;
|
for (auto & [o, p] : scratchOutputs)
|
||||||
|
if (r == p)
|
||||||
|
referencedOutputs.insert(o);
|
||||||
|
return referencedOutputs;
|
||||||
|
},
|
||||||
},
|
},
|
||||||
},
|
*orifu);
|
||||||
*orifu);
|
}},
|
||||||
}},
|
{[&](const std::string & path, const std::string & parent) {
|
||||||
{[&](const std::string & path, const std::string & parent) {
|
// TODO with more -vvvv also show the temporary paths for manual inspection.
|
||||||
// TODO with more -vvvv also show the temporary paths for manual inspection.
|
return BuildError(
|
||||||
return BuildError(
|
BuildResult::Failure::OutputRejected,
|
||||||
BuildResult::Failure::OutputRejected,
|
"cycle detected in build of '%s' in the references of output '%s' from output '%s'",
|
||||||
"cycle detected in build of '%s' in the references of output '%s' from output '%s'",
|
store.printStorePath(drvPath),
|
||||||
store.printStorePath(drvPath),
|
path,
|
||||||
path,
|
parent);
|
||||||
parent);
|
}});
|
||||||
}});
|
} catch (std::exception & e) {
|
||||||
|
debug("cycle detected during topoSort, analyzing for detailed error report");
|
||||||
|
|
||||||
|
// Scan all outputs for cycle edges with exact file paths
|
||||||
|
StoreCycleEdgeVec edges;
|
||||||
|
for (auto & [outputName, _] : drv.outputs) {
|
||||||
|
auto scratchOutput = get(scratchOutputs, outputName);
|
||||||
|
if (!scratchOutput)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
auto actualPath = realPathInSandbox(store.printStorePath(*scratchOutput));
|
||||||
|
debug("scanning output '%s' at path '%s' for cycle edges", outputName, actualPath);
|
||||||
|
|
||||||
|
scanForCycleEdges(actualPath, referenceablePaths, edges);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (edges.empty()) {
|
||||||
|
debug("no detailed cycle edges found, re-throwing original error");
|
||||||
|
throw;
|
||||||
|
}
|
||||||
|
|
||||||
|
debug("found %lu cycle edges, transforming to connected paths", edges.size());
|
||||||
|
|
||||||
|
// Transform individual edges into connected multi-edges (paths)
|
||||||
|
StoreCycleEdgeVec multiedges;
|
||||||
|
transformEdgesToMultiedges(edges, multiedges);
|
||||||
|
|
||||||
|
// Build detailed error message
|
||||||
|
std::string cycleDetails = fmt("Detailed cycle analysis found %d cycle path(s):", multiedges.size());
|
||||||
|
|
||||||
|
for (size_t i = 0; i < multiedges.size(); i++) {
|
||||||
|
auto & multiedge = multiedges[i];
|
||||||
|
cycleDetails += fmt("\n\nCycle %d:", i + 1);
|
||||||
|
for (auto & file : multiedge) {
|
||||||
|
cycleDetails += fmt("\n → %s", file);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
cycleDetails +=
|
||||||
|
fmt("\n\nThis means there are circular references between output files.\n"
|
||||||
|
"The build cannot proceed because the outputs reference each other.");
|
||||||
|
|
||||||
|
// Add hint with temp paths for debugging
|
||||||
|
if (settings.keepFailed || verbosity >= lvlDebug) {
|
||||||
|
cycleDetails +=
|
||||||
|
fmt("\n\nNote: Build outputs are kept for inspection.\n"
|
||||||
|
"You can examine the files listed above to understand the cycle.");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Throw new error with original message + cycle details
|
||||||
|
BuildError * buildErr = dynamic_cast<BuildError *>(&e);
|
||||||
|
std::string originalMsg = buildErr ? std::string(buildErr->msg()) : std::string(e.what());
|
||||||
|
throw BuildError(BuildResult::Failure::OutputRejected, "%s\n\n%s", originalMsg, cycleDetails);
|
||||||
|
}
|
||||||
|
|
||||||
std::reverse(sortedOutputNames.begin(), sortedOutputNames.end());
|
std::reverse(sortedOutputNames.begin(), sortedOutputNames.end());
|
||||||
|
|
||||||
|
|
@ -1848,12 +1906,61 @@ SingleDrvOutputs DerivationBuilderImpl::registerOutputs()
|
||||||
/* Register each output path as valid, and register the sets of
|
/* Register each output path as valid, and register the sets of
|
||||||
paths referenced by each of them. If there are cycles in the
|
paths referenced by each of them. If there are cycles in the
|
||||||
outputs, this will fail. */
|
outputs, this will fail. */
|
||||||
{
|
try {
|
||||||
ValidPathInfos infos2;
|
ValidPathInfos infos2;
|
||||||
for (auto & [outputName, newInfo] : infos) {
|
for (auto & [outputName, newInfo] : infos) {
|
||||||
infos2.insert_or_assign(newInfo.path, newInfo);
|
infos2.insert_or_assign(newInfo.path, newInfo);
|
||||||
}
|
}
|
||||||
store.registerValidPaths(infos2);
|
store.registerValidPaths(infos2);
|
||||||
|
} catch (BuildError & e) {
|
||||||
|
debug("cycle detected during registerValidPaths, analyzing for detailed error report");
|
||||||
|
|
||||||
|
// Scan all outputs for cycle edges with exact file paths
|
||||||
|
StoreCycleEdgeVec edges;
|
||||||
|
for (auto & [outputName, newInfo] : infos) {
|
||||||
|
auto actualPath = store.toRealPath(store.printStorePath(newInfo.path));
|
||||||
|
debug("scanning registered output '%s' at path '%s' for cycle edges", outputName, actualPath);
|
||||||
|
|
||||||
|
scanForCycleEdges(actualPath, referenceablePaths, edges);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (edges.empty()) {
|
||||||
|
debug("no detailed cycle edges found, re-throwing original error");
|
||||||
|
throw;
|
||||||
|
}
|
||||||
|
|
||||||
|
debug("found %lu cycle edges, transforming to connected paths", edges.size());
|
||||||
|
|
||||||
|
// Transform individual edges into connected multi-edges (paths)
|
||||||
|
StoreCycleEdgeVec multiedges;
|
||||||
|
transformEdgesToMultiedges(edges, multiedges);
|
||||||
|
|
||||||
|
// Build detailed error message
|
||||||
|
std::string cycleDetails = fmt("Detailed cycle analysis found %d cycle path(s):", multiedges.size());
|
||||||
|
|
||||||
|
for (size_t i = 0; i < multiedges.size(); i++) {
|
||||||
|
auto & multiedge = multiedges[i];
|
||||||
|
cycleDetails += fmt("\n\nCycle %d:", i + 1);
|
||||||
|
for (auto & file : multiedge) {
|
||||||
|
cycleDetails += fmt("\n → %s", file);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
cycleDetails +=
|
||||||
|
fmt("\n\nThis means there are circular references between output files.\n"
|
||||||
|
"The build cannot proceed because the outputs reference each other.");
|
||||||
|
|
||||||
|
// Add hint with temp paths for debugging
|
||||||
|
if (settings.keepFailed || verbosity >= lvlDebug) {
|
||||||
|
cycleDetails +=
|
||||||
|
fmt("\n\nNote: Build outputs were kept for inspection.\n"
|
||||||
|
"You can examine the files listed above to understand the cycle.");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Throw new error with original message + cycle details
|
||||||
|
BuildError * buildErr = dynamic_cast<BuildError *>(&e);
|
||||||
|
std::string originalMsg = buildErr ? std::string(buildErr->msg()) : std::string(e.what());
|
||||||
|
throw BuildError(BuildResult::Failure::OutputRejected, "%s\n\n%s", originalMsg, cycleDetails);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* If we made it this far, we are sure the output matches the
|
/* If we made it this far, we are sure the output matches the
|
||||||
|
|
|
||||||
|
|
@ -83,6 +83,8 @@ rec {
|
||||||
'';
|
'';
|
||||||
};
|
};
|
||||||
|
|
||||||
|
# Test for cycle detection with detailed error messages
|
||||||
|
# This creates multiple cycles: a→b→c→a and a→c→b→a
|
||||||
cyclic =
|
cyclic =
|
||||||
(mkDerivation {
|
(mkDerivation {
|
||||||
name = "cyclic-outputs";
|
name = "cyclic-outputs";
|
||||||
|
|
@ -92,10 +94,22 @@ rec {
|
||||||
"c"
|
"c"
|
||||||
];
|
];
|
||||||
builder = builtins.toFile "builder.sh" ''
|
builder = builtins.toFile "builder.sh" ''
|
||||||
mkdir $a $b $c
|
mkdir -p $a/subdir $b/subdir $c/subdir
|
||||||
echo $a > $b/foo
|
|
||||||
echo $b > $c/bar
|
# First cycle: a → b → c → a
|
||||||
echo $c > $a/baz
|
echo "$b/subdir/b-to-c" > $a/subdir/a-to-b
|
||||||
|
echo "$c/subdir/c-to-a" > $b/subdir/b-to-c
|
||||||
|
echo "$a/subdir/a-to-b" > $c/subdir/c-to-a
|
||||||
|
|
||||||
|
# Second cycle: a → c → b → a
|
||||||
|
echo "$c/subdir/c-to-b-2" > $a/subdir/a-to-c-2
|
||||||
|
echo "$b/subdir/b-to-a-2" > $c/subdir/c-to-b-2
|
||||||
|
echo "$a/subdir/a-to-c-2" > $b/subdir/b-to-a-2
|
||||||
|
|
||||||
|
# Non-cyclic reference (just for complexity)
|
||||||
|
echo "non-cyclic-data" > $a/data
|
||||||
|
echo "non-cyclic-data" > $b/data
|
||||||
|
echo "non-cyclic-data" > $c/data
|
||||||
'';
|
'';
|
||||||
}).a;
|
}).a;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -91,9 +91,22 @@ nix-build multiple-outputs.nix -A a.first --no-out-link
|
||||||
|
|
||||||
# Cyclic outputs should be rejected.
|
# Cyclic outputs should be rejected.
|
||||||
echo "building cyclic..."
|
echo "building cyclic..."
|
||||||
if nix-build multiple-outputs.nix -A cyclic --no-out-link; then
|
if cyclicOutput=$(nix-build multiple-outputs.nix -A cyclic --no-out-link 2>&1); then
|
||||||
echo "Cyclic outputs incorrectly accepted!"
|
echo "Cyclic outputs incorrectly accepted!"
|
||||||
exit 1
|
exit 1
|
||||||
|
else
|
||||||
|
echo "Cyclic outputs correctly rejected"
|
||||||
|
# Verify error message mentions cycles
|
||||||
|
echo "$cyclicOutput" | grepQuiet "cycle"
|
||||||
|
|
||||||
|
# Enhanced cycle error messages were added in 2.33
|
||||||
|
if isDaemonNewer "2.33"; then
|
||||||
|
echo "$cyclicOutput" | grepQuiet "Detailed cycle analysis"
|
||||||
|
echo "$cyclicOutput" | grepQuiet "Cycle 1:"
|
||||||
|
# The error should mention actual file paths with subdirectories
|
||||||
|
echo "$cyclicOutput" | grepQuiet "subdir"
|
||||||
|
echo "Enhanced cycle error messages verified"
|
||||||
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Do a GC. This should leave an empty store.
|
# Do a GC. This should leave an empty store.
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue