feat(libstore): make cycle errors more verbose

Co-Authored-By: Milan Hauth <milahu@gmail.com>
Bernardo Meurer Costa 2025-10-11 18:42:18 +00:00
parent bef3c37cb2
commit d44c8c8b69
6 changed files with 574 additions and 42 deletions


@@ -0,0 +1,328 @@
#include "find-cycles.hh"
#include "nix/store/store-api.hh"
#include "nix/util/file-system.hh"
#include "nix/util/archive.hh"
#include "nix/util/base-nix-32.hh"
#include <filesystem>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
namespace nix {
// Hash length in characters (32 for base32-encoded sha256)
static constexpr size_t refLength = StorePath::HashLen;
// Maximum expected file path length for buffer carry-over
static constexpr size_t MAX_FILEPATH_LENGTH = 1000;
void scanForCycleEdges(const Path & path, const StorePathSet & refs, StoreCycleEdgeVec & edges)
{
StringSet hashes;
std::map<std::string, StorePath> hashPathMap;
// Extract the store directory from the path
// Example: /run/user/1000/nix-test/store/abc-foo -> /run/user/1000/nix-test/store/
auto storePrefixPath = std::filesystem::path(path);
storePrefixPath.remove_filename();
std::string storePrefix = storePrefixPath.string();
debug("scanForCycleEdges: storePrefixPath = %s", storePrefixPath.string());
debug("scanForCycleEdges: storePrefix = %s", storePrefix);
// Build map of hash -> StorePath and collect hashes to search for
for (auto & i : refs) {
std::string hashPart(i.hashPart());
auto inserted = hashPathMap.emplace(hashPart, i).second;
assert(inserted);
hashes.insert(hashPart);
}
scanForCycleEdges2(path, hashes, edges, storePrefix);
}
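/* Walks `path` recursively.  Regular files are read in 64 KiB chunks and
   searched for any of the 32-character base-32 hashes in `hashes`; a carry
   buffer of up to MAX_FILEPATH_LENGTH bytes bridges chunk boundaries so that
   references spanning two reads are not missed.  Candidate windows are
   checked from their last character backwards: as soon as a character is not
   in the base-32 alphabet (BaseNix32::lookupReverse rejects it), the scan
   skips past it, which keeps the search cheap. */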
void scanForCycleEdges2(std::string path, const StringSet & hashes, StoreCycleEdgeVec & edges, std::string storeDir)
{
auto st = lstat(path);
debug("scanForCycleEdges2: scanning path = %s", path);
if (S_ISREG(st.st_mode)) {
// Handle regular files - scan contents for hash references
AutoCloseFD fd = open(path.c_str(), O_RDONLY | O_CLOEXEC);
if (!fd)
throw SysError("opening file '%1%'", path);
std::vector<char> buf(65536);
size_t rest = st.st_size;
size_t start = 0;
// Buffer to carry data between reads (for references spanning chunks)
std::vector<char> bufCarry(MAX_FILEPATH_LENGTH);
bool bufCarryUsed = false;
size_t bufCarrySize = 0;
while (rest > 0) {
auto n = std::min(rest, buf.size());
readFull(fd.get(), buf.data(), n);
debug("scanForCycleEdges2: read file %s: n = %lu", path, n);
// Check if we have carry-over data from previous iteration
if (bufCarryUsed) {
// Search in the overlap region (carry + start of new buffer)
size_t searchSize = std::min(bufCarrySize + n, MAX_FILEPATH_LENGTH);
std::vector<char> searchBuf(searchSize);
// Copy carry buffer
std::copy(bufCarry.begin(), bufCarry.begin() + bufCarrySize, searchBuf.begin());
// Copy start of new buffer
size_t newDataSize = searchSize - bufCarrySize;
std::copy(buf.begin(), buf.begin() + newDataSize, searchBuf.begin() + bufCarrySize);
// Search for hashes in the overlap region
for (size_t i = 0; i + refLength <= searchBuf.size();) {
bool match = true;
for (int j = refLength - 1; j >= 0; --j) {
if (!BaseNix32::lookupReverse(searchBuf[i + j])) {
i += j + 1;
match = false;
break;
}
}
if (!match)
continue;
std::string hash(searchBuf.begin() + i, searchBuf.begin() + i + refLength);
if (hashes.find(hash) != hashes.end()) {
debug("scanForCycleEdges2: found hash '%s' in overlap region", hash);
// Try to find the full path
size_t storeDirLength = storeDir.size();
if (i >= storeDirLength) {
std::string targetPath = storeDir + hash;
StoreCycleEdge edge;
edge.push_back(path);
edge.push_back(targetPath);
edges.push_back(edge);
}
}
++i;
}
bufCarryUsed = false;
}
// Search in the main buffer
for (size_t i = 0; i + refLength <= n;) {
bool match = true;
for (int j = refLength - 1; j >= 0; --j) {
if (!BaseNix32::lookupReverse(buf[i + j])) {
i += j + 1;
match = false;
break;
}
}
if (!match)
continue;
// Found a potential hash
std::string hash(buf.begin() + i, buf.begin() + i + refLength);
if (hashes.find(hash) != hashes.end()) {
debug("scanForCycleEdges2: found reference to hash '%s' at offset %lu", hash, start + i);
// Try to reconstruct the full store path
size_t storeDirLength = storeDir.size();
std::string targetPath = storeDir + hash;
std::string targetStorePath;
// Check if we have storeDir + hash in the buffer
if (i >= (size_t) storeDirLength
&& std::string(buf.begin() + i - storeDirLength, buf.begin() + i + refLength) == targetPath) {
debug("scanForCycleEdges2: found store path prefix at offset %lu", start + i - storeDirLength);
// Try to find the complete path by checking what exists on disk
// We probe incrementally to find the longest existing path
size_t testNameLength = refLength + 2; // Minimum: hash + "-x"
size_t targetPathLastEnd = 0;
bool foundStorePath = false;
bool foundPath = false;
for (; testNameLength < 255 && i + (size_t) targetPathLastEnd + (size_t) testNameLength <= n;
testNameLength++) {
std::string testPath(
buf.begin() + i - storeDirLength, buf.begin() + i + targetPathLastEnd + testNameLength);
struct stat testStat;
if (stat(testPath.c_str(), &testStat) == 0) {
debug("scanForCycleEdges2: found existing path: %s", testPath);
if (!foundStorePath) {
// First match is the store path component
targetStorePath = testPath.substr(storeDirLength);
foundStorePath = true;
}
foundPath = true;
targetPath = testPath;
// Check if this is a directory (followed by '/')
if (buf[i + targetPathLastEnd + testNameLength] == '/') {
debug("scanForCycleEdges2: path is a directory, continuing");
targetPathLastEnd += testNameLength;
testNameLength = 1; // Reset for next component
continue;
}
}
}
if (foundPath) {
debug("scanForCycleEdges2: cycle edge: %s -> %s", path, targetPath);
} else {
// Couldn't find exact path, use store path + hash
targetPath = storeDir + hash;
}
}
StoreCycleEdge edge;
edge.push_back(path);
edge.push_back(targetPath);
edges.push_back(edge);
}
++i;
}
start += n;
rest -= n;
// Carry over the end of the buffer for next iteration
if (n == buf.size() && rest > 0) {
size_t carrySize = std::min(MAX_FILEPATH_LENGTH, n);
std::copy(buf.end() - carrySize, buf.end(), bufCarry.begin());
bufCarrySize = carrySize;
bufCarryUsed = true;
}
}
} else if (S_ISDIR(st.st_mode)) {
// Handle directories - recursively scan contents
std::map<std::string, std::string> unhacked;
for (DirectoryIterator i(path); i != DirectoryIterator(); ++i) {
std::string entryName = i->path().filename().string();
#ifdef __APPLE__
// Handle case-insensitive filesystems on macOS
std::string name(entryName);
size_t pos = entryName.find(caseHackSuffix);
if (pos != std::string::npos) {
debug("removing case hack suffix from '%s'", path + "/" + entryName);
name.erase(pos);
}
if (unhacked.find(name) != unhacked.end()) {
throw Error(
"file name collision between '%1%' and '%2%'", path + "/" + unhacked[name], path + "/" + entryName);
}
unhacked[name] = entryName;
#else
unhacked[entryName] = entryName;
#endif
}
for (auto & [name, actualName] : unhacked) {
debug("scanForCycleEdges2: recursing into %s/%s", path, actualName);
scanForCycleEdges2(path + "/" + actualName, hashes, edges, storeDir);
}
} else if (S_ISLNK(st.st_mode)) {
// Handle symlinks - scan link target for hash references
std::string linkTarget = readLink(path);
debug("scanForCycleEdges2: scanning symlink %s -> %s", path, linkTarget);
for (size_t i = 0; i + refLength <= linkTarget.size();) {
bool match = true;
for (int j = refLength - 1; j >= 0; --j) {
if (!BaseNix32::lookupReverse(linkTarget[i + j])) {
i += j + 1;
match = false;
break;
}
}
if (!match)
continue;
std::string ref(linkTarget.begin() + i, linkTarget.begin() + i + refLength);
if (hashes.find(ref) != hashes.end()) {
debug("scanForCycleEdges2: found reference '%s' in symlink at offset %lu", ref, i);
// Try to extract full path from link target
std::string targetPath = storeDir + ref;
StoreCycleEdge edge;
edge.push_back(path);
edge.push_back(targetPath);
edges.push_back(edge);
}
++i;
}
} else {
throw Error("file '%1%' has an unsupported type", path);
}
}
void transformEdgesToMultiedges(StoreCycleEdgeVec & edges, StoreCycleEdgeVec & multiedges)
{
debug("transformEdgesToMultiedges: processing %lu edges", edges.size());
for (auto & edge2 : edges) {
bool edge2Joined = false;
for (auto & edge1 : multiedges) {
debug("comparing edge1 (size=%lu) with edge2 (size=%lu)", edge1.size(), edge2.size());
// Check if edge1.back() == edge2.front()
// This means we can extend edge1 by appending edge2
if (edge1.back() == edge2.front()) {
debug("appending: edge1.back()='%s' == edge2.front()='%s'", edge1.back(), edge2.front());
// Append all but the first element of edge2 (to avoid duplication)
for (size_t i = 1; i < edge2.size(); i++) {
debug(" appending edge2[%lu] = %s", i, edge2[i]);
edge1.push_back(edge2[i]);
}
edge2Joined = true;
break;
}
// Check if edge2.back() == edge1.front()
// This means we can extend edge1 by prepending edge2
if (edge2.back() == edge1.front()) {
debug("prepending: edge2.back()='%s' == edge1.front()='%s'", edge2.back(), edge1.front());
// Prepend all but the last element of edge2 (to avoid duplication)
for (int i = edge2.size() - 2; i >= 0; i--) {
debug(" prepending edge2[%d] = %s", i, edge2[i]);
edge1.push_front(edge2[i]);
}
edge2Joined = true;
break;
}
}
if (!edge2Joined) {
debug("edge2 is new, adding as separate multiedge");
multiedges.push_back(edge2);
}
}
debug("transformEdgesToMultiedges: result has %lu multiedges", multiedges.size());
}
} // namespace nix
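The joining behaviour of transformEdgesToMultiedges can be exercised in isolation. The following standalone sketch is not part of the diff; it assumes the header is reachable under the same "find-cycles.hh" include path this file uses, and the store paths are made up:

#include "find-cycles.hh"

#include <iostream>

int main()
{
    using namespace nix;

    // Three single edges that form the cycle a -> b -> c -> a.
    StoreCycleEdgeVec edges = {
        {"/nix/store/aaa-a/file", "/nix/store/bbb-b/file"},
        {"/nix/store/bbb-b/file", "/nix/store/ccc-c/file"},
        {"/nix/store/ccc-c/file", "/nix/store/aaa-a/file"},
    };

    StoreCycleEdgeVec multiedges;
    transformEdgesToMultiedges(edges, multiedges);

    // Expected: one multiedge a -> b -> c -> a, i.e. the three edges joined
    // end to end with the shared endpoints deduplicated.
    for (auto & path : multiedges) {
        for (auto & file : path)
            std::cout << file << "  ->  ";
        std::cout << "\n";
    }
}

With this input the greedy joining yields a single four-element path, matching the [AB, BC, CA] → [ABCA] example in the header documentation below.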


@@ -0,0 +1,69 @@
#pragma once
///@file
#include "nix/store/store-api.hh"
#include "nix/util/types.hh"
#include <string>
#include <deque>
#include <vector>
namespace nix {
/**
* Represents a cycle edge as a sequence of file paths.
* Uses deque to allow efficient prepend/append when joining edges.
*
* Example: {"/nix/store/abc-foo/file1", "/nix/store/def-bar/file2"}
* represents a reference from file1 to file2.
*/
typedef std::deque<std::string> StoreCycleEdge;
/**
* A collection of cycle edges found during scanning.
*/
typedef std::vector<StoreCycleEdge> StoreCycleEdgeVec;
/**
* Scan output paths to find cycle edges with detailed file paths.
*
* This is the second pass of cycle detection. The first pass (scanForReferences,
* followed by topologically sorting the outputs) only establishes that a cycle
* exists. This function pinpoints where the cycle occurs in the actual file
* system.
*
* @param path The store path to scan (e.g., an output directory)
* @param refs The set of potentially referenced store paths
* @param edges Output parameter that accumulates found cycle edges
*/
void scanForCycleEdges(const Path & path, const StorePathSet & refs, StoreCycleEdgeVec & edges);
/**
* Recursively scan files and directories for hash references.
*
* This function walks the file system tree and searches for store path hashes
* in file contents, symlinks, etc. When a hash is found, it attempts to
* reconstruct the full store path by checking what files actually exist.
*
* @param path Current path being scanned
* @param hashes Set of hash strings to look for (32-char base32 hashes)
* @param edges Output parameter that accumulates found cycle edges
* @param storeDir The store directory prefix (e.g., "/nix/store/")
*/
void scanForCycleEdges2(std::string path, const StringSet & hashes, StoreCycleEdgeVec & edges, std::string storeDir);
/**
* Transform individual edges into connected multi-edges (paths).
*
* Takes a list of edges like [AB, BC, CA] and connects them into
* longer paths like [ABCA]. This makes it easier to visualize the
* actual cycle paths.
*
* The algorithm is greedy: it tries to extend each edge by finding
* matching edges to prepend or append.
*
* @param edges Input edges to transform
* @param multiedges Output parameter with connected paths
*/
void transformEdgesToMultiedges(StoreCycleEdgeVec & edges, StoreCycleEdgeVec & multiedges);
} // namespace nix
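For orientation, here is a condensed sketch of how the three declarations above are meant to be combined. It mirrors the caller added to registerOutputs() further down; the output path and reference set are assumed to be supplied by that caller, reportCycles is an invented name, and the fmt include path is an assumption:

#include "build/find-cycles.hh" // include path as used by the builder code in this commit
#include "nix/util/fmt.hh"      // assumed location of fmt()

namespace nix {

// Hypothetical helper, not part of the commit: scan one realised output and
// render any cycles found under it as a human-readable report.
static std::string reportCycles(const Path & outputPath, const StorePathSet & refs)
{
    // Second pass of cycle detection: locate the exact files holding the references.
    StoreCycleEdgeVec edges;
    scanForCycleEdges(outputPath, refs, edges);

    // Join individual edges into longer paths so each cycle reads start to finish.
    StoreCycleEdgeVec multiedges;
    transformEdgesToMultiedges(edges, multiedges);

    std::string report;
    for (size_t i = 0; i < multiedges.size(); i++) {
        report += fmt("\nCycle %d:", i + 1);
        for (auto & file : multiedges[i])
            report += fmt("\n  → %s", file);
    }
    return report;
}

} // namespace nix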


@@ -267,6 +267,7 @@ sources = files(
'build/derivation-trampoline-goal.cc',
'build/drv-output-substitution-goal.cc',
'build/entry-points.cc',
'build/find-cycles.cc',
'build/goal.cc',
'build/substitution-goal.cc',
'build/worker.cc',


@@ -45,6 +45,7 @@
#include "store-config-private.hh"
#include "build/derivation-check.hh"
#include "build/find-cycles.hh"
#if NIX_WITH_AWS_AUTH
# include "nix/store/aws-creds.hh"
@@ -1473,43 +1474,100 @@ SingleDrvOutputs DerivationBuilderImpl::registerOutputs()
outputStats.insert_or_assign(outputName, std::move(st));
}
auto sortedOutputNames = topoSort(
outputsToSort,
{[&](const std::string & name) {
auto orifu = get(outputReferencesIfUnregistered, name);
if (!orifu)
throw BuildError(
BuildResult::Failure::OutputRejected,
"no output reference for '%s' in build of '%s'",
name,
store.printStorePath(drvPath));
return std::visit(
overloaded{
/* Since we'll use the already installed versions of these, we
can treat them as leaves and ignore any references they
have. */
[&](const AlreadyRegistered &) { return StringSet{}; },
[&](const PerhapsNeedToRegister & refs) {
StringSet referencedOutputs;
/* FIXME build inverted map up front so no quadratic waste here */
for (auto & r : refs.refs)
for (auto & [o, p] : scratchOutputs)
if (r == p)
referencedOutputs.insert(o);
return referencedOutputs;
std::vector<std::string> sortedOutputNames;
try {
sortedOutputNames = topoSort(
outputsToSort,
{[&](const std::string & name) {
auto orifu = get(outputReferencesIfUnregistered, name);
if (!orifu)
throw BuildError(
BuildResult::Failure::OutputRejected,
"no output reference for '%s' in build of '%s'",
name,
store.printStorePath(drvPath));
return std::visit(
overloaded{
/* Since we'll use the already installed versions of these, we
can treat them as leaves and ignore any references they
have. */
[&](const AlreadyRegistered &) { return StringSet{}; },
[&](const PerhapsNeedToRegister & refs) {
StringSet referencedOutputs;
/* FIXME build inverted map up front so no quadratic waste here */
for (auto & r : refs.refs)
for (auto & [o, p] : scratchOutputs)
if (r == p)
referencedOutputs.insert(o);
return referencedOutputs;
},
},
},
*orifu);
}},
{[&](const std::string & path, const std::string & parent) {
// TODO with more -vvvv also show the temporary paths for manual inspection.
return BuildError(
BuildResult::Failure::OutputRejected,
"cycle detected in build of '%s' in the references of output '%s' from output '%s'",
store.printStorePath(drvPath),
path,
parent);
}});
*orifu);
}},
{[&](const std::string & path, const std::string & parent) {
// TODO with more -vvvv also show the temporary paths for manual inspection.
return BuildError(
BuildResult::Failure::OutputRejected,
"cycle detected in build of '%s' in the references of output '%s' from output '%s'",
store.printStorePath(drvPath),
path,
parent);
}});
} catch (std::exception & e) {
debug("cycle detected during topoSort, analyzing for detailed error report");
// Scan all outputs for cycle edges with exact file paths
StoreCycleEdgeVec edges;
for (auto & [outputName, _] : drv.outputs) {
auto scratchOutput = get(scratchOutputs, outputName);
if (!scratchOutput)
continue;
auto actualPath = realPathInSandbox(store.printStorePath(*scratchOutput));
debug("scanning output '%s' at path '%s' for cycle edges", outputName, actualPath);
scanForCycleEdges(actualPath, referenceablePaths, edges);
}
if (edges.empty()) {
debug("no detailed cycle edges found, re-throwing original error");
throw;
}
debug("found %lu cycle edges, transforming to connected paths", edges.size());
// Transform individual edges into connected multi-edges (paths)
StoreCycleEdgeVec multiedges;
transformEdgesToMultiedges(edges, multiedges);
// Build detailed error message
std::string cycleDetails = fmt("Detailed cycle analysis found %d cycle path(s):", multiedges.size());
for (size_t i = 0; i < multiedges.size(); i++) {
auto & multiedge = multiedges[i];
cycleDetails += fmt("\n\nCycle %d:", i + 1);
for (auto & file : multiedge) {
cycleDetails += fmt("\n → %s", file);
}
}
cycleDetails +=
fmt("\n\nThis means there are circular references between output files.\n"
"The build cannot proceed because the outputs reference each other.");
// Add hint with temp paths for debugging
if (settings.keepFailed || verbosity >= lvlDebug) {
cycleDetails +=
fmt("\n\nNote: Build outputs are kept for inspection.\n"
"You can examine the files listed above to understand the cycle.");
}
// Throw new error with original message + cycle details
BuildError * buildErr = dynamic_cast<BuildError *>(&e);
std::string originalMsg = buildErr ? std::string(buildErr->msg()) : std::string(e.what());
throw BuildError(BuildResult::Failure::OutputRejected, "%s\n\n%s", originalMsg, cycleDetails);
}
std::reverse(sortedOutputNames.begin(), sortedOutputNames.end());
@@ -1848,12 +1906,61 @@ SingleDrvOutputs DerivationBuilderImpl::registerOutputs()
/* Register each output path as valid, and register the sets of
paths referenced by each of them. If there are cycles in the
outputs, this will fail. */
{
try {
ValidPathInfos infos2;
for (auto & [outputName, newInfo] : infos) {
infos2.insert_or_assign(newInfo.path, newInfo);
}
store.registerValidPaths(infos2);
} catch (BuildError & e) {
debug("cycle detected during registerValidPaths, analyzing for detailed error report");
// Scan all outputs for cycle edges with exact file paths
StoreCycleEdgeVec edges;
for (auto & [outputName, newInfo] : infos) {
auto actualPath = store.toRealPath(store.printStorePath(newInfo.path));
debug("scanning registered output '%s' at path '%s' for cycle edges", outputName, actualPath);
scanForCycleEdges(actualPath, referenceablePaths, edges);
}
if (edges.empty()) {
debug("no detailed cycle edges found, re-throwing original error");
throw;
}
debug("found %lu cycle edges, transforming to connected paths", edges.size());
// Transform individual edges into connected multi-edges (paths)
StoreCycleEdgeVec multiedges;
transformEdgesToMultiedges(edges, multiedges);
// Build detailed error message
std::string cycleDetails = fmt("Detailed cycle analysis found %d cycle path(s):", multiedges.size());
for (size_t i = 0; i < multiedges.size(); i++) {
auto & multiedge = multiedges[i];
cycleDetails += fmt("\n\nCycle %d:", i + 1);
for (auto & file : multiedge) {
cycleDetails += fmt("\n → %s", file);
}
}
cycleDetails +=
fmt("\n\nThis means there are circular references between output files.\n"
"The build cannot proceed because the outputs reference each other.");
// Add hint with temp paths for debugging
if (settings.keepFailed || verbosity >= lvlDebug) {
cycleDetails +=
fmt("\n\nNote: Build outputs were kept for inspection.\n"
"You can examine the files listed above to understand the cycle.");
}
// Throw new error with original message + cycle details
BuildError * buildErr = dynamic_cast<BuildError *>(&e);
std::string originalMsg = buildErr ? std::string(buildErr->msg()) : std::string(e.what());
throw BuildError(BuildResult::Failure::OutputRejected, "%s\n\n%s", originalMsg, cycleDetails);
}
/* If we made it this far, we are sure the output matches the


@@ -83,6 +83,8 @@ rec {
'';
};
# Test for cycle detection with detailed error messages
# This creates multiple cycles: a→b→c→a and a→c→b→a
cyclic =
(mkDerivation {
name = "cyclic-outputs";
@@ -92,10 +94,22 @@ rec {
"c"
];
builder = builtins.toFile "builder.sh" ''
mkdir $a $b $c
echo $a > $b/foo
echo $b > $c/bar
echo $c > $a/baz
mkdir -p $a/subdir $b/subdir $c/subdir
# First cycle: a → b → c → a
echo "$b/subdir/b-to-c" > $a/subdir/a-to-b
echo "$c/subdir/c-to-a" > $b/subdir/b-to-c
echo "$a/subdir/a-to-b" > $c/subdir/c-to-a
# Second cycle: a → c → b → a
echo "$c/subdir/c-to-b-2" > $a/subdir/a-to-c-2
echo "$b/subdir/b-to-a-2" > $c/subdir/c-to-b-2
echo "$a/subdir/a-to-c-2" > $b/subdir/b-to-a-2
# Non-cyclic reference (just for complexity)
echo "non-cyclic-data" > $a/data
echo "non-cyclic-data" > $b/data
echo "non-cyclic-data" > $c/data
'';
}).a;


@@ -91,9 +91,22 @@ nix-build multiple-outputs.nix -A a.first --no-out-link
# Cyclic outputs should be rejected.
echo "building cyclic..."
if nix-build multiple-outputs.nix -A cyclic --no-out-link; then
if cyclicOutput=$(nix-build multiple-outputs.nix -A cyclic --no-out-link 2>&1); then
echo "Cyclic outputs incorrectly accepted!"
exit 1
else
echo "Cyclic outputs correctly rejected"
# Verify error message mentions cycles
echo "$cyclicOutput" | grepQuiet "cycle"
# Enhanced cycle error messages were added in 2.33
if isDaemonNewer "2.33"; then
echo "$cyclicOutput" | grepQuiet "Detailed cycle analysis"
echo "$cyclicOutput" | grepQuiet "Cycle 1:"
# The error should mention actual file paths with subdirectories
echo "$cyclicOutput" | grepQuiet "subdir"
echo "Enhanced cycle error messages verified"
fi
fi
# Do a GC. This should leave an empty store.