1
1
Fork 0
mirror of https://github.com/NixOS/nix.git synced 2025-11-14 06:22:42 +01:00

feat(libstore): make cycle errors more verbose

Co-Authored-By: Milan Hauth <milahu@gmail.com>
This commit is contained in:
Bernardo Meurer Costa 2025-10-11 18:42:18 +00:00
parent bef3c37cb2
commit d44c8c8b69
No known key found for this signature in database
6 changed files with 574 additions and 42 deletions

View file

@ -0,0 +1,328 @@
#include "find-cycles.hh"
#include "nix/store/store-api.hh"
#include "nix/util/file-system.hh"
#include "nix/util/archive.hh"
#include "nix/util/base-nix-32.hh"
#include <filesystem>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
namespace nix {
// Hash length in characters (32 for base32-encoded sha256)
static constexpr size_t refLength = StorePath::HashLen;
// Maximum expected file path length for buffer carry-over
static constexpr size_t MAX_FILEPATH_LENGTH = 1000;
/**
 * Scan a store path for cycle edges among the given references.
 *
 * Extracts the store directory prefix from @p path, collects the hash
 * parts of all candidate references, and delegates the recursive file
 * system scan to scanForCycleEdges2().
 *
 * @param path The store path to scan (e.g., an output directory)
 * @param refs The set of potentially referenced store paths
 * @param edges Output parameter that accumulates found cycle edges
 */
void scanForCycleEdges(const Path & path, const StorePathSet & refs, StoreCycleEdgeVec & edges)
{
    StringSet hashes;

    // Extract the store directory from the path
    // Example: /run/user/1000/nix-test/store/abc-foo -> /run/user/1000/nix-test/store/
    auto storePrefixPath = std::filesystem::path(path);
    storePrefixPath.remove_filename();
    std::string storePrefix = storePrefixPath.string();

    debug("scanForCycleEdges: storePrefixPath = %s", storePrefixPath.string());
    debug("scanForCycleEdges: storePrefix = %s", storePrefix);

    // Collect the hash parts to search for. Hash parts are expected to be
    // unique across distinct store paths; assert this instead of building
    // a hash -> StorePath map that was never read.
    for (auto & i : refs) {
        [[maybe_unused]] auto inserted = hashes.insert(std::string(i.hashPart())).second;
        assert(inserted);
    }

    scanForCycleEdges2(path, hashes, edges, storePrefix);
}
/**
 * Recursively scan a file system tree for store-path hash references.
 *
 * Regular files are read in 64 KiB chunks and scanned for 32-character
 * base-32 hash parts; directories are recursed into; symlink targets are
 * scanned as strings. Each found reference is recorded as a (path ->
 * target) edge in @p edges.
 *
 * @param path Current path being scanned
 * @param hashes Hash parts to look for
 * @param edges Output parameter that accumulates found cycle edges
 * @param storeDir The store directory prefix (e.g., "/nix/store/")
 */
void scanForCycleEdges2(std::string path, const StringSet & hashes, StoreCycleEdgeVec & edges, std::string storeDir)
{
    auto st = lstat(path);
    debug("scanForCycleEdges2: scanning path = %s", path);
    if (S_ISREG(st.st_mode)) {
        // Handle regular files - scan contents for hash references.
        AutoCloseFD fd = open(path.c_str(), O_RDONLY | O_CLOEXEC);
        if (!fd)
            throw SysError("opening file '%1%'", path);
        std::vector<char> buf(65536);
        size_t rest = st.st_size;
        // Absolute file offset of buf[0]; only used for debug output.
        size_t start = 0;
        // Buffer to carry data between reads (for references spanning chunks)
        std::vector<char> bufCarry(MAX_FILEPATH_LENGTH);
        bool bufCarryUsed = false;
        size_t bufCarrySize = 0;
        while (rest > 0) {
            auto n = std::min(rest, buf.size());
            readFull(fd.get(), buf.data(), n);
            debug("scanForCycleEdges2: read file %s: n = %lu", path, n);
            // Check if we have carry-over data from previous iteration.
            if (bufCarryUsed) {
                // Build the overlap region: tail of the previous chunk followed
                // by the head of the current one, so a hash that straddles the
                // chunk boundary is still found.
                size_t searchSize = std::min(bufCarrySize + n, MAX_FILEPATH_LENGTH);
                std::vector<char> searchBuf(searchSize);
                // Copy carry buffer
                std::copy(bufCarry.begin(), bufCarry.begin() + bufCarrySize, searchBuf.begin());
                // Copy start of new buffer
                size_t newDataSize = searchSize - bufCarrySize;
                std::copy(buf.begin(), buf.begin() + newDataSize, searchBuf.begin() + bufCarrySize);
                // Sliding-window scan; the inner loop checks right-to-left so a
                // non-base-32 byte lets us skip the window past it at once.
                // NOTE(review): hashes in the carried-over bytes were already
                // visible to the previous main-buffer scan, so this can report
                // the same reference twice near a chunk boundary — confirm.
                for (size_t i = 0; i + refLength <= searchBuf.size();) {
                    bool match = true;
                    for (int j = refLength - 1; j >= 0; --j) {
                        if (!BaseNix32::lookupReverse(searchBuf[i + j])) {
                            i += j + 1;
                            match = false;
                            break;
                        }
                    }
                    if (!match)
                        continue;
                    std::string hash(searchBuf.begin() + i, searchBuf.begin() + i + refLength);
                    if (hashes.find(hash) != hashes.end()) {
                        debug("scanForCycleEdges2: found hash '%s' in overlap region", hash);
                        // Try to find the full path
                        size_t storeDirLength = storeDir.size();
                        // NOTE(review): unlike the main-buffer scan below, this
                        // only checks there is *room* for the store dir before
                        // the hash; it never verifies those bytes actually
                        // equal storeDir — confirm intended.
                        if (i >= storeDirLength) {
                            std::string targetPath = storeDir + hash;
                            StoreCycleEdge edge;
                            edge.push_back(path);
                            edge.push_back(targetPath);
                            edges.push_back(edge);
                        }
                    }
                    ++i;
                }
                bufCarryUsed = false;
            }
            // Search in the main buffer (same sliding-window scheme as above).
            for (size_t i = 0; i + refLength <= n;) {
                bool match = true;
                for (int j = refLength - 1; j >= 0; --j) {
                    if (!BaseNix32::lookupReverse(buf[i + j])) {
                        i += j + 1;
                        match = false;
                        break;
                    }
                }
                if (!match)
                    continue;
                // Found a potential hash
                std::string hash(buf.begin() + i, buf.begin() + i + refLength);
                if (hashes.find(hash) != hashes.end()) {
                    debug("scanForCycleEdges2: found reference to hash '%s' at offset %lu", hash, start + i);
                    // Try to reconstruct the full store path
                    size_t storeDirLength = storeDir.size();
                    std::string targetPath = storeDir + hash;
                    std::string targetStorePath;
                    // Check if we have storeDir + hash in the buffer
                    if (i >= (size_t) storeDirLength
                        && std::string(buf.begin() + i - storeDirLength, buf.begin() + i + refLength) == targetPath) {
                        debug("scanForCycleEdges2: found store path prefix at offset %lu", start + i - storeDirLength);
                        // Try to find the complete path by checking what exists on disk.
                        // We probe incrementally to find the longest existing path:
                        // grow the candidate name one byte at a time and stat() it.
                        size_t testNameLength = refLength + 2; // Minimum: hash + "-x"
                        size_t targetPathLastEnd = 0;          // end offset of the last confirmed directory component
                        bool foundStorePath = false;
                        bool foundPath = false;
                        for (; testNameLength < 255 && i + (size_t) targetPathLastEnd + (size_t) testNameLength <= n;
                             testNameLength++) {
                            std::string testPath(
                                buf.begin() + i - storeDirLength, buf.begin() + i + targetPathLastEnd + testNameLength);
                            struct stat testStat;
                            if (stat(testPath.c_str(), &testStat) == 0) {
                                debug("scanForCycleEdges2: found existing path: %s", testPath);
                                if (!foundStorePath) {
                                    // First match is the store path component
                                    targetStorePath = testPath.substr(storeDirLength);
                                    foundStorePath = true;
                                }
                                foundPath = true;
                                targetPath = testPath;
                                // Check if this is a directory (followed by '/')
                                // NOTE(review): the loop condition allows this index
                                // to equal n, so this can read one byte past the
                                // valid data (and past the buffer when n ==
                                // buf.size()) — confirm and bound-check.
                                if (buf[i + targetPathLastEnd + testNameLength] == '/') {
                                    debug("scanForCycleEdges2: path is a directory, continuing");
                                    targetPathLastEnd += testNameLength;
                                    testNameLength = 1; // Reset for next component
                                    continue;
                                }
                            }
                        }
                        if (foundPath) {
                            debug("scanForCycleEdges2: cycle edge: %s -> %s", path, targetPath);
                        } else {
                            // Couldn't find exact path, use store path + hash
                            targetPath = storeDir + hash;
                        }
                    }
                    StoreCycleEdge edge;
                    edge.push_back(path);
                    edge.push_back(targetPath);
                    edges.push_back(edge);
                }
                ++i;
            }
            start += n;
            rest -= n;
            // Carry over the end of the buffer for next iteration, but only
            // when the chunk was full and more data remains to be read.
            if (n == buf.size() && rest > 0) {
                size_t carrySize = std::min(MAX_FILEPATH_LENGTH, n);
                std::copy(buf.end() - carrySize, buf.end(), bufCarry.begin());
                bufCarrySize = carrySize;
                bufCarryUsed = true;
            }
        }
    } else if (S_ISDIR(st.st_mode)) {
        // Handle directories - recursively scan contents.
        // `unhacked` maps the logical entry name to the on-disk name.
        std::map<std::string, std::string> unhacked;
        for (DirectoryIterator i(path); i != DirectoryIterator(); ++i) {
            std::string entryName = i->path().filename().string();
#ifdef __APPLE__
            // Handle case-insensitive filesystems on macOS: strip the case
            // hack suffix and detect collisions between unhacked names.
            std::string name(entryName);
            size_t pos = entryName.find(caseHackSuffix);
            if (pos != std::string::npos) {
                debug("removing case hack suffix from '%s'", path + "/" + entryName);
                name.erase(pos);
            }
            if (unhacked.find(name) != unhacked.end()) {
                throw Error(
                    "file name collision between '%1%' and '%2%'", path + "/" + unhacked[name], path + "/" + entryName);
            }
            unhacked[name] = entryName;
#else
            unhacked[entryName] = entryName;
#endif
        }
        for (auto & [name, actualName] : unhacked) {
            debug("scanForCycleEdges2: recursing into %s/%s", path, actualName);
            scanForCycleEdges2(path + "/" + actualName, hashes, edges, storeDir);
        }
    } else if (S_ISLNK(st.st_mode)) {
        // Handle symlinks - scan the link target string for hash references
        // (same sliding-window scan as for file contents).
        std::string linkTarget = readLink(path);
        debug("scanForCycleEdges2: scanning symlink %s -> %s", path, linkTarget);
        for (size_t i = 0; i + refLength <= linkTarget.size();) {
            bool match = true;
            for (int j = refLength - 1; j >= 0; --j) {
                if (!BaseNix32::lookupReverse(linkTarget[i + j])) {
                    i += j + 1;
                    match = false;
                    break;
                }
            }
            if (!match)
                continue;
            std::string ref(linkTarget.begin() + i, linkTarget.begin() + i + refLength);
            if (hashes.find(ref) != hashes.end()) {
                debug("scanForCycleEdges2: found reference '%s' in symlink at offset %lu", ref, i);
                // Try to extract full path from link target
                std::string targetPath = storeDir + ref;
                StoreCycleEdge edge;
                edge.push_back(path);
                edge.push_back(targetPath);
                edges.push_back(edge);
            }
            ++i;
        }
    } else {
        // Sockets, FIFOs, devices etc. should never appear in a store path.
        throw Error("file '%1%' has an unsupported type", path);
    }
}
/**
 * Greedily connect individual edges into longer multi-edges (paths).
 *
 * Each input edge is merged into the first existing multiedge whose tail
 * matches the edge's head (append) or whose head matches the edge's tail
 * (prepend); otherwise it starts a new multiedge.
 *
 * @param edges Input edges to transform
 * @param multiedges Output parameter with connected paths
 */
void transformEdgesToMultiedges(StoreCycleEdgeVec & edges, StoreCycleEdgeVec & multiedges)
{
    debug("transformEdgesToMultiedges: processing %lu edges", edges.size());
    for (auto & candidate : edges) {
        bool merged = false;
        for (auto & chain : multiedges) {
            debug("comparing edge1 (size=%lu) with edge2 (size=%lu)", chain.size(), candidate.size());
            if (chain.back() == candidate.front()) {
                // The chain's tail meets the candidate's head: extend the
                // chain by appending the candidate (skipping its first
                // element, which would otherwise be duplicated).
                debug("appending: edge1.back()='%s' == edge2.front()='%s'", chain.back(), candidate.front());
                for (size_t k = 1; k < candidate.size(); k++) {
                    debug("  appending edge2[%lu] = %s", k, candidate[k]);
                    chain.push_back(candidate[k]);
                }
                merged = true;
                break;
            }
            if (candidate.back() == chain.front()) {
                // The candidate's tail meets the chain's head: extend the
                // chain by prepending the candidate (skipping its last
                // element to avoid duplication).
                debug("prepending: edge2.back()='%s' == edge1.front()='%s'", candidate.back(), chain.front());
                for (int k = candidate.size() - 2; k >= 0; k--) {
                    debug("  prepending edge2[%d] = %s", k, candidate[k]);
                    chain.push_front(candidate[k]);
                }
                merged = true;
                break;
            }
        }
        if (merged)
            continue;
        debug("edge2 is new, adding as separate multiedge");
        multiedges.push_back(candidate);
    }
    debug("transformEdgesToMultiedges: result has %lu multiedges", multiedges.size());
}
} // namespace nix

View file

@ -0,0 +1,69 @@
#pragma once
///@file
#include "nix/store/store-api.hh"
#include "nix/util/types.hh"

#include <string>
#include <deque>
#include <vector>

namespace nix {

/**
 * Represents a cycle edge as a sequence of file paths.
 * Uses deque to allow efficient prepend/append when joining edges.
 *
 * Example: {"/nix/store/abc-foo/file1", "/nix/store/def-bar/file2"}
 * represents a reference from file1 to file2.
 */
using StoreCycleEdge = std::deque<std::string>;

/**
 * A collection of cycle edges found during scanning.
 */
using StoreCycleEdgeVec = std::vector<StoreCycleEdge>;

/**
 * Scan output paths to find cycle edges with detailed file paths.
 *
 * This is the second pass of cycle detection. The first pass (scanForReferences)
 * detects that a cycle exists. This function provides detailed information about
 * where the cycles occur in the actual file system.
 *
 * @param path The store path to scan (e.g., an output directory)
 * @param refs The set of potentially referenced store paths
 * @param edges Output parameter that accumulates found cycle edges
 */
void scanForCycleEdges(const Path & path, const StorePathSet & refs, StoreCycleEdgeVec & edges);

/**
 * Recursively scan files and directories for hash references.
 *
 * This function walks the file system tree and searches for store path hashes
 * in file contents, symlinks, etc. When a hash is found, it attempts to
 * reconstruct the full store path by checking what files actually exist.
 *
 * @param path Current path being scanned
 * @param hashes Set of hash strings to look for (32-char base32 hashes)
 * @param edges Output parameter that accumulates found cycle edges
 * @param storeDir The store directory prefix (e.g., "/nix/store/")
 */
void scanForCycleEdges2(std::string path, const StringSet & hashes, StoreCycleEdgeVec & edges, std::string storeDir);

/**
 * Transform individual edges into connected multi-edges (paths).
 *
 * Takes a list of edges like [AB, BC, CA] and connects them into
 * longer paths like [ABCA]. This makes it easier to visualize the
 * actual cycle paths.
 *
 * The algorithm is greedy: it tries to extend each edge by finding
 * matching edges to prepend or append.
 *
 * @param edges Input edges to transform
 * @param multiedges Output parameter with connected paths
 */
void transformEdgesToMultiedges(StoreCycleEdgeVec & edges, StoreCycleEdgeVec & multiedges);

} // namespace nix

View file

@ -267,6 +267,7 @@ sources = files(
'build/derivation-trampoline-goal.cc', 'build/derivation-trampoline-goal.cc',
'build/drv-output-substitution-goal.cc', 'build/drv-output-substitution-goal.cc',
'build/entry-points.cc', 'build/entry-points.cc',
'build/find-cycles.cc',
'build/goal.cc', 'build/goal.cc',
'build/substitution-goal.cc', 'build/substitution-goal.cc',
'build/worker.cc', 'build/worker.cc',

View file

@ -45,6 +45,7 @@
#include "store-config-private.hh" #include "store-config-private.hh"
#include "build/derivation-check.hh" #include "build/derivation-check.hh"
#include "build/find-cycles.hh"
#if NIX_WITH_AWS_AUTH #if NIX_WITH_AWS_AUTH
# include "nix/store/aws-creds.hh" # include "nix/store/aws-creds.hh"
@ -1473,43 +1474,100 @@ SingleDrvOutputs DerivationBuilderImpl::registerOutputs()
outputStats.insert_or_assign(outputName, std::move(st)); outputStats.insert_or_assign(outputName, std::move(st));
} }
auto sortedOutputNames = topoSort( std::vector<std::string> sortedOutputNames;
outputsToSort,
{[&](const std::string & name) { try {
auto orifu = get(outputReferencesIfUnregistered, name); sortedOutputNames = topoSort(
if (!orifu) outputsToSort,
throw BuildError( {[&](const std::string & name) {
BuildResult::Failure::OutputRejected, auto orifu = get(outputReferencesIfUnregistered, name);
"no output reference for '%s' in build of '%s'", if (!orifu)
name, throw BuildError(
store.printStorePath(drvPath)); BuildResult::Failure::OutputRejected,
return std::visit( "no output reference for '%s' in build of '%s'",
overloaded{ name,
/* Since we'll use the already installed versions of these, we store.printStorePath(drvPath));
can treat them as leaves and ignore any references they return std::visit(
have. */ overloaded{
[&](const AlreadyRegistered &) { return StringSet{}; }, /* Since we'll use the already installed versions of these, we
[&](const PerhapsNeedToRegister & refs) { can treat them as leaves and ignore any references they
StringSet referencedOutputs; have. */
/* FIXME build inverted map up front so no quadratic waste here */ [&](const AlreadyRegistered &) { return StringSet{}; },
for (auto & r : refs.refs) [&](const PerhapsNeedToRegister & refs) {
for (auto & [o, p] : scratchOutputs) StringSet referencedOutputs;
if (r == p) /* FIXME build inverted map up front so no quadratic waste here */
referencedOutputs.insert(o); for (auto & r : refs.refs)
return referencedOutputs; for (auto & [o, p] : scratchOutputs)
if (r == p)
referencedOutputs.insert(o);
return referencedOutputs;
},
}, },
}, *orifu);
*orifu); }},
}}, {[&](const std::string & path, const std::string & parent) {
{[&](const std::string & path, const std::string & parent) { // TODO with more -vvvv also show the temporary paths for manual inspection.
// TODO with more -vvvv also show the temporary paths for manual inspection. return BuildError(
return BuildError( BuildResult::Failure::OutputRejected,
BuildResult::Failure::OutputRejected, "cycle detected in build of '%s' in the references of output '%s' from output '%s'",
"cycle detected in build of '%s' in the references of output '%s' from output '%s'", store.printStorePath(drvPath),
store.printStorePath(drvPath), path,
path, parent);
parent); }});
}}); } catch (std::exception & e) {
debug("cycle detected during topoSort, analyzing for detailed error report");
// Scan all outputs for cycle edges with exact file paths
StoreCycleEdgeVec edges;
for (auto & [outputName, _] : drv.outputs) {
auto scratchOutput = get(scratchOutputs, outputName);
if (!scratchOutput)
continue;
auto actualPath = realPathInSandbox(store.printStorePath(*scratchOutput));
debug("scanning output '%s' at path '%s' for cycle edges", outputName, actualPath);
scanForCycleEdges(actualPath, referenceablePaths, edges);
}
if (edges.empty()) {
debug("no detailed cycle edges found, re-throwing original error");
throw;
}
debug("found %lu cycle edges, transforming to connected paths", edges.size());
// Transform individual edges into connected multi-edges (paths)
StoreCycleEdgeVec multiedges;
transformEdgesToMultiedges(edges, multiedges);
// Build detailed error message
std::string cycleDetails = fmt("Detailed cycle analysis found %d cycle path(s):", multiedges.size());
for (size_t i = 0; i < multiedges.size(); i++) {
auto & multiedge = multiedges[i];
cycleDetails += fmt("\n\nCycle %d:", i + 1);
for (auto & file : multiedge) {
cycleDetails += fmt("\n → %s", file);
}
}
cycleDetails +=
fmt("\n\nThis means there are circular references between output files.\n"
"The build cannot proceed because the outputs reference each other.");
// Add hint with temp paths for debugging
if (settings.keepFailed || verbosity >= lvlDebug) {
cycleDetails +=
fmt("\n\nNote: Build outputs are kept for inspection.\n"
"You can examine the files listed above to understand the cycle.");
}
// Throw new error with original message + cycle details
BuildError * buildErr = dynamic_cast<BuildError *>(&e);
std::string originalMsg = buildErr ? std::string(buildErr->msg()) : std::string(e.what());
throw BuildError(BuildResult::Failure::OutputRejected, "%s\n\n%s", originalMsg, cycleDetails);
}
std::reverse(sortedOutputNames.begin(), sortedOutputNames.end()); std::reverse(sortedOutputNames.begin(), sortedOutputNames.end());
@ -1848,12 +1906,61 @@ SingleDrvOutputs DerivationBuilderImpl::registerOutputs()
/* Register each output path as valid, and register the sets of /* Register each output path as valid, and register the sets of
paths referenced by each of them. If there are cycles in the paths referenced by each of them. If there are cycles in the
outputs, this will fail. */ outputs, this will fail. */
{ try {
ValidPathInfos infos2; ValidPathInfos infos2;
for (auto & [outputName, newInfo] : infos) { for (auto & [outputName, newInfo] : infos) {
infos2.insert_or_assign(newInfo.path, newInfo); infos2.insert_or_assign(newInfo.path, newInfo);
} }
store.registerValidPaths(infos2); store.registerValidPaths(infos2);
} catch (BuildError & e) {
debug("cycle detected during registerValidPaths, analyzing for detailed error report");
// Scan all outputs for cycle edges with exact file paths
StoreCycleEdgeVec edges;
for (auto & [outputName, newInfo] : infos) {
auto actualPath = store.toRealPath(store.printStorePath(newInfo.path));
debug("scanning registered output '%s' at path '%s' for cycle edges", outputName, actualPath);
scanForCycleEdges(actualPath, referenceablePaths, edges);
}
if (edges.empty()) {
debug("no detailed cycle edges found, re-throwing original error");
throw;
}
debug("found %lu cycle edges, transforming to connected paths", edges.size());
// Transform individual edges into connected multi-edges (paths)
StoreCycleEdgeVec multiedges;
transformEdgesToMultiedges(edges, multiedges);
// Build detailed error message
std::string cycleDetails = fmt("Detailed cycle analysis found %d cycle path(s):", multiedges.size());
for (size_t i = 0; i < multiedges.size(); i++) {
auto & multiedge = multiedges[i];
cycleDetails += fmt("\n\nCycle %d:", i + 1);
for (auto & file : multiedge) {
cycleDetails += fmt("\n → %s", file);
}
}
cycleDetails +=
fmt("\n\nThis means there are circular references between output files.\n"
"The build cannot proceed because the outputs reference each other.");
// Add hint with temp paths for debugging
if (settings.keepFailed || verbosity >= lvlDebug) {
cycleDetails +=
fmt("\n\nNote: Build outputs were kept for inspection.\n"
"You can examine the files listed above to understand the cycle.");
}
// Throw new error with original message + cycle details
BuildError * buildErr = dynamic_cast<BuildError *>(&e);
std::string originalMsg = buildErr ? std::string(buildErr->msg()) : std::string(e.what());
throw BuildError(BuildResult::Failure::OutputRejected, "%s\n\n%s", originalMsg, cycleDetails);
} }
/* If we made it this far, we are sure the output matches the /* If we made it this far, we are sure the output matches the

View file

@ -83,6 +83,8 @@ rec {
''; '';
}; };
# Test for cycle detection with detailed error messages
# This creates multiple cycles: a→b→c→a and a→c→b→a
cyclic = cyclic =
(mkDerivation { (mkDerivation {
name = "cyclic-outputs"; name = "cyclic-outputs";
@ -92,10 +94,22 @@ rec {
"c" "c"
]; ];
builder = builtins.toFile "builder.sh" '' builder = builtins.toFile "builder.sh" ''
mkdir $a $b $c mkdir -p $a/subdir $b/subdir $c/subdir
echo $a > $b/foo
echo $b > $c/bar # First cycle: a → b → c → a
echo $c > $a/baz echo "$b/subdir/b-to-c" > $a/subdir/a-to-b
echo "$c/subdir/c-to-a" > $b/subdir/b-to-c
echo "$a/subdir/a-to-b" > $c/subdir/c-to-a
# Second cycle: a → c → b → a
echo "$c/subdir/c-to-b-2" > $a/subdir/a-to-c-2
echo "$b/subdir/b-to-a-2" > $c/subdir/c-to-b-2
echo "$a/subdir/a-to-c-2" > $b/subdir/b-to-a-2
# Non-cyclic reference (just for complexity)
echo "non-cyclic-data" > $a/data
echo "non-cyclic-data" > $b/data
echo "non-cyclic-data" > $c/data
''; '';
}).a; }).a;

View file

@ -91,9 +91,22 @@ nix-build multiple-outputs.nix -A a.first --no-out-link
# Cyclic outputs should be rejected. # Cyclic outputs should be rejected.
echo "building cyclic..." echo "building cyclic..."
if nix-build multiple-outputs.nix -A cyclic --no-out-link; then if cyclicOutput=$(nix-build multiple-outputs.nix -A cyclic --no-out-link 2>&1); then
echo "Cyclic outputs incorrectly accepted!" echo "Cyclic outputs incorrectly accepted!"
exit 1 exit 1
else
echo "Cyclic outputs correctly rejected"
# Verify error message mentions cycles
echo "$cyclicOutput" | grepQuiet "cycle"
# Enhanced cycle error messages were added in 2.33
if isDaemonNewer "2.33"; then
echo "$cyclicOutput" | grepQuiet "Detailed cycle analysis"
echo "$cyclicOutput" | grepQuiet "Cycle 1:"
# The error should mention actual file paths with subdirectories
echo "$cyclicOutput" | grepQuiet "subdir"
echo "Enhanced cycle error messages verified"
fi
fi fi
# Do a GC. This should leave an empty store. # Do a GC. This should leave an empty store.