diff --git a/src/libstore/build/find-cycles.cc b/src/libstore/build/find-cycles.cc
new file mode 100644
index 000000000..2ac63522a
--- /dev/null
+++ b/src/libstore/build/find-cycles.cc
@@ -0,0 +1,328 @@
+#include "find-cycles.hh"
+
+#include "nix/store/store-api.hh"
+#include "nix/util/file-system.hh"
+#include "nix/util/archive.hh"
+#include "nix/util/base-nix-32.hh"
+
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <filesystem>
+#include <map>
+#include <vector>
+
+namespace nix {
+
+// Hash length in characters (32 for base32-encoded sha256)
+static constexpr size_t refLength = StorePath::HashLen;
+
+// Maximum expected file path length for buffer carry-over
+static constexpr size_t MAX_FILEPATH_LENGTH = 1000;
+
+/* Entry point: collect the hash parts of all candidate references and
+   scan the tree rooted at `path` for occurrences of them. */
+void scanForCycleEdges(const Path & path, const StorePathSet & refs, StoreCycleEdgeVec & edges)
+{
+    StringSet hashes;
+    std::map<std::string, StorePath> hashPathMap;
+
+    // Extract the store directory from the path
+    // Example: /run/user/1000/nix-test/store/abc-foo -> /run/user/1000/nix-test/store/
+    auto storePrefixPath = std::filesystem::path(path);
+    storePrefixPath.remove_filename();
+    std::string storePrefix = storePrefixPath.string();
+
+    debug("scanForCycleEdges: storePrefixPath = %s", storePrefixPath.string());
+    debug("scanForCycleEdges: storePrefix = %s", storePrefix);
+
+    // Build map of hash -> StorePath and collect hashes to search for
+    for (auto & i : refs) {
+        std::string hashPart(i.hashPart());
+        auto inserted = hashPathMap.emplace(hashPart, i).second;
+        assert(inserted);
+        hashes.insert(hashPart);
+    }
+
+    scanForCycleEdges2(path, hashes, edges, storePrefix);
+}
+
+/* Recursive worker: scans regular files, directories and symlinks.
+   For regular files, reads in 64 KiB chunks and carries the tail of each
+   chunk into the next iteration so a reference spanning a chunk boundary
+   is still found. */
+void scanForCycleEdges2(std::string path, const StringSet & hashes, StoreCycleEdgeVec & edges, std::string storeDir)
+{
+    auto st = lstat(path);
+
+    debug("scanForCycleEdges2: scanning path = %s", path);
+
+    if (S_ISREG(st.st_mode)) {
+        // Handle regular files - scan contents for hash references
+        AutoCloseFD fd = open(path.c_str(), O_RDONLY | O_CLOEXEC);
+        if (!fd)
+            throw SysError("opening file '%1%'", path);
+
+        std::vector<char> buf(65536);
+        size_t rest = st.st_size;
+        size_t start = 0;
+
+        // Buffer to carry data between reads (for references spanning chunks)
+        std::vector<char> bufCarry(MAX_FILEPATH_LENGTH);
+        bool bufCarryUsed = false;
+        size_t bufCarrySize = 0;
+
+        while (rest > 0) {
+            auto n = std::min(rest, buf.size());
+            readFull(fd.get(), buf.data(), n);
+
+            debug("scanForCycleEdges2: read file %s: n = %lu", path, n);
+
+            // Check if we have carry-over data from previous iteration
+            if (bufCarryUsed) {
+                // Search in the overlap region (carry + start of new buffer)
+                size_t searchSize = std::min(bufCarrySize + n, MAX_FILEPATH_LENGTH);
+                std::vector<char> searchBuf(searchSize);
+
+                // Copy carry buffer
+                std::copy(bufCarry.begin(), bufCarry.begin() + bufCarrySize, searchBuf.begin());
+
+                // Copy start of new buffer
+                size_t newDataSize = searchSize - bufCarrySize;
+                std::copy(buf.begin(), buf.begin() + newDataSize, searchBuf.begin() + bufCarrySize);
+
+                // Search for hashes in the overlap region
+                for (size_t i = 0; i + refLength <= searchBuf.size();) {
+                    bool match = true;
+                    // Scan backwards so a non-base32 byte lets us skip ahead
+                    for (int j = refLength - 1; j >= 0; --j) {
+                        if (!BaseNix32::lookupReverse(searchBuf[i + j])) {
+                            i += j + 1;
+                            match = false;
+                            break;
+                        }
+                    }
+                    if (!match)
+                        continue;
+
+                    std::string hash(searchBuf.begin() + i, searchBuf.begin() + i + refLength);
+
+                    if (hashes.find(hash) != hashes.end()) {
+                        debug("scanForCycleEdges2: found hash '%s' in overlap region", hash);
+
+                        // Try to find the full path
+                        size_t storeDirLength = storeDir.size();
+                        if (i >= storeDirLength) {
+                            std::string targetPath = storeDir + hash;
+                            StoreCycleEdge edge;
+                            edge.push_back(path);
+                            edge.push_back(targetPath);
+                            edges.push_back(edge);
+                        }
+                    }
+                    ++i;
+                }
+
+                bufCarryUsed = false;
+            }
+
+            // Search in the main buffer
+            for (size_t i = 0; i + refLength <= n;) {
+                bool match = true;
+                for (int j = refLength - 1; j >= 0; --j) {
+                    if (!BaseNix32::lookupReverse(buf[i + j])) {
+                        i += j + 1;
+                        match = false;
+                        break;
+                    }
+                }
+                if (!match)
+                    continue;
+
+                // Found a potential hash
+                std::string hash(buf.begin() + i, buf.begin() + i + refLength);
+
+                if (hashes.find(hash) != hashes.end()) {
+                    debug("scanForCycleEdges2: found reference to hash '%s' at offset %lu", hash, start + i);
+
+                    // Try to reconstruct the full store path
+                    size_t storeDirLength = storeDir.size();
+                    std::string targetPath = storeDir + hash;
+                    std::string targetStorePath;
+
+                    // Check if we have storeDir + hash in the buffer
+                    if (i >= (size_t) storeDirLength
+                        && std::string(buf.begin() + i - storeDirLength, buf.begin() + i + refLength) == targetPath) {
+
+                        debug("scanForCycleEdges2: found store path prefix at offset %lu", start + i - storeDirLength);
+
+                        // Try to find the complete path by checking what exists on disk
+                        // We probe incrementally to find the longest existing path
+                        size_t testNameLength = refLength + 2; // Minimum: hash + "-x"
+                        size_t targetPathLastEnd = 0;
+                        bool foundStorePath = false;
+                        bool foundPath = false;
+
+                        for (; testNameLength < 255 && i + (size_t) targetPathLastEnd + (size_t) testNameLength <= n;
+                             testNameLength++) {
+                            std::string testPath(
+                                buf.begin() + i - storeDirLength, buf.begin() + i + targetPathLastEnd + testNameLength);
+
+                            struct stat testStat;
+                            if (stat(testPath.c_str(), &testStat) == 0) {
+                                debug("scanForCycleEdges2: found existing path: %s", testPath);
+
+                                if (!foundStorePath) {
+                                    // First match is the store path component
+                                    targetStorePath = testPath.substr(storeDirLength);
+                                    foundStorePath = true;
+                                }
+
+                                foundPath = true;
+                                targetPath = testPath;
+
+                                // Check if this is a directory (followed by '/');
+                                // guard the index, which may equal n at the buffer end
+                                if (i + targetPathLastEnd + testNameLength < n && buf[i + targetPathLastEnd + testNameLength] == '/') {
+                                    debug("scanForCycleEdges2: path is a directory, continuing");
+                                    targetPathLastEnd += testNameLength;
+                                    testNameLength = 1; // Reset for next component
+                                    continue;
+                                }
+                            }
+                        }
+
+                        if (foundPath) {
+                            debug("scanForCycleEdges2: cycle edge: %s -> %s", path, targetPath);
+                        } else {
+                            // Couldn't find exact path, use store path + hash
+                            targetPath = storeDir + hash;
+                        }
+                    }
+
+                    StoreCycleEdge edge;
+                    edge.push_back(path);
+                    edge.push_back(targetPath);
+                    edges.push_back(edge);
+                }
+                ++i;
+            }
+
+            start += n;
+            rest -= n;
+
+            // Carry over the end of the buffer for next iteration
+            if (n == buf.size() && rest > 0) {
+                size_t carrySize = std::min(MAX_FILEPATH_LENGTH, n);
+                std::copy(buf.end() - carrySize, buf.end(), bufCarry.begin());
+                bufCarrySize = carrySize;
+                bufCarryUsed = true;
+            }
+        }
+    } else if (S_ISDIR(st.st_mode)) {
+        // Handle directories - recursively scan contents
+        std::map<std::string, std::string> unhacked;
+
+        for (DirectoryIterator i(path); i != DirectoryIterator(); ++i) {
+            std::string entryName = i->path().filename().string();
+
+#ifdef __APPLE__
+            // Handle case-insensitive filesystems on macOS
+            std::string name(entryName);
+            size_t pos = entryName.find(caseHackSuffix);
+            if (pos != std::string::npos) {
+                debug("removing case hack suffix from '%s'", path + "/" + entryName);
+                name.erase(pos);
+            }
+            if (unhacked.find(name) != unhacked.end()) {
+                throw Error(
+                    "file name collision between '%1%' and '%2%'", path + "/" + unhacked[name], path + "/" + entryName);
+            }
+            unhacked[name] = entryName;
+#else
+            unhacked[entryName] = entryName;
+#endif
+        }
+
+        for (auto & [name, actualName] : unhacked) {
+            debug("scanForCycleEdges2: recursing into %s/%s", path, actualName);
+            scanForCycleEdges2(path + "/" + actualName, hashes, edges, storeDir);
+        }
+    } else if (S_ISLNK(st.st_mode)) {
+        // Handle symlinks - scan link target for hash references
+        std::string linkTarget = readLink(path);
+
+        debug("scanForCycleEdges2: scanning symlink %s -> %s", path, linkTarget);
+
+        for (size_t i = 0; i + refLength <= linkTarget.size();) {
+            bool match = true;
+            for (int j = refLength - 1; j >= 0; --j) {
+                if (!BaseNix32::lookupReverse(linkTarget[i + j])) {
+                    i += j + 1;
+                    match = false;
+                    break;
+                }
+            }
+            if (!match)
+                continue;
+
+            std::string ref(linkTarget.begin() + i, linkTarget.begin() + i + refLength);
+
+            if (hashes.find(ref) != hashes.end()) {
+                debug("scanForCycleEdges2: found reference '%s' in symlink at offset %lu", ref, i);
+
+                // Try to extract full path from link target
+                std::string targetPath = storeDir + ref;
+
+                StoreCycleEdge edge;
+                edge.push_back(path);
+                edge.push_back(targetPath);
+                edges.push_back(edge);
+            }
+            ++i;
+        }
+    } else {
+        throw Error("file '%1%' has an unsupported type", path);
+    }
+}
+
+/* Greedily join single edges into longer chains so the user sees whole
+   cycle paths (A -> B -> C -> A) instead of isolated edges. */
+void transformEdgesToMultiedges(StoreCycleEdgeVec & edges, StoreCycleEdgeVec & multiedges)
+{
+    debug("transformEdgesToMultiedges: processing %lu edges", edges.size());
+
+    for (auto & edge2 : edges) {
+        bool edge2Joined = false;
+
+        for (auto & edge1 : multiedges) {
+            debug("comparing edge1 (size=%lu) with edge2 (size=%lu)", edge1.size(), edge2.size());
+
+            // Check if edge1.back() == edge2.front()
+            // This means we can extend edge1 by appending edge2
+            if (edge1.back() == edge2.front()) {
+                debug("appending: edge1.back()='%s' == edge2.front()='%s'", edge1.back(), edge2.front());
+
+                // Append all but the first element of edge2 (to avoid duplication)
+                for (size_t i = 1; i < edge2.size(); i++) {
+                    debug("  appending edge2[%lu] = %s", i, edge2[i]);
+                    edge1.push_back(edge2[i]);
+                }
+                edge2Joined = true;
+                break;
+            }
+
+            // Check if edge2.back() == edge1.front()
+            // This means we can extend edge1 by prepending edge2
+            if (edge2.back() == edge1.front()) {
+                debug("prepending: edge2.back()='%s' == edge1.front()='%s'", edge2.back(), edge1.front());
+
+                // Prepend all but the last element of edge2 (to avoid duplication)
+                for (int i = edge2.size() - 2; i >= 0; i--) {
+                    debug("  prepending edge2[%d] = %s", i, edge2[i]);
+                    edge1.push_front(edge2[i]);
+                }
+                edge2Joined = true;
+                break;
+            }
+        }
+
+        if (!edge2Joined) {
+            debug("edge2 is new, adding as separate multiedge");
+            multiedges.push_back(edge2);
+        }
+    }
+
+    debug("transformEdgesToMultiedges: result has %lu multiedges", multiedges.size());
+}
+
+} // namespace nix
diff --git a/src/libstore/build/find-cycles.hh b/src/libstore/build/find-cycles.hh
new
file mode 100644
index 000000000..86f49c6a5
--- /dev/null
+++ b/src/libstore/build/find-cycles.hh
@@ -0,0 +1,69 @@
+#pragma once
+///@file
+
+#include "nix/store/store-api.hh"
+#include "nix/util/types.hh"
+
+#include <deque>
+#include <string>
+#include <vector>
+
+namespace nix {
+
+/**
+ * Represents a cycle edge as a sequence of file paths.
+ * Uses deque to allow efficient prepend/append when joining edges.
+ *
+ * Example: {"/nix/store/abc-foo/file1", "/nix/store/def-bar/file2"}
+ * represents a reference from file1 to file2.
+ */
+typedef std::deque<std::string> StoreCycleEdge;
+
+/**
+ * A collection of cycle edges found during scanning.
+ */
+typedef std::vector<StoreCycleEdge> StoreCycleEdgeVec;
+
+/**
+ * Scan output paths to find cycle edges with detailed file paths.
+ *
+ * This is the second pass of cycle detection. The first pass (scanForReferences)
+ * detects that a cycle exists. This function provides detailed information about
+ * where the cycles occur in the actual file system.
+ *
+ * @param path The store path to scan (e.g., an output directory)
+ * @param refs The set of potentially referenced store paths
+ * @param edges Output parameter that accumulates found cycle edges
+ */
+void scanForCycleEdges(const Path & path, const StorePathSet & refs, StoreCycleEdgeVec & edges);
+
+/**
+ * Recursively scan files and directories for hash references.
+ *
+ * This function walks the file system tree and searches for store path hashes
+ * in file contents, symlinks, etc. When a hash is found, it attempts to
+ * reconstruct the full store path by checking what files actually exist.
+ *
+ * @param path Current path being scanned
+ * @param hashes Set of hash strings to look for (32-char base32 hashes)
+ * @param edges Output parameter that accumulates found cycle edges
+ * @param storeDir The store directory prefix (e.g., "/nix/store/")
+ */
+void scanForCycleEdges2(std::string path, const StringSet & hashes, StoreCycleEdgeVec & edges, std::string storeDir);
+
+/**
+ * Transform individual edges into connected multi-edges (paths).
+ *
+ * Takes a list of edges like [A→B, B→C, C→A] and connects them into
+ * longer paths like [A→B→C→A]. This makes it easier to visualize the
+ * actual cycle paths.
+ *
+ * The algorithm is greedy: it tries to extend each edge by finding
+ * matching edges to prepend or append.
+ *
+ * @param edges Input edges to transform
+ * @param multiedges Output parameter with connected paths
+ */
+void transformEdgesToMultiedges(StoreCycleEdgeVec & edges, StoreCycleEdgeVec & multiedges);
+
+} // namespace nix
diff --git a/src/libstore/meson.build b/src/libstore/meson.build
index d1b3666cc..faab698dc 100644
--- a/src/libstore/meson.build
+++ b/src/libstore/meson.build
@@ -267,6 +267,7 @@ sources = files(
   'build/derivation-trampoline-goal.cc',
   'build/drv-output-substitution-goal.cc',
   'build/entry-points.cc',
+  'build/find-cycles.cc',
   'build/goal.cc',
   'build/substitution-goal.cc',
   'build/worker.cc',
diff --git a/src/libstore/unix/build/derivation-builder.cc b/src/libstore/unix/build/derivation-builder.cc
index d6abf85e3..bd00b7701 100644
--- a/src/libstore/unix/build/derivation-builder.cc
+++ b/src/libstore/unix/build/derivation-builder.cc
@@ -45,6 +45,7 @@
 #include "store-config-private.hh"
 
 #include "build/derivation-check.hh"
+#include "build/find-cycles.hh"
 
 #if NIX_WITH_AWS_AUTH
 #  include "nix/store/aws-creds.hh"
@@ -1473,43 +1474,100 @@ SingleDrvOutputs DerivationBuilderImpl::registerOutputs()
         outputStats.insert_or_assign(outputName, std::move(st));
     }
 
-    auto sortedOutputNames = topoSort(
-        outputsToSort,
-        {[&](const
std::string & name) { - auto orifu = get(outputReferencesIfUnregistered, name); - if (!orifu) - throw BuildError( - BuildResult::Failure::OutputRejected, - "no output reference for '%s' in build of '%s'", - name, - store.printStorePath(drvPath)); - return std::visit( - overloaded{ - /* Since we'll use the already installed versions of these, we - can treat them as leaves and ignore any references they - have. */ - [&](const AlreadyRegistered &) { return StringSet{}; }, - [&](const PerhapsNeedToRegister & refs) { - StringSet referencedOutputs; - /* FIXME build inverted map up front so no quadratic waste here */ - for (auto & r : refs.refs) - for (auto & [o, p] : scratchOutputs) - if (r == p) - referencedOutputs.insert(o); - return referencedOutputs; + std::vector sortedOutputNames; + + try { + sortedOutputNames = topoSort( + outputsToSort, + {[&](const std::string & name) { + auto orifu = get(outputReferencesIfUnregistered, name); + if (!orifu) + throw BuildError( + BuildResult::Failure::OutputRejected, + "no output reference for '%s' in build of '%s'", + name, + store.printStorePath(drvPath)); + return std::visit( + overloaded{ + /* Since we'll use the already installed versions of these, we + can treat them as leaves and ignore any references they + have. */ + [&](const AlreadyRegistered &) { return StringSet{}; }, + [&](const PerhapsNeedToRegister & refs) { + StringSet referencedOutputs; + /* FIXME build inverted map up front so no quadratic waste here */ + for (auto & r : refs.refs) + for (auto & [o, p] : scratchOutputs) + if (r == p) + referencedOutputs.insert(o); + return referencedOutputs; + }, }, - }, - *orifu); - }}, - {[&](const std::string & path, const std::string & parent) { - // TODO with more -vvvv also show the temporary paths for manual inspection. 
- return BuildError( - BuildResult::Failure::OutputRejected, - "cycle detected in build of '%s' in the references of output '%s' from output '%s'", - store.printStorePath(drvPath), - path, - parent); - }}); + *orifu); + }}, + {[&](const std::string & path, const std::string & parent) { + // TODO with more -vvvv also show the temporary paths for manual inspection. + return BuildError( + BuildResult::Failure::OutputRejected, + "cycle detected in build of '%s' in the references of output '%s' from output '%s'", + store.printStorePath(drvPath), + path, + parent); + }}); + } catch (std::exception & e) { + debug("cycle detected during topoSort, analyzing for detailed error report"); + + // Scan all outputs for cycle edges with exact file paths + StoreCycleEdgeVec edges; + for (auto & [outputName, _] : drv.outputs) { + auto scratchOutput = get(scratchOutputs, outputName); + if (!scratchOutput) + continue; + + auto actualPath = realPathInSandbox(store.printStorePath(*scratchOutput)); + debug("scanning output '%s' at path '%s' for cycle edges", outputName, actualPath); + + scanForCycleEdges(actualPath, referenceablePaths, edges); + } + + if (edges.empty()) { + debug("no detailed cycle edges found, re-throwing original error"); + throw; + } + + debug("found %lu cycle edges, transforming to connected paths", edges.size()); + + // Transform individual edges into connected multi-edges (paths) + StoreCycleEdgeVec multiedges; + transformEdgesToMultiedges(edges, multiedges); + + // Build detailed error message + std::string cycleDetails = fmt("Detailed cycle analysis found %d cycle path(s):", multiedges.size()); + + for (size_t i = 0; i < multiedges.size(); i++) { + auto & multiedge = multiedges[i]; + cycleDetails += fmt("\n\nCycle %d:", i + 1); + for (auto & file : multiedge) { + cycleDetails += fmt("\n → %s", file); + } + } + + cycleDetails += + fmt("\n\nThis means there are circular references between output files.\n" + "The build cannot proceed because the outputs reference 
each other."); + + // Add hint with temp paths for debugging + if (settings.keepFailed || verbosity >= lvlDebug) { + cycleDetails += + fmt("\n\nNote: Build outputs are kept for inspection.\n" + "You can examine the files listed above to understand the cycle."); + } + + // Throw new error with original message + cycle details + BuildError * buildErr = dynamic_cast(&e); + std::string originalMsg = buildErr ? std::string(buildErr->msg()) : std::string(e.what()); + throw BuildError(BuildResult::Failure::OutputRejected, "%s\n\n%s", originalMsg, cycleDetails); + } std::reverse(sortedOutputNames.begin(), sortedOutputNames.end()); @@ -1848,12 +1906,61 @@ SingleDrvOutputs DerivationBuilderImpl::registerOutputs() /* Register each output path as valid, and register the sets of paths referenced by each of them. If there are cycles in the outputs, this will fail. */ - { + try { ValidPathInfos infos2; for (auto & [outputName, newInfo] : infos) { infos2.insert_or_assign(newInfo.path, newInfo); } store.registerValidPaths(infos2); + } catch (BuildError & e) { + debug("cycle detected during registerValidPaths, analyzing for detailed error report"); + + // Scan all outputs for cycle edges with exact file paths + StoreCycleEdgeVec edges; + for (auto & [outputName, newInfo] : infos) { + auto actualPath = store.toRealPath(store.printStorePath(newInfo.path)); + debug("scanning registered output '%s' at path '%s' for cycle edges", outputName, actualPath); + + scanForCycleEdges(actualPath, referenceablePaths, edges); + } + + if (edges.empty()) { + debug("no detailed cycle edges found, re-throwing original error"); + throw; + } + + debug("found %lu cycle edges, transforming to connected paths", edges.size()); + + // Transform individual edges into connected multi-edges (paths) + StoreCycleEdgeVec multiedges; + transformEdgesToMultiedges(edges, multiedges); + + // Build detailed error message + std::string cycleDetails = fmt("Detailed cycle analysis found %d cycle path(s):", 
multiedges.size()); + + for (size_t i = 0; i < multiedges.size(); i++) { + auto & multiedge = multiedges[i]; + cycleDetails += fmt("\n\nCycle %d:", i + 1); + for (auto & file : multiedge) { + cycleDetails += fmt("\n → %s", file); + } + } + + cycleDetails += + fmt("\n\nThis means there are circular references between output files.\n" + "The build cannot proceed because the outputs reference each other."); + + // Add hint with temp paths for debugging + if (settings.keepFailed || verbosity >= lvlDebug) { + cycleDetails += + fmt("\n\nNote: Build outputs were kept for inspection.\n" + "You can examine the files listed above to understand the cycle."); + } + + // Throw new error with original message + cycle details + BuildError * buildErr = dynamic_cast(&e); + std::string originalMsg = buildErr ? std::string(buildErr->msg()) : std::string(e.what()); + throw BuildError(BuildResult::Failure::OutputRejected, "%s\n\n%s", originalMsg, cycleDetails); } /* If we made it this far, we are sure the output matches the diff --git a/tests/functional/multiple-outputs.nix b/tests/functional/multiple-outputs.nix index 2c9243097..87a29b19c 100644 --- a/tests/functional/multiple-outputs.nix +++ b/tests/functional/multiple-outputs.nix @@ -83,6 +83,8 @@ rec { ''; }; + # Test for cycle detection with detailed error messages + # This creates multiple cycles: a→b→c→a and a→c→b→a cyclic = (mkDerivation { name = "cyclic-outputs"; @@ -92,10 +94,22 @@ rec { "c" ]; builder = builtins.toFile "builder.sh" '' - mkdir $a $b $c - echo $a > $b/foo - echo $b > $c/bar - echo $c > $a/baz + mkdir -p $a/subdir $b/subdir $c/subdir + + # First cycle: a → b → c → a + echo "$b/subdir/b-to-c" > $a/subdir/a-to-b + echo "$c/subdir/c-to-a" > $b/subdir/b-to-c + echo "$a/subdir/a-to-b" > $c/subdir/c-to-a + + # Second cycle: a → c → b → a + echo "$c/subdir/c-to-b-2" > $a/subdir/a-to-c-2 + echo "$b/subdir/b-to-a-2" > $c/subdir/c-to-b-2 + echo "$a/subdir/a-to-c-2" > $b/subdir/b-to-a-2 + + # Non-cyclic reference (just 
for complexity) + echo "non-cyclic-data" > $a/data + echo "non-cyclic-data" > $b/data + echo "non-cyclic-data" > $c/data ''; }).a; diff --git a/tests/functional/multiple-outputs.sh b/tests/functional/multiple-outputs.sh index f703fb02b..b115af202 100755 --- a/tests/functional/multiple-outputs.sh +++ b/tests/functional/multiple-outputs.sh @@ -91,9 +91,22 @@ nix-build multiple-outputs.nix -A a.first --no-out-link # Cyclic outputs should be rejected. echo "building cyclic..." -if nix-build multiple-outputs.nix -A cyclic --no-out-link; then +if cyclicOutput=$(nix-build multiple-outputs.nix -A cyclic --no-out-link 2>&1); then echo "Cyclic outputs incorrectly accepted!" exit 1 +else + echo "Cyclic outputs correctly rejected" + # Verify error message mentions cycles + echo "$cyclicOutput" | grepQuiet "cycle" + + # Enhanced cycle error messages were added in 2.33 + if isDaemonNewer "2.33"; then + echo "$cyclicOutput" | grepQuiet "Detailed cycle analysis" + echo "$cyclicOutput" | grepQuiet "Cycle 1:" + # The error should mention actual file paths with subdirectories + echo "$cyclicOutput" | grepQuiet "subdir" + echo "Enhanced cycle error messages verified" + fi fi # Do a GC. This should leave an empty store.