mirror of
https://github.com/NixOS/nix.git
synced 2025-11-12 05:26:02 +01:00
refactor(libstore/find-cycles): use RefScanSink
This commit is contained in:
parent
d44c8c8b69
commit
f7c4bcee9d
2 changed files with 115 additions and 193 deletions
|
|
@ -3,7 +3,6 @@
|
||||||
#include "nix/store/store-api.hh"
|
#include "nix/store/store-api.hh"
|
||||||
#include "nix/util/file-system.hh"
|
#include "nix/util/file-system.hh"
|
||||||
#include "nix/util/archive.hh"
|
#include "nix/util/archive.hh"
|
||||||
#include "nix/util/base-nix-32.hh"
|
|
||||||
|
|
||||||
#include <filesystem>
|
#include <filesystem>
|
||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
|
|
@ -16,8 +15,50 @@ namespace nix {
|
||||||
// Hash length in characters (32 for base32-encoded sha256)
|
// Hash length in characters (32 for base32-encoded sha256)
|
||||||
static constexpr size_t refLength = StorePath::HashLen;
|
static constexpr size_t refLength = StorePath::HashLen;
|
||||||
|
|
||||||
// Maximum expected file path length for buffer carry-over
|
CycleEdgeScanSink::CycleEdgeScanSink(
|
||||||
static constexpr size_t MAX_FILEPATH_LENGTH = 1000;
|
StringSet && hashes, std::map<std::string, StorePath> && backMap, std::string storeDir)
|
||||||
|
: RefScanSink(std::move(hashes))
|
||||||
|
, hashPathMap(std::move(backMap))
|
||||||
|
, storeDir(std::move(storeDir))
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
void CycleEdgeScanSink::setCurrentPath(const std::string & path)
|
||||||
|
{
|
||||||
|
currentFilePath = path;
|
||||||
|
}
|
||||||
|
|
||||||
|
void CycleEdgeScanSink::operator()(std::string_view data)
|
||||||
|
{
|
||||||
|
// Track what hashes we've already seen
|
||||||
|
auto seenBefore = getResult();
|
||||||
|
|
||||||
|
// Call parent's operator() to do the actual hash searching
|
||||||
|
// This reuses all the proven buffer boundary handling logic
|
||||||
|
RefScanSink::operator()(data);
|
||||||
|
|
||||||
|
// Check for newly found hashes
|
||||||
|
auto seenAfter = getResult();
|
||||||
|
for (const auto & hash : seenAfter) {
|
||||||
|
if (seenBefore.find(hash) == seenBefore.end()) {
|
||||||
|
// This hash was just found in the current file
|
||||||
|
// Create an edge from current file to the target
|
||||||
|
auto targetPath = storeDir + hash;
|
||||||
|
|
||||||
|
StoreCycleEdge edge;
|
||||||
|
edge.push_back(currentFilePath);
|
||||||
|
edge.push_back(targetPath);
|
||||||
|
edges.push_back(edge);
|
||||||
|
|
||||||
|
debug("found cycle edge: %s → %s (hash: %s)", currentFilePath, targetPath, hash);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
StoreCycleEdgeVec && CycleEdgeScanSink::getEdges()
|
||||||
|
{
|
||||||
|
return std::move(edges);
|
||||||
|
}
|
||||||
|
|
||||||
void scanForCycleEdges(const Path & path, const StorePathSet & refs, StoreCycleEdgeVec & edges)
|
void scanForCycleEdges(const Path & path, const StorePathSet & refs, StoreCycleEdgeVec & edges)
|
||||||
{
|
{
|
||||||
|
|
@ -41,174 +82,46 @@ void scanForCycleEdges(const Path & path, const StorePathSet & refs, StoreCycleE
|
||||||
hashes.insert(hashPart);
|
hashes.insert(hashPart);
|
||||||
}
|
}
|
||||||
|
|
||||||
scanForCycleEdges2(path, hashes, edges, storePrefix);
|
// Create sink that reuses RefScanSink's hash-finding logic
|
||||||
|
CycleEdgeScanSink sink(std::move(hashes), std::move(hashPathMap), storePrefix);
|
||||||
|
|
||||||
|
// Walk the filesystem and scan files using the sink
|
||||||
|
scanForCycleEdges2(path, sink);
|
||||||
|
|
||||||
|
// Extract the found edges
|
||||||
|
edges = sink.getEdges();
|
||||||
}
|
}
|
||||||
|
|
||||||
void scanForCycleEdges2(std::string path, const StringSet & hashes, StoreCycleEdgeVec & edges, std::string storeDir)
|
/**
|
||||||
|
* Recursively walk filesystem and stream files into the sink.
|
||||||
|
* This reuses RefScanSink's hash-finding logic instead of reimplementing it.
|
||||||
|
*/
|
||||||
|
void scanForCycleEdges2(const std::string & path, CycleEdgeScanSink & sink)
|
||||||
{
|
{
|
||||||
auto st = lstat(path);
|
auto st = lstat(path);
|
||||||
|
|
||||||
debug("scanForCycleEdges2: scanning path = %s", path);
|
debug("scanForCycleEdges2: scanning path = %s", path);
|
||||||
|
|
||||||
if (S_ISREG(st.st_mode)) {
|
if (S_ISREG(st.st_mode)) {
|
||||||
// Handle regular files - scan contents for hash references
|
// Handle regular files - stream contents into sink
|
||||||
|
// The sink (RefScanSink) handles all hash detection and buffer management
|
||||||
|
sink.setCurrentPath(path);
|
||||||
|
|
||||||
AutoCloseFD fd = open(path.c_str(), O_RDONLY | O_CLOEXEC);
|
AutoCloseFD fd = open(path.c_str(), O_RDONLY | O_CLOEXEC);
|
||||||
if (!fd)
|
if (!fd)
|
||||||
throw SysError("opening file '%1%'", path);
|
throw SysError("opening file '%1%'", path);
|
||||||
|
|
||||||
|
// Stream file contents into sink
|
||||||
|
// RefScanSink handles buffer boundaries automatically
|
||||||
std::vector<char> buf(65536);
|
std::vector<char> buf(65536);
|
||||||
size_t rest = st.st_size;
|
while (true) {
|
||||||
size_t start = 0;
|
ssize_t n = read(fd.get(), buf.data(), buf.size());
|
||||||
|
if (n == -1)
|
||||||
// Buffer to carry data between reads (for references spanning chunks)
|
throw SysError("reading file '%1%'", path);
|
||||||
std::vector<char> bufCarry(MAX_FILEPATH_LENGTH);
|
if (n == 0)
|
||||||
bool bufCarryUsed = false;
|
|
||||||
size_t bufCarrySize = 0;
|
|
||||||
|
|
||||||
while (rest > 0) {
|
|
||||||
auto n = std::min(rest, buf.size());
|
|
||||||
readFull(fd.get(), buf.data(), n);
|
|
||||||
|
|
||||||
debug("scanForCycleEdges2: read file %s: n = %lu", path, n);
|
|
||||||
|
|
||||||
// Check if we have carry-over data from previous iteration
|
|
||||||
if (bufCarryUsed) {
|
|
||||||
// Search in the overlap region (carry + start of new buffer)
|
|
||||||
size_t searchSize = std::min(bufCarrySize + n, MAX_FILEPATH_LENGTH);
|
|
||||||
std::vector<char> searchBuf(searchSize);
|
|
||||||
|
|
||||||
// Copy carry buffer
|
|
||||||
std::copy(bufCarry.begin(), bufCarry.begin() + bufCarrySize, searchBuf.begin());
|
|
||||||
|
|
||||||
// Copy start of new buffer
|
|
||||||
size_t newDataSize = searchSize - bufCarrySize;
|
|
||||||
std::copy(buf.begin(), buf.begin() + newDataSize, searchBuf.begin() + bufCarrySize);
|
|
||||||
|
|
||||||
// Search for hashes in the overlap region
|
|
||||||
for (size_t i = 0; i + refLength <= searchBuf.size();) {
|
|
||||||
bool match = true;
|
|
||||||
for (int j = refLength - 1; j >= 0; --j) {
|
|
||||||
if (!BaseNix32::lookupReverse(searchBuf[i + j])) {
|
|
||||||
i += j + 1;
|
|
||||||
match = false;
|
|
||||||
break;
|
break;
|
||||||
}
|
|
||||||
}
|
|
||||||
if (!match)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
std::string hash(searchBuf.begin() + i, searchBuf.begin() + i + refLength);
|
sink(std::string_view(buf.data(), n));
|
||||||
|
|
||||||
if (hashes.find(hash) != hashes.end()) {
|
|
||||||
debug("scanForCycleEdges2: found hash '%s' in overlap region", hash);
|
|
||||||
|
|
||||||
// Try to find the full path
|
|
||||||
size_t storeDirLength = storeDir.size();
|
|
||||||
if (i >= storeDirLength) {
|
|
||||||
std::string targetPath = storeDir + hash;
|
|
||||||
StoreCycleEdge edge;
|
|
||||||
edge.push_back(path);
|
|
||||||
edge.push_back(targetPath);
|
|
||||||
edges.push_back(edge);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
++i;
|
|
||||||
}
|
|
||||||
|
|
||||||
bufCarryUsed = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Search in the main buffer
|
|
||||||
for (size_t i = 0; i + refLength <= n;) {
|
|
||||||
bool match = true;
|
|
||||||
for (int j = refLength - 1; j >= 0; --j) {
|
|
||||||
if (!BaseNix32::lookupReverse(buf[i + j])) {
|
|
||||||
i += j + 1;
|
|
||||||
match = false;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (!match)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
// Found a potential hash
|
|
||||||
std::string hash(buf.begin() + i, buf.begin() + i + refLength);
|
|
||||||
|
|
||||||
if (hashes.find(hash) != hashes.end()) {
|
|
||||||
debug("scanForCycleEdges2: found reference to hash '%s' at offset %lu", hash, start + i);
|
|
||||||
|
|
||||||
// Try to reconstruct the full store path
|
|
||||||
size_t storeDirLength = storeDir.size();
|
|
||||||
std::string targetPath = storeDir + hash;
|
|
||||||
std::string targetStorePath;
|
|
||||||
|
|
||||||
// Check if we have storeDir + hash in the buffer
|
|
||||||
if (i >= (size_t) storeDirLength
|
|
||||||
&& std::string(buf.begin() + i - storeDirLength, buf.begin() + i + refLength) == targetPath) {
|
|
||||||
|
|
||||||
debug("scanForCycleEdges2: found store path prefix at offset %lu", start + i - storeDirLength);
|
|
||||||
|
|
||||||
// Try to find the complete path by checking what exists on disk
|
|
||||||
// We probe incrementally to find the longest existing path
|
|
||||||
size_t testNameLength = refLength + 2; // Minimum: hash + "-x"
|
|
||||||
size_t targetPathLastEnd = 0;
|
|
||||||
bool foundStorePath = false;
|
|
||||||
bool foundPath = false;
|
|
||||||
|
|
||||||
for (; testNameLength < 255 && i + (size_t) targetPathLastEnd + (size_t) testNameLength <= n;
|
|
||||||
testNameLength++) {
|
|
||||||
std::string testPath(
|
|
||||||
buf.begin() + i - storeDirLength, buf.begin() + i + targetPathLastEnd + testNameLength);
|
|
||||||
|
|
||||||
struct stat testStat;
|
|
||||||
if (stat(testPath.c_str(), &testStat) == 0) {
|
|
||||||
debug("scanForCycleEdges2: found existing path: %s", testPath);
|
|
||||||
|
|
||||||
if (!foundStorePath) {
|
|
||||||
// First match is the store path component
|
|
||||||
targetStorePath = testPath.substr(storeDirLength);
|
|
||||||
foundStorePath = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
foundPath = true;
|
|
||||||
targetPath = testPath;
|
|
||||||
|
|
||||||
// Check if this is a directory (followed by '/')
|
|
||||||
if (buf[i + targetPathLastEnd + testNameLength] == '/') {
|
|
||||||
debug("scanForCycleEdges2: path is a directory, continuing");
|
|
||||||
targetPathLastEnd += testNameLength;
|
|
||||||
testNameLength = 1; // Reset for next component
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (foundPath) {
|
|
||||||
debug("scanForCycleEdges2: cycle edge: %s -> %s", path, targetPath);
|
|
||||||
} else {
|
|
||||||
// Couldn't find exact path, use store path + hash
|
|
||||||
targetPath = storeDir + hash;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
StoreCycleEdge edge;
|
|
||||||
edge.push_back(path);
|
|
||||||
edge.push_back(targetPath);
|
|
||||||
edges.push_back(edge);
|
|
||||||
}
|
|
||||||
++i;
|
|
||||||
}
|
|
||||||
|
|
||||||
start += n;
|
|
||||||
rest -= n;
|
|
||||||
|
|
||||||
// Carry over the end of the buffer for next iteration
|
|
||||||
if (n == buf.size() && rest > 0) {
|
|
||||||
size_t carrySize = std::min(MAX_FILEPATH_LENGTH, n);
|
|
||||||
std::copy(buf.end() - carrySize, buf.end(), bufCarry.begin());
|
|
||||||
bufCarrySize = carrySize;
|
|
||||||
bufCarryUsed = true;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
} else if (S_ISDIR(st.st_mode)) {
|
} else if (S_ISDIR(st.st_mode)) {
|
||||||
// Handle directories - recursively scan contents
|
// Handle directories - recursively scan contents
|
||||||
|
|
@ -237,41 +150,16 @@ void scanForCycleEdges2(std::string path, const StringSet & hashes, StoreCycleEd
|
||||||
|
|
||||||
for (auto & [name, actualName] : unhacked) {
|
for (auto & [name, actualName] : unhacked) {
|
||||||
debug("scanForCycleEdges2: recursing into %s/%s", path, actualName);
|
debug("scanForCycleEdges2: recursing into %s/%s", path, actualName);
|
||||||
scanForCycleEdges2(path + "/" + actualName, hashes, edges, storeDir);
|
scanForCycleEdges2(path + "/" + actualName, sink);
|
||||||
}
|
}
|
||||||
} else if (S_ISLNK(st.st_mode)) {
|
} else if (S_ISLNK(st.st_mode)) {
|
||||||
// Handle symlinks - scan link target for hash references
|
// Handle symlinks - stream link target into sink
|
||||||
std::string linkTarget = readLink(path);
|
std::string linkTarget = readLink(path);
|
||||||
|
|
||||||
debug("scanForCycleEdges2: scanning symlink %s -> %s", path, linkTarget);
|
debug("scanForCycleEdges2: scanning symlink %s -> %s", path, linkTarget);
|
||||||
|
|
||||||
for (size_t i = 0; i + refLength <= linkTarget.size();) {
|
sink.setCurrentPath(path);
|
||||||
bool match = true;
|
sink(std::string_view(linkTarget));
|
||||||
for (int j = refLength - 1; j >= 0; --j) {
|
|
||||||
if (!BaseNix32::lookupReverse(linkTarget[i + j])) {
|
|
||||||
i += j + 1;
|
|
||||||
match = false;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (!match)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
std::string ref(linkTarget.begin() + i, linkTarget.begin() + i + refLength);
|
|
||||||
|
|
||||||
if (hashes.find(ref) != hashes.end()) {
|
|
||||||
debug("scanForCycleEdges2: found reference '%s' in symlink at offset %lu", ref, i);
|
|
||||||
|
|
||||||
// Try to extract full path from link target
|
|
||||||
std::string targetPath = storeDir + ref;
|
|
||||||
|
|
||||||
StoreCycleEdge edge;
|
|
||||||
edge.push_back(path);
|
|
||||||
edge.push_back(targetPath);
|
|
||||||
edges.push_back(edge);
|
|
||||||
}
|
|
||||||
++i;
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
throw Error("file '%1%' has an unsupported type", path);
|
throw Error("file '%1%' has an unsupported type", path);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,7 @@
|
||||||
///@file
|
///@file
|
||||||
|
|
||||||
#include "nix/store/store-api.hh"
|
#include "nix/store/store-api.hh"
|
||||||
|
#include "nix/store/references.hh"
|
||||||
#include "nix/util/types.hh"
|
#include "nix/util/types.hh"
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
@ -24,6 +25,41 @@ typedef std::deque<std::string> StoreCycleEdge;
|
||||||
*/
|
*/
|
||||||
typedef std::vector<StoreCycleEdge> StoreCycleEdgeVec;
|
typedef std::vector<StoreCycleEdge> StoreCycleEdgeVec;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A sink that extends RefScanSink to track file paths where references are found.
|
||||||
|
*
|
||||||
|
* This reuses the existing reference scanning logic from RefScanSink, but adds
|
||||||
|
* tracking of which file contains which reference. This is essential for providing
|
||||||
|
* detailed cycle error messages.
|
||||||
|
*/
|
||||||
|
class CycleEdgeScanSink : public RefScanSink
|
||||||
|
{
|
||||||
|
std::string currentFilePath;
|
||||||
|
std::map<std::string, StorePath> hashPathMap;
|
||||||
|
std::string storeDir;
|
||||||
|
|
||||||
|
public:
|
||||||
|
StoreCycleEdgeVec edges;
|
||||||
|
|
||||||
|
CycleEdgeScanSink(StringSet && hashes, std::map<std::string, StorePath> && backMap, std::string storeDir);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Set the current file path being scanned.
|
||||||
|
* Must be called before processing each file.
|
||||||
|
*/
|
||||||
|
void setCurrentPath(const std::string & path);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Override to intercept when hashes are found and record the file location.
|
||||||
|
*/
|
||||||
|
void operator()(std::string_view data) override;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the accumulated cycle edges.
|
||||||
|
*/
|
||||||
|
StoreCycleEdgeVec && getEdges();
|
||||||
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Scan output paths to find cycle edges with detailed file paths.
|
* Scan output paths to find cycle edges with detailed file paths.
|
||||||
*
|
*
|
||||||
|
|
@ -40,16 +76,14 @@ void scanForCycleEdges(const Path & path, const StorePathSet & refs, StoreCycleE
|
||||||
/**
|
/**
|
||||||
* Recursively scan files and directories for hash references.
|
* Recursively scan files and directories for hash references.
|
||||||
*
|
*
|
||||||
* This function walks the file system tree and searches for store path hashes
|
* This function walks the file system tree, streaming file contents into
|
||||||
* in file contents, symlinks, etc. When a hash is found, it attempts to
|
* the provided sink which performs the actual hash detection. This reuses
|
||||||
* reconstruct the full store path by checking what files actually exist.
|
* the existing RefScanSink infrastructure for robustness.
|
||||||
*
|
*
|
||||||
* @param path Current path being scanned
|
* @param path Current path being scanned
|
||||||
* @param hashes Set of hash strings to look for (32-char base32 hashes)
|
* @param sink The CycleEdgeScanSink that will detect and record hash references
|
||||||
* @param edges Output parameter that accumulates found cycle edges
|
|
||||||
* @param storeDir The store directory prefix (e.g., "/nix/store/")
|
|
||||||
*/
|
*/
|
||||||
void scanForCycleEdges2(std::string path, const StringSet & hashes, StoreCycleEdgeVec & edges, std::string storeDir);
|
void scanForCycleEdges2(const std::string & path, CycleEdgeScanSink & sink);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Transform individual edges into connected multi-edges (paths).
|
* Transform individual edges into connected multi-edges (paths).
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue