mirror of
https://github.com/NixOS/nix.git
synced 2025-11-08 19:46:02 +01:00
Merge pull request #14360 from lovesegfault/scan-for-references-detailed
feat(libstore): add scanForReferencesDeep and use it for why-depends
This commit is contained in:
commit
7f1d92793e
4 changed files with 326 additions and 43 deletions
|
|
@ -1,4 +1,6 @@
|
||||||
#include "nix/store/references.hh"
|
#include "nix/store/references.hh"
|
||||||
|
#include "nix/store/path-references.hh"
|
||||||
|
#include "nix/util/memory-source-accessor.hh"
|
||||||
|
|
||||||
#include <gtest/gtest.h>
|
#include <gtest/gtest.h>
|
||||||
|
|
||||||
|
|
@ -79,4 +81,145 @@ TEST(references, scan)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Exercises both overloads of scanForReferencesDeep() against a small
 * in-memory tree: regular files with zero, one and two references, a
 * nested directory, and a symlink whose target embeds a hash.
 */
TEST(references, scanForReferencesDeep)
{
    using File = MemorySourceAccessor::File;

    // The store paths whose hash parts are planted in the fixture below.
    const StorePath fooPath{"dc04vv14dak1c1r48qa0m23vr9jy8sm0-foo"};
    const StorePath barPath{"zc842j0rz61mjsp3h3wp5ly71ak6qgdn-bar"};
    const StorePath bazPath{"a5cn2i4b83gnsm60d38l3kgb8qfplm11-baz"};

    const StorePathSet wanted{fooPath, barPath, bazPath};

    const std::string_view fooHash = fooPath.hashPart();
    const std::string_view barHash = barPath.hashPart();
    const std::string_view bazHash = bazPath.hashPart();

    // Build the in-memory tree that gets scanned.
    auto accessor = make_ref<MemorySourceAccessor>();
    accessor->root = File::Directory{
        .contents{
            {
                // Regular file mentioning only the "foo" hash.
                "file1.txt",
                File::Regular{
                    .contents = "This file references " + fooHash + " in its content",
                },
            },
            {
                // Regular file mentioning both the "bar" and "baz" hashes.
                "file2.txt",
                File::Regular{
                    .contents = "Multiple refs: " + barHash + " and also " + bazHash,
                },
            },
            {
                // Regular file without any hash; must not be reported.
                "file3.txt",
                File::Regular{
                    .contents = "This file has no store path references at all",
                },
            },
            {
                // Nested directory, to check that the walk recurses.
                "subdir",
                File::Directory{
                    .contents{
                        {
                            // Mentions the "foo" hash a second time.
                            "file4.txt",
                            File::Regular{
                                .contents = "Subdirectory file with " + fooHash,
                            },
                        },
                    },
                },
            },
            {
                // Symlink whose target string embeds the "bar" hash.
                "link1",
                File::Symlink{
                    .target = barHash + "-target",
                },
            },
        },
    };

    // Callback overload: gather every reported entry, then inspect them.
    {
        std::map<CanonPath, StorePathSet> refsByFile;

        scanForReferencesDeep(*accessor, CanonPath::root, wanted, [&](FileRefScanResult scan) {
            refsByFile[std::move(scan.filePath)] = std::move(scan.foundRefs);
        });

        // Exactly file1, file2, file4 and link1 carry references.
        EXPECT_EQ(refsByFile.size(), 4);

        // file1.txt -> foo only
        auto file1 = refsByFile.find(CanonPath("/file1.txt"));
        ASSERT_TRUE(file1 != refsByFile.end());
        EXPECT_EQ(file1->second.size(), 1);
        EXPECT_TRUE(file1->second.count(fooPath));

        // file2.txt -> bar and baz
        auto file2 = refsByFile.find(CanonPath("/file2.txt"));
        ASSERT_TRUE(file2 != refsByFile.end());
        EXPECT_EQ(file2->second.size(), 2);
        EXPECT_TRUE(file2->second.count(barPath));
        EXPECT_TRUE(file2->second.count(bazPath));

        // file3.txt has no references, so it must be absent
        EXPECT_FALSE(refsByFile.count(CanonPath("/file3.txt")));

        // subdir/file4.txt -> foo only
        auto file4 = refsByFile.find(CanonPath("/subdir/file4.txt"));
        ASSERT_TRUE(file4 != refsByFile.end());
        EXPECT_EQ(file4->second.size(), 1);
        EXPECT_TRUE(file4->second.count(fooPath));

        // link1 -> bar only (found in the symlink target)
        auto link = refsByFile.find(CanonPath("/link1"));
        ASSERT_TRUE(link != refsByFile.end());
        EXPECT_EQ(link->second.size(), 1);
        EXPECT_TRUE(link->second.count(barPath));
    }

    // Map-returning overload: same expectations, checked on the returned map.
    {
        auto byFile = scanForReferencesDeep(*accessor, CanonPath::root, wanted);

        // Exactly file1, file2, file4 and link1 carry references.
        EXPECT_EQ(byFile.size(), 4);

        // Presence / absence of each entry
        EXPECT_TRUE(byFile.count(CanonPath("/file1.txt")));
        EXPECT_TRUE(byFile.count(CanonPath("/file2.txt")));
        EXPECT_TRUE(byFile.count(CanonPath("/subdir/file4.txt")));
        EXPECT_TRUE(byFile.count(CanonPath("/link1")));
        EXPECT_FALSE(byFile.count(CanonPath("/file3.txt")));

        // Exact reference set recorded for each entry
        EXPECT_EQ(byFile.at(CanonPath("/file1.txt")), StorePathSet{fooPath});
        EXPECT_EQ(byFile.at(CanonPath("/file2.txt")), StorePathSet({barPath, bazPath}));
        EXPECT_EQ(byFile.at(CanonPath("/subdir/file4.txt")), StorePathSet{fooPath});
        EXPECT_EQ(byFile.at(CanonPath("/link1")), StorePathSet{barPath});
    }
}
|
||||||
|
|
||||||
} // namespace nix
|
} // namespace nix
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,10 @@
|
||||||
|
|
||||||
#include "nix/store/references.hh"
|
#include "nix/store/references.hh"
|
||||||
#include "nix/store/path.hh"
|
#include "nix/store/path.hh"
|
||||||
|
#include "nix/util/source-accessor.hh"
|
||||||
|
|
||||||
|
#include <functional>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
namespace nix {
|
namespace nix {
|
||||||
|
|
||||||
|
|
@ -21,4 +25,57 @@ public:
|
||||||
StorePathSet getResultPaths();
|
StorePathSet getResultPaths();
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Result of scanning a single file for references.
|
||||||
|
*/
|
||||||
|
struct FileRefScanResult
|
||||||
|
{
|
||||||
|
CanonPath filePath; ///< The file that was scanned
|
||||||
|
StorePathSet foundRefs; ///< Which store paths were found in this file
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Scan a store path tree and report which references appear in which files.
|
||||||
|
*
|
||||||
|
* This is like scanForReferences() but provides per-file granularity.
|
||||||
|
* Useful for cycle detection and detailed dependency analysis like `nix why-depends --precise`.
|
||||||
|
*
|
||||||
|
* The function walks the tree using the provided accessor and streams each file's
|
||||||
|
* contents through a RefScanSink to detect hash references. For each file that
|
||||||
|
* contains at least one reference, a callback is invoked with the file path and
|
||||||
|
* the set of references found.
|
||||||
|
*
|
||||||
|
* Note: This function only searches for the hash part of store paths (e.g.,
|
||||||
|
* "dc04vv14dak1c1r48qa0m23vr9jy8sm0"), not the name part. A store path like
|
||||||
|
* "/nix/store/dc04vv14dak1c1r48qa0m23vr9jy8sm0-foo" will be detected if the
|
||||||
|
* hash appears anywhere in the scanned content, regardless of the "-foo" suffix.
|
||||||
|
*
|
||||||
|
* @param accessor Source accessor to read the tree
|
||||||
|
* @param rootPath Root path to scan
|
||||||
|
* @param refs Set of store paths to search for
|
||||||
|
* @param callback Called for each file that contains at least one reference
|
||||||
|
*/
|
||||||
|
void scanForReferencesDeep(
|
||||||
|
SourceAccessor & accessor,
|
||||||
|
const CanonPath & rootPath,
|
||||||
|
const StorePathSet & refs,
|
||||||
|
std::function<void(FileRefScanResult)> callback);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Scan a store path tree and return which references appear in which files.
|
||||||
|
*
|
||||||
|
* This is a convenience wrapper around the callback-based scanForReferencesDeep()
|
||||||
|
* that collects all results into a map for efficient lookups.
|
||||||
|
*
|
||||||
|
* Note: This function only searches for the hash part of store paths, not the name part.
|
||||||
|
* See the callback-based overload for details.
|
||||||
|
*
|
||||||
|
* @param accessor Source accessor to read the tree
|
||||||
|
* @param rootPath Root path to scan
|
||||||
|
* @param refs Set of store paths to search for
|
||||||
|
* @return Map from file paths to the set of references found in each file
|
||||||
|
*/
|
||||||
|
std::map<CanonPath, StorePathSet>
|
||||||
|
scanForReferencesDeep(SourceAccessor & accessor, const CanonPath & rootPath, const StorePathSet & refs);
|
||||||
|
|
||||||
} // namespace nix
|
} // namespace nix
|
||||||
|
|
|
||||||
|
|
@ -1,11 +1,15 @@
|
||||||
#include "nix/store/path-references.hh"
|
#include "nix/store/path-references.hh"
|
||||||
#include "nix/util/hash.hh"
|
#include "nix/util/hash.hh"
|
||||||
#include "nix/util/archive.hh"
|
#include "nix/util/archive.hh"
|
||||||
|
#include "nix/util/source-accessor.hh"
|
||||||
|
#include "nix/util/canon-path.hh"
|
||||||
|
#include "nix/util/logging.hh"
|
||||||
|
|
||||||
#include <map>
|
#include <map>
|
||||||
#include <cstdlib>
|
#include <cstdlib>
|
||||||
#include <mutex>
|
#include <mutex>
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
#include <functional>
|
||||||
|
|
||||||
namespace nix {
|
namespace nix {
|
||||||
|
|
||||||
|
|
@ -54,4 +58,90 @@ StorePathSet scanForReferences(Sink & toTee, const Path & path, const StorePathS
|
||||||
return refsSink.getResultPaths();
|
return refsSink.getResultPaths();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Depth-first scan of the tree below `rootPath`: every regular file and
   every symlink target is checked for the hash parts of `refs`, and
   `callback` is invoked once per entry in which something was found. */
void scanForReferencesDeep(
    SourceAccessor & accessor,
    const CanonPath & rootPath,
    const StorePathSet & refs,
    std::function<void(FileRefScanResult)> callback)
{
    // Self-recursive visitor; `self` refers to the lambda itself.
    auto visit = [&](this auto & self, const CanonPath & path) -> void {
        auto st = accessor.lstat(path);

        switch (st.type) {
        case SourceAccessor::tRegular: {
            // One scanner per file. A scanner keeps every hash it has
            // seen, so sharing one across files would make it impossible
            // to tell which file a given reference actually came from.
            PathRefScanSink scanner = PathRefScanSink::fromPaths(refs);

            // Stream the file's contents through the scanner.
            accessor.readFile(path, scanner);

            auto hits = scanner.getResultPaths();

            // Files without any reference are not reported.
            if (hits.empty())
                break;

            debug("scanForReferencesDeep: found %d references in %s", hits.size(), path.abs());
            callback(FileRefScanResult{.filePath = path, .foundRefs = std::move(hits)});
            break;
        }

        case SourceAccessor::tDirectory: {
            // Descend into each directory entry in turn.
            for (const auto & entry : accessor.readDirectory(path))
                self(path / entry.first);
            break;
        }

        case SourceAccessor::tSymlink: {
            // Separate scanner here too, for the same reason as above.
            PathRefScanSink scanner = PathRefScanSink::fromPaths(refs);

            // The link is not followed; only its target string is scanned.
            auto linkTarget = accessor.readLink(path);
            scanner(std::string_view(linkTarget));

            auto hits = scanner.getResultPaths();

            // Symlinks without any reference are not reported.
            if (hits.empty())
                break;

            debug("scanForReferencesDeep: found %d references in symlink %s", hits.size(), path.abs());
            callback(FileRefScanResult{.filePath = path, .foundRefs = std::move(hits)});
            break;
        }

        // Anything that is not a regular file, directory or symlink is rejected.
        case SourceAccessor::tChar:
        case SourceAccessor::tBlock:
        case SourceAccessor::tSocket:
        case SourceAccessor::tFifo:
        case SourceAccessor::tUnknown:
        default:
            throw Error("file '%s' has an unsupported type", path.abs());
        }
    };

    // Kick off the traversal at the requested root.
    visit(rootPath);
}
|
||||||
|
|
||||||
|
std::map<CanonPath, StorePathSet>
|
||||||
|
scanForReferencesDeep(SourceAccessor & accessor, const CanonPath & rootPath, const StorePathSet & refs)
|
||||||
|
{
|
||||||
|
std::map<CanonPath, StorePathSet> results;
|
||||||
|
|
||||||
|
scanForReferencesDeep(accessor, rootPath, refs, [&](FileRefScanResult result) {
|
||||||
|
results[std::move(result.filePath)] = std::move(result.foundRefs);
|
||||||
|
});
|
||||||
|
|
||||||
|
return results;
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace nix
|
} // namespace nix
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,6 @@
|
||||||
#include "nix/cmd/command.hh"
|
#include "nix/cmd/command.hh"
|
||||||
#include "nix/store/store-api.hh"
|
#include "nix/store/store-api.hh"
|
||||||
|
#include "nix/store/path-references.hh"
|
||||||
#include "nix/util/source-accessor.hh"
|
#include "nix/util/source-accessor.hh"
|
||||||
#include "nix/main/shared.hh"
|
#include "nix/main/shared.hh"
|
||||||
|
|
||||||
|
|
@ -191,7 +192,7 @@ struct CmdWhyDepends : SourceExprCommand, MixOperateOnOptions
|
||||||
/* Sort the references by distance to `dependency` to
|
/* Sort the references by distance to `dependency` to
|
||||||
ensure that the shortest path is printed first. */
|
ensure that the shortest path is printed first. */
|
||||||
std::multimap<size_t, Node *> refs;
|
std::multimap<size_t, Node *> refs;
|
||||||
StringSet hashes;
|
StorePathSet refPaths;
|
||||||
|
|
||||||
for (auto & ref : node.refs) {
|
for (auto & ref : node.refs) {
|
||||||
if (ref == node.path && packagePath != dependencyPath)
|
if (ref == node.path && packagePath != dependencyPath)
|
||||||
|
|
@ -200,7 +201,7 @@ struct CmdWhyDepends : SourceExprCommand, MixOperateOnOptions
|
||||||
if (node2.dist == inf)
|
if (node2.dist == inf)
|
||||||
continue;
|
continue;
|
||||||
refs.emplace(node2.dist, &node2);
|
refs.emplace(node2.dist, &node2);
|
||||||
hashes.insert(std::string(node2.path.hashPart()));
|
refPaths.insert(node2.path);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* For each reference, find the files and symlinks that
|
/* For each reference, find the files and symlinks that
|
||||||
|
|
@ -209,58 +210,50 @@ struct CmdWhyDepends : SourceExprCommand, MixOperateOnOptions
|
||||||
|
|
||||||
auto accessor = store->requireStoreObjectAccessor(node.path);
|
auto accessor = store->requireStoreObjectAccessor(node.path);
|
||||||
|
|
||||||
auto visitPath = [&](this auto && recur, const CanonPath & p) -> void {
|
auto getColour = [&](const std::string & hash) {
|
||||||
auto st = accessor->maybeLstat(p);
|
return hash == dependencyPathHash ? ANSI_GREEN : ANSI_BLUE;
|
||||||
assert(st);
|
};
|
||||||
|
|
||||||
auto p2 = p.isRoot() ? p.abs() : p.rel();
|
if (precise) {
|
||||||
|
// Use scanForReferencesDeep to find files containing references
|
||||||
|
scanForReferencesDeep(*accessor, CanonPath::root, refPaths, [&](FileRefScanResult result) {
|
||||||
|
auto p2 = result.filePath.isRoot() ? result.filePath.abs() : result.filePath.rel();
|
||||||
|
auto st = accessor->lstat(result.filePath);
|
||||||
|
|
||||||
auto getColour = [&](const std::string & hash) {
|
if (st.type == SourceAccessor::Type::tRegular) {
|
||||||
return hash == dependencyPathHash ? ANSI_GREEN : ANSI_BLUE;
|
auto contents = accessor->readFile(result.filePath);
|
||||||
};
|
|
||||||
|
|
||||||
if (st->type == SourceAccessor::Type::tDirectory) {
|
// For each reference found in this file, extract context
|
||||||
auto names = accessor->readDirectory(p);
|
for (auto & foundRef : result.foundRefs) {
|
||||||
for (auto & [name, type] : names)
|
std::string hash(foundRef.hashPart());
|
||||||
recur(p / name);
|
auto pos = contents.find(hash);
|
||||||
}
|
if (pos != std::string::npos) {
|
||||||
|
size_t margin = 32;
|
||||||
else if (st->type == SourceAccessor::Type::tRegular) {
|
auto pos2 = pos >= margin ? pos - margin : 0;
|
||||||
auto contents = accessor->readFile(p);
|
hits[hash].emplace_back(fmt(
|
||||||
|
"%s: …%s…",
|
||||||
for (auto & hash : hashes) {
|
|
||||||
auto pos = contents.find(hash);
|
|
||||||
if (pos != std::string::npos) {
|
|
||||||
size_t margin = 32;
|
|
||||||
auto pos2 = pos >= margin ? pos - margin : 0;
|
|
||||||
hits[hash].emplace_back(
|
|
||||||
fmt("%s: …%s…",
|
|
||||||
p2,
|
p2,
|
||||||
hilite(
|
hilite(
|
||||||
filterPrintable(std::string(contents, pos2, pos - pos2 + hash.size() + margin)),
|
filterPrintable(std::string(contents, pos2, pos - pos2 + hash.size() + margin)),
|
||||||
pos - pos2,
|
pos - pos2,
|
||||||
StorePath::HashLen,
|
StorePath::HashLen,
|
||||||
getColour(hash))));
|
getColour(hash))));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else if (st.type == SourceAccessor::Type::tSymlink) {
|
||||||
|
auto target = accessor->readLink(result.filePath);
|
||||||
|
|
||||||
|
// For each reference found in this symlink, show it
|
||||||
|
for (auto & foundRef : result.foundRefs) {
|
||||||
|
std::string hash(foundRef.hashPart());
|
||||||
|
auto pos = target.find(hash);
|
||||||
|
if (pos != std::string::npos)
|
||||||
|
hits[hash].emplace_back(
|
||||||
|
fmt("%s -> %s", p2, hilite(target, pos, StorePath::HashLen, getColour(hash))));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
});
|
||||||
|
}
|
||||||
else if (st->type == SourceAccessor::Type::tSymlink) {
|
|
||||||
auto target = accessor->readLink(p);
|
|
||||||
|
|
||||||
for (auto & hash : hashes) {
|
|
||||||
auto pos = target.find(hash);
|
|
||||||
if (pos != std::string::npos)
|
|
||||||
hits[hash].emplace_back(
|
|
||||||
fmt("%s -> %s", p2, hilite(target, pos, StorePath::HashLen, getColour(hash))));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
// FIXME: should use scanForReferences().
|
|
||||||
|
|
||||||
if (precise)
|
|
||||||
visitPath(CanonPath::root);
|
|
||||||
|
|
||||||
for (auto & ref : refs) {
|
for (auto & ref : refs) {
|
||||||
std::string hash(ref.second->path.hashPart());
|
std::string hash(ref.second->path.hashPart());
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue