1
1
Fork 0
mirror of https://github.com/NixOS/nix.git synced 2025-11-08 19:46:02 +01:00

feat(libstore): add scanForReferencesDeep for per-file reference tracking

Introduces `scanForReferencesDeep` to provide per-file granularity when
scanning for store path references, enabling better diagnostics for
cycle detection and `nix why-depends --precise`.
This commit is contained in:
Bernardo Meurer Costa 2025-10-25 22:38:43 +00:00
parent 91ed3701fe
commit 5e220271e2
No known key found for this signature in database
3 changed files with 290 additions and 0 deletions

View file

@ -1,4 +1,6 @@
#include "nix/store/references.hh" #include "nix/store/references.hh"
#include "nix/store/path-references.hh"
#include "nix/util/memory-source-accessor.hh"
#include <gtest/gtest.h> #include <gtest/gtest.h>
@ -79,4 +81,145 @@ TEST(references, scan)
} }
} }
/**
 * Exercises both overloads of scanForReferencesDeep() against one in-memory
 * tree and checks that each reports exactly the expected per-file references.
 */
TEST(references, scanForReferencesDeep)
{
    using File = MemorySourceAccessor::File;
    using RefsByFile = std::map<CanonPath, StorePathSet>;

    // Create store paths to search for
    StorePath path1{"dc04vv14dak1c1r48qa0m23vr9jy8sm0-foo"};
    StorePath path2{"zc842j0rz61mjsp3h3wp5ly71ak6qgdn-bar"};
    StorePath path3{"a5cn2i4b83gnsm60d38l3kgb8qfplm11-baz"};

    StorePathSet refs{path1, path2, path3};

    std::string_view hash1 = path1.hashPart();
    std::string_view hash2 = path2.hashPart();
    std::string_view hash3 = path3.hashPart();

    // Create an in-memory file system with various reference patterns
    auto accessor = make_ref<MemorySourceAccessor>();
    accessor->root = File::Directory{
        .contents{
            {
                // file1.txt: contains hash1
                "file1.txt",
                File::Regular{
                    .contents = "This file references " + hash1 + " in its content",
                },
            },
            {
                // file2.txt: contains hash2 and hash3
                "file2.txt",
                File::Regular{
                    .contents = "Multiple refs: " + hash2 + " and also " + hash3,
                },
            },
            {
                // file3.txt: contains no references
                "file3.txt",
                File::Regular{
                    .contents = "This file has no store path references at all",
                },
            },
            {
                // subdir: a subdirectory
                "subdir",
                File::Directory{
                    .contents{
                        {
                            // subdir/file4.txt: contains hash1 again
                            "file4.txt",
                            File::Regular{
                                .contents = "Subdirectory file with " + hash1,
                            },
                        },
                    },
                },
            },
            {
                // link1: a symlink that contains a reference in its target
                "link1",
                File::Symlink{
                    .target = hash2 + "-target",
                },
            },
        },
    };

    // Checks that `file` is present in `found` and maps to exactly `expected`.
    // Exact set equality covers both the size and the membership checks.
    auto expectRefs = [](const RefsByFile & found, std::string_view file, const StorePathSet & expected) -> void {
        auto it = found.find(CanonPath(file));
        ASSERT_TRUE(it != found.end()) << "no references reported for " << file;
        EXPECT_EQ(it->second, expected) << "unexpected reference set for " << file;
    };

    // The expectations for the fixture above; shared by both API variants so
    // that they are verified to the same standard.
    auto expectFixtureRefs = [&](const RefsByFile & found) -> void {
        EXPECT_EQ(found.size(), 4u); // file1, file2, file4, link1

        expectRefs(found, "/file1.txt", StorePathSet{path1});
        expectRefs(found, "/file2.txt", StorePathSet{path2, path3});
        expectRefs(found, "/subdir/file4.txt", StorePathSet{path1});
        expectRefs(found, "/link1", StorePathSet{path2});

        // file3.txt contains no references, so it must not be reported at all
        EXPECT_EQ(found.count(CanonPath("/file3.txt")), 0u);
    };

    // Test the callback-based API
    {
        SCOPED_TRACE("callback-based API");

        RefsByFile foundRefs;
        scanForReferencesDeep(*accessor, CanonPath::root, refs, [&](FileRefScanResult result) {
            // emplace (rather than operator[]) so that a file delivered twice
            // is detected instead of silently overwriting the earlier report.
            auto [it, inserted] = foundRefs.emplace(std::move(result.filePath), std::move(result.foundRefs));
            EXPECT_TRUE(inserted) << it->first.abs() << " was reported more than once";
        });

        expectFixtureRefs(foundRefs);
    }

    // Test the map-based convenience API
    {
        SCOPED_TRACE("map-based API");

        auto results = scanForReferencesDeep(*accessor, CanonPath::root, refs);

        expectFixtureRefs(results);
    }
}
} // namespace nix } // namespace nix

View file

@ -3,6 +3,10 @@
#include "nix/store/references.hh" #include "nix/store/references.hh"
#include "nix/store/path.hh" #include "nix/store/path.hh"
#include "nix/util/source-accessor.hh"
#include <functional>
#include <vector>
namespace nix { namespace nix {
@ -21,4 +25,57 @@ public:
StorePathSet getResultPaths(); StorePathSet getResultPaths();
}; };
/**
 * Result of scanning a single file for references.
 *
 * This is the value handed to the callback of scanForReferencesDeep().
 * "File" covers both regular files (whose contents are scanned) and
 * symlinks (whose target string is scanned).
 */
struct FileRefScanResult
{
CanonPath filePath; ///< The file (or symlink) that was scanned, as a path within the accessor
StorePathSet foundRefs; ///< Which of the searched store paths were found in this file
};
/**
 * Scan a store path tree and report which references appear in which files.
 *
 * This is like scanForReferences() but provides per-file granularity.
 * Useful for cycle detection and detailed dependency analysis like `nix why-depends --precise`.
 *
 * The function walks the tree using the provided accessor and streams each file's
 * contents through a RefScanSink to detect hash references. For each file that
 * contains at least one reference, a callback is invoked with the file path and
 * the set of references found.
 *
 * Traversal details:
 * - Directories are descended into recursively; they are never reported themselves.
 * - Symlinks are not followed. Instead, the link target string is scanned, and the
 *   symlink's own path is reported if the target contains a reference.
 * - Every file is scanned independently, so a reference that occurs in several
 *   files is reported once for each of them.
 * - Reported paths are built by appending entry names to `rootPath`, i.e. they
 *   are paths within `accessor`, not paths relative to `rootPath`.
 *
 * Note: This function only searches for the hash part of store paths (e.g.,
 * "dc04vv14dak1c1r48qa0m23vr9jy8sm0"), not the name part. A store path like
 * "/nix/store/dc04vv14dak1c1r48qa0m23vr9jy8sm0-foo" will be detected if the
 * hash appears anywhere in the scanned content, regardless of the "-foo" suffix.
 *
 * @param accessor Source accessor to read the tree
 * @param rootPath Root path to scan
 * @param refs Set of store paths to search for
 * @param callback Called for each file that contains at least one reference
 *
 * @throws Error if the tree contains an object that is neither a regular file,
 * a directory nor a symlink.
 */
void scanForReferencesDeep(
SourceAccessor & accessor,
const CanonPath & rootPath,
const StorePathSet & refs,
std::function<void(FileRefScanResult)> callback);
/**
 * Scan a store path tree and return which references appear in which files.
 *
 * This is a convenience wrapper around the callback-based scanForReferencesDeep()
 * that collects all results into a map for efficient lookups. Files (and symlinks)
 * in which no reference was found have no entry in the returned map.
 *
 * Note: This function only searches for the hash part of store paths, not the name part.
 * See the callback-based overload for details. Because the scan is delegated to
 * that overload, any exception it raises propagates out of this function as well.
 *
 * @param accessor Source accessor to read the tree
 * @param rootPath Root path to scan
 * @param refs Set of store paths to search for
 * @return Map from file paths to the set of references found in each file
 */
std::map<CanonPath, StorePathSet>
scanForReferencesDeep(SourceAccessor & accessor, const CanonPath & rootPath, const StorePathSet & refs);
} // namespace nix } // namespace nix

View file

@ -1,11 +1,15 @@
#include "nix/store/path-references.hh" #include "nix/store/path-references.hh"
#include "nix/util/hash.hh" #include "nix/util/hash.hh"
#include "nix/util/archive.hh" #include "nix/util/archive.hh"
#include "nix/util/source-accessor.hh"
#include "nix/util/canon-path.hh"
#include "nix/util/logging.hh"
#include <map> #include <map>
#include <cstdlib> #include <cstdlib>
#include <mutex> #include <mutex>
#include <algorithm> #include <algorithm>
#include <functional>
namespace nix { namespace nix {
@ -54,4 +58,90 @@ StorePathSet scanForReferences(Sink & toTee, const Path & path, const StorePathS
return refsSink.getResultPaths(); return refsSink.getResultPaths();
} }
/**
 * Callback-based deep scan.
 *
 * Visits everything reachable from `rootPath` through `accessor`. Regular
 * files have their contents scanned, symlinks have their target string
 * scanned, and directories are descended into. `callback` is invoked once for
 * every file or symlink in which at least one of `refs` was found. Any other
 * file type aborts the scan with an Error.
 */
void scanForReferencesDeep(
    SourceAccessor & accessor,
    const CanonPath & rootPath,
    const StorePathSet & refs,
    std::function<void(FileRefScanResult)> callback)
{
    // Each scanned object gets a sink of its own. A RefScanSink keeps every
    // hash it has seen in its result set, so sharing one sink between files
    // would make it impossible to tell which file contained which reference.
    auto visit = [&](this auto & recurse, const CanonPath & current) -> void {
        auto st = accessor.lstat(current);

        if (st.type == SourceAccessor::tRegular) {
            PathRefScanSink scanner = PathRefScanSink::fromPaths(refs);

            // Stream the file contents through the scanner.
            accessor.readFile(current, scanner);

            auto hits = scanner.getResultPaths();
            if (hits.empty())
                return; // nothing to report for this file

            debug("scanForReferencesDeep: found %d references in %s", hits.size(), current.abs());
            callback(FileRefScanResult{.filePath = current, .foundRefs = std::move(hits)});
            return;
        }

        if (st.type == SourceAccessor::tDirectory) {
            // Only the entry names are needed; each child is classified again
            // by its own lstat() when it is visited.
            auto children = accessor.readDirectory(current);
            for (const auto & [childName, childType] : children)
                recurse(current / childName);
            return;
        }

        if (st.type == SourceAccessor::tSymlink) {
            PathRefScanSink scanner = PathRefScanSink::fromPaths(refs);

            // The link is not followed: the target string itself is the
            // content that gets scanned.
            auto linkTarget = accessor.readLink(current);
            scanner(std::string_view(linkTarget));

            auto hits = scanner.getResultPaths();
            if (hits.empty())
                return; // nothing to report for this symlink

            debug("scanForReferencesDeep: found %d references in symlink %s", hits.size(), current.abs());
            callback(FileRefScanResult{.filePath = current, .foundRefs = std::move(hits)});
            return;
        }

        // Character/block devices, sockets, FIFOs and unknown types.
        throw Error("file '%s' has an unsupported type", current.abs());
    };

    visit(rootPath);
}
/**
 * Map-returning convenience overload: runs the callback-based scan and
 * gathers every reported file together with its set of found references.
 */
std::map<CanonPath, StorePathSet>
scanForReferencesDeep(SourceAccessor & accessor, const CanonPath & rootPath, const StorePathSet & refs)
{
    std::map<CanonPath, StorePathSet> refsByFile;

    // Stores one scan result under its file path, replacing any earlier entry
    // for the same path.
    auto collect = [&refsByFile](FileRefScanResult scanned) {
        refsByFile.insert_or_assign(std::move(scanned.filePath), std::move(scanned.foundRefs));
    };

    scanForReferencesDeep(accessor, rootPath, refs, collect);

    return refsByFile;
}
} // namespace nix } // namespace nix