diff --git a/src/libstore-tests/references.cc b/src/libstore-tests/references.cc index 27ecad08f..9cecd573e 100644 --- a/src/libstore-tests/references.cc +++ b/src/libstore-tests/references.cc @@ -1,4 +1,6 @@ #include "nix/store/references.hh" +#include "nix/store/path-references.hh" +#include "nix/util/memory-source-accessor.hh" #include @@ -79,4 +81,145 @@ TEST(references, scan) } } +TEST(references, scanForReferencesDeep) +{ + using File = MemorySourceAccessor::File; + + // Create store paths to search for + StorePath path1{"dc04vv14dak1c1r48qa0m23vr9jy8sm0-foo"}; + StorePath path2{"zc842j0rz61mjsp3h3wp5ly71ak6qgdn-bar"}; + StorePath path3{"a5cn2i4b83gnsm60d38l3kgb8qfplm11-baz"}; + + StorePathSet refs{path1, path2, path3}; + + std::string_view hash1 = path1.hashPart(); + std::string_view hash2 = path2.hashPart(); + std::string_view hash3 = path3.hashPart(); + + // Create an in-memory file system with various reference patterns + auto accessor = make_ref(); + accessor->root = File::Directory{ + .contents{ + { + // file1.txt: contains hash1 + "file1.txt", + File::Regular{ + .contents = "This file references " + hash1 + " in its content", + }, + }, + { + // file2.txt: contains hash2 and hash3 + "file2.txt", + File::Regular{ + .contents = "Multiple refs: " + hash2 + " and also " + hash3, + }, + }, + { + // file3.txt: contains no references + "file3.txt", + File::Regular{ + .contents = "This file has no store path references at all", + }, + }, + { + // subdir: a subdirectory + "subdir", + File::Directory{ + .contents{ + { + // subdir/file4.txt: contains hash1 again + "file4.txt", + File::Regular{ + .contents = "Subdirectory file with " + hash1, + }, + }, + }, + }, + }, + { + // link1: a symlink that contains a reference in its target + "link1", + File::Symlink{ + .target = hash2 + "-target", + }, + }, + }, + }; + + // Test the callback-based API + { + std::map foundRefs; + + scanForReferencesDeep(*accessor, CanonPath::root, refs, [&](FileRefScanResult result) { + foundRefs[std::move(result.filePath)] = std::move(result.foundRefs); + }); + + // Verify we found the expected references + EXPECT_EQ(foundRefs.size(), 4); // file1, file2, file4, link1 + + // Check file1.txt found path1 + { + CanonPath f1Path("/file1.txt"); + auto it = foundRefs.find(f1Path); + ASSERT_TRUE(it != foundRefs.end()); + EXPECT_EQ(it->second.size(), 1); + EXPECT_TRUE(it->second.count(path1)); + } + + // Check file2.txt found path2 and path3 + { + CanonPath f2Path("/file2.txt"); + auto it = foundRefs.find(f2Path); + ASSERT_TRUE(it != foundRefs.end()); + EXPECT_EQ(it->second.size(), 2); + EXPECT_TRUE(it->second.count(path2)); + EXPECT_TRUE(it->second.count(path3)); + } + + // Check file3.txt is not in results (no refs) + { + CanonPath f3Path("/file3.txt"); + EXPECT_FALSE(foundRefs.count(f3Path)); + } + + // Check subdir/file4.txt found path1 + { + CanonPath f4Path("/subdir/file4.txt"); + auto it = foundRefs.find(f4Path); + ASSERT_TRUE(it != foundRefs.end()); + EXPECT_EQ(it->second.size(), 1); + EXPECT_TRUE(it->second.count(path1)); + } + + // Check symlink found path2 + { + CanonPath linkPath("/link1"); + auto it = foundRefs.find(linkPath); + ASSERT_TRUE(it != foundRefs.end()); + EXPECT_EQ(it->second.size(), 1); + EXPECT_TRUE(it->second.count(path2)); + } + } + + // Test the map-based convenience API + { + auto results = scanForReferencesDeep(*accessor, CanonPath::root, refs); + + EXPECT_EQ(results.size(), 4); // file1, file2, file4, link1 + + // Verify all expected files are in the results + EXPECT_TRUE(results.count(CanonPath("/file1.txt"))); + EXPECT_TRUE(results.count(CanonPath("/file2.txt"))); + EXPECT_TRUE(results.count(CanonPath("/subdir/file4.txt"))); + EXPECT_TRUE(results.count(CanonPath("/link1"))); + EXPECT_FALSE(results.count(CanonPath("/file3.txt"))); + + // Verify the references found in each file are correct + EXPECT_EQ(results.at(CanonPath("/file1.txt")), StorePathSet{path1}); + EXPECT_EQ(results.at(CanonPath("/file2.txt")), StorePathSet({path2, path3})); + EXPECT_EQ(results.at(CanonPath("/subdir/file4.txt")), StorePathSet{path1}); + EXPECT_EQ(results.at(CanonPath("/link1")), StorePathSet{path2}); + } +} + } // namespace nix diff --git a/src/libstore/include/nix/store/path-references.hh b/src/libstore/include/nix/store/path-references.hh index 66d0da268..6aa506da4 100644 --- a/src/libstore/include/nix/store/path-references.hh +++ b/src/libstore/include/nix/store/path-references.hh @@ -3,6 +3,10 @@ #include "nix/store/references.hh" #include "nix/store/path.hh" +#include "nix/util/source-accessor.hh" + +#include +#include namespace nix { @@ -21,4 +25,57 @@ public: StorePathSet getResultPaths(); }; +/** + * Result of scanning a single file for references. + */ +struct FileRefScanResult +{ + CanonPath filePath; ///< The file that was scanned + StorePathSet foundRefs; ///< Which store paths were found in this file +}; + +/** + * Scan a store path tree and report which references appear in which files. + * + * This is like scanForReferences() but provides per-file granularity. + * Useful for cycle detection and detailed dependency analysis like `nix why-depends --precise`. + * + * The function walks the tree using the provided accessor and streams each file's + * contents through a RefScanSink to detect hash references. For each file that + * contains at least one reference, a callback is invoked with the file path and + * the set of references found. + * + * Note: This function only searches for the hash part of store paths (e.g., + * "dc04vv14dak1c1r48qa0m23vr9jy8sm0"), not the name part. A store path like + * "/nix/store/dc04vv14dak1c1r48qa0m23vr9jy8sm0-foo" will be detected if the + * hash appears anywhere in the scanned content, regardless of the "-foo" suffix. + * + * @param accessor Source accessor to read the tree + * @param rootPath Root path to scan + * @param refs Set of store paths to search for + * @param callback Called for each file that contains at least one reference + */ +void scanForReferencesDeep( + SourceAccessor & accessor, + const CanonPath & rootPath, + const StorePathSet & refs, + std::function callback); + +/** + * Scan a store path tree and return which references appear in which files. + * + * This is a convenience wrapper around the callback-based scanForReferencesDeep() + * that collects all results into a map for efficient lookups. + * + * Note: This function only searches for the hash part of store paths, not the name part. + * See the callback-based overload for details. + * + * @param accessor Source accessor to read the tree + * @param rootPath Root path to scan + * @param refs Set of store paths to search for + * @return Map from file paths to the set of references found in each file + */ +std::map +scanForReferencesDeep(SourceAccessor & accessor, const CanonPath & rootPath, const StorePathSet & refs); + } // namespace nix diff --git a/src/libstore/path-references.cc b/src/libstore/path-references.cc index 8b167e902..3d783bbe4 100644 --- a/src/libstore/path-references.cc +++ b/src/libstore/path-references.cc @@ -1,11 +1,15 @@ #include "nix/store/path-references.hh" #include "nix/util/hash.hh" #include "nix/util/archive.hh" +#include "nix/util/source-accessor.hh" +#include "nix/util/canon-path.hh" +#include "nix/util/logging.hh" #include #include #include #include +#include namespace nix { @@ -54,4 +58,90 @@ StorePathSet scanForReferences(Sink & toTee, const Path & path, const StorePathS return refsSink.getResultPaths(); } +void scanForReferencesDeep( + SourceAccessor & accessor, + const CanonPath & rootPath, + const StorePathSet & refs, + std::function callback) +{ + // Recursive tree walker + auto walk = [&](this auto & self, const CanonPath & path) -> void { + auto stat = accessor.lstat(path); + + switch (stat.type) { + case SourceAccessor::tRegular: { + // Create a fresh sink for each file to independently detect references. + // RefScanSink accumulates found hashes globally - once a hash is found, + // it remains in the result set. If we reused the same sink across files, + // we couldn't distinguish which files contain which references, as a hash + // found in an earlier file wouldn't be reported when found in later files. + PathRefScanSink sink = PathRefScanSink::fromPaths(refs); + + // Scan this file by streaming its contents through the sink + accessor.readFile(path, sink); + + // Get the references found in this file + auto foundRefs = sink.getResultPaths(); + + // Report if we found anything in this file + if (!foundRefs.empty()) { + debug("scanForReferencesDeep: found %d references in %s", foundRefs.size(), path.abs()); + callback(FileRefScanResult{.filePath = path, .foundRefs = std::move(foundRefs)}); + } + break; + } + + case SourceAccessor::tDirectory: { + // Recursively scan directory contents + auto entries = accessor.readDirectory(path); + for (const auto & [name, entryType] : entries) { + self(path / name); + } + break; + } + + case SourceAccessor::tSymlink: { + // Create a fresh sink for the symlink target (same reason as regular files) + PathRefScanSink sink = PathRefScanSink::fromPaths(refs); + + // Scan symlink target for references + auto target = accessor.readLink(path); + sink(std::string_view(target)); + + // Get the references found in this symlink target + auto foundRefs = sink.getResultPaths(); + + if (!foundRefs.empty()) { + debug("scanForReferencesDeep: found %d references in symlink %s", foundRefs.size(), path.abs()); + callback(FileRefScanResult{.filePath = path, .foundRefs = std::move(foundRefs)}); + } + break; + } + + case SourceAccessor::tChar: + case SourceAccessor::tBlock: + case SourceAccessor::tSocket: + case SourceAccessor::tFifo: + case SourceAccessor::tUnknown: + default: + throw Error("file '%s' has an unsupported type", path.abs()); + } + }; + + // Start the recursive walk from the root + walk(rootPath); +} + +std::map +scanForReferencesDeep(SourceAccessor & accessor, const CanonPath & rootPath, const StorePathSet & refs) +{ + std::map results; + + scanForReferencesDeep(accessor, rootPath, refs, [&](FileRefScanResult result) { + results[std::move(result.filePath)] = std::move(result.foundRefs); + }); + + return results; +} + } // namespace nix