From dc5e6200325ad5c8f380de8777e4d4f24e0032ae Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Fri, 13 Jun 2025 22:19:53 +0200 Subject: [PATCH] fetchToStore() cache: Use content hashes instead of store paths We can always compute the store path from the content hash, but not vice versa. Storing the content hash allows `hashPath()` to be replaced by `fetchToStore(...FetchMode::DryRun...)`, which gets us caching in lazy-trees mode. --- src/libexpr/paths.cc | 3 +- src/libfetchers/fetch-to-store.cc | 73 ++++++++++++------- src/libfetchers/fetchers.cc | 4 +- .../include/nix/fetchers/fetch-to-store.hh | 13 +++- src/libfetchers/path.cc | 35 +++------ tests/functional/flakes/flakes.sh | 2 +- 6 files changed, 72 insertions(+), 58 deletions(-) diff --git a/src/libexpr/paths.cc b/src/libexpr/paths.cc index 65b8212e1..b6a372fb2 100644 --- a/src/libexpr/paths.cc +++ b/src/libexpr/paths.cc @@ -84,8 +84,7 @@ StorePath EvalState::mountInput( if (store->isValidPath(storePath)) _narHash = store->queryPathInfo(storePath)->narHash; else - // FIXME: use fetchToStore to make it cache this - _narHash = accessor->hashPath(CanonPath::root); + _narHash = fetchToStore2(*store, accessor, FetchMode::DryRun, input.getName()).second; } return _narHash; }; diff --git a/src/libfetchers/fetch-to-store.cc b/src/libfetchers/fetch-to-store.cc index 618f32cae..5595f7594 100644 --- a/src/libfetchers/fetch-to-store.cc +++ b/src/libfetchers/fetch-to-store.cc @@ -3,19 +3,16 @@ namespace nix { -fetchers::Cache::Key makeFetchToStoreCacheKey( - const std::string &name, - const std::string &fingerprint, +fetchers::Cache::Key makeSourcePathToHashCacheKey( + const std::string & fingerprint, ContentAddressMethod method, - const std::string &path) + const std::string & path) { - return fetchers::Cache::Key{"fetchToStore", { - {"name", name}, + return fetchers::Cache::Key{"sourcePathToHash", { {"fingerprint", fingerprint}, {"method", std::string{method.render()}}, {"path", path} }}; - } StorePath fetchToStore( @@ -27,9 +24,18 @@ StorePath fetchToStore( PathFilter * filter, RepairFlag repair) { - // FIXME: add an optimisation for the case where the accessor is - // a `PosixSourceAccessor` pointing to a store path. + return fetchToStore2(store, path, mode, name, method, filter, repair).first; +} +std::pair fetchToStore2( + Store & store, + const SourcePath & path, + FetchMode mode, + std::string_view name, + ContentAddressMethod method, + PathFilter * filter, + RepairFlag repair) +{ std::optional cacheKey; auto [subpath, fingerprint] = @@ -38,32 +44,47 @@ StorePath fetchToStore( : path.accessor->getFingerprint(path.path); if (fingerprint) { - cacheKey = makeFetchToStoreCacheKey(std::string{name}, *fingerprint, method, subpath.abs()); - if (auto res = fetchers::getCache()->lookupStorePath(*cacheKey, store, mode == FetchMode::DryRun)) { - debug("store path cache hit for '%s'", path); - return res->storePath; + cacheKey = makeSourcePathToHashCacheKey(*fingerprint, method, subpath.abs()); + if (auto res = fetchers::getCache()->lookup(*cacheKey)) { + debug("source path hash cache hit for '%s'", path); + auto hash = Hash::parseSRI(fetchers::getStrAttr(*res, "hash")); + auto storePath = store.makeFixedOutputPathFromCA(name, + ContentAddressWithReferences::fromParts(method, hash, {})); + if (store.isValidPath(storePath)) { + debug("source path '%s' has valid store path '%s'", path, store.printStorePath(storePath)); + return {storePath, hash}; + } + debug("source path '%s' not in store", path); } } else - debug("source path '%s' is uncacheable (%d, %d)", path, filter, (bool) fingerprint); + // FIXME: could still provide in-memory caching keyed on `SourcePath`. + debug("source path '%s' is uncacheable (%d, %d)", path, (bool) filter, (bool) fingerprint); Activity act(*logger, lvlChatty, actUnknown, fmt(mode == FetchMode::DryRun ? "hashing '%s'" : "copying '%s' to the store", path)); auto filter2 = filter ? *filter : defaultPathFilter; - auto storePath = - mode == FetchMode::DryRun - ? store.computeStorePath( - name, path, method, HashAlgorithm::SHA256, {}, filter2).first - : store.addToStore( + if (mode == FetchMode::DryRun) { + auto [storePath, hash] = store.computeStorePath( + name, path, method, HashAlgorithm::SHA256, {}, filter2); + debug("hashed '%s' to '%s'", path, store.printStorePath(storePath)); + if (cacheKey) + fetchers::getCache()->upsert(*cacheKey, {{"hash", hash.to_string(HashFormat::SRI, true)}}); + return {storePath, hash}; + } else { + auto storePath = store.addToStore( name, path, method, HashAlgorithm::SHA256, {}, filter2, repair); - - debug(mode == FetchMode::DryRun ? "hashed '%s'" : "copied '%s' to '%s'", path, store.printStorePath(storePath)); - - if (cacheKey) - fetchers::getCache()->upsert(*cacheKey, store, {}, storePath); - - return storePath; + debug("copied '%s' to '%s'", path, store.printStorePath(storePath)); + // FIXME: this is the wrong hash when method != + // ContentAddressMethod::Raw::NixArchive. Doesn't matter at + // the moment since the only place where that's the case + // doesn't use the hash. + auto hash = store.queryPathInfo(storePath)->narHash; + if (cacheKey) + fetchers::getCache()->upsert(*cacheKey, {{"hash", hash.to_string(HashFormat::SRI, true)}}); + return {storePath, hash}; + } } } diff --git a/src/libfetchers/fetchers.cc b/src/libfetchers/fetchers.cc index 5764f310d..d91f24b6a 100644 --- a/src/libfetchers/fetchers.cc +++ b/src/libfetchers/fetchers.cc @@ -352,8 +352,8 @@ std::pair, Input> Input::getAccessorUnchecked(ref sto auto [accessor, result] = scheme->getAccessor(store, *this); - assert(!accessor->fingerprint); - accessor->fingerprint = result.getFingerprint(store); + if (!accessor->fingerprint) + accessor->fingerprint = result.getFingerprint(store); return {accessor, std::move(result)}; } diff --git a/src/libfetchers/include/nix/fetchers/fetch-to-store.hh b/src/libfetchers/include/nix/fetchers/fetch-to-store.hh index 44c33c147..364d25375 100644 --- a/src/libfetchers/include/nix/fetchers/fetch-to-store.hh +++ b/src/libfetchers/include/nix/fetchers/fetch-to-store.hh @@ -23,7 +23,16 @@ StorePath fetchToStore( PathFilter * filter = nullptr, RepairFlag repair = NoRepair); -fetchers::Cache::Key makeFetchToStoreCacheKey( - const std::string & name, const std::string & fingerprint, ContentAddressMethod method, const std::string & path); +std::pair fetchToStore2( + Store & store, + const SourcePath & path, + FetchMode mode, + std::string_view name = "source", + ContentAddressMethod method = ContentAddressMethod::Raw::NixArchive, + PathFilter * filter = nullptr, + RepairFlag repair = NoRepair); + +fetchers::Cache::Key +makeSourcePathToHashCacheKey(const std::string & fingerprint, ContentAddressMethod method, const std::string & path); } diff --git a/src/libfetchers/path.cc b/src/libfetchers/path.cc index ff39cb02f..0de81ae43 100644 --- a/src/libfetchers/path.cc +++ b/src/libfetchers/path.cc @@ -144,37 +144,22 @@ struct PathInputScheme : InputScheme storePath = store->addToStoreFromDump(*src, "source"); } - // To avoid copying the path again to the /nix/store, we need to add a cache entry. - ContentAddressMethod method = ContentAddressMethod::Raw::NixArchive; - auto fp = getFingerprint(store, input); - if (fp) { - auto cacheKey = makeFetchToStoreCacheKey(input.getName(), *fp, method, "/"); - fetchers::getCache()->upsert(cacheKey, *store, {}, *storePath); - } + auto accessor = makeStorePathAccessor(store, *storePath); + + // To prevent `fetchToStore()` copying the path again to Nix + // store, pre-create an entry in the fetcher cache. + auto info = store->queryPathInfo(*storePath); + accessor->fingerprint = fmt("path:%s", store->queryPathInfo(*storePath)->narHash.to_string(HashFormat::SRI, true)); + fetchers::getCache()->upsert( + makeSourcePathToHashCacheKey(*accessor->fingerprint, ContentAddressMethod::Raw::NixArchive, "/"), + {{"hash", info->narHash.to_string(HashFormat::SRI, true)}}); /* Trust the lastModified value supplied by the user, if any. It's not a "secure" attribute so we don't care. */ if (!input.getLastModified()) input.attrs.insert_or_assign("lastModified", uint64_t(mtime)); - return {makeStorePathAccessor(store, *storePath), std::move(input)}; - } - - std::optional getFingerprint(ref store, const Input & input) const override - { - if (isRelative(input)) - return std::nullopt; - - /* If this path is in the Nix store, use the hash of the - store object and the subpath. */ - auto path = getAbsPath(input); - try { - auto [storePath, subPath] = store->toStorePath(path.string()); - auto info = store->queryPathInfo(storePath); - return fmt("path:%s:%s", info->narHash.to_string(HashFormat::Base16, false), subPath); - } catch (Error &) { - return std::nullopt; - } + return {accessor, std::move(input)}; } }; diff --git a/tests/functional/flakes/flakes.sh b/tests/functional/flakes/flakes.sh index 51f1909a2..878e02682 100755 --- a/tests/functional/flakes/flakes.sh +++ b/tests/functional/flakes/flakes.sh @@ -115,7 +115,7 @@ nix build -o "$TEST_ROOT/result" "git+file://$flake1Dir?ref=HEAD#default" # Check that the fetcher cache works. if [[ $(nix config show lazy-trees) = false ]]; then nix build -o "$TEST_ROOT/result" "git+file://$flake1Dir?ref=HEAD#default" -vvvvv 2>&1 | grepQuietInverse "source path.*is uncacheable" - nix build -o "$TEST_ROOT/result" "git+file://$flake1Dir?ref=HEAD#default" -vvvvv 2>&1 | grepQuiet "store path cache hit" + nix build -o "$TEST_ROOT/result" "git+file://$flake1Dir?ref=HEAD#default" -vvvvv 2>&1 | grepQuiet "source path hash cache hit" fi # Check that relative paths are allowed for git flakes.