1
1
Fork 0
mirror of https://github.com/NixOS/nix.git synced 2025-11-20 01:09:37 +01:00
nix/src/libfetchers/tarball.cc
2025-11-17 20:08:51 +01:00

421 lines
14 KiB
C++

#include "nix/fetchers/tarball.hh"
#include "nix/fetchers/fetchers.hh"
#include "nix/fetchers/cache.hh"
#include "nix/store/filetransfer.hh"
#include "nix/store/store-api.hh"
#include "nix/util/archive.hh"
#include "nix/util/tarfile.hh"
#include "nix/util/types.hh"
#include "nix/store/store-api.hh"
#include "nix/fetchers/git-utils.hh"
#include "nix/fetchers/fetch-settings.hh"
namespace nix::fetchers {
DownloadFileResult downloadFile(
Store & store,
const Settings & settings,
const std::string & url,
const std::string & name,
const Headers & headers)
{
// FIXME: check store
Cache::Key key{
"file",
{{
{"url", url},
{"name", name},
}}};
auto cached = settings.getCache()->lookupStorePath(key, store);
auto useCached = [&]() -> DownloadFileResult {
return {
.storePath = std::move(cached->storePath),
.etag = getStrAttr(cached->value, "etag"),
.effectiveUrl = getStrAttr(cached->value, "url"),
.immutableUrl = maybeGetStrAttr(cached->value, "immutableUrl"),
};
};
if (cached && !cached->expired)
return useCached();
FileTransferRequest request(VerbatimURL{url});
request.headers = headers;
if (cached)
request.expectedETag = getStrAttr(cached->value, "etag");
FileTransferResult res;
try {
res = getFileTransfer()->download(request);
} catch (FileTransferError & e) {
if (cached) {
warn("%s; using cached version", e.msg());
return useCached();
} else
throw;
}
Attrs infoAttrs({
{"etag", res.etag},
});
if (res.immutableUrl)
infoAttrs.emplace("immutableUrl", *res.immutableUrl);
std::optional<StorePath> storePath;
if (res.cached) {
assert(cached);
storePath = std::move(cached->storePath);
} else {
StringSink sink;
dumpString(res.data, sink);
auto hash = hashString(HashAlgorithm::SHA256, res.data);
auto info = ValidPathInfo::makeFromCA(
store,
name,
FixedOutputInfo{
.method = FileIngestionMethod::Flat,
.hash = hash,
.references = {},
},
hashString(HashAlgorithm::SHA256, sink.s));
info.narSize = sink.s.size();
auto source = StringSource{sink.s};
store.addToStore(info, source, NoRepair, NoCheckSigs);
storePath = std::move(info.path);
}
/* Cache metadata for all URLs in the redirect chain. */
for (auto & url : res.urls) {
key.second.insert_or_assign("url", url);
assert(!res.urls.empty());
infoAttrs.insert_or_assign("url", *res.urls.rbegin());
settings.getCache()->upsert(key, store, infoAttrs, *storePath);
}
return {
.storePath = std::move(*storePath),
.etag = res.etag,
.effectiveUrl = *res.urls.rbegin(),
.immutableUrl = res.immutableUrl,
};
}
static DownloadTarballResult downloadTarball_(
const Settings & settings, const std::string & urlS, const Headers & headers, const std::string & displayPrefix)
{
ParsedURL url = parseURL(urlS);
// Some friendly error messages for common mistakes.
// Namely lets catch when the url is a local file path, but
// it is not in fact a tarball.
if (url.scheme == "file") {
std::filesystem::path localPath = renderUrlPathEnsureLegal(url.path);
if (!exists(localPath)) {
throw Error("tarball '%s' does not exist.", localPath);
}
if (is_directory(localPath)) {
if (exists(localPath / ".git")) {
throw Error(
"tarball '%s' is a git repository, not a tarball. Please use `git+file` as the scheme.", localPath);
}
throw Error("tarball '%s' is a directory, not a file.", localPath);
}
}
Cache::Key cacheKey{"tarball", {{"url", urlS}}};
auto cached = settings.getCache()->lookupExpired(cacheKey);
auto attrsToResult = [&](const Attrs & infoAttrs) {
auto treeHash = getRevAttr(infoAttrs, "treeHash");
return DownloadTarballResult{
.treeHash = treeHash,
.lastModified = (time_t) getIntAttr(infoAttrs, "lastModified"),
.immutableUrl = maybeGetStrAttr(infoAttrs, "immutableUrl"),
.accessor = settings.getTarballCache()->getAccessor(treeHash, false, displayPrefix),
};
};
if (cached && !settings.getTarballCache()->hasObject(getRevAttr(cached->value, "treeHash")))
cached.reset();
if (cached && !cached->expired)
/* We previously downloaded this tarball and it's younger than
`tarballTtl`, so no need to check the server. */
return attrsToResult(cached->value);
auto _res = std::make_shared<Sync<FileTransferResult>>();
auto source = sinkToSource([&](Sink & sink) {
FileTransferRequest req(url);
req.expectedETag = cached ? getStrAttr(cached->value, "etag") : "";
getFileTransfer()->download(std::move(req), sink, [_res](FileTransferResult r) { *_res->lock() = r; });
});
// TODO: fall back to cached value if download fails.
auto act = std::make_unique<Activity>(*logger, lvlInfo, actUnknown, fmt("unpacking '%s' into the Git cache", url));
AutoDelete cleanupTemp;
/* Note: if the download is cached, `importTarball()` will receive
no data, which causes it to import an empty tarball. */
auto archive = !url.path.empty() && hasSuffix(toLower(url.path.back()), ".zip") ? ({
/* In streaming mode, libarchive doesn't handle
symlinks in zip files correctly (#10649). So write
the entire file to disk so libarchive can access it
in random-access mode. */
auto [fdTemp, path] = createTempFile("nix-zipfile");
cleanupTemp.reset(path);
debug("downloading '%s' into '%s'...", url, path);
{
FdSink sink(fdTemp.get());
source->drainInto(sink);
}
TarArchive{path};
})
: TarArchive{*source};
auto tarballCache = settings.getTarballCache();
auto parseSink = tarballCache->getFileSystemObjectSink();
auto lastModified = unpackTarfileToSink(archive, *parseSink);
auto tree = parseSink->flush();
act.reset();
auto res(_res->lock());
Attrs infoAttrs;
if (res->cached) {
/* The server says that the previously downloaded version is
still current. */
infoAttrs = cached->value;
} else {
infoAttrs.insert_or_assign("etag", res->etag);
infoAttrs.insert_or_assign("treeHash", tarballCache->dereferenceSingletonDirectory(tree).gitRev());
infoAttrs.insert_or_assign("lastModified", uint64_t(lastModified));
if (res->immutableUrl)
infoAttrs.insert_or_assign("immutableUrl", *res->immutableUrl);
}
/* Insert a cache entry for every URL in the redirect chain. */
for (auto & url : res->urls) {
cacheKey.second.insert_or_assign("url", url);
settings.getCache()->upsert(cacheKey, infoAttrs);
}
// FIXME: add a cache entry for immutableUrl? That could allow
// cache poisoning.
return attrsToResult(infoAttrs);
}
ref<SourceAccessor> downloadTarball(Store & store, const Settings & settings, const std::string & url)
{
/* Go through Input::getAccessor() to ensure that the resulting
accessor has a fingerprint. */
fetchers::Attrs attrs;
attrs.insert_or_assign("type", "tarball");
attrs.insert_or_assign("url", url);
auto input = Input::fromAttrs(settings, std::move(attrs));
return input.getAccessor(settings, store).first;
}
// An input scheme corresponding to a curl-downloadable resource.
struct CurlInputScheme : InputScheme
{
const StringSet transportUrlSchemes = {"file", "http", "https"};
bool hasTarballExtension(const ParsedURL & url) const
{
if (url.path.empty())
return false;
const auto & path = url.path.back();
return hasSuffix(path, ".zip") || hasSuffix(path, ".tar") || hasSuffix(path, ".tgz")
|| hasSuffix(path, ".tar.gz") || hasSuffix(path, ".tar.xz") || hasSuffix(path, ".tar.bz2")
|| hasSuffix(path, ".tar.zst");
}
virtual bool isValidURL(const ParsedURL & url, bool requireTree) const = 0;
static const StringSet specialParams;
std::optional<Input>
inputFromURL(const Settings & settings, const ParsedURL & _url, bool requireTree) const override
{
if (!isValidURL(_url, requireTree))
return std::nullopt;
Input input{};
auto url = _url;
url.scheme = parseUrlScheme(url.scheme).transport;
auto narHash = url.query.find("narHash");
if (narHash != url.query.end())
input.attrs.insert_or_assign("narHash", narHash->second);
if (auto i = get(url.query, "rev"))
input.attrs.insert_or_assign("rev", *i);
if (auto i = get(url.query, "revCount"))
if (auto n = string2Int<uint64_t>(*i))
input.attrs.insert_or_assign("revCount", *n);
if (auto i = get(url.query, "lastModified"))
if (auto n = string2Int<uint64_t>(*i))
input.attrs.insert_or_assign("lastModified", *n);
/* The URL query parameters serve two roles: specifying fetch
settings for Nix itself, and arbitrary data as part of the
HTTP request. Now that we've processed the Nix-specific
attributes above, remove them so we don't also send them as
part of the HTTP request. */
for (auto & param : allowedAttrs())
url.query.erase(param);
input.attrs.insert_or_assign("type", std::string{schemeName()});
input.attrs.insert_or_assign("url", url.to_string());
return input;
}
StringSet allowedAttrs() const override
{
return {
"type",
"url",
"narHash",
"name",
"unpack",
"rev",
"revCount",
"lastModified",
};
}
std::optional<Input> inputFromAttrs(const Settings & settings, const Attrs & attrs) const override
{
Input input{};
input.attrs = attrs;
// input.locked = (bool) maybeGetStrAttr(input.attrs, "hash");
return input;
}
ParsedURL toURL(const Input & input) const override
{
auto url = parseURL(getStrAttr(input.attrs, "url"));
// NAR hashes are preferred over file hashes since tar/zip
// files don't have a canonical representation.
if (auto narHash = input.getNarHash())
url.query.insert_or_assign("narHash", narHash->to_string(HashFormat::SRI, true));
return url;
}
bool isLocked(const Settings & settings, const Input & input) const override
{
return (bool) input.getNarHash();
}
};
struct FileInputScheme : CurlInputScheme
{
std::string_view schemeName() const override
{
return "file";
}
bool isValidURL(const ParsedURL & url, bool requireTree) const override
{
auto parsedUrlScheme = parseUrlScheme(url.scheme);
return transportUrlSchemes.count(std::string(parsedUrlScheme.transport))
&& (parsedUrlScheme.application ? parsedUrlScheme.application.value() == schemeName()
: (!requireTree && !hasTarballExtension(url)));
}
std::pair<ref<SourceAccessor>, Input>
getAccessor(const Settings & settings, Store & store, const Input & _input) const override
{
auto input(_input);
/* Unlike TarballInputScheme, this stores downloaded files in
the Nix store directly, since there is little deduplication
benefit in using the Git cache for single big files like
tarballs. */
auto file = downloadFile(store, settings, getStrAttr(input.attrs, "url"), input.getName());
auto narHash = store.queryPathInfo(file.storePath)->narHash;
input.attrs.insert_or_assign("narHash", narHash.to_string(HashFormat::SRI, true));
auto accessor = ref{store.getFSAccessor(file.storePath)};
accessor->setPathDisplay("«" + input.to_string() + "»");
return {accessor, input};
}
};
struct TarballInputScheme : CurlInputScheme
{
std::string_view schemeName() const override
{
return "tarball";
}
bool isValidURL(const ParsedURL & url, bool requireTree) const override
{
auto parsedUrlScheme = parseUrlScheme(url.scheme);
return transportUrlSchemes.count(std::string(parsedUrlScheme.transport))
&& (parsedUrlScheme.application ? parsedUrlScheme.application.value() == schemeName()
: (requireTree || hasTarballExtension(url)));
}
std::pair<ref<SourceAccessor>, Input>
getAccessor(const Settings & settings, Store & store, const Input & _input) const override
{
auto input(_input);
auto result = downloadTarball_(settings, getStrAttr(input.attrs, "url"), {}, "«" + input.to_string() + "»");
if (result.immutableUrl) {
auto immutableInput = Input::fromURL(settings, *result.immutableUrl);
// FIXME: would be nice to support arbitrary flakerefs
// here, e.g. git flakes.
if (immutableInput.getType() != "tarball")
throw Error("tarball 'Link' headers that redirect to non-tarball URLs are not supported");
input = immutableInput;
}
if (result.lastModified && !input.attrs.contains("lastModified"))
input.attrs.insert_or_assign("lastModified", uint64_t(result.lastModified));
input.attrs.insert_or_assign(
"narHash",
settings.getTarballCache()->treeHashToNarHash(settings, result.treeHash).to_string(HashFormat::SRI, true));
return {result.accessor, input};
}
std::optional<std::string> getFingerprint(Store & store, const Input & input) const override
{
if (auto narHash = input.getNarHash())
return narHash->to_string(HashFormat::SRI, true);
else if (auto rev = input.getRev())
return rev->gitRev();
else
return std::nullopt;
}
};
static auto rTarballInputScheme = OnStartup([] { registerInputScheme(std::make_unique<TarballInputScheme>()); });
static auto rFileInputScheme = OnStartup([] { registerInputScheme(std::make_unique<FileInputScheme>()); });
} // namespace nix::fetchers