From e54870001037fd4b7b2b9f3d6ff9e8c751e6f8df Mon Sep 17 00:00:00 2001 From: Sergei Zimmerman Date: Mon, 1 Sep 2025 00:35:31 +0300 Subject: [PATCH] lib{store,fetchers}: Pass URLs specified directly verbatim to FileTransferRequest The URL should not be normalized before handing it off to cURL, because builtin fetchers like fetchTarball/fetchurl are expected to work with arbitrary URLs, that might not be RFC3986 compliant. For those cases Nix should not normalize URLs, though validation is fine. ParseURL and cURL are supposed to match the set of acceptable URLs, since they implement the same RFC. --- src/libfetchers/tarball.cc | 12 ++-- src/libstore/builtins/fetchurl.cc | 2 +- src/libstore/filetransfer.cc | 6 +- .../include/nix/store/filetransfer.hh | 6 +- src/libutil/include/nix/util/url.hh | 59 +++++++++++++++++++ src/libutil/url.cc | 6 ++ src/nix/prefetch.cc | 2 +- tests/functional/fetchurl.sh | 5 ++ 8 files changed, 84 insertions(+), 14 deletions(-) diff --git a/src/libfetchers/tarball.cc b/src/libfetchers/tarball.cc index 52038317e..8a8039b6b 100644 --- a/src/libfetchers/tarball.cc +++ b/src/libfetchers/tarball.cc @@ -43,7 +43,7 @@ DownloadFileResult downloadFile( if (cached && !cached->expired) return useCached(); - FileTransferRequest request(parseURL(url)); + FileTransferRequest request(ValidURL{url}); request.headers = headers; if (cached) request.expectedETag = getStrAttr(cached->value, "etag"); @@ -109,13 +109,13 @@ DownloadFileResult downloadFile( static DownloadTarballResult downloadTarball_( const Settings & settings, const std::string & urlS, const Headers & headers, const std::string & displayPrefix) { - auto url = parseURL(urlS); + ValidURL url = urlS; // Some friendly error messages for common mistakes. // Namely lets catch when the url is a local file path, but // it is not in fact a tarball. - if (url.scheme == "file") { - std::filesystem::path localPath = renderUrlPathEnsureLegal(url.path); + if (url.scheme() == "file") { + std::filesystem::path localPath = renderUrlPathEnsureLegal(url.path()); if (!exists(localPath)) { throw Error("tarball '%s' does not exist.", localPath); } @@ -166,7 +166,7 @@ static DownloadTarballResult downloadTarball_( /* Note: if the download is cached, `importTarball()` will receive no data, which causes it to import an empty tarball. */ - auto archive = !url.path.empty() && hasSuffix(toLower(url.path.back()), ".zip") ? ({ + auto archive = !url.path().empty() && hasSuffix(toLower(url.path().back()), ".zip") ? ({ /* In streaming mode, libarchive doesn't handle symlinks in zip files correctly (#10649). So write the entire file to disk so libarchive can access it @@ -180,7 +180,7 @@ static DownloadTarballResult downloadTarball_( } TarArchive{path}; }) - : TarArchive{*source}; + : TarArchive{*source}; auto tarballCache = getTarballCache(); auto parseSink = tarballCache->getFileSystemObjectSink(); auto lastModified = unpackTarfileToSink(archive, *parseSink); diff --git a/src/libstore/builtins/fetchurl.cc b/src/libstore/builtins/fetchurl.cc index c44d4d5ee..7abfa4495 100644 --- a/src/libstore/builtins/fetchurl.cc +++ b/src/libstore/builtins/fetchurl.cc @@ -37,7 +37,7 @@ static void builtinFetchurl(const BuiltinBuilderContext & ctx) auto fetch = [&](const std::string & url) { auto source = sinkToSource([&](Sink & sink) { - FileTransferRequest request(parseURL(url)); + FileTransferRequest request(ValidURL{url}); request.decompress = false; auto decompressor = makeDecompressionSink(unpack && hasSuffix(mainUrl, ".xz") ? "xz" : "none", sink); diff --git a/src/libstore/filetransfer.cc b/src/libstore/filetransfer.cc index 0007b9ad8..a162df1ad 100644 --- a/src/libstore/filetransfer.cc +++ b/src/libstore/filetransfer.cc @@ -784,7 +784,7 @@ struct curlFileTransfer : public FileTransfer void enqueueItem(std::shared_ptr item) { - if (item->request.data && item->request.uri.scheme != "http" && item->request.uri.scheme != "https") + if (item->request.data && item->request.uri.scheme() != "http" && item->request.uri.scheme() != "https") throw nix::Error("uploading to '%s' is not supported", item->request.uri.to_string()); { @@ -801,11 +801,11 @@ struct curlFileTransfer : public FileTransfer void enqueueFileTransfer(const FileTransferRequest & request, Callback callback) override { /* Ugly hack to support s3:// URIs. */ - if (request.uri.scheme == "s3") { + if (request.uri.scheme() == "s3") { // FIXME: do this on a worker thread try { #if NIX_WITH_S3_SUPPORT - auto parsed = ParsedS3URL::parse(request.uri); + auto parsed = ParsedS3URL::parse(request.uri.parsed()); std::string profile = parsed.profile.value_or(""); std::string region = parsed.region.value_or(Aws::Region::US_EAST_1); diff --git a/src/libstore/include/nix/store/filetransfer.hh b/src/libstore/include/nix/store/filetransfer.hh index 8a04293bd..6f541d463 100644 --- a/src/libstore/include/nix/store/filetransfer.hh +++ b/src/libstore/include/nix/store/filetransfer.hh @@ -71,7 +71,7 @@ extern const unsigned int RETRY_TIME_MS_DEFAULT; struct FileTransferRequest { - ParsedURL uri; + ValidURL uri; Headers headers; std::string expectedETag; bool verifyTLS = true; @@ -85,8 +85,8 @@ struct FileTransferRequest std::string mimeType; std::function dataCallback; - FileTransferRequest(ParsedURL uri) - : uri(uri) + FileTransferRequest(ValidURL uri) + : uri(std::move(uri)) , parentAct(getCurActivity()) { } diff --git a/src/libutil/include/nix/util/url.hh b/src/libutil/include/nix/util/url.hh index 5aa85230a..f2bd79b08 100644 --- a/src/libutil/include/nix/util/url.hh +++ b/src/libutil/include/nix/util/url.hh @@ -341,4 +341,63 @@ ParsedURL fixGitURL(const std::string & url); */ bool isValidSchemeName(std::string_view scheme); +/** + * Either a ParsedURL or a verbatim string, but the string must be a valid + * ParsedURL. This is necessary because in certain cases URI must be passed + * verbatim (e.g. in builtin fetchers), since those are specified by the user. + * In those cases normalizations performed by the ParsedURL might be surprising + * and undesirable, since Nix must be a universal client that has to work with + * various broken services that might interpret URLs in quirky and non-standard ways. + * + * One of those examples is space-as-plus encoding that is very widespread, but it's + * not strictly RFC3986 compliant. We must preserve that information verbatim. + * + * Though we perform parsing and validation for internal needs. + */ +struct ValidURL : private ParsedURL +{ + std::optional encoded; + + ValidURL(std::string str) + : ParsedURL(parseURL(str, /*lenient=*/false)) + , encoded(std::move(str)) + { + } + + ValidURL(std::string_view str) + : ValidURL(std::string{str}) + { + } + + ValidURL(ParsedURL parsed) + : ParsedURL{std::move(parsed)} + { + } + + /** + * Get the encoded URL (if specified) verbatim or encode the parsed URL. + */ + std::string to_string() const + { + return encoded.or_else([&]() -> std::optional { return ParsedURL::to_string(); }).value(); + } + + const ParsedURL & parsed() const & + { + return *this; + } + + std::string_view scheme() const & + { + return ParsedURL::scheme; + } + + const auto & path() const & + { + return ParsedURL::path; + } +}; + +std::ostream & operator<<(std::ostream & os, const ValidURL & url); + } // namespace nix diff --git a/src/libutil/url.cc b/src/libutil/url.cc index b9bf0b4f4..1c7fd3f0f 100644 --- a/src/libutil/url.cc +++ b/src/libutil/url.cc @@ -434,4 +434,10 @@ bool isValidSchemeName(std::string_view s) return std::regex_match(s.begin(), s.end(), regex, std::regex_constants::match_default); } +std::ostream & operator<<(std::ostream & os, const ValidURL & url) +{ + os << url.to_string(); + return os; +} + } // namespace nix diff --git a/src/nix/prefetch.cc b/src/nix/prefetch.cc index 88a4717a0..26905e34c 100644 --- a/src/nix/prefetch.cc +++ b/src/nix/prefetch.cc @@ -105,7 +105,7 @@ std::tuple prefetchFile( FdSink sink(fd.get()); - FileTransferRequest req(parseURL(url)); + FileTransferRequest req(ValidURL{url}); req.decompress = false; getFileTransfer()->download(std::move(req), sink); } diff --git a/tests/functional/fetchurl.sh b/tests/functional/fetchurl.sh index c25ac3216..5bc8ca625 100755 --- a/tests/functional/fetchurl.sh +++ b/tests/functional/fetchurl.sh @@ -88,3 +88,8 @@ requireDaemonNewerThan "2.20" expected=100 if [[ -v NIX_DAEMON_PACKAGE ]]; then expected=1; fi # work around the daemon not returning a 100 status correctly expectStderr $expected nix-build --expr '{ url }: builtins.derivation { name = "nix-cache-info"; system = "x86_64-linux"; builder = "builtin:fetchurl"; inherit url; outputHashMode = "flat"; }' --argstr url "file://$narxz" 2>&1 | grep 'must be a fixed-output or impure derivation' + +requireDaemonNewerThan "2.32.0pre20250831" + +expect 1 nix-build --expr 'import ' --argstr name 'name' --argstr url "file://authority.not.allowed/fetchurl.sh?a=1&a=2" --no-out-link |& + grepQuiet "error: file:// URL 'file://authority.not.allowed/fetchurl.sh?a=1&a=2' has unexpected authority 'authority.not.allowed'"