From 11f9c59140a3bce6f9622db8b630ed25c7957318 Mon Sep 17 00:00:00 2001 From: Sergei Zimmerman Date: Mon, 13 Oct 2025 22:05:46 +0300 Subject: [PATCH] Remove validation of URLs passed to FileTransferRequest verbatim CURL is not very strict about validation of URLs passed to it. We should reflect this in our handling of URLs that we get from the user in or builtins.fetchurl. ValidURL was an attempt to rectify this, but it turned out to be too strict. The only good way to resolve this is to pass (in some cases) the user-provided string verbatim to CURL. Other usages in libfetchers still benefit from using structured ParsedURL and validation though. nix store prefetch-file --name foo 'https://cdn.skypack.dev/big.js@^5.2.2' error: 'https://cdn.skypack.dev/big.js@^5.2.2' is not a valid URL: leftover (cherry picked from commit 47f427a1723ba36e4f48dc3db6dcdafa206932e6) --- src/libfetchers/tarball.cc | 12 ++--- src/libstore/builtins/fetchurl.cc | 2 +- .../include/nix/store/filetransfer.hh | 4 +- src/libutil/include/nix/util/url.hh | 54 +++++++++++-------- src/libutil/url.cc | 2 +- src/nix/prefetch.cc | 2 +- tests/functional/fetchurl.sh | 5 -- 7 files changed, 44 insertions(+), 37 deletions(-) diff --git a/src/libfetchers/tarball.cc b/src/libfetchers/tarball.cc index 31d5ab460..863a0d680 100644 --- a/src/libfetchers/tarball.cc +++ b/src/libfetchers/tarball.cc @@ -42,7 +42,7 @@ DownloadFileResult downloadFile( if (cached && !cached->expired) return useCached(); - FileTransferRequest request(ValidURL{url}); + FileTransferRequest request(VerbatimURL{url}); request.headers = headers; if (cached) request.expectedETag = getStrAttr(cached->value, "etag"); @@ -107,13 +107,13 @@ DownloadFileResult downloadFile( static DownloadTarballResult downloadTarball_( const Settings & settings, const std::string & urlS, const Headers & headers, const std::string & displayPrefix) { - ValidURL url = urlS; + ParsedURL url = parseURL(urlS); // Some friendly error messages for common mistakes. // Namely lets catch when the url is a local file path, but // it is not in fact a tarball. - if (url.scheme() == "file") { - std::filesystem::path localPath = renderUrlPathEnsureLegal(url.path()); + if (url.scheme == "file") { + std::filesystem::path localPath = renderUrlPathEnsureLegal(url.path); if (!exists(localPath)) { throw Error("tarball '%s' does not exist.", localPath); } @@ -164,7 +164,7 @@ static DownloadTarballResult downloadTarball_( /* Note: if the download is cached, `importTarball()` will receive no data, which causes it to import an empty tarball. */ - auto archive = !url.path().empty() && hasSuffix(toLower(url.path().back()), ".zip") ? ({ + auto archive = !url.path.empty() && hasSuffix(toLower(url.path.back()), ".zip") ? ({ /* In streaming mode, libarchive doesn't handle symlinks in zip files correctly (#10649). So write the entire file to disk so libarchive can access it @@ -178,7 +178,7 @@ static DownloadTarballResult downloadTarball_( } TarArchive{path}; }) - : TarArchive{*source}; + : TarArchive{*source}; auto tarballCache = getTarballCache(); auto parseSink = tarballCache->getFileSystemObjectSink(); auto lastModified = unpackTarfileToSink(archive, *parseSink); diff --git a/src/libstore/builtins/fetchurl.cc b/src/libstore/builtins/fetchurl.cc index 7abfa4495..df056954e 100644 --- a/src/libstore/builtins/fetchurl.cc +++ b/src/libstore/builtins/fetchurl.cc @@ -37,7 +37,7 @@ static void builtinFetchurl(const BuiltinBuilderContext & ctx) auto fetch = [&](const std::string & url) { auto source = sinkToSource([&](Sink & sink) { - FileTransferRequest request(ValidURL{url}); + FileTransferRequest request(VerbatimURL{url}); request.decompress = false; auto decompressor = makeDecompressionSink(unpack && hasSuffix(mainUrl, ".xz") ? "xz" : "none", sink); diff --git a/src/libstore/include/nix/store/filetransfer.hh b/src/libstore/include/nix/store/filetransfer.hh index 2f2d59036..edd5f4dd4 100644 --- a/src/libstore/include/nix/store/filetransfer.hh +++ b/src/libstore/include/nix/store/filetransfer.hh @@ -79,7 +79,7 @@ extern const unsigned int RETRY_TIME_MS_DEFAULT; struct FileTransferRequest { - ValidURL uri; + VerbatimURL uri; Headers headers; std::string expectedETag; bool verifyTLS = true; @@ -93,7 +93,7 @@ struct FileTransferRequest std::string mimeType; std::function dataCallback; - FileTransferRequest(ValidURL uri) + FileTransferRequest(VerbatimURL uri) : uri(std::move(uri)) , parentAct(getCurActivity()) { diff --git a/src/libutil/include/nix/util/url.hh b/src/libutil/include/nix/util/url.hh index f2bd79b08..4ed80feb3 100644 --- a/src/libutil/include/nix/util/url.hh +++ b/src/libutil/include/nix/util/url.hh @@ -6,6 +6,9 @@ #include "nix/util/error.hh" #include "nix/util/canon-path.hh" +#include "nix/util/split.hh" +#include "nix/util/util.hh" +#include "nix/util/variant-wrapper.hh" namespace nix { @@ -342,8 +345,7 @@ ParsedURL fixGitURL(const std::string & url); bool isValidSchemeName(std::string_view scheme); /** - * Either a ParsedURL or a verbatim string, but the string must be a valid - * ParsedURL. This is necessary because in certain cases URI must be passed + * Either a ParsedURL or a verbatim string. This is necessary because in certain cases URI must be passed * verbatim (e.g. in builtin fetchers), since those are specified by the user. * In those cases normalizations performed by the ParsedURL might be surprising * and undesirable, since Nix must be a universal client that has to work with @@ -354,23 +356,23 @@ bool isValidSchemeName(std::string_view scheme); * * Though we perform parsing and validation for internal needs. */ -struct ValidURL : private ParsedURL +struct VerbatimURL { - std::optional encoded; + using Raw = std::variant; + Raw raw; - ValidURL(std::string str) - : ParsedURL(parseURL(str, /*lenient=*/false)) - , encoded(std::move(str)) + VerbatimURL(std::string_view s) + : raw(std::string{s}) { } - ValidURL(std::string_view str) - : ValidURL(std::string{str}) + VerbatimURL(std::string s) + : raw(std::move(s)) { } - ValidURL(ParsedURL parsed) - : ParsedURL{std::move(parsed)} + VerbatimURL(ParsedURL url) + : raw(std::move(url)) { } @@ -379,25 +381,35 @@ struct ValidURL : private ParsedURL */ std::string to_string() const { - return encoded.or_else([&]() -> std::optional { return ParsedURL::to_string(); }).value(); + return std::visit( + overloaded{ + [](const std::string & str) { return str; }, [](const ParsedURL & url) { return url.to_string(); }}, + raw); } - const ParsedURL & parsed() const & + const ParsedURL parsed() const { - return *this; + return std::visit( + overloaded{ + [](const std::string & str) { return parseURL(str); }, [](const ParsedURL & url) { return url; }}, + raw); } std::string_view scheme() const & { - return ParsedURL::scheme; - } - - const auto & path() const & - { - return ParsedURL::path; + return std::visit( + overloaded{ + [](std::string_view str) { + auto scheme = splitPrefixTo(str, ':'); + if (!scheme) + throw BadURL("URL '%s' doesn't have a scheme", str); + return *scheme; + }, + [](const ParsedURL & url) -> std::string_view { return url.scheme; }}, + raw); } }; -std::ostream & operator<<(std::ostream & os, const ValidURL & url); +std::ostream & operator<<(std::ostream & os, const VerbatimURL & url); } // namespace nix diff --git a/src/libutil/url.cc b/src/libutil/url.cc index a50de0944..7410e4062 100644 --- a/src/libutil/url.cc +++ b/src/libutil/url.cc @@ -434,7 +434,7 @@ bool isValidSchemeName(std::string_view s) return std::regex_match(s.begin(), s.end(), regex, std::regex_constants::match_default); } -std::ostream & operator<<(std::ostream & os, const ValidURL & url) +std::ostream & operator<<(std::ostream & os, const VerbatimURL & url) { os << url.to_string(); return os; diff --git a/src/nix/prefetch.cc b/src/nix/prefetch.cc index 26905e34c..18abfa0aa 100644 --- a/src/nix/prefetch.cc +++ b/src/nix/prefetch.cc @@ -105,7 +105,7 @@ std::tuple prefetchFile( FdSink sink(fd.get()); - FileTransferRequest req(ValidURL{url}); + FileTransferRequest req(VerbatimURL{url}); req.decompress = false; getFileTransfer()->download(std::move(req), sink); } diff --git a/tests/functional/fetchurl.sh b/tests/functional/fetchurl.sh index 5bc8ca625..c25ac3216 100755 --- a/tests/functional/fetchurl.sh +++ b/tests/functional/fetchurl.sh @@ -88,8 +88,3 @@ requireDaemonNewerThan "2.20" expected=100 if [[ -v NIX_DAEMON_PACKAGE ]]; then expected=1; fi # work around the daemon not returning a 100 status correctly expectStderr $expected nix-build --expr '{ url }: builtins.derivation { name = "nix-cache-info"; system = "x86_64-linux"; builder = "builtin:fetchurl"; inherit url; outputHashMode = "flat"; }' --argstr url "file://$narxz" 2>&1 | grep 'must be a fixed-output or impure derivation' - -requireDaemonNewerThan "2.32.0pre20250831" - -expect 1 nix-build --expr 'import ' --argstr name 'name' --argstr url "file://authority.not.allowed/fetchurl.sh?a=1&a=2" --no-out-link |& - grepQuiet "error: file:// URL 'file://authority.not.allowed/fetchurl.sh?a=1&a=2' has unexpected authority 'authority.not.allowed'"