1
1
Fork 0
mirror of https://github.com/NixOS/nix.git synced 2025-11-08 19:46:02 +01:00

Remove validation of URLs passed to FileTransferRequest verbatim

CURL is not very strict about validation of URLs passed to it. We
should reflect this in our handling of URLs that we get from the user
in <nix/fetchurl.nix> or builtins.fetchurl. ValidURL was an attempt to
rectify this, but it turned out to be too strict. The only good way to
resolve this is to pass (in some cases) the user-provided string verbatim
to CURL. Other usages in libfetchers still benefit from using structured
ParsedURL and validation though.

nix store prefetch-file --name foo 'https://cdn.skypack.dev/big.js@^5.2.2'
error: 'https://cdn.skypack.dev/big.js@^5.2.2' is not a valid URL: leftover

(cherry picked from commit 47f427a172)
This commit is contained in:
Sergei Zimmerman 2025-10-13 22:05:46 +03:00 committed by github-actions[bot]
parent a25a219e79
commit 11f9c59140
7 changed files with 44 additions and 37 deletions

View file

@ -42,7 +42,7 @@ DownloadFileResult downloadFile(
if (cached && !cached->expired)
return useCached();
FileTransferRequest request(ValidURL{url});
FileTransferRequest request(VerbatimURL{url});
request.headers = headers;
if (cached)
request.expectedETag = getStrAttr(cached->value, "etag");
@ -107,13 +107,13 @@ DownloadFileResult downloadFile(
static DownloadTarballResult downloadTarball_(
const Settings & settings, const std::string & urlS, const Headers & headers, const std::string & displayPrefix)
{
ValidURL url = urlS;
ParsedURL url = parseURL(urlS);
// Some friendly error messages for common mistakes.
// Namely lets catch when the url is a local file path, but
// it is not in fact a tarball.
if (url.scheme() == "file") {
std::filesystem::path localPath = renderUrlPathEnsureLegal(url.path());
if (url.scheme == "file") {
std::filesystem::path localPath = renderUrlPathEnsureLegal(url.path);
if (!exists(localPath)) {
throw Error("tarball '%s' does not exist.", localPath);
}
@ -164,7 +164,7 @@ static DownloadTarballResult downloadTarball_(
/* Note: if the download is cached, `importTarball()` will receive
no data, which causes it to import an empty tarball. */
auto archive = !url.path().empty() && hasSuffix(toLower(url.path().back()), ".zip") ? ({
auto archive = !url.path.empty() && hasSuffix(toLower(url.path.back()), ".zip") ? ({
/* In streaming mode, libarchive doesn't handle
symlinks in zip files correctly (#10649). So write
the entire file to disk so libarchive can access it
@ -178,7 +178,7 @@ static DownloadTarballResult downloadTarball_(
}
TarArchive{path};
})
: TarArchive{*source};
: TarArchive{*source};
auto tarballCache = getTarballCache();
auto parseSink = tarballCache->getFileSystemObjectSink();
auto lastModified = unpackTarfileToSink(archive, *parseSink);

View file

@ -37,7 +37,7 @@ static void builtinFetchurl(const BuiltinBuilderContext & ctx)
auto fetch = [&](const std::string & url) {
auto source = sinkToSource([&](Sink & sink) {
FileTransferRequest request(ValidURL{url});
FileTransferRequest request(VerbatimURL{url});
request.decompress = false;
auto decompressor = makeDecompressionSink(unpack && hasSuffix(mainUrl, ".xz") ? "xz" : "none", sink);

View file

@ -79,7 +79,7 @@ extern const unsigned int RETRY_TIME_MS_DEFAULT;
struct FileTransferRequest
{
ValidURL uri;
VerbatimURL uri;
Headers headers;
std::string expectedETag;
bool verifyTLS = true;
@ -93,7 +93,7 @@ struct FileTransferRequest
std::string mimeType;
std::function<void(std::string_view data)> dataCallback;
FileTransferRequest(ValidURL uri)
FileTransferRequest(VerbatimURL uri)
: uri(std::move(uri))
, parentAct(getCurActivity())
{

View file

@ -6,6 +6,9 @@
#include "nix/util/error.hh"
#include "nix/util/canon-path.hh"
#include "nix/util/split.hh"
#include "nix/util/util.hh"
#include "nix/util/variant-wrapper.hh"
namespace nix {
@ -342,8 +345,7 @@ ParsedURL fixGitURL(const std::string & url);
bool isValidSchemeName(std::string_view scheme);
/**
* Either a ParsedURL or a verbatim string, but the string must be a valid
* ParsedURL. This is necessary because in certain cases URI must be passed
* Either a ParsedURL or a verbatim string. This is necessary because in certain cases URI must be passed
* verbatim (e.g. in builtin fetchers), since those are specified by the user.
* In those cases normalizations performed by the ParsedURL might be surprising
* and undesirable, since Nix must be a universal client that has to work with
@ -354,23 +356,23 @@ bool isValidSchemeName(std::string_view scheme);
*
* Though we perform parsing and validation for internal needs.
*/
struct ValidURL : private ParsedURL
struct VerbatimURL
{
std::optional<std::string> encoded;
using Raw = std::variant<std::string, ParsedURL>;
Raw raw;
ValidURL(std::string str)
: ParsedURL(parseURL(str, /*lenient=*/false))
, encoded(std::move(str))
VerbatimURL(std::string_view s)
: raw(std::string{s})
{
}
ValidURL(std::string_view str)
: ValidURL(std::string{str})
VerbatimURL(std::string s)
: raw(std::move(s))
{
}
ValidURL(ParsedURL parsed)
: ParsedURL{std::move(parsed)}
VerbatimURL(ParsedURL url)
: raw(std::move(url))
{
}
@ -379,25 +381,35 @@ struct ValidURL : private ParsedURL
*/
std::string to_string() const
{
return encoded.or_else([&]() -> std::optional<std::string> { return ParsedURL::to_string(); }).value();
return std::visit(
overloaded{
[](const std::string & str) { return str; }, [](const ParsedURL & url) { return url.to_string(); }},
raw);
}
const ParsedURL & parsed() const &
const ParsedURL parsed() const
{
return *this;
return std::visit(
overloaded{
[](const std::string & str) { return parseURL(str); }, [](const ParsedURL & url) { return url; }},
raw);
}
std::string_view scheme() const &
{
return ParsedURL::scheme;
}
const auto & path() const &
{
return ParsedURL::path;
return std::visit(
overloaded{
[](std::string_view str) {
auto scheme = splitPrefixTo(str, ':');
if (!scheme)
throw BadURL("URL '%s' doesn't have a scheme", str);
return *scheme;
},
[](const ParsedURL & url) -> std::string_view { return url.scheme; }},
raw);
}
};
std::ostream & operator<<(std::ostream & os, const ValidURL & url);
std::ostream & operator<<(std::ostream & os, const VerbatimURL & url);
} // namespace nix

View file

@ -434,7 +434,7 @@ bool isValidSchemeName(std::string_view s)
return std::regex_match(s.begin(), s.end(), regex, std::regex_constants::match_default);
}
std::ostream & operator<<(std::ostream & os, const ValidURL & url)
std::ostream & operator<<(std::ostream & os, const VerbatimURL & url)
{
os << url.to_string();
return os;

View file

@ -105,7 +105,7 @@ std::tuple<StorePath, Hash> prefetchFile(
FdSink sink(fd.get());
FileTransferRequest req(ValidURL{url});
FileTransferRequest req(VerbatimURL{url});
req.decompress = false;
getFileTransfer()->download(std::move(req), sink);
}

View file

@ -88,8 +88,3 @@ requireDaemonNewerThan "2.20"
expected=100
if [[ -v NIX_DAEMON_PACKAGE ]]; then expected=1; fi # work around the daemon not returning a 100 status correctly
expectStderr $expected nix-build --expr '{ url }: builtins.derivation { name = "nix-cache-info"; system = "x86_64-linux"; builder = "builtin:fetchurl"; inherit url; outputHashMode = "flat"; }' --argstr url "file://$narxz" 2>&1 | grep 'must be a fixed-output or impure derivation'
requireDaemonNewerThan "2.32.0pre20250831"
expect 1 nix-build --expr 'import <nix/fetchurl.nix>' --argstr name 'name' --argstr url "file://authority.not.allowed/fetchurl.sh?a=1&a=2" --no-out-link |&
grepQuiet "error: file:// URL 'file://authority.not.allowed/fetchurl.sh?a=1&a=2' has unexpected authority 'authority.not.allowed'"