diff --git a/src/libfetchers/tarball.cc b/src/libfetchers/tarball.cc
index 52038317e..8a8039b6b 100644
--- a/src/libfetchers/tarball.cc
+++ b/src/libfetchers/tarball.cc
@@ -43,7 +43,7 @@ DownloadFileResult downloadFile(
     if (cached && !cached->expired)
         return useCached();
 
-    FileTransferRequest request(parseURL(url));
+    FileTransferRequest request(ValidURL{url});
     request.headers = headers;
     if (cached)
         request.expectedETag = getStrAttr(cached->value, "etag");
@@ -109,13 +109,13 @@ DownloadFileResult downloadFile(
 static DownloadTarballResult downloadTarball_(
     const Settings & settings, const std::string & urlS, const Headers & headers, const std::string & displayPrefix)
 {
-    auto url = parseURL(urlS);
+    ValidURL url = urlS;
 
     // Some friendly error messages for common mistakes.
     // Namely lets catch when the url is a local file path, but
     // it is not in fact a tarball.
-    if (url.scheme == "file") {
-        std::filesystem::path localPath = renderUrlPathEnsureLegal(url.path);
+    if (url.scheme() == "file") {
+        std::filesystem::path localPath = renderUrlPathEnsureLegal(url.path());
         if (!exists(localPath)) {
             throw Error("tarball '%s' does not exist.", localPath);
         }
@@ -166,7 +166,7 @@ static DownloadTarballResult downloadTarball_(
 
     /* Note: if the download is cached, `importTarball()` will receive
        no data, which causes it to import an empty tarball. */
-    auto archive = !url.path.empty() && hasSuffix(toLower(url.path.back()), ".zip") ? ({
+    auto archive = !url.path().empty() && hasSuffix(toLower(url.path().back()), ".zip") ? ({
         /* In streaming mode, libarchive doesn't handle
            symlinks in zip files correctly (#10649). So write
            the entire file to disk so libarchive can access it
@@ -180,7 +180,7 @@ static DownloadTarballResult downloadTarball_(
         }
         TarArchive{path};
     })
-                                                                                    : TarArchive{*source};
+                                                                                        : TarArchive{*source};
     auto tarballCache = getTarballCache();
     auto parseSink = tarballCache->getFileSystemObjectSink();
     auto lastModified = unpackTarfileToSink(archive, *parseSink);
diff --git a/src/libstore/builtins/fetchurl.cc b/src/libstore/builtins/fetchurl.cc
index c44d4d5ee..7abfa4495 100644
--- a/src/libstore/builtins/fetchurl.cc
+++ b/src/libstore/builtins/fetchurl.cc
@@ -37,7 +37,7 @@ static void builtinFetchurl(const BuiltinBuilderContext & ctx)
     auto fetch = [&](const std::string & url) {
 
         auto source = sinkToSource([&](Sink & sink) {
-            FileTransferRequest request(parseURL(url));
+            FileTransferRequest request(ValidURL{url});
             request.decompress = false;
 
             auto decompressor = makeDecompressionSink(unpack && hasSuffix(mainUrl, ".xz") ? "xz" : "none", sink);
diff --git a/src/libstore/filetransfer.cc b/src/libstore/filetransfer.cc
index 0007b9ad8..a162df1ad 100644
--- a/src/libstore/filetransfer.cc
+++ b/src/libstore/filetransfer.cc
@@ -784,7 +784,7 @@ struct curlFileTransfer : public FileTransfer
 
     void enqueueItem(std::shared_ptr<TransferItem> item)
     {
-        if (item->request.data && item->request.uri.scheme != "http" && item->request.uri.scheme != "https")
+        if (item->request.data && item->request.uri.scheme() != "http" && item->request.uri.scheme() != "https")
             throw nix::Error("uploading to '%s' is not supported", item->request.uri.to_string());
 
         {
@@ -801,11 +801,11 @@ struct curlFileTransfer : public FileTransfer
     void enqueueFileTransfer(const FileTransferRequest & request, Callback<FileTransferResult> callback) override
     {
         /* Ugly hack to support s3:// URIs.
          */
-        if (request.uri.scheme == "s3") {
+        if (request.uri.scheme() == "s3") {
             // FIXME: do this on a worker thread
             try {
 #if NIX_WITH_S3_SUPPORT
-                auto parsed = ParsedS3URL::parse(request.uri);
+                auto parsed = ParsedS3URL::parse(request.uri.parsed());
                 std::string profile = parsed.profile.value_or("");
                 std::string region = parsed.region.value_or(Aws::Region::US_EAST_1);
diff --git a/src/libstore/include/nix/store/filetransfer.hh b/src/libstore/include/nix/store/filetransfer.hh
index 8a04293bd..6f541d463 100644
--- a/src/libstore/include/nix/store/filetransfer.hh
+++ b/src/libstore/include/nix/store/filetransfer.hh
@@ -71,7 +71,7 @@ extern const unsigned int RETRY_TIME_MS_DEFAULT;
 
 struct FileTransferRequest
 {
-    ParsedURL uri;
+    ValidURL uri;
     Headers headers;
     std::string expectedETag;
     bool verifyTLS = true;
@@ -85,8 +85,8 @@ struct FileTransferRequest
     std::string mimeType;
     std::function<void(std::string_view data)> dataCallback;
 
-    FileTransferRequest(ParsedURL uri)
-        : uri(uri)
+    FileTransferRequest(ValidURL uri)
+        : uri(std::move(uri))
         , parentAct(getCurActivity())
     {
     }
diff --git a/src/libutil/include/nix/util/url.hh b/src/libutil/include/nix/util/url.hh
index 5aa85230a..f2bd79b08 100644
--- a/src/libutil/include/nix/util/url.hh
+++ b/src/libutil/include/nix/util/url.hh
@@ -341,4 +341,63 @@ ParsedURL fixGitURL(const std::string & url);
  */
 bool isValidSchemeName(std::string_view scheme);
 
+/**
+ * Either a ParsedURL or a verbatim string, but the string must be a valid
+ * ParsedURL. This is necessary because in certain cases a URI must be passed
+ * verbatim (e.g. in builtin fetchers), since those are specified by the user.
+ * In those cases the normalizations performed by ParsedURL might be surprising
+ * and undesirable, since Nix must be a universal client that has to work with
+ * various broken services that might interpret URLs in quirky and non-standard ways.
+ *
+ * One such example is space-as-plus encoding, which is very widespread but
+ * not strictly RFC 3986 compliant. We must preserve that information verbatim.
+ *
+ * We still perform parsing and validation for internal needs.
+ */
+struct ValidURL : private ParsedURL
+{
+    std::optional<std::string> encoded;
+
+    ValidURL(std::string str)
+        : ParsedURL(parseURL(str, /*lenient=*/false))
+        , encoded(std::move(str))
+    {
+    }
+
+    ValidURL(std::string_view str)
+        : ValidURL(std::string{str})
+    {
+    }
+
+    ValidURL(ParsedURL parsed)
+        : ParsedURL{std::move(parsed)}
+    {
+    }
+
+    /**
+     * Get the encoded URL (if specified) verbatim, or encode the parsed URL.
+     */
+    std::string to_string() const
+    {
+        return encoded.or_else([&]() -> std::optional<std::string> { return ParsedURL::to_string(); }).value();
+    }
+
+    const ParsedURL & parsed() const &
+    {
+        return *this;
+    }
+
+    std::string_view scheme() const &
+    {
+        return ParsedURL::scheme;
+    }
+
+    const auto & path() const &
+    {
+        return ParsedURL::path;
+    }
+};
+
+std::ostream & operator<<(std::ostream & os, const ValidURL & url);
+
 } // namespace nix
diff --git a/src/libutil/url.cc b/src/libutil/url.cc
index b9bf0b4f4..1c7fd3f0f 100644
--- a/src/libutil/url.cc
+++ b/src/libutil/url.cc
@@ -434,4 +434,10 @@ bool isValidSchemeName(std::string_view s)
     return std::regex_match(s.begin(), s.end(), regex, std::regex_constants::match_default);
 }
 
+std::ostream & operator<<(std::ostream & os, const ValidURL & url)
+{
+    os << url.to_string();
+    return os;
+}
+
 } // namespace nix
diff --git a/src/nix/prefetch.cc b/src/nix/prefetch.cc
index 88a4717a0..26905e34c 100644
--- a/src/nix/prefetch.cc
+++ b/src/nix/prefetch.cc
@@ -105,7 +105,7 @@ std::tuple<StorePath, Hash> prefetchFile(
 
         FdSink sink(fd.get());
 
-        FileTransferRequest req(parseURL(url));
+        FileTransferRequest req(ValidURL{url});
         req.decompress = false;
         getFileTransfer()->download(std::move(req), sink);
     }
diff --git a/tests/functional/fetchurl.sh b/tests/functional/fetchurl.sh
index c25ac3216..5bc8ca625 100755
--- a/tests/functional/fetchurl.sh
+++ b/tests/functional/fetchurl.sh
@@ -88,3 +88,8 @@ requireDaemonNewerThan "2.20"
 expected=100
 if [[ -v NIX_DAEMON_PACKAGE ]]; then expected=1; fi # work around the daemon not returning a 100 status correctly
 expectStderr $expected nix-build --expr '{ url }: builtins.derivation { name = "nix-cache-info"; system = "x86_64-linux"; builder = "builtin:fetchurl"; inherit url; outputHashMode = "flat"; }' --argstr url "file://$narxz" 2>&1 | grep 'must be a fixed-output or impure derivation'
+
+requireDaemonNewerThan "2.32.0pre20250831"
+
+expect 1 nix-build --expr 'import <nix/fetchurl.nix>' --argstr name 'name' --argstr url "file://authority.not.allowed/fetchurl.sh?a=1&a=2" --no-out-link |&
+    grepQuiet "error: file:// URL 'file://authority.not.allowed/fetchurl.sh?a=1&a=2' has unexpected authority 'authority.not.allowed'"
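
Reviewer note (not part of the patch): a minimal sketch of how the new ValidURL type behaves, assuming only the declarations from url.hh above. The example URL, the standalone main() harness, and the include path are illustrative assumptions, not code from this change.

    #include <cassert>
    #include <iostream>
    #include <string>

    #include "nix/util/url.hh" // assumed include path, per src/libutil/include/nix/util/url.hh

    using namespace nix;

    int main()
    {
        // Constructed from a string (hypothetical URL): parseURL(str, /*lenient=*/false)
        // validates it, but the original spelling is kept in `encoded`, so to_string()
        // returns it verbatim -- the "+" is neither decoded nor re-encoded.
        ValidURL fromString{std::string{"https://example.org/search?q=a+b"}};
        assert(fromString.scheme() == "https");
        assert(fromString.to_string() == "https://example.org/search?q=a+b");

        // Constructed from an already-parsed URL: `encoded` stays empty, so
        // to_string() falls back to re-encoding via ParsedURL::to_string().
        ValidURL fromParsed{parseURL("https://example.org/a%20b")};

        // Streaming uses the new operator<<, which also goes through to_string().
        std::cout << fromParsed << std::endl;

        return 0;
    }

This is also the point of making FileTransferRequest take a ValidURL: a malformed URL now fails at request construction (parseURL throws) rather than deep inside the transfer machinery, while a well-formed but quirky URL (e.g. space-as-plus) is passed to the server byte-for-byte as the user wrote it.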