1
1
Fork 0
mirror of https://github.com/NixOS/nix.git synced 2025-11-08 19:46:02 +01:00

fix(nix-prefetch-url): correctly extract filename from URLs with query parameters

Previously, `prefetchFile()` used `baseNameOf()` directly on the URL string
to extract the filename. This caused issues with URLs containing query
parameters that include slashes, such as S3 URLs with custom endpoints:

```
s3://bucket/file.txt?endpoint=http://server:9000
```

The `baseNameOf()` function naively searches for the rightmost `/` in the
entire string, which would find the `/` in `http://server:9000` and extract
`server:9000&region=...` as the filename. This resulted in invalid store
path names containing illegal characters like `:`.

This commit fixes the issue by:

1. Adding a `VerbatimURL::lastPathSegment()` method that extracts the last
   non-empty path segment from a URL, using `pathSegments(true)` to filter
   empty segments
2. Changing `prefetchFile()` to accept `const VerbatimURL &` and use the new
   `lastPathSegment()` method instead of manual path parsing
3. Adding early validation with `checkName()` to fail quickly on invalid
   filenames
4. Maintains backward compatibility by falling back to `baseNameOf()` for
   unparsable `VerbatimURL`s
This commit is contained in:
Bernardo Meurer Costa 2025-10-18 20:45:43 +00:00 committed by Sergei Zimmerman
parent ddf7de0a76
commit e3b3f05e5d
No known key found for this signature in database
3 changed files with 44 additions and 7 deletions

View file

@ -408,6 +408,17 @@ struct VerbatimURL
[](const ParsedURL & url) -> std::string_view { return url.scheme; }}, [](const ParsedURL & url) -> std::string_view { return url.scheme; }},
raw); raw);
} }
/**
* Get the last non-empty path segment from the URL.
*
* This is useful for extracting filenames from URLs.
* For example, "https://example.com/path/to/file.txt?query=value"
* returns "file.txt".
*
* @return The last non-empty path segment, or std::nullopt if no such segment exists.
*/
std::optional<std::string> lastPathSegment() const;
}; };
std::ostream & operator<<(std::ostream & os, const VerbatimURL & url); std::ostream & operator<<(std::ostream & os, const VerbatimURL & url);

View file

@ -4,6 +4,7 @@
#include "nix/util/split.hh" #include "nix/util/split.hh"
#include "nix/util/canon-path.hh" #include "nix/util/canon-path.hh"
#include "nix/util/strings-inline.hh" #include "nix/util/strings-inline.hh"
#include "nix/util/file-system.hh"
#include <boost/url.hpp> #include <boost/url.hpp>
@ -440,4 +441,21 @@ std::ostream & operator<<(std::ostream & os, const VerbatimURL & url)
return os; return os;
} }
std::optional<std::string> VerbatimURL::lastPathSegment() const
{
try {
auto parsedUrl = parsed();
auto segments = parsedUrl.pathSegments(/*skipEmpty=*/true);
if (std::ranges::empty(segments))
return std::nullopt;
return segments.back();
} catch (BadURL &) {
// Fall back to baseNameOf for unparsable URLs
auto name = baseNameOf(to_string());
if (name.empty())
return std::nullopt;
return std::string{name};
}
}
} // namespace nix } // namespace nix

View file

@ -13,6 +13,8 @@
#include "nix/cmd/misc-store-flags.hh" #include "nix/cmd/misc-store-flags.hh"
#include "nix/util/terminal.hh" #include "nix/util/terminal.hh"
#include "nix/util/environment-variables.hh" #include "nix/util/environment-variables.hh"
#include "nix/util/url.hh"
#include "nix/store/path.hh"
#include "man-pages.hh" #include "man-pages.hh"
@ -56,7 +58,7 @@ std::string resolveMirrorUrl(EvalState & state, const std::string & url)
std::tuple<StorePath, Hash> prefetchFile( std::tuple<StorePath, Hash> prefetchFile(
ref<Store> store, ref<Store> store,
std::string_view url, const VerbatimURL & url,
std::optional<std::string> name, std::optional<std::string> name,
HashAlgorithm hashAlgo, HashAlgorithm hashAlgo,
std::optional<Hash> expectedHash, std::optional<Hash> expectedHash,
@ -68,9 +70,15 @@ std::tuple<StorePath, Hash> prefetchFile(
/* Figure out a name in the Nix store. */ /* Figure out a name in the Nix store. */
if (!name) { if (!name) {
name = baseNameOf(url); name = url.lastPathSegment();
if (name->empty()) if (!name || name->empty())
throw Error("cannot figure out file name for '%s'", url); throw Error("cannot figure out file name for '%s'", url.to_string());
}
try {
checkName(*name);
} catch (BadStorePathName & e) {
e.addTrace({}, "file name '%s' was extracted from URL '%s'", *name, url.to_string());
throw;
} }
std::optional<StorePath> storePath; std::optional<StorePath> storePath;
@ -105,14 +113,14 @@ std::tuple<StorePath, Hash> prefetchFile(
FdSink sink(fd.get()); FdSink sink(fd.get());
FileTransferRequest req(VerbatimURL{url}); FileTransferRequest req(url);
req.decompress = false; req.decompress = false;
getFileTransfer()->download(std::move(req), sink); getFileTransfer()->download(std::move(req), sink);
} }
/* Optionally unpack the file. */ /* Optionally unpack the file. */
if (unpack) { if (unpack) {
Activity act(*logger, lvlChatty, actUnknown, fmt("unpacking '%s'", url)); Activity act(*logger, lvlChatty, actUnknown, fmt("unpacking '%s'", url.to_string()));
auto unpacked = (tmpDir.path() / "unpacked").string(); auto unpacked = (tmpDir.path() / "unpacked").string();
createDirs(unpacked); createDirs(unpacked);
unpackTarfile(tmpFile.string(), unpacked); unpackTarfile(tmpFile.string(), unpacked);
@ -128,7 +136,7 @@ std::tuple<StorePath, Hash> prefetchFile(
} }
} }
Activity act(*logger, lvlChatty, actUnknown, fmt("adding '%s' to the store", url)); Activity act(*logger, lvlChatty, actUnknown, fmt("adding '%s' to the store", url.to_string()));
auto info = store->addToStoreSlow( auto info = store->addToStoreSlow(
*name, PosixSourceAccessor::createAtRoot(tmpFile), method, hashAlgo, {}, expectedHash); *name, PosixSourceAccessor::createAtRoot(tmpFile), method, hashAlgo, {}, expectedHash);