1
1
Fork 0
mirror of https://github.com/NixOS/nix.git synced 2025-11-15 15:02:42 +01:00

Fix ParsedURL handling of %2F in URL paths

See the new extensive doxygen in `url.hh`.
This fixes fetching gitlab: flakes.

Paths are now stored as a std::vector of individual path
segments, which can themselves contain path separators '/' (%2F).
This is necessary to make the Gitlab's /projects/ API work.

Co-authored-by: John Ericson <John.Ericson@Obsidian.Systems>
Co-authored-by: Sergei Zimmerman <sergei@zimmerman.foo>
This commit is contained in:
Jörg Thalheim 2025-08-26 12:49:28 +02:00 committed by Sergei Zimmerman
parent 6839f3de55
commit c436b7a32a
No known key found for this signature in database
19 changed files with 446 additions and 117 deletions

View file

@ -815,7 +815,7 @@ struct curlFileTransfer : public FileTransfer
S3Helper s3Helper(profile, region, scheme, endpoint);
// FIXME: implement ETag
auto s3Res = s3Helper.getObject(parsed.bucket, parsed.key);
auto s3Res = s3Helper.getObject(parsed.bucket, encodeUrlPath(parsed.key));
FileTransferResult res;
if (!s3Res.data)
throw FileTransferError(NotFound, {}, "S3 object '%s' does not exist", request.uri);

View file

@ -27,7 +27,7 @@ HttpBinaryCacheStoreConfig::HttpBinaryCacheStoreConfig(
+ (!_cacheUri.empty() ? _cacheUri
: throw UsageError("`%s` Store requires a non-empty authority in Store URL", scheme))))
{
while (!cacheUri.path.empty() && cacheUri.path.back() == '/')
while (!cacheUri.path.empty() && cacheUri.path.back() == "")
cacheUri.path.pop_back();
}
@ -37,7 +37,7 @@ StoreReference HttpBinaryCacheStoreConfig::getReference() const
.variant =
StoreReference::Specified{
.scheme = cacheUri.scheme,
.authority = (cacheUri.authority ? cacheUri.authority->to_string() : "") + cacheUri.path,
.authority = cacheUri.renderAuthorityAndPath(),
},
.params = cacheUri.query,
};
@ -157,7 +157,7 @@ protected:
/* Otherwise the last path fragment will get discarded. */
auto cacheUriWithTrailingSlash = config->cacheUri;
if (!cacheUriWithTrailingSlash.path.empty())
cacheUriWithTrailingSlash.path += "/";
cacheUriWithTrailingSlash.path.push_back("");
/* path is not a path, but a full relative or absolute
URL, e.g. we've seen in the wild NARINFO files have a URL

View file

@ -54,7 +54,12 @@ struct S3Helper
struct ParsedS3URL
{
std::string bucket;
std::string key;
/**
* @see ParsedURL::path. This is a vector for the same reason.
* Unlike ParsedURL::path this doesn't include the leading empty segment,
* since the bucket name is necessary.
*/
std::vector<std::string> key;
std::optional<std::string> profile;
std::optional<std::string> region;
std::optional<std::string> scheme;

View file

@ -77,12 +77,22 @@ struct StoreReference
*/
std::string render(bool withParams = true) const;
std::string to_string() const
{
return render();
}
/**
* Parse a URI into a store reference.
*/
static StoreReference parse(const std::string & uri, const Params & extraParams = Params{});
};
static inline std::ostream & operator<<(std::ostream & os, const StoreReference & ref)
{
return os << ref.render();
}
/**
* Split URI into protocol+hierarchy part and its parameter set.
*/

View file

@ -3,6 +3,9 @@
#include "nix/util/url.hh"
#include "nix/util/util.hh"
#include "nix/util/canon-path.hh"
#include "nix/util/strings-inline.hh"
#include <ranges>
namespace nix {
@ -24,10 +27,6 @@ try {
|| parsed.authority->hostType != ParsedURL::Authority::HostType::Name)
throw BadURL("URI has a missing or invalid bucket name");
std::string_view key = parsed.path;
/* Make the key a relative path. */
splitPrefix(key, "/");
/* TODO: Validate the key against:
* https://docs.aws.amazon.com/AmazonS3/latest/userguide/object-keys.html#object-key-guidelines
*/
@ -41,10 +40,14 @@ try {
};
auto endpoint = getOptionalParam("endpoint");
if (parsed.path.size() <= 1 || !parsed.path.front().empty())
throw BadURL("URI has a missing or invalid key");
auto path = std::views::drop(parsed.path, 1) | std::ranges::to<std::vector<std::string>>();
return ParsedS3URL{
.bucket = parsed.authority->host,
.key = std::string{key},
.key = std::move(path),
.profile = getOptionalParam("profile"),
.region = getOptionalParam("region"),
.scheme = getOptionalParam("scheme"),
@ -78,26 +81,35 @@ ParsedURL ParsedS3URL::toHttpsUrl() const
overloaded{
[&](const std::monostate &) {
// No custom endpoint, use standard AWS S3 endpoint
std::vector<std::string> path{""};
path.push_back(bucket);
path.insert(path.end(), key.begin(), key.end());
return ParsedURL{
.scheme = std::string{schemeStr},
.authority = ParsedURL::Authority{.host = "s3." + regionStr + ".amazonaws.com"},
.path = (CanonPath::root / bucket / CanonPath(key)).abs(),
.path = std::move(path),
};
},
[&](const ParsedURL::Authority & auth) {
// Endpoint is just an authority (hostname/port)
std::vector<std::string> path{""};
path.push_back(bucket);
path.insert(path.end(), key.begin(), key.end());
return ParsedURL{
.scheme = std::string{schemeStr},
.authority = auth,
.path = (CanonPath::root / bucket / CanonPath(key)).abs(),
.path = std::move(path),
};
},
[&](const ParsedURL & endpointUrl) {
// Endpoint is already a ParsedURL (e.g., http://server:9000)
auto path = endpointUrl.path;
path.push_back(bucket);
path.insert(path.end(), key.begin(), key.end());
return ParsedURL{
.scheme = endpointUrl.scheme,
.authority = endpointUrl.authority,
.path = (CanonPath(endpointUrl.path) / bucket / CanonPath(key)).abs(),
.path = std::move(path),
};
},
},

View file

@ -48,13 +48,11 @@ StoreReference StoreReference::parse(const std::string & uri, const StoreReferen
auto parsedUri = parseURL(uri, /*lenient=*/true);
params.insert(parsedUri.query.begin(), parsedUri.query.end());
auto baseURI = parsedUri.authority.value_or(ParsedURL::Authority{}).to_string() + parsedUri.path;
return {
.variant =
Specified{
.scheme = std::move(parsedUri.scheme),
.authority = std::move(baseURI),
.authority = parsedUri.renderAuthorityAndPath(),
},
.params = std::move(params),
};