1
1
Fork 0
mirror of https://github.com/NixOS/nix.git synced 2025-11-09 03:56:01 +01:00

refactor(libstore): extract S3 URL parsing into separate files

Move ParsedS3URL from s3.cc/.hh into dedicated s3-url.cc/.hh files.
This separates URL parsing utilities (which are protocol-agnostic) from
the AWS SDK-specific S3Helper implementation, making the code cleaner
and enabling reuse by a future curl-based S3 implementation.
This commit is contained in:
Bernardo Meurer Costa 2025-10-01 16:01:28 +00:00
parent 251479bdda
commit b72898b2aa
No known key found for this signature in database
7 changed files with 76 additions and 59 deletions

121
src/libstore/s3-url.cc Normal file
View file

@ -0,0 +1,121 @@
#include "nix/store/s3-url.hh"
#if NIX_WITH_S3_SUPPORT
# include "nix/util/error.hh"
# include "nix/util/split.hh"
# include "nix/util/strings-inline.hh"
# include <ranges>
# include <string_view>
using namespace std::string_view_literals;
namespace nix {
/**
 * Interpret an already-parsed generic URL as an S3 URL.
 *
 * The authority's host becomes the bucket, every path segment after the
 * leading empty one becomes the key, and the `profile`, `region`, `scheme`
 * and `endpoint` query parameters are picked up when present.
 *
 * @param parsed The generic URL to interpret; its scheme must be `s3`.
 * @return The S3-specific decomposition of `parsed`.
 * @throws BadURL if the scheme is not `s3`, the bucket name is missing or
 *         not a registered name, the key is missing, or the `endpoint`
 *         parameter can be parsed neither as a URL nor as an authority.
 *         A trace naming the offending URI is attached before rethrowing.
 */
ParsedS3URL ParsedS3URL::parse(const ParsedURL & parsed)
try {
    if (parsed.scheme != "s3"sv)
        throw BadURL("URI scheme '%s' is not 's3'", parsed.scheme);

    /* Yeah, S3 URLs in Nix have the bucket name as authority. Luckily registered name type
       authority has the same restrictions (mostly) as S3 bucket names.
       TODO: Validate against:
       https://docs.aws.amazon.com/AmazonS3/latest/userguide/bucketnamingrules.html#general-purpose-bucket-names
       */
    const bool hasValidBucket = parsed.authority && !parsed.authority->host.empty()
                                && parsed.authority->hostType == ParsedURL::Authority::HostType::Name;
    if (!hasValidBucket)
        throw BadURL("URI has a missing or invalid bucket name");

    /* TODO: Validate the key against:
     * https://docs.aws.amazon.com/AmazonS3/latest/userguide/object-keys.html#object-key-guidelines
     */

    /* Look up a single query parameter; absent parameters yield nullopt. */
    auto queryParam = [&parsed](std::string_view name) -> std::optional<std::string> {
        if (auto found = parsed.query.find(name); found != parsed.query.end())
            return found->second;
        return std::nullopt;
    };

    /* The path must begin with an empty segment and be followed by at
       least one more segment, which forms the key. */
    const bool hasKey = parsed.path.size() >= 2 && parsed.path.front().empty();
    if (!hasKey)
        throw BadURL("URI has a missing or invalid key");

    auto keySegments = std::ranges::to<std::vector<std::string>>(parsed.path | std::views::drop(1));

    /* Turn the optional `endpoint` parameter into the endpoint variant. */
    auto resolveEndpoint = [&]() -> decltype(ParsedS3URL::endpoint) {
        auto rawEndpoint = queryParam("endpoint");
        if (!rawEndpoint)
            return std::monostate();

        /* Try to parse the endpoint as a full-fledged URL with a scheme. */
        try {
            return parseURL(*rawEndpoint);
        } catch (BadURL &) {
            /* Not a complete URL; fall back to treating it as a bare authority. */
        }

        return ParsedURL::Authority::parse(*rawEndpoint);
    };

    return ParsedS3URL{
        .bucket = parsed.authority->host,
        .key = std::move(keySegments),
        .profile = queryParam("profile"),
        .region = queryParam("region"),
        .scheme = queryParam("scheme"),
        .endpoint = resolveEndpoint(),
    };
} catch (BadURL & e) {
    e.addTrace({}, "while parsing S3 URI: '%s'", parsed.to_string());
    throw;
}
/**
 * Build the URL under which this S3 object is addressed.
 *
 * The bucket and key are always appended as path segments. What they are
 * appended to depends on the configured endpoint:
 *  - no endpoint: host `s3.<region>.amazonaws.com` (region defaults to
 *    `us-east-1`), scheme from `scheme` (defaults to `https`);
 *  - authority-only endpoint: that authority, scheme from `scheme`
 *    (defaults to `https`);
 *  - full URL endpoint: that URL's scheme and authority, with the bucket
 *    and key appended after the endpoint's own path segments.
 *
 * @return The resulting URL, carrying only scheme, authority and path.
 */
ParsedURL ParsedS3URL::toHttpsUrl() const
{
    auto asView = [](const auto & s) { return std::string_view{s}; };

    auto effectiveRegion = region.transform(asView).value_or("us-east-1");
    auto effectiveScheme = scheme.transform(asView).value_or("https");

    /* Append "<bucket>/<key segments...>" to the given leading segments. */
    auto objectPath = [&](auto segments) {
        segments.push_back(bucket);
        segments.insert(segments.end(), key.begin(), key.end());
        return segments;
    };

    /* Leading segments for an absolute path with nothing in front of the bucket. */
    auto rootedObjectPath = [&] { return objectPath(std::vector<std::string>{""}); };

    // Dispatch on whichever endpoint alternative is active.
    return std::visit(
        overloaded{
            // No custom endpoint: use the standard regional AWS S3 host.
            [&](const std::monostate &) {
                return ParsedURL{
                    .scheme = std::string{effectiveScheme},
                    .authority = ParsedURL::Authority{.host = "s3." + effectiveRegion + ".amazonaws.com"},
                    .path = rootedObjectPath(),
                };
            },
            // Endpoint is just an authority (hostname/port).
            [&](const ParsedURL::Authority & auth) {
                return ParsedURL{
                    .scheme = std::string{effectiveScheme},
                    .authority = auth,
                    .path = rootedObjectPath(),
                };
            },
            // Endpoint is already a ParsedURL (e.g., http://server:9000):
            // keep its scheme and authority and extend its path.
            [&](const ParsedURL & endpointUrl) {
                return ParsedURL{
                    .scheme = endpointUrl.scheme,
                    .authority = endpointUrl.authority,
                    .path = objectPath(endpointUrl.path),
                };
            },
        },
        endpoint);
}
} // namespace nix
#endif