1
1
Fork 0
mirror of https://github.com/NixOS/nix.git synced 2025-11-09 12:06:01 +01:00

libstore: Introduce ParsedS3URL type

This systematizes the way our s3:// URLs are parsed in filetransfer.cc.
Yoinked out and refactored out of [1].

[1]: https://github.com/NixOS/nix/pull/13752

Co-authored-by: Bernardo Meurer Costa <beme@anthropic.com>
This commit is contained in:
Sergei Zimmerman 2025-08-19 23:21:36 +03:00
parent 5c0eff24d5
commit 69fcc2cfc1
No known key found for this signature in database
8 changed files with 212 additions and 28 deletions

View file

@ -74,6 +74,7 @@ sources = files(
'path.cc', 'path.cc',
'references.cc', 'references.cc',
's3-binary-cache-store.cc', 's3-binary-cache-store.cc',
's3.cc',
'serve-protocol.cc', 'serve-protocol.cc',
'ssh-store.cc', 'ssh-store.cc',
'store-reference.cc', 'store-reference.cc',

96
src/libstore-tests/s3.cc Normal file
View file

@ -0,0 +1,96 @@
#include "nix/store/s3.hh"
#include "nix/util/tests/gmock-matchers.hh"
#if NIX_WITH_S3_SUPPORT
# include <gtest/gtest.h>
# include <gmock/gmock.h>
namespace nix {
/**
 * One row of the parameterised ParsedS3URLTest table: an input URL together
 * with the ParsedS3URL it is expected to parse into.
 *
 * The member order is significant: the table below builds these
 * positionally as `{url, expected, description}`.
 */
struct ParsedS3URLTestCase
{
/** The `s3://` URL string handed to ParsedS3URL::parse. */
std::string url;
/** The value the parse result is compared against with ASSERT_EQ. */
ParsedS3URL expected;
/** Returned by the name generator given to INSTANTIATE_TEST_SUITE_P, i.e. the test's name suffix. */
std::string description;
};
/**
 * Value-parameterised fixture over ParsedS3URLTestCase. It carries no state
 * of its own; it only exists so TEST_P bodies can call GetParam().
 */
class ParsedS3URLTest : public ::testing::WithParamInterface<ParsedS3URLTestCase>, public ::testing::Test
{};
/* Every table entry must parse without throwing and compare equal to its expectation. */
TEST_P(ParsedS3URLTest, parseS3URLSuccessfully)
{
    const ParsedS3URLTestCase & param = GetParam();
    ASSERT_EQ(ParsedS3URL::parse(param.url), param.expected);
}
/* Table of URLs that must parse successfully, each paired with the exact
   ParsedS3URL it is expected to produce. The trailing string of every case
   is its description, which the name generator at the bottom turns into the
   test name. */
INSTANTIATE_TEST_SUITE_P(
QueryParams,
ParsedS3URLTest,
::testing::Values(
/* Bucket and key only, no query parameters. */
ParsedS3URLTestCase{
"s3://my-bucket/my-key.txt",
{
.bucket = "my-bucket",
.key = "my-key.txt",
},
"basic_s3_bucket"},
/* A key containing slashes, plus a single `region` parameter. */
ParsedS3URLTestCase{
"s3://prod-cache/nix/store/abc123.nar.xz?region=eu-west-1",
{
.bucket = "prod-cache",
.key = "nix/store/abc123.nar.xz",
.region = "eu-west-1",
},
"with_region"},
/* All four recognised parameters at once, with `region` given twice.
   The scheme-less endpoint is expected to come back as an Authority. */
ParsedS3URLTestCase{
"s3://bucket/key?region=us-west-2&profile=prod&endpoint=custom.s3.com&scheme=https&region=us-east-1",
{
.bucket = "bucket",
.key = "key",
.profile = "prod",
.region = "us-west-2", //< using the first parameter (decodeQuery ignores duplicates)
.scheme = "https",
.endpoint = ParsedURL::Authority{.host = "custom.s3.com"},
},
"complex"},
/* Parameter order in the URL differs from member order in the struct. */
ParsedS3URLTestCase{
"s3://cache/file.txt?profile=production&region=ap-southeast-2",
{
.bucket = "cache",
.key = "file.txt",
.profile = "production",
.region = "ap-southeast-2",
},
"with_profile_and_region"},
/* An endpoint carrying its own scheme is expected to come back as a full
   ParsedURL; the separate `scheme` parameter is kept as given. */
ParsedS3URLTestCase{
"s3://bucket/key?endpoint=https://minio.local&scheme=http",
{
.bucket = "bucket",
.key = "key",
/* TODO: Figure out what AWS SDK is doing when both endpointOverride and scheme are set. */
.scheme = "http",
.endpoint =
ParsedURL{
.scheme = "https",
.authority = ParsedURL::Authority{.host = "minio.local"},
},
},
"with_absolute_endpoint_uri"}),
/* Name each instantiated test after its case description. */
[](const ::testing::TestParamInfo<ParsedS3URLTestCase> & info) { return info.param.description; });
/* URLs without a usable bucket must be rejected with a BadURL that names the bucket. */
TEST(InvalidParsedS3URLTest, parseS3URLErrors)
{
    /* Shared expectation: a BadURL whose message contains the bucket diagnostic. */
    auto rejectsBucket = ::testing::ThrowsMessage<BadURL>(
        testing::HasSubstrIgnoreANSIMatcher("error: URI has a missing or invalid bucket name"));

    /* The authority component is empty, so there is no bucket at all. */
    ASSERT_THAT([] { ParsedS3URL::parse("s3:///key"); }, rejectsBucket);

    /* The authority is present but is not accepted as a bucket name
       (presumably because it is classified as an IP address, not a registered name). */
    ASSERT_THAT([] { ParsedS3URL::parse("s3://127.0.0.1"); }, rejectsBucket);
}
} // namespace nix
#endif

View file

@ -798,22 +798,6 @@ struct curlFileTransfer : public FileTransfer
#endif #endif
} }
#if NIX_WITH_S3_SUPPORT
/**
 * Split an `s3://bucket/key?params` URI into its bucket name, object key
 * and query parameters.
 *
 * The first five characters are skipped unconditionally as the `s3://`
 * prefix; the bucket is everything from there up to the next `/`, and the
 * key is everything after that `/`.
 *
 * @throws nix::Error if no `/` follows the prefix.
 */
std::tuple<std::string, std::string, Store::Config::Params> parseS3Uri(std::string uri)
{
    /* Length of the "s3://" prefix, i.e. the offset at which the bucket name starts. */
    constexpr std::string::size_type bucketStart = 5;

    auto [path, params] = splitUriAndParams(uri);

    const auto separator = path.find('/', bucketStart);
    if (separator == std::string::npos)
        throw nix::Error("bad S3 URI '%s'", path);

    std::string bucketName = path.substr(bucketStart, separator - bucketStart);
    std::string key = path.substr(separator + 1);

    return {bucketName, key, params};
}
#endif
void enqueueFileTransfer(const FileTransferRequest & request, Callback<FileTransferResult> callback) override void enqueueFileTransfer(const FileTransferRequest & request, Callback<FileTransferResult> callback) override
{ {
/* Ugly hack to support s3:// URIs. */ /* Ugly hack to support s3:// URIs. */
@ -821,17 +805,17 @@ struct curlFileTransfer : public FileTransfer
// FIXME: do this on a worker thread // FIXME: do this on a worker thread
try { try {
#if NIX_WITH_S3_SUPPORT #if NIX_WITH_S3_SUPPORT
auto [bucketName, key, params] = parseS3Uri(request.uri); auto parsed = ParsedS3URL::parse(request.uri);
std::string profile = getOr(params, "profile", ""); std::string profile = parsed.profile.value_or("");
std::string region = getOr(params, "region", Aws::Region::US_EAST_1); std::string region = parsed.region.value_or(Aws::Region::US_EAST_1);
std::string scheme = getOr(params, "scheme", ""); std::string scheme = parsed.scheme.value_or("");
std::string endpoint = getOr(params, "endpoint", ""); std::string endpoint = parsed.getEncodedEndpoint().value_or("");
S3Helper s3Helper(profile, region, scheme, endpoint); S3Helper s3Helper(profile, region, scheme, endpoint);
// FIXME: implement ETag // FIXME: implement ETag
auto s3Res = s3Helper.getObject(bucketName, key); auto s3Res = s3Helper.getObject(parsed.bucket, parsed.key);
FileTransferResult res; FileTransferResult res;
if (!s3Res.data) if (!s3Res.data)
throw FileTransferError(NotFound, {}, "S3 object '%s' does not exist", request.uri); throw FileTransferError(NotFound, {}, "S3 object '%s' does not exist", request.uri);

View file

@ -4,9 +4,12 @@
#if NIX_WITH_S3_SUPPORT #if NIX_WITH_S3_SUPPORT
# include "nix/util/ref.hh" # include "nix/util/ref.hh"
# include "nix/util/url.hh"
# include "nix/util/util.hh"
# include <optional> # include <optional>
# include <string> # include <string>
# include <variant>
namespace Aws { namespace Aws {
namespace Client { namespace Client {
@ -45,6 +48,36 @@ struct S3Helper
FileTransferResult getObject(const std::string & bucketName, const std::string & key); FileTransferResult getObject(const std::string & bucketName, const std::string & key);
}; };
/**
 * The pieces of an `s3://` URL: the bucket, the object key and the
 * optional settings carried in the query string.
 */
struct ParsedS3URL
{
    std::string bucket;
    std::string key;
    std::optional<std::string> profile;
    std::optional<std::string> region;
    std::optional<std::string> scheme;

    /**
     * Endpoint override, in one of three shapes:
     *  - `std::monostate`: no endpoint was given;
     *  - `ParsedURL`: an absolute URI, i.e. one with a scheme such as `http:`;
     *  - `ParsedURL::Authority`: a bare authority (an IP address or a registered name).
     */
    std::variant<std::monostate, ParsedURL, ParsedURL::Authority> endpoint;

    /**
     * Render the endpoint back to a string via the alternative's own
     * `to_string()`, or return `std::nullopt` when no endpoint is set.
     */
    std::optional<std::string> getEncodedEndpoint() const
    {
        using Encoded = std::optional<std::string>;
        return std::visit(
            overloaded{
                [](const std::monostate &) { return Encoded{}; },
                [](const ParsedURL & url) { return Encoded{url.to_string()}; },
                [](const ParsedURL::Authority & authority) { return Encoded{authority.to_string()}; },
            },
            endpoint);
    }

    /**
     * Parse an `s3://` URL string into its components.
     */
    static ParsedS3URL parse(std::string_view uri);

    auto operator<=>(const ParsedS3URL & other) const = default;
};
} // namespace nix } // namespace nix
#endif #endif

View file

@ -321,6 +321,7 @@ sources = files(
'remote-store.cc', 'remote-store.cc',
'restricted-store.cc', 'restricted-store.cc',
's3-binary-cache-store.cc', 's3-binary-cache-store.cc',
's3.cc',
'serve-protocol-connection.cc', 'serve-protocol-connection.cc',
'serve-protocol.cc', 'serve-protocol.cc',
'sqlite.cc', 'sqlite.cc',

71
src/libstore/s3.cc Normal file
View file

@ -0,0 +1,71 @@
#include "nix/store/s3.hh"
#include "nix/util/split.hh"
#include "nix/util/url.hh"
namespace nix {
using namespace std::string_view_literals;
#if NIX_WITH_S3_SUPPORT
/**
 * Parse an `s3://bucket/key?...` URI.
 *
 * The URL authority becomes the bucket and the path, minus one leading `/`,
 * becomes the key. `profile`, `region`, `scheme` and `endpoint` are read
 * from the query string and are left unset when absent.
 *
 * @throws BadURL if the scheme is not `s3` or if the authority is missing,
 * empty or not a registered name. Any BadURL leaving this function gets a
 * trace entry naming the offending URI.
 */
ParsedS3URL ParsedS3URL::parse(std::string_view uri)
try {
    auto url = parseURL(uri);

    if (url.scheme != "s3"sv)
        throw BadURL("URI scheme '%s' is not 's3'", url.scheme);

    /* S3 URLs in Nix carry the bucket name in the authority position. A
       registered-name authority has (mostly) the same restrictions as an S3
       bucket name, so anything else is rejected here.
       TODO: Validate against:
       https://docs.aws.amazon.com/AmazonS3/latest/userguide/bucketnamingrules.html#general-purpose-bucket-names
    */
    const bool hasBucket = url.authority && !url.authority->host.empty()
        && url.authority->hostType == ParsedURL::Authority::HostType::Name;
    if (!hasBucket)
        throw BadURL("URI has a missing or invalid bucket name");

    /* The object key is the path made relative, i.e. without its leading slash. */
    std::string_view objectKey = url.path;
    splitPrefix(objectKey, "/");

    /* TODO: Validate the key against:
     * https://docs.aws.amazon.com/AmazonS3/latest/userguide/object-keys.html#object-key-guidelines
     */

    /* Look up a single query parameter; absent parameters yield nullopt. */
    auto queryParam = [&url](std::string_view name) -> std::optional<std::string> {
        if (auto found = url.query.find(name); found != url.query.end())
            return found->second;
        return std::nullopt;
    };

    /* Classify the raw endpoint string: nothing, an absolute URL, or a bare authority. */
    auto parseEndpoint = [](const std::optional<std::string> & raw) -> decltype(ParsedS3URL::endpoint) {
        if (!raw)
            return std::monostate();

        /* First attempt: a full-fledged URL with a scheme. */
        try {
            return parseURL(*raw);
        } catch (BadURL &) {
            /* Not a URL; fall through and treat it as an authority. */
        }

        return ParsedURL::Authority::parse(*raw);
    };

    return ParsedS3URL{
        .bucket = std::move(url.authority->host),
        .key = std::string{objectKey},
        .profile = queryParam("profile"),
        .region = queryParam("region"),
        .scheme = queryParam("scheme"),
        .endpoint = parseEndpoint(queryParam("endpoint")),
    };
} catch (BadURL & e) {
    e.addTrace({}, "while parsing S3 URI: '%s'", uri);
    throw;
}
#endif
} // namespace nix

View file

@ -30,7 +30,7 @@ struct ParsedURL
}; };
static Authority parse(std::string_view encodedAuthority); static Authority parse(std::string_view encodedAuthority);
bool operator==(const Authority & other) const = default; auto operator<=>(const Authority & other) const = default;
std::string to_string() const; std::string to_string() const;
friend std::ostream & operator<<(std::ostream & os, const Authority & self); friend std::ostream & operator<<(std::ostream & os, const Authority & self);
@ -81,7 +81,7 @@ struct ParsedURL
std::string to_string() const; std::string to_string() const;
bool operator==(const ParsedURL & other) const noexcept = default; auto operator<=>(const ParsedURL & other) const noexcept = default;
/** /**
* Remove `.` and `..` path elements. * Remove `.` and `..` path elements.
@ -111,7 +111,7 @@ std::string encodeQuery(const StringMap & query);
* *
* @throws BadURL * @throws BadURL
*/ */
ParsedURL parseURL(const std::string & url); ParsedURL parseURL(std::string_view url);
/** /**
* Although that's not really standardized anywhere, a number of tools * Although that's not really standardized anywhere, a number of tools

View file

@ -108,10 +108,8 @@ static std::string percentEncodeCharSet(std::string_view s, auto charSet)
return res; return res;
} }
ParsedURL parseURL(const std::string & url) ParsedURL parseURL(std::string_view url)
try { try {
auto unparsedView = url;
/* Account for several non-standard properties of nix urls (for back-compat): /* Account for several non-standard properties of nix urls (for back-compat):
* - Allow unescaped spaces ' ' and '"' characters in queries. * - Allow unescaped spaces ' ' and '"' characters in queries.
* - Allow '"', ' ' and '^' characters in the fragment component. * - Allow '"', ' ' and '^' characters in the fragment component.