diff --git a/src/libstore-tests/meson.build b/src/libstore-tests/meson.build index 87f6a234a..9b98a2812 100644 --- a/src/libstore-tests/meson.build +++ b/src/libstore-tests/meson.build @@ -74,6 +74,7 @@ sources = files( 'path.cc', 'references.cc', 's3-binary-cache-store.cc', + 's3.cc', 'serve-protocol.cc', 'ssh-store.cc', 'store-reference.cc', diff --git a/src/libstore-tests/s3.cc b/src/libstore-tests/s3.cc new file mode 100644 index 000000000..b66005cb9 --- /dev/null +++ b/src/libstore-tests/s3.cc @@ -0,0 +1,96 @@ +#include "nix/store/s3.hh" +#include "nix/util/tests/gmock-matchers.hh" + +#if NIX_WITH_S3_SUPPORT + +# include <gtest/gtest.h> +# include <gmock/gmock.h> + +namespace nix { + +struct ParsedS3URLTestCase +{ + std::string url; + ParsedS3URL expected; + std::string description; +}; + +class ParsedS3URLTest : public ::testing::WithParamInterface<ParsedS3URLTestCase>, public ::testing::Test +{}; + +TEST_P(ParsedS3URLTest, parseS3URLSuccessfully) +{ + const auto & testCase = GetParam(); + auto parsed = ParsedS3URL::parse(testCase.url); + ASSERT_EQ(parsed, testCase.expected); +} + +INSTANTIATE_TEST_SUITE_P( + QueryParams, + ParsedS3URLTest, + ::testing::Values( + ParsedS3URLTestCase{ + "s3://my-bucket/my-key.txt", + { + .bucket = "my-bucket", + .key = "my-key.txt", + }, + "basic_s3_bucket"}, + ParsedS3URLTestCase{ + "s3://prod-cache/nix/store/abc123.nar.xz?region=eu-west-1", + { + .bucket = "prod-cache", + .key = "nix/store/abc123.nar.xz", + .region = "eu-west-1", + }, + "with_region"}, + ParsedS3URLTestCase{ + "s3://bucket/key?region=us-west-2&profile=prod&endpoint=custom.s3.com&scheme=https&region=us-east-1", + { + .bucket = "bucket", + .key = "key", + .profile = "prod", + .region = "us-west-2", //< using the first parameter (decodeQuery ignores duplicates) + .scheme = "https", + .endpoint = ParsedURL::Authority{.host = "custom.s3.com"}, + }, + "complex"}, + ParsedS3URLTestCase{ + "s3://cache/file.txt?profile=production&region=ap-southeast-2", + { + .bucket = "cache", + .key = "file.txt", + .profile = "production", 
+ .region = "ap-southeast-2", + }, + "with_profile_and_region"}, + ParsedS3URLTestCase{ + "s3://bucket/key?endpoint=https://minio.local&scheme=http", + { + .bucket = "bucket", + .key = "key", + /* TODO: Figure out what AWS SDK is doing when both endpointOverride and scheme are set. */ + .scheme = "http", + .endpoint = + ParsedURL{ + .scheme = "https", + .authority = ParsedURL::Authority{.host = "minio.local"}, + }, + }, + "with_absolute_endpoint_uri"}), + [](const ::testing::TestParamInfo<ParsedS3URLTestCase> & info) { return info.param.description; }); + +TEST(InvalidParsedS3URLTest, parseS3URLErrors) +{ + auto invalidBucketMatcher = ::testing::ThrowsMessage<BadURL>( + testing::HasSubstrIgnoreANSIMatcher("error: URI has a missing or invalid bucket name")); + + /* Empty bucket (authority) */ + ASSERT_THAT([]() { ParsedS3URL::parse("s3:///key"); }, invalidBucketMatcher); + /* Invalid bucket name */ + ASSERT_THAT([]() { ParsedS3URL::parse("s3://127.0.0.1"); }, invalidBucketMatcher); +} + +} // namespace nix + +#endif diff --git a/src/libstore/filetransfer.cc b/src/libstore/filetransfer.cc index c29da12e8..974797e12 100644 --- a/src/libstore/filetransfer.cc +++ b/src/libstore/filetransfer.cc @@ -798,22 +798,6 @@ struct curlFileTransfer : public FileTransfer #endif } -#if NIX_WITH_S3_SUPPORT - std::tuple<std::string, std::string, Store::Params> parseS3Uri(std::string uri) - { - auto [path, params] = splitUriAndParams(uri); - - auto slash = path.find('/', 5); // 5 is the length of "s3://" prefix - if (slash == std::string::npos) - throw nix::Error("bad S3 URI '%s'", path); - - std::string bucketName(path, 5, slash - 5); - std::string key(path, slash + 1); - - return {bucketName, key, params}; - } -#endif - void enqueueFileTransfer(const FileTransferRequest & request, Callback<FileTransferResult> callback) override { /* Ugly hack to support s3:// URIs. 
*/ @@ -821,17 +805,17 @@ struct curlFileTransfer : public FileTransfer // FIXME: do this on a worker thread try { #if NIX_WITH_S3_SUPPORT - auto [bucketName, key, params] = parseS3Uri(request.uri); + auto parsed = ParsedS3URL::parse(request.uri); - std::string profile = getOr(params, "profile", ""); - std::string region = getOr(params, "region", Aws::Region::US_EAST_1); - std::string scheme = getOr(params, "scheme", ""); - std::string endpoint = getOr(params, "endpoint", ""); + std::string profile = parsed.profile.value_or(""); + std::string region = parsed.region.value_or(Aws::Region::US_EAST_1); + std::string scheme = parsed.scheme.value_or(""); + std::string endpoint = parsed.getEncodedEndpoint().value_or(""); S3Helper s3Helper(profile, region, scheme, endpoint); // FIXME: implement ETag - auto s3Res = s3Helper.getObject(bucketName, key); + auto s3Res = s3Helper.getObject(parsed.bucket, parsed.key); FileTransferResult res; if (!s3Res.data) throw FileTransferError(NotFound, {}, "S3 object '%s' does not exist", request.uri); diff --git a/src/libstore/include/nix/store/s3.hh b/src/libstore/include/nix/store/s3.hh index 57e03a065..517825952 100644 --- a/src/libstore/include/nix/store/s3.hh +++ b/src/libstore/include/nix/store/s3.hh @@ -4,9 +4,12 @@ #if NIX_WITH_S3_SUPPORT # include "nix/util/ref.hh" +# include "nix/util/url.hh" +# include "nix/util/util.hh" # include <optional> # include <string> +# include <variant> namespace Aws { namespace Client { @@ -45,6 +48,36 @@ struct S3Helper FileTransferResult getObject(const std::string & bucketName, const std::string & key); }; +/** + * Parsed S3 URL. + */ +struct ParsedS3URL +{ + std::string bucket; + std::string key; + std::optional<std::string> profile; + std::optional<std::string> region; + std::optional<std::string> scheme; + /** + * The endpoint can be either missing, be an absolute URI (with a scheme like `http:`) + * or an authority (so an IP address or a registered name). 
+ */ + std::variant<std::monostate, ParsedURL, ParsedURL::Authority> endpoint; + + std::optional<std::string> getEncodedEndpoint() const + { + return std::visit( + overloaded{ + [](std::monostate) -> std::optional<std::string> { return std::nullopt; }, + [](const auto & authorityOrUrl) -> std::optional<std::string> { return authorityOrUrl.to_string(); }, + }, + endpoint); + } + + static ParsedS3URL parse(std::string_view uri); + auto operator<=>(const ParsedS3URL & other) const = default; +}; + } // namespace nix #endif diff --git a/src/libstore/meson.build b/src/libstore/meson.build index ad76582d8..e98ba7545 100644 --- a/src/libstore/meson.build +++ b/src/libstore/meson.build @@ -321,6 +321,7 @@ sources = files( 'remote-store.cc', 'restricted-store.cc', 's3-binary-cache-store.cc', + 's3.cc', 'serve-protocol-connection.cc', 'serve-protocol.cc', 'sqlite.cc', diff --git a/src/libstore/s3.cc b/src/libstore/s3.cc new file mode 100644 index 000000000..9ed4e7fd9 --- /dev/null +++ b/src/libstore/s3.cc @@ -0,0 +1,71 @@ +#include "nix/store/s3.hh" +#include "nix/util/split.hh" +#include "nix/util/url.hh" + +namespace nix { + +using namespace std::string_view_literals; + +#if NIX_WITH_S3_SUPPORT + +ParsedS3URL ParsedS3URL::parse(std::string_view uri) +try { + auto parsed = parseURL(uri); + + if (parsed.scheme != "s3"sv) + throw BadURL("URI scheme '%s' is not 's3'", parsed.scheme); + + /* Yeah, S3 URLs in Nix have the bucket name as authority. Luckily registered name type + authority has the same restrictions (mostly) as S3 bucket names. + TODO: Validate against: + https://docs.aws.amazon.com/AmazonS3/latest/userguide/bucketnamingrules.html#general-purpose-bucket-names + */ + if (!parsed.authority || parsed.authority->host.empty() + || parsed.authority->hostType != ParsedURL::Authority::HostType::Name) + throw BadURL("URI has a missing or invalid bucket name"); + + std::string_view key = parsed.path; + /* Make the key a relative path. 
*/ + splitPrefix(key, "/"); + + /* TODO: Validate the key against: + * https://docs.aws.amazon.com/AmazonS3/latest/userguide/object-keys.html#object-key-guidelines + */ + + auto getOptionalParam = [&](std::string_view key) -> std::optional<std::string> { + const auto & query = parsed.query; + auto it = query.find(key); + if (it == query.end()) + return std::nullopt; + return it->second; + }; + + auto endpoint = getOptionalParam("endpoint"); + + return ParsedS3URL{ + .bucket = std::move(parsed.authority->host), + .key = std::string{key}, + .profile = getOptionalParam("profile"), + .region = getOptionalParam("region"), + .scheme = getOptionalParam("scheme"), + .endpoint = [&]() -> decltype(ParsedS3URL::endpoint) { + if (!endpoint) + return std::monostate(); + + /* Try to parse the endpoint as a full-fledged URL with a scheme. */ + try { + return parseURL(*endpoint); + } catch (BadURL &) { + } + + return ParsedURL::Authority::parse(*endpoint); + }(), + }; +} catch (BadURL & e) { + e.addTrace({}, "while parsing S3 URI: '%s'", uri); + throw; +} + +#endif + +} // namespace nix diff --git a/src/libutil/include/nix/util/url.hh b/src/libutil/include/nix/util/url.hh index 0aa1eac9f..cd20a08c6 100644 --- a/src/libutil/include/nix/util/url.hh +++ b/src/libutil/include/nix/util/url.hh @@ -30,7 +30,7 @@ struct ParsedURL }; static Authority parse(std::string_view encodedAuthority); - bool operator==(const Authority & other) const = default; + auto operator<=>(const Authority & other) const = default; std::string to_string() const; friend std::ostream & operator<<(std::ostream & os, const Authority & self); @@ -81,7 +81,7 @@ struct ParsedURL std::string to_string() const; - bool operator==(const ParsedURL & other) const noexcept = default; + auto operator<=>(const ParsedURL & other) const noexcept = default; /** * Remove `.` and `..` path elements. 
@@ -111,7 +111,7 @@ std::string encodeQuery(const StringMap & query); * * @throws BadURL */ -ParsedURL parseURL(const std::string & url); +ParsedURL parseURL(std::string_view url); /** * Although that’s not really standardized anywhere, an number of tools diff --git a/src/libutil/url.cc b/src/libutil/url.cc index 75f62d445..cdfba8a83 100644 --- a/src/libutil/url.cc +++ b/src/libutil/url.cc @@ -108,10 +108,8 @@ static std::string percentEncodeCharSet(std::string_view s, auto charSet) return res; } -ParsedURL parseURL(const std::string & url) +ParsedURL parseURL(std::string_view url) try { - auto unparsedView = url; - /* Account for several non-standard properties of nix urls (for back-compat): * - Allow unescaped spaces ' ' and '"' characters in queries. * - Allow '"', ' ' and '^' characters in the fragment component.