Mirror of https://github.com/NixOS/nix.git
libstore: Introduce ParsedS3URL type
This systematizes the way our s3:// URLs are parsed in filetransfer.cc. Yoinked out and refactored from [1].

[1]: https://github.com/NixOS/nix/pull/13752

Co-authored-by: Bernardo Meurer Costa <beme@anthropic.com>
Parent: 5c0eff24d5
Commit: 69fcc2cfc1

8 changed files with 212 additions and 28 deletions
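For orientation before the diff, here is a minimal usage sketch of the new type. It is illustrative only: the main() wrapper and the sample URL are not part of the commit, and it assumes a build with NIX_WITH_S3_SUPPORT and the libstore headers on the include path.

    #include <cassert>
    #include <iostream>

    #include "nix/store/s3.hh" // ParsedS3URL, introduced by this commit

    int main()
    {
        using namespace nix;

        // Parse an s3:// URL into structured components instead of slicing strings by hand.
        auto parsed = ParsedS3URL::parse("s3://prod-cache/nix/store/abc123.nar.xz?region=eu-west-1");

        assert(parsed.bucket == "prod-cache");
        assert(parsed.key == "nix/store/abc123.nar.xz");
        assert(parsed.region && *parsed.region == "eu-west-1");
        assert(!parsed.profile); // query parameters that were not given stay unset

        std::cout << "bucket=" << parsed.bucket << " key=" << parsed.key << "\n";
        return 0;
    }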
meson.build (sources list):

@@ -74,6 +74,7 @@ sources = files(
     'path.cc',
     'references.cc',
     's3-binary-cache-store.cc',
+    's3.cc',
     'serve-protocol.cc',
     'ssh-store.cc',
     'store-reference.cc',
src/libstore-tests/s3.cc (new file, 96 lines):

@@ -0,0 +1,96 @@
#include "nix/store/s3.hh"
#include "nix/util/tests/gmock-matchers.hh"

#if NIX_WITH_S3_SUPPORT

# include <gtest/gtest.h>
# include <gmock/gmock.h>

namespace nix {

struct ParsedS3URLTestCase
{
    std::string url;
    ParsedS3URL expected;
    std::string description;
};

class ParsedS3URLTest : public ::testing::WithParamInterface<ParsedS3URLTestCase>, public ::testing::Test
{};

TEST_P(ParsedS3URLTest, parseS3URLSuccessfully)
{
    const auto & testCase = GetParam();
    auto parsed = ParsedS3URL::parse(testCase.url);
    ASSERT_EQ(parsed, testCase.expected);
}

INSTANTIATE_TEST_SUITE_P(
    QueryParams,
    ParsedS3URLTest,
    ::testing::Values(
        ParsedS3URLTestCase{
            "s3://my-bucket/my-key.txt",
            {
                .bucket = "my-bucket",
                .key = "my-key.txt",
            },
            "basic_s3_bucket"},
        ParsedS3URLTestCase{
            "s3://prod-cache/nix/store/abc123.nar.xz?region=eu-west-1",
            {
                .bucket = "prod-cache",
                .key = "nix/store/abc123.nar.xz",
                .region = "eu-west-1",
            },
            "with_region"},
        ParsedS3URLTestCase{
            "s3://bucket/key?region=us-west-2&profile=prod&endpoint=custom.s3.com&scheme=https&region=us-east-1",
            {
                .bucket = "bucket",
                .key = "key",
                .profile = "prod",
                .region = "us-west-2", //< using the first parameter (decodeQuery ignores duplicates)
                .scheme = "https",
                .endpoint = ParsedURL::Authority{.host = "custom.s3.com"},
            },
            "complex"},
        ParsedS3URLTestCase{
            "s3://cache/file.txt?profile=production&region=ap-southeast-2",
            {
                .bucket = "cache",
                .key = "file.txt",
                .profile = "production",
                .region = "ap-southeast-2",
            },
            "with_profile_and_region"},
        ParsedS3URLTestCase{
            "s3://bucket/key?endpoint=https://minio.local&scheme=http",
            {
                .bucket = "bucket",
                .key = "key",
                /* TODO: Figure out what the AWS SDK does when both endpointOverride and scheme are set. */
                .scheme = "http",
                .endpoint =
                    ParsedURL{
                        .scheme = "https",
                        .authority = ParsedURL::Authority{.host = "minio.local"},
                    },
            },
            "with_absolute_endpoint_uri"}),
    [](const ::testing::TestParamInfo<ParsedS3URLTestCase> & info) { return info.param.description; });

TEST(InvalidParsedS3URLTest, parseS3URLErrors)
{
    auto invalidBucketMatcher = ::testing::ThrowsMessage<BadURL>(
        testing::HasSubstrIgnoreANSIMatcher("error: URI has a missing or invalid bucket name"));

    /* Empty bucket (authority) */
    ASSERT_THAT([]() { ParsedS3URL::parse("s3:///key"); }, invalidBucketMatcher);
    /* Invalid bucket name */
    ASSERT_THAT([]() { ParsedS3URL::parse("s3://127.0.0.1"); }, invalidBucketMatcher);
}

} // namespace nix

#endif
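Each table entry above becomes one generated test instance, named by the final lambda after its description field (roughly QueryParams/ParsedS3URLTest.parseS3URLSuccessfully/with_region and so on). Written out by hand, the with_region case boils down to the following sketch; it is not part of the commit, just the unrolled equivalent:

    #include "nix/store/s3.hh"

    #include <gtest/gtest.h>

    namespace nix {

    // Hand-unrolled equivalent of the "with_region" parameterized case above.
    TEST(ParsedS3URLExample, withRegion)
    {
        auto parsed = ParsedS3URL::parse("s3://prod-cache/nix/store/abc123.nar.xz?region=eu-west-1");

        ParsedS3URL expected{
            .bucket = "prod-cache",
            .key = "nix/store/abc123.nar.xz",
            .region = "eu-west-1",
        };

        // ParsedS3URL defaults operator<=>, which also gives it operator==,
        // so whole-struct comparison in ASSERT_EQ works.
        ASSERT_EQ(parsed, expected);
    }

    } // namespace nix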
filetransfer.cc:

@@ -798,22 +798,6 @@ struct curlFileTransfer : public FileTransfer
 #endif
     }

-#if NIX_WITH_S3_SUPPORT
-    std::tuple<std::string, std::string, Store::Config::Params> parseS3Uri(std::string uri)
-    {
-        auto [path, params] = splitUriAndParams(uri);
-
-        auto slash = path.find('/', 5); // 5 is the length of "s3://" prefix
-        if (slash == std::string::npos)
-            throw nix::Error("bad S3 URI '%s'", path);
-
-        std::string bucketName(path, 5, slash - 5);
-        std::string key(path, slash + 1);
-
-        return {bucketName, key, params};
-    }
-#endif
-
     void enqueueFileTransfer(const FileTransferRequest & request, Callback<FileTransferResult> callback) override
     {
         /* Ugly hack to support s3:// URIs. */

@@ -821,17 +805,17 @@ struct curlFileTransfer : public FileTransfer
         // FIXME: do this on a worker thread
         try {
 #if NIX_WITH_S3_SUPPORT
-            auto [bucketName, key, params] = parseS3Uri(request.uri);
+            auto parsed = ParsedS3URL::parse(request.uri);

-            std::string profile = getOr(params, "profile", "");
-            std::string region = getOr(params, "region", Aws::Region::US_EAST_1);
-            std::string scheme = getOr(params, "scheme", "");
-            std::string endpoint = getOr(params, "endpoint", "");
+            std::string profile = parsed.profile.value_or("");
+            std::string region = parsed.region.value_or(Aws::Region::US_EAST_1);
+            std::string scheme = parsed.scheme.value_or("");
+            std::string endpoint = parsed.getEncodedEndpoint().value_or("");

             S3Helper s3Helper(profile, region, scheme, endpoint);

             // FIXME: implement ETag
-            auto s3Res = s3Helper.getObject(bucketName, key);
+            auto s3Res = s3Helper.getObject(parsed.bucket, parsed.key);
             FileTransferResult res;
             if (!s3Res.data)
                 throw FileTransferError(NotFound, {}, "S3 object '%s' does not exist", request.uri);
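One observable difference from the old parseS3Uri: malformed s3:// URLs now surface as BadURL from ParsedS3URL::parse (with a trace added, per s3.cc below), and an empty bucket, which the old string slicing let through, is rejected up front. A small sketch of what that looks like to a caller; the main() wrapper is illustrative and assumes the libstore headers with NIX_WITH_S3_SUPPORT:

    #include <iostream>

    #include "nix/store/s3.hh"
    #include "nix/util/url.hh" // BadURL

    int main()
    {
        using namespace nix;

        try {
            // Empty bucket: the old parseS3Uri would have produced bucketName == ""
            // and handed it to S3Helper; the new parser throws BadURL immediately.
            ParsedS3URL::parse("s3:///key");
        } catch (BadURL & e) {
            std::cerr << e.what() << "\n"; // mentions the missing or invalid bucket name
            return 0;
        }
        return 1;
    }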
nix/store/s3.hh:

@@ -4,9 +4,12 @@
 #if NIX_WITH_S3_SUPPORT

 # include "nix/util/ref.hh"
+# include "nix/util/url.hh"
+# include "nix/util/util.hh"

 # include <optional>
 # include <string>
+# include <variant>

 namespace Aws {
 namespace Client {

@@ -45,6 +48,36 @@ struct S3Helper
     FileTransferResult getObject(const std::string & bucketName, const std::string & key);
 };

+/**
+ * Parsed S3 URL.
+ */
+struct ParsedS3URL
+{
+    std::string bucket;
+    std::string key;
+    std::optional<std::string> profile;
+    std::optional<std::string> region;
+    std::optional<std::string> scheme;
+    /**
+     * The endpoint can either be missing, an absolute URI (with a scheme like `http:`),
+     * or an authority (i.e. an IP address or a registered name).
+     */
+    std::variant<std::monostate, ParsedURL, ParsedURL::Authority> endpoint;
+
+    std::optional<std::string> getEncodedEndpoint() const
+    {
+        return std::visit(
+            overloaded{
+                [](std::monostate) -> std::optional<std::string> { return std::nullopt; },
+                [](const auto & authorityOrUrl) -> std::optional<std::string> { return authorityOrUrl.to_string(); },
+            },
+            endpoint);
+    }
+
+    static ParsedS3URL parse(std::string_view uri);
+    auto operator<=>(const ParsedS3URL & other) const = default;
+};
+
 } // namespace nix

 #endif
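getEncodedEndpoint() above uses the classic overloaded visitor idiom over the endpoint variant; the new nix/util/util.hh include is what brings nix's overloaded helper into scope. Below is a self-contained sketch of the same pattern with local stand-ins (overloaded, Url, and Authority here are simplified substitutes, not the Nix definitions):

    #include <iostream>
    #include <optional>
    #include <string>
    #include <variant>

    // Stand-in for the `overloaded` helper from nix/util/util.hh:
    // merges several lambdas into a single visitor type for std::visit.
    template<class... Ts>
    struct overloaded : Ts...
    {
        using Ts::operator()...;
    };
    template<class... Ts>
    overloaded(Ts...) -> overloaded<Ts...>;

    // Simplified stand-ins for ParsedURL and ParsedURL::Authority.
    struct Url
    {
        std::string s;
        std::string to_string() const { return s; }
    };

    struct Authority
    {
        std::string host;
        std::string to_string() const { return host; }
    };

    using Endpoint = std::variant<std::monostate, Url, Authority>;

    // Same shape as ParsedS3URL::getEncodedEndpoint(): monostate maps to nullopt,
    // either other alternative maps to its string form via a generic lambda.
    std::optional<std::string> encodedEndpoint(const Endpoint & endpoint)
    {
        return std::visit(
            overloaded{
                [](std::monostate) -> std::optional<std::string> { return std::nullopt; },
                [](const auto & authorityOrUrl) -> std::optional<std::string> { return authorityOrUrl.to_string(); },
            },
            endpoint);
    }

    int main()
    {
        std::cout << encodedEndpoint(Endpoint{}).value_or("<none>") << "\n";                 // <none>
        std::cout << encodedEndpoint(Url{"https://minio.local"}).value_or("<none>") << "\n"; // https://minio.local
        std::cout << encodedEndpoint(Authority{"custom.s3.com"}).value_or("<none>") << "\n"; // custom.s3.com
        return 0;
    }

The generic second lambda covers both non-empty alternatives because each exposes a to_string() method, which is exactly what the real ParsedURL and ParsedURL::Authority provide.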
meson.build (sources list):

@@ -321,6 +321,7 @@ sources = files(
     'remote-store.cc',
     'restricted-store.cc',
     's3-binary-cache-store.cc',
+    's3.cc',
     'serve-protocol-connection.cc',
     'serve-protocol.cc',
     'sqlite.cc',
src/libstore/s3.cc (new file, 71 lines):

@@ -0,0 +1,71 @@
#include "nix/store/s3.hh"
#include "nix/util/split.hh"
#include "nix/util/url.hh"

namespace nix {

using namespace std::string_view_literals;

#if NIX_WITH_S3_SUPPORT

ParsedS3URL ParsedS3URL::parse(std::string_view uri)
try {
    auto parsed = parseURL(uri);

    if (parsed.scheme != "s3"sv)
        throw BadURL("URI scheme '%s' is not 's3'", parsed.scheme);

    /* Yeah, S3 URLs in Nix have the bucket name as the authority. Luckily, the registered-name
       type of authority has (mostly) the same restrictions as S3 bucket names.
       TODO: Validate against:
       https://docs.aws.amazon.com/AmazonS3/latest/userguide/bucketnamingrules.html#general-purpose-bucket-names
     */
    if (!parsed.authority || parsed.authority->host.empty()
        || parsed.authority->hostType != ParsedURL::Authority::HostType::Name)
        throw BadURL("URI has a missing or invalid bucket name");

    std::string_view key = parsed.path;
    /* Make the key a relative path. */
    splitPrefix(key, "/");

    /* TODO: Validate the key against:
     * https://docs.aws.amazon.com/AmazonS3/latest/userguide/object-keys.html#object-key-guidelines
     */

    auto getOptionalParam = [&](std::string_view key) -> std::optional<std::string> {
        const auto & query = parsed.query;
        auto it = query.find(key);
        if (it == query.end())
            return std::nullopt;
        return it->second;
    };

    auto endpoint = getOptionalParam("endpoint");

    return ParsedS3URL{
        .bucket = std::move(parsed.authority->host),
        .key = std::string{key},
        .profile = getOptionalParam("profile"),
        .region = getOptionalParam("region"),
        .scheme = getOptionalParam("scheme"),
        .endpoint = [&]() -> decltype(ParsedS3URL::endpoint) {
            if (!endpoint)
                return std::monostate();

            /* Try to parse the endpoint as a full-fledged URL with a scheme. */
            try {
                return parseURL(*endpoint);
            } catch (BadURL &) {
            }

            return ParsedURL::Authority::parse(*endpoint);
        }(),
    };
} catch (BadURL & e) {
    e.addTrace({}, "while parsing S3 URI: '%s'", uri);
    throw;
}

#endif

} // namespace nix
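Note the order of the fallbacks in the endpoint lambda: the query value is first tried as a full URL, and only on BadURL does it degrade to an authority. The practical effect, as the test cases exercise, is sketched below (illustrative main(); assumes NIX_WITH_S3_SUPPORT and the libstore headers):

    #include <cassert>
    #include <variant>

    #include "nix/store/s3.hh"

    int main()
    {
        using namespace nix;

        // "endpoint=https://minio.local" parses as a full URL, so the scheme is preserved
        // in the variant's ParsedURL alternative.
        auto withUrl = ParsedS3URL::parse("s3://bucket/key?endpoint=https://minio.local");
        assert(std::holds_alternative<ParsedURL>(withUrl.endpoint));

        // "endpoint=custom.s3.com" has no scheme, so parseURL throws and parse() falls back
        // to ParsedURL::Authority (host only).
        auto withAuthority = ParsedS3URL::parse("s3://bucket/key?endpoint=custom.s3.com");
        assert(std::holds_alternative<ParsedURL::Authority>(withAuthority.endpoint));

        // No endpoint parameter leaves the variant as std::monostate,
        // and getEncodedEndpoint() returns std::nullopt.
        auto withoutEndpoint = ParsedS3URL::parse("s3://bucket/key");
        assert(std::holds_alternative<std::monostate>(withoutEndpoint.endpoint));
        assert(!withoutEndpoint.getEncodedEndpoint());

        return 0;
    }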
nix/util/url.hh:

@@ -30,7 +30,7 @@ struct ParsedURL
     };

     static Authority parse(std::string_view encodedAuthority);
-    bool operator==(const Authority & other) const = default;
+    auto operator<=>(const Authority & other) const = default;
     std::string to_string() const;
     friend std::ostream & operator<<(std::ostream & os, const Authority & self);

@@ -81,7 +81,7 @@ struct ParsedURL

     std::string to_string() const;

-    bool operator==(const ParsedURL & other) const noexcept = default;
+    auto operator<=>(const ParsedURL & other) const noexcept = default;

     /**
      * Remove `.` and `..` path elements.

@@ -111,7 +111,7 @@ std::string encodeQuery(const StringMap & query);
  *
  * @throws BadURL
  */
-ParsedURL parseURL(const std::string & url);
+ParsedURL parseURL(std::string_view url);

 /**
  * Although that’s not really standardized anywhere, a number of tools
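The switch from a defaulted operator== to a defaulted operator<=> on Authority and ParsedURL is what lets ParsedS3URL simply default its own operator<=>: defaulting <=> requires every member to be three-way comparable, and it also implicitly provides a defaulted operator==, so ASSERT_EQ in the tests keeps working. A minimal stand-alone illustration (toy types, not the Nix ones):

    #include <cassert>
    #include <string>

    // Defaulting operator<=> makes a struct ordered member-by-member and also gives it
    // an implicitly defaulted operator==. It further requires every member type to be
    // three-way comparable, which is why the nested types change in this hunk.
    struct Authority
    {
        std::string host;
        auto operator<=>(const Authority &) const = default;
    };

    struct Parsed
    {
        std::string bucket;
        Authority endpoint;
        auto operator<=>(const Parsed &) const = default;
    };

    int main()
    {
        assert((Parsed{"a", {"h"}} == Parsed{"a", {"h"}})); // implicit operator==
        assert((Parsed{"a", {"h"}} < Parsed{"b", {"h"}}));  // ordering from operator<=>
        return 0;
    }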
url.cc:

@@ -108,10 +108,8 @@ static std::string percentEncodeCharSet(std::string_view s, auto charSet)
     return res;
 }

-ParsedURL parseURL(const std::string & url)
+ParsedURL parseURL(std::string_view url)
 try {
     auto unparsedView = url;

     /* Account for several non-standard properties of nix urls (for back-compat):
      * - Allow unescaped spaces ' ' and '"' characters in queries.
      * - Allow '"', ' ' and '^' characters in the fragment component.
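Finally, parseURL now takes std::string_view, so callers can pass string literals, std::string values, or substrings without materialising a std::string at the call site. A small sketch of the call shapes this enables (illustrative; assumes nix/util/url.hh on the include path):

    #include <iostream>
    #include <string>
    #include <string_view>

    #include "nix/util/url.hh"

    int main()
    {
        using namespace nix;

        std::string stored = "s3://bucket/key?region=eu-west-1";

        auto a = parseURL("https://cache.nixos.org/nix-cache-info"); // literal, no temporary std::string
        auto b = parseURL(stored);                                   // std::string converts to string_view
        auto c = parseURL(std::string_view(stored).substr(0, 15));   // substring view "s3://bucket/key", no copy

        std::cout << a.scheme << " " << b.scheme << " " << c.scheme << "\n"; // https s3 s3
        return 0;
    }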