1
1
Fork 0
mirror of https://github.com/NixOS/nix.git synced 2025-11-08 19:46:02 +01:00

refactor(libstore): replace AWS SDK with curl-based S3 implementation

This commit replaces the AWS C++ SDK with a lighter curl-based approach
for S3 binary cache operations.

- Removed dependency on the heavy aws-cpp-sdk-s3 and aws-cpp-sdk-transfer
- Added lightweight aws-crt-cpp for credential resolution only
- Leverages curl's native AWS SigV4 authentication (requires curl >= 7.75.0)
- S3BinaryCacheStore now delegates to HttpBinaryCacheStore
- Function s3ToHttpsUrl converts ParsedS3URL to ParsedURL
- Multipart uploads are no longer supported (may be reimplemented later)
- Build now requires curl >= 7.75.0 for AWS SigV4 support

Fixes: #13084, #12671, #11748, #12403, #5947
This commit is contained in:
Bernardo Meurer Costa 2025-08-21 05:38:42 +00:00
parent a543519ca9
commit 9295c14a35
No known key found for this signature in database
18 changed files with 14 additions and 1002 deletions

View file

@ -67,8 +67,7 @@ jobs:
instrumented: false
primary: true
stdenv: stdenv
withAWS: true
withCurlS3: false
withCurlS3: true
# TODO: remove once curl-based-s3 fully lands
- scenario: on ubuntu (no s3)
runs-on: ubuntu-24.04
@ -76,33 +75,21 @@ jobs:
instrumented: false
primary: false
stdenv: stdenv
withAWS: false
withCurlS3: false
# TODO: remove once curl-based-s3 fully lands
- scenario: on ubuntu (curl s3)
runs-on: ubuntu-24.04
os: linux
instrumented: false
primary: false
stdenv: stdenv
withAWS: false
withCurlS3: true
- scenario: on macos
runs-on: macos-14
os: darwin
instrumented: false
primary: true
stdenv: stdenv
withAWS: true
withCurlS3: false
withCurlS3: true
- scenario: on ubuntu (with sanitizers / coverage)
runs-on: ubuntu-24.04
os: linux
instrumented: true
primary: false
stdenv: clangStdenv
withAWS: true
withCurlS3: false
withCurlS3: true
name: tests ${{ matrix.scenario }}
runs-on: ${{ matrix.runs-on }}
timeout-minutes: 60
@ -126,14 +113,12 @@ jobs:
nix build --file ci/gha/tests/wrapper.nix componentTests -L \
--arg withInstrumentation ${{ matrix.instrumented }} \
--argstr stdenv "${{ matrix.stdenv }}" \
${{ format('--arg withAWS {0}', matrix.withAWS) }} \
${{ format('--arg withCurlS3 {0}', matrix.withCurlS3) }}
- name: Run VM tests
run: |
nix build --file ci/gha/tests/wrapper.nix vmTests -L \
--arg withInstrumentation ${{ matrix.instrumented }} \
--argstr stdenv "${{ matrix.stdenv }}" \
${{ format('--arg withAWS {0}', matrix.withAWS) }} \
${{ format('--arg withCurlS3 {0}', matrix.withCurlS3) }}
if: ${{ matrix.os == 'linux' }}
- name: Run flake checks and prepare the installer tarball
@ -146,7 +131,6 @@ jobs:
nix build --file ci/gha/tests/wrapper.nix codeCoverage.coverageReports -L \
--arg withInstrumentation ${{ matrix.instrumented }} \
--argstr stdenv "${{ matrix.stdenv }}" \
${{ format('--arg withAWS {0}', matrix.withAWS) }} \
${{ format('--arg withCurlS3 {0}', matrix.withCurlS3) }} \
--out-link coverage-reports
cat coverage-reports/index.txt >> $GITHUB_STEP_SUMMARY

View file

@ -12,7 +12,6 @@
componentTestsPrefix ? "",
withSanitizers ? false,
withCoverage ? false,
withAWS ? null,
withCurlS3 ? null,
...
}:
@ -60,8 +59,7 @@ rec {
# Override AWS configuration if specified
nix-store = prev.nix-store.override (
lib.optionalAttrs (withAWS != null) { inherit withAWS; }
// lib.optionalAttrs (withCurlS3 != null) { inherit withCurlS3; }
lib.optionalAttrs (withCurlS3 != null) { inherit withCurlS3; }
);
mesonComponentOverrides = lib.composeManyExtensions componentOverrides;
@ -231,11 +229,6 @@ rec {
vmTests = {
}
# FIXME: when the curlS3 implementation is complete, it should also enable these tests.
// lib.optionalAttrs (withAWS == true) {
# S3 binary cache store test only runs when S3 support is enabled
inherit (nixosTests) s3-binary-cache-store;
}
// lib.optionalAttrs (withCurlS3 == true) {
# S3 binary cache store test using curl implementation
inherit (nixosTests) curl-s3-binary-cache-store;

View file

@ -5,7 +5,6 @@
stdenv ? "stdenv",
componentTestsPrefix ? "",
withInstrumentation ? false,
withAWS ? null,
withCurlS3 ? null,
}@args:
import ./. (
@ -14,6 +13,6 @@ import ./. (
getStdenv = p: p.${stdenv};
withSanitizers = withInstrumentation;
withCoverage = withInstrumentation;
inherit withAWS withCurlS3;
inherit withCurlS3;
}
)

View file

@ -490,7 +490,7 @@ in
Example:
```
overrideScope (finalScope: prevScope: { aws-sdk-cpp = null; })
overrideScope (finalScope: prevScope: { aws-crt-cpp = null; })
```
*/
overrideScope = f: (scope.overrideScope f).nix-everything;

View file

@ -16,21 +16,6 @@ in
scope: {
inherit stdenv;
aws-sdk-cpp =
(pkgs.aws-sdk-cpp.override {
apis = [
"identity-management"
"s3"
"transfer"
];
customMemoryManagement = false;
}).overrideAttrs
{
# only a stripped down version is built, which takes a lot less resources
# to build, so we don't need a "big-parallel" machine.
requiredSystemFeatures = [ ];
};
boehmgc =
(pkgs.boehmgc.override {
enableLargeConfig = true;

View file

@ -1,21 +1,6 @@
#include "nix/store/s3-binary-cache-store.hh"
#if NIX_WITH_S3_SUPPORT
# include <gtest/gtest.h>
namespace nix {
TEST(S3BinaryCacheStore, constructConfig)
{
S3BinaryCacheStoreConfig config{"s3", "foobar", {}};
EXPECT_EQ(config.bucketName, "foobar");
}
} // namespace nix
#elif NIX_WITH_CURL_S3
#if NIX_WITH_CURL_S3
# include "nix/store/http-binary-cache-store.hh"
# include "nix/store/filetransfer.hh"

View file

@ -1,7 +1,7 @@
#include "nix/store/s3-url.hh"
#include "nix/util/tests/gmock-matchers.hh"
#if NIX_WITH_S3_SUPPORT || NIX_WITH_CURL_S3
#if NIX_WITH_CURL_S3
# include <gtest/gtest.h>
# include <gmock/gmock.h>

View file

@ -2,7 +2,6 @@
#include "nix/store/globals.hh"
#include "nix/util/config-global.hh"
#include "nix/store/store-api.hh"
#include "nix/store/s3.hh"
#include "nix/util/compression.hh"
#include "nix/util/finally.hh"
#include "nix/util/callback.hh"
@ -10,9 +9,6 @@
#include "store-config-private.hh"
#include <optional>
#if NIX_WITH_S3_SUPPORT
# include <aws/core/client/ClientConfiguration.h>
#endif
#if NIX_WITH_CURL_S3
# include "nix/store/aws-creds.hh"
# include "nix/store/s3-url.hh"
@ -850,30 +846,6 @@ struct curlFileTransfer : public FileTransfer
auto modifiedRequest = request;
modifiedRequest.setupForS3();
enqueueItem(std::make_shared<TransferItem>(*this, std::move(modifiedRequest), std::move(callback)));
#elif NIX_WITH_S3_SUPPORT
// Old AWS SDK-based implementation
// FIXME: do this on a worker thread
try {
auto parsed = ParsedS3URL::parse(request.uri.parsed());
std::string profile = parsed.profile.value_or("");
std::string region = parsed.region.value_or(Aws::Region::US_EAST_1);
std::string scheme = parsed.scheme.value_or("");
std::string endpoint = parsed.getEncodedEndpoint().value_or("");
S3Helper s3Helper(profile, region, scheme, endpoint);
// FIXME: implement ETag
auto s3Res = s3Helper.getObject(parsed.bucket, encodeUrlPath(parsed.key));
FileTransferResult res;
if (!s3Res.data)
throw FileTransferError(NotFound, {}, "S3 object '%s' does not exist", request.uri);
res.data = std::move(*s3Res.data);
res.urls.push_back(request.uri.to_string());
callback(std::move(res));
} catch (...) {
callback.rethrow();
}
#else
throw nix::Error("cannot download '%s' because Nix is not built with S3 support", request.uri.to_string());
#endif

View file

@ -76,7 +76,6 @@ headers = [ config_pub_h ] + files(
'restricted-store.hh',
's3-binary-cache-store.hh',
's3-url.hh',
's3.hh',
'serve-protocol-connection.hh',
'serve-protocol-impl.hh',
'serve-protocol.hh',

View file

@ -3,138 +3,7 @@
#include "nix/store/config.hh"
#if NIX_WITH_S3_SUPPORT
# include "nix/store/binary-cache-store.hh"
# include <atomic>
namespace nix {
struct S3BinaryCacheStoreConfig : std::enable_shared_from_this<S3BinaryCacheStoreConfig>, virtual BinaryCacheStoreConfig
{
std::string bucketName;
using BinaryCacheStoreConfig::BinaryCacheStoreConfig;
S3BinaryCacheStoreConfig(std::string_view uriScheme, std::string_view bucketName, const Params & params);
const Setting<std::string> profile{
this,
"",
"profile",
R"(
The name of the AWS configuration profile to use. By default
Nix uses the `default` profile.
)"};
protected:
constexpr static const char * defaultRegion = "us-east-1";
public:
const Setting<std::string> region{
this,
defaultRegion,
"region",
R"(
The region of the S3 bucket. If your bucket is not in
`us-east-1`, you should always explicitly specify the region
parameter.
)"};
const Setting<std::string> scheme{
this,
"",
"scheme",
R"(
The scheme used for S3 requests, `https` (default) or `http`. This
option allows you to disable HTTPS for binary caches which don't
support it.
> **Note**
>
> HTTPS should be used if the cache might contain sensitive
> information.
)"};
const Setting<std::string> endpoint{
this,
"",
"endpoint",
R"(
The URL of the endpoint of an S3-compatible service such as MinIO.
Do not specify this setting if you're using Amazon S3.
> **Note**
>
> This endpoint must support HTTPS and uses path-based
> addressing instead of virtual host based addressing.
)"};
const Setting<std::string> narinfoCompression{
this, "", "narinfo-compression", "Compression method for `.narinfo` files."};
const Setting<std::string> lsCompression{this, "", "ls-compression", "Compression method for `.ls` files."};
const Setting<std::string> logCompression{
this,
"",
"log-compression",
R"(
Compression method for `log/*` files. It is recommended to
use a compression method supported by most web browsers
(e.g. `brotli`).
)"};
const Setting<bool> multipartUpload{this, false, "multipart-upload", "Whether to use multi-part uploads."};
const Setting<uint64_t> bufferSize{
this, 5 * 1024 * 1024, "buffer-size", "Size (in bytes) of each part in multi-part uploads."};
static const std::string name()
{
return "S3 Binary Cache Store";
}
static StringSet uriSchemes()
{
return {"s3"};
}
static std::string doc();
ref<Store> openStore() const override;
StoreReference getReference() const override;
};
struct S3BinaryCacheStore : virtual BinaryCacheStore
{
using Config = S3BinaryCacheStoreConfig;
ref<Config> config;
S3BinaryCacheStore(ref<Config>);
struct Stats
{
std::atomic<uint64_t> put{0};
std::atomic<uint64_t> putBytes{0};
std::atomic<uint64_t> putTimeMs{0};
std::atomic<uint64_t> get{0};
std::atomic<uint64_t> getBytes{0};
std::atomic<uint64_t> getTimeMs{0};
std::atomic<uint64_t> head{0};
};
virtual const Stats & getS3Stats() = 0;
};
} // namespace nix
#elif NIX_WITH_CURL_S3
#if NIX_WITH_CURL_S3
# include "nix/store/http-binary-cache-store.hh"

View file

@ -2,7 +2,7 @@
///@file
#include "nix/store/config.hh"
#if NIX_WITH_S3_SUPPORT || NIX_WITH_CURL_S3
#if NIX_WITH_CURL_S3
# include "nix/util/url.hh"
# include "nix/util/util.hh"

View file

@ -1,50 +0,0 @@
#pragma once
///@file
#include "nix/store/config.hh"
#if NIX_WITH_S3_SUPPORT
# include "nix/util/ref.hh"
# include "nix/store/s3-url.hh"
# include <string>
namespace Aws {
namespace Client {
struct ClientConfiguration;
}
} // namespace Aws
namespace Aws {
namespace S3 {
class S3Client;
}
} // namespace Aws
namespace nix {
struct S3Helper
{
ref<Aws::Client::ClientConfiguration> config;
ref<Aws::S3::S3Client> client;
S3Helper(
const std::string & profile,
const std::string & region,
const std::string & scheme,
const std::string & endpoint);
ref<Aws::Client::ClientConfiguration>
makeConfig(const std::string & region, const std::string & scheme, const std::string & endpoint);
struct FileTransferResult
{
std::optional<std::string> data;
unsigned int durationMs;
};
FileTransferResult getObject(const std::string & bucketName, const std::string & key);
};
} // namespace nix
#endif

View file

@ -142,29 +142,7 @@ deps_public += nlohmann_json
sqlite = dependency('sqlite3', 'sqlite', version : '>=3.6.19')
deps_private += sqlite
# AWS C++ SDK has bad pkg-config. See
# https://github.com/aws/aws-sdk-cpp/issues/2673 for details.
aws_s3 = dependency('aws-cpp-sdk-s3', required : false)
# The S3 store definitions in the header will be hidden based on this variable.
configdata_pub.set('NIX_WITH_S3_SUPPORT', aws_s3.found().to_int())
if aws_s3.found()
aws_s3 = declare_dependency(
include_directories : include_directories(aws_s3.get_variable('includedir')),
link_args : [
'-L' + aws_s3.get_variable('libdir'),
'-laws-cpp-sdk-transfer',
'-laws-cpp-sdk-s3',
'-laws-cpp-sdk-identity-management',
'-laws-cpp-sdk-cognito-identity',
'-laws-cpp-sdk-sts',
'-laws-cpp-sdk-core',
'-laws-crt-cpp',
],
).as_system('system')
endif
deps_other += aws_s3
# Curl-based S3 store support (alternative to AWS SDK)
# Curl-based S3 store support
# Check if curl supports AWS SigV4 (requires >= 7.75.0)
curl_supports_aws_sigv4 = curl.version().version_compare('>= 7.75.0')
# AWS CRT C++ for lightweight credential management
@ -178,13 +156,6 @@ curl_s3_store_opt = get_option('curl-s3-store').require(
error_message : 'curl-based S3 support requires aws-crt-cpp',
)
# Make AWS SDK and curl-based S3 mutually exclusive
if aws_s3.found() and curl_s3_store_opt.enabled()
error(
'Cannot enable both AWS SDK S3 support and curl-based S3 support. Please choose one.',
)
endif
if curl_s3_store_opt.enabled()
deps_other += aws_crt_cpp
endif

View file

@ -23,20 +23,15 @@
embeddedSandboxShell ? stdenv.hostPlatform.isStatic,
withAWS ?
withCurlS3 ?
# Default is this way because there have been issues building this dependency
stdenv.hostPlatform == stdenv.buildPlatform && (stdenv.isLinux || stdenv.isDarwin),
withCurlS3 ? false,
}:
let
inherit (lib) fileset;
in
assert lib.assertMsg (!withAWS || !withCurlS3)
"withAWS and withCurlS3 are mutually exclusive - cannot enable both S3 implementations simultaneously";
mkMesonLibrary (finalAttrs: {
pname = "nix-store";
inherit version;
@ -70,7 +65,6 @@ mkMesonLibrary (finalAttrs: {
sqlite
]
++ lib.optional stdenv.hostPlatform.isLinux libseccomp
++ lib.optional withAWS aws-sdk-cpp
++ lib.optional withCurlS3 aws-crt-cpp;
propagatedBuildInputs = [

View file

@ -1,595 +1,6 @@
#include "nix/store/s3-binary-cache-store.hh"
#if NIX_WITH_S3_SUPPORT
# include <assert.h>
# include "nix/store/s3.hh"
# include "nix/store/nar-info.hh"
# include "nix/store/nar-info-disk-cache.hh"
# include "nix/store/globals.hh"
# include "nix/util/compression.hh"
# include "nix/store/filetransfer.hh"
# include "nix/util/signals.hh"
# include "nix/store/store-registration.hh"
# include <aws/core/Aws.h>
# include <aws/core/VersionConfig.h>
# include <aws/core/auth/AWSCredentialsProvider.h>
# include <aws/core/auth/AWSCredentialsProviderChain.h>
# include <aws/core/client/ClientConfiguration.h>
# include <aws/core/client/DefaultRetryStrategy.h>
# include <aws/core/utils/logging/FormattedLogSystem.h>
# include <aws/core/utils/logging/LogMacros.h>
# include <aws/core/utils/threading/Executor.h>
# include <aws/identity-management/auth/STSProfileCredentialsProvider.h>
# include <aws/s3/S3Client.h>
# include <aws/s3/model/GetObjectRequest.h>
# include <aws/s3/model/HeadObjectRequest.h>
# include <aws/s3/model/ListObjectsRequest.h>
# include <aws/s3/model/PutObjectRequest.h>
# include <aws/transfer/TransferManager.h>
using namespace Aws::Transfer;
namespace nix {
/* Nix error that additionally carries the AWS S3 error code and the
   exception name reported by the SDK, so callers can branch on them. */
struct S3Error : public Error
{
    Aws::S3::S3Errors err;
    Aws::String exceptionName;

    /* `args` are forwarded unchanged to the `Error` constructor. */
    template<typename... Args>
    S3Error(Aws::S3::S3Errors err, Aws::String exceptionName, const Args &... args)
        : Error(args...)
        , err(err)
        , exceptionName(std::move(exceptionName))
    {
    }
};
/* Unwrap an AWS `Outcome<R, E>`: hand back the result (with ownership) on
   success, otherwise throw an `S3Error` whose message is prefixed with `s`
   and includes the SDK's error message and request id. */
template<typename R, typename E>
R && checkAws(std::string_view s, Aws::Utils::Outcome<R, E> && outcome)
{
    if (outcome.IsSuccess())
        return outcome.GetResultWithOwnership();

    const auto & failure = outcome.GetError();
    throw S3Error(
        failure.GetErrorType(),
        failure.GetExceptionName(),
        fmt("%s: %s (request id: %s)", s, failure.GetMessage(), failure.GetRequestId()));
}
/* Log sink handed to the AWS SDK: each formatted SDK log statement is
   forwarded to Nix's `debug` output with an "AWS: " prefix. */
class AwsLogger : public Aws::Utils::Logging::FormattedLogSystem
{
using Aws::Utils::Logging::FormattedLogSystem::FormattedLogSystem;
void ProcessFormattedStatement(Aws::String && statement) override
{
// `chomp` is applied first (presumably to strip the trailing newline — confirm).
debug("AWS: %s", chomp(statement));
}
// `Flush` is only overridden when the guard below holds.
// NOTE(review): the guard tests major, minor and patch independently rather
// than as an ordered version triple (e.g. 1.6.200 also satisfies it) —
// confirm this matches the SDK release that introduced `Flush`.
# if !(AWS_SDK_VERSION_MAJOR <= 1 && AWS_SDK_VERSION_MINOR <= 7 && AWS_SDK_VERSION_PATCH <= 115)
void Flush() override {}
# endif
};
/* Credentials provider chain used for S3 access.

   It retrieves credentials from the list of AWS default providers, with the
   addition of the STS credentials provider. The latter can be used to acquire
   further permissions with a specific IAM role.

   Roughly based on https://github.com/aws/aws-sdk-cpp/issues/150#issuecomment-538548438
*/
struct CustomAwsCredentialsProviderChain : public Aws::Auth::AWSCredentialsProviderChain
{
    CustomAwsCredentialsProviderChain(const std::string & profile)
    {
        if (!profile.empty()) {
            // Override the profile name to retrieve from the AWS config and credentials. I believe this
            // option comes from the ?profile querystring in nix.conf.
            AddProvider(std::make_shared<Aws::Auth::ProfileConfigFileAWSCredentialsProvider>(profile.c_str()));
            AddProvider(std::make_shared<Aws::Auth::STSProfileCredentialsProvider>(profile));
            return;
        }

        // No explicit profile: use all the default AWS providers, plus the
        // possibility to acquire an IAM role directly via a profile.
        Aws::Auth::DefaultAWSCredentialsProviderChain defaultChain;
        for (const auto & provider : defaultChain.GetProviders())
            AddProvider(provider);
        AddProvider(std::make_shared<Aws::Auth::STSProfileCredentialsProvider>());
    }
};
/* Initialise the AWS SDK. The body runs at most once per process
   (guarded by `std::call_once`), however often this is called. */
static void initAWS()
{
static std::once_flag flag;
std::call_once(flag, []() {
Aws::SDKOptions options;
/* We install our own OpenSSL locking function (see
shared.cc), so don't let aws-sdk-cpp override it. */
options.cryptoOptions.initAndCleanupOpenSSL = false;
// SDK logging is only wired up when Nix runs at debug verbosity or above:
// exactly `lvlDebug` maps to the SDK's Debug level, anything higher to Trace.
if (verbosity >= lvlDebug) {
options.loggingOptions.logLevel =
verbosity == lvlDebug ? Aws::Utils::Logging::LogLevel::Debug : Aws::Utils::Logging::LogLevel::Trace;
// `options` is captured by copy, so the factory sees the log level chosen above.
options.loggingOptions.logger_create_fn = [options]() {
return std::make_shared<AwsLogger>(options.loggingOptions.logLevel);
};
}
Aws::InitAPI(options);
});
}
/* Build the client configuration via `makeConfig` and an `S3Client` that
   authenticates through `CustomAwsCredentialsProviderChain(profile)`. */
S3Helper::S3Helper(
const std::string & profile, const std::string & region, const std::string & scheme, const std::string & endpoint)
: config(makeConfig(region, scheme, endpoint))
, client(
make_ref<Aws::S3::S3Client>(
std::make_shared<CustomAwsCredentialsProviderChain>(profile),
*config,
// The third constructor argument differs by SDK version: a plain bool
// before 1.3, a payload signing policy afterwards.
# if AWS_SDK_VERSION_MAJOR == 1 && AWS_SDK_VERSION_MINOR < 3
false,
# else
Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::Never,
# endif
// True only when no custom endpoint was given (presumably the SDK's
// virtual-host addressing flag — confirm against the S3Client signature).
endpoint.empty()))
{
}
/* Retry strategy that defers the decision to the SDK's default strategy and
   logs every retry it is about to perform. */
class RetryStrategy : public Aws::Client::DefaultRetryStrategy
{
    bool ShouldRetry(const Aws::Client::AWSError<Aws::Client::CoreErrors> & error, long attemptedRetries) const override
    {
        // Give a pending user interrupt the chance to abort before retrying.
        checkInterrupt();

        if (!Aws::Client::DefaultRetryStrategy::ShouldRetry(error, attemptedRetries))
            return false;

        printError(
            "AWS error '%s' (%s; request id: %s), will retry in %d ms",
            error.GetExceptionName(),
            error.GetMessage(),
            error.GetRequestId(),
            CalculateDelayBeforeNextRetry(error, attemptedRetries));
        return true;
    }
};
/* Create the AWS client configuration for the given region, with an optional
   scheme and endpoint override. Calls `initAWS()` first so the SDK is
   initialised before any SDK object is created. */
ref<Aws::Client::ClientConfiguration>
S3Helper::makeConfig(const std::string & region, const std::string & scheme, const std::string & endpoint)
{
initAWS();
auto res = make_ref<Aws::Client::ClientConfiguration>();
res->allowSystemProxy = true;
res->region = region;
// An empty scheme/endpoint leaves the configuration's default untouched.
if (!scheme.empty()) {
res->scheme = Aws::Http::SchemeMapper::FromString(scheme.c_str());
}
if (!endpoint.empty()) {
res->endpointOverride = endpoint;
}
// 600 s request timeout, 5 s connect timeout.
res->requestTimeoutMs = 600 * 1000;
res->connectTimeoutMs = 5 * 1000;
res->retryStrategy = std::make_shared<RetryStrategy>();
res->caFile = settings.caFile;
return res;
}
/* Download `key` from `bucketName` into memory.

   Returns a result whose `data` holds the body (decompressed according to the
   response's content encoding) and whose `durationMs` is the elapsed time.
   `data` stays empty when the request fails with NO_SUCH_KEY, ACCESS_DENIED
   or an "ExpiredToken" exception; any other `S3Error` is rethrown. */
S3Helper::FileTransferResult S3Helper::getObject(const std::string & bucketName, const std::string & key)
{
std::string uri = "s3://" + bucketName + "/" + key;
Activity act(
*logger, lvlTalkative, actFileTransfer, fmt("downloading '%s'", uri), Logger::Fields{uri}, getCurActivity());
auto request = Aws::S3::Model::GetObjectRequest().WithBucket(bucketName).WithKey(key);
// Collect the response body in an in-memory stringstream.
request.SetResponseStreamFactory([&]() { return Aws::New<std::stringstream>("STRINGSTREAM"); });
size_t bytesDone = 0;
size_t bytesExpected = 0;
// Progress reporting: the expected size is taken from the first
// Content-Length header seen; each callback adds the received byte count.
request.SetDataReceivedEventHandler(
[&](const Aws::Http::HttpRequest * req, Aws::Http::HttpResponse * resp, long long l) {
if (!bytesExpected && resp->HasHeader("Content-Length")) {
if (auto length = string2Int<size_t>(resp->GetHeader("Content-Length"))) {
bytesExpected = *length;
}
}
bytesDone += l;
act.progress(bytesDone, bytesExpected);
});
// Stop the transfer as soon as the user interrupts.
request.SetContinueRequestHandler([](const Aws::Http::HttpRequest *) { return !isInterrupted(); });
FileTransferResult res;
auto now1 = std::chrono::steady_clock::now();
try {
auto result = checkAws(fmt("AWS error fetching '%s'", key), client->GetObject(request));
act.progress(result.GetContentLength(), result.GetContentLength());
res.data = decompress(result.GetContentEncoding(), dynamic_cast<std::stringstream &>(result.GetBody()).str());
} catch (S3Error & e) {
// Only the "object not available" family of errors is swallowed
// (leaving `res.data` empty); everything else propagates.
if ((e.err != Aws::S3::S3Errors::NO_SUCH_KEY) && (e.err != Aws::S3::S3Errors::ACCESS_DENIED) &&
// Expired tokens are not really an error, more of a caching problem. Should be treated same as 403.
//
// AWS unwilling to provide a specific error type for the situation
// (https://github.com/aws/aws-sdk-cpp/issues/1843) so use this hack
(e.exceptionName != "ExpiredToken"))
throw;
}
auto now2 = std::chrono::steady_clock::now();
res.durationMs = std::chrono::duration_cast<std::chrono::milliseconds>(now2 - now1).count();
return res;
}
/* Construct the store configuration for bucket `bucketName`.
   Throws `UsageError` when the bucket name is empty; `uriScheme` is only
   used in that error message. */
S3BinaryCacheStoreConfig::S3BinaryCacheStoreConfig(
std::string_view uriScheme, std::string_view bucketName, const Params & params)
: StoreConfig(params)
, BinaryCacheStoreConfig(params)
, bucketName(bucketName)
{
// We don't want to use the AWS SDK in the header, so we check that the
// hard-coded default region matches the SDK's constant here.
// TODO do this better after we overhaul the store settings system.
assert(std::string{defaultRegion} == std::string{Aws::Region::US_EAST_1});
if (bucketName.empty())
throw UsageError("`%s` store requires a bucket name in its Store URI", uriScheme);
}
/* Initialise the `BinaryCacheStore` base from the configuration and keep a
   reference to that configuration in `config`. */
S3BinaryCacheStore::S3BinaryCacheStore(ref<Config> config)
: BinaryCacheStore(*config)
, config{config}
{
}
/* Return the store documentation. The text is pulled in at compile time by
   including `s3-binary-cache-store.md` as the returned expression. */
std::string S3BinaryCacheStoreConfig::doc()
{
return
# include "s3-binary-cache-store.md"
;
}
/* Build the store reference for this configuration: the first registered URI
   scheme, the bucket name as authority, and the current query parameters. */
StoreReference S3BinaryCacheStoreConfig::getReference() const
{
return {
.variant =
StoreReference::Specified{
.scheme = *uriSchemes().begin(),
.authority = bucketName,
},
.params = getQueryParams(),
};
}
struct S3BinaryCacheStoreImpl : virtual S3BinaryCacheStore
{
Stats stats;
S3Helper s3Helper;
/* Initialise the base classes from `config`, create the S3 helper from the
   configured profile, region, scheme and endpoint, and attach the NAR info
   disk cache. */
S3BinaryCacheStoreImpl(ref<Config> config)
: Store{*config}
, BinaryCacheStore{*config}
, S3BinaryCacheStore{config}
, s3Helper(config->profile, config->region, config->scheme, config->endpoint)
{
diskCache = getNarInfoDiskCache();
}
/* Initialise the store. If the disk cache already has an up-to-date entry
   for this store, its `wantMassQuery` and `priority` become the defaults and
   `BinaryCacheStore::init()` is skipped; otherwise the base initialisation
   runs and a new disk cache entry is created. */
void init() override
{
/* FIXME: The URI (when used as a cache key) must have several parameters rendered (e.g. the endpoint).
This must be represented as a separate opaque string (probably a URI) that has the right query parameters. */
auto cacheUri = config->getReference().render(/*withParams=*/false);
if (auto cacheInfo = diskCache->upToDateCacheExists(cacheUri)) {
config->wantMassQuery.setDefault(cacheInfo->wantMassQuery);
config->priority.setDefault(cacheInfo->priority);
} else {
BinaryCacheStore::init();
diskCache->createCache(cacheUri, config->storeDir, config->wantMassQuery, config->priority);
}
}
/* Return the counters updated by this store's head/get/put operations. */
const Stats & getS3Stats() override
{
return stats;
}
/* Specialised validity check: instead of probing for the .narinfo with a
   HEAD request first, optimistically fetch it. Since .narinfos are small,
   doing a GET is unlikely to be slower than HEAD. A path is valid exactly
   when `queryPathInfo` does not throw `InvalidPath`. */
bool isValidPathUncached(const StorePath & storePath) override
{
    try {
        queryPathInfo(storePath);
    } catch (InvalidPath &) {
        return false;
    }
    return true;
}
bool fileExists(const std::string & path) override
{
stats.head++;
auto res = s3Helper.client->HeadObject(
Aws::S3::Model::HeadObjectRequest().WithBucket(config->bucketName).WithKey(path));
if (!res.IsSuccess()) {
auto & error = res.GetError();
if (error.GetErrorType() == Aws::S3::S3Errors::RESOURCE_NOT_FOUND
|| error.GetErrorType() == Aws::S3::S3Errors::NO_SUCH_KEY
// Expired tokens are not really an error, more of a caching problem. Should be treated same as 403.
// AWS unwilling to provide a specific error type for the situation
// (https://github.com/aws/aws-sdk-cpp/issues/1843) so use this hack
|| (error.GetErrorType() == Aws::S3::S3Errors::UNKNOWN && error.GetExceptionName() == "ExpiredToken")
// If bucket listing is disabled, 404s turn into 403s
|| error.GetErrorType() == Aws::S3::S3Errors::ACCESS_DENIED)
return false;
throw Error("AWS error fetching '%s': %s", path, error.GetMessage());
}
return true;
}
std::shared_ptr<TransferManager> transferManager;
std::once_flag transferManagerCreated;
/* Per-upload context handed to the transfer manager. It carries the upload's
   `Activity` for progress reporting and lets the SDK callbacks wake the
   thread that is waiting for the transfer to change state. */
struct AsyncContext : public Aws::Client::AsyncCallerContext
{
    mutable std::mutex mutex;
    mutable std::condition_variable cv;
    const Activity & act;

    /* Wake the thread blocked in `wait()`, if any. */
    void notify() const
    {
        cv.notify_one();
    }

    /* Block until notified or until a short timeout elapses.

       The wait is bounded on purpose: `notify()` carries no state, so a
       notification delivered just before the waiter blocks would otherwise be
       lost and the waiter could sleep forever. Callers must re-check the
       transfer status after every return (the wake-up may be a timeout or a
       spurious one). */
    void wait() const
    {
        std::unique_lock<std::mutex> lk(mutex);
        cv.wait_for(lk, std::chrono::milliseconds(100));
    }

    AsyncContext(const Activity & act)
        : act(act)
    {
    }
};
/* Upload the contents of `istream` to `s3://<bucket>/<path>`.

   When the `multipart-upload` setting is enabled, a transfer manager is
   created once and used for the upload; in that mode a non-empty
   `contentEncoding` is rejected with an `Error`. Otherwise a single
   PutObject request is issued. Progress is reported through an `Activity`,
   and the put counters in `stats` are updated on success.

   Throws `Error` (or `S3Error` via `checkAws`) when the upload fails. */
void uploadFile(
    const std::string & path,
    std::shared_ptr<std::basic_iostream<char>> istream,
    const std::string & mimeType,
    const std::string & contentEncoding)
{
    std::string uri = "s3://" + config->bucketName + "/" + path;

    Activity act(
        *logger, lvlTalkative, actFileTransfer, fmt("uploading '%s'", uri), Logger::Fields{uri}, getCurActivity());

    // Determine the payload size by seeking to the end and back.
    istream->seekg(0, istream->end);
    auto size = istream->tellg();
    istream->seekg(0, istream->beg);

    // hardware_concurrency() is allowed to return 0 when the value cannot be
    // determined; never create an executor without worker threads.
    auto maxThreads = std::max(1u, std::thread::hardware_concurrency());

    static std::shared_ptr<Aws::Utils::Threading::PooledThreadExecutor> executor =
        std::make_shared<Aws::Utils::Threading::PooledThreadExecutor>(maxThreads);

    // The transfer manager is created at most once, and only for multipart uploads.
    std::call_once(transferManagerCreated, [&]() {
        if (config->multipartUpload) {
            TransferManagerConfiguration transferConfig(executor.get());

            transferConfig.s3Client = s3Helper.client;
            transferConfig.bufferSize = config->bufferSize;

            transferConfig.uploadProgressCallback =
                [](const TransferManager * transferManager,
                   const std::shared_ptr<const TransferHandle> & transferHandle) {
                    auto context = std::dynamic_pointer_cast<const AsyncContext>(transferHandle->GetContext());
                    size_t bytesDone = transferHandle->GetBytesTransferred();
                    size_t bytesTotal = transferHandle->GetBytesTotalSize();
                    try {
                        checkInterrupt();
                        context->act.progress(bytesDone, bytesTotal);
                    } catch (...) {
                        // Wake the uploading thread so it can notice the interrupt.
                        context->notify();
                    }
                };
            transferConfig.transferStatusUpdatedCallback =
                [](const TransferManager * transferManager,
                   const std::shared_ptr<const TransferHandle> & transferHandle) {
                    auto context = std::dynamic_pointer_cast<const AsyncContext>(transferHandle->GetContext());
                    context->notify();
                };

            transferManager = TransferManager::Create(transferConfig);
        }
    });

    auto now1 = std::chrono::steady_clock::now();

    auto & bucketName = config->bucketName;

    if (transferManager) {

        if (contentEncoding != "")
            throw Error("setting a content encoding is not supported with S3 multi-part uploads");

        auto context = std::make_shared<AsyncContext>(act);
        std::shared_ptr<TransferHandle> transferHandle = transferManager->UploadFile(
            istream,
            bucketName,
            path,
            mimeType,
            Aws::Map<Aws::String, Aws::String>(),
            context /*, contentEncoding */);

        // Wait for the transfer to leave its pending states, cancelling it
        // when the user interrupts.
        TransferStatus status = transferHandle->GetStatus();
        while (status == TransferStatus::IN_PROGRESS || status == TransferStatus::NOT_STARTED) {
            if (!isInterrupted()) {
                context->wait();
            } else {
                transferHandle->Cancel();
                transferHandle->WaitUntilFinished();
            }
            status = transferHandle->GetStatus();
        }
        act.progress(transferHandle->GetBytesTransferred(), transferHandle->GetBytesTotalSize());

        if (status == TransferStatus::FAILED)
            throw Error(
                "AWS error: failed to upload 's3://%s/%s': %s",
                bucketName,
                path,
                transferHandle->GetLastError().GetMessage());

        if (status != TransferStatus::COMPLETED)
            throw Error("AWS error: transfer status of 's3://%s/%s' in unexpected state", bucketName, path);

    } else {
        act.progress(0, size);

        auto request = Aws::S3::Model::PutObjectRequest().WithBucket(bucketName).WithKey(path);

        size_t bytesSent = 0;
        request.SetDataSentEventHandler([&](const Aws::Http::HttpRequest * req, long long l) {
            bytesSent += l;
            act.progress(bytesSent, size);
        });

        request.SetContinueRequestHandler([](const Aws::Http::HttpRequest *) { return !isInterrupted(); });

        request.SetContentType(mimeType);

        if (contentEncoding != "")
            request.SetContentEncoding(contentEncoding);

        request.SetBody(istream);

        auto result = checkAws(fmt("AWS error uploading '%s'", path), s3Helper.client->PutObject(request));

        act.progress(size, size);
    }

    auto now2 = std::chrono::steady_clock::now();

    auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(now2 - now1).count();

    printInfo("uploaded 's3://%s/%s' (%d bytes) in %d ms", bucketName, path, size, duration);

    stats.putTimeMs += duration;
    // `size` is clamped at 0 before being added to the byte counter.
    stats.putBytes += std::max(size, (decltype(size)) 0);
    stats.put++;
}
/* Upload `istream` under `path`, compressing it first when a compression
   method is configured for that kind of file: `.narinfo` files, `.ls` files
   and everything under `log/` each have their own setting. The compression
   method is also passed on as the content encoding. All other files are
   uploaded unmodified with an empty content encoding. */
void upsertFile(
const std::string & path,
std::shared_ptr<std::basic_iostream<char>> istream,
const std::string & mimeType) override
{
// Drains the whole input stream and returns a new in-memory stream
// holding the compressed bytes.
auto compress = [&](std::string compression) {
auto compressed = nix::compress(compression, StreamToSourceAdapter(istream).drain());
return std::make_shared<std::stringstream>(std::move(compressed));
};
if (config->narinfoCompression != "" && hasSuffix(path, ".narinfo"))
uploadFile(path, compress(config->narinfoCompression), mimeType, config->narinfoCompression);
else if (config->lsCompression != "" && hasSuffix(path, ".ls"))
uploadFile(path, compress(config->lsCompression), mimeType, config->lsCompression);
else if (config->logCompression != "" && hasPrefix(path, "log/"))
uploadFile(path, compress(config->logCompression), mimeType, config->logCompression);
else
uploadFile(path, istream, mimeType, "");
}
/* Fetch `path` from the bucket and write its contents to `sink`.
   Updates the get counters in `stats`. Throws `NoSuchBinaryCacheFile` when
   the download produced no data. */
void getFile(const std::string & path, Sink & sink) override
{
    stats.get++;

    // FIXME: stream output to sink.
    auto fetched = s3Helper.getObject(config->bucketName, path);

    stats.getBytes += fetched.data ? fetched.data->size() : 0;
    stats.getTimeMs += fetched.durationMs;

    if (!fetched.data)
        throw NoSuchBinaryCacheFile(
            "file '%s' does not exist in binary cache '%s'", path, config->getHumanReadableURI());

    printTalkative(
        "downloaded 's3://%s/%s' (%d bytes) in %d ms",
        config->bucketName,
        path,
        fetched.data->size(),
        fetched.durationMs);

    sink(*fetched.data);
}
/* List the bucket and return a store path for every top-level `.narinfo`
   object. The listing is paginated: it repeats with the returned marker
   until the marker is empty. Throws `S3Error` (via `checkAws`) when a
   listing request fails. */
StorePathSet queryAllValidPaths() override
{
StorePathSet paths;
std::string marker;
auto & bucketName = config->bucketName;
do {
debug("listing bucket 's3://%s' from key '%s'...", bucketName, marker);
auto res = checkAws(
fmt("AWS error listing bucket '%s'", bucketName),
s3Helper.client->ListObjects(
Aws::S3::Model::ListObjectsRequest().WithBucket(bucketName).WithDelimiter("/").WithMarker(marker)));
auto & contents = res.GetContents();
debug("got %d keys, next marker '%s'", contents.size(), res.GetNextMarker());
for (const auto & object : contents) {
auto & key = object.GetKey();
// Only keys of exactly 40 characters ending in ".narinfo" are
// considered, i.e. 32 characters plus the 8-character suffix.
if (key.size() != 40 || !hasSuffix(key, ".narinfo"))
continue;
// Strip the suffix and append a placeholder name to form a store path.
paths.insert(parseStorePath(storeDir + "/" + key.substr(0, key.size() - 8) + "-" + MissingName));
}
marker = res.GetNextMarker();
} while (!marker.empty());
return paths;
}
/**
 * For now, we conservatively say we don't know.
 *
 * Always returns `std::nullopt`.
 *
 * \todo try to expose our S3 authentication status.
 */
std::optional<TrustedFlag> isTrustedClient() override
{
return std::nullopt;
}
};
/* Create an `S3BinaryCacheStoreImpl` that shares this configuration object
   (obtained through `shared_from_this`), run its `init()` and return it. */
ref<Store> S3BinaryCacheStoreImpl::Config::openStore() const
{
auto store =
make_ref<S3BinaryCacheStoreImpl>(ref{// FIXME we shouldn't actually need a mutable config
std::const_pointer_cast<S3BinaryCacheStore::Config>(shared_from_this())});
store->init();
return store;
}
static RegisterStoreImplementation<S3BinaryCacheStoreImpl::Config> regS3BinaryCacheStore;
} // namespace nix
#elif NIX_WITH_CURL_S3
#if NIX_WITH_CURL_S3
# include <cassert>

View file

@ -1,6 +1,6 @@
#include "nix/store/s3-url.hh"
#if NIX_WITH_S3_SUPPORT || NIX_WITH_CURL_S3
#if NIX_WITH_CURL_S3
# include "nix/util/error.hh"
# include "nix/util/split.hh"

View file

@ -199,8 +199,6 @@ in
user-sandboxing = runNixOSTest ./user-sandboxing;
s3-binary-cache-store = runNixOSTest ./s3-binary-cache-store.nix;
curl-s3-binary-cache-store = runNixOSTest ./curl-s3-binary-cache-store.nix;
fsync = runNixOSTest ./fsync.nix;

View file

@ -1,98 +0,0 @@
{
lib,
config,
nixpkgs,
...
}:
let
pkgs = config.nodes.client.nixpkgs.pkgs;
pkgA = pkgs.cowsay;
accessKey = "BKIKJAA5BMMU2RHO6IBB";
secretKey = "V7f1CwQqAcwo80UEIJEjc5gVQUSSx5ohQ9GSrr12";
env = "AWS_ACCESS_KEY_ID=${accessKey} AWS_SECRET_ACCESS_KEY=${secretKey}";
storeUrl = "s3://my-cache?endpoint=http://server:9000&region=eu-west-1";
objectThatDoesNotExist = "s3://my-cache/foo-that-does-not-exist?endpoint=http://server:9000&region=eu-west-1";
in
{
name = "s3-binary-cache-store";
nodes = {
server =
{
config,
lib,
pkgs,
...
}:
{
virtualisation.writableStore = true;
virtualisation.additionalPaths = [ pkgA ];
environment.systemPackages = [ pkgs.minio-client ];
nix.extraOptions = ''
experimental-features = nix-command
substituters =
'';
services.minio = {
enable = true;
region = "eu-west-1";
rootCredentialsFile = pkgs.writeText "minio-credentials-full" ''
MINIO_ROOT_USER=${accessKey}
MINIO_ROOT_PASSWORD=${secretKey}
'';
};
networking.firewall.allowedTCPPorts = [ 9000 ];
};
client =
{ config, pkgs, ... }:
{
virtualisation.writableStore = true;
nix.extraOptions = ''
experimental-features = nix-command
substituters =
'';
};
};
testScript =
{ nodes }:
''
# fmt: off
start_all()
# Create a binary cache.
server.wait_for_unit("minio")
server.wait_for_unit("network-addresses-eth1.service")
server.wait_for_open_port(9000)
server.succeed("mc config host add minio http://localhost:9000 ${accessKey} ${secretKey} --api s3v4")
server.succeed("mc mb minio/my-cache")
server.succeed("${env} nix copy --to '${storeUrl}' ${pkgA}")
client.wait_for_unit("network-addresses-eth1.service")
# Test fetchurl on s3:// URLs while we're at it.
client.succeed("${env} nix eval --impure --expr 'builtins.fetchurl { name = \"foo\"; url = \"s3://my-cache/nix-cache-info?endpoint=http://server:9000&region=eu-west-1\"; }'")
# Test that the format string in the error message is properly setup and won't display `%s` instead of the failed URI
msg = client.fail("${env} nix eval --impure --expr 'builtins.fetchurl { name = \"foo\"; url = \"${objectThatDoesNotExist}\"; }' 2>&1")
if "S3 object '${objectThatDoesNotExist}' does not exist" not in msg:
print(msg) # So that you can see the message that was improperly formatted
raise Exception("Error message formatting didn't work")
# Copy a package from the binary cache.
client.fail("nix path-info ${pkgA}")
client.succeed("${env} nix store info --store '${storeUrl}' >&2")
client.succeed("${env} nix copy --no-check-sigs --from '${storeUrl}' ${pkgA}")
client.succeed("nix path-info ${pkgA}")
'';
}