mirror of
https://github.com/NixOS/nix.git
synced 2025-12-06 09:01:01 +01:00
Merge pull request #14689 from NixOS/tarball-cache-faster
libfetchers/git-utils: Avoid using git_writestream for small files
This commit is contained in:
commit
e67c97b5f0
2 changed files with 108 additions and 49 deletions
|
|
@ -24,6 +24,7 @@
|
|||
#include <git2/indexer.h>
|
||||
#include <git2/object.h>
|
||||
#include <git2/odb.h>
|
||||
#include <git2/odb_backend.h>
|
||||
#include <git2/refs.h>
|
||||
#include <git2/remote.h>
|
||||
#include <git2/repository.h>
|
||||
|
|
@ -31,6 +32,7 @@
|
|||
#include <git2/status.h>
|
||||
#include <git2/submodule.h>
|
||||
#include <git2/sys/odb_backend.h>
|
||||
#include <git2/sys/repository.h>
|
||||
#include <git2/sys/mempack.h>
|
||||
#include <git2/tag.h>
|
||||
#include <git2/tree.h>
|
||||
|
|
@ -89,7 +91,7 @@ typedef std::unique_ptr<git_odb, Deleter<git_odb_free>> ObjectDb;
|
|||
typedef std::unique_ptr<git_packbuilder, Deleter<git_packbuilder_free>> PackBuilder;
|
||||
typedef std::unique_ptr<git_indexer, Deleter<git_indexer_free>> Indexer;
|
||||
|
||||
Hash toHash(const git_oid & oid)
|
||||
static Hash toHash(const git_oid & oid)
|
||||
{
|
||||
#ifdef GIT_EXPERIMENTAL_SHA256
|
||||
assert(oid.type == GIT_OID_SHA1);
|
||||
|
|
@ -108,7 +110,7 @@ static void initLibGit2()
|
|||
});
|
||||
}
|
||||
|
||||
git_oid hashToOID(const Hash & hash)
|
||||
static git_oid hashToOID(const Hash & hash)
|
||||
{
|
||||
git_oid oid;
|
||||
if (git_oid_fromstr(&oid, hash.gitRev().c_str()))
|
||||
|
|
@ -116,7 +118,7 @@ git_oid hashToOID(const Hash & hash)
|
|||
return oid;
|
||||
}
|
||||
|
||||
Object lookupObject(git_repository * repo, const git_oid & oid, git_object_t type = GIT_OBJECT_ANY)
|
||||
static Object lookupObject(git_repository * repo, const git_oid & oid, git_object_t type = GIT_OBJECT_ANY)
|
||||
{
|
||||
Object obj;
|
||||
if (git_object_lookup(Setter(obj), repo, &oid, type)) {
|
||||
|
|
@ -127,7 +129,7 @@ Object lookupObject(git_repository * repo, const git_oid & oid, git_object_t typ
|
|||
}
|
||||
|
||||
template<typename T>
|
||||
T peelObject(git_object * obj, git_object_t type)
|
||||
static T peelObject(git_object * obj, git_object_t type)
|
||||
{
|
||||
T obj2;
|
||||
if (git_object_peel((git_object **) (typename T::pointer *) Setter(obj2), obj, type)) {
|
||||
|
|
@ -138,7 +140,7 @@ T peelObject(git_object * obj, git_object_t type)
|
|||
}
|
||||
|
||||
template<typename T>
|
||||
T dupObject(typename T::pointer obj)
|
||||
static T dupObject(typename T::pointer obj)
|
||||
{
|
||||
T obj2;
|
||||
if (git_object_dup((git_object **) (typename T::pointer *) Setter(obj2), (git_object *) obj))
|
||||
|
|
@ -245,9 +247,15 @@ struct GitRepoImpl : GitRepo, std::enable_shared_from_this<GitRepoImpl>
|
|||
* In-memory object store for efficient batched writing to packfiles.
|
||||
* Owned by `repo`.
|
||||
*/
|
||||
git_odb_backend * mempack_backend;
|
||||
git_odb_backend * mempackBackend = nullptr;
|
||||
|
||||
GitRepoImpl(std::filesystem::path _path, bool create, bool bare)
|
||||
/**
|
||||
* On-disk packfile object store.
|
||||
* Owned by `repo`.
|
||||
*/
|
||||
git_odb_backend * packBackend = nullptr;
|
||||
|
||||
GitRepoImpl(std::filesystem::path _path, bool create, bool bare, bool packfilesOnly = false)
|
||||
: path(std::move(_path))
|
||||
, bare(bare)
|
||||
{
|
||||
|
|
@ -258,15 +266,39 @@ struct GitRepoImpl : GitRepo, std::enable_shared_from_this<GitRepoImpl>
|
|||
throw Error("opening Git repository %s: %s", path, git_error_last()->message);
|
||||
|
||||
ObjectDb odb;
|
||||
if (git_repository_odb(Setter(odb), repo.get()))
|
||||
throw Error("getting Git object database: %s", git_error_last()->message);
|
||||
if (packfilesOnly) {
|
||||
/* Create a fresh object database because by default the repo also
|
||||
has loose object backends. We are not using any of those for the
|
||||
tarball cache, but libgit2 still does a bunch of unnecessary
|
||||
syscalls that always fail with ENOENT. NOTE: We are only creating
|
||||
a libgit2 object here and not modifying the repo. Think of this as
|
||||
enabling the specific backend.
|
||||
*/
|
||||
|
||||
if (git_odb_new(Setter(odb)))
|
||||
throw Error("creating Git object database: %s", git_error_last()->message);
|
||||
|
||||
if (git_odb_backend_pack(&packBackend, (path / "objects").string().c_str()))
|
||||
throw Error("creating pack backend: %s", git_error_last()->message);
|
||||
|
||||
if (git_odb_add_backend(odb.get(), packBackend, 1))
|
||||
throw Error("adding pack backend to Git object database: %s", git_error_last()->message);
|
||||
} else {
|
||||
if (git_repository_odb(Setter(odb), repo.get()))
|
||||
throw Error("getting Git object database: %s", git_error_last()->message);
|
||||
}
|
||||
|
||||
// mempackBackend will be owned by the repository, so we are not expected to free it ourselves.
|
||||
if (git_mempack_new(&mempack_backend))
|
||||
if (git_mempack_new(&mempackBackend))
|
||||
throw Error("creating mempack backend: %s", git_error_last()->message);
|
||||
|
||||
if (git_odb_add_backend(odb.get(), mempack_backend, 999))
|
||||
if (git_odb_add_backend(odb.get(), mempackBackend, 999))
|
||||
throw Error("adding mempack backend to Git object database: %s", git_error_last()->message);
|
||||
|
||||
if (packfilesOnly) {
|
||||
if (git_repository_set_odb(repo.get(), odb.get()))
|
||||
throw Error("setting Git object database: %s", git_error_last()->message);
|
||||
}
|
||||
}
|
||||
|
||||
operator git_repository *()
|
||||
|
|
@ -287,7 +319,7 @@ struct GitRepoImpl : GitRepo, std::enable_shared_from_this<GitRepoImpl>
|
|||
git_packbuilder_set_threads(packBuilder.get(), 0 /* autodetect */);
|
||||
|
||||
packBuilderContext.handleException(
|
||||
"preparing packfile", git_mempack_write_thin_pack(mempack_backend, packBuilder.get()));
|
||||
"preparing packfile", git_mempack_write_thin_pack(mempackBackend, packBuilder.get()));
|
||||
checkInterrupt();
|
||||
packBuilderContext.handleException("writing packfile", git_packbuilder_write_buf(&buf, packBuilder.get()));
|
||||
checkInterrupt();
|
||||
|
|
@ -320,7 +352,7 @@ struct GitRepoImpl : GitRepo, std::enable_shared_from_this<GitRepoImpl>
|
|||
if (git_indexer_commit(indexer.get(), &stats))
|
||||
throw Error("committing git packfile index: %s", git_error_last()->message);
|
||||
|
||||
if (git_mempack_reset(mempack_backend))
|
||||
if (git_mempack_reset(mempackBackend))
|
||||
throw Error("resetting git mempack backend: %s", git_error_last()->message);
|
||||
|
||||
checkInterrupt();
|
||||
|
|
@ -553,27 +585,6 @@ struct GitRepoImpl : GitRepo, std::enable_shared_from_this<GitRepoImpl>
|
|||
|
||||
ref<GitFileSystemObjectSink> getFileSystemObjectSink() override;
|
||||
|
||||
static int sidebandProgressCallback(const char * str, int len, void * payload)
|
||||
{
|
||||
auto act = (Activity *) payload;
|
||||
act->result(resFetchStatus, trim(std::string_view(str, len)));
|
||||
return getInterrupted() ? -1 : 0;
|
||||
}
|
||||
|
||||
static int transferProgressCallback(const git_indexer_progress * stats, void * payload)
|
||||
{
|
||||
auto act = (Activity *) payload;
|
||||
act->result(
|
||||
resFetchStatus,
|
||||
fmt("%d/%d objects received, %d/%d deltas indexed, %s",
|
||||
stats->received_objects,
|
||||
stats->total_objects,
|
||||
stats->indexed_deltas,
|
||||
stats->total_deltas,
|
||||
renderSize(stats->received_bytes)));
|
||||
return getInterrupted() ? -1 : 0;
|
||||
}
|
||||
|
||||
void fetch(const std::string & url, const std::string & refspec, bool shallow) override
|
||||
{
|
||||
Activity act(*logger, lvlTalkative, actFetchTree, fmt("fetching Git repository '%s'", url));
|
||||
|
|
@ -701,9 +712,9 @@ struct GitRepoImpl : GitRepo, std::enable_shared_from_this<GitRepoImpl>
|
|||
}
|
||||
};
|
||||
|
||||
ref<GitRepo> GitRepo::openRepo(const std::filesystem::path & path, bool create, bool bare)
|
||||
ref<GitRepo> GitRepo::openRepo(const std::filesystem::path & path, bool create, bool bare, bool packfilesOnly)
|
||||
{
|
||||
return make_ref<GitRepoImpl>(path, create, bare);
|
||||
return make_ref<GitRepoImpl>(path, create, bare, packfilesOnly);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -1052,6 +1063,11 @@ struct GitFileSystemObjectSinkImpl : GitFileSystemObjectSink
|
|||
|
||||
std::vector<PendingDir> pendingDirs;
|
||||
|
||||
/**
|
||||
* Temporary buffer used by createRegularFile for storing small file contents.
|
||||
*/
|
||||
std::string regularFileContentsBuffer;
|
||||
|
||||
void pushBuilder(std::string name)
|
||||
{
|
||||
const git_tree_entry * entry;
|
||||
|
|
@ -1133,41 +1149,83 @@ struct GitFileSystemObjectSinkImpl : GitFileSystemObjectSink
|
|||
if (!prepareDirs(pathComponents, false))
|
||||
return;
|
||||
|
||||
git_writestream * stream = nullptr;
|
||||
if (git_blob_create_from_stream(&stream, *repo, nullptr))
|
||||
throw Error("creating a blob stream object: %s", git_error_last()->message);
|
||||
using WriteStream = std::unique_ptr<::git_writestream, decltype([](::git_writestream * stream) {
|
||||
if (stream)
|
||||
stream->free(stream);
|
||||
})>;
|
||||
|
||||
/* Maximum file size that gets buffered in memory before flushing to a WriteStream
|
||||
that's backed by a temporary objects/streamed_git2_* file. We should avoid that
|
||||
for common cases, since creating (and deleting) a temporary file for each blob
|
||||
is insanely expensive. */
|
||||
static constexpr std::size_t maxBufferSize = 1024 * 1024; /* 1 MiB */
|
||||
|
||||
struct CRF : CreateRegularFileSink
|
||||
{
|
||||
const CanonPath & path;
|
||||
GitFileSystemObjectSinkImpl & back;
|
||||
git_writestream * stream;
|
||||
WriteStream stream;
|
||||
std::string & contents;
|
||||
bool executable = false;
|
||||
|
||||
CRF(const CanonPath & path, GitFileSystemObjectSinkImpl & back, git_writestream * stream)
|
||||
CRF(const CanonPath & path, GitFileSystemObjectSinkImpl & back, std::string & regularFileContentsBuffer)
|
||||
: path(path)
|
||||
, back(back)
|
||||
, stream(stream)
|
||||
, stream(nullptr)
|
||||
, contents(regularFileContentsBuffer)
|
||||
{
|
||||
contents.clear();
|
||||
}
|
||||
|
||||
void writeToStream(std::string_view data)
|
||||
{
|
||||
/* Lazily create the stream. */
|
||||
if (!stream) {
|
||||
::git_writestream * stream2 = nullptr;
|
||||
if (git_blob_create_from_stream(&stream2, *back.repo, nullptr))
|
||||
throw Error("creating a blob stream object: %s", git_error_last()->message);
|
||||
stream = WriteStream{stream2};
|
||||
assert(stream);
|
||||
}
|
||||
|
||||
if (stream->write(stream.get(), data.data(), data.size()))
|
||||
throw Error("writing a blob for tarball member '%s': %s", path, git_error_last()->message);
|
||||
}
|
||||
|
||||
void operator()(std::string_view data) override
|
||||
{
|
||||
if (stream->write(stream, data.data(), data.size()))
|
||||
throw Error("writing a blob for tarball member '%s': %s", path, git_error_last()->message);
|
||||
/* Already in slow path. Just write to the slow stream. */
|
||||
if (stream) {
|
||||
writeToStream(data);
|
||||
return;
|
||||
}
|
||||
|
||||
contents += data;
|
||||
if (contents.size() > maxBufferSize) {
|
||||
writeToStream(contents); /* Will initialize stream. */
|
||||
contents.clear();
|
||||
}
|
||||
}
|
||||
|
||||
void isExecutable() override
|
||||
{
|
||||
executable = true;
|
||||
}
|
||||
} crf{path, *this, stream};
|
||||
} crf{path, *this, regularFileContentsBuffer};
|
||||
|
||||
func(crf);
|
||||
|
||||
git_oid oid;
|
||||
if (git_blob_create_from_stream_commit(&oid, stream))
|
||||
throw Error("creating a blob object for tarball member '%s': %s", path, git_error_last()->message);
|
||||
if (crf.stream) {
|
||||
/* Call .release(), since git_blob_create_from_stream_commit
|
||||
acquires ownership and frees the stream. */
|
||||
if (git_blob_create_from_stream_commit(&oid, crf.stream.release()))
|
||||
throw Error("creating a blob object for '%s': %s", path, git_error_last()->message);
|
||||
} else {
|
||||
if (git_blob_create_from_buffer(&oid, *repo, crf.contents.data(), crf.contents.size()))
|
||||
throw Error(
|
||||
"creating a blob object for '%s' from in-memory buffer: %s", path, git_error_last()->message);
|
||||
}
|
||||
|
||||
addToTree(*pathComponents.rbegin(), oid, crf.executable ? GIT_FILEMODE_BLOB_EXECUTABLE : GIT_FILEMODE_BLOB);
|
||||
}
|
||||
|
|
@ -1335,7 +1393,7 @@ namespace fetchers {
|
|||
ref<GitRepo> Settings::getTarballCache() const
|
||||
{
|
||||
static auto repoDir = std::filesystem::path(getCacheDir()) / "tarball-cache";
|
||||
return GitRepo::openRepo(repoDir, true, true);
|
||||
return GitRepo::openRepo(repoDir, /*create=*/true, /*bare=*/true, /*packfilesOnly=*/true);
|
||||
}
|
||||
|
||||
} // namespace fetchers
|
||||
|
|
|
|||
|
|
@ -32,7 +32,8 @@ struct GitRepo
|
|||
{
|
||||
virtual ~GitRepo() {}
|
||||
|
||||
static ref<GitRepo> openRepo(const std::filesystem::path & path, bool create = false, bool bare = false);
|
||||
static ref<GitRepo>
|
||||
openRepo(const std::filesystem::path & path, bool create = false, bool bare = false, bool packfilesOnly = false);
|
||||
|
||||
virtual uint64_t getRevCount(const Hash & rev) = 0;
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue