mirror of
https://github.com/NixOS/nix.git
synced 2025-12-05 08:31:00 +01:00
libfetchers/git-utils: Avoid using git_writestream for small files
It turns out that libgit2 is incredibly naive and each git_writestream creates a new temporary file like .cache/nix/tarball-cache/objects/streamed_git2_6a82bb68dc0a3918 that it reads from afterwards. It doesn't do any internal buffering. Doing (with a fresh fetcher cache) a simple: strace -c nix flake metadata "https://releases.nixos.org/nixos/25.05/nixos-25.05.813095.1c8ba8d3f763/nixexprs.tar.xz" --store "dummy://?read-only=false" (Before) % time seconds usecs/call calls errors syscall ------ ----------- ----------- --------- --------- ------------------ 31.05 2.372728 9 259790 81917 openat 19.21 1.467784 30 48157 unlink 10.43 0.796793 4 162898 getdents64 7.75 0.592637 4 145969 read 7.67 0.585976 3 177877 close 7.11 0.543032 4 129970 190 newfstatat 6.98 0.533211 10 48488 write 4.09 0.312585 3 81443 81443 utimensat 3.22 0.246158 3 81552 fstat (After) % time seconds usecs/call calls errors syscall ------ ----------- ----------- --------- --------- ------------------ 29.61 0.639393 3 162898 getdents64 26.26 0.567119 3 163523 81934 openat 12.50 0.269835 3 81848 207 newfstatat 11.60 0.250429 3 81443 81443 utimensat 9.82 0.212053 2 81593 close 9.33 0.201390 2 81544 fstat 0.18 0.003814 9 406 17 futex
This commit is contained in:
parent
0ec93e7ae7
commit
2f6550b7a7
1 changed files with 58 additions and 11 deletions
|
|
@ -1031,6 +1031,11 @@ struct GitFileSystemObjectSinkImpl : GitFileSystemObjectSink
|
|||
|
||||
std::vector<PendingDir> pendingDirs;
|
||||
|
||||
/**
|
||||
* Temporary buffer used by createRegularFile for storing small file contents.
|
||||
*/
|
||||
std::string regularFileContentsBuffer;
|
||||
|
||||
void pushBuilder(std::string name)
|
||||
{
|
||||
const git_tree_entry * entry;
|
||||
|
|
@ -1112,41 +1117,83 @@ struct GitFileSystemObjectSinkImpl : GitFileSystemObjectSink
|
|||
if (!prepareDirs(pathComponents, false))
|
||||
return;
|
||||
|
||||
git_writestream * stream = nullptr;
|
||||
if (git_blob_create_from_stream(&stream, *repo, nullptr))
|
||||
throw Error("creating a blob stream object: %s", git_error_last()->message);
|
||||
using WriteStream = std::unique_ptr<::git_writestream, decltype([](::git_writestream * stream) {
|
||||
if (stream)
|
||||
stream->free(stream);
|
||||
})>;
|
||||
|
||||
/* Maximum file size that gets buffered in memory before flushing to a WriteStream,
|
||||
that's backed by a temporary objects/streamed_git2_* file. We should avoid that
|
||||
for common cases, since creating (and deleting) a temporary file for each blob
|
||||
is insanely expensive. */
|
||||
static constexpr std::size_t maxBufferSize = 1024 * 1024; /* 1 MiB */
|
||||
|
||||
struct CRF : CreateRegularFileSink
|
||||
{
|
||||
const CanonPath & path;
|
||||
GitFileSystemObjectSinkImpl & back;
|
||||
git_writestream * stream;
|
||||
WriteStream stream;
|
||||
std::string & contents;
|
||||
bool executable = false;
|
||||
|
||||
CRF(const CanonPath & path, GitFileSystemObjectSinkImpl & back, git_writestream * stream)
|
||||
CRF(const CanonPath & path, GitFileSystemObjectSinkImpl & back, std::string & regularFileContentsBuffer)
|
||||
: path(path)
|
||||
, back(back)
|
||||
, stream(stream)
|
||||
, stream(nullptr)
|
||||
, contents(regularFileContentsBuffer)
|
||||
{
|
||||
contents.clear();
|
||||
}
|
||||
|
||||
void writeToStream(std::string_view data)
|
||||
{
|
||||
/* Lazily create the stream. */
|
||||
if (!stream) {
|
||||
::git_writestream * stream2 = nullptr;
|
||||
if (git_blob_create_from_stream(&stream2, *back.repo, nullptr))
|
||||
throw Error("creating a blob stream object: %s", git_error_last()->message);
|
||||
stream = WriteStream{stream2};
|
||||
assert(stream);
|
||||
}
|
||||
|
||||
if (stream->write(stream.get(), data.data(), data.size()))
|
||||
throw Error("writing a blob for tarball member '%s': %s", path, git_error_last()->message);
|
||||
}
|
||||
|
||||
void operator()(std::string_view data) override
|
||||
{
|
||||
if (stream->write(stream, data.data(), data.size()))
|
||||
throw Error("writing a blob for tarball member '%s': %s", path, git_error_last()->message);
|
||||
/* Already in slow path. Just write to the slow stream. */
|
||||
if (stream) {
|
||||
writeToStream(data);
|
||||
return;
|
||||
}
|
||||
|
||||
contents += data;
|
||||
if (contents.size() > maxBufferSize) {
|
||||
writeToStream(contents); /* Will initialize stream. */
|
||||
contents.clear();
|
||||
}
|
||||
}
|
||||
|
||||
void isExecutable() override
|
||||
{
|
||||
executable = true;
|
||||
}
|
||||
} crf{path, *this, stream};
|
||||
} crf{path, *this, regularFileContentsBuffer};
|
||||
|
||||
func(crf);
|
||||
|
||||
git_oid oid;
|
||||
if (git_blob_create_from_stream_commit(&oid, stream))
|
||||
throw Error("creating a blob object for tarball member '%s': %s", path, git_error_last()->message);
|
||||
if (crf.stream) {
|
||||
/* Call .release(), since git_blob_create_from_stream_commit
|
||||
acquires ownership and frees the stream. */
|
||||
if (git_blob_create_from_stream_commit(&oid, crf.stream.release()))
|
||||
throw Error("creating a blob object for '%s': %s", path, git_error_last()->message);
|
||||
} else {
|
||||
if (git_blob_create_from_buffer(&oid, *repo, crf.contents.data(), crf.contents.size()))
|
||||
throw Error(
|
||||
"creating a blob object for '%s' from in-memory buffer: %s", path, git_error_last()->message);
|
||||
}
|
||||
|
||||
addToTree(*pathComponents.rbegin(), oid, crf.executable ? GIT_FILEMODE_BLOB_EXECUTABLE : GIT_FILEMODE_BLOB);
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue