From d1f9fe984b6efecf69666c2e79ebbce6ef17ef92 Mon Sep 17 00:00:00 2001 From: Sergei Zimmerman Date: Wed, 3 Dec 2025 03:23:12 +0300 Subject: [PATCH] libfetchers/git-utils: Do not refresh pack files in GitFileSystemObjectSink This leads to incredibly wasteful refreshes (see [^]) when oids are not found. Since we are writing the pack files only once per unpacking we should not bother with this refreshing at all. This brings down the number of syscalls during `nix flake metadata "https://releases.nixos.org/nixos/25.05/nixos-25.05.813095.1c8ba8d3f763/nixexprs.tar.xz" --store "dummy://?read-only=false"` Down from 576334 to just 6235 (100x less syscalls): (Before) % time seconds usecs/call calls errors syscall ------ ----------- ----------- --------- --------- ------------------ 32.98 0.625288 3 162898 getdents64 29.58 0.560686 3 163514 81917 openat 15.01 0.284509 3 81819 186 newfstatat 10.99 0.208349 2 81601 close 10.56 0.200145 2 81552 fstat All these are coming from [2] and are totally useless. (After) % time seconds usecs/call calls errors syscall ------ ----------- ----------- --------- --------- ------------------ 76.47 0.108558 247 438 20 futex 6.55 0.009292 18 513 munmap 3.30 0.004680 7 639 492 openat 2.68 0.003803 10 359 write 2.30 0.003268 2 1146 read 2.26 0.003215 3 870 mmap [^]: https://github.com/libgit2/libgit2/blob/58d9363f02f1fa39e46d49b604f27008e75b72f2/include/git2/sys/odb_backend.h#L68-L75 [2]: https://github.com/libgit2/libgit2/blob/58d9363f02f1fa39e46d49b604f27008e75b72f2/src/libgit2/odb_pack.c#L517-L546 --- src/libfetchers/git-utils.cc | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/src/libfetchers/git-utils.cc b/src/libfetchers/git-utils.cc index aa9efb883..f62c3b9b4 100644 --- a/src/libfetchers/git-utils.cc +++ b/src/libfetchers/git-utils.cc @@ -1068,6 +1068,13 @@ struct GitFileSystemObjectSinkImpl : GitFileSystemObjectSink */ std::string regularFileContentsBuffer; + /** + * If repo has a non-null packBackend, this has a copy of the refresh function + * from the backend virtual table. This is needed to restore it after we've flushed + * the sink. We modify it to avoid unnecessary I/O on non-existent oids. + */ + decltype(::git_odb_backend::refresh) packfileOdbRefresh = nullptr; + void pushBuilder(std::string name) { const git_tree_entry * entry; @@ -1093,6 +1100,29 @@ struct GitFileSystemObjectSinkImpl : GitFileSystemObjectSink GitFileSystemObjectSinkImpl(ref repo) : repo(repo) { + /* Monkey-patching the pack backend to only read the pack directory + once. Otherwise it will do a readdir for each added oid when it's + not found and that translates to ~6 syscalls. Since we are never + writing pack files until flushing we can force the odb backend to + read the directory just once. It's very convenient that the vtable is + semi-public interface and is up for grabs. + + This is purely an optimization for our use-case with a tarball cache. + libgit2 calls refresh() if the backend provides it when an oid isn't found. + We are only writing objects to a mempack (it has higher priority) and there isn't + a realistic use-case where a previously missing object would appear from thin air + on the disk (unless another process happens to be unpacking a similar tarball to + the cache at the same time, but that's a very unrealistic scenario). + */ + if (auto * backend = repo->packBackend) { + if (backend->refresh(backend)) /* Refresh just once manually. */ + throw Error("refreshing packfiles: %s", git_error_last()->message); + /* Save the function pointer to restore it later in flush() and + unset it in the vtable. libgit2 does nothing if it's a nullptr: + https://github.com/libgit2/libgit2/blob/58d9363f02f1fa39e46d49b604f27008e75b72f2/src/libgit2/odb.c#L1922 + */ + packfileOdbRefresh = std::exchange(backend->refresh, nullptr); + } pushBuilder(""); } @@ -1309,6 +1339,11 @@ struct GitFileSystemObjectSinkImpl : GitFileSystemObjectSink auto [oid, _name] = popBuilder(); + if (auto * backend = repo->packBackend) { + /* We are done writing blobs, can restore refresh functionality. */ + backend->refresh = packfileOdbRefresh; + } + repo->flush(); return toHash(oid);