From daa7e0d2e967f455cbf6285f31c5307be49545cd Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Fri, 17 Oct 2025 13:56:17 +0200 Subject: [PATCH 1/3] Source: Add skip() method This allows FdSource to efficiently skip data we don't care about. --- src/libutil/include/nix/util/serialise.hh | 5 +++ src/libutil/serialise.cc | 48 +++++++++++++++++++++-- 2 files changed, 50 insertions(+), 3 deletions(-) diff --git a/src/libutil/include/nix/util/serialise.hh b/src/libutil/include/nix/util/serialise.hh index 16e0d0fa5..8799e128f 100644 --- a/src/libutil/include/nix/util/serialise.hh +++ b/src/libutil/include/nix/util/serialise.hh @@ -97,6 +97,8 @@ struct Source void drainInto(Sink & sink); std::string drain(); + + virtual void skip(size_t len); }; /** @@ -177,6 +179,7 @@ struct FdSource : BufferedSource Descriptor fd; size_t read = 0; BackedStringView endOfFileError{"unexpected end-of-file"}; + bool isSeekable = true; FdSource() : fd(INVALID_DESCRIPTOR) @@ -200,6 +203,8 @@ struct FdSource : BufferedSource */ bool hasData(); + void skip(size_t len) override; + protected: size_t readUnbuffered(char * data, size_t len) override; private: diff --git a/src/libutil/serialise.cc b/src/libutil/serialise.cc index 15629935e..bdce956f3 100644 --- a/src/libutil/serialise.cc +++ b/src/libutil/serialise.cc @@ -94,9 +94,8 @@ void Source::drainInto(Sink & sink) { std::array buf; while (true) { - size_t n; try { - n = read(buf.data(), buf.size()); + auto n = read(buf.data(), buf.size()); sink({buf.data(), n}); } catch (EndOfFile &) { break; @@ -111,6 +110,16 @@ std::string Source::drain() return std::move(s.s); } +void Source::skip(size_t len) +{ + std::array buf; + while (len) { + auto n = read(buf.data(), std::min(len, buf.size())); + assert(n <= len); + len -= n; + } +} + size_t BufferedSource::read(char * data, size_t len) { if (!buffer) @@ -120,7 +129,7 @@ size_t BufferedSource::read(char * data, size_t len) bufPosIn = readUnbuffered(buffer.get(), bufSize); /* Copy out the data in the buffer. */ - size_t n = len > bufPosIn - bufPosOut ? bufPosIn - bufPosOut : len; + auto n = std::min(len, bufPosIn - bufPosOut); memcpy(data, buffer.get() + bufPosOut, n); bufPosOut += n; if (bufPosIn == bufPosOut) @@ -191,6 +200,39 @@ bool FdSource::hasData() } } +void FdSource::skip(size_t len) +{ + /* Discard data in the buffer. */ + if (len && buffer && bufPosIn - bufPosOut) { + if (len >= bufPosIn - bufPosOut) { + len -= bufPosIn - bufPosOut; + bufPosIn = bufPosOut = 0; + } else { + bufPosOut += len; + len = 0; + } + } + +#ifndef _WIN32 + /* If we can, seek forward in the file to skip the rest. */ + if (isSeekable && len) { + if (lseek(fd, len, SEEK_CUR) == -1) { + if (errno == ESPIPE) + isSeekable = false; + else + throw SysError("seeking forward in file"); + } else { + read += len; + return; + } + } +#endif + + /* Otherwise, skip by reading. */ + if (len) + BufferedSource::skip(len); +} + size_t StringSource::read(char * data, size_t len) { if (pos == s.size()) From 67bffa19a533c5d2b562db367d1823166ca714b2 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Fri, 17 Oct 2025 18:32:47 +0200 Subject: [PATCH 2/3] NullFileSystemObjectSink: Skip over file contents --- src/libutil/archive.cc | 7 ++++++- src/libutil/fs-sink.cc | 2 ++ src/libutil/include/nix/util/fs-sink.hh | 8 ++++++++ 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/src/libutil/archive.cc b/src/libutil/archive.cc index 3d96df75e..b8fef9ef3 100644 --- a/src/libutil/archive.cc +++ b/src/libutil/archive.cc @@ -132,6 +132,11 @@ static void parseContents(CreateRegularFileSink & sink, Source & source) sink.preallocateContents(size); + if (sink.skipContents) { + source.skip(size + (size % 8 ? 8 - (size % 8) : 0)); + return; + } + uint64_t left = size; std::array buf; @@ -166,7 +171,7 @@ static void parse(FileSystemObjectSink & sink, Source & source, const CanonPath auto expectTag = [&](std::string_view expected) { auto tag = getString(); if (tag != expected) - throw badArchive("expected tag '%s', got '%s'", expected, tag); + throw badArchive("expected tag '%s', got '%s'", expected, tag.substr(0, 1024)); }; expectTag("("); diff --git a/src/libutil/fs-sink.cc b/src/libutil/fs-sink.cc index 6efd5e0c7..45ef57a9f 100644 --- a/src/libutil/fs-sink.cc +++ b/src/libutil/fs-sink.cc @@ -196,6 +196,8 @@ void NullFileSystemObjectSink::createRegularFile( void isExecutable() override {} } crf; + crf.skipContents = true; + // Even though `NullFileSystemObjectSink` doesn't do anything, it's important // that we call the function, to e.g. advance the parser using this // sink. diff --git a/src/libutil/include/nix/util/fs-sink.hh b/src/libutil/include/nix/util/fs-sink.hh index f96fe3ef9..bd2db7f53 100644 --- a/src/libutil/include/nix/util/fs-sink.hh +++ b/src/libutil/include/nix/util/fs-sink.hh @@ -14,6 +14,14 @@ namespace nix { */ struct CreateRegularFileSink : Sink { + /** + * If set to true, the sink will not be called with the contents + * of the file. `preallocateContents()` will still be called to + * convey the file size. Useful for sinks that want to efficiently + * discard the contents of the file. + */ + bool skipContents = false; + virtual void isExecutable() = 0; /** From 6c9083db2c4d68c3a3719a816da48b68541a4a72 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Mon, 20 Oct 2025 13:40:19 +0200 Subject: [PATCH 3/3] Use a smaller buffer --- src/libutil/serialise.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libutil/serialise.cc b/src/libutil/serialise.cc index bdce956f3..47a00c8d6 100644 --- a/src/libutil/serialise.cc +++ b/src/libutil/serialise.cc @@ -112,7 +112,7 @@ std::string Source::drain() void Source::skip(size_t len) { - std::array buf; + std::array buf; while (len) { auto n = read(buf.data(), std::min(len, buf.size())); assert(n <= len);