From 4060ec3a8cca039937bd70b37acac8801ca9c691 Mon Sep 17 00:00:00 2001 From: Sergei Zimmerman Date: Wed, 17 Dec 2025 02:39:44 +0300 Subject: [PATCH] libutil: Add CompressionAlgo enum, add Suggestions to UnknownCompressionMethod exception Error messages now include suggestions like: error: unknown compression method 'bzip' Did you mean one of bzip2, gzip, lzip, grzip or lrzip? This is also a bit of progress towards making the compression code less stringly typed, which is good because it's easy to confuse which strings are accepted where (e.g. Content-Encoding should be able to accept x-gzip, but it shouldn't be exposed in NAR decompression and so on). An enum cleanly separates the concerns of parsing strings and handling libarchive write/read filters. --- .../build/derivation-building-goal.cc | 2 +- src/libutil/compression.cc | 91 ++++++++++++++++--- src/libutil/include/nix/util/compression.hh | 19 ++++ 3 files changed, 97 insertions(+), 15 deletions(-) diff --git a/src/libstore/build/derivation-building-goal.cc b/src/libstore/build/derivation-building-goal.cc index 9e4605f07..1065d2f15 100644 --- a/src/libstore/build/derivation-building-goal.cc +++ b/src/libstore/build/derivation-building-goal.cc @@ -988,7 +988,7 @@ Path DerivationBuildingGoal::openLogFile() logFileSink = std::make_shared(fdLogFile.get()); if (settings.compressLog) - logSink = std::shared_ptr(makeCompressionSink("bzip2", *logFileSink)); + logSink = std::shared_ptr(makeCompressionSink(CompressionAlgo::bzip2, *logFileSink)); else logSink = logFileSink; diff --git a/src/libutil/compression.cc b/src/libutil/compression.cc index 36b476e9a..38b2cdb7e 100644 --- a/src/libutil/compression.cc +++ b/src/libutil/compression.cc @@ -69,24 +69,54 @@ struct ArchiveDecompressionSource : Source } }; +/* These strings are a part of the public API in store parameters and such. Do not change! + Happens to match enum names. 
*/ +#define NIX_FOR_EACH_LA_ALGO(MACRO) \ + MACRO(bzip2) \ + MACRO(compress) \ + MACRO(grzip) \ + MACRO(gzip) \ + MACRO(lrzip) \ + MACRO(lz4) \ + MACRO(lzip) \ + MACRO(lzma) \ + MACRO(lzop) \ + MACRO(xz) \ + MACRO(zstd) + struct ArchiveCompressionSink : CompressionSink { Sink & nextSink; struct archive * archive; - ArchiveCompressionSink(Sink & nextSink, std::string format, bool parallel, int level = COMPRESSION_LEVEL_DEFAULT) + ArchiveCompressionSink( + Sink & nextSink, CompressionAlgo method, bool parallel, int level = COMPRESSION_LEVEL_DEFAULT) : nextSink(nextSink) { archive = archive_write_new(); if (!archive) throw Error("failed to initialize libarchive"); - check(archive_write_add_filter_by_name(archive, format.c_str()), "couldn't initialize compression (%s)"); + + auto [addFilter, format] = [method]() -> std::pair { + switch (method) { + case CompressionAlgo::none: + case CompressionAlgo::brotli: + unreachable(); +#define NIX_DEF_LA_ALGO_CASE(algo) \ + case CompressionAlgo::algo: \ + return {archive_write_add_filter_##algo, #algo}; + NIX_FOR_EACH_LA_ALGO(NIX_DEF_LA_ALGO_CASE) +#undef NIX_DEF_LA_ALGO_CASE + } + unreachable(); + }(); + + check(addFilter(archive), "couldn't initialize compression (%s)"); check(archive_write_set_format_raw(archive)); if (parallel) - check(archive_write_set_filter_option(archive, format.c_str(), "threads", "0")); + check(archive_write_set_filter_option(archive, format, "threads", "0")); if (level != COMPRESSION_LEVEL_DEFAULT) - check(archive_write_set_filter_option( - archive, format.c_str(), "compression-level", std::to_string(level).c_str())); + check(archive_write_set_filter_option(archive, format, "compression-level", std::to_string(level).c_str())); // disable internal buffering check(archive_write_set_bytes_per_block(archive, 0)); // disable output padding @@ -289,19 +319,52 @@ struct BrotliCompressionSink : ChunkedCompressionSink } }; +/* Parses a *compression* method into the corresponding enum. 
This is only used + in the *compression* case and user interface. Content-Encoding should not use + these. */ +static CompressionAlgo parseNixCompressionAlgoString(std::string_view method) +{ + static const std::unordered_map lookupTable = { + {"none", CompressionAlgo::none}, + {"br", CompressionAlgo::brotli}, +#define NIX_DEF_LA_ALGO_NAME(algo) {#algo, CompressionAlgo::algo}, + NIX_FOR_EACH_LA_ALGO(NIX_DEF_LA_ALGO_NAME) +#undef NIX_DEF_LA_ALGO_NAME + }; + + if (auto it = lookupTable.find(method); it != lookupTable.end()) + return it->second; + + static const StringSet allNames = [&]() { + StringSet res; + for (auto & [name, _] : lookupTable) + res.emplace(name); + return res; + }(); + + throw UnknownCompressionMethod( + Suggestions::bestMatches(allNames, method), "unknown compression method '%s'", method); +} + ref makeCompressionSink(const std::string & method, Sink & nextSink, const bool parallel, int level) { - std::vector la_supports = { - "bzip2", "compress", "grzip", "gzip", "lrzip", "lz4", "lzip", "lzma", "lzop", "xz", "zstd"}; - if (std::find(la_supports.begin(), la_supports.end(), method) != la_supports.end()) { - return make_ref(nextSink, method, parallel, level); - } - if (method == "none") + return makeCompressionSink(parseNixCompressionAlgoString(method), nextSink, parallel, level); +} + +ref makeCompressionSink(CompressionAlgo method, Sink & nextSink, const bool parallel, int level) +{ + switch (method) { + case CompressionAlgo::none: return make_ref(nextSink); - else if (method == "br") + case CompressionAlgo::brotli: return make_ref(nextSink); - else - throw UnknownCompressionMethod("unknown compression method '%s'", method); + /* Everything else is supported via libarchive. 
*/ +#define NIX_DEF_LA_ALGO_CASE(algo) case CompressionAlgo::algo: + NIX_FOR_EACH_LA_ALGO(NIX_DEF_LA_ALGO_CASE) + return make_ref(nextSink, method, parallel, level); +#undef NIX_DEF_LA_ALGO_CASE + } + unreachable(); } std::string compress(const std::string & method, std::string_view in, const bool parallel, int level) diff --git a/src/libutil/include/nix/util/compression.hh b/src/libutil/include/nix/util/compression.hh index 351826856..7569c3933 100644 --- a/src/libutil/include/nix/util/compression.hh +++ b/src/libutil/include/nix/util/compression.hh @@ -16,6 +16,22 @@ struct CompressionSink : BufferedSink, FinishSink using FinishSink::finish; }; +enum class CompressionAlgo { + none, + brotli, + bzip2, + compress, + grzip, + gzip, + lrzip, + lz4, + lzip, + lzma, + lzop, + xz, + zstd, +}; + std::string decompress(const std::string & method, std::string_view in); std::unique_ptr makeDecompressionSink(const std::string & method, Sink & nextSink); @@ -25,6 +41,9 @@ std::string compress(const std::string & method, std::string_view in, const bool ref makeCompressionSink(const std::string & method, Sink & nextSink, const bool parallel = false, int level = -1); +ref +makeCompressionSink(CompressionAlgo method, Sink & nextSink, const bool parallel = false, int level = -1); + MakeError(UnknownCompressionMethod, Error); MakeError(CompressionError, Error);