From 1667782c67a6533558e3403dd2275d6322b10847 Mon Sep 17 00:00:00 2001 From: Bernardo Meurer Costa Date: Wed, 29 Oct 2025 17:31:20 +0000 Subject: [PATCH] fix(libutil/tarfile): normalize legacy HTTP Content-Encoding names MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Nix failed to download files served with `Content-Encoding: x-gzip` because libarchive doesn't recognize the legacy `x-*` compression format names. Per RFC 9110 §8.4.1.3, HTTP recipients should treat these as equivalent to their standard counterparts. Adds `normalizeCompressionMethod()` to map legacy encoding names before passing to libarchive: - `x-gzip` → `gzip` - `x-compress` → `compress` - `x-bzip2` → `bzip2` --- src/libutil-tests/compression.cc | 24 ++++++++++++++++++++++++ src/libutil/tarfile.cc | 23 ++++++++++++++++++++++- 2 files changed, 46 insertions(+), 1 deletion(-) diff --git a/src/libutil-tests/compression.cc b/src/libutil-tests/compression.cc index c6d570471..d8ffb05aa 100644 --- a/src/libutil-tests/compression.cc +++ b/src/libutil-tests/compression.cc @@ -74,6 +74,30 @@ TEST(decompress, decompressInvalidInputThrowsCompressionError) ASSERT_THROW(decompress(method, str), CompressionError); } +/* ---------------------------------------------------------------------------- + * legacy HTTP Content-Encoding names (RFC 9110) + * --------------------------------------------------------------------------*/ + +TEST(decompress, decompressXGzipCompressed) +{ + // Test that x-gzip (legacy HTTP Content-Encoding) works like gzip + auto str = "slfja;sljfklsa;jfklsjfkl;sdjfkl;sadjfkl;sdjf;lsdfjsadlf"; + auto compressedData = compress("gzip", str); + auto o = decompress("x-gzip", compressedData); + + ASSERT_EQ(o, str); +} + +TEST(decompress, decompressXBzip2Compressed) +{ + // Test that x-bzip2 (legacy HTTP Content-Encoding) works like bzip2 + auto str = "slfja;sljfklsa;jfklsjfkl;sdjfkl;sadjfkl;sdjf;lsdfjsadlf"; + auto compressedData = compress("bzip2", 
str); + auto o = decompress("x-bzip2", compressedData); + + ASSERT_EQ(o, str); +} + /* ---------------------------------------------------------------------------- * compression sinks * --------------------------------------------------------------------------*/ diff --git a/src/libutil/tarfile.cc b/src/libutil/tarfile.cc index 0757b3a81..e3aa32cfc 100644 --- a/src/libutil/tarfile.cc +++ b/src/libutil/tarfile.cc @@ -51,6 +51,26 @@ void TarArchive::check(int err, const std::string & reason) checkLibArchive(archive, err, reason); } +/// @brief Normalize compression method names from legacy HTTP Content-Encoding values. +/// +/// Per RFC 9110 Section 8.4.1.3, HTTP recipients should treat legacy "x-*" compression +/// names as equivalent to their standard counterparts: +/// - "x-gzip" is equivalent to "gzip" +/// - "x-compress" is equivalent to "compress" +/// +/// This function maps these legacy names, as well as "x-bzip2" (to "bzip2"), to their libarchive-compatible equivalents. +static std::string normalizeCompressionMethod(const std::string & method) +{ + if (method == "x-gzip") + return "gzip"; + else if (method == "x-compress") + return "compress"; + else if (method == "x-bzip2") + return "bzip2"; + else + return method; +} + /// @brief Get filter_code from its name. /// /// libarchive does not provide a convenience function like archive_write_add_filter_by_name but for reading. @@ -59,9 +79,10 @@ void TarArchive::check(int err, const std::string & reason) /// hand-rolling the equivalent function that is better implemented in libarchive. 
int getArchiveFilterCodeByName(const std::string & method) { + auto normalizedMethod = normalizeCompressionMethod(method); auto * ar = archive_write_new(); auto cleanup = Finally{[&ar]() { checkLibArchive(ar, archive_write_close(ar), "failed to close archive: %s"); }}; - auto err = archive_write_add_filter_by_name(ar, method.c_str()); + auto err = archive_write_add_filter_by_name(ar, normalizedMethod.c_str()); checkLibArchive(ar, err, "failed to get libarchive filter by name: %s"); auto code = archive_filter_code(ar, 0); return code;