From 23c87d8a210ec3ffe29a14d7ceb3f4e87ab4fa90 Mon Sep 17 00:00:00 2001 From: John Ericson Date: Mon, 4 Aug 2025 14:31:14 -0400 Subject: [PATCH] Encapsulate `invalidBase32`, avoid 0xFF magic number This keeps things fast by making the function inline, but also prevents people from having to know about the `0xFF` implementation detail directly, instead making one go through a `std::optional` (which could be fused away with a sufficiently smart compiler). Additionally, the base "nix32" implementation is moved to its own header file pair, as it is logically distinct and prior to the `Hash` data type. It would probably be nice to do this with all the hash format implementations. --- src/libstore-tests/ref-scan-bench.cc | 5 ++- src/libutil/base-nix-32.cc | 42 +++++++++++++++++++ src/libutil/hash.cc | 38 +++-------------- src/libutil/include/nix/util/base-nix-32.hh | 45 +++++++++++++++++++++ src/libutil/include/nix/util/hash.hh | 4 +- src/libutil/include/nix/util/meson.build | 1 + src/libutil/meson.build | 1 + src/libutil/references.cc | 3 +- 8 files changed, 101 insertions(+), 38 deletions(-) create mode 100644 src/libutil/base-nix-32.cc create mode 100644 src/libutil/include/nix/util/base-nix-32.hh diff --git a/src/libstore-tests/ref-scan-bench.cc b/src/libstore-tests/ref-scan-bench.cc index 8219c05bf..011d53aec 100644 --- a/src/libstore-tests/ref-scan-bench.cc +++ b/src/libstore-tests/ref-scan-bench.cc @@ -1,5 +1,6 @@ #include "nix/util/references.hh" #include "nix/store/path.hh" +#include "nix/util/base-nix-32.hh" #include @@ -10,9 +11,9 @@ using namespace nix; template static void randomReference(std::mt19937 & urng, OIt outIter) { - auto dist = std::uniform_int_distribution(0, nix32Chars.size() - 1); + auto dist = std::uniform_int_distribution(0, BaseNix32::characters.size() - 1); dist(urng); - std::generate_n(outIter, StorePath::HashLen, [&]() { return nix32Chars[dist(urng)]; }); + std::generate_n(outIter, StorePath::HashLen, [&]() { return 
BaseNix32::characters[dist(urng)]; }); } /** diff --git a/src/libutil/base-nix-32.cc b/src/libutil/base-nix-32.cc new file mode 100644 index 000000000..dec5cd7d7 --- /dev/null +++ b/src/libutil/base-nix-32.cc @@ -0,0 +1,42 @@ +#include + +#include "nix/util/base-nix-32.hh" + +namespace nix { + +constexpr const std::array BaseNix32::reverseMap = [] { + std::array map{}; + + for (size_t i = 0; i < map.size(); ++i) + map[i] = invalid; // invalid + + for (unsigned char i = 0; i < 32; ++i) + map[static_cast(characters[i])] = i; + + return map; +}(); + +std::string BaseNix32::encode(std::span originalData) +{ + if (originalData.size() == 0) + return {}; + + size_t len = encodedLength(originalData.size()); + assert(len); + + std::string s; + s.reserve(len); + + for (int n = (int) len - 1; n >= 0; n--) { + unsigned int b = n * 5; + unsigned int i = b / 8; + unsigned int j = b % 8; + unsigned char c = + (originalData.data()[i] >> j) | (i >= originalData.size() - 1 ? 0 : originalData.data()[i + 1] << (8 - j)); + s.push_back(characters[c & 0x1f]); + } + + return s; +} + +} // namespace nix diff --git a/src/libutil/hash.cc b/src/libutil/hash.cc index a9603b6fc..a5ac9b654 100644 --- a/src/libutil/hash.cc +++ b/src/libutil/hash.cc @@ -11,6 +11,7 @@ #include "nix/util/archive.hh" #include "nix/util/configuration.hh" #include "nix/util/split.hh" +#include "nix/util/base-nix-32.hh" #include #include @@ -71,39 +72,10 @@ static std::string printHash16(const Hash & hash) return buf; } -// omitted: E O U T -constexpr char nix32Chars[] = "0123456789abcdfghijklmnpqrsvwxyz"; - -constexpr const std::array reverseNix32Map = [] { - std::array map{}; - - for (size_t i = 0; i < map.size(); ++i) - map[i] = 0xFF; // invalid - - for (unsigned char i = 0; i < 32; ++i) - map[static_cast(nix32Chars[i])] = i; - - return map; -}(); - static std::string printHash32(const Hash & hash) { assert(hash.hashSize); - size_t len = hash.base32Len(); - assert(len); - - std::string s; - s.reserve(len); - - for 
(int n = (int) len - 1; n >= 0; n--) { - unsigned int b = n * 5; - unsigned int i = b / 8; - unsigned int j = b % 8; - unsigned char c = (hash.hash[i] >> j) | (i >= hash.hashSize - 1 ? 0 : hash.hash[i + 1] << (8 - j)); - s.push_back(nix32Chars[c & 0x1f]); - } - - return s; + return BaseNix32::encode({&hash.hash[0], hash.hashSize}); } std::string printHash16or32(const Hash & hash) @@ -229,11 +201,13 @@ Hash::Hash(std::string_view rest, HashAlgorithm algo, bool isSRI) for (unsigned int n = 0; n < rest.size(); ++n) { char c = rest[rest.size() - n - 1]; - unsigned char digit = reverseNix32Map[static_cast(c)]; + auto digit_opt = BaseNix32::lookupReverse(c); - if (digit == 0xFF) + if (!digit_opt) throw BadHash("invalid base-32 hash: '%s'", rest); + uint8_t digit = std::move(*digit_opt); + unsigned int b = n * 5; unsigned int i = b / 8; unsigned int j = b % 8; diff --git a/src/libutil/include/nix/util/base-nix-32.hh b/src/libutil/include/nix/util/base-nix-32.hh new file mode 100644 index 000000000..37b23a2bb --- /dev/null +++ b/src/libutil/include/nix/util/base-nix-32.hh @@ -0,0 +1,45 @@ +#pragma once +///@file + +#include +#include +#include +#include +#include + +namespace nix { + +struct BaseNix32 +{ + /// omitted: E O U T + constexpr static std::array characters = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', + 'b', 'c', 'd', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', + 'n', 'p', 'q', 'r', 's', 'v', 'w', 'x', 'y', 'z'}; + +private: + static const std::array reverseMap; + + const static constexpr uint8_t invalid = 0xFF; + +public: + static inline std::optional lookupReverse(char base32) + { + uint8_t digit = reverseMap[static_cast(base32)]; + if (digit == invalid) + return std::nullopt; + else + return digit; + } + + /** + * Returns the number of base-32 characters needed to encode `originalLength` bytes (`originalLength` must be non-zero).
+ */ + static size_t encodedLength(size_t originalLength) + { + return (originalLength * 8 - 1) / 5 + 1; + } + + static std::string encode(std::span originalData); +}; + +} // namespace nix diff --git a/src/libutil/include/nix/util/hash.hh b/src/libutil/include/nix/util/hash.hh index 9661fd489..48f1b0c3c 100644 --- a/src/libutil/include/nix/util/hash.hh +++ b/src/libutil/include/nix/util/hash.hh @@ -35,8 +35,6 @@ constexpr inline size_t regularHashSize(HashAlgorithm type) extern const StringSet hashAlgorithms; -extern const std::array reverseNix32Map; - /** * @brief Enumeration representing the hash formats. */ @@ -44,7 +42,7 @@ enum struct HashFormat : int { /// @brief Base 64 encoding. /// @see [IETF RFC 4648, section 4](https://datatracker.ietf.org/doc/html/rfc4648#section-4). Base64, - /// @brief Nix-specific base-32 encoding. @see nix32Chars + /// @brief Nix-specific base-32 encoding. @see BaseNix32 Nix32, /// @brief Lowercase hexadecimal encoding. @see base16Chars Base16, diff --git a/src/libutil/include/nix/util/meson.build b/src/libutil/include/nix/util/meson.build index 22438c1d0..b7d4d761d 100644 --- a/src/libutil/include/nix/util/meson.build +++ b/src/libutil/include/nix/util/meson.build @@ -8,6 +8,7 @@ headers = files( 'archive.hh', 'args.hh', 'args/root.hh', + 'base-nix-32.hh', 'callback.hh', 'canon-path.hh', 'checked-arithmetic.hh', diff --git a/src/libutil/meson.build b/src/libutil/meson.build index d11d14c33..fb3e98e1d 100644 --- a/src/libutil/meson.build +++ b/src/libutil/meson.build @@ -112,6 +112,7 @@ subdir('nix-meson-build-support/common') sources = [config_priv_h] + files( 'archive.cc', 'args.cc', + 'base-nix-32.cc', 'canon-path.cc', 'compression.cc', 'compute-levels.cc', diff --git a/src/libutil/references.cc b/src/libutil/references.cc index 6b88da0c6..0f5164f6b 100644 --- a/src/libutil/references.cc +++ b/src/libutil/references.cc @@ -1,6 +1,7 @@ #include "nix/util/references.hh" #include "nix/util/hash.hh" #include "nix/util/archive.hh" 
+#include "nix/util/base-nix-32.hh" #include #include @@ -17,7 +18,7 @@ static void search(std::string_view s, StringSet & hashes, StringSet & seen) int j; bool match = true; for (j = refLength - 1; j >= 0; --j) - if (reverseNix32Map[(unsigned char) s[i + j]] == 0xFF) { + if (!BaseNix32::lookupReverse(s[i + j])) { i += j + 1; match = false; break;