From 156061409a8502438ab8db4ad02dede8a0c5b667 Mon Sep 17 00:00:00 2001 From: John Ericson Date: Sun, 9 Nov 2025 18:50:11 -0500 Subject: [PATCH] Do some parsing of git hashes based on length. Note that for Nix-native information we should *not* do length tricks, but instead always rely on an explicit algorithm. This hack should be only for foreign hash literals. --- src/libfetchers/attrs.cc | 3 ++- src/libfetchers/fetchers.cc | 3 +++ src/libfetchers/git-utils.cc | 17 +++++++++++++++++ src/libfetchers/github.cc | 12 ++++++------ .../include/nix/fetchers/git-utils.hh | 10 ++++++++++ src/libfetchers/indirect.cc | 4 ++-- 6 files changed, 40 insertions(+), 9 deletions(-) diff --git a/src/libfetchers/attrs.cc b/src/libfetchers/attrs.cc index 841808bd1..7c1cfcea8 100644 --- a/src/libfetchers/attrs.cc +++ b/src/libfetchers/attrs.cc @@ -1,5 +1,6 @@ #include "nix/fetchers/attrs.hh" #include "nix/fetchers/fetchers.hh" +#include "nix/fetchers/git-utils.hh" #include @@ -111,7 +112,7 @@ StringMap attrsToQuery(const Attrs & attrs) Hash getRevAttr(const Attrs & attrs, const std::string & name) { - return Hash::parseAny(getStrAttr(attrs, name), HashAlgorithm::SHA1); + return parseGitHash(getStrAttr(attrs, name)); } } // namespace nix::fetchers diff --git a/src/libfetchers/fetchers.cc b/src/libfetchers/fetchers.cc index c9c0fffa2..f648869be 100644 --- a/src/libfetchers/fetchers.cc +++ b/src/libfetchers/fetchers.cc @@ -447,6 +447,9 @@ std::optional Input::getRev() const } catch (BadHash & e) { // Default to sha1 for backwards compatibility with existing // usages (e.g. `builtins.fetchTree` calls or flake inputs). + // + // Note that this means that for SHA-256 git repos, prefixing + // must be used. 
hash = Hash::parseAny(*s, HashAlgorithm::SHA1); } } diff --git a/src/libfetchers/git-utils.cc b/src/libfetchers/git-utils.cc index 5f6bafdfe..a881e29c2 100644 --- a/src/libfetchers/git-utils.cc +++ b/src/libfetchers/git-utils.cc @@ -1404,4 +1404,21 @@ bool isLegalRefName(const std::string & refName) return false; } +Hash parseGitHash(std::string_view hashStr) +{ + HashAlgorithm algo; + switch (hashStr.size()) { + case 40: + algo = HashAlgorithm::SHA1; + break; + case 64: + algo = HashAlgorithm::SHA256; + break; + default: + throw Error( + "invalid git hash '%s': expected 40 (SHA1) or 64 (SHA256) hex characters, got %d", hashStr, hashStr.size()); + } + return Hash::parseNonSRIUnprefixed(hashStr, algo); +} + } // namespace nix diff --git a/src/libfetchers/github.cc b/src/libfetchers/github.cc index 2479a57d2..5c4e9a092 100644 --- a/src/libfetchers/github.cc +++ b/src/libfetchers/github.cc @@ -48,7 +48,7 @@ struct GitArchiveInputScheme : InputScheme auto size = path.size(); if (size == 3) { if (std::regex_match(path[2], revRegex)) - rev = Hash::parseAny(path[2], HashAlgorithm::SHA1); + rev = parseGitHash(path[2]); else if (isLegalRefName(path[2])) ref = path[2]; else @@ -74,7 +74,7 @@ struct GitArchiveInputScheme : InputScheme if (name == "rev") { if (rev) throw BadURL("URL '%s' contains multiple commit hashes", url); - rev = Hash::parseAny(value, HashAlgorithm::SHA1); + rev = parseGitHash(value); } else if (name == "ref") { if (!isLegalRefName(value)) throw BadURL("URL '%s' contains an invalid branch/tag name", url); @@ -403,8 +403,8 @@ struct GitHubInputScheme : GitArchiveInputScheme store->requireStoreObjectAccessor(downloadResult.storePath)->readFile(CanonPath::root)); return RefInfo{ - .rev = Hash::parseAny(std::string{json["sha"]}, HashAlgorithm::SHA1), - .treeHash = Hash::parseAny(std::string{json["commit"]["tree"]["sha"]}, HashAlgorithm::SHA1)}; + .rev = parseGitHash(std::string{json["sha"]}), + .treeHash = 
parseGitHash(std::string{json["commit"]["tree"]["sha"]})}; } DownloadUrl getDownloadUrl(const Input & input) const override @@ -478,7 +478,7 @@ struct GitLabInputScheme : GitArchiveInputScheme store->requireStoreObjectAccessor(downloadResult.storePath)->readFile(CanonPath::root)); if (json.is_array() && json.size() >= 1 && json[0]["id"] != nullptr) { - return RefInfo{.rev = Hash::parseAny(std::string(json[0]["id"]), HashAlgorithm::SHA1)}; + return RefInfo{.rev = parseGitHash(std::string(json[0]["id"]))}; } if (json.is_array() && json.size() == 0) { throw Error("No commits returned by GitLab API -- does the git ref really exist?"); @@ -579,7 +579,7 @@ struct SourceHutInputScheme : GitArchiveInputScheme if (!id) throw BadURL("in '%d', couldn't find ref '%d'", input.to_string(), ref); - return RefInfo{.rev = Hash::parseAny(*id, HashAlgorithm::SHA1)}; + return RefInfo{.rev = parseGitHash(*id)}; } DownloadUrl getDownloadUrl(const Input & input) const override diff --git a/src/libfetchers/include/nix/fetchers/git-utils.hh b/src/libfetchers/include/nix/fetchers/git-utils.hh index 8357ce4cd..dad0044c7 100644 --- a/src/libfetchers/include/nix/fetchers/git-utils.hh +++ b/src/libfetchers/include/nix/fetchers/git-utils.hh @@ -167,4 +167,14 @@ struct Setter */ bool isLegalRefName(const std::string & refName); +/** + * Parse a base16-encoded git hash string and determine the hash + * algorithm based on the length (40 chars = SHA1, 64 chars = SHA256). + * + * @note For Nix-native information we should *not* do length tricks, + * but instead always rely on an explicit algorithm. This hack should be + * only for foreign hash literals. 
+ */ +Hash parseGitHash(std::string_view hashStr); + } // namespace nix diff --git a/src/libfetchers/indirect.cc b/src/libfetchers/indirect.cc index e05d27adc..86487ef1b 100644 --- a/src/libfetchers/indirect.cc +++ b/src/libfetchers/indirect.cc @@ -23,7 +23,7 @@ struct IndirectInputScheme : InputScheme if (path.size() == 1) { } else if (path.size() == 2) { if (std::regex_match(path[1], revRegex)) - rev = Hash::parseAny(path[1], HashAlgorithm::SHA1); + rev = parseGitHash(path[1]); else if (isLegalRefName(path[1])) ref = path[1]; else @@ -34,7 +34,7 @@ struct IndirectInputScheme : InputScheme ref = path[1]; if (!std::regex_match(path[2], revRegex)) throw BadURL("in flake URL '%s', '%s' is not a commit hash", url, path[2]); - rev = Hash::parseAny(path[2], HashAlgorithm::SHA1); + rev = parseGitHash(path[2]); } else throw BadURL("GitHub URL '%s' is invalid", url);