From e8e9376a7b38cb7606bb54e28bbc60a076463077 Mon Sep 17 00:00:00 2001 From: Sergei Zimmerman Date: Mon, 11 Aug 2025 01:00:21 +0300 Subject: [PATCH] libfetchers: Remove badGitRefRegex and use libgit2 for reference validation Fixes usage of `#` symbol in the reference name. This also seems to identify several deficiencies in the libgit2 refname validation code wrt to DEL symbol and a singular `@` symbol [1]. [1]: https://git-scm.com/docs/git-check-ref-format#_description --- src/libfetchers-tests/git-utils.cc | 54 +++++++++++++++ src/libfetchers/git-utils.cc | 66 ++++++++++++++++++- src/libfetchers/git.cc | 6 +- src/libfetchers/github.cc | 6 +- .../include/nix/fetchers/git-utils.hh | 7 ++ src/libfetchers/indirect.cc | 5 +- src/libflake-tests/flakeref.cc | 19 ++++++ src/libutil/include/nix/util/url-parts.hh | 7 -- src/libutil/url.cc | 1 - tests/functional/fetchGitRefs.sh | 2 + 10 files changed, 154 insertions(+), 19 deletions(-) diff --git a/src/libfetchers-tests/git-utils.cc b/src/libfetchers-tests/git-utils.cc index bfba3d679..f9fae23da 100644 --- a/src/libfetchers-tests/git-utils.cc +++ b/src/libfetchers-tests/git-utils.cc @@ -173,4 +173,58 @@ TEST_F(GitUtilsTest, peel_reference) git_repository_free(rawRepo); } +TEST(GitUtils, isLegalRefName) +{ + ASSERT_TRUE(isLegalRefName("foox")); + ASSERT_TRUE(isLegalRefName("1337")); + ASSERT_TRUE(isLegalRefName("foo.baz")); + ASSERT_TRUE(isLegalRefName("foo/bar/baz")); + ASSERT_TRUE(isLegalRefName("foo./bar")); + ASSERT_TRUE(isLegalRefName("heads/foo@bar")); + ASSERT_TRUE(isLegalRefName("heads/fu\303\237")); + ASSERT_TRUE(isLegalRefName("foo-bar-baz")); + ASSERT_TRUE(isLegalRefName("branch#")); + ASSERT_TRUE(isLegalRefName("$1")); + ASSERT_TRUE(isLegalRefName("foo.locke")); + + ASSERT_FALSE(isLegalRefName("refs///heads/foo")); + ASSERT_FALSE(isLegalRefName("heads/foo/")); + ASSERT_FALSE(isLegalRefName("///heads/foo")); + ASSERT_FALSE(isLegalRefName(".foo")); + ASSERT_FALSE(isLegalRefName("./foo")); + ASSERT_FALSE(isLegalRefName("./foo/bar")); + ASSERT_FALSE(isLegalRefName("foo/./bar")); + ASSERT_FALSE(isLegalRefName("foo/bar/.")); + ASSERT_FALSE(isLegalRefName("foo bar")); + ASSERT_FALSE(isLegalRefName("foo?bar")); + ASSERT_FALSE(isLegalRefName("foo^bar")); + ASSERT_FALSE(isLegalRefName("foo~bar")); + ASSERT_FALSE(isLegalRefName("foo:bar")); + ASSERT_FALSE(isLegalRefName("foo[bar")); + ASSERT_FALSE(isLegalRefName("foo/bar/.")); + ASSERT_FALSE(isLegalRefName(".refs/foo")); + ASSERT_FALSE(isLegalRefName("refs/heads/foo.")); + ASSERT_FALSE(isLegalRefName("heads/foo..bar")); + ASSERT_FALSE(isLegalRefName("heads/foo?bar")); + ASSERT_FALSE(isLegalRefName("heads/foo.lock")); + ASSERT_FALSE(isLegalRefName("heads///foo.lock")); + ASSERT_FALSE(isLegalRefName("foo.lock/bar")); + ASSERT_FALSE(isLegalRefName("foo.lock///bar")); + ASSERT_FALSE(isLegalRefName("heads/v@{ation")); + ASSERT_FALSE(isLegalRefName("heads/foo\bar")); + + ASSERT_FALSE(isLegalRefName("@")); + ASSERT_FALSE(isLegalRefName("\37")); + ASSERT_FALSE(isLegalRefName("\177")); + + ASSERT_FALSE(isLegalRefName("foo/*")); + ASSERT_FALSE(isLegalRefName("*/foo")); + ASSERT_FALSE(isLegalRefName("foo/*/bar")); + ASSERT_FALSE(isLegalRefName("*")); + ASSERT_FALSE(isLegalRefName("foo/*/*")); + ASSERT_FALSE(isLegalRefName("*/foo/*")); + ASSERT_FALSE(isLegalRefName("/foo")); + ASSERT_FALSE(isLegalRefName("")); +} + } // namespace nix diff --git a/src/libfetchers/git-utils.cc b/src/libfetchers/git-utils.cc index 993d7fb08..b8d9b03ce 100644 --- a/src/libfetchers/git-utils.cc +++ b/src/libfetchers/git-utils.cc @@ -93,8 +93,11 @@ Hash toHash(const git_oid & oid) static void initLibGit2() { - if (git_libgit2_init() < 0) - throw Error("initialising libgit2: %s", git_error_last()->message); + static std::once_flag initialized; + std::call_once(initialized, []() { + if (git_libgit2_init() < 0) + throw Error("initialising libgit2: %s", git_error_last()->message); + }); } git_oid hashToOID(const Hash & hash) @@ -1308,4 +1311,63 @@ GitRepo::WorkdirInfo GitRepo::getCachedWorkdirInfo(const std::filesystem::path & return workdirInfo; } +/** + * Checks that the git reference is valid and normalizes slash '/' sequences. + * + * Accepts shorthand references (one-level refnames are allowed). + */ +bool isValidRefNameAllowNormalizations(const std::string & refName) +{ + /* Unfortunately libgit2 doesn't expose the limit in headers, but its internal + limit is also 1024. */ + std::array normalizedRefBuffer; + + /* It would be nice to have a better API like git_reference_name_is_valid, but + * with GIT_REFERENCE_FORMAT_REFSPEC_SHORTHAND flag. libgit2 uses it internally + * but doesn't expose it in public headers [1]. + * [1]: + * https://github.com/libgit2/libgit2/blob/9d5f1bacc23594c2ba324c8f0d41b88bf0e9ef04/src/libgit2/refs.c#L1362-L1365 + */ + + auto res = git_reference_normalize_name( + normalizedRefBuffer.data(), + normalizedRefBuffer.size(), + refName.c_str(), + GIT_REFERENCE_FORMAT_ALLOW_ONELEVEL | GIT_REFERENCE_FORMAT_REFSPEC_SHORTHAND); + + return res == 0; +} + +bool isLegalRefName(const std::string & refName) +{ + initLibGit2(); + + /* Since `git_reference_normalize_name` is the best API libgit2 has for verifying + * reference names with shorthands (see comment in normalizeRefName), we need to + * ensure that exceptions to the validity checks imposed by normalization [1] are checked + * explicitly. + * [1]: https://git-scm.com/docs/git-check-ref-format#Documentation/git-check-ref-format.txt---normalize + */ + + /* Check for cases that don't get rejected by libgit2. + * FIXME: libgit2 should reject this. */ + if (refName == "@") + return false; + + /* Leading slashes and consecutive slashes are stripped during normalizatiton. */ + if (refName.starts_with('/') || refName.find("//") != refName.npos) + return false; + + /* Refer to libgit2. */ + if (!isValidRefNameAllowNormalizations(refName)) + return false; + + /* libgit2 doesn't barf on DEL symbol. + * FIXME: libgit2 should reject this. */ + if (refName.find('\177') != refName.npos) + return false; + + return true; +} + } // namespace nix diff --git a/src/libfetchers/git.cc b/src/libfetchers/git.cc index 4c2a655b9..43105c699 100644 --- a/src/libfetchers/git.cc +++ b/src/libfetchers/git.cc @@ -228,10 +228,8 @@ struct GitInputScheme : InputScheme maybeGetBoolAttr(attrs, "verifyCommit"); - if (auto ref = maybeGetStrAttr(attrs, "ref")) { - if (std::regex_search(*ref, badGitRefRegex)) - throw BadURL("invalid Git branch/tag name '%s'", *ref); - } + if (auto ref = maybeGetStrAttr(attrs, "ref"); ref && !isLegalRefName(*ref)) + throw BadURL("invalid Git branch/tag name '%s'", *ref); Input input{settings}; input.attrs = attrs; diff --git a/src/libfetchers/github.cc b/src/libfetchers/github.cc index c91f3ad3a..841a9c2df 100644 --- a/src/libfetchers/github.cc +++ b/src/libfetchers/github.cc @@ -48,7 +48,7 @@ struct GitArchiveInputScheme : InputScheme if (size == 3) { if (std::regex_match(path[2], revRegex)) rev = Hash::parseAny(path[2], HashAlgorithm::SHA1); - else if (std::regex_match(path[2], refRegex)) + else if (isLegalRefName(path[2])) ref = path[2]; else throw BadURL("in URL '%s', '%s' is not a commit hash or branch/tag name", url, path[2]); @@ -61,7 +61,7 @@ struct GitArchiveInputScheme : InputScheme } } - if (std::regex_match(rs, refRegex)) { + if (isLegalRefName(rs)) { ref = rs; } else { throw BadURL("in URL '%s', '%s' is not a branch/tag name", url, rs); @@ -75,7 +75,7 @@ struct GitArchiveInputScheme : InputScheme throw BadURL("URL '%s' contains multiple commit hashes", url); rev = Hash::parseAny(value, HashAlgorithm::SHA1); } else if (name == "ref") { - if (!std::regex_match(value, refRegex)) + if (!isLegalRefName(value)) throw BadURL("URL '%s' contains an invalid branch/tag name", url); if (ref) throw BadURL("URL '%s' contains multiple branch/tag names", url); diff --git a/src/libfetchers/include/nix/fetchers/git-utils.hh b/src/libfetchers/include/nix/fetchers/git-utils.hh index 2ea2acd02..07b985541 100644 --- a/src/libfetchers/include/nix/fetchers/git-utils.hh +++ b/src/libfetchers/include/nix/fetchers/git-utils.hh @@ -157,4 +157,11 @@ struct Setter } }; +/** + * Checks that the git reference is valid and normalized. + * + * Accepts shorthand references (one-level refnames are allowed). + */ +bool isLegalRefName(const std::string & refName); + } // namespace nix diff --git a/src/libfetchers/indirect.cc b/src/libfetchers/indirect.cc index f949679c2..4bd4d890d 100644 --- a/src/libfetchers/indirect.cc +++ b/src/libfetchers/indirect.cc @@ -1,4 +1,5 @@ #include "nix/fetchers/fetchers.hh" +#include "nix/fetchers/git-utils.hh" #include "nix/util/url-parts.hh" #include "nix/store/path.hh" @@ -22,12 +23,12 @@ struct IndirectInputScheme : InputScheme } else if (path.size() == 2) { if (std::regex_match(path[1], revRegex)) rev = Hash::parseAny(path[1], HashAlgorithm::SHA1); - else if (std::regex_match(path[1], refRegex)) + else if (isLegalRefName(path[1])) ref = path[1]; else throw BadURL("in flake URL '%s', '%s' is not a commit hash or branch/tag name", url, path[1]); } else if (path.size() == 3) { - if (!std::regex_match(path[1], refRegex)) + if (!isLegalRefName(path[1])) throw BadURL("in flake URL '%s', '%s' is not a branch/tag name", url, path[1]); ref = path[1]; if (!std::regex_match(path[2], revRegex)) diff --git a/src/libflake-tests/flakeref.cc b/src/libflake-tests/flakeref.cc index b8f1ef7c9..2f8deb123 100644 --- a/src/libflake-tests/flakeref.cc +++ b/src/libflake-tests/flakeref.cc @@ -57,6 +57,25 @@ TEST(parseFlakeRef, path) } } +TEST(parseFlakeRef, GitArchiveInput) +{ + experimentalFeatureSettings.experimentalFeatures.get().insert(Xp::Flakes); + + fetchers::Settings fetchSettings; + + { + auto s = "github:foo/bar/branch%23"; // branch name with `#` + auto flakeref = parseFlakeRef(fetchSettings, s); + ASSERT_EQ(flakeref.to_string(), "github:foo/bar/branch%23"); + } + + { + auto s = "github:foo/bar?ref=branch%23"; // branch name with `#` + auto flakeref = parseFlakeRef(fetchSettings, s); + ASSERT_EQ(flakeref.to_string(), "github:foo/bar/branch%23"); + } +} + TEST(to_string, doesntReencodeUrl) { fetchers::Settings fetchSettings; diff --git a/src/libutil/include/nix/util/url-parts.hh b/src/libutil/include/nix/util/url-parts.hh index 72c901b5d..c57c32e61 100644 --- a/src/libutil/include/nix/util/url-parts.hh +++ b/src/libutil/include/nix/util/url-parts.hh @@ -19,13 +19,6 @@ const static std::string fragmentRegex = "(?:" + pcharRegex + "|[/? \"^])*"; const static std::string refRegexS = "[a-zA-Z0-9@][a-zA-Z0-9_.\\/@+-]*"; extern std::regex refRegex; -/// Instead of defining what a good Git Ref is, we define what a bad Git Ref is -/// This is because of the definition of a ref in refs.c in https://github.com/git/git -/// See tests/functional/fetchGitRefs.sh for the full definition -const static std::string badGitRefRegexS = - "//|^[./]|/\\.|\\.\\.|[[:cntrl:][:space:]:?^~\[]|\\\\|\\*|\\.lock$|\\.lock/|@\\{|[/.]$|^@$|^$"; -extern std::regex badGitRefRegex; - /// A Git revision (a SHA-1 commit hash). const static std::string revRegexS = "[0-9a-fA-F]{40}"; extern std::regex revRegex; diff --git a/src/libutil/url.cc b/src/libutil/url.cc index 134d313ed..8f902552f 100644 --- a/src/libutil/url.cc +++ b/src/libutil/url.cc @@ -9,7 +9,6 @@ namespace nix { std::regex refRegex(refRegexS, std::regex::ECMAScript); -std::regex badGitRefRegex(badGitRefRegexS, std::regex::ECMAScript); std::regex revRegex(revRegexS, std::regex::ECMAScript); /** diff --git a/tests/functional/fetchGitRefs.sh b/tests/functional/fetchGitRefs.sh index ee054fabc..258a65525 100755 --- a/tests/functional/fetchGitRefs.sh +++ b/tests/functional/fetchGitRefs.sh @@ -67,6 +67,7 @@ valid_ref 'foo./bar' valid_ref 'heads/foo@bar' valid_ref "$(printf 'heads/fu\303\237')" valid_ref 'foo-bar-baz' +valid_ref 'branch#' valid_ref '$1' valid_ref 'foo.locke' @@ -97,6 +98,7 @@ invalid_ref 'heads/v@{ation' invalid_ref 'heads/foo\.ar' # should fail due to \ invalid_ref 'heads/foo\bar' # should fail due to \ invalid_ref "$(printf 'heads/foo\t')" # should fail because it has a TAB +invalid_ref "$(printf 'heads/foo\37')" invalid_ref "$(printf 'heads/foo\177')" invalid_ref '@'