1
1
Fork 0
mirror of https://github.com/NixOS/nix.git synced 2025-11-08 19:46:02 +01:00

libfetchers: Remove badGitRefRegex and use libgit2 for reference validation

Fixes usage of `#` symbol in the reference name.
This also seems to identify several deficiencies in the libgit2 refname
validation code wrt to DEL symbol and a singular `@` symbol [1].

[1]: https://git-scm.com/docs/git-check-ref-format#_description
This commit is contained in:
Sergei Zimmerman 2025-08-11 01:00:21 +03:00
parent 0b7f7e4b03
commit e8e9376a7b
No known key found for this signature in database
10 changed files with 154 additions and 19 deletions

View file

@ -173,4 +173,58 @@ TEST_F(GitUtilsTest, peel_reference)
git_repository_free(rawRepo);
}
TEST(GitUtils, isLegalRefName)
{
ASSERT_TRUE(isLegalRefName("foox"));
ASSERT_TRUE(isLegalRefName("1337"));
ASSERT_TRUE(isLegalRefName("foo.baz"));
ASSERT_TRUE(isLegalRefName("foo/bar/baz"));
ASSERT_TRUE(isLegalRefName("foo./bar"));
ASSERT_TRUE(isLegalRefName("heads/foo@bar"));
ASSERT_TRUE(isLegalRefName("heads/fu\303\237"));
ASSERT_TRUE(isLegalRefName("foo-bar-baz"));
ASSERT_TRUE(isLegalRefName("branch#"));
ASSERT_TRUE(isLegalRefName("$1"));
ASSERT_TRUE(isLegalRefName("foo.locke"));
ASSERT_FALSE(isLegalRefName("refs///heads/foo"));
ASSERT_FALSE(isLegalRefName("heads/foo/"));
ASSERT_FALSE(isLegalRefName("///heads/foo"));
ASSERT_FALSE(isLegalRefName(".foo"));
ASSERT_FALSE(isLegalRefName("./foo"));
ASSERT_FALSE(isLegalRefName("./foo/bar"));
ASSERT_FALSE(isLegalRefName("foo/./bar"));
ASSERT_FALSE(isLegalRefName("foo/bar/."));
ASSERT_FALSE(isLegalRefName("foo bar"));
ASSERT_FALSE(isLegalRefName("foo?bar"));
ASSERT_FALSE(isLegalRefName("foo^bar"));
ASSERT_FALSE(isLegalRefName("foo~bar"));
ASSERT_FALSE(isLegalRefName("foo:bar"));
ASSERT_FALSE(isLegalRefName("foo[bar"));
ASSERT_FALSE(isLegalRefName("foo/bar/."));
ASSERT_FALSE(isLegalRefName(".refs/foo"));
ASSERT_FALSE(isLegalRefName("refs/heads/foo."));
ASSERT_FALSE(isLegalRefName("heads/foo..bar"));
ASSERT_FALSE(isLegalRefName("heads/foo?bar"));
ASSERT_FALSE(isLegalRefName("heads/foo.lock"));
ASSERT_FALSE(isLegalRefName("heads///foo.lock"));
ASSERT_FALSE(isLegalRefName("foo.lock/bar"));
ASSERT_FALSE(isLegalRefName("foo.lock///bar"));
ASSERT_FALSE(isLegalRefName("heads/v@{ation"));
ASSERT_FALSE(isLegalRefName("heads/foo\bar"));
ASSERT_FALSE(isLegalRefName("@"));
ASSERT_FALSE(isLegalRefName("\37"));
ASSERT_FALSE(isLegalRefName("\177"));
ASSERT_FALSE(isLegalRefName("foo/*"));
ASSERT_FALSE(isLegalRefName("*/foo"));
ASSERT_FALSE(isLegalRefName("foo/*/bar"));
ASSERT_FALSE(isLegalRefName("*"));
ASSERT_FALSE(isLegalRefName("foo/*/*"));
ASSERT_FALSE(isLegalRefName("*/foo/*"));
ASSERT_FALSE(isLegalRefName("/foo"));
ASSERT_FALSE(isLegalRefName(""));
}
} // namespace nix

View file

@ -93,8 +93,11 @@ Hash toHash(const git_oid & oid)
static void initLibGit2()
{
if (git_libgit2_init() < 0)
throw Error("initialising libgit2: %s", git_error_last()->message);
static std::once_flag initialized;
std::call_once(initialized, []() {
if (git_libgit2_init() < 0)
throw Error("initialising libgit2: %s", git_error_last()->message);
});
}
git_oid hashToOID(const Hash & hash)
@ -1308,4 +1311,63 @@ GitRepo::WorkdirInfo GitRepo::getCachedWorkdirInfo(const std::filesystem::path &
return workdirInfo;
}
/**
* Checks that the git reference is valid and normalizes slash '/' sequences.
*
* Accepts shorthand references (one-level refnames are allowed).
*/
bool isValidRefNameAllowNormalizations(const std::string & refName)
{
/* Unfortunately libgit2 doesn't expose the limit in headers, but its internal
limit is also 1024. */
std::array<char, 1024> normalizedRefBuffer;
/* It would be nice to have a better API like git_reference_name_is_valid, but
* with GIT_REFERENCE_FORMAT_REFSPEC_SHORTHAND flag. libgit2 uses it internally
* but doesn't expose it in public headers [1].
* [1]:
* https://github.com/libgit2/libgit2/blob/9d5f1bacc23594c2ba324c8f0d41b88bf0e9ef04/src/libgit2/refs.c#L1362-L1365
*/
auto res = git_reference_normalize_name(
normalizedRefBuffer.data(),
normalizedRefBuffer.size(),
refName.c_str(),
GIT_REFERENCE_FORMAT_ALLOW_ONELEVEL | GIT_REFERENCE_FORMAT_REFSPEC_SHORTHAND);
return res == 0;
}
bool isLegalRefName(const std::string & refName)
{
initLibGit2();
/* Since `git_reference_normalize_name` is the best API libgit2 has for verifying
* reference names with shorthands (see comment in normalizeRefName), we need to
* ensure that exceptions to the validity checks imposed by normalization [1] are checked
* explicitly.
* [1]: https://git-scm.com/docs/git-check-ref-format#Documentation/git-check-ref-format.txt---normalize
*/
/* Check for cases that don't get rejected by libgit2.
* FIXME: libgit2 should reject this. */
if (refName == "@")
return false;
/* Leading slashes and consecutive slashes are stripped during normalizatiton. */
if (refName.starts_with('/') || refName.find("//") != refName.npos)
return false;
/* Refer to libgit2. */
if (!isValidRefNameAllowNormalizations(refName))
return false;
/* libgit2 doesn't barf on DEL symbol.
* FIXME: libgit2 should reject this. */
if (refName.find('\177') != refName.npos)
return false;
return true;
}
} // namespace nix

View file

@ -228,10 +228,8 @@ struct GitInputScheme : InputScheme
maybeGetBoolAttr(attrs, "verifyCommit");
if (auto ref = maybeGetStrAttr(attrs, "ref")) {
if (std::regex_search(*ref, badGitRefRegex))
throw BadURL("invalid Git branch/tag name '%s'", *ref);
}
if (auto ref = maybeGetStrAttr(attrs, "ref"); ref && !isLegalRefName(*ref))
throw BadURL("invalid Git branch/tag name '%s'", *ref);
Input input{settings};
input.attrs = attrs;

View file

@ -48,7 +48,7 @@ struct GitArchiveInputScheme : InputScheme
if (size == 3) {
if (std::regex_match(path[2], revRegex))
rev = Hash::parseAny(path[2], HashAlgorithm::SHA1);
else if (std::regex_match(path[2], refRegex))
else if (isLegalRefName(path[2]))
ref = path[2];
else
throw BadURL("in URL '%s', '%s' is not a commit hash or branch/tag name", url, path[2]);
@ -61,7 +61,7 @@ struct GitArchiveInputScheme : InputScheme
}
}
if (std::regex_match(rs, refRegex)) {
if (isLegalRefName(rs)) {
ref = rs;
} else {
throw BadURL("in URL '%s', '%s' is not a branch/tag name", url, rs);
@ -75,7 +75,7 @@ struct GitArchiveInputScheme : InputScheme
throw BadURL("URL '%s' contains multiple commit hashes", url);
rev = Hash::parseAny(value, HashAlgorithm::SHA1);
} else if (name == "ref") {
if (!std::regex_match(value, refRegex))
if (!isLegalRefName(value))
throw BadURL("URL '%s' contains an invalid branch/tag name", url);
if (ref)
throw BadURL("URL '%s' contains multiple branch/tag names", url);

View file

@ -157,4 +157,11 @@ struct Setter
}
};
/**
* Checks that the git reference is valid and normalized.
*
* Accepts shorthand references (one-level refnames are allowed).
*/
bool isLegalRefName(const std::string & refName);
} // namespace nix

View file

@ -1,4 +1,5 @@
#include "nix/fetchers/fetchers.hh"
#include "nix/fetchers/git-utils.hh"
#include "nix/util/url-parts.hh"
#include "nix/store/path.hh"
@ -22,12 +23,12 @@ struct IndirectInputScheme : InputScheme
} else if (path.size() == 2) {
if (std::regex_match(path[1], revRegex))
rev = Hash::parseAny(path[1], HashAlgorithm::SHA1);
else if (std::regex_match(path[1], refRegex))
else if (isLegalRefName(path[1]))
ref = path[1];
else
throw BadURL("in flake URL '%s', '%s' is not a commit hash or branch/tag name", url, path[1]);
} else if (path.size() == 3) {
if (!std::regex_match(path[1], refRegex))
if (!isLegalRefName(path[1]))
throw BadURL("in flake URL '%s', '%s' is not a branch/tag name", url, path[1]);
ref = path[1];
if (!std::regex_match(path[2], revRegex))

View file

@ -57,6 +57,25 @@ TEST(parseFlakeRef, path)
}
}
TEST(parseFlakeRef, GitArchiveInput)
{
experimentalFeatureSettings.experimentalFeatures.get().insert(Xp::Flakes);
fetchers::Settings fetchSettings;
{
auto s = "github:foo/bar/branch%23"; // branch name with `#`
auto flakeref = parseFlakeRef(fetchSettings, s);
ASSERT_EQ(flakeref.to_string(), "github:foo/bar/branch%23");
}
{
auto s = "github:foo/bar?ref=branch%23"; // branch name with `#`
auto flakeref = parseFlakeRef(fetchSettings, s);
ASSERT_EQ(flakeref.to_string(), "github:foo/bar/branch%23");
}
}
TEST(to_string, doesntReencodeUrl)
{
fetchers::Settings fetchSettings;

View file

@ -19,13 +19,6 @@ const static std::string fragmentRegex = "(?:" + pcharRegex + "|[/? \"^])*";
const static std::string refRegexS = "[a-zA-Z0-9@][a-zA-Z0-9_.\\/@+-]*";
extern std::regex refRegex;
/// Instead of defining what a good Git Ref is, we define what a bad Git Ref is
/// This is because of the definition of a ref in refs.c in https://github.com/git/git
/// See tests/functional/fetchGitRefs.sh for the full definition
const static std::string badGitRefRegexS =
"//|^[./]|/\\.|\\.\\.|[[:cntrl:][:space:]:?^~\[]|\\\\|\\*|\\.lock$|\\.lock/|@\\{|[/.]$|^@$|^$";
extern std::regex badGitRefRegex;
/// A Git revision (a SHA-1 commit hash).
const static std::string revRegexS = "[0-9a-fA-F]{40}";
extern std::regex revRegex;

View file

@ -9,7 +9,6 @@
namespace nix {
std::regex refRegex(refRegexS, std::regex::ECMAScript);
std::regex badGitRefRegex(badGitRefRegexS, std::regex::ECMAScript);
std::regex revRegex(revRegexS, std::regex::ECMAScript);
/**

View file

@ -67,6 +67,7 @@ valid_ref 'foo./bar'
valid_ref 'heads/foo@bar'
valid_ref "$(printf 'heads/fu\303\237')"
valid_ref 'foo-bar-baz'
valid_ref 'branch#'
valid_ref '$1'
valid_ref 'foo.locke'
@ -97,6 +98,7 @@ invalid_ref 'heads/v@{ation'
invalid_ref 'heads/foo\.ar' # should fail due to \
invalid_ref 'heads/foo\bar' # should fail due to \
invalid_ref "$(printf 'heads/foo\t')" # should fail because it has a TAB
invalid_ref "$(printf 'heads/foo\37')"
invalid_ref "$(printf 'heads/foo\177')"
invalid_ref '@'