mirror of
https://github.com/NixOS/nix.git
synced 2025-11-08 19:46:02 +01:00
libfetchers: Remove badGitRefRegex and use libgit2 for reference validation
Fixes the use of the `#` symbol in reference names. This also exposes several deficiencies in libgit2's refname validation code with respect to the DEL character and a lone `@` symbol [1]. [1]: https://git-scm.com/docs/git-check-ref-format#_description
This commit is contained in:
parent
0b7f7e4b03
commit
e8e9376a7b
10 changed files with 154 additions and 19 deletions
|
|
@ -173,4 +173,58 @@ TEST_F(GitUtilsTest, peel_reference)
|
|||
git_repository_free(rawRepo);
|
||||
}
|
||||
|
||||
/**
 * Table of reference names that isLegalRefName must accept or reject.
 * The cases mirror the valid_ref / invalid_ref lists of the functional
 * fetchGitRefs test so that both suites agree on what a legal name is.
 */
TEST(GitUtils, isLegalRefName)
{
    /* Names that must be accepted. */
    ASSERT_TRUE(isLegalRefName("foox"));
    ASSERT_TRUE(isLegalRefName("1337"));
    ASSERT_TRUE(isLegalRefName("foo.baz"));
    ASSERT_TRUE(isLegalRefName("foo/bar/baz"));
    ASSERT_TRUE(isLegalRefName("foo./bar"));
    ASSERT_TRUE(isLegalRefName("heads/foo@bar"));
    ASSERT_TRUE(isLegalRefName("heads/fu\303\237")); /* UTF-8 encoded sharp s */
    ASSERT_TRUE(isLegalRefName("foo-bar-baz"));
    ASSERT_TRUE(isLegalRefName("branch#"));
    ASSERT_TRUE(isLegalRefName("$1"));
    ASSERT_TRUE(isLegalRefName("foo.locke"));

    /* Names that must be rejected. */
    ASSERT_FALSE(isLegalRefName("refs///heads/foo"));
    ASSERT_FALSE(isLegalRefName("heads/foo/"));
    ASSERT_FALSE(isLegalRefName("///heads/foo"));
    ASSERT_FALSE(isLegalRefName(".foo"));
    ASSERT_FALSE(isLegalRefName("./foo"));
    ASSERT_FALSE(isLegalRefName("./foo/bar"));
    ASSERT_FALSE(isLegalRefName("foo/./bar"));
    ASSERT_FALSE(isLegalRefName("foo/bar/."));
    ASSERT_FALSE(isLegalRefName("foo bar"));
    ASSERT_FALSE(isLegalRefName("foo?bar"));
    ASSERT_FALSE(isLegalRefName("foo^bar"));
    ASSERT_FALSE(isLegalRefName("foo~bar"));
    ASSERT_FALSE(isLegalRefName("foo:bar"));
    ASSERT_FALSE(isLegalRefName("foo[bar"));
    ASSERT_FALSE(isLegalRefName(".refs/foo"));
    ASSERT_FALSE(isLegalRefName("refs/heads/foo."));
    ASSERT_FALSE(isLegalRefName("heads/foo..bar"));
    ASSERT_FALSE(isLegalRefName("heads/foo?bar"));
    ASSERT_FALSE(isLegalRefName("heads/foo.lock"));
    ASSERT_FALSE(isLegalRefName("heads///foo.lock"));
    ASSERT_FALSE(isLegalRefName("foo.lock/bar"));
    ASSERT_FALSE(isLegalRefName("foo.lock///bar"));
    ASSERT_FALSE(isLegalRefName("heads/v@{ation"));

    /* A literal backslash is forbidden. It has to be written as an escaped
       backslash here: a single one followed by 'b' is the backspace escape,
       which would only test the control-character rule. */
    ASSERT_FALSE(isLegalRefName("heads/foo\\bar"));
    /* The backspace control character itself is kept as a separate case. */
    ASSERT_FALSE(isLegalRefName("heads/foo\bar"));

    /* A lone '@' and control characters (unit separator, DEL). */
    ASSERT_FALSE(isLegalRefName("@"));
    ASSERT_FALSE(isLegalRefName("\37"));
    ASSERT_FALSE(isLegalRefName("\177"));

    /* Glob patterns, a leading slash and the empty name. */
    ASSERT_FALSE(isLegalRefName("foo/*"));
    ASSERT_FALSE(isLegalRefName("*/foo"));
    ASSERT_FALSE(isLegalRefName("foo/*/bar"));
    ASSERT_FALSE(isLegalRefName("*"));
    ASSERT_FALSE(isLegalRefName("foo/*/*"));
    ASSERT_FALSE(isLegalRefName("*/foo/*"));
    ASSERT_FALSE(isLegalRefName("/foo"));
    ASSERT_FALSE(isLegalRefName(""));
}
|
||||
|
||||
} // namespace nix
|
||||
|
|
|
|||
|
|
@ -93,8 +93,11 @@ Hash toHash(const git_oid & oid)
|
|||
|
||||
static void initLibGit2()
|
||||
{
|
||||
if (git_libgit2_init() < 0)
|
||||
throw Error("initialising libgit2: %s", git_error_last()->message);
|
||||
static std::once_flag initialized;
|
||||
std::call_once(initialized, []() {
|
||||
if (git_libgit2_init() < 0)
|
||||
throw Error("initialising libgit2: %s", git_error_last()->message);
|
||||
});
|
||||
}
|
||||
|
||||
git_oid hashToOID(const Hash & hash)
|
||||
|
|
@ -1308,4 +1311,63 @@ GitRepo::WorkdirInfo GitRepo::getCachedWorkdirInfo(const std::filesystem::path &
|
|||
return workdirInfo;
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks that the git reference is valid and normalizes slash '/' sequences.
|
||||
*
|
||||
* Accepts shorthand references (one-level refnames are allowed).
|
||||
*/
|
||||
bool isValidRefNameAllowNormalizations(const std::string & refName)
|
||||
{
|
||||
/* Unfortunately libgit2 doesn't expose the limit in headers, but its internal
|
||||
limit is also 1024. */
|
||||
std::array<char, 1024> normalizedRefBuffer;
|
||||
|
||||
/* It would be nice to have a better API like git_reference_name_is_valid, but
|
||||
* with GIT_REFERENCE_FORMAT_REFSPEC_SHORTHAND flag. libgit2 uses it internally
|
||||
* but doesn't expose it in public headers [1].
|
||||
* [1]:
|
||||
* https://github.com/libgit2/libgit2/blob/9d5f1bacc23594c2ba324c8f0d41b88bf0e9ef04/src/libgit2/refs.c#L1362-L1365
|
||||
*/
|
||||
|
||||
auto res = git_reference_normalize_name(
|
||||
normalizedRefBuffer.data(),
|
||||
normalizedRefBuffer.size(),
|
||||
refName.c_str(),
|
||||
GIT_REFERENCE_FORMAT_ALLOW_ONELEVEL | GIT_REFERENCE_FORMAT_REFSPEC_SHORTHAND);
|
||||
|
||||
return res == 0;
|
||||
}
|
||||
|
||||
bool isLegalRefName(const std::string & refName)
|
||||
{
|
||||
initLibGit2();
|
||||
|
||||
/* Since `git_reference_normalize_name` is the best API libgit2 has for verifying
|
||||
* reference names with shorthands (see comment in normalizeRefName), we need to
|
||||
* ensure that exceptions to the validity checks imposed by normalization [1] are checked
|
||||
* explicitly.
|
||||
* [1]: https://git-scm.com/docs/git-check-ref-format#Documentation/git-check-ref-format.txt---normalize
|
||||
*/
|
||||
|
||||
/* Check for cases that don't get rejected by libgit2.
|
||||
* FIXME: libgit2 should reject this. */
|
||||
if (refName == "@")
|
||||
return false;
|
||||
|
||||
/* Leading slashes and consecutive slashes are stripped during normalizatiton. */
|
||||
if (refName.starts_with('/') || refName.find("//") != refName.npos)
|
||||
return false;
|
||||
|
||||
/* Refer to libgit2. */
|
||||
if (!isValidRefNameAllowNormalizations(refName))
|
||||
return false;
|
||||
|
||||
/* libgit2 doesn't barf on DEL symbol.
|
||||
* FIXME: libgit2 should reject this. */
|
||||
if (refName.find('\177') != refName.npos)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace nix
|
||||
|
|
|
|||
|
|
@ -228,10 +228,8 @@ struct GitInputScheme : InputScheme
|
|||
|
||||
maybeGetBoolAttr(attrs, "verifyCommit");
|
||||
|
||||
if (auto ref = maybeGetStrAttr(attrs, "ref")) {
|
||||
if (std::regex_search(*ref, badGitRefRegex))
|
||||
throw BadURL("invalid Git branch/tag name '%s'", *ref);
|
||||
}
|
||||
if (auto ref = maybeGetStrAttr(attrs, "ref"); ref && !isLegalRefName(*ref))
|
||||
throw BadURL("invalid Git branch/tag name '%s'", *ref);
|
||||
|
||||
Input input{settings};
|
||||
input.attrs = attrs;
|
||||
|
|
|
|||
|
|
@ -48,7 +48,7 @@ struct GitArchiveInputScheme : InputScheme
|
|||
if (size == 3) {
|
||||
if (std::regex_match(path[2], revRegex))
|
||||
rev = Hash::parseAny(path[2], HashAlgorithm::SHA1);
|
||||
else if (std::regex_match(path[2], refRegex))
|
||||
else if (isLegalRefName(path[2]))
|
||||
ref = path[2];
|
||||
else
|
||||
throw BadURL("in URL '%s', '%s' is not a commit hash or branch/tag name", url, path[2]);
|
||||
|
|
@ -61,7 +61,7 @@ struct GitArchiveInputScheme : InputScheme
|
|||
}
|
||||
}
|
||||
|
||||
if (std::regex_match(rs, refRegex)) {
|
||||
if (isLegalRefName(rs)) {
|
||||
ref = rs;
|
||||
} else {
|
||||
throw BadURL("in URL '%s', '%s' is not a branch/tag name", url, rs);
|
||||
|
|
@ -75,7 +75,7 @@ struct GitArchiveInputScheme : InputScheme
|
|||
throw BadURL("URL '%s' contains multiple commit hashes", url);
|
||||
rev = Hash::parseAny(value, HashAlgorithm::SHA1);
|
||||
} else if (name == "ref") {
|
||||
if (!std::regex_match(value, refRegex))
|
||||
if (!isLegalRefName(value))
|
||||
throw BadURL("URL '%s' contains an invalid branch/tag name", url);
|
||||
if (ref)
|
||||
throw BadURL("URL '%s' contains multiple branch/tag names", url);
|
||||
|
|
|
|||
|
|
@ -157,4 +157,11 @@ struct Setter
|
|||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Checks that the git reference is valid and normalized.
|
||||
*
|
||||
* Accepts shorthand references (one-level refnames are allowed).
|
||||
*/
|
||||
bool isLegalRefName(const std::string & refName);
|
||||
|
||||
} // namespace nix
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
#include "nix/fetchers/fetchers.hh"
|
||||
#include "nix/fetchers/git-utils.hh"
|
||||
#include "nix/util/url-parts.hh"
|
||||
#include "nix/store/path.hh"
|
||||
|
||||
|
|
@ -22,12 +23,12 @@ struct IndirectInputScheme : InputScheme
|
|||
} else if (path.size() == 2) {
|
||||
if (std::regex_match(path[1], revRegex))
|
||||
rev = Hash::parseAny(path[1], HashAlgorithm::SHA1);
|
||||
else if (std::regex_match(path[1], refRegex))
|
||||
else if (isLegalRefName(path[1]))
|
||||
ref = path[1];
|
||||
else
|
||||
throw BadURL("in flake URL '%s', '%s' is not a commit hash or branch/tag name", url, path[1]);
|
||||
} else if (path.size() == 3) {
|
||||
if (!std::regex_match(path[1], refRegex))
|
||||
if (!isLegalRefName(path[1]))
|
||||
throw BadURL("in flake URL '%s', '%s' is not a branch/tag name", url, path[1]);
|
||||
ref = path[1];
|
||||
if (!std::regex_match(path[2], revRegex))
|
||||
|
|
|
|||
|
|
@ -57,6 +57,25 @@ TEST(parseFlakeRef, path)
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * A `github:` flake reference whose branch name contains `#` (percent-encoded
 * as %23) must parse, and must print with the branch as a path component,
 * whether it was given as a path component or through the `ref` query parameter.
 */
TEST(parseFlakeRef, GitArchiveInput)
{
    experimentalFeatureSettings.experimentalFeatures.get().insert(Xp::Flakes);

    fetchers::Settings fetchSettings;

    /* Two spellings of the same reference; both name the branch `branch#`. */
    for (const char * spelling : {"github:foo/bar/branch%23", "github:foo/bar?ref=branch%23"}) {
        auto parsed = parseFlakeRef(fetchSettings, spelling);
        ASSERT_EQ(parsed.to_string(), "github:foo/bar/branch%23");
    }
}
|
||||
|
||||
TEST(to_string, doesntReencodeUrl)
|
||||
{
|
||||
fetchers::Settings fetchSettings;
|
||||
|
|
|
|||
|
|
@ -19,13 +19,6 @@ const static std::string fragmentRegex = "(?:" + pcharRegex + "|[/? \"^])*";
|
|||
const static std::string refRegexS = "[a-zA-Z0-9@][a-zA-Z0-9_.\\/@+-]*";
|
||||
extern std::regex refRegex;
|
||||
|
||||
/// Instead of defining what a good Git Ref is, we define what a bad Git Ref is
|
||||
/// This is because of the definition of a ref in refs.c in https://github.com/git/git
|
||||
/// See tests/functional/fetchGitRefs.sh for the full definition
|
||||
const static std::string badGitRefRegexS =
|
||||
"//|^[./]|/\\.|\\.\\.|[[:cntrl:][:space:]:?^~\[]|\\\\|\\*|\\.lock$|\\.lock/|@\\{|[/.]$|^@$|^$";
|
||||
extern std::regex badGitRefRegex;
|
||||
|
||||
/// A Git revision (a SHA-1 commit hash).
|
||||
const static std::string revRegexS = "[0-9a-fA-F]{40}";
|
||||
extern std::regex revRegex;
|
||||
|
|
|
|||
|
|
@ -9,7 +9,6 @@
|
|||
namespace nix {
|
||||
|
||||
std::regex refRegex(refRegexS, std::regex::ECMAScript);
|
||||
std::regex badGitRefRegex(badGitRefRegexS, std::regex::ECMAScript);
|
||||
std::regex revRegex(revRegexS, std::regex::ECMAScript);
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -67,6 +67,7 @@ valid_ref 'foo./bar'
|
|||
valid_ref 'heads/foo@bar'
|
||||
valid_ref "$(printf 'heads/fu\303\237')"
|
||||
valid_ref 'foo-bar-baz'
|
||||
valid_ref 'branch#'
|
||||
valid_ref '$1'
|
||||
valid_ref 'foo.locke'
|
||||
|
||||
|
|
@ -97,6 +98,7 @@ invalid_ref 'heads/v@{ation'
|
|||
invalid_ref 'heads/foo\.ar' # should fail due to \
|
||||
invalid_ref 'heads/foo\bar' # should fail due to \
|
||||
invalid_ref "$(printf 'heads/foo\t')" # should fail because it has a TAB
|
||||
invalid_ref "$(printf 'heads/foo\37')"
|
||||
invalid_ref "$(printf 'heads/foo\177')"
|
||||
invalid_ref '@'
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue