mirror of
https://github.com/NixOS/nix.git
synced 2025-11-10 04:26:01 +01:00
Improve Git URI handling
Git URIs can also be written as scp-style links, just as git itself accepts.
This change augments the function fixGitURL to better handle scp-style
URLs through a minimal parser rather than a regex, which has been
found to be brittle.
* Support for IPv6 added
* New test cases added for fixGitURL
* Clearer documentation on the purpose and goal of the function
* More `std::string_view` for performance
* Update URL tests
Fixes #5958
Mostly undoes revert 4757487110599bbe9a287ead75741bba5436d52f
Adapted from commit 04ad66af5f
This commit is contained in:
parent
c80805cb61
commit
a67c93c240
3 changed files with 139 additions and 69 deletions
|
|
@ -14,8 +14,8 @@ using HostType = Authority::HostType;
|
||||||
|
|
||||||
struct FixGitURLParam
|
struct FixGitURLParam
|
||||||
{
|
{
|
||||||
std::string input;
|
std::string_view input;
|
||||||
std::string expected;
|
std::string_view expected;
|
||||||
ParsedURL parsed;
|
ParsedURL parsed;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
@ -63,6 +63,34 @@ INSTANTIATE_TEST_SUITE_P(
|
||||||
.path = {"", "owner", "repo.git"},
|
.path = {"", "owner", "repo.git"},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
// SCP-like URL (no user)
|
||||||
|
FixGitURLParam{
|
||||||
|
.input = "github.com:owner/repo.git",
|
||||||
|
.expected = "ssh://github.com/owner/repo.git",
|
||||||
|
.parsed =
|
||||||
|
ParsedURL{
|
||||||
|
.scheme = "ssh",
|
||||||
|
.authority =
|
||||||
|
ParsedURL::Authority{
|
||||||
|
.host = "github.com",
|
||||||
|
},
|
||||||
|
.path = {"", "owner", "repo.git"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
// SCP-like URL (leading slash)
|
||||||
|
FixGitURLParam{
|
||||||
|
.input = "github.com:/owner/repo.git",
|
||||||
|
.expected = "ssh://github.com/owner/repo.git",
|
||||||
|
.parsed =
|
||||||
|
ParsedURL{
|
||||||
|
.scheme = "ssh",
|
||||||
|
.authority =
|
||||||
|
ParsedURL::Authority{
|
||||||
|
.host = "github.com",
|
||||||
|
},
|
||||||
|
.path = {"", "owner", "repo.git"},
|
||||||
|
},
|
||||||
|
},
|
||||||
// Absolute path (becomes file:)
|
// Absolute path (becomes file:)
|
||||||
FixGitURLParam{
|
FixGitURLParam{
|
||||||
.input = "/home/me/repo",
|
.input = "/home/me/repo",
|
||||||
|
|
@ -77,7 +105,7 @@ INSTANTIATE_TEST_SUITE_P(
|
||||||
// IPV6 test case
|
// IPV6 test case
|
||||||
FixGitURLParam{
|
FixGitURLParam{
|
||||||
.input = "user@[2001:db8:1::2]:/home/file",
|
.input = "user@[2001:db8:1::2]:/home/file",
|
||||||
.expected = "ssh://user@[2001:db8:1::2]//home/file",
|
.expected = "ssh://user@[2001:db8:1::2]/home/file",
|
||||||
.parsed =
|
.parsed =
|
||||||
ParsedURL{
|
ParsedURL{
|
||||||
.scheme = "ssh",
|
.scheme = "ssh",
|
||||||
|
|
@ -87,7 +115,7 @@ INSTANTIATE_TEST_SUITE_P(
|
||||||
.host = "2001:db8:1::2",
|
.host = "2001:db8:1::2",
|
||||||
.user = "user",
|
.user = "user",
|
||||||
},
|
},
|
||||||
.path = {"", "", "home", "file"},
|
.path = {"", "home", "file"},
|
||||||
},
|
},
|
||||||
}));
|
}));
|
||||||
|
|
||||||
|
|
@ -99,19 +127,18 @@ TEST_P(FixGitURLTestSuite, parsesVariedGitUrls)
|
||||||
EXPECT_EQ(actual.to_string(), p.expected);
|
EXPECT_EQ(actual.to_string(), p.expected);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(FixGitURLTestSuite, scpLikeNoUserParsesPoorly)
|
TEST_P(FixGitURLTestSuite, fixGitIsIdempotent)
|
||||||
{
|
{
|
||||||
// SCP-like URL (no user)
|
auto & p = GetParam();
|
||||||
|
const auto actual = fixGitURL(p.expected).to_string();
|
||||||
|
EXPECT_EQ(actual, p.expected);
|
||||||
|
}
|
||||||
|
|
||||||
// Cannot "to_string" this because has illegal path not starting
|
TEST_P(FixGitURLTestSuite, fixGitOutputParses)
|
||||||
// with `/`.
|
{
|
||||||
EXPECT_EQ(
|
auto & p = GetParam();
|
||||||
fixGitURL("github.com:owner/repo.git"),
|
const auto parsed = fixGitURL(p.expected);
|
||||||
(ParsedURL{
|
EXPECT_EQ(parseURL(parsed.to_string()), parsed);
|
||||||
.scheme = "file",
|
|
||||||
.authority = ParsedURL::Authority{},
|
|
||||||
.path = {"github.com:owner", "repo.git"},
|
|
||||||
}));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(FixGitURLTestSuite, properlyRejectFileURLWithAuthority)
|
TEST(FixGitURLTestSuite, properlyRejectFileURLWithAuthority)
|
||||||
|
|
@ -134,39 +161,6 @@ TEST(FixGitURLTestSuite, ambiguousScpLikeOrFileURL)
|
||||||
"URL 'file:/var/repos/x' would parse as SCP authority = 'file', path = '/var/repos/x' but this is also a valid `file:..` URL, and so we choose to disallow it")));
|
"URL 'file:/var/repos/x' would parse as SCP authority = 'file', path = '/var/repos/x' but this is also a valid `file:..` URL, and so we choose to disallow it")));
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(FixGitURLTestSuite, scpLikePathLeadingSlashParsesPoorly)
|
|
||||||
{
|
|
||||||
// SCP-like URL (no user)
|
|
||||||
|
|
||||||
// Cannot "to_string" this because has illegal path not starting
|
|
||||||
// with `/`.
|
|
||||||
EXPECT_EQ(
|
|
||||||
fixGitURL("github.com:/owner/repo.git"),
|
|
||||||
(ParsedURL{
|
|
||||||
.scheme = "file",
|
|
||||||
.authority = ParsedURL::Authority{},
|
|
||||||
.path = {"github.com:", "owner", "repo.git"},
|
|
||||||
}));
|
|
||||||
}
|
|
||||||
|
|
||||||
TEST(FixGitURLTestSuite, relativePathParsesPoorly)
|
|
||||||
{
|
|
||||||
// Relative path (becomes file:// absolute)
|
|
||||||
|
|
||||||
// Cannot "to_string" this because has illegal path not starting
|
|
||||||
// with `/`.
|
|
||||||
EXPECT_EQ(
|
|
||||||
fixGitURL("relative/repo"),
|
|
||||||
(ParsedURL{
|
|
||||||
.scheme = "file",
|
|
||||||
.authority =
|
|
||||||
ParsedURL::Authority{
|
|
||||||
.hostType = ParsedURL::Authority::HostType::Name,
|
|
||||||
.host = "",
|
|
||||||
},
|
|
||||||
.path = {"relative", "repo"}}));
|
|
||||||
}
|
|
||||||
|
|
||||||
TEST(parseURL, parsesSimpleHttpUrl)
|
TEST(parseURL, parsesSimpleHttpUrl)
|
||||||
{
|
{
|
||||||
auto s = "http://www.example.org/file.tar.gz";
|
auto s = "http://www.example.org/file.tar.gz";
|
||||||
|
|
|
||||||
|
|
@ -327,10 +327,23 @@ struct ParsedUrlScheme
|
||||||
|
|
||||||
ParsedUrlScheme parseUrlScheme(std::string_view scheme);
|
ParsedUrlScheme parseUrlScheme(std::string_view scheme);
|
||||||
|
|
||||||
/* Detects scp-style uris (e.g. git@github.com:NixOS/nix) and fixes
|
/**
|
||||||
them by removing the `:` and assuming a scheme of `ssh://`. Also
|
* Normalize a Git remote string from various styles into a URL-like form.
|
||||||
changes absolute paths into file:// URLs. */
|
* Input forms handled:
|
||||||
ParsedURL fixGitURL(const std::string & url);
|
* 1) SCP-style SSH syntax: "[user@]host:path" -> "ssh://user@host/path"
|
||||||
|
* 2) "file:" URLs: "file:///abs/path" (with "//") -> unchanged; "file:/path" without "//" is rejected as ambiguous with scp-style "host:path"
|
||||||
|
* 3) Bare paths / filenames: "src/repo" or "/abs" -> "file:src/repo" or "file:/abs"
|
||||||
|
* 4) Anything with "://": treated as a proper URL -> unchanged
|
||||||
|
*
|
||||||
|
* Note: for the scp-style, as they are converted to ssh-form, all paths are assumed to
|
||||||
|
* then be absolute whereas in programs like git, they retain the scp form which allows
|
||||||
|
* relative paths.
|
||||||
|
*
|
||||||
|
* Additionally, if no url can be determined, it is returned as a file:// URI.
|
||||||
|
* If the url does not start with a leading slash, one will be added since there are no
|
||||||
|
* relative path URIs.
|
||||||
|
*/
|
||||||
|
ParsedURL fixGitURL(std::string_view url);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Whether a string is valid as RFC 3986 scheme name.
|
* Whether a string is valid as RFC 3986 scheme name.
|
||||||
|
|
|
||||||
|
|
@ -408,28 +408,91 @@ ParsedUrlScheme parseUrlScheme(std::string_view scheme)
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
ParsedURL fixGitURL(const std::string & url)
|
struct ScpLike
|
||||||
{
|
{
|
||||||
std::regex scpRegex("([^/]*)@(.*):(.*)");
|
ParsedURL::Authority authority;
|
||||||
if (!hasPrefix(url, "/") && std::regex_match(url, scpRegex))
|
std::string_view path;
|
||||||
return parseURL(std::regex_replace(url, scpRegex, "ssh://$1@$2/$3"));
|
};
|
||||||
std::string_view path = url;
|
|
||||||
if (splitPrefix(path, "file:")) {
|
/**
|
||||||
if (hasPrefix(url, "file://"))
|
* Parse an scp-style URL. This is a helper function for fixGitURL.
|
||||||
return parseURL(url);
|
* This is needed since we support scp-style urls for git urls.
|
||||||
|
* https://git-scm.com/book/ms/v2/Git-on-the-Server-The-Protocols
|
||||||
|
*
|
||||||
|
* A good reference is libgit2, which also allows scp-style URLs:
|
||||||
|
* https://github.com/libgit2/libgit2/blob/58d9363f02f1fa39e46d49b604f27008e75b72f2/src/util/net.c#L806
|
||||||
|
*/
|
||||||
|
static std::optional<ScpLike> parseScp(const std::string_view s) noexcept
|
||||||
|
{
|
||||||
|
if (s.empty() || s.front() == '/')
|
||||||
|
return std::nullopt;
|
||||||
|
|
||||||
|
// Find the colon that separates host from path.
|
||||||
|
// Find the right-most since ipv6 has colons
|
||||||
|
const auto colon = s.rfind(':');
|
||||||
|
if (colon == std::string_view::npos)
|
||||||
|
return std::nullopt;
|
||||||
|
|
||||||
|
// Split head:[path]
|
||||||
|
const auto head = s.substr(0, colon);
|
||||||
|
const auto path = s.substr(colon + 1);
|
||||||
|
|
||||||
|
if (head.empty())
|
||||||
|
return std::nullopt;
|
||||||
|
|
||||||
|
return ScpLike{
|
||||||
|
.authority = ParsedURL::Authority::parse(head),
|
||||||
|
.path = path,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
ParsedURL fixGitURL(const std::string_view url)
|
||||||
|
{
|
||||||
|
{
|
||||||
|
std::optional<ParsedURL> parsedOpt;
|
||||||
|
try {
|
||||||
|
parsedOpt = parseURL(url);
|
||||||
|
} catch (BadURL &) {
|
||||||
|
if (hasPrefix(url, "file:"))
|
||||||
|
throw;
|
||||||
|
}
|
||||||
|
if (parsedOpt) {
|
||||||
|
auto & parsed = *parsedOpt;
|
||||||
|
if (parsed.authority)
|
||||||
|
return parsed;
|
||||||
|
if (parsed.scheme == "file")
|
||||||
throw BadURL(
|
throw BadURL(
|
||||||
"URL '%s' would parse as SCP authority = 'file', path = '%s' but this is also a valid `file:..` URL, and so we choose to disallow it",
|
"URL '%s' would parse as SCP authority = 'file', path = '%s' but this is also a valid `file:..` URL, and so we choose to disallow it",
|
||||||
url,
|
url,
|
||||||
path);
|
parsed.renderPath(true));
|
||||||
}
|
}
|
||||||
if (url.find("://") == std::string::npos) {
|
}
|
||||||
|
|
||||||
|
// if the url does not start with forward slash, add one
|
||||||
|
auto splitMakeAbs = [&](std::string_view pathS) {
|
||||||
|
std::vector<std::string> path;
|
||||||
|
|
||||||
|
if (!hasPrefix(pathS, "/")) {
|
||||||
|
path.emplace_back("");
|
||||||
|
}
|
||||||
|
splitStringInto(path, pathS, "/");
|
||||||
|
|
||||||
|
return path;
|
||||||
|
};
|
||||||
|
|
||||||
|
if (auto scp = parseScp(url)) {
|
||||||
|
return ParsedURL{
|
||||||
|
.scheme = "ssh",
|
||||||
|
.authority = std::move(scp->authority),
|
||||||
|
.path = splitMakeAbs(scp->path),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
return ParsedURL{
|
return ParsedURL{
|
||||||
.scheme = "file",
|
.scheme = "file",
|
||||||
.authority = ParsedURL::Authority{},
|
.authority = ParsedURL::Authority{},
|
||||||
.path = splitString<std::vector<std::string>>(url, "/"),
|
.path = splitMakeAbs(url),
|
||||||
};
|
};
|
||||||
}
|
|
||||||
return parseURL(url);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// https://www.rfc-editor.org/rfc/rfc3986#section-3.1
|
// https://www.rfc-editor.org/rfc/rfc3986#section-3.1
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue