diff --git a/src/libutil-tests/url.cc b/src/libutil-tests/url.cc index 93dcd232e..9b0ba2f59 100644 --- a/src/libutil-tests/url.cc +++ b/src/libutil-tests/url.cc @@ -14,8 +14,8 @@ using HostType = Authority::HostType; struct FixGitURLParam { - std::string input; - std::string expected; + std::string_view input; + std::string_view expected; ParsedURL parsed; }; @@ -63,6 +63,34 @@ INSTANTIATE_TEST_SUITE_P( .path = {"", "owner", "repo.git"}, }, }, + // SCP-like URL (no user) + FixGitURLParam{ + .input = "github.com:owner/repo.git", + .expected = "ssh://github.com/owner/repo.git", + .parsed = + ParsedURL{ + .scheme = "ssh", + .authority = + ParsedURL::Authority{ + .host = "github.com", + }, + .path = {"", "owner", "repo.git"}, + }, + }, + // SCP-like URL (leading slash) + FixGitURLParam{ + .input = "github.com:/owner/repo.git", + .expected = "ssh://github.com/owner/repo.git", + .parsed = + ParsedURL{ + .scheme = "ssh", + .authority = + ParsedURL::Authority{ + .host = "github.com", + }, + .path = {"", "owner", "repo.git"}, + }, + }, // Absolute path (becomes file:) FixGitURLParam{ .input = "/home/me/repo", @@ -77,7 +105,7 @@ INSTANTIATE_TEST_SUITE_P( // IPV6 test case FixGitURLParam{ .input = "user@[2001:db8:1::2]:/home/file", - .expected = "ssh://user@[2001:db8:1::2]//home/file", + .expected = "ssh://user@[2001:db8:1::2]/home/file", .parsed = ParsedURL{ .scheme = "ssh", @@ -87,7 +115,7 @@ INSTANTIATE_TEST_SUITE_P( .host = "2001:db8:1::2", .user = "user", }, - .path = {"", "", "home", "file"}, + .path = {"", "home", "file"}, }, })); @@ -99,19 +127,18 @@ TEST_P(FixGitURLTestSuite, parsesVariedGitUrls) EXPECT_EQ(actual.to_string(), p.expected); } -TEST(FixGitURLTestSuite, scpLikeNoUserParsesPoorly) +TEST_P(FixGitURLTestSuite, fixGitIsIdempotent) { - // SCP-like URL (no user) + auto & p = GetParam(); + const auto actual = fixGitURL(p.expected).to_string(); + EXPECT_EQ(actual, p.expected); +} - // Cannot "to_string" this because has illegal path not starting - // with `/`. - EXPECT_EQ( - fixGitURL("github.com:owner/repo.git"), - (ParsedURL{ - .scheme = "file", - .authority = ParsedURL::Authority{}, - .path = {"github.com:owner", "repo.git"}, - })); +TEST_P(FixGitURLTestSuite, fixGitOutputParses) +{ + auto & p = GetParam(); + const auto parsed = fixGitURL(p.expected); + EXPECT_EQ(parseURL(parsed.to_string()), parsed); } TEST(FixGitURLTestSuite, properlyRejectFileURLWithAuthority) @@ -134,39 +161,6 @@ TEST(FixGitURLTestSuite, ambiguousScpLikeOrFileURL) "URL 'file:/var/repos/x' would parse as SCP authority = 'file', path = '/var/repos/x' but this is also a valid `file:..` URL, and so we choose to disallow it"))); } -TEST(FixGitURLTestSuite, scpLikePathLeadingSlashParsesPoorly) -{ - // SCP-like URL (no user) - - // Cannot "to_string" this because has illegal path not starting - // with `/`. - EXPECT_EQ( - fixGitURL("github.com:/owner/repo.git"), - (ParsedURL{ - .scheme = "file", - .authority = ParsedURL::Authority{}, - .path = {"github.com:", "owner", "repo.git"}, - })); -} - -TEST(FixGitURLTestSuite, relativePathParsesPoorly) -{ - // Relative path (becomes file:// absolute) - - // Cannot "to_string" this because has illegal path not starting - // with `/`. - EXPECT_EQ( - fixGitURL("relative/repo"), - (ParsedURL{ - .scheme = "file", - .authority = - ParsedURL::Authority{ - .hostType = ParsedURL::Authority::HostType::Name, - .host = "", - }, - .path = {"relative", "repo"}})); -} - TEST(parseURL, parsesSimpleHttpUrl) { auto s = "http://www.example.org/file.tar.gz"; diff --git a/src/libutil/include/nix/util/url.hh b/src/libutil/include/nix/util/url.hh index f2bd79b08..55844ab95 100644 --- a/src/libutil/include/nix/util/url.hh +++ b/src/libutil/include/nix/util/url.hh @@ -327,10 +327,23 @@ struct ParsedUrlScheme ParsedUrlScheme parseUrlScheme(std::string_view scheme); -/* Detects scp-style uris (e.g. git@github.com:NixOS/nix) and fixes - them by removing the `:` and assuming a scheme of `ssh://`. Also - changes absolute paths into file:// URLs. */ -ParsedURL fixGitURL(const std::string & url); +/** + * Normalize a Git remote string from various styles into a URL-like form. + * Input forms handled: + * 1) SCP-style SSH syntax: "[user@]host:path" -> "ssh://user@host/path" + * 2) Already "file:" URLs: "file:/abs/or/rel" -> unchanged + * 3) Bare paths / filenames: "src/repo" or "/abs" -> "file:src/repo" or "file:/abs" + * 4) Anything with "://": treated as a proper URL -> unchanged + * + * Note: for the scp-style, as they are converted to ssh-form, all paths are assumed to + * then be absolute whereas in programs like git, they retain the scp form which allows + * relative paths. + * + * Additionally, if no url can be determined, it is returned as a file:// URI. + * If the url does not start with a leading slash, one will be added since there are no + * relative path URIs. + */ +ParsedURL fixGitURL(std::string_view url); /** * Whether a string is valid as RFC 3986 scheme name. diff --git a/src/libutil/url.cc b/src/libutil/url.cc index 64ad2a566..f368f6aa5 100644 --- a/src/libutil/url.cc +++ b/src/libutil/url.cc @@ -408,28 +408,91 @@ ParsedUrlScheme parseUrlScheme(std::string_view scheme) }; } -ParsedURL fixGitURL(const std::string & url) +struct ScpLike { - std::regex scpRegex("([^/]*)@(.*):(.*)"); - if (!hasPrefix(url, "/") && std::regex_match(url, scpRegex)) - return parseURL(std::regex_replace(url, scpRegex, "ssh://$1@$2/$3")); - std::string_view path = url; - if (splitPrefix(path, "file:")) { - if (hasPrefix(url, "file://")) - return parseURL(url); - throw BadURL( - "URL '%s' would parse as SCP authority = 'file', path = '%s' but this is also a valid `file:..` URL, and so we choose to disallow it", - url, - path); + ParsedURL::Authority authority; + std::string_view path; +}; + +/** + * Parse a scp url. This is a helper struct for fixGitURL. + * This is needed since we support scp-style urls for git urls. + * https://git-scm.com/book/ms/v2/Git-on-the-Server-The-Protocols + * + * A good reference is libgit2 also allows scp style + * https://github.com/libgit2/libgit2/blob/58d9363f02f1fa39e46d49b604f27008e75b72f2/src/util/net.c#L806 + */ +static std::optional parseScp(const std::string_view s) noexcept +{ + if (s.empty() || s.front() == '/') + return std::nullopt; + + // Find the colon that separates host from path. + // Find the right-most since ipv6 has colons + const auto colon = s.rfind(':'); + if (colon == std::string_view::npos) + return std::nullopt; + + // Split head:[path] + const auto head = s.substr(0, colon); + const auto path = s.substr(colon + 1); + + if (head.empty()) + return std::nullopt; + + return ScpLike{ + .authority = ParsedURL::Authority::parse(head), + .path = path, + }; +} + +ParsedURL fixGitURL(const std::string_view url) +{ + { + std::optional parsedOpt; + try { + parsedOpt = parseURL(url); + } catch (BadURL &) { + if (hasPrefix(url, "file:")) + throw; + } + if (parsedOpt) { + auto & parsed = *parsedOpt; + if (parsed.authority) + return parsed; + if (parsed.scheme == "file") + throw BadURL( + "URL '%s' would parse as SCP authority = 'file', path = '%s' but this is also a valid `file:..` URL, and so we choose to disallow it", + url, + parsed.renderPath(true)); + } } - if (url.find("://") == std::string::npos) { + + // if the url does not start with forward slash, add one + auto splitMakeAbs = [&](std::string_view pathS) { + std::vector path; + + if (!hasPrefix(pathS, "/")) { + path.emplace_back(""); + } + splitStringInto(path, pathS, "/"); + + return path; + }; + + if (auto scp = parseScp(url)) { return ParsedURL{ - .scheme = "file", - .authority = ParsedURL::Authority{}, - .path = splitString>(url, "/"), + .scheme = "ssh", + .authority = std::move(scp->authority), + .path = splitMakeAbs(scp->path), }; } - return parseURL(url); + + return ParsedURL{ + .scheme = "file", + .authority = ParsedURL::Authority{}, + .path = splitMakeAbs(url), + }; } // https://www.rfc-editor.org/rfc/rfc3986#section-3.1