From 04ad66af5f0fbec60783d8913292125f43954dcd Mon Sep 17 00:00:00 2001 From: Farid Zakaria Date: Sun, 24 Aug 2025 20:19:53 -0700 Subject: [PATCH] Improve Git URI handling Git URI can also support scp style links similar to git itself. This change augments the function fixGitURL to better handle the scp style urls through a minimal parser rather than regex which has been found to be brittle. * Support for IPV6 added * New test cases added for fixGitURL * Clearer documentation on purpose and goal of function * More `std::string_view` for performance * A few more URL tests Fixes #5958 --- src/libutil-tests/url.cc | 58 +++++++++++++++++++++ src/libutil/include/nix/util/url.hh | 21 ++++++-- src/libutil/url.cc | 78 +++++++++++++++++++++++++---- 3 files changed, 142 insertions(+), 15 deletions(-) diff --git a/src/libutil-tests/url.cc b/src/libutil-tests/url.cc index 56b879846..3f856b0aa 100644 --- a/src/libutil-tests/url.cc +++ b/src/libutil-tests/url.cc @@ -12,6 +12,64 @@ namespace nix { using Authority = ParsedURL::Authority; using HostType = Authority::HostType; +struct FixGitURLParam +{ + std::string_view input; + std::string_view expected; +}; + +std::ostream & operator<<(std::ostream & os, const FixGitURLParam & param) +{ + return os << "Input: \"" << param.input << "\", Expected: \"" << param.expected << "\""; +} + +class FixGitURLTestSuite : public ::testing::TestWithParam +{}; + +INSTANTIATE_TEST_SUITE_P( + FixGitURLs, + FixGitURLTestSuite, + ::testing::Values( + // https://github.com/NixOS/nix/issues/5958 + // Already proper URL with git+ssh + FixGitURLParam{"git+ssh://user@domain:1234/path", "git+ssh://user@domain:1234/path"}, + // SCP-like URL (rewritten to ssh://) + FixGitURLParam{"git@github.com:owner/repo.git", "ssh://git@github.com/owner/repo.git"}, + // SCP-like URL (no user) + FixGitURLParam{"github.com:owner/repo.git", "ssh://github.com/owner/repo.git"}, + // SCP-like URL (leading slash) + FixGitURLParam{"github.com:/owner/repo.git", "ssh://github.com/owner/repo.git"}, + // Absolute path (becomes file:) + FixGitURLParam{"/home/me/repo", "file:///home/me/repo"}, + // Relative path (becomes file:// absolute) + FixGitURLParam{"relative/repo", "file:///relative/repo"}, + // Already file: scheme + // NOTE: This is not valid technically as it's not absolute + FixGitURLParam{"file:/var/repos/x", "file:/var/repos/x"}, + // IPV6 test case + FixGitURLParam{"user@[2001:db8:1::2]:/home/file", "ssh://user@[2001:db8:1::2]/home/file"})); + +TEST_P(FixGitURLTestSuite, parsesVariedGitUrls) +{ + auto & p = GetParam(); + const auto actual = fixGitURL(p.input).to_string(); + EXPECT_EQ(actual, p.expected); +} + +TEST_P(FixGitURLTestSuite, fixGitIsIdempotent) +{ + auto & p = GetParam(); + const auto actual = fixGitURL(p.expected).to_string(); + EXPECT_EQ(actual, p.expected); +} + +TEST_P(FixGitURLTestSuite, fixGitOutputParses) +{ + auto & p = GetParam(); + const auto parsed = fixGitURL(p.expected); + EXPECT_EQ(parseURL(parsed.to_string()), parsed); +} + TEST(parseURL, parsesSimpleHttpUrl) { auto s = "http://www.example.org/file.tar.gz"; diff --git a/src/libutil/include/nix/util/url.hh b/src/libutil/include/nix/util/url.hh index 5aa85230a..e04fe73f4 100644 --- a/src/libutil/include/nix/util/url.hh +++ b/src/libutil/include/nix/util/url.hh @@ -327,10 +327,23 @@ struct ParsedUrlScheme ParsedUrlScheme parseUrlScheme(std::string_view scheme); -/* Detects scp-style uris (e.g. git@github.com:NixOS/nix) and fixes - them by removing the `:` and assuming a scheme of `ssh://`. Also - changes absolute paths into file:// URLs. */ -ParsedURL fixGitURL(const std::string & url); +/** + * Normalize a Git remote string from various styles into a URL-like form. + * Input forms handled: + * 1) SCP-style SSH syntax: "[user@]host:path" -> "ssh://user@host/path" + * 2) Already "file:" URLs: "file:/abs/or/rel" -> unchanged + * 3) Bare paths / filenames: "src/repo" or "/abs" -> "file:src/repo" or "file:/abs" + * 4) Anything with "://": treated as a proper URL -> unchanged + * + * Note: for the scp-style, as they are converted to ssh-form, all paths are assumed to + * then be absolute whereas in programs like git, they retain the scp form which allows + * relative paths. + * + * Additionally, if no url can be determined, it is returned as a file:// URI. + * If the url does not start with a leading slash, one will be added since there are no + * relative path URIs. + */ +ParsedURL fixGitURL(std::string_view url); /** * Whether a string is valid as RFC 3986 scheme name. diff --git a/src/libutil/url.cc b/src/libutil/url.cc index b9bf0b4f4..1acc219df 100644 --- a/src/libutil/url.cc +++ b/src/libutil/url.cc @@ -408,21 +408,77 @@ ParsedUrlScheme parseUrlScheme(std::string_view scheme) }; } -ParsedURL fixGitURL(const std::string & url) +struct ScpLike { - std::regex scpRegex("([^/]*)@(.*):(.*)"); - if (!hasPrefix(url, "/") && std::regex_match(url, scpRegex)) - return parseURL(std::regex_replace(url, scpRegex, "ssh://$1@$2/$3")); - if (hasPrefix(url, "file:")) - return parseURL(url); - if (url.find("://") == std::string::npos) { + ParsedURL::Authority authority; + std::string_view path; +}; + +/** + * Parse a scp url. This is a helper struct for fixGitURL. + * This is needed since we support scp-style urls for git urls. + * https://git-scm.com/book/ms/v2/Git-on-the-Server-The-Protocols + * + * A good reference is libgit2 also allows scp style + * https://github.com/libgit2/libgit2/blob/58d9363f02f1fa39e46d49b604f27008e75b72f2/src/util/net.c#L806 + */ +static std::optional parseScp(const std::string_view s) noexcept +{ + if (s.empty() || s.front() == '/') + return std::nullopt; + + // Find the colon that separates host from path. + // Find the right-most since ipv6 has colons + const auto colon = s.rfind(':'); + if (colon == std::string_view::npos) + return std::nullopt; + + // Split head:[path] + const auto head = s.substr(0, colon); + const auto path = s.substr(colon + 1); + + if (head.empty()) + return std::nullopt; + + return ScpLike{ + .authority = ParsedURL::Authority::parse(head), + .path = path, + }; +} + +ParsedURL fixGitURL(const std::string_view url) +{ + try { + if (auto parsed = parseURL(url); parsed.scheme == "file" || parsed.authority) + return parsed; + } catch (BadURL &) { + } + + // if the url does not start with forward slash, add one + auto splitMakeAbs = [&](std::string_view pathS) { + std::vector path; + + if (!hasPrefix(pathS, "/")) { + path.emplace_back(""); + } + splitStringInto(path, pathS, "/"); + + return path; + }; + + if (auto scp = parseScp(url)) { return ParsedURL{ - .scheme = "file", - .authority = ParsedURL::Authority{}, - .path = splitString>(url, "/"), + .scheme = "ssh", + .authority = std::move(scp->authority), + .path = splitMakeAbs(scp->path), }; } - return parseURL(url); + + return ParsedURL{ + .scheme = "file", + .authority = ParsedURL::Authority{}, + .path = splitMakeAbs(url), + }; } // https://www.rfc-editor.org/rfc/rfc3986#section-3.1