From d2f1860ee52ef6263065a6a73d7d8ea331e4c65d Mon Sep 17 00:00:00 2001 From: John Ericson Date: Mon, 1 Sep 2025 15:08:57 -0400 Subject: [PATCH 1/2] Revert "Improve Git URI handling" I (@Ericson2314) messed up. We were supposed to test the status quo before landing any new chnages, and also there is one change that is not quite right (relative paths). I am reverting for now, and then backporting the test suite to the old situation. This reverts commit 04ad66af5f0fbec60783d8913292125f43954dcd. --- src/libutil-tests/url.cc | 58 --------------------- src/libutil/include/nix/util/url.hh | 21 ++------ src/libutil/url.cc | 78 ++++------------------------- 3 files changed, 15 insertions(+), 142 deletions(-) diff --git a/src/libutil-tests/url.cc b/src/libutil-tests/url.cc index 3f856b0aa..56b879846 100644 --- a/src/libutil-tests/url.cc +++ b/src/libutil-tests/url.cc @@ -12,64 +12,6 @@ namespace nix { using Authority = ParsedURL::Authority; using HostType = Authority::HostType; -struct FixGitURLParam -{ - std::string_view input; - std::string_view expected; -}; - -std::ostream & operator<<(std::ostream & os, const FixGitURLParam & param) -{ - return os << "Input: \"" << param.input << "\", Expected: \"" << param.expected << "\""; -} - -class FixGitURLTestSuite : public ::testing::TestWithParam -{}; - -INSTANTIATE_TEST_SUITE_P( - FixGitURLs, - FixGitURLTestSuite, - ::testing::Values( - // https://github.com/NixOS/nix/issues/5958 - // Already proper URL with git+ssh - FixGitURLParam{"git+ssh://user@domain:1234/path", "git+ssh://user@domain:1234/path"}, - // SCP-like URL (rewritten to ssh://) - FixGitURLParam{"git@github.com:owner/repo.git", "ssh://git@github.com/owner/repo.git"}, - // SCP-like URL (no user) - FixGitURLParam{"github.com:owner/repo.git", "ssh://github.com/owner/repo.git"}, - // SCP-like URL (leading slash) - FixGitURLParam{"github.com:/owner/repo.git", "ssh://github.com/owner/repo.git"}, - // Absolute path (becomes file:) - FixGitURLParam{"/home/me/repo", "file:///home/me/repo"}, - // Relative path (becomes file:// absolute) - FixGitURLParam{"relative/repo", "file:///relative/repo"}, - // Already file: scheme - // NOTE: This is not valid technically as it's not absolute - FixGitURLParam{"file:/var/repos/x", "file:/var/repos/x"}, - // IPV6 test case - FixGitURLParam{"user@[2001:db8:1::2]:/home/file", "ssh://user@[2001:db8:1::2]/home/file"})); - -TEST_P(FixGitURLTestSuite, parsesVariedGitUrls) -{ - auto & p = GetParam(); - const auto actual = fixGitURL(p.input).to_string(); - EXPECT_EQ(actual, p.expected); -} - -TEST_P(FixGitURLTestSuite, fixGitIsIdempotent) -{ - auto & p = GetParam(); - const auto actual = fixGitURL(p.expected).to_string(); - EXPECT_EQ(actual, p.expected); -} - -TEST_P(FixGitURLTestSuite, fixGitOutputParses) -{ - auto & p = GetParam(); - const auto parsed = fixGitURL(p.expected); - EXPECT_EQ(parseURL(parsed.to_string()), parsed); -} - TEST(parseURL, parsesSimpleHttpUrl) { auto s = "http://www.example.org/file.tar.gz"; diff --git a/src/libutil/include/nix/util/url.hh b/src/libutil/include/nix/util/url.hh index 55844ab95..f2bd79b08 100644 --- a/src/libutil/include/nix/util/url.hh +++ b/src/libutil/include/nix/util/url.hh @@ -327,23 +327,10 @@ struct ParsedUrlScheme ParsedUrlScheme parseUrlScheme(std::string_view scheme); -/** - * Normalize a Git remote string from various styles into a URL-like form. - * Input forms handled: - * 1) SCP-style SSH syntax: "[user@]host:path" -> "ssh://user@host/path" - * 2) Already "file:" URLs: "file:/abs/or/rel" -> unchanged - * 3) Bare paths / filenames: "src/repo" or "/abs" -> "file:src/repo" or "file:/abs" - * 4) Anything with "://": treated as a proper URL -> unchanged - * - * Note: for the scp-style, as they are converted to ssh-form, all paths are assumed to - * then be absolute whereas in programs like git, they retain the scp form which allows - * relative paths. - * - * Additionally, if no url can be determined, it is returned as a file:// URI. - * If the url does not start with a leading slash, one will be added since there are no - * relative path URIs. - */ -ParsedURL fixGitURL(std::string_view url); +/* Detects scp-style uris (e.g. git@github.com:NixOS/nix) and fixes + them by removing the `:` and assuming a scheme of `ssh://`. Also + changes absolute paths into file:// URLs. */ +ParsedURL fixGitURL(const std::string & url); /** * Whether a string is valid as RFC 3986 scheme name. diff --git a/src/libutil/url.cc b/src/libutil/url.cc index 7304a2150..1c7fd3f0f 100644 --- a/src/libutil/url.cc +++ b/src/libutil/url.cc @@ -408,77 +408,21 @@ ParsedUrlScheme parseUrlScheme(std::string_view scheme) }; } -struct ScpLike +ParsedURL fixGitURL(const std::string & url) { - ParsedURL::Authority authority; - std::string_view path; -}; - -/** - * Parse a scp url. This is a helper struct for fixGitURL. - * This is needed since we support scp-style urls for git urls. - * https://git-scm.com/book/ms/v2/Git-on-the-Server-The-Protocols - * - * A good reference is libgit2 also allows scp style - * https://github.com/libgit2/libgit2/blob/58d9363f02f1fa39e46d49b604f27008e75b72f2/src/util/net.c#L806 - */ -static std::optional parseScp(const std::string_view s) noexcept -{ - if (s.empty() || s.front() == '/') - return std::nullopt; - - // Find the colon that separates host from path. - // Find the right-most since ipv6 has colons - const auto colon = s.rfind(':'); - if (colon == std::string_view::npos) - return std::nullopt; - - // Split head:[path] - const auto head = s.substr(0, colon); - const auto path = s.substr(colon + 1); - - if (head.empty()) - return std::nullopt; - - return ScpLike{ - .authority = ParsedURL::Authority::parse(head), - .path = path, - }; -} - -ParsedURL fixGitURL(const std::string_view url) -{ - try { - if (auto parsed = parseURL(url); parsed.scheme == "file" || parsed.authority) - return parsed; - } catch (BadURL &) { - } - - // if the url does not start with forward slash, add one - auto splitMakeAbs = [&](std::string_view pathS) { - std::vector path; - - if (!hasPrefix(pathS, "/")) { - path.emplace_back(""); - } - splitStringInto(path, pathS, "/"); - - return path; - }; - - if (auto scp = parseScp(url)) { + std::regex scpRegex("([^/]*)@(.*):(.*)"); + if (!hasPrefix(url, "/") && std::regex_match(url, scpRegex)) + return parseURL(std::regex_replace(url, scpRegex, "ssh://$1@$2/$3")); + if (hasPrefix(url, "file:")) + return parseURL(url); + if (url.find("://") == std::string::npos) { return ParsedURL{ - .scheme = "ssh", - .authority = std::move(scp->authority), - .path = splitMakeAbs(scp->path), + .scheme = "file", + .authority = ParsedURL::Authority{}, + .path = splitString>(url, "/"), }; } - - return ParsedURL{ - .scheme = "file", - .authority = ParsedURL::Authority{}, - .path = splitMakeAbs(url), - }; + return parseURL(url); } // https://www.rfc-editor.org/rfc/rfc3986#section-3.1 From 2b310aee1310a0eb43dffb1095ae6c53f0649a7f Mon Sep 17 00:00:00 2001 From: Farid Zakaria Date: Sun, 24 Aug 2025 20:19:53 -0700 Subject: [PATCH 2/2] A few more URL tests Adapted from commit 04ad66af5f0fbec60783d8913292125f43954dcd --- src/libutil-tests/url.cc | 148 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 148 insertions(+) diff --git a/src/libutil-tests/url.cc b/src/libutil-tests/url.cc index 56b879846..d545c747b 100644 --- a/src/libutil-tests/url.cc +++ b/src/libutil-tests/url.cc @@ -12,6 +12,154 @@ namespace nix { using Authority = ParsedURL::Authority; using HostType = Authority::HostType; +struct FixGitURLParam +{ + std::string input; + std::string expected; + ParsedURL parsed; +}; + +std::ostream & operator<<(std::ostream & os, const FixGitURLParam & param) +{ + return os << "Input: \"" << param.input << "\", Expected: \"" << param.expected << "\""; +} + +class FixGitURLTestSuite : public ::testing::TestWithParam +{}; + +INSTANTIATE_TEST_SUITE_P( + FixGitURLs, + FixGitURLTestSuite, + ::testing::Values( + // https://github.com/NixOS/nix/issues/5958 + // Already proper URL with git+ssh + FixGitURLParam{ + .input = "git+ssh://user@domain:1234/path", + .expected = "git+ssh://user@domain:1234/path", + .parsed = + ParsedURL{ + .scheme = "git+ssh", + .authority = + ParsedURL::Authority{ + .host = "domain", + .user = "user", + .port = 1234, + }, + .path = {"", "path"}, + }, + }, + // SCP-like URL (rewritten to ssh://) + FixGitURLParam{ + .input = "git@github.com:owner/repo.git", + .expected = "ssh://git@github.com/owner/repo.git", + .parsed = + ParsedURL{ + .scheme = "ssh", + .authority = + ParsedURL::Authority{ + .host = "github.com", + .user = "git", + }, + .path = {"", "owner", "repo.git"}, + }, + }, + // Absolute path (becomes file:) + FixGitURLParam{ + .input = "/home/me/repo", + .expected = "file:///home/me/repo", + .parsed = + ParsedURL{ + .scheme = "file", + .authority = ParsedURL::Authority{}, + .path = {"", "home", "me", "repo"}, + }, + }, + // Already file: scheme + // NOTE: Git/SCP treat this as a `:`, so we are + // failing to "fix up" this case. + FixGitURLParam{ + .input = "file:/var/repos/x", + .expected = "file:/var/repos/x", + .parsed = + ParsedURL{ + .scheme = "file", + .authority = std::nullopt, + .path = {"", "var", "repos", "x"}, + }, + }, + // IPV6 test case + FixGitURLParam{ + .input = "user@[2001:db8:1::2]:/home/file", + .expected = "ssh://user@[2001:db8:1::2]//home/file", + .parsed = + ParsedURL{ + .scheme = "ssh", + .authority = + ParsedURL::Authority{ + .hostType = HostType::IPv6, + .host = "2001:db8:1::2", + .user = "user", + }, + .path = {"", "", "home", "file"}, + }, + })); + +TEST_P(FixGitURLTestSuite, parsesVariedGitUrls) +{ + auto & p = GetParam(); + const auto actual = fixGitURL(p.input); + EXPECT_EQ(actual, p.parsed); + EXPECT_EQ(actual.to_string(), p.expected); +} + +TEST(FixGitURLTestSuite, scpLikeNoUserParsesPoorly) +{ + // SCP-like URL (no user) + + // Cannot "to_string" this because has illegal path not starting + // with `/`. + EXPECT_EQ( + fixGitURL("github.com:owner/repo.git"), + (ParsedURL{ + .scheme = "file", + .authority = ParsedURL::Authority{}, + .path = {"github.com:owner", "repo.git"}, + })); +} + +TEST(FixGitURLTestSuite, scpLikePathLeadingSlashParsesPoorly) +{ + // SCP-like URL (no user) + + // Cannot "to_string" this because has illegal path not starting + // with `/`. + EXPECT_EQ( + fixGitURL("github.com:/owner/repo.git"), + (ParsedURL{ + .scheme = "file", + .authority = ParsedURL::Authority{}, + .path = {"github.com:", "owner", "repo.git"}, + })); +} + +TEST(FixGitURLTestSuite, relativePathParsesPoorly) +{ + // Relative path (becomes file:// absolute) + + // Cannot "to_string" this because has illegal path not starting + // with `/`. + EXPECT_EQ( + fixGitURL("relative/repo"), + (ParsedURL{ + .scheme = "file", + .authority = + ParsedURL::Authority{ + .hostType = ParsedURL::Authority::HostType::Name, + .host = "", + }, + .path = {"relative", "repo"}})); +} + TEST(parseURL, parsesSimpleHttpUrl) { auto s = "http://www.example.org/file.tar.gz";