diff --git a/src/libfetchers/github.cc b/src/libfetchers/github.cc index e40757dec..723c075f2 100644 --- a/src/libfetchers/github.cc +++ b/src/libfetchers/github.cc @@ -38,7 +38,8 @@ struct GitArchiveInputScheme : InputScheme if (url.scheme != schemeName()) return {}; - const auto & path = url.path; + /* This ignores empty path segments for back-compat. Older versions used a tokenizeString here. */ + auto path = url.pathSegments(/*skipEmpty=*/true) | std::ranges::to>(); std::optional rev; std::optional ref; diff --git a/src/libfetchers/indirect.cc b/src/libfetchers/indirect.cc index c5cbf156b..e05d27adc 100644 --- a/src/libfetchers/indirect.cc +++ b/src/libfetchers/indirect.cc @@ -14,7 +14,8 @@ struct IndirectInputScheme : InputScheme if (url.scheme != "flake") return {}; - const auto & path = url.path; + /* This ignores empty path segments for back-compat. Older versions used a tokenizeString here. */ + auto path = url.pathSegments(/*skipEmpty=*/true) | std::ranges::to>(); std::optional rev; std::optional ref; diff --git a/src/libflake-tests/flakeref.cc b/src/libflake-tests/flakeref.cc index 3636d3e98..e2cb91bb8 100644 --- a/src/libflake-tests/flakeref.cc +++ b/src/libflake-tests/flakeref.cc @@ -172,6 +172,74 @@ INSTANTIATE_TEST_SUITE_P( }, .description = "flake_id_ref_branch_trailing_slash", .expectedUrl = "flake:nixpkgs/branch/2aae6c35c94fcfb415dbe95f408b9ce91ee846ed", + }, + // The following tests are for back-compat with lax parsers in older versions + // that used `tokenizeString` for splitting path segments, which ignores empty + // strings. + InputFromURLTestCase{ + .url = "nixpkgs/branch////", + .attrs = + { + {"id", Attr("nixpkgs")}, + {"type", Attr("indirect")}, + {"ref", Attr("branch")}, + }, + .description = "flake_id_ref_branch_ignore_empty_trailing_segments", + .expectedUrl = "flake:nixpkgs/branch", + }, + InputFromURLTestCase{ + .url = "nixpkgs/branch///2aae6c35c94fcfb415dbe95f408b9ce91ee846ed///", + .attrs = + { + {"id", Attr("nixpkgs")}, + {"type", Attr("indirect")}, + {"ref", Attr("branch")}, + {"rev", Attr("2aae6c35c94fcfb415dbe95f408b9ce91ee846ed")}, + }, + .description = "flake_id_ref_branch_ignore_empty_segments_ref_rev", + .expectedUrl = "flake:nixpkgs/branch/2aae6c35c94fcfb415dbe95f408b9ce91ee846ed", + }, + InputFromURLTestCase{ + // Note that this is different from above because the "flake id" shorthand + // doesn't allow this. + .url = "flake:/nixpkgs///branch////", + .attrs = + { + {"id", Attr("nixpkgs")}, + {"type", Attr("indirect")}, + {"ref", Attr("branch")}, + }, + .description = "indirect_branch_empty_segments_everywhere", + .expectedUrl = "flake:nixpkgs/branch", + }, + InputFromURLTestCase{ + // TODO: Technically this has an empty authority, but it's ignored + // for now. Yes, this is what all versions going back to at least + // 2.18 did and yes, this should not be allowed. + .url = "github://////owner%42/////repo%41///branch%43////", + .attrs = + { + {"type", Attr("github")}, + {"owner", Attr("ownerB")}, + {"repo", Attr("repoA")}, + {"ref", Attr("branchC")}, + }, + .description = "github_ref_slashes_in_path_everywhere", + .expectedUrl = "github:ownerB/repoA/branchC", + }, + InputFromURLTestCase{ + // FIXME: Subgroups in gitlab URLs are busted. This double-encoding + // behavior exists since 2.18. See issue #9161 and PR #8845. + .url = "gitlab:/owner%252Fsubgroup/////repo%41///branch%43////", + .attrs = + { + {"type", Attr("gitlab")}, + {"owner", Attr("owner%2Fsubgroup")}, + {"repo", Attr("repoA")}, + {"ref", Attr("branchC")}, + }, + .description = "gitlab_ref_slashes_in_path_everywhere_with_pct_encoding", + .expectedUrl = "gitlab:owner%252Fsubgroup/repoA/branchC", }), [](const ::testing::TestParamInfo & info) { return info.param.description; }); diff --git a/src/libutil-tests/url.cc b/src/libutil-tests/url.cc index 9c698a943..56b879846 100644 --- a/src/libutil-tests/url.cc +++ b/src/libutil-tests/url.cc @@ -3,6 +3,8 @@ #include #include +#include + namespace nix { /* ----------- tests for url.hh --------------------------------------------------*/ @@ -686,7 +688,102 @@ TEST(parseURL, gitlabNamespacedProjectUrls) ASSERT_EQ(s, parsed.to_string()); } +/* ---------------------------------------------------------------------------- + * pathSegments + * --------------------------------------------------------------------------*/ + +struct ParsedURLPathSegmentsTestCase +{ + std::string url; + std::vector segments; + std::string path; + bool skipEmpty; + std::string description; +}; + +class ParsedURLPathSegmentsTest : public ::testing::TestWithParam +{}; + +TEST_P(ParsedURLPathSegmentsTest, segmentsAreCorrect) +{ + const auto & testCase = GetParam(); + auto segments = parseURL(testCase.url).pathSegments(/*skipEmpty=*/testCase.skipEmpty) + | std::ranges::to(); + EXPECT_EQ(segments, testCase.segments); + EXPECT_EQ(encodeUrlPath(segments), testCase.path); +} + +INSTANTIATE_TEST_SUITE_P( + ParsedURL, + ParsedURLPathSegmentsTest, + ::testing::Values( + ParsedURLPathSegmentsTestCase{ + .url = "scheme:", + .segments = {""}, + .path = "", + .skipEmpty = false, + .description = "no_authority_empty_path", + }, + ParsedURLPathSegmentsTestCase{ + .url = "scheme://", + .segments = {""}, + .path = "", + .skipEmpty = false, + .description = "empty_authority_empty_path", + }, + ParsedURLPathSegmentsTestCase{ + .url = "scheme:///", + .segments = {"", ""}, + .path = "/", + .skipEmpty = false, + .description = "empty_authority_empty_path_trailing", + }, + ParsedURLPathSegmentsTestCase{ + .url = "scheme://example.com/", + .segments = {"", ""}, + .path = "/", + .skipEmpty = false, + .description = "non_empty_authority_empty_path", + }, + ParsedURLPathSegmentsTestCase{ + .url = "scheme://example.com//", + .segments = {"", "", ""}, + .path = "//", + .skipEmpty = false, + .description = "non_empty_authority_non_empty_path", + }, + ParsedURLPathSegmentsTestCase{ + .url = "scheme://example.com///path///with//strange/empty///segments////", + .segments = {"path", "with", "strange", "empty", "segments"}, + .path = "path/with/strange/empty/segments", + .skipEmpty = true, + .description = "skip_all_empty_segments_with_authority", + }, + ParsedURLPathSegmentsTestCase{ + .url = "scheme://example.com///lots///empty///", + .segments = {"", "", "", "lots", "", "", "empty", "", "", ""}, + .path = "///lots///empty///", + .skipEmpty = false, + .description = "empty_segments_with_authority", + }, + ParsedURLPathSegmentsTestCase{ + .url = "scheme:/path///with//strange/empty///segments////", + .segments = {"path", "with", "strange", "empty", "segments"}, + .path = "path/with/strange/empty/segments", + .skipEmpty = true, + .description = "skip_all_empty_segments_no_authority_starts_with_slash", + }, + ParsedURLPathSegmentsTestCase{ + .url = "scheme:path///with//strange/empty///segments////", + .segments = {"path", "with", "strange", "empty", "segments"}, + .path = "path/with/strange/empty/segments", + .skipEmpty = true, + .description = "skip_all_empty_segments_no_authority_doesnt_start_with_slash", + }), + [](const auto & info) { return info.param.description; }); + TEST(nix, isValidSchemeName) + { ASSERT_TRUE(isValidSchemeName("http")); ASSERT_TRUE(isValidSchemeName("https")); diff --git a/src/libutil/include/nix/util/url.hh b/src/libutil/include/nix/util/url.hh index 1d9797551..5aa85230a 100644 --- a/src/libutil/include/nix/util/url.hh +++ b/src/libutil/include/nix/util/url.hh @@ -1,6 +1,7 @@ #pragma once ///@file +#include #include #include "nix/util/error.hh" @@ -230,6 +231,20 @@ struct ParsedURL * Remove `.` and `..` path segments. */ ParsedURL canonicalise(); + + /** + * Get a range of path segments (the substrings separated by '/' characters). + * + * @param skipEmpty Skip all empty path segments + */ + auto pathSegments(bool skipEmpty) const & + { + return std::views::filter(path, [skipEmpty](std::string_view segment) { + if (skipEmpty) + return !segment.empty(); + return true; + }); + } }; std::ostream & operator<<(std::ostream & os, const ParsedURL & url);