1
1
Fork 0
mirror of https://github.com/NixOS/nix.git synced 2025-11-11 13:06:01 +01:00

libfetchers: Restore path separator ignoring behavior for indirect and git-archive flakerefs

Old versions of nix happily accepted a lot of weird flake references,
which we didn't have tests for, so this was accidentally broken in
c436b7a32a.

This patch restores previous behavior and adds a plethora of tests
to ensure we don't break this in the future.

These test cases are aligned with how 2.18/2.28 parsed flake references.
This commit is contained in:
Sergei Zimmerman 2025-08-30 14:40:56 +03:00
parent 511d885d60
commit a38ebdd511
No known key found for this signature in database
5 changed files with 184 additions and 2 deletions

View file

@ -38,7 +38,8 @@ struct GitArchiveInputScheme : InputScheme
if (url.scheme != schemeName())
return {};
const auto & path = url.path;
/* This ignores empty path segments for back-compat. Older versions used a tokenizeString here. */
auto path = url.pathSegments(/*skipEmpty=*/true) | std::ranges::to<std::vector<std::string>>();
std::optional<Hash> rev;
std::optional<std::string> ref;

View file

@ -14,7 +14,8 @@ struct IndirectInputScheme : InputScheme
if (url.scheme != "flake")
return {};
const auto & path = url.path;
/* This ignores empty path segments for back-compat. Older versions used a tokenizeString here. */
auto path = url.pathSegments(/*skipEmpty=*/true) | std::ranges::to<std::vector<std::string>>();
std::optional<Hash> rev;
std::optional<std::string> ref;

View file

@ -172,6 +172,74 @@ INSTANTIATE_TEST_SUITE_P(
},
.description = "flake_id_ref_branch_trailing_slash",
.expectedUrl = "flake:nixpkgs/branch/2aae6c35c94fcfb415dbe95f408b9ce91ee846ed",
},
// The following tests are for back-compat with lax parsers in older versions
// that used `tokenizeString` for splitting path segments, which ignores empty
// strings.
InputFromURLTestCase{
.url = "nixpkgs/branch////",
.attrs =
{
{"id", Attr("nixpkgs")},
{"type", Attr("indirect")},
{"ref", Attr("branch")},
},
.description = "flake_id_ref_branch_ignore_empty_trailing_segments",
.expectedUrl = "flake:nixpkgs/branch",
},
InputFromURLTestCase{
.url = "nixpkgs/branch///2aae6c35c94fcfb415dbe95f408b9ce91ee846ed///",
.attrs =
{
{"id", Attr("nixpkgs")},
{"type", Attr("indirect")},
{"ref", Attr("branch")},
{"rev", Attr("2aae6c35c94fcfb415dbe95f408b9ce91ee846ed")},
},
.description = "flake_id_ref_branch_ignore_empty_segments_ref_rev",
.expectedUrl = "flake:nixpkgs/branch/2aae6c35c94fcfb415dbe95f408b9ce91ee846ed",
},
InputFromURLTestCase{
// Note that this is different from above because the "flake id" shorthand
// doesn't allow this.
.url = "flake:/nixpkgs///branch////",
.attrs =
{
{"id", Attr("nixpkgs")},
{"type", Attr("indirect")},
{"ref", Attr("branch")},
},
.description = "indirect_branch_empty_segments_everywhere",
.expectedUrl = "flake:nixpkgs/branch",
},
InputFromURLTestCase{
// TODO: Technically this has an empty authority, but it's ignored
// for now. Yes, this is what all versions going back to at least
// 2.18 did and yes, this should not be allowed.
.url = "github://////owner%42/////repo%41///branch%43////",
.attrs =
{
{"type", Attr("github")},
{"owner", Attr("ownerB")},
{"repo", Attr("repoA")},
{"ref", Attr("branchC")},
},
.description = "github_ref_slashes_in_path_everywhere",
.expectedUrl = "github:ownerB/repoA/branchC",
},
InputFromURLTestCase{
// FIXME: Subgroups in gitlab URLs are busted. This double-encoding
// behavior exists since 2.18. See issue #9161 and PR #8845.
.url = "gitlab:/owner%252Fsubgroup/////repo%41///branch%43////",
.attrs =
{
{"type", Attr("gitlab")},
{"owner", Attr("owner%2Fsubgroup")},
{"repo", Attr("repoA")},
{"ref", Attr("branchC")},
},
.description = "gitlab_ref_slashes_in_path_everywhere_with_pct_encoding",
.expectedUrl = "gitlab:owner%252Fsubgroup/repoA/branchC",
}),
[](const ::testing::TestParamInfo<InputFromURLTestCase> & info) { return info.param.description; });

View file

@ -3,6 +3,8 @@
#include <gtest/gtest.h>
#include <gmock/gmock.h>
#include <ranges>
namespace nix {
/* ----------- tests for url.hh --------------------------------------------------*/
@ -686,7 +688,102 @@ TEST(parseURL, gitlabNamespacedProjectUrls)
ASSERT_EQ(s, parsed.to_string());
}
/* ----------------------------------------------------------------------------
* pathSegments
* --------------------------------------------------------------------------*/
struct ParsedURLPathSegmentsTestCase
{
std::string url;
std::vector<std::string> segments;
std::string path;
bool skipEmpty;
std::string description;
};
class ParsedURLPathSegmentsTest : public ::testing::TestWithParam<ParsedURLPathSegmentsTestCase>
{};
TEST_P(ParsedURLPathSegmentsTest, segmentsAreCorrect)
{
const auto & testCase = GetParam();
auto segments = parseURL(testCase.url).pathSegments(/*skipEmpty=*/testCase.skipEmpty)
| std::ranges::to<decltype(testCase.segments)>();
EXPECT_EQ(segments, testCase.segments);
EXPECT_EQ(encodeUrlPath(segments), testCase.path);
}
INSTANTIATE_TEST_SUITE_P(
ParsedURL,
ParsedURLPathSegmentsTest,
::testing::Values(
ParsedURLPathSegmentsTestCase{
.url = "scheme:",
.segments = {""},
.path = "",
.skipEmpty = false,
.description = "no_authority_empty_path",
},
ParsedURLPathSegmentsTestCase{
.url = "scheme://",
.segments = {""},
.path = "",
.skipEmpty = false,
.description = "empty_authority_empty_path",
},
ParsedURLPathSegmentsTestCase{
.url = "scheme:///",
.segments = {"", ""},
.path = "/",
.skipEmpty = false,
.description = "empty_authority_empty_path_trailing",
},
ParsedURLPathSegmentsTestCase{
.url = "scheme://example.com/",
.segments = {"", ""},
.path = "/",
.skipEmpty = false,
.description = "non_empty_authority_empty_path",
},
ParsedURLPathSegmentsTestCase{
.url = "scheme://example.com//",
.segments = {"", "", ""},
.path = "//",
.skipEmpty = false,
.description = "non_empty_authority_non_empty_path",
},
ParsedURLPathSegmentsTestCase{
.url = "scheme://example.com///path///with//strange/empty///segments////",
.segments = {"path", "with", "strange", "empty", "segments"},
.path = "path/with/strange/empty/segments",
.skipEmpty = true,
.description = "skip_all_empty_segments_with_authority",
},
ParsedURLPathSegmentsTestCase{
.url = "scheme://example.com///lots///empty///",
.segments = {"", "", "", "lots", "", "", "empty", "", "", ""},
.path = "///lots///empty///",
.skipEmpty = false,
.description = "empty_segments_with_authority",
},
ParsedURLPathSegmentsTestCase{
.url = "scheme:/path///with//strange/empty///segments////",
.segments = {"path", "with", "strange", "empty", "segments"},
.path = "path/with/strange/empty/segments",
.skipEmpty = true,
.description = "skip_all_empty_segments_no_authority_starts_with_slash",
},
ParsedURLPathSegmentsTestCase{
.url = "scheme:path///with//strange/empty///segments////",
.segments = {"path", "with", "strange", "empty", "segments"},
.path = "path/with/strange/empty/segments",
.skipEmpty = true,
.description = "skip_all_empty_segments_no_authority_doesnt_start_with_slash",
}),
[](const auto & info) { return info.param.description; });
TEST(nix, isValidSchemeName)
{
ASSERT_TRUE(isValidSchemeName("http"));
ASSERT_TRUE(isValidSchemeName("https"));

View file

@ -1,6 +1,7 @@
#pragma once
///@file
#include <ranges>
#include <span>
#include "nix/util/error.hh"
@ -230,6 +231,20 @@ struct ParsedURL
* Remove `.` and `..` path segments.
*/
ParsedURL canonicalise();
/**
* Get a range of path segments (the substrings separated by '/' characters).
*
* @param skipEmpty Skip all empty path segments
*/
auto pathSegments(bool skipEmpty) const &
{
return std::views::filter(path, [skipEmpty](std::string_view segment) {
if (skipEmpty)
return !segment.empty();
return true;
});
}
};
std::ostream & operator<<(std::ostream & os, const ParsedURL & url);