1
1
Fork 0
mirror of https://github.com/NixOS/nix.git synced 2025-12-07 01:21:00 +01:00

Merge pull request #13819 from obsidiansystems/relative-url

Implement `parseURLRelative`, use in `HttpBinaryCacheStore`
This commit is contained in:
Sergei Zimmerman 2025-08-27 03:34:57 +03:00 committed by GitHub
commit 8ee74792fe
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 278 additions and 30 deletions

View file

@ -113,7 +113,7 @@ static void fetchTree(
auto s = state.coerceToString(attr.pos, *attr.value, context, "", false, false).toOwned();
attrs.emplace(
state.symbols[attr.name],
params.isFetchGit && state.symbols[attr.name] == "url" ? fixGitURL(s) : s);
params.isFetchGit && state.symbols[attr.name] == "url" ? fixGitURL(s).to_string() : s);
} else if (attr.value->type() == nBool)
attrs.emplace(state.symbols[attr.name], Explicit<bool>{attr.value->boolean()});
else if (attr.value->type() == nInt) {
@ -175,7 +175,7 @@ static void fetchTree(
if (params.isFetchGit) {
fetchers::Attrs attrs;
attrs.emplace("type", "git");
attrs.emplace("url", fixGitURL(url));
attrs.emplace("url", fixGitURL(url).to_string());
if (!attrs.contains("exportIgnore")
&& (!attrs.contains("submodules") || !*fetchers::maybeGetBoolAttr(attrs, "submodules"))) {
attrs.emplace("exportIgnore", Explicit<bool>{true});

View file

@ -179,7 +179,7 @@ Fetch::Fetch(git_repository * repo, git_oid rev)
const auto remoteUrl = lfs::getLfsEndpointUrl(repo);
this->url = nix::parseURL(nix::fixGitURL(remoteUrl)).canonicalise();
this->url = nix::fixGitURL(remoteUrl).canonicalise();
}
bool Fetch::shouldFetch(const CanonPath & path) const

View file

@ -233,9 +233,7 @@ struct GitInputScheme : InputScheme
Input input{settings};
input.attrs = attrs;
auto url = fixGitURL(getStrAttr(attrs, "url"));
parseURL(url);
input.attrs["url"] = url;
input.attrs["url"] = fixGitURL(getStrAttr(attrs, "url")).to_string();
getShallowAttr(input);
getSubmodulesAttr(input);
getAllRefsAttr(input);

View file

@ -154,22 +154,17 @@ protected:
FileTransferRequest makeRequest(const std::string & path)
{
/* FIXME path is not a path, but a full relative or absolute
/* Otherwise the last path fragment will get discarded. */
auto cacheUriWithTrailingSlash = config->cacheUri;
if (!cacheUriWithTrailingSlash.path.empty())
cacheUriWithTrailingSlash.path += "/";
/* path is not a path, but a full relative or absolute
URL, e.g. we've seen in the wild NARINFO files have a URL
field which is
`nar/15f99rdaf26k39knmzry4xd0d97wp6yfpnfk1z9avakis7ipb9yg.nar?hash=zphkqn2wg8mnvbkixnl2aadkbn0rcnfj`
(note the query param) and that gets passed here.
What should actually happen is that we have two parsed URLs
(if we support relative URLs), and then we combined them with
a URL `operator/` which would be like
`std::filesystem::path`'s equivalent operator, which properly
combines the the URLs, whether the right is relative or
absolute. */
return FileTransferRequest(parseURL(
hasPrefix(path, "https://") || hasPrefix(path, "http://") || hasPrefix(path, "file://")
? path
: config->cacheUri.to_string() + "/" + path));
(note the query param) and that gets passed here. */
return FileTransferRequest(parseURLRelative(path, cacheUriWithTrailingSlash));
}
void getFile(const std::string & path, Sink & sink) override

View file

@ -290,6 +290,194 @@ TEST(parseURL, parsesHttpUrlWithEmptyPort)
ASSERT_EQ("http://www.example.org/file.tar.gz?foo=bar", parsed.to_string());
}
/* ----------------------------------------------------------------------------
* parseURLRelative
* --------------------------------------------------------------------------*/
TEST(parseURLRelative, resolvesRelativePath)
{
ParsedURL base = parseURL("http://example.org/dir/page.html");
auto parsed = parseURLRelative("subdir/file.txt", base);
ParsedURL expected{
.scheme = "http",
.authority = ParsedURL::Authority{.hostType = HostType::Name, .host = "example.org"},
.path = "/dir/subdir/file.txt",
.query = {},
.fragment = "",
};
ASSERT_EQ(parsed, expected);
}
TEST(parseURLRelative, baseUrlIpv6AddressWithoutZoneId)
{
ParsedURL base = parseURL("http://[fe80::818c:da4d:8975:415c]/dir/page.html");
auto parsed = parseURLRelative("subdir/file.txt", base);
ParsedURL expected{
.scheme = "http",
.authority = ParsedURL::Authority{.hostType = HostType::IPv6, .host = "fe80::818c:da4d:8975:415c"},
.path = "/dir/subdir/file.txt",
.query = {},
.fragment = "",
};
ASSERT_EQ(parsed, expected);
}
TEST(parseURLRelative, resolvesRelativePathIpv6AddressWithZoneId)
{
ParsedURL base = parseURL("http://[fe80::818c:da4d:8975:415c\%25enp0s25]:8080/dir/page.html");
auto parsed = parseURLRelative("subdir/file2.txt", base);
ParsedURL expected{
.scheme = "http",
.authority = Authority{.hostType = HostType::IPv6, .host = "fe80::818c:da4d:8975:415c\%enp0s25", .port = 8080},
.path = "/dir/subdir/file2.txt",
.query = {},
.fragment = "",
};
ASSERT_EQ(parsed, expected);
}
TEST(parseURLRelative, resolvesRelativePathWithDot)
{
ParsedURL base = parseURL("http://example.org/dir/page.html");
auto parsed = parseURLRelative("./subdir/file.txt", base);
ParsedURL expected{
.scheme = "http",
.authority = ParsedURL::Authority{.hostType = HostType::Name, .host = "example.org"},
.path = "/dir/subdir/file.txt",
.query = {},
.fragment = "",
};
ASSERT_EQ(parsed, expected);
}
TEST(parseURLRelative, resolvesParentDirectory)
{
ParsedURL base = parseURL("http://example.org:234/dir/page.html");
auto parsed = parseURLRelative("../up.txt", base);
ParsedURL expected{
.scheme = "http",
.authority = ParsedURL::Authority{.hostType = HostType::Name, .host = "example.org", .port = 234},
.path = "/up.txt",
.query = {},
.fragment = "",
};
ASSERT_EQ(parsed, expected);
}
TEST(parseURLRelative, replacesPathWithAbsoluteRelative)
{
ParsedURL base = parseURL("http://example.org/dir/page.html");
auto parsed = parseURLRelative("/rooted.txt", base);
ParsedURL expected{
.scheme = "http",
.authority = ParsedURL::Authority{.hostType = HostType::Name, .host = "example.org"},
.path = "/rooted.txt",
.query = {},
.fragment = "",
};
ASSERT_EQ(parsed, expected);
}
TEST(parseURLRelative, keepsQueryAndFragmentFromRelative)
{
// But discard query params on base URL
ParsedURL base = parseURL("https://www.example.org/path/index.html?z=3");
auto parsed = parseURLRelative("other.html?x=1&y=2#frag", base);
ParsedURL expected{
.scheme = "https",
.authority = ParsedURL::Authority{.hostType = HostType::Name, .host = "www.example.org"},
.path = "/path/other.html",
.query = {{"x", "1"}, {"y", "2"}},
.fragment = "frag",
};
ASSERT_EQ(parsed, expected);
}
TEST(parseURLRelative, absOverride)
{
ParsedURL base = parseURL("http://example.org/path/page.html");
std::string_view abs = "https://127.0.0.1.org/secure";
auto parsed = parseURLRelative(abs, base);
auto parsedAbs = parseURL(abs);
ASSERT_EQ(parsed, parsedAbs);
}
TEST(parseURLRelative, absOverrideWithZoneId)
{
ParsedURL base = parseURL("http://example.org/path/page.html");
std::string_view abs = "https://[fe80::818c:da4d:8975:415c\%25enp0s25]/secure?foo=bar";
auto parsed = parseURLRelative(abs, base);
auto parsedAbs = parseURL(abs);
ASSERT_EQ(parsed, parsedAbs);
}
TEST(parseURLRelative, bothWithoutAuthority)
{
ParsedURL base = parseURL("mailto:mail-base@bar.baz?bcc=alice@asdf.com");
std::string_view over = "mailto:mail-override@foo.bar?subject=url-testing";
auto parsed = parseURLRelative(over, base);
auto parsedOverride = parseURL(over);
ASSERT_EQ(parsed, parsedOverride);
}
TEST(parseURLRelative, emptyRelative)
{
ParsedURL base = parseURL("https://www.example.org/path/index.html?a\%20b=5\%206&x\%20y=34#frag");
auto parsed = parseURLRelative("", base);
ParsedURL expected{
.scheme = "https",
.authority = ParsedURL::Authority{.hostType = HostType::Name, .host = "www.example.org"},
.path = "/path/index.html",
.query = {{"a b", "5 6"}, {"x y", "34"}},
.fragment = "",
};
EXPECT_EQ(base.fragment, "frag");
EXPECT_EQ(parsed, expected);
}
TEST(parseURLRelative, fragmentRelative)
{
ParsedURL base = parseURL("https://www.example.org/path/index.html?a\%20b=5\%206&x\%20y=34#frag");
auto parsed = parseURLRelative("#frag2", base);
ParsedURL expected{
.scheme = "https",
.authority = ParsedURL::Authority{.hostType = HostType::Name, .host = "www.example.org"},
.path = "/path/index.html",
.query = {{"a b", "5 6"}, {"x y", "34"}},
.fragment = "frag2",
};
EXPECT_EQ(parsed, expected);
}
TEST(parseURLRelative, queryRelative)
{
ParsedURL base = parseURL("https://www.example.org/path/index.html?a\%20b=5\%206&x\%20y=34#frag");
auto parsed = parseURLRelative("?asdf\%20qwer=1\%202\%203", base);
ParsedURL expected{
.scheme = "https",
.authority = ParsedURL::Authority{.hostType = HostType::Name, .host = "www.example.org"},
.path = "/path/index.html",
.query = {{"asdf qwer", "1 2 3"}},
.fragment = "",
};
EXPECT_EQ(parsed, expected);
}
TEST(parseURLRelative, queryFragmentRelative)
{
ParsedURL base = parseURL("https://www.example.org/path/index.html?a\%20b=5\%206&x\%20y=34#frag");
auto parsed = parseURLRelative("?asdf\%20qwer=1\%202\%203#frag2", base);
ParsedURL expected{
.scheme = "https",
.authority = ParsedURL::Authority{.hostType = HostType::Name, .host = "www.example.org"},
.path = "/path/index.html",
.query = {{"asdf qwer", "1 2 3"}},
.fragment = "frag2",
};
EXPECT_EQ(parsed, expected);
}
/* ----------------------------------------------------------------------------
* decodeQuery
* --------------------------------------------------------------------------*/

View file

@ -117,6 +117,16 @@ std::string encodeQuery(const StringMap & query);
*/
ParsedURL parseURL(std::string_view url, bool lenient = false);
/**
* Like `parseURL`, but also accepts relative URLs, which are resolved
* against the given base URL.
*
* This is specified in [IETF RFC 3986, section 5](https://datatracker.ietf.org/doc/html/rfc3986#section-5)
*
* Behavior should also match the `new URL(url, base)` JavaScript constructor.
*/
ParsedURL parseURLRelative(std::string_view url, const ParsedURL & base);
/**
* Although thats not really standardized anywhere, an number of tools
* use a scheme of the form 'x+y' in urls, where y is the transport layer
@ -136,7 +146,7 @@ ParsedUrlScheme parseUrlScheme(std::string_view scheme);
/* Detects scp-style uris (e.g. git@github.com:NixOS/nix) and fixes
them by removing the `:` and assuming a scheme of `ssh://`. Also
changes absolute paths into file:// URLs. */
std::string fixGitURL(const std::string & url);
ParsedURL fixGitURL(const std::string & url);
/**
* Whether a string is valid as RFC 3986 scheme name.

View file

@ -108,6 +108,8 @@ static std::string percentEncodeCharSet(std::string_view s, auto charSet)
return res;
}
static ParsedURL fromBoostUrlView(boost::urls::url_view url, bool lenient);
ParsedURL parseURL(std::string_view url, bool lenient)
try {
/* Account for several non-standard properties of nix urls (for back-compat):
@ -149,10 +151,15 @@ try {
}();
}
auto urlView = boost::urls::url_view(lenient ? fixedEncodedUrl : url);
return fromBoostUrlView(boost::urls::url_view(lenient ? fixedEncodedUrl : url), lenient);
} catch (boost::system::system_error & e) {
throw BadURL("'%s' is not a valid URL: %s", url, e.code().message());
}
static ParsedURL fromBoostUrlView(boost::urls::url_view urlView, bool lenient)
{
if (!urlView.has_scheme())
throw BadURL("'%s' doesn't have a scheme", url);
throw BadURL("'%s' doesn't have a scheme", urlView.buffer());
auto scheme = urlView.scheme();
auto authority = [&]() -> std::optional<ParsedURL::Authority> {
@ -170,7 +177,7 @@ try {
* scheme considers a missing authority or empty host invalid. */
auto transportIsFile = parseUrlScheme(scheme).transport == "file";
if (authority && authority->host.size() && transportIsFile)
throw BadURL("file:// URL '%s' has unexpected authority '%s'", url, *authority);
throw BadURL("file:// URL '%s' has unexpected authority '%s'", urlView.buffer(), *authority);
auto path = urlView.path(); /* Does pct-decoding */
auto fragment = urlView.fragment(); /* Does pct-decoding */
@ -189,8 +196,58 @@ try {
.query = decodeQuery(query, lenient),
.fragment = fragment,
};
} catch (boost::system::system_error & e) {
throw BadURL("'%s' is not a valid URL: %s", url, e.code().message());
}
ParsedURL parseURLRelative(std::string_view urlS, const ParsedURL & base)
try {
boost::urls::url resolved;
try {
resolved.set_scheme(base.scheme);
if (base.authority) {
auto & authority = *base.authority;
resolved.set_host_address(authority.host);
if (authority.user)
resolved.set_user(*authority.user);
if (authority.password)
resolved.set_password(*authority.password);
if (authority.port)
resolved.set_port_number(*authority.port);
}
resolved.set_path(base.path);
resolved.set_encoded_query(encodeQuery(base.query));
resolved.set_fragment(base.fragment);
} catch (boost::system::system_error & e) {
throw BadURL("'%s' is not a valid URL: %s", base.to_string(), e.code().message());
}
boost::urls::url_view url;
try {
url = urlS;
resolved.resolve(url).value();
} catch (boost::system::system_error & e) {
throw BadURL("'%s' is not a valid URL: %s", urlS, e.code().message());
}
auto ret = fromBoostUrlView(resolved, /*lenient=*/false);
/* Hack: Boost `url_view` supports Zone IDs, but `url` does not.
Just manually take the authority from the original URL to work
around it. See https://github.com/boostorg/url/issues/919 for
details. */
if (!url.has_authority()) {
ret.authority = base.authority;
}
/* Hack, work around fragment of base URL improperly being preserved
https://github.com/boostorg/url/issues/920 */
ret.fragment = url.has_fragment() ? std::string{url.fragment()} : "";
return ret;
} catch (BadURL & e) {
e.addTrace({}, "while resolving possibly-relative url '%s' against base URL '%s'", urlS, base);
throw;
}
std::string percentDecode(std::string_view in)
@ -287,17 +344,17 @@ ParsedUrlScheme parseUrlScheme(std::string_view scheme)
};
}
std::string fixGitURL(const std::string & url)
ParsedURL fixGitURL(const std::string & url)
{
std::regex scpRegex("([^/]*)@(.*):(.*)");
if (!hasPrefix(url, "/") && std::regex_match(url, scpRegex))
return std::regex_replace(url, scpRegex, "ssh://$1@$2/$3");
return parseURL(std::regex_replace(url, scpRegex, "ssh://$1@$2/$3"));
if (hasPrefix(url, "file:"))
return url;
return parseURL(url);
if (url.find("://") == std::string::npos) {
return (ParsedURL{.scheme = "file", .authority = ParsedURL::Authority{}, .path = url}).to_string();
return (ParsedURL{.scheme = "file", .authority = ParsedURL::Authority{}, .path = url});
}
return url;
return parseURL(url);
}
// https://www.rfc-editor.org/rfc/rfc3986#section-3.1