1
1
Fork 0
mirror of https://github.com/NixOS/nix.git synced 2025-11-09 20:16:03 +01:00

Limit to lenient parsing of non-standard URLs only where needed

This allows us to put `parseURL` in more spots without furthering
technical debt.
This commit is contained in:
John Ericson 2025-08-22 12:26:11 -04:00
parent 4083eff0c0
commit 72a548ed6a
8 changed files with 76 additions and 51 deletions

View file

@ -185,7 +185,7 @@ static void prim_fetchClosure(EvalState & state, const PosIdx pos, Value ** args
{.msg = HintFmt("attribute '%s' is missing in call to 'fetchClosure'", "fromStore"), {.msg = HintFmt("attribute '%s' is missing in call to 'fetchClosure'", "fromStore"),
.pos = state.positions[pos]}); .pos = state.positions[pos]});
auto parsedURL = parseURL(*fromStoreUrl); auto parsedURL = parseURL(*fromStoreUrl, /*lenient=*/true);
if (parsedURL.scheme != "http" && parsedURL.scheme != "https" if (parsedURL.scheme != "http" && parsedURL.scheme != "https"
&& !(getEnv("_NIX_IN_TEST").has_value() && parsedURL.scheme == "file")) && !(getEnv("_NIX_IN_TEST").has_value() && parsedURL.scheme == "file"))

View file

@ -13,8 +13,9 @@ TEST(getNameFromURL, getNameFromURL)
ASSERT_EQ(getNameFromURL(parseURL("path:~/repos/nixpkgs#packages.x86_64-linux.Hello")), "Hello"); ASSERT_EQ(getNameFromURL(parseURL("path:~/repos/nixpkgs#packages.x86_64-linux.Hello")), "Hello");
ASSERT_EQ(getNameFromURL(parseURL("path:.#nonStandardAttr.mylaptop")), "mylaptop"); ASSERT_EQ(getNameFromURL(parseURL("path:.#nonStandardAttr.mylaptop")), "mylaptop");
ASSERT_EQ(getNameFromURL(parseURL("path:./repos/myflake#nonStandardAttr.mylaptop")), "mylaptop"); ASSERT_EQ(getNameFromURL(parseURL("path:./repos/myflake#nonStandardAttr.mylaptop")), "mylaptop");
ASSERT_EQ(getNameFromURL(parseURL("path:./nixpkgs#packages.x86_64-linux.complex^bin,man")), "complex"); ASSERT_EQ(
ASSERT_EQ(getNameFromURL(parseURL("path:./myproj#packages.x86_64-linux.default^*")), "myproj"); getNameFromURL(parseURL("path:./nixpkgs#packages.x86_64-linux.complex^bin,man", /*lenient=*/true)), "complex");
ASSERT_EQ(getNameFromURL(parseURL("path:./myproj#packages.x86_64-linux.default^*", /*lenient=*/true)), "myproj");
ASSERT_EQ(getNameFromURL(parseURL("path:./myproj#defaultPackage.x86_64-linux")), "myproj"); ASSERT_EQ(getNameFromURL(parseURL("path:./myproj#defaultPackage.x86_64-linux")), "myproj");
ASSERT_EQ(getNameFromURL(parseURL("github:NixOS/nixpkgs#packages.x86_64-linux.hello")), "hello"); ASSERT_EQ(getNameFromURL(parseURL("github:NixOS/nixpkgs#packages.x86_64-linux.hello")), "hello");
@ -80,6 +81,6 @@ TEST(getNameFromURL, getNameFromURL)
ASSERT_EQ(getNameFromURL(parseURL("path:.")), std::nullopt); ASSERT_EQ(getNameFromURL(parseURL("path:.")), std::nullopt);
ASSERT_EQ(getNameFromURL(parseURL("file:.#")), std::nullopt); ASSERT_EQ(getNameFromURL(parseURL("file:.#")), std::nullopt);
ASSERT_EQ(getNameFromURL(parseURL("path:.#packages.x86_64-linux.default")), std::nullopt); ASSERT_EQ(getNameFromURL(parseURL("path:.#packages.x86_64-linux.default")), std::nullopt);
ASSERT_EQ(getNameFromURL(parseURL("path:.#packages.x86_64-linux.default^*")), std::nullopt); ASSERT_EQ(getNameFromURL(parseURL("path:.#packages.x86_64-linux.default^*", /*lenient=*/true)), std::nullopt);
} }
} // namespace nix } // namespace nix

View file

@ -82,7 +82,7 @@ std::pair<FlakeRef, std::string> parsePathFlakeRefWithFragment(
auto succeeds = std::regex_match(url, match, pathFlakeRegex); auto succeeds = std::regex_match(url, match, pathFlakeRegex);
assert(succeeds); assert(succeeds);
auto path = match[1].str(); auto path = match[1].str();
auto query = decodeQuery(match[3].str()); auto query = decodeQuery(match[3].str(), /*lenient=*/true);
auto fragment = percentDecode(match[5].str()); auto fragment = percentDecode(match[5].str());
if (baseDir) { if (baseDir) {
@ -210,7 +210,7 @@ std::optional<std::pair<FlakeRef, std::string>> parseURLFlakeRef(
bool isFlake) bool isFlake)
{ {
try { try {
auto parsed = parseURL(url); auto parsed = parseURL(url, /*lenient=*/true);
if (baseDir && (parsed.scheme == "path" || parsed.scheme == "git+file") && !isAbsolute(parsed.path)) if (baseDir && (parsed.scheme == "path" || parsed.scheme == "git+file") && !isAbsolute(parsed.path))
parsed.path = absPath(parsed.path, *baseDir); parsed.path = absPath(parsed.path, *baseDir);
return fromParsedURL(fetchSettings, std::move(parsed), isFlake); return fromParsedURL(fetchSettings, std::move(parsed), isFlake);
@ -289,7 +289,7 @@ FlakeRef FlakeRef::canonicalize() const
filtering the `dir` query parameter from the URL. */ filtering the `dir` query parameter from the URL. */
if (auto url = fetchers::maybeGetStrAttr(flakeRef.input.attrs, "url")) { if (auto url = fetchers::maybeGetStrAttr(flakeRef.input.attrs, "url")) {
try { try {
auto parsed = parseURL(*url); auto parsed = parseURL(*url, /*lenient=*/true);
if (auto dir2 = get(parsed.query, "dir")) { if (auto dir2 = get(parsed.query, "dir")) {
if (flakeRef.subdir != "" && flakeRef.subdir == *dir2) if (flakeRef.subdir != "" && flakeRef.subdir == *dir2)
parsed.query.erase("dir"); parsed.query.erase("dir");

View file

@ -45,7 +45,7 @@ StoreReference StoreReference::parse(const std::string & uri, const StoreReferen
{ {
auto params = extraParams; auto params = extraParams;
try { try {
auto parsedUri = parseURL(uri); auto parsedUri = parseURL(uri, /*lenient=*/true);
params.insert(parsedUri.query.begin(), parsedUri.query.end()); params.insert(parsedUri.query.begin(), parsedUri.query.end());
auto baseURI = parsedUri.authority.value_or(ParsedURL::Authority{}).to_string() + parsedUri.path; auto baseURI = parsedUri.authority.value_or(ParsedURL::Authority{}).to_string() + parsedUri.path;
@ -107,7 +107,7 @@ std::pair<std::string, StoreReference::Params> splitUriAndParams(const std::stri
StoreReference::Params params; StoreReference::Params params;
auto q = uri.find('?'); auto q = uri.find('?');
if (q != std::string::npos) { if (q != std::string::npos) {
params = decodeQuery(uri.substr(q + 1)); params = decodeQuery(uri.substr(q + 1), /*lenient=*/true);
uri = uri_.substr(0, q); uri = uri_.substr(0, q);
} }
return {uri, params}; return {uri, params};

View file

@ -221,15 +221,20 @@ TEST(parseURL, parsedUrlsWithUnescapedChars)
* 2. Unescaped spaces and quotes in query. * 2. Unescaped spaces and quotes in query.
*/ */
auto s = "http://www.example.org/file.tar.gz?query \"= 123\"#shevron^quote\"space "; auto s = "http://www.example.org/file.tar.gz?query \"= 123\"#shevron^quote\"space ";
auto url = parseURL(s);
ASSERT_EQ(url.fragment, "shevron^quote\"space "); /* Without leniency for back compat, this should throw. */
EXPECT_THROW(parseURL(s), Error);
/* With leniency for back compat, this should parse. */
auto url = parseURL(s, /*lenient=*/true);
EXPECT_EQ(url.fragment, "shevron^quote\"space ");
auto query = StringMap{ auto query = StringMap{
{"query \"", " 123\""}, {"query \"", " 123\""},
}; };
ASSERT_EQ(url.query, query); EXPECT_EQ(url.query, query);
} }
TEST(parseURL, parseFTPUrl) TEST(parseURL, parseFTPUrl)

View file

@ -96,14 +96,18 @@ MakeError(BadURL, Error);
std::string percentDecode(std::string_view in); std::string percentDecode(std::string_view in);
std::string percentEncode(std::string_view s, std::string_view keep = ""); std::string percentEncode(std::string_view s, std::string_view keep = "");
StringMap decodeQuery(const std::string & query); /**
* @param lenient @see parseURL
*/
StringMap decodeQuery(std::string_view query, bool lenient = false);
std::string encodeQuery(const StringMap & query); std::string encodeQuery(const StringMap & query);
/** /**
* Parse a Nix URL into a ParsedURL. * Parse a URL into a ParsedURL.
* *
* Nix URI is mostly compliant with RFC3986, but with some deviations: * @parm lenient Also allow some long-supported Nix URIs that are not quite compliant with RFC3986.
* Here are the deviations:
* - Fragments can contain unescaped (not URL encoded) '^', '"' or space literals. * - Fragments can contain unescaped (not URL encoded) '^', '"' or space literals.
* - Queries may contain unescaped '"' or spaces. * - Queries may contain unescaped '"' or spaces.
* *
@ -111,7 +115,7 @@ std::string encodeQuery(const StringMap & query);
* *
* @throws BadURL * @throws BadURL
*/ */
ParsedURL parseURL(std::string_view url); ParsedURL parseURL(std::string_view url, bool lenient = false);
/** /**
* Although thats not really standardized anywhere, an number of tools * Although thats not really standardized anywhere, an number of tools

View file

@ -108,41 +108,48 @@ static std::string percentEncodeCharSet(std::string_view s, auto charSet)
return res; return res;
} }
ParsedURL parseURL(std::string_view url) ParsedURL parseURL(std::string_view url, bool lenient)
try { try {
/* Account for several non-standard properties of nix urls (for back-compat): /* Account for several non-standard properties of nix urls (for back-compat):
* - Allow unescaped spaces ' ' and '"' characters in queries. * - Allow unescaped spaces ' ' and '"' characters in queries.
* - Allow '"', ' ' and '^' characters in the fragment component. * - Allow '"', ' ' and '^' characters in the fragment component.
* We could write our own grammar for this, but fixing it up here seems * We could write our own grammar for this, but fixing it up here seems
* more concise, since the deviation is rather minor. * more concise, since the deviation is rather minor.
*
* If `!lenient` don't bother initializing, because we can just
* parse `url` directly`.
*/ */
std::string fixedEncodedUrl = [&]() { std::string fixedEncodedUrl;
std::string fixed;
std::string_view view = url;
if (auto beforeQuery = splitPrefixTo(view, '?')) { if (lenient) {
fixed += *beforeQuery; fixedEncodedUrl = [&] {
fixed += '?'; std::string fixed;
auto fragmentStart = view.find('#'); std::string_view view = url;
auto queryView = view.substr(0, fragmentStart);
auto fixedQuery = percentEncodeCharSet(queryView, extraAllowedCharsInQuery);
fixed += fixedQuery;
view.remove_prefix(std::min(fragmentStart, view.size()));
}
if (auto beforeFragment = splitPrefixTo(view, '#')) { if (auto beforeQuery = splitPrefixTo(view, '?')) {
fixed += *beforeFragment; fixed += *beforeQuery;
fixed += '#'; fixed += '?';
auto fixedFragment = percentEncodeCharSet(view, extraAllowedCharsInFragment); auto fragmentStart = view.find('#');
fixed += fixedFragment; auto queryView = view.substr(0, fragmentStart);
auto fixedQuery = percentEncodeCharSet(queryView, extraAllowedCharsInQuery);
fixed += fixedQuery;
view.remove_prefix(std::min(fragmentStart, view.size()));
}
if (auto beforeFragment = splitPrefixTo(view, '#')) {
fixed += *beforeFragment;
fixed += '#';
auto fixedFragment = percentEncodeCharSet(view, extraAllowedCharsInFragment);
fixed += fixedFragment;
return fixed;
}
fixed += view;
return fixed; return fixed;
} }();
}
fixed += view; auto urlView = boost::urls::url_view(lenient ? fixedEncodedUrl : url);
return fixed;
}();
auto urlView = boost::urls::url_view(fixedEncodedUrl);
if (!urlView.has_scheme()) if (!urlView.has_scheme())
throw BadURL("'%s' doesn't have a scheme", url); throw BadURL("'%s' doesn't have a scheme", url);
@ -179,7 +186,7 @@ try {
.scheme = scheme, .scheme = scheme,
.authority = authority, .authority = authority,
.path = path, .path = path,
.query = decodeQuery(query), .query = decodeQuery(query, lenient),
.fragment = fragment, .fragment = fragment,
}; };
} catch (boost::system::system_error & e) { } catch (boost::system::system_error & e) {
@ -201,14 +208,17 @@ std::string percentEncode(std::string_view s, std::string_view keep)
s, [keep](char c) { return boost::urls::unreserved_chars(c) || keep.find(c) != keep.npos; }); s, [keep](char c) { return boost::urls::unreserved_chars(c) || keep.find(c) != keep.npos; });
} }
StringMap decodeQuery(std::string_view query) StringMap decodeQuery(std::string_view query, bool lenient)
try { try {
/* For back-compat unescaped characters are allowed. */ /* When `lenient = true`, for back-compat unescaped characters are allowed. */
auto fixedEncodedQuery = percentEncodeCharSet(query, extraAllowedCharsInQuery); std::string fixedEncodedQuery;
if (lenient) {
fixedEncodedQuery = percentEncodeCharSet(query, extraAllowedCharsInQuery);
}
StringMap result; StringMap result;
auto encodedQuery = boost::urls::params_encoded_view(fixedEncodedQuery); auto encodedQuery = boost::urls::params_encoded_view(lenient ? fixedEncodedQuery : query);
for (auto && [key, value, value_specified] : encodedQuery) { for (auto && [key, value, value_specified] : encodedQuery) {
if (!value_specified) { if (!value_specified) {
warn("dubious URI query '%s' is missing equal sign '%s', ignoring", std::string_view(key), "="); warn("dubious URI query '%s' is missing equal sign '%s', ignoring", std::string_view(key), "=");

View file

@ -105,7 +105,8 @@ std::string getNameFromElement(const ProfileElement & element)
{ {
std::optional<std::string> result = std::nullopt; std::optional<std::string> result = std::nullopt;
if (element.source) { if (element.source) {
result = getNameFromURL(parseURL(element.source->to_string())); // Seems to be for Flake URLs
result = getNameFromURL(parseURL(element.source->to_string(), /*lenient=*/true));
} }
return result.value_or(element.identifier()); return result.value_or(element.identifier());
} }
@ -160,11 +161,15 @@ struct ProfileManifest
e["outputs"].get<ExtendedOutputsSpec>()}; e["outputs"].get<ExtendedOutputsSpec>()};
} }
std::string name = std::string name = [&] {
elems.is_object() ? elem.key() if (elems.is_object())
: element.source return elem.key();
? getNameFromURL(parseURL(element.source->to_string())).value_or(element.identifier()) if (element.source) {
: element.identifier(); if (auto optName = getNameFromURL(parseURL(element.source->to_string(), /*lenient=*/true)))
return *optName;
}
return element.identifier();
}();
addElement(name, std::move(element)); addElement(name, std::move(element));
} }