mirror of
https://github.com/NixOS/nix.git
synced 2025-11-09 12:06:01 +01:00
Merge pull request #13812 from obsidiansystems/url-parse-leniency
Limit to lenient parsing of non-standard URLs only where needed
This commit is contained in:
commit
2fa2c0b09f
8 changed files with 76 additions and 51 deletions
|
|
@ -185,7 +185,7 @@ static void prim_fetchClosure(EvalState & state, const PosIdx pos, Value ** args
|
||||||
{.msg = HintFmt("attribute '%s' is missing in call to 'fetchClosure'", "fromStore"),
|
{.msg = HintFmt("attribute '%s' is missing in call to 'fetchClosure'", "fromStore"),
|
||||||
.pos = state.positions[pos]});
|
.pos = state.positions[pos]});
|
||||||
|
|
||||||
auto parsedURL = parseURL(*fromStoreUrl);
|
auto parsedURL = parseURL(*fromStoreUrl, /*lenient=*/true);
|
||||||
|
|
||||||
if (parsedURL.scheme != "http" && parsedURL.scheme != "https"
|
if (parsedURL.scheme != "http" && parsedURL.scheme != "https"
|
||||||
&& !(getEnv("_NIX_IN_TEST").has_value() && parsedURL.scheme == "file"))
|
&& !(getEnv("_NIX_IN_TEST").has_value() && parsedURL.scheme == "file"))
|
||||||
|
|
|
||||||
|
|
@ -13,8 +13,9 @@ TEST(getNameFromURL, getNameFromURL)
|
||||||
ASSERT_EQ(getNameFromURL(parseURL("path:~/repos/nixpkgs#packages.x86_64-linux.Hello")), "Hello");
|
ASSERT_EQ(getNameFromURL(parseURL("path:~/repos/nixpkgs#packages.x86_64-linux.Hello")), "Hello");
|
||||||
ASSERT_EQ(getNameFromURL(parseURL("path:.#nonStandardAttr.mylaptop")), "mylaptop");
|
ASSERT_EQ(getNameFromURL(parseURL("path:.#nonStandardAttr.mylaptop")), "mylaptop");
|
||||||
ASSERT_EQ(getNameFromURL(parseURL("path:./repos/myflake#nonStandardAttr.mylaptop")), "mylaptop");
|
ASSERT_EQ(getNameFromURL(parseURL("path:./repos/myflake#nonStandardAttr.mylaptop")), "mylaptop");
|
||||||
ASSERT_EQ(getNameFromURL(parseURL("path:./nixpkgs#packages.x86_64-linux.complex^bin,man")), "complex");
|
ASSERT_EQ(
|
||||||
ASSERT_EQ(getNameFromURL(parseURL("path:./myproj#packages.x86_64-linux.default^*")), "myproj");
|
getNameFromURL(parseURL("path:./nixpkgs#packages.x86_64-linux.complex^bin,man", /*lenient=*/true)), "complex");
|
||||||
|
ASSERT_EQ(getNameFromURL(parseURL("path:./myproj#packages.x86_64-linux.default^*", /*lenient=*/true)), "myproj");
|
||||||
ASSERT_EQ(getNameFromURL(parseURL("path:./myproj#defaultPackage.x86_64-linux")), "myproj");
|
ASSERT_EQ(getNameFromURL(parseURL("path:./myproj#defaultPackage.x86_64-linux")), "myproj");
|
||||||
|
|
||||||
ASSERT_EQ(getNameFromURL(parseURL("github:NixOS/nixpkgs#packages.x86_64-linux.hello")), "hello");
|
ASSERT_EQ(getNameFromURL(parseURL("github:NixOS/nixpkgs#packages.x86_64-linux.hello")), "hello");
|
||||||
|
|
@ -80,6 +81,6 @@ TEST(getNameFromURL, getNameFromURL)
|
||||||
ASSERT_EQ(getNameFromURL(parseURL("path:.")), std::nullopt);
|
ASSERT_EQ(getNameFromURL(parseURL("path:.")), std::nullopt);
|
||||||
ASSERT_EQ(getNameFromURL(parseURL("file:.#")), std::nullopt);
|
ASSERT_EQ(getNameFromURL(parseURL("file:.#")), std::nullopt);
|
||||||
ASSERT_EQ(getNameFromURL(parseURL("path:.#packages.x86_64-linux.default")), std::nullopt);
|
ASSERT_EQ(getNameFromURL(parseURL("path:.#packages.x86_64-linux.default")), std::nullopt);
|
||||||
ASSERT_EQ(getNameFromURL(parseURL("path:.#packages.x86_64-linux.default^*")), std::nullopt);
|
ASSERT_EQ(getNameFromURL(parseURL("path:.#packages.x86_64-linux.default^*", /*lenient=*/true)), std::nullopt);
|
||||||
}
|
}
|
||||||
} // namespace nix
|
} // namespace nix
|
||||||
|
|
|
||||||
|
|
@ -82,7 +82,7 @@ std::pair<FlakeRef, std::string> parsePathFlakeRefWithFragment(
|
||||||
auto succeeds = std::regex_match(url, match, pathFlakeRegex);
|
auto succeeds = std::regex_match(url, match, pathFlakeRegex);
|
||||||
assert(succeeds);
|
assert(succeeds);
|
||||||
auto path = match[1].str();
|
auto path = match[1].str();
|
||||||
auto query = decodeQuery(match[3]);
|
auto query = decodeQuery(match[3].str(), /*lenient=*/true);
|
||||||
auto fragment = percentDecode(match[5].str());
|
auto fragment = percentDecode(match[5].str());
|
||||||
|
|
||||||
if (baseDir) {
|
if (baseDir) {
|
||||||
|
|
@ -210,7 +210,7 @@ std::optional<std::pair<FlakeRef, std::string>> parseURLFlakeRef(
|
||||||
bool isFlake)
|
bool isFlake)
|
||||||
{
|
{
|
||||||
try {
|
try {
|
||||||
auto parsed = parseURL(url);
|
auto parsed = parseURL(url, /*lenient=*/true);
|
||||||
if (baseDir && (parsed.scheme == "path" || parsed.scheme == "git+file") && !isAbsolute(parsed.path))
|
if (baseDir && (parsed.scheme == "path" || parsed.scheme == "git+file") && !isAbsolute(parsed.path))
|
||||||
parsed.path = absPath(parsed.path, *baseDir);
|
parsed.path = absPath(parsed.path, *baseDir);
|
||||||
return fromParsedURL(fetchSettings, std::move(parsed), isFlake);
|
return fromParsedURL(fetchSettings, std::move(parsed), isFlake);
|
||||||
|
|
@ -289,7 +289,7 @@ FlakeRef FlakeRef::canonicalize() const
|
||||||
filtering the `dir` query parameter from the URL. */
|
filtering the `dir` query parameter from the URL. */
|
||||||
if (auto url = fetchers::maybeGetStrAttr(flakeRef.input.attrs, "url")) {
|
if (auto url = fetchers::maybeGetStrAttr(flakeRef.input.attrs, "url")) {
|
||||||
try {
|
try {
|
||||||
auto parsed = parseURL(*url);
|
auto parsed = parseURL(*url, /*lenient=*/true);
|
||||||
if (auto dir2 = get(parsed.query, "dir")) {
|
if (auto dir2 = get(parsed.query, "dir")) {
|
||||||
if (flakeRef.subdir != "" && flakeRef.subdir == *dir2)
|
if (flakeRef.subdir != "" && flakeRef.subdir == *dir2)
|
||||||
parsed.query.erase("dir");
|
parsed.query.erase("dir");
|
||||||
|
|
|
||||||
|
|
@ -45,7 +45,7 @@ StoreReference StoreReference::parse(const std::string & uri, const StoreReferen
|
||||||
{
|
{
|
||||||
auto params = extraParams;
|
auto params = extraParams;
|
||||||
try {
|
try {
|
||||||
auto parsedUri = parseURL(uri);
|
auto parsedUri = parseURL(uri, /*lenient=*/true);
|
||||||
params.insert(parsedUri.query.begin(), parsedUri.query.end());
|
params.insert(parsedUri.query.begin(), parsedUri.query.end());
|
||||||
|
|
||||||
auto baseURI = parsedUri.authority.value_or(ParsedURL::Authority{}).to_string() + parsedUri.path;
|
auto baseURI = parsedUri.authority.value_or(ParsedURL::Authority{}).to_string() + parsedUri.path;
|
||||||
|
|
@ -107,7 +107,7 @@ std::pair<std::string, StoreReference::Params> splitUriAndParams(const std::stri
|
||||||
StoreReference::Params params;
|
StoreReference::Params params;
|
||||||
auto q = uri.find('?');
|
auto q = uri.find('?');
|
||||||
if (q != std::string::npos) {
|
if (q != std::string::npos) {
|
||||||
params = decodeQuery(uri.substr(q + 1));
|
params = decodeQuery(uri.substr(q + 1), /*lenient=*/true);
|
||||||
uri = uri_.substr(0, q);
|
uri = uri_.substr(0, q);
|
||||||
}
|
}
|
||||||
return {uri, params};
|
return {uri, params};
|
||||||
|
|
|
||||||
|
|
@ -221,15 +221,20 @@ TEST(parseURL, parsedUrlsWithUnescapedChars)
|
||||||
* 2. Unescaped spaces and quotes in query.
|
* 2. Unescaped spaces and quotes in query.
|
||||||
*/
|
*/
|
||||||
auto s = "http://www.example.org/file.tar.gz?query \"= 123\"#shevron^quote\"space ";
|
auto s = "http://www.example.org/file.tar.gz?query \"= 123\"#shevron^quote\"space ";
|
||||||
auto url = parseURL(s);
|
|
||||||
|
|
||||||
ASSERT_EQ(url.fragment, "shevron^quote\"space ");
|
/* Without leniency for back compat, this should throw. */
|
||||||
|
EXPECT_THROW(parseURL(s), Error);
|
||||||
|
|
||||||
|
/* With leniency for back compat, this should parse. */
|
||||||
|
auto url = parseURL(s, /*lenient=*/true);
|
||||||
|
|
||||||
|
EXPECT_EQ(url.fragment, "shevron^quote\"space ");
|
||||||
|
|
||||||
auto query = StringMap{
|
auto query = StringMap{
|
||||||
{"query \"", " 123\""},
|
{"query \"", " 123\""},
|
||||||
};
|
};
|
||||||
|
|
||||||
ASSERT_EQ(url.query, query);
|
EXPECT_EQ(url.query, query);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(parseURL, parseFTPUrl)
|
TEST(parseURL, parseFTPUrl)
|
||||||
|
|
|
||||||
|
|
@ -96,14 +96,18 @@ MakeError(BadURL, Error);
|
||||||
std::string percentDecode(std::string_view in);
|
std::string percentDecode(std::string_view in);
|
||||||
std::string percentEncode(std::string_view s, std::string_view keep = "");
|
std::string percentEncode(std::string_view s, std::string_view keep = "");
|
||||||
|
|
||||||
StringMap decodeQuery(const std::string & query);
|
/**
|
||||||
|
* @param lenient @see parseURL
|
||||||
|
*/
|
||||||
|
StringMap decodeQuery(std::string_view query, bool lenient = false);
|
||||||
|
|
||||||
std::string encodeQuery(const StringMap & query);
|
std::string encodeQuery(const StringMap & query);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Parse a Nix URL into a ParsedURL.
|
* Parse a URL into a ParsedURL.
|
||||||
*
|
*
|
||||||
* Nix URI is mostly compliant with RFC3986, but with some deviations:
|
* @parm lenient Also allow some long-supported Nix URIs that are not quite compliant with RFC3986.
|
||||||
|
* Here are the deviations:
|
||||||
* - Fragments can contain unescaped (not URL encoded) '^', '"' or space literals.
|
* - Fragments can contain unescaped (not URL encoded) '^', '"' or space literals.
|
||||||
* - Queries may contain unescaped '"' or spaces.
|
* - Queries may contain unescaped '"' or spaces.
|
||||||
*
|
*
|
||||||
|
|
@ -111,7 +115,7 @@ std::string encodeQuery(const StringMap & query);
|
||||||
*
|
*
|
||||||
* @throws BadURL
|
* @throws BadURL
|
||||||
*/
|
*/
|
||||||
ParsedURL parseURL(std::string_view url);
|
ParsedURL parseURL(std::string_view url, bool lenient = false);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Although that’s not really standardized anywhere, an number of tools
|
* Although that’s not really standardized anywhere, an number of tools
|
||||||
|
|
|
||||||
|
|
@ -108,15 +108,21 @@ static std::string percentEncodeCharSet(std::string_view s, auto charSet)
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
ParsedURL parseURL(std::string_view url)
|
ParsedURL parseURL(std::string_view url, bool lenient)
|
||||||
try {
|
try {
|
||||||
/* Account for several non-standard properties of nix urls (for back-compat):
|
/* Account for several non-standard properties of nix urls (for back-compat):
|
||||||
* - Allow unescaped spaces ' ' and '"' characters in queries.
|
* - Allow unescaped spaces ' ' and '"' characters in queries.
|
||||||
* - Allow '"', ' ' and '^' characters in the fragment component.
|
* - Allow '"', ' ' and '^' characters in the fragment component.
|
||||||
* We could write our own grammar for this, but fixing it up here seems
|
* We could write our own grammar for this, but fixing it up here seems
|
||||||
* more concise, since the deviation is rather minor.
|
* more concise, since the deviation is rather minor.
|
||||||
|
*
|
||||||
|
* If `!lenient` don't bother initializing, because we can just
|
||||||
|
* parse `url` directly`.
|
||||||
*/
|
*/
|
||||||
std::string fixedEncodedUrl = [&]() {
|
std::string fixedEncodedUrl;
|
||||||
|
|
||||||
|
if (lenient) {
|
||||||
|
fixedEncodedUrl = [&] {
|
||||||
std::string fixed;
|
std::string fixed;
|
||||||
std::string_view view = url;
|
std::string_view view = url;
|
||||||
|
|
||||||
|
|
@ -141,8 +147,9 @@ try {
|
||||||
fixed += view;
|
fixed += view;
|
||||||
return fixed;
|
return fixed;
|
||||||
}();
|
}();
|
||||||
|
}
|
||||||
|
|
||||||
auto urlView = boost::urls::url_view(fixedEncodedUrl);
|
auto urlView = boost::urls::url_view(lenient ? fixedEncodedUrl : url);
|
||||||
|
|
||||||
if (!urlView.has_scheme())
|
if (!urlView.has_scheme())
|
||||||
throw BadURL("'%s' doesn't have a scheme", url);
|
throw BadURL("'%s' doesn't have a scheme", url);
|
||||||
|
|
@ -179,7 +186,7 @@ try {
|
||||||
.scheme = scheme,
|
.scheme = scheme,
|
||||||
.authority = authority,
|
.authority = authority,
|
||||||
.path = path,
|
.path = path,
|
||||||
.query = decodeQuery(std::string(query)),
|
.query = decodeQuery(query, lenient),
|
||||||
.fragment = fragment,
|
.fragment = fragment,
|
||||||
};
|
};
|
||||||
} catch (boost::system::system_error & e) {
|
} catch (boost::system::system_error & e) {
|
||||||
|
|
@ -201,14 +208,17 @@ std::string percentEncode(std::string_view s, std::string_view keep)
|
||||||
s, [keep](char c) { return boost::urls::unreserved_chars(c) || keep.find(c) != keep.npos; });
|
s, [keep](char c) { return boost::urls::unreserved_chars(c) || keep.find(c) != keep.npos; });
|
||||||
}
|
}
|
||||||
|
|
||||||
StringMap decodeQuery(const std::string & query)
|
StringMap decodeQuery(std::string_view query, bool lenient)
|
||||||
try {
|
try {
|
||||||
/* For back-compat unescaped characters are allowed. */
|
/* When `lenient = true`, for back-compat unescaped characters are allowed. */
|
||||||
auto fixedEncodedQuery = percentEncodeCharSet(query, extraAllowedCharsInQuery);
|
std::string fixedEncodedQuery;
|
||||||
|
if (lenient) {
|
||||||
|
fixedEncodedQuery = percentEncodeCharSet(query, extraAllowedCharsInQuery);
|
||||||
|
}
|
||||||
|
|
||||||
StringMap result;
|
StringMap result;
|
||||||
|
|
||||||
auto encodedQuery = boost::urls::params_encoded_view(fixedEncodedQuery);
|
auto encodedQuery = boost::urls::params_encoded_view(lenient ? fixedEncodedQuery : query);
|
||||||
for (auto && [key, value, value_specified] : encodedQuery) {
|
for (auto && [key, value, value_specified] : encodedQuery) {
|
||||||
if (!value_specified) {
|
if (!value_specified) {
|
||||||
warn("dubious URI query '%s' is missing equal sign '%s', ignoring", std::string_view(key), "=");
|
warn("dubious URI query '%s' is missing equal sign '%s', ignoring", std::string_view(key), "=");
|
||||||
|
|
|
||||||
|
|
@ -105,7 +105,8 @@ std::string getNameFromElement(const ProfileElement & element)
|
||||||
{
|
{
|
||||||
std::optional<std::string> result = std::nullopt;
|
std::optional<std::string> result = std::nullopt;
|
||||||
if (element.source) {
|
if (element.source) {
|
||||||
result = getNameFromURL(parseURL(element.source->to_string()));
|
// Seems to be for Flake URLs
|
||||||
|
result = getNameFromURL(parseURL(element.source->to_string(), /*lenient=*/true));
|
||||||
}
|
}
|
||||||
return result.value_or(element.identifier());
|
return result.value_or(element.identifier());
|
||||||
}
|
}
|
||||||
|
|
@ -160,11 +161,15 @@ struct ProfileManifest
|
||||||
e["outputs"].get<ExtendedOutputsSpec>()};
|
e["outputs"].get<ExtendedOutputsSpec>()};
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string name =
|
std::string name = [&] {
|
||||||
elems.is_object() ? elem.key()
|
if (elems.is_object())
|
||||||
: element.source
|
return elem.key();
|
||||||
? getNameFromURL(parseURL(element.source->to_string())).value_or(element.identifier())
|
if (element.source) {
|
||||||
: element.identifier();
|
if (auto optName = getNameFromURL(parseURL(element.source->to_string(), /*lenient=*/true)))
|
||||||
|
return *optName;
|
||||||
|
}
|
||||||
|
return element.identifier();
|
||||||
|
}();
|
||||||
|
|
||||||
addElement(name, std::move(element));
|
addElement(name, std::move(element));
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue