1
1
Fork 0
mirror of https://github.com/NixOS/nix.git synced 2025-12-21 00:11:08 +01:00

Merge pull request #13773 from xokdvium/quotes-fragment

libutil: Fix handling of unescaped spaces, quotes and shevrons in queries and fragments
This commit is contained in:
Jörg Thalheim 2025-08-17 14:45:42 +02:00 committed by GitHub
commit 1cf0fe3186
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 96 additions and 33 deletions

View file

@ -74,6 +74,20 @@ TEST(parseFlakeRef, GitArchiveInput)
auto flakeref = parseFlakeRef(fetchSettings, s);
ASSERT_EQ(flakeref.to_string(), "github:foo/bar/branch%23");
}
{
auto s = "github:foo/bar?ref=branch#\"name.with.dot\""; // unescaped quotes `"`
auto [flakeref, fragment] = parseFlakeRefWithFragment(fetchSettings, s);
ASSERT_EQ(fragment, "\"name.with.dot\"");
ASSERT_EQ(flakeref.to_string(), "github:foo/bar/branch");
}
{
auto s = "github:foo/bar#\"name.with.dot\""; // unescaped quotes `"`
auto [flakeref, fragment] = parseFlakeRefWithFragment(fetchSettings, s);
ASSERT_EQ(fragment, "\"name.with.dot\"");
ASSERT_EQ(flakeref.to_string(), "github:foo/bar");
}
}
TEST(to_string, doesntReencodeUrl)

View file

@ -212,6 +212,26 @@ TEST(parseURL, parsedUrlsIsEqualToItself)
ASSERT_TRUE(url == url);
}
TEST(parseURL, parsedUrlsWithUnescapedChars)
{
    /* Backward-compatibility check. The accepted input is questionable as a
     * URL, but this leniency is relied upon by both flakes and libstore,
     * which share the URL parser. The input below contains:
     *  - in the fragment: an unescaped space, a double quote and a '^'
     *    (the "shevron");
     *  - in the query: unescaped spaces and double quotes.
     * All of them must come back verbatim in the parsed result.
     */
    const auto parsed = parseURL("http://www.example.org/file.tar.gz?query \"= 123\"#shevron^quote\"space ");

    ASSERT_EQ(parsed.fragment, "shevron^quote\"space ");

    const StringMap expectedQuery{
        {"query \"", " 123\""},
    };
    ASSERT_EQ(parsed.query, expectedQuery);
}
TEST(parseURL, parseFTPUrl)
{
auto s = "ftp://ftp.nixos.org/downloads/nixos.iso";

View file

@ -104,8 +104,8 @@ std::string encodeQuery(const StringMap & query);
* Parse a Nix URL into a ParsedURL.
*
* Nix URI is mostly compliant with RFC3986, but with some deviations:
* - Literal spaces are allowed and don't have to be percent encoded.
* This is mostly done for backward compatibility.
* - Fragments can contain unescaped (not URL encoded) '^', '"' or space literals.
* - Queries may contain unescaped '"' or spaces.
*
* @note IPv6 ZoneId literals (RFC4007) are represented in URIs according to RFC6874.
*

View file

@ -11,28 +11,6 @@ namespace nix {
/* Regex objects compiled from the refRegexS and revRegexS pattern strings,
   both using the ECMAScript grammar. */
std::regex refRegex(refRegexS, std::regex::ECMAScript);
std::regex revRegex(revRegexS, std::regex::ECMAScript);
/**
 * Cut the output installable suffix off a URL.
 *
 * Everything from the last '^' (inclusive) to the end is removed. When the
 * input contains no '^' it is returned unchanged. The result is a view into
 * the same characters as the argument; nothing is copied.
 *
 * FIXME: parseURL shouldn't really be used for parsing the OutputSpec, but it does
 * get used. That code should actually use ExtendedOutputsSpec::parseOpt.
 */
static std::string_view dropShevronSuffix(std::string_view url)
{
    const auto caretPos = url.rfind('^');
    return caretPos == std::string_view::npos ? url : url.substr(0, caretPos);
}
/**
 * Return a copy of `url` in which every literal space has been replaced by
 * whatever percentEncode produces for a single space.
 */
static std::string percentEncodeSpaces(std::string_view url)
{
    const auto encodedSpace = percentEncode(" ");
    return replaceStrings(std::string{url}, " ", encodedSpace);
}
ParsedURL::Authority ParsedURL::Authority::parse(std::string_view encodedAuthority)
{
auto parsed = boost::urls::parse_authority(encodedAuthority);
@ -108,14 +86,65 @@ std::string ParsedURL::Authority::to_string() const
return std::move(oss).str();
}
/**
 * Additional characters that don't need URL encoding in the fragment.
 *
 * This set (space, double quote and '^') is handed to percentEncodeCharSet
 * so that those characters are percent-encoded in the fragment before the
 * URL text is given to the Boost.URL parser.
 */
static constexpr boost::urls::grammar::lut_chars extraAllowedCharsInFragment = " \"^";
/**
 * Additional characters that don't need URL encoding in the query.
 *
 * This set (space and double quote) is handed to percentEncodeCharSet so
 * that those characters are percent-encoded in the query before it is parsed.
 */
static constexpr boost::urls::grammar::lut_chars extraAllowedCharsInQuery = " \"";
/**
 * Percent-encode selected characters of a string.
 *
 * @param s Input text.
 * @param charSet Predicate callable with a `char`; characters for which it
 * yields true are replaced by the result of percentEncode on that single
 * character. All other characters are copied through unchanged.
 * @return The re-encoded copy of `s`.
 */
static std::string percentEncodeCharSet(std::string_view s, auto charSet)
{
    std::string encoded;
    for (const char & ch : s) {
        if (!charSet(ch)) {
            encoded.push_back(ch);
            continue;
        }
        /* Hand exactly one character to percentEncode. */
        encoded.append(percentEncode(std::string_view(&ch, 1)));
    }
    return encoded;
}
ParsedURL parseURL(const std::string & url)
try {
/* Drop the shevron suffix used for the flakerefs. Shevron character is reserved and
shouldn't appear in normal URIs. */
auto unparsedView = dropShevronSuffix(url);
/* For back-compat literal spaces are allowed. */
auto withFixedSpaces = percentEncodeSpaces(unparsedView);
auto urlView = boost::urls::url_view(withFixedSpaces);
auto unparsedView = url;
/* Account for several non-standard properties of nix urls (for back-compat):
* - Allow unescaped spaces ' ' and '"' characters in queries.
* - Allow '"', ' ' and '^' characters in the fragment component.
* We could write our own grammar for this, but fixing it up here seems
* more concise, since the deviation is rather minor.
*/
std::string fixedEncodedUrl = [&]() {
std::string fixed;
std::string_view view = url;
if (auto beforeQuery = splitPrefixTo(view, '?')) {
fixed += *beforeQuery;
fixed += '?';
auto fragmentStart = view.find('#');
auto queryView = view.substr(0, fragmentStart);
auto fixedQuery = percentEncodeCharSet(queryView, extraAllowedCharsInQuery);
fixed += fixedQuery;
view.remove_prefix(std::min(fragmentStart, view.size()));
}
if (auto beforeFragment = splitPrefixTo(view, '#')) {
fixed += *beforeFragment;
fixed += '#';
auto fixedFragment = percentEncodeCharSet(view, extraAllowedCharsInFragment);
fixed += fixedFragment;
return fixed;
}
fixed += view;
return fixed;
}();
auto urlView = boost::urls::url_view(fixedEncodedUrl);
if (!urlView.has_scheme())
throw BadURL("'%s' doesn't have a scheme", url);
@ -176,12 +205,12 @@ std::string percentEncode(std::string_view s, std::string_view keep)
StringMap decodeQuery(const std::string & query)
try {
/* For back-compat literal spaces are allowed. */
auto withFixedSpaces = percentEncodeSpaces(query);
/* For back-compat unescaped characters are allowed. */
auto fixedEncodedQuery = percentEncodeCharSet(query, extraAllowedCharsInQuery);
StringMap result;
auto encodedQuery = boost::urls::params_encoded_view(withFixedSpaces);
auto encodedQuery = boost::urls::params_encoded_view(fixedEncodedQuery);
for (auto && [key, value, value_specified] : encodedQuery) {
if (!value_specified) {
warn("dubious URI query '%s' is missing equal sign '%s', ignoring", std::string_view(key), "=");