1
1
Fork 0
mirror of https://github.com/NixOS/nix.git synced 2025-11-14 06:22:42 +01:00

libutil: Refactor percentDecode, percentEncode to use Boost.URL

The myriad hand-rolled URL parsing and validation routines
are a constant source of problems. Regexes are not a great way
to write parsers, and there is a history of getting them wrong.
Boost.URL is a good library to which we can outsource most of
the heavy lifting.
This commit is contained in:
Sergei Zimmerman 2025-07-18 21:20:36 +03:00
parent d9053390ce
commit ad449c0288
No known key found for this signature in database
4 changed files with 25 additions and 29 deletions

View file

@ -62,6 +62,7 @@ scope: {
"--with-context"
"--with-coroutine"
"--with-iostreams"
"--with-url"
];
enableIcu = false;
}).overrideAttrs

View file

@ -1,5 +1,7 @@
#include "nix/util/url.hh"
#include "nix/util/tests/gmock-matchers.hh"
#include <gtest/gtest.h>
#include <gmock/gmock.h>
namespace nix {
@ -289,6 +291,14 @@ TEST(percentDecode, trailingPercent)
ASSERT_EQ(d, s);
}
/* A '%' followed by a single hex digit is a truncated escape: decoding must
   fail with BadURL, and the message must name the offending input and the
   reason. */
TEST(percentDecode, incompleteEncoding)
{
    auto decodeTruncatedEscape = [] { percentDecode("%1"); };

    ASSERT_THAT(
        decodeTruncatedEscape,
        ::testing::ThrowsMessage<BadURL>(testing::HasSubstrIgnoreANSIMatcher(
            "error: invalid URI parameter '%1': incomplete pct-encoding")));
}
/* ----------------------------------------------------------------------------
* percentEncode
* --------------------------------------------------------------------------*/

View file

@ -57,7 +57,7 @@ deps_private += blake3
# Boost components required by this library. `url` provides Boost.URL, which
# backs the percent-encoding helpers. The `modules` keyword must be given
# exactly once.
boost = dependency(
  'boost',
  modules : ['context', 'coroutine', 'iostreams', 'url'],
  include_type: 'system',
  version: '>=1.82.0'
)

View file

@ -4,6 +4,8 @@
#include "nix/util/split.hh"
#include "nix/util/canon-path.hh"
#include <boost/url.hpp>
namespace nix {
// Compiled form of the `refRegexS` pattern, using the ECMAScript regex grammar.
std::regex refRegex(refRegexS, std::regex::ECMAScript);
@ -48,21 +50,17 @@ ParsedURL parseURL(const std::string & url)
std::string percentDecode(std::string_view in)
{
std::string decoded;
for (size_t i = 0; i < in.size();) {
if (in[i] == '%') {
if (i + 2 >= in.size())
throw BadURL("invalid URI parameter '%s'", in);
try {
decoded += std::stoul(std::string(in, i + 1, 2), 0, 16);
i += 3;
} catch (...) {
throw BadURL("invalid URI parameter '%s'", in);
}
} else
decoded += in[i++];
}
return decoded;
auto pctView = boost::urls::make_pct_string_view(in);
if (pctView.has_value())
return pctView->decode();
auto error = pctView.error();
throw BadURL("invalid URI parameter '%s': %s", in, error.message());
}
/**
 * Percent-encode `s`.
 *
 * A character is copied through unchanged when it belongs to Boost.URL's
 * `unreserved_chars` set or occurs in `keep`; every other character is
 * escaped by `boost::urls::encode`.
 */
std::string percentEncode(std::string_view s, std::string_view keep)
{
    const auto passesThrough = [keep](char ch) {
        if (boost::urls::unreserved_chars(ch))
            return true;
        return keep.find(ch) != keep.npos;
    };
    return boost::urls::encode(s, passesThrough);
}
StringMap decodeQuery(const std::string & query)
@ -85,19 +83,6 @@ StringMap decodeQuery(const std::string & query)
// Extra characters for URL query and path components. Presumably passed as
// the `keep` argument of percentEncode so they stay unescaped — confirm
// against the call sites.
const static std::string allowedInQuery = ":@/?";
const static std::string allowedInPath = ":@/";
/**
 * Percent-encode `s`.
 *
 * ASCII letters, ASCII digits, the punctuation `-._~`, and any character
 * contained in `keep` are copied through unchanged. Every other byte is
 * written as `%XX` with two uppercase hexadecimal digits.
 *
 * @param s     Text to encode.
 * @param keep  Additional characters to leave unescaped.
 * @return      The encoded string.
 */
std::string percentEncode(std::string_view s, std::string_view keep)
{
    // A length-bounded view is used instead of strchr(): strchr() also matches
    // the terminating NUL of its haystack, which made '\0' look "unreserved"
    // and let it through unencoded.
    static constexpr std::string_view unreservedPunct = "-._~";
    static constexpr char hexDigits[] = "0123456789ABCDEF";

    std::string res;
    res.reserve(s.size());
    for (const char c : s) {
        const bool isAlnum = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9');
        if (isAlnum || unreservedPunct.find(c) != std::string_view::npos
            || keep.find(c) != std::string_view::npos) {
            res += c;
        } else {
            // Go through unsigned char so bytes >= 0x80 index the table correctly.
            const auto byte = static_cast<unsigned char>(c);
            res += '%';
            res += hexDigits[byte >> 4];
            res += hexDigits[byte & 0x0F];
        }
    }
    return res;
}
std::string encodeQuery(const StringMap & ss)
{
std::string res;