1
1
Fork 0
mirror of https://github.com/NixOS/nix.git synced 2025-11-11 04:56:01 +01:00

Better splitString

I need this for some `ParseURL` improvements, but I figure this is
better to send as its own PR.

I changed the tests to run against several container types, including
`std::list<std::string_view>`, instead of only `Strings` (which is
`std::list<std::string>`).

Co-Authored-By: Sergei Zimmerman <sergei@zimmerman.foo>
This commit is contained in:
John Ericson 2025-08-26 14:37:13 -04:00
parent 0bd9d6a28e
commit cc4aa70e6e
2 changed files with 66 additions and 45 deletions

View file

@ -2,6 +2,7 @@
#include <rapidcheck/gtest.h> #include <rapidcheck/gtest.h>
#include "nix/util/strings.hh" #include "nix/util/strings.hh"
#include "nix/util/strings-inline.hh"
#include "nix/util/error.hh" #include "nix/util/error.hh"
namespace nix { namespace nix {
@ -271,113 +272,122 @@ TEST(tokenizeString, tokenizeSepEmpty)
* splitString * splitString
* --------------------------------------------------------------------------*/ * --------------------------------------------------------------------------*/
TEST(splitString, empty) using SplitStringTestContainerTypes = ::testing::
{ Types<std::vector<std::string>, std::vector<std::string_view>, std::list<std::string>, std::list<std::string_view>>;
Strings expected = {""};
ASSERT_EQ(splitString<Strings>("", " \t\n\r"), expected); template<typename T>
class splitStringTest : public ::testing::Test
{};
TYPED_TEST_SUITE(splitStringTest, SplitStringTestContainerTypes);
TYPED_TEST(splitStringTest, empty)
{
TypeParam expected = {""};
EXPECT_EQ(splitString<TypeParam>("", " \t\n\r"), expected);
} }
TEST(splitString, oneSep) TYPED_TEST(splitStringTest, oneSep)
{ {
Strings expected = {"", ""}; TypeParam expected = {"", ""};
ASSERT_EQ(splitString<Strings>(" ", " \t\n\r"), expected); EXPECT_EQ(splitString<TypeParam>(" ", " \t\n\r"), expected);
} }
TEST(splitString, twoSep) TYPED_TEST(splitStringTest, twoSep)
{ {
Strings expected = {"", "", ""}; TypeParam expected = {"", "", ""};
ASSERT_EQ(splitString<Strings>(" \n", " \t\n\r"), expected); EXPECT_EQ(splitString<TypeParam>(" \n", " \t\n\r"), expected);
} }
TEST(splitString, tokenizeSpacesWithSpaces) TYPED_TEST(splitStringTest, tokenizeSpacesWithSpaces)
{ {
auto s = "foo bar baz"; auto s = "foo bar baz";
Strings expected = {"foo", "bar", "baz"}; TypeParam expected = {"foo", "bar", "baz"};
ASSERT_EQ(splitString<Strings>(s, " \t\n\r"), expected); EXPECT_EQ(splitString<TypeParam>(s, " \t\n\r"), expected);
} }
TEST(splitString, tokenizeTabsWithDefaults) TYPED_TEST(splitStringTest, tokenizeTabsWithDefaults)
{ {
auto s = "foo\tbar\tbaz"; auto s = "foo\tbar\tbaz";
// Using it like this is weird, but shows the difference with tokenizeString, which also has this test // Using it like this is weird, but shows the difference with tokenizeString, which also has this test
Strings expected = {"foo", "bar", "baz"}; TypeParam expected = {"foo", "bar", "baz"};
ASSERT_EQ(splitString<Strings>(s, " \t\n\r"), expected); EXPECT_EQ(splitString<TypeParam>(s, " \t\n\r"), expected);
} }
TEST(splitString, tokenizeTabsSpacesWithDefaults) TYPED_TEST(splitStringTest, tokenizeTabsSpacesWithDefaults)
{ {
auto s = "foo\t bar\t baz"; auto s = "foo\t bar\t baz";
// Using it like this is weird, but shows the difference with tokenizeString, which also has this test // Using it like this is weird, but shows the difference with tokenizeString, which also has this test
Strings expected = {"foo", "", "bar", "", "baz"}; TypeParam expected = {"foo", "", "bar", "", "baz"};
ASSERT_EQ(splitString<Strings>(s, " \t\n\r"), expected); EXPECT_EQ(splitString<TypeParam>(s, " \t\n\r"), expected);
} }
TEST(splitString, tokenizeTabsSpacesNewlineWithDefaults) TYPED_TEST(splitStringTest, tokenizeTabsSpacesNewlineWithDefaults)
{ {
auto s = "foo\t\n bar\t\n baz"; auto s = "foo\t\n bar\t\n baz";
// Using it like this is weird, but shows the difference with tokenizeString, which also has this test // Using it like this is weird, but shows the difference with tokenizeString, which also has this test
Strings expected = {"foo", "", "", "bar", "", "", "baz"}; TypeParam expected = {"foo", "", "", "bar", "", "", "baz"};
ASSERT_EQ(splitString<Strings>(s, " \t\n\r"), expected); EXPECT_EQ(splitString<TypeParam>(s, " \t\n\r"), expected);
} }
TEST(splitString, tokenizeTabsSpacesNewlineRetWithDefaults) TYPED_TEST(splitStringTest, tokenizeTabsSpacesNewlineRetWithDefaults)
{ {
auto s = "foo\t\n\r bar\t\n\r baz"; auto s = "foo\t\n\r bar\t\n\r baz";
// Using it like this is weird, but shows the difference with tokenizeString, which also has this test // Using it like this is weird, but shows the difference with tokenizeString, which also has this test
Strings expected = {"foo", "", "", "", "bar", "", "", "", "baz"}; TypeParam expected = {"foo", "", "", "", "bar", "", "", "", "baz"};
ASSERT_EQ(splitString<Strings>(s, " \t\n\r"), expected); EXPECT_EQ(splitString<TypeParam>(s, " \t\n\r"), expected);
auto s2 = "foo \t\n\r bar \t\n\r baz"; auto s2 = "foo \t\n\r bar \t\n\r baz";
Strings expected2 = {"foo", "", "", "", "", "bar", "", "", "", "", "baz"}; TypeParam expected2 = {"foo", "", "", "", "", "bar", "", "", "", "", "baz"};
ASSERT_EQ(splitString<Strings>(s2, " \t\n\r"), expected2); EXPECT_EQ(splitString<TypeParam>(s2, " \t\n\r"), expected2);
} }
TEST(splitString, tokenizeWithCustomSep) TYPED_TEST(splitStringTest, tokenizeWithCustomSep)
{ {
auto s = "foo\n,bar\n,baz\n"; auto s = "foo\n,bar\n,baz\n";
Strings expected = {"foo\n", "bar\n", "baz\n"}; TypeParam expected = {"foo\n", "bar\n", "baz\n"};
ASSERT_EQ(splitString<Strings>(s, ","), expected); EXPECT_EQ(splitString<TypeParam>(s, ","), expected);
} }
TEST(splitString, tokenizeSepAtStart) TYPED_TEST(splitStringTest, tokenizeSepAtStart)
{ {
auto s = ",foo,bar,baz"; auto s = ",foo,bar,baz";
Strings expected = {"", "foo", "bar", "baz"}; TypeParam expected = {"", "foo", "bar", "baz"};
ASSERT_EQ(splitString<Strings>(s, ","), expected); EXPECT_EQ(splitString<TypeParam>(s, ","), expected);
} }
TEST(splitString, tokenizeSepAtEnd) TYPED_TEST(splitStringTest, tokenizeSepAtEnd)
{ {
auto s = "foo,bar,baz,"; auto s = "foo,bar,baz,";
Strings expected = {"foo", "bar", "baz", ""}; TypeParam expected = {"foo", "bar", "baz", ""};
ASSERT_EQ(splitString<Strings>(s, ","), expected); EXPECT_EQ(splitString<TypeParam>(s, ","), expected);
} }
TEST(splitString, tokenizeSepEmpty) TYPED_TEST(splitStringTest, tokenizeSepEmpty)
{ {
auto s = "foo,,baz"; auto s = "foo,,baz";
Strings expected = {"foo", "", "baz"}; TypeParam expected = {"foo", "", "baz"};
ASSERT_EQ(splitString<Strings>(s, ","), expected); EXPECT_EQ(splitString<TypeParam>(s, ","), expected);
} }
// concatStringsSep sep . splitString sep = id if sep is 1 char // concatStringsSep sep . splitString sep = id if sep is 1 char
RC_GTEST_PROP(splitString, recoveredByConcatStringsSep, (const std::string & s)) RC_GTEST_TYPED_FIXTURE_PROP(splitStringTest, recoveredByConcatStringsSep, (const std::string & s))
{ {
RC_ASSERT(concatStringsSep("/", splitString<Strings>(s, "/")) == s); RC_ASSERT(concatStringsSep("/", splitString<TypeParam>(s, "/")) == s);
RC_ASSERT(concatStringsSep("a", splitString<Strings>(s, "a")) == s); RC_ASSERT(concatStringsSep("a", splitString<TypeParam>(s, "a")) == s);
} }
/* ---------------------------------------------------------------------------- /* ----------------------------------------------------------------------------

View file

@ -26,18 +26,29 @@ C tokenizeString(std::string_view s, std::string_view separators)
} }
template<class C, class CharT> template<class C, class CharT>
C basicSplitString(std::basic_string_view<CharT> s, std::basic_string_view<CharT> separators) void basicSplitStringInto(C & accum, std::basic_string_view<CharT> s, std::basic_string_view<CharT> separators)
{ {
C result;
size_t pos = 0; size_t pos = 0;
while (pos <= s.size()) { while (pos <= s.size()) {
auto end = s.find_first_of(separators, pos); auto end = s.find_first_of(separators, pos);
if (end == s.npos) if (end == s.npos)
end = s.size(); end = s.size();
result.insert(result.end(), std::basic_string<CharT>(s, pos, end - pos)); accum.insert(accum.end(), typename C::value_type{s.substr(pos, end - pos)});
pos = end + 1; pos = end + 1;
} }
}
/**
 * Split `s` at every character contained in `separators` and append the
 * resulting pieces to the end of `accum`.
 *
 * This is the `char` front end: it forwards its arguments unchanged to
 * `basicSplitStringInto<C, char>`, which does the actual work and inserts
 * each piece as a `typename C::value_type`.
 *
 * @param accum      Container that receives the pieces.
 * @param s          Text to split.
 * @param separators Set of single-character delimiters.
 *
 * NOTE(review): when `C::value_type` is a non-owning type such as
 * `std::string_view`, the inserted elements presumably refer to the storage
 * behind `s` -- confirm that callers keep that storage alive.
 */
template<typename C>
void splitStringInto(C & accum, std::string_view s, std::string_view separators)
{
basicSplitStringInto<C, char>(accum, s, separators);
}
template<class C, class CharT>
C basicSplitString(std::basic_string_view<CharT> s, std::basic_string_view<CharT> separators)
{
C result;
basicSplitStringInto(result, s, separators);
return result; return result;
} }