mirror of
https://github.com/NixOS/nix.git
synced 2025-11-10 20:46:01 +01:00
Better `splitString`
I need this for some `ParseURL` improvements, but I figure it is better sent as its own PR. I changed the tests willy-nilly to sometimes use `std::list<std::string_view>` instead of `Strings` (which is `std::list<std::string>`).

Co-Authored-By: Sergei Zimmerman <sergei@zimmerman.foo>
This commit is contained in:
parent
0bd9d6a28e
commit
cc4aa70e6e
2 changed files with 66 additions and 45 deletions
|
|
@ -2,6 +2,7 @@
|
||||||
#include <rapidcheck/gtest.h>
|
#include <rapidcheck/gtest.h>
|
||||||
|
|
||||||
#include "nix/util/strings.hh"
|
#include "nix/util/strings.hh"
|
||||||
|
#include "nix/util/strings-inline.hh"
|
||||||
#include "nix/util/error.hh"
|
#include "nix/util/error.hh"
|
||||||
|
|
||||||
namespace nix {
|
namespace nix {
|
||||||
|
|
@ -271,113 +272,122 @@ TEST(tokenizeString, tokenizeSepEmpty)
|
||||||
* splitString
|
* splitString
|
||||||
* --------------------------------------------------------------------------*/
|
* --------------------------------------------------------------------------*/
|
||||||
|
|
||||||
TEST(splitString, empty)
|
using SplitStringTestContainerTypes = ::testing::
|
||||||
{
|
Types<std::vector<std::string>, std::vector<std::string_view>, std::list<std::string>, std::list<std::string_view>>;
|
||||||
Strings expected = {""};
|
|
||||||
|
|
||||||
ASSERT_EQ(splitString<Strings>("", " \t\n\r"), expected);
|
template<typename T>
|
||||||
|
class splitStringTest : public ::testing::Test
|
||||||
|
{};
|
||||||
|
|
||||||
|
TYPED_TEST_SUITE(splitStringTest, SplitStringTestContainerTypes);
|
||||||
|
|
||||||
|
TYPED_TEST(splitStringTest, empty)
|
||||||
|
{
|
||||||
|
TypeParam expected = {""};
|
||||||
|
|
||||||
|
EXPECT_EQ(splitString<TypeParam>("", " \t\n\r"), expected);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(splitString, oneSep)
|
TYPED_TEST(splitStringTest, oneSep)
|
||||||
{
|
{
|
||||||
Strings expected = {"", ""};
|
TypeParam expected = {"", ""};
|
||||||
|
|
||||||
ASSERT_EQ(splitString<Strings>(" ", " \t\n\r"), expected);
|
EXPECT_EQ(splitString<TypeParam>(" ", " \t\n\r"), expected);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(splitString, twoSep)
|
TYPED_TEST(splitStringTest, twoSep)
|
||||||
{
|
{
|
||||||
Strings expected = {"", "", ""};
|
TypeParam expected = {"", "", ""};
|
||||||
|
|
||||||
ASSERT_EQ(splitString<Strings>(" \n", " \t\n\r"), expected);
|
EXPECT_EQ(splitString<TypeParam>(" \n", " \t\n\r"), expected);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(splitString, tokenizeSpacesWithSpaces)
|
TYPED_TEST(splitStringTest, tokenizeSpacesWithSpaces)
|
||||||
{
|
{
|
||||||
auto s = "foo bar baz";
|
auto s = "foo bar baz";
|
||||||
Strings expected = {"foo", "bar", "baz"};
|
TypeParam expected = {"foo", "bar", "baz"};
|
||||||
|
|
||||||
ASSERT_EQ(splitString<Strings>(s, " \t\n\r"), expected);
|
EXPECT_EQ(splitString<TypeParam>(s, " \t\n\r"), expected);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(splitString, tokenizeTabsWithDefaults)
|
TYPED_TEST(splitStringTest, tokenizeTabsWithDefaults)
|
||||||
{
|
{
|
||||||
auto s = "foo\tbar\tbaz";
|
auto s = "foo\tbar\tbaz";
|
||||||
// Using it like this is weird, but shows the difference with tokenizeString, which also has this test
|
// Using it like this is weird, but shows the difference with tokenizeString, which also has this test
|
||||||
Strings expected = {"foo", "bar", "baz"};
|
TypeParam expected = {"foo", "bar", "baz"};
|
||||||
|
|
||||||
ASSERT_EQ(splitString<Strings>(s, " \t\n\r"), expected);
|
EXPECT_EQ(splitString<TypeParam>(s, " \t\n\r"), expected);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(splitString, tokenizeTabsSpacesWithDefaults)
|
TYPED_TEST(splitStringTest, tokenizeTabsSpacesWithDefaults)
|
||||||
{
|
{
|
||||||
auto s = "foo\t bar\t baz";
|
auto s = "foo\t bar\t baz";
|
||||||
// Using it like this is weird, but shows the difference with tokenizeString, which also has this test
|
// Using it like this is weird, but shows the difference with tokenizeString, which also has this test
|
||||||
Strings expected = {"foo", "", "bar", "", "baz"};
|
TypeParam expected = {"foo", "", "bar", "", "baz"};
|
||||||
|
|
||||||
ASSERT_EQ(splitString<Strings>(s, " \t\n\r"), expected);
|
EXPECT_EQ(splitString<TypeParam>(s, " \t\n\r"), expected);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(splitString, tokenizeTabsSpacesNewlineWithDefaults)
|
TYPED_TEST(splitStringTest, tokenizeTabsSpacesNewlineWithDefaults)
|
||||||
{
|
{
|
||||||
auto s = "foo\t\n bar\t\n baz";
|
auto s = "foo\t\n bar\t\n baz";
|
||||||
// Using it like this is weird, but shows the difference with tokenizeString, which also has this test
|
// Using it like this is weird, but shows the difference with tokenizeString, which also has this test
|
||||||
Strings expected = {"foo", "", "", "bar", "", "", "baz"};
|
TypeParam expected = {"foo", "", "", "bar", "", "", "baz"};
|
||||||
|
|
||||||
ASSERT_EQ(splitString<Strings>(s, " \t\n\r"), expected);
|
EXPECT_EQ(splitString<TypeParam>(s, " \t\n\r"), expected);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(splitString, tokenizeTabsSpacesNewlineRetWithDefaults)
|
TYPED_TEST(splitStringTest, tokenizeTabsSpacesNewlineRetWithDefaults)
|
||||||
{
|
{
|
||||||
auto s = "foo\t\n\r bar\t\n\r baz";
|
auto s = "foo\t\n\r bar\t\n\r baz";
|
||||||
// Using it like this is weird, but shows the difference with tokenizeString, which also has this test
|
// Using it like this is weird, but shows the difference with tokenizeString, which also has this test
|
||||||
Strings expected = {"foo", "", "", "", "bar", "", "", "", "baz"};
|
TypeParam expected = {"foo", "", "", "", "bar", "", "", "", "baz"};
|
||||||
|
|
||||||
ASSERT_EQ(splitString<Strings>(s, " \t\n\r"), expected);
|
EXPECT_EQ(splitString<TypeParam>(s, " \t\n\r"), expected);
|
||||||
|
|
||||||
auto s2 = "foo \t\n\r bar \t\n\r baz";
|
auto s2 = "foo \t\n\r bar \t\n\r baz";
|
||||||
Strings expected2 = {"foo", "", "", "", "", "bar", "", "", "", "", "baz"};
|
TypeParam expected2 = {"foo", "", "", "", "", "bar", "", "", "", "", "baz"};
|
||||||
|
|
||||||
ASSERT_EQ(splitString<Strings>(s2, " \t\n\r"), expected2);
|
EXPECT_EQ(splitString<TypeParam>(s2, " \t\n\r"), expected2);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(splitString, tokenizeWithCustomSep)
|
TYPED_TEST(splitStringTest, tokenizeWithCustomSep)
|
||||||
{
|
{
|
||||||
auto s = "foo\n,bar\n,baz\n";
|
auto s = "foo\n,bar\n,baz\n";
|
||||||
Strings expected = {"foo\n", "bar\n", "baz\n"};
|
TypeParam expected = {"foo\n", "bar\n", "baz\n"};
|
||||||
|
|
||||||
ASSERT_EQ(splitString<Strings>(s, ","), expected);
|
EXPECT_EQ(splitString<TypeParam>(s, ","), expected);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(splitString, tokenizeSepAtStart)
|
TYPED_TEST(splitStringTest, tokenizeSepAtStart)
|
||||||
{
|
{
|
||||||
auto s = ",foo,bar,baz";
|
auto s = ",foo,bar,baz";
|
||||||
Strings expected = {"", "foo", "bar", "baz"};
|
TypeParam expected = {"", "foo", "bar", "baz"};
|
||||||
|
|
||||||
ASSERT_EQ(splitString<Strings>(s, ","), expected);
|
EXPECT_EQ(splitString<TypeParam>(s, ","), expected);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(splitString, tokenizeSepAtEnd)
|
TYPED_TEST(splitStringTest, tokenizeSepAtEnd)
|
||||||
{
|
{
|
||||||
auto s = "foo,bar,baz,";
|
auto s = "foo,bar,baz,";
|
||||||
Strings expected = {"foo", "bar", "baz", ""};
|
TypeParam expected = {"foo", "bar", "baz", ""};
|
||||||
|
|
||||||
ASSERT_EQ(splitString<Strings>(s, ","), expected);
|
EXPECT_EQ(splitString<TypeParam>(s, ","), expected);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(splitString, tokenizeSepEmpty)
|
TYPED_TEST(splitStringTest, tokenizeSepEmpty)
|
||||||
{
|
{
|
||||||
auto s = "foo,,baz";
|
auto s = "foo,,baz";
|
||||||
Strings expected = {"foo", "", "baz"};
|
TypeParam expected = {"foo", "", "baz"};
|
||||||
|
|
||||||
ASSERT_EQ(splitString<Strings>(s, ","), expected);
|
EXPECT_EQ(splitString<TypeParam>(s, ","), expected);
|
||||||
}
|
}
|
||||||
|
|
||||||
// concatStringsSep sep . splitString sep = id if sep is 1 char
|
// concatStringsSep sep . splitString sep = id if sep is 1 char
|
||||||
RC_GTEST_PROP(splitString, recoveredByConcatStringsSep, (const std::string & s))
|
RC_GTEST_TYPED_FIXTURE_PROP(splitStringTest, recoveredByConcatStringsSep, (const std::string & s))
|
||||||
{
|
{
|
||||||
RC_ASSERT(concatStringsSep("/", splitString<Strings>(s, "/")) == s);
|
RC_ASSERT(concatStringsSep("/", splitString<TypeParam>(s, "/")) == s);
|
||||||
RC_ASSERT(concatStringsSep("a", splitString<Strings>(s, "a")) == s);
|
RC_ASSERT(concatStringsSep("a", splitString<TypeParam>(s, "a")) == s);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------------
|
/* ----------------------------------------------------------------------------
|
||||||
|
|
|
||||||
|
|
@ -26,18 +26,29 @@ C tokenizeString(std::string_view s, std::string_view separators)
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class C, class CharT>
|
template<class C, class CharT>
|
||||||
C basicSplitString(std::basic_string_view<CharT> s, std::basic_string_view<CharT> separators)
|
void basicSplitStringInto(C & accum, std::basic_string_view<CharT> s, std::basic_string_view<CharT> separators)
|
||||||
{
|
{
|
||||||
C result;
|
|
||||||
size_t pos = 0;
|
size_t pos = 0;
|
||||||
while (pos <= s.size()) {
|
while (pos <= s.size()) {
|
||||||
auto end = s.find_first_of(separators, pos);
|
auto end = s.find_first_of(separators, pos);
|
||||||
if (end == s.npos)
|
if (end == s.npos)
|
||||||
end = s.size();
|
end = s.size();
|
||||||
result.insert(result.end(), std::basic_string<CharT>(s, pos, end - pos));
|
accum.insert(accum.end(), typename C::value_type{s.substr(pos, end - pos)});
|
||||||
pos = end + 1;
|
pos = end + 1;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename C>
|
||||||
|
void splitStringInto(C & accum, std::string_view s, std::string_view separators)
|
||||||
|
{
|
||||||
|
basicSplitStringInto<C, char>(accum, s, separators);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class C, class CharT>
|
||||||
|
C basicSplitString(std::basic_string_view<CharT> s, std::basic_string_view<CharT> separators)
|
||||||
|
{
|
||||||
|
C result;
|
||||||
|
basicSplitStringInto(result, s, separators);
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue