Merge bitcoin/bitcoin#22953: refactor: introduce single-separator split helper (boost::split replacement)

a62e84438d fuzz: add `SplitString` fuzz target (MarcoFalke)
4fad7e46d9 test: add unit tests for `SplitString` helper (Kiminuo)
9cc8e876e4 refactor: introduce single-separator split helper `SplitString` (Sebastian Falbesoner)

Pull request description:

  This PR adds a simple string split helper `SplitString` that takes use of the spanparsing `Split` function that was first introduced in #13697 (commit fe8a7dcd78). This enables to replace most calls to `boost::split`, in the cases where only a single separator character is used. Note that while previous attempts to replace `boost::split` were controversial (e.g. #13751), this one has a trivial implementation: it merely uses an internal helper (that is unit tested and in regular use with output descriptiors) and converts its result from spans to strings. As a drawback though, not all `boost::split` instances can be tackled.

  As a possible optimization, one could return a vector of `std::string_view`s (available since C++17) instead of strings, to avoid copies. This would need more carefulness on the caller sites though, to avoid potential lifetime issues, and it's probably not worth it, considering that none of the places where strings are split are really performance-critical.

ACKs for top commit:
  martinus:
    Code review ACK a62e84438d. Ran all tests. I also like that with `boost::split` it was not obvious that the resulting container was cleared, and with `SplitString` API that's obvious.

Tree-SHA512: 10cb22619ebe46831b1f8e83584a89381a036b54c88701484ac00743e2a62cfe52c9f3ecdbb2d0815e536c99034558277cc263600ec3f3588b291c07eef8ed24
This commit is contained in:
fanquake 2022-04-26 09:30:34 +01:00
commit f436bfd126
No known key found for this signature in database
GPG key ID: 2EEB9F5CC09526C1
14 changed files with 97 additions and 71 deletions

View file

@ -31,8 +31,6 @@
#include <memory>
#include <stdio.h>
#include <boost/algorithm/string.hpp>
static bool fCreateBlank;
static std::map<std::string,UniValue> registers;
static const int CONTINUE_EXECUTION=-1;
@ -251,8 +249,7 @@ static T TrimAndParse(const std::string& int_str, const std::string& err)
static void MutateTxAddInput(CMutableTransaction& tx, const std::string& strInput)
{
std::vector<std::string> vStrInputParts;
boost::split(vStrInputParts, strInput, boost::is_any_of(":"));
std::vector<std::string> vStrInputParts = SplitString(strInput, ':');
// separate TXID:VOUT in string
if (vStrInputParts.size()<2)
@ -287,8 +284,7 @@ static void MutateTxAddInput(CMutableTransaction& tx, const std::string& strInpu
static void MutateTxAddOutAddr(CMutableTransaction& tx, const std::string& strInput)
{
// Separate into VALUE:ADDRESS
std::vector<std::string> vStrInputParts;
boost::split(vStrInputParts, strInput, boost::is_any_of(":"));
std::vector<std::string> vStrInputParts = SplitString(strInput, ':');
if (vStrInputParts.size() != 2)
throw std::runtime_error("TX output missing or too many separators");
@ -312,8 +308,7 @@ static void MutateTxAddOutAddr(CMutableTransaction& tx, const std::string& strIn
static void MutateTxAddOutPubKey(CMutableTransaction& tx, const std::string& strInput)
{
// Separate into VALUE:PUBKEY[:FLAGS]
std::vector<std::string> vStrInputParts;
boost::split(vStrInputParts, strInput, boost::is_any_of(":"));
std::vector<std::string> vStrInputParts = SplitString(strInput, ':');
if (vStrInputParts.size() < 2 || vStrInputParts.size() > 3)
throw std::runtime_error("TX output missing or too many separators");
@ -356,8 +351,7 @@ static void MutateTxAddOutPubKey(CMutableTransaction& tx, const std::string& str
static void MutateTxAddOutMultiSig(CMutableTransaction& tx, const std::string& strInput)
{
// Separate into VALUE:REQUIRED:NUMKEYS:PUBKEY1:PUBKEY2:....[:FLAGS]
std::vector<std::string> vStrInputParts;
boost::split(vStrInputParts, strInput, boost::is_any_of(":"));
std::vector<std::string> vStrInputParts = SplitString(strInput, ':');
// Check that there are enough parameters
if (vStrInputParts.size()<3)
@ -460,8 +454,7 @@ static void MutateTxAddOutData(CMutableTransaction& tx, const std::string& strIn
static void MutateTxAddOutScript(CMutableTransaction& tx, const std::string& strInput)
{
// separate VALUE:SCRIPT[:FLAGS]
std::vector<std::string> vStrInputParts;
boost::split(vStrInputParts, strInput, boost::is_any_of(":"));
std::vector<std::string> vStrInputParts = SplitString(strInput, ':');
if (vStrInputParts.size() < 2)
throw std::runtime_error("TX output missing separator");

View file

@ -10,13 +10,11 @@
#include <deploymentinfo.h>
#include <hash.h> // for signet block challenge hash
#include <script/interpreter.h>
#include <util/string.h>
#include <util/system.h>
#include <assert.h>
#include <boost/algorithm/string/classification.hpp>
#include <boost/algorithm/string/split.hpp>
static CBlock CreateGenesisBlock(const char* pszTimestamp, const CScript& genesisOutputScript, uint32_t nTime, uint32_t nNonce, uint32_t nBits, int32_t nVersion, const CAmount& genesisReward)
{
CMutableTransaction txNew;
@ -528,8 +526,7 @@ void CRegTestParams::UpdateActivationParametersFromArgs(const ArgsManager& args)
if (!args.IsArgSet("-vbparams")) return;
for (const std::string& strDeployment : args.GetArgs("-vbparams")) {
std::vector<std::string> vDeploymentParams;
boost::split(vDeploymentParams, strDeployment, boost::is_any_of(":"));
std::vector<std::string> vDeploymentParams = SplitString(strDeployment, ':');
if (vDeploymentParams.size() < 3 || 4 < vDeploymentParams.size()) {
throw std::runtime_error("Version bits parameters malformed, expecting deployment:start:end[:min_activation_height]");
}

View file

@ -32,8 +32,6 @@
#include <any>
#include <string>
#include <boost/algorithm/string.hpp>
#include <univalue.h>
using node::GetTransaction;
@ -191,8 +189,7 @@ static bool rest_headers(const std::any& context,
return false;
std::string param;
const RESTResponseFormat rf = ParseDataFormat(param, strURIPart);
std::vector<std::string> path;
boost::split(path, param, boost::is_any_of("/"));
std::vector<std::string> path = SplitString(param, '/');
std::string raw_count;
std::string hashStr;
@ -362,8 +359,7 @@ static bool rest_filter_header(const std::any& context, HTTPRequest* req, const
std::string param;
const RESTResponseFormat rf = ParseDataFormat(param, strURIPart);
std::vector<std::string> uri_parts;
boost::split(uri_parts, param, boost::is_any_of("/"));
std::vector<std::string> uri_parts = SplitString(param, '/');
std::string raw_count;
std::string raw_blockhash;
if (uri_parts.size() == 3) {
@ -483,8 +479,7 @@ static bool rest_block_filter(const std::any& context, HTTPRequest* req, const s
const RESTResponseFormat rf = ParseDataFormat(param, strURIPart);
// request is sent over URI scheme /rest/blockfilter/filtertype/blockhash
std::vector<std::string> uri_parts;
boost::split(uri_parts, param, boost::is_any_of("/"));
std::vector<std::string> uri_parts = SplitString(param, '/');
if (uri_parts.size() != 2) {
return RESTERR(req, HTTP_BAD_REQUEST, "Invalid URI format. Expected /rest/blockfilter/<filtertype>/<blockhash>");
}
@ -712,7 +707,7 @@ static bool rest_getutxos(const std::any& context, HTTPRequest* req, const std::
if (param.length() > 1)
{
std::string strUriParams = param.substr(1);
boost::split(uriParts, strUriParams, boost::is_any_of("/"));
uriParts = SplitString(strUriParams, '/');
}
// throw exception in case of an empty request

View file

@ -9,10 +9,9 @@
#include <shutdown.h>
#include <sync.h>
#include <util/strencodings.h>
#include <util/string.h>
#include <util/system.h>
#include <boost/algorithm/string/classification.hpp>
#include <boost/algorithm/string/split.hpp>
#include <boost/signals2/signal.hpp>
#include <cassert>
@ -407,8 +406,7 @@ static inline JSONRPCRequest transformNamedArguments(const JSONRPCRequest& in, c
// Process expected parameters.
int hole = 0;
for (const std::string &argNamePattern: argNames) {
std::vector<std::string> vargNames;
boost::algorithm::split(vargNames, argNamePattern, boost::algorithm::is_any_of("|"));
std::vector<std::string> vargNames = SplitString(argNamePattern, '|');
auto fr = argsIn.end();
for (const std::string & argName : vargNames) {
fr = argsIn.find(argName);

View file

@ -16,9 +16,6 @@
#include <tuple>
#include <boost/algorithm/string/classification.hpp>
#include <boost/algorithm/string/split.hpp>
const std::string UNIX_EPOCH_TIME = "UNIX epoch time";
const std::string EXAMPLE_ADDRESS[2] = {"bc1q09vm5lfy0j5reeulh4x5752q25uqqvz34hufdl", "bc1q02ad21edsxd23d32dfgqqsz4vv4nmtfzuklhy3"};
@ -514,8 +511,7 @@ RPCHelpMan::RPCHelpMan(std::string name, std::string description, std::vector<RP
{
std::set<std::string> named_args;
for (const auto& arg : m_args) {
std::vector<std::string> names;
boost::split(names, arg.m_names, boost::is_any_of("|"));
std::vector<std::string> names = SplitString(arg.m_names, '|');
// Should have unique named arguments
for (const std::string& name : names) {
CHECK_NONFATAL(named_args.insert(name).second);
@ -666,8 +662,7 @@ UniValue RPCHelpMan::GetArgMap() const
UniValue arr{UniValue::VARR};
for (int i{0}; i < int(m_args.size()); ++i) {
const auto& arg = m_args.at(i);
std::vector<std::string> arg_names;
boost::split(arg_names, arg.m_names, boost::is_any_of("|"));
std::vector<std::string> arg_names = SplitString(arg.m_names, '|');
for (const auto& arg_name : arg_names) {
UniValue map{UniValue::VARR};
map.push_back(m_name);

View file

@ -11,8 +11,8 @@
#include <streams.h>
#include <univalue.h>
#include <util/strencodings.h>
#include <util/string.h>
#include <boost/algorithm/string.hpp>
#include <cstdint>
#include <string>
#include <vector>
@ -130,8 +130,7 @@ unsigned int ParseScriptFlags(const std::string& str)
if (str.empty()) return 0;
unsigned int flags = 0;
std::vector<std::string> words;
boost::algorithm::split(words, str, boost::algorithm::is_any_of(","));
std::vector<std::string> words = SplitString(str, ',');
for (const std::string& word : words) {
auto it = FLAG_NAMES.find(word);

View file

@ -224,6 +224,7 @@ FUZZ_TARGET(string)
int64_t amount_out;
(void)ParseFixedPoint(random_string_1, fuzzed_data_provider.ConsumeIntegralInRange<int>(0, 1024), &amount_out);
}
(void)SplitString(random_string_1, fuzzed_data_provider.ConsumeIntegral<char>());
{
(void)Untranslated(random_string_1);
const bilingual_str bs1{random_string_1, random_string_2};

View file

@ -31,8 +31,6 @@
#include <map>
#include <string>
#include <boost/algorithm/string/classification.hpp>
#include <boost/algorithm/string/split.hpp>
#include <boost/test/unit_test.hpp>
#include <univalue.h>
@ -70,8 +68,7 @@ unsigned int ParseScriptFlags(std::string strFlags)
{
if (strFlags.empty() || strFlags == "NONE") return 0;
unsigned int flags = 0;
std::vector<std::string> words;
boost::algorithm::split(words, strFlags, boost::algorithm::is_any_of(","));
std::vector<std::string> words = SplitString(strFlags, ',');
for (const std::string& word : words)
{

View file

@ -2349,6 +2349,55 @@ BOOST_AUTO_TEST_CASE(test_spanparsing)
BOOST_CHECK_EQUAL(SpanToStr(results[3]), "");
}
BOOST_AUTO_TEST_CASE(test_SplitString)
{
// Empty string.
{
std::vector<std::string> result = SplitString("", '-');
BOOST_CHECK_EQUAL(result.size(), 1);
BOOST_CHECK_EQUAL(result[0], "");
}
// Empty items.
{
std::vector<std::string> result = SplitString("-", '-');
BOOST_CHECK_EQUAL(result.size(), 2);
BOOST_CHECK_EQUAL(result[0], "");
BOOST_CHECK_EQUAL(result[1], "");
}
// More empty items.
{
std::vector<std::string> result = SplitString("--", '-');
BOOST_CHECK_EQUAL(result.size(), 3);
BOOST_CHECK_EQUAL(result[0], "");
BOOST_CHECK_EQUAL(result[1], "");
BOOST_CHECK_EQUAL(result[2], "");
}
// Separator is not present.
{
std::vector<std::string> result = SplitString("abc", '-');
BOOST_CHECK_EQUAL(result.size(), 1);
BOOST_CHECK_EQUAL(result[0], "abc");
}
// Basic behavior.
{
std::vector<std::string> result = SplitString("a-b", '-');
BOOST_CHECK_EQUAL(result.size(), 2);
BOOST_CHECK_EQUAL(result[0], "a");
BOOST_CHECK_EQUAL(result[1], "b");
}
// Case-sensitivity of the separator.
{
std::vector<std::string> result = SplitString("AAA", 'a');
BOOST_CHECK_EQUAL(result.size(), 1);
BOOST_CHECK_EQUAL(result[0], "AAA");
}
}
BOOST_AUTO_TEST_CASE(test_LogEscapeMessage)
{
// ASCII and UTF-8 must pass through unaltered.

View file

@ -24,9 +24,7 @@
#include <set>
#include <vector>
#include <boost/algorithm/string/classification.hpp>
#include <boost/algorithm/string/replace.hpp>
#include <boost/algorithm/string/split.hpp>
#include <event2/buffer.h>
#include <event2/bufferevent.h>
@ -347,8 +345,8 @@ void TorController::get_socks_cb(TorControlConnection& _conn, const TorControlRe
for (const auto& line : reply.lines) {
if (0 == line.compare(0, 20, "net/listeners/socks=")) {
const std::string port_list_str = line.substr(20);
std::vector<std::string> port_list;
boost::split(port_list, port_list_str, boost::is_any_of(" "));
std::vector<std::string> port_list = SplitString(port_list_str, ' ');
for (auto& portstr : port_list) {
if (portstr.empty()) continue;
if ((portstr[0] == '"' || portstr[0] == '\'') && portstr.size() >= 2 && (*portstr.rbegin() == portstr[0])) {
@ -542,8 +540,10 @@ void TorController::protocolinfo_cb(TorControlConnection& _conn, const TorContro
if (l.first == "AUTH") {
std::map<std::string,std::string> m = ParseTorReplyMapping(l.second);
std::map<std::string,std::string>::iterator i;
if ((i = m.find("METHODS")) != m.end())
boost::split(methods, i->second, boost::is_any_of(","));
if ((i = m.find("METHODS")) != m.end()) {
std::vector<std::string> m_vec = SplitString(i->second, ',');
methods = std::set<std::string>(m_vec.begin(), m_vec.end());
}
if ((i = m.find("COOKIEFILE")) != m.end())
cookiefile = i->second;
} else if (l.first == "VERSION") {

View file

@ -48,20 +48,4 @@ Span<const char> Expr(Span<const char>& sp)
return ret;
}
std::vector<Span<const char>> Split(const Span<const char>& sp, char sep)
{
std::vector<Span<const char>> ret;
auto it = sp.begin();
auto start = it;
while (it != sp.end()) {
if (*it == sep) {
ret.emplace_back(start, it);
start = it + 1;
}
++it;
}
ret.emplace_back(start, it);
return ret;
}
} // namespace spanparsing

View file

@ -43,7 +43,22 @@ Span<const char> Expr(Span<const char>& sp);
* Note that this function does not care about braces, so splitting
* "foo(bar(1),2),3) on ',' will return {"foo(bar(1)", "2)", "3)"}.
*/
std::vector<Span<const char>> Split(const Span<const char>& sp, char sep);
template <typename T = Span<const char>>
std::vector<T> Split(const Span<const char>& sp, char sep)
{
std::vector<T> ret;
auto it = sp.begin();
auto start = it;
while (it != sp.end()) {
if (*it == sep) {
ret.emplace_back(start, it);
start = it + 1;
}
++it;
}
ret.emplace_back(start, it);
return ret;
}
} // namespace spanparsing

View file

@ -6,6 +6,7 @@
#define BITCOIN_UTIL_STRING_H
#include <attributes.h>
#include <util/spanparsing.h>
#include <algorithm>
#include <array>
@ -15,6 +16,11 @@
#include <string>
#include <vector>
[[nodiscard]] inline std::vector<std::string> SplitString(std::string_view str, char sep)
{
return spanparsing::Split<std::string>(str, sep);
}
[[nodiscard]] inline std::string TrimString(const std::string& str, const std::string& pattern = " \f\n\r\t\v")
{
std::string::size_type front = str.find_first_not_of(pattern);

View file

@ -26,8 +26,6 @@
#include <tuple>
#include <string>
#include <boost/algorithm/string.hpp>
#include <univalue.h>
@ -546,8 +544,7 @@ RPCHelpMan importwallet()
if (line.empty() || line[0] == '#')
continue;
std::vector<std::string> vstr;
boost::split(vstr, line, boost::is_any_of(" "));
std::vector<std::string> vstr = SplitString(line, ' ');
if (vstr.size() < 2)
continue;
CKey key = DecodeSecret(vstr[0]);