Merge bitcoin/bitcoin#22953: refactor: introduce single-separator split helper (boost::split replacement)

a62e84438d fuzz: add `SplitString` fuzz target (MarcoFalke) 4fad7e46d9 test: add unit tests for `SplitString` helper (Kiminuo) 9cc8e876e4 refactor: introduce single-separator split helper `SplitString` (Sebastian Falbesoner) Pull request description: This PR adds a simple string split helper `SplitString` that takes use of the spanparsing `Split` function that was first introduced in #13697 (commit fe8a7dcd78). This enables to replace most calls to `boost::split`, in the cases where only a single separator character is used. Note that while previous attempts to replace `boost::split` were controversial (e.g. #13751), this one has a trivial implementation: it merely uses an internal helper (that is unit tested and in regular use with output descriptiors) and converts its result from spans to strings. As a drawback though, not all `boost::split` instances can be tackled. As a possible optimization, one could return a vector of `std::string_view`s (available since C++17) instead of strings, to avoid copies. This would need more carefulness on the caller sites though, to avoid potential lifetime issues, and it's probably not worth it, considering that none of the places where strings are split are really performance-critical. ACKs for top commit: martinus: Code review ACK a62e84438d. Ran all tests. I also like that with `boost::split` it was not obvious that the resulting container was cleared, and with `SplitString` API that's obvious. Tree-SHA512: 10cb22619ebe46831b1f8e83584a89381a036b54c88701484ac00743e2a62cfe52c9f3ecdbb2d0815e536c99034558277cc263600ec3f3588b291c07eef8ed24
2025-04-29 14:59:39 -04:00 · 2022-04-26 09:30:34 +01:00 · 2022-04-26 09:30:34 +01:00 · f436bfd126
commit f436bfd126
parent a19f641a80 a62e84438d
14 changed files with 97 additions and 71 deletions
--- a/src/bitcoin-tx.cpp
+++ b/src/bitcoin-tx.cpp
@ -31,8 +31,6 @@
 #include <memory>
 #include <stdio.h>

-#include <boost/algorithm/string.hpp>
-
 static bool fCreateBlank;
 static std::map<std::string,UniValue> registers;
 static const int CONTINUE_EXECUTION=-1;
@ -251,8 +249,7 @@ static T TrimAndParse(const std::string& int_str, const std::string& err)

 static void MutateTxAddInput(CMutableTransaction& tx, const std::string& strInput)
 {
-    std::vector<std::string> vStrInputParts;
-    boost::split(vStrInputParts, strInput, boost::is_any_of(":"));
+    std::vector<std::string> vStrInputParts = SplitString(strInput, ':');

    // separate TXID:VOUT in string
    if (vStrInputParts.size()<2)
@ -287,8 +284,7 @@ static void MutateTxAddInput(CMutableTransaction& tx, const std::string& strInpu
 static void MutateTxAddOutAddr(CMutableTransaction& tx, const std::string& strInput)
 {
    // Separate into VALUE:ADDRESS
-    std::vector<std::string> vStrInputParts;
-    boost::split(vStrInputParts, strInput, boost::is_any_of(":"));
+    std::vector<std::string> vStrInputParts = SplitString(strInput, ':');

    if (vStrInputParts.size() != 2)
        throw std::runtime_error("TX output missing or too many separators");
@ -312,8 +308,7 @@ static void MutateTxAddOutAddr(CMutableTransaction& tx, const std::string& strIn
 static void MutateTxAddOutPubKey(CMutableTransaction& tx, const std::string& strInput)
 {
    // Separate into VALUE:PUBKEY[:FLAGS]
-    std::vector<std::string> vStrInputParts;
-    boost::split(vStrInputParts, strInput, boost::is_any_of(":"));
+    std::vector<std::string> vStrInputParts = SplitString(strInput, ':');

    if (vStrInputParts.size() < 2 || vStrInputParts.size() > 3)
        throw std::runtime_error("TX output missing or too many separators");
@ -356,8 +351,7 @@ static void MutateTxAddOutPubKey(CMutableTransaction& tx, const std::string& str
 static void MutateTxAddOutMultiSig(CMutableTransaction& tx, const std::string& strInput)
 {
    // Separate into VALUE:REQUIRED:NUMKEYS:PUBKEY1:PUBKEY2:....[:FLAGS]
-    std::vector<std::string> vStrInputParts;
-    boost::split(vStrInputParts, strInput, boost::is_any_of(":"));
+    std::vector<std::string> vStrInputParts = SplitString(strInput, ':');

    // Check that there are enough parameters
    if (vStrInputParts.size()<3)
@ -460,8 +454,7 @@ static void MutateTxAddOutData(CMutableTransaction& tx, const std::string& strIn
 static void MutateTxAddOutScript(CMutableTransaction& tx, const std::string& strInput)
 {
    // separate VALUE:SCRIPT[:FLAGS]
-    std::vector<std::string> vStrInputParts;
-    boost::split(vStrInputParts, strInput, boost::is_any_of(":"));
+    std::vector<std::string> vStrInputParts = SplitString(strInput, ':');
    if (vStrInputParts.size() < 2)
        throw std::runtime_error("TX output missing separator");

--- a/src/chainparams.cpp
+++ b/src/chainparams.cpp
@ -10,13 +10,11 @@
 #include <deploymentinfo.h>
 #include <hash.h> // for signet block challenge hash
 #include <script/interpreter.h>
+#include <util/string.h>
 #include <util/system.h>

 #include <assert.h>

-#include <boost/algorithm/string/classification.hpp>
-#include <boost/algorithm/string/split.hpp>
-
 static CBlock CreateGenesisBlock(const char* pszTimestamp, const CScript& genesisOutputScript, uint32_t nTime, uint32_t nNonce, uint32_t nBits, int32_t nVersion, const CAmount& genesisReward)
 {
    CMutableTransaction txNew;
@ -528,8 +526,7 @@ void CRegTestParams::UpdateActivationParametersFromArgs(const ArgsManager& args)
    if (!args.IsArgSet("-vbparams")) return;

    for (const std::string& strDeployment : args.GetArgs("-vbparams")) {
-        std::vector<std::string> vDeploymentParams;
-        boost::split(vDeploymentParams, strDeployment, boost::is_any_of(":"));
+        std::vector<std::string> vDeploymentParams = SplitString(strDeployment, ':');
        if (vDeploymentParams.size() < 3 || 4 < vDeploymentParams.size()) {
            throw std::runtime_error("Version bits parameters malformed, expecting deployment:start:end[:min_activation_height]");
        }
--- a/src/rest.cpp
+++ b/src/rest.cpp
@ -32,8 +32,6 @@
 #include <any>
 #include <string>

-#include <boost/algorithm/string.hpp>
-
 #include <univalue.h>

 using node::GetTransaction;
@ -191,8 +189,7 @@ static bool rest_headers(const std::any& context,
        return false;
    std::string param;
    const RESTResponseFormat rf = ParseDataFormat(param, strURIPart);
-    std::vector<std::string> path;
-    boost::split(path, param, boost::is_any_of("/"));
+    std::vector<std::string> path = SplitString(param, '/');

    std::string raw_count;
    std::string hashStr;
@ -362,8 +359,7 @@ static bool rest_filter_header(const std::any& context, HTTPRequest* req, const
    std::string param;
    const RESTResponseFormat rf = ParseDataFormat(param, strURIPart);

-    std::vector<std::string> uri_parts;
-    boost::split(uri_parts, param, boost::is_any_of("/"));
+    std::vector<std::string> uri_parts = SplitString(param, '/');
    std::string raw_count;
    std::string raw_blockhash;
    if (uri_parts.size() == 3) {
@ -483,8 +479,7 @@ static bool rest_block_filter(const std::any& context, HTTPRequest* req, const s
    const RESTResponseFormat rf = ParseDataFormat(param, strURIPart);

    // request is sent over URI scheme /rest/blockfilter/filtertype/blockhash
-    std::vector<std::string> uri_parts;
-    boost::split(uri_parts, param, boost::is_any_of("/"));
+    std::vector<std::string> uri_parts = SplitString(param, '/');
    if (uri_parts.size() != 2) {
        return RESTERR(req, HTTP_BAD_REQUEST, "Invalid URI format. Expected /rest/blockfilter/<filtertype>/<blockhash>");
    }
@ -712,7 +707,7 @@ static bool rest_getutxos(const std::any& context, HTTPRequest* req, const std::
    if (param.length() > 1)
    {
        std::string strUriParams = param.substr(1);
-        boost::split(uriParts, strUriParams, boost::is_any_of("/"));
+        uriParts = SplitString(strUriParams, '/');
    }

    // throw exception in case of an empty request
--- a/src/rpc/server.cpp
+++ b/src/rpc/server.cpp
@ -9,10 +9,9 @@
 #include <shutdown.h>
 #include <sync.h>
 #include <util/strencodings.h>
+#include <util/string.h>
 #include <util/system.h>

-#include <boost/algorithm/string/classification.hpp>
-#include <boost/algorithm/string/split.hpp>
 #include <boost/signals2/signal.hpp>

 #include <cassert>
@ -407,8 +406,7 @@ static inline JSONRPCRequest transformNamedArguments(const JSONRPCRequest& in, c
    // Process expected parameters.
    int hole = 0;
    for (const std::string &argNamePattern: argNames) {
-        std::vector<std::string> vargNames;
-        boost::algorithm::split(vargNames, argNamePattern, boost::algorithm::is_any_of("|"));
+        std::vector<std::string> vargNames = SplitString(argNamePattern, '|');
        auto fr = argsIn.end();
        for (const std::string & argName : vargNames) {
            fr = argsIn.find(argName);
--- a/src/rpc/util.cpp
+++ b/src/rpc/util.cpp
@ -16,9 +16,6 @@

 #include <tuple>

-#include <boost/algorithm/string/classification.hpp>
-#include <boost/algorithm/string/split.hpp>
-
 const std::string UNIX_EPOCH_TIME = "UNIX epoch time";
 const std::string EXAMPLE_ADDRESS[2] = {"bc1q09vm5lfy0j5reeulh4x5752q25uqqvz34hufdl", "bc1q02ad21edsxd23d32dfgqqsz4vv4nmtfzuklhy3"};

@ -514,8 +511,7 @@ RPCHelpMan::RPCHelpMan(std::string name, std::string description, std::vector<RP
 {
    std::set<std::string> named_args;
    for (const auto& arg : m_args) {
-        std::vector<std::string> names;
-        boost::split(names, arg.m_names, boost::is_any_of("|"));
+        std::vector<std::string> names = SplitString(arg.m_names, '|');
        // Should have unique named arguments
        for (const std::string& name : names) {
            CHECK_NONFATAL(named_args.insert(name).second);
@ -666,8 +662,7 @@ UniValue RPCHelpMan::GetArgMap() const
    UniValue arr{UniValue::VARR};
    for (int i{0}; i < int(m_args.size()); ++i) {
        const auto& arg = m_args.at(i);
-        std::vector<std::string> arg_names;
-        boost::split(arg_names, arg.m_names, boost::is_any_of("|"));
+        std::vector<std::string> arg_names = SplitString(arg.m_names, '|');
        for (const auto& arg_name : arg_names) {
            UniValue map{UniValue::VARR};
            map.push_back(m_name);
--- a/src/test/fuzz/script_assets_test_minimizer.cpp
+++ b/src/test/fuzz/script_assets_test_minimizer.cpp
@ -11,8 +11,8 @@
 #include <streams.h>
 #include <univalue.h>
 #include <util/strencodings.h>
+#include <util/string.h>

-#include <boost/algorithm/string.hpp>
 #include <cstdint>
 #include <string>
 #include <vector>
@ -130,8 +130,7 @@ unsigned int ParseScriptFlags(const std::string& str)
    if (str.empty()) return 0;

    unsigned int flags = 0;
-    std::vector<std::string> words;
-    boost::algorithm::split(words, str, boost::algorithm::is_any_of(","));
+    std::vector<std::string> words = SplitString(str, ',');

    for (const std::string& word : words) {
        auto it = FLAG_NAMES.find(word);
--- a/src/test/fuzz/string.cpp
+++ b/src/test/fuzz/string.cpp
@ -224,6 +224,7 @@ FUZZ_TARGET(string)
        int64_t amount_out;
        (void)ParseFixedPoint(random_string_1, fuzzed_data_provider.ConsumeIntegralInRange<int>(0, 1024), &amount_out);
    }
+    (void)SplitString(random_string_1, fuzzed_data_provider.ConsumeIntegral<char>());
    {
        (void)Untranslated(random_string_1);
        const bilingual_str bs1{random_string_1, random_string_2};
--- a/src/test/transaction_tests.cpp
+++ b/src/test/transaction_tests.cpp
@ -31,8 +31,6 @@
 #include <map>
 #include <string>

-#include <boost/algorithm/string/classification.hpp>
-#include <boost/algorithm/string/split.hpp>
 #include <boost/test/unit_test.hpp>

 #include <univalue.h>
@ -70,8 +68,7 @@ unsigned int ParseScriptFlags(std::string strFlags)
 {
    if (strFlags.empty() || strFlags == "NONE") return 0;
    unsigned int flags = 0;
-    std::vector<std::string> words;
-    boost::algorithm::split(words, strFlags, boost::algorithm::is_any_of(","));
+    std::vector<std::string> words = SplitString(strFlags, ',');

    for (const std::string& word : words)
    {
--- a/src/test/util_tests.cpp
+++ b/src/test/util_tests.cpp
@ -2349,6 +2349,55 @@ BOOST_AUTO_TEST_CASE(test_spanparsing)
    BOOST_CHECK_EQUAL(SpanToStr(results[3]), "");
 }

+BOOST_AUTO_TEST_CASE(test_SplitString)
+{
+    // Empty string.
+    {
+        std::vector<std::string> result = SplitString("", '-');
+        BOOST_CHECK_EQUAL(result.size(), 1);
+        BOOST_CHECK_EQUAL(result[0], "");
+    }
+
+    // Empty items.
+    {
+        std::vector<std::string> result = SplitString("-", '-');
+        BOOST_CHECK_EQUAL(result.size(), 2);
+        BOOST_CHECK_EQUAL(result[0], "");
+        BOOST_CHECK_EQUAL(result[1], "");
+    }
+
+    // More empty items.
+    {
+        std::vector<std::string> result = SplitString("--", '-');
+        BOOST_CHECK_EQUAL(result.size(), 3);
+        BOOST_CHECK_EQUAL(result[0], "");
+        BOOST_CHECK_EQUAL(result[1], "");
+        BOOST_CHECK_EQUAL(result[2], "");
+    }
+
+    // Separator is not present.
+    {
+        std::vector<std::string> result = SplitString("abc", '-');
+        BOOST_CHECK_EQUAL(result.size(), 1);
+        BOOST_CHECK_EQUAL(result[0], "abc");
+    }
+
+    // Basic behavior.
+    {
+        std::vector<std::string> result = SplitString("a-b", '-');
+        BOOST_CHECK_EQUAL(result.size(), 2);
+        BOOST_CHECK_EQUAL(result[0], "a");
+        BOOST_CHECK_EQUAL(result[1], "b");
+    }
+
+    // Case-sensitivity of the separator.
+    {
+        std::vector<std::string> result = SplitString("AAA", 'a');
+        BOOST_CHECK_EQUAL(result.size(), 1);
+        BOOST_CHECK_EQUAL(result[0], "AAA");
+    }
+}
+
 BOOST_AUTO_TEST_CASE(test_LogEscapeMessage)
 {
    // ASCII and UTF-8 must pass through unaltered.
--- a/src/torcontrol.cpp
+++ b/src/torcontrol.cpp
@ -24,9 +24,7 @@
 #include <set>
 #include <vector>

-#include <boost/algorithm/string/classification.hpp>
 #include <boost/algorithm/string/replace.hpp>
-#include <boost/algorithm/string/split.hpp>

 #include <event2/buffer.h>
 #include <event2/bufferevent.h>
@ -347,8 +345,8 @@ void TorController::get_socks_cb(TorControlConnection& _conn, const TorControlRe
        for (const auto& line : reply.lines) {
            if (0 == line.compare(0, 20, "net/listeners/socks=")) {
                const std::string port_list_str = line.substr(20);
-                std::vector<std::string> port_list;
-                boost::split(port_list, port_list_str, boost::is_any_of(" "));
+                std::vector<std::string> port_list = SplitString(port_list_str, ' ');
+
                for (auto& portstr : port_list) {
                    if (portstr.empty()) continue;
                    if ((portstr[0] == '"' || portstr[0] == '\'') && portstr.size() >= 2 && (*portstr.rbegin() == portstr[0])) {
@ -542,8 +540,10 @@ void TorController::protocolinfo_cb(TorControlConnection& _conn, const TorContro
            if (l.first == "AUTH") {
                std::map<std::string,std::string> m = ParseTorReplyMapping(l.second);
                std::map<std::string,std::string>::iterator i;
-                if ((i = m.find("METHODS")) != m.end())
-                    boost::split(methods, i->second, boost::is_any_of(","));
+                if ((i = m.find("METHODS")) != m.end()) {
+                    std::vector<std::string> m_vec = SplitString(i->second, ',');
+                    methods = std::set<std::string>(m_vec.begin(), m_vec.end());
+                }
                if ((i = m.find("COOKIEFILE")) != m.end())
                    cookiefile = i->second;
            } else if (l.first == "VERSION") {
--- a/src/util/spanparsing.cpp
+++ b/src/util/spanparsing.cpp
@ -48,20 +48,4 @@ Span<const char> Expr(Span<const char>& sp)
    return ret;
 }

-std::vector<Span<const char>> Split(const Span<const char>& sp, char sep)
-{
-    std::vector<Span<const char>> ret;
-    auto it = sp.begin();
-    auto start = it;
-    while (it != sp.end()) {
-        if (*it == sep) {
-            ret.emplace_back(start, it);
-            start = it + 1;
-        }
-        ++it;
-    }
-    ret.emplace_back(start, it);
-    return ret;
-}
-
 } // namespace spanparsing
--- a/src/util/spanparsing.h
+++ b/src/util/spanparsing.h
@ -43,7 +43,22 @@ Span<const char> Expr(Span<const char>& sp);
 * Note that this function does not care about braces, so splitting
 * "foo(bar(1),2),3) on ',' will return {"foo(bar(1)", "2)", "3)"}.
 */
-std::vector<Span<const char>> Split(const Span<const char>& sp, char sep);
+template <typename T = Span<const char>>
+std::vector<T> Split(const Span<const char>& sp, char sep)
+{
+    std::vector<T> ret;
+    auto it = sp.begin();
+    auto start = it;
+    while (it != sp.end()) {
+        if (*it == sep) {
+            ret.emplace_back(start, it);
+            start = it + 1;
+        }
+        ++it;
+    }
+    ret.emplace_back(start, it);
+    return ret;
+}

 } // namespace spanparsing

--- a/src/util/string.h
+++ b/src/util/string.h
@ -6,6 +6,7 @@
 #define BITCOIN_UTIL_STRING_H

 #include <attributes.h>
+#include <util/spanparsing.h>

 #include <algorithm>
 #include <array>
@ -15,6 +16,11 @@
 #include <string>
 #include <vector>

+[[nodiscard]] inline std::vector<std::string> SplitString(std::string_view str, char sep)
+{
+    return spanparsing::Split<std::string>(str, sep);
+}
+
 [[nodiscard]] inline std::string TrimString(const std::string& str, const std::string& pattern = " \f\n\r\t\v")
 {
    std::string::size_type front = str.find_first_not_of(pattern);
--- a/src/wallet/rpc/backup.cpp
+++ b/src/wallet/rpc/backup.cpp
@ -26,8 +26,6 @@
 #include <tuple>
 #include <string>

-#include <boost/algorithm/string.hpp>
-
 #include <univalue.h>


@ -546,8 +544,7 @@ RPCHelpMan importwallet()
            if (line.empty() || line[0] == '#')
                continue;

-            std::vector<std::string> vstr;
-            boost::split(vstr, line, boost::is_any_of(" "));
+            std::vector<std::string> vstr = SplitString(line, ' ');
            if (vstr.size() < 2)
                continue;
            CKey key = DecodeSecret(vstr[0]);