From faa62c0112f2b7ab69c80a5178f5b79217bed0a6 Mon Sep 17 00:00:00 2001 From: MarcoFalke <*~=`'#}+{/-|&$^_@721217.xyz> Date: Tue, 30 Jul 2024 13:23:37 +0200 Subject: [PATCH] util: Add ConstevalFormatString The type is used to wrap a format string once it has been compile-time checked to contain the right number of format specifiers. --- src/test/CMakeLists.txt | 1 + src/test/util_string_tests.cpp | 86 ++++++++++++++++++++++++++++++++++ src/util/string.h | 63 ++++++++++++++++++++++++- 3 files changed, 149 insertions(+), 1 deletion(-) create mode 100644 src/test/util_string_tests.cpp diff --git a/src/test/CMakeLists.txt b/src/test/CMakeLists.txt index 92b39f0497..afa4addb4e 100644 --- a/src/test/CMakeLists.txt +++ b/src/test/CMakeLists.txt @@ -132,6 +132,7 @@ add_executable(test_bitcoin txvalidation_tests.cpp txvalidationcache_tests.cpp uint256_tests.cpp + util_string_tests.cpp util_tests.cpp util_threadnames_tests.cpp validation_block_tests.cpp diff --git a/src/test/util_string_tests.cpp b/src/test/util_string_tests.cpp new file mode 100644 index 0000000000..8b974ffe6f --- /dev/null +++ b/src/test/util_string_tests.cpp @@ -0,0 +1,86 @@ +// Copyright (c) 2024-present The Bitcoin Core developers +// Distributed under the MIT software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. + +#include + +#include + +using namespace util; + +BOOST_AUTO_TEST_SUITE(util_string_tests) + +// Helper to allow compile-time sanity checks while providing the number of +// args directly. Normally PassFmt would be used. +template +inline void PassFmt(util::ConstevalFormatString fmt) +{ + // This was already executed at compile-time, but is executed again at run-time to avoid -Wunused. + decltype(fmt)::Detail_CheckNumFormatSpecifiers(fmt.fmt); +} +template +inline void FailFmtWithError(std::string_view wrong_fmt, std::string_view error) +{ + using ErrType = const char*; + auto check_throw{[error](const ErrType& str) { return str == error; }}; + BOOST_CHECK_EXCEPTION(util::ConstevalFormatString::Detail_CheckNumFormatSpecifiers(wrong_fmt), ErrType, check_throw); +} + +BOOST_AUTO_TEST_CASE(ConstevalFormatString_NumSpec) +{ + PassFmt<0>(""); + PassFmt<0>("%%"); + PassFmt<1>("%s"); + PassFmt<0>("%%s"); + PassFmt<0>("s%%"); + PassFmt<1>("%%%s"); + PassFmt<1>("%s%%"); + PassFmt<0>(" 1$s"); + PassFmt<1>("%1$s"); + PassFmt<1>("%1$s%1$s"); + PassFmt<2>("%2$s"); + PassFmt<2>("%2$s 4$s %2$s"); + PassFmt<129>("%129$s 999$s %2$s"); + PassFmt<1>("%02d"); + PassFmt<1>("%+2s"); + PassFmt<1>("%.6i"); + PassFmt<1>("%5.2f"); + PassFmt<1>("%#x"); + PassFmt<1>("%1$5i"); + PassFmt<1>("%1$-5i"); + PassFmt<1>("%1$.5i"); + // tinyformat accepts almost any "type" spec, even '%', or '_', or '\n'. + PassFmt<1>("%123%"); + PassFmt<1>("%123%s"); + PassFmt<1>("%_"); + PassFmt<1>("%\n"); + + // The `*` specifier behavior is unsupported and can lead to runtime + // errors when used in a ConstevalFormatString. Please refer to the + // note in the ConstevalFormatString docs. + PassFmt<1>("%*c"); + PassFmt<2>("%2$*3$d"); + PassFmt<1>("%.*f"); + + auto err_mix{"Format specifiers must be all positional or all non-positional!"}; + FailFmtWithError<1>("%s%1$s", err_mix); + + auto err_num{"Format specifier count must match the argument count!"}; + FailFmtWithError<1>("", err_num); + FailFmtWithError<0>("%s", err_num); + FailFmtWithError<2>("%s", err_num); + FailFmtWithError<0>("%1$s", err_num); + FailFmtWithError<2>("%1$s", err_num); + + auto err_0_pos{"Positional format specifier must have position of at least 1"}; + FailFmtWithError<1>("%$s", err_0_pos); + FailFmtWithError<1>("%$", err_0_pos); + FailFmtWithError<0>("%0$", err_0_pos); + FailFmtWithError<0>("%0$s", err_0_pos); + + auto err_term{"Format specifier incorrectly terminated by end of string"}; + FailFmtWithError<1>("%", err_term); + FailFmtWithError<1>("%1$", err_term); +} + +BOOST_AUTO_TEST_SUITE_END() diff --git a/src/util/string.h b/src/util/string.h index 30c0a0d6c1..bdf074a9a6 100644 --- a/src/util/string.h +++ b/src/util/string.h @@ -1,4 +1,4 @@ -// Copyright (c) 2019-2022 The Bitcoin Core developers +// Copyright (c) 2019-present The Bitcoin Core developers // Distributed under the MIT software license, see the accompanying // file COPYING or http://www.opensource.org/licenses/mit-license.php. @@ -17,6 +17,67 @@ #include namespace util { +/** + * @brief A wrapper for a compile-time partially validated format string + * + * This struct can be used to enforce partial compile-time validation of format + * strings, to reduce the likelihood of tinyformat throwing exceptions at + * run-time. Validation is partial to try and prevent the most common errors + * while avoiding re-implementing the entire parsing logic. + * + * @note Counting of `*` dynamic width and precision fields (such as `%*c`, + * `%2$*3$d`, `%.*f`) is not implemented to minimize code complexity as long as + * they are not used in the codebase. Usage of these fields is not counted and + * can lead to run-time exceptions. Code wanting to use the `*` specifier can + * side-step this struct and call tinyformat directly. + */ +template +struct ConstevalFormatString { + const char* const fmt; + consteval ConstevalFormatString(const char* str) : fmt{str} { Detail_CheckNumFormatSpecifiers(fmt); } + constexpr static void Detail_CheckNumFormatSpecifiers(std::string_view str) + { + unsigned count_normal{0}; // Number of "normal" specifiers, like %s + unsigned count_pos{0}; // Max number in positional specifier, like %8$s + for (auto it{str.begin()}; it < str.end();) { + if (*it != '%') { + ++it; + continue; + } + + if (++it >= str.end()) throw "Format specifier incorrectly terminated by end of string"; + if (*it == '%') { + // Percent escape: %% + ++it; + continue; + } + + unsigned maybe_num{0}; + while ('0' <= *it && *it <= '9') { + maybe_num *= 10; + maybe_num += *it - '0'; + ++it; + }; + + if (*it == '$') { + // Positional specifier, like %8$s + if (maybe_num == 0) throw "Positional format specifier must have position of at least 1"; + count_pos = std::max(count_pos, maybe_num); + if (++it >= str.end()) throw "Format specifier incorrectly terminated by end of string"; + } else { + // Non-positional specifier, like %s + ++count_normal; + ++it; + } + // The remainder "[flags][width][.precision][length]type" of the + // specifier is not checked. Parsing continues with the next '%'. + } + if (count_normal && count_pos) throw "Format specifiers must be all positional or all non-positional!"; + unsigned count{count_normal | count_pos}; + if (num_params != count) throw "Format specifier count must match the argument count!"; + } +}; + void ReplaceAll(std::string& in_out, const std::string& search, const std::string& substitute); /** Split a string on any char found in separators, returning a vector.