mirror of
https://github.com/bitcoin/bitcoin.git
synced 2025-04-29 14:59:39 -04:00
string: add LineReader
This is a helper struct to parse HTTP messages from data in buffers from sockets. HTTP messages begin with headers, which are lines terminated by either a bare line feed (\n) or a CRLF pair (\r\n), followed by an arbitrary amount of body data. Leading and trailing whitespace is trimmed from the header (field) lines but not from the body. https://httpwg.org/specs/rfc9110.html#rfc.section.5
This commit is contained in:
parent
5c5de980b1
commit
43b4d0d2f3
5 changed files with 116 additions and 0 deletions
|
@ -161,4 +161,47 @@ BOOST_AUTO_TEST_CASE(case_insensitive_comparator_test)
|
|||
BOOST_CHECK(cmp("a", "\xe4"));
|
||||
}
|
||||
|
||||
// Exercises LineReader: line splitting and trimming, the max_read limit
// applied while searching for a line terminator, and fixed-length reads.
BOOST_AUTO_TEST_CASE(line_reader_test)
{
    {
        // Check three lines terminated by \n, \r\n, and end of buffer, trimming whitespace
        const std::vector<std::byte> input{StringToBuffer("once upon a time\n there was a dog \r\nwho liked food")};
        LineReader reader(input, /*max_read=*/128);
        const std::optional<std::string> line1{reader.ReadLine()};
        BOOST_CHECK_EQUAL(reader.Left(), 33U);
        const std::optional<std::string> line2{reader.ReadLine()};
        BOOST_CHECK_EQUAL(reader.Left(), 14U);
        const std::optional<std::string> line3{reader.ReadLine()};
        // The unterminated final line consumes the rest of the buffer
        BOOST_CHECK_EQUAL(reader.Left(), 0U);
        const std::optional<std::string> line4{reader.ReadLine()};
        // Use REQUIRE for the optionals that are dereferenced below, so a
        // missing line is reported directly instead of surfacing as an
        // unrelated std::bad_optional_access from value().
        BOOST_REQUIRE(line1.has_value());
        BOOST_REQUIRE(line2.has_value());
        BOOST_REQUIRE(line3.has_value());
        BOOST_CHECK(!line4.has_value());
        BOOST_CHECK_EQUAL(line1.value(), "once upon a time");
        BOOST_CHECK_EQUAL(line2.value(), "there was a dog");
        BOOST_CHECK_EQUAL(line3.value(), "who liked food");
    }
    {
        // Do not exceed max_read while searching for EOL
        const std::vector<std::byte> input1{StringToBuffer("once upon a time there was a dog\nwho liked food")};
        LineReader reader1(input1, /*max_read=*/10);
        BOOST_CHECK_THROW(reader1.ReadLine(), std::runtime_error);

        const std::vector<std::byte> input2{StringToBuffer("once upon\n a time there was a dog who liked food")};
        LineReader reader2(input2, /*max_read=*/10);
        BOOST_CHECK_EQUAL(reader2.ReadLine(), "once upon");
        BOOST_CHECK_THROW(reader2.ReadLine(), std::runtime_error);
    }
    {
        // Read specific number of bytes regardless of max_read or \n unless buffer is too short
        const std::vector<std::byte> input{StringToBuffer("once upon a time\n there was a dog \r\nwho liked food")};
        LineReader reader(input, /*max_read=*/1);
        BOOST_CHECK_EQUAL(reader.ReadLength(0), "");
        BOOST_CHECK_EQUAL(reader.ReadLength(3), "onc");
        BOOST_CHECK_EQUAL(reader.ReadLength(8), "e upon a");
        BOOST_CHECK_EQUAL(reader.ReadLength(8), " time\n t");
        BOOST_CHECK_THROW(reader.ReadLength(128), std::runtime_error);
        // A rejected ReadLength() must not consume anything: 50 - (3 + 8 + 8) bytes remain
        BOOST_CHECK_EQUAL(reader.Left(), 31U);
    }
}
|
||||
|
||||
BOOST_AUTO_TEST_SUITE_END()
|
||||
|
|
|
@ -497,3 +497,9 @@ std::optional<uint64_t> ParseByteUnits(std::string_view str, ByteUnit default_mu
|
|||
}
|
||||
return *parsed_num * unit_amount;
|
||||
}
|
||||
|
||||
// Copy every character of `str` (embedded NULs included) into a new byte
// vector, preserving each character's bit pattern.
std::vector<std::byte> StringToBuffer(const std::string& str)
{
    std::vector<std::byte> bytes;
    bytes.reserve(str.size());
    for (const char ch : str) {
        // char -> std::byte keeps the value modulo 256, i.e. the same bits
        bytes.push_back(static_cast<std::byte>(ch));
    }
    return bytes;
}
|
||||
|
|
|
@ -375,6 +375,15 @@ std::string Capitalize(std::string str);
|
|||
*/
|
||||
std::optional<uint64_t> ParseByteUnits(std::string_view str, ByteUnit default_multiplier);
|
||||
|
||||
/**
|
||||
* Returns a byte vector filled with data from a string. Used to test string-
|
||||
* encoded data from a socket like HTTP headers.
|
||||
*
|
||||
* @param[in] str the string to convert into bytes
|
||||
* @returns byte vector
|
||||
*/
|
||||
std::vector<std::byte> StringToBuffer(const std::string& str);
|
||||
|
||||
namespace util {
|
||||
/** consteval version of HexDigit() without the lookup table. */
|
||||
consteval uint8_t ConstevalHexDigit(const char c)
|
||||
|
|
|
@ -13,4 +13,42 @@ void ReplaceAll(std::string& in_out, const std::string& search, const std::strin
|
|||
if (search.empty()) return;
|
||||
in_out = std::regex_replace(in_out, std::regex(search), substitute);
|
||||
}
|
||||
|
||||
LineReader::LineReader(std::span<const std::byte> buffer, size_t max_read)
|
||||
: start(buffer.begin()), end(buffer.end()), max_read(max_read), it(buffer.begin()) {}
|
||||
|
||||
std::optional<std::string> LineReader::ReadLine()
|
||||
{
|
||||
if (it == end) {
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
auto line_start = it;
|
||||
size_t count = 0;
|
||||
while (it != end) {
|
||||
char c = static_cast<char>(*it);
|
||||
++it;
|
||||
++count;
|
||||
if (c == '\n') break;
|
||||
if (count >= max_read) throw std::runtime_error("max_read exceeded by LineReader");
|
||||
}
|
||||
const std::string_view untrimmed_line(reinterpret_cast<const char *>(&*line_start), count);
|
||||
const std::string_view line = TrimStringView(untrimmed_line); // delete trailing \r and/or \n
|
||||
return std::string(line);
|
||||
}
|
||||
|
||||
// Ignores max_read but won't overflow
|
||||
std::string LineReader::ReadLength(size_t len)
|
||||
{
|
||||
if (len == 0) return "";
|
||||
if (Left() < len) throw std::runtime_error("Not enough data in buffer");
|
||||
std::string out(reinterpret_cast<const char*>(&(*it)), len);
|
||||
it += len;
|
||||
return out;
|
||||
}
|
||||
|
||||
size_t LineReader::Left() const
|
||||
{
|
||||
return std::distance(it, end);
|
||||
}
|
||||
} // namespace util
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
#include <cstdint>
|
||||
#include <cstring>
|
||||
#include <locale>
|
||||
#include <optional>
|
||||
#include <sstream>
|
||||
#include <string> // IWYU pragma: export
|
||||
#include <string_view> // IWYU pragma: export
|
||||
|
@ -248,6 +249,25 @@ template <typename T1, size_t PREFIX_LEN>
|
|||
return obj.size() >= PREFIX_LEN &&
|
||||
std::equal(std::begin(prefix), std::end(prefix), std::begin(obj));
|
||||
}
|
||||
|
||||
struct LineReader {
|
||||
const std::span<const std::byte>::iterator start;
|
||||
const std::span<const std::byte>::iterator end;
|
||||
const size_t max_read;
|
||||
std::span<const std::byte>::iterator it;
|
||||
|
||||
explicit LineReader(std::span<const std::byte> buffer, size_t max_read);
|
||||
|
||||
// Returns a string from current iterator position up to next \n
|
||||
// and advances iterator, does not return trailing \n or \r.
|
||||
// Will not search for \n past max_read.
|
||||
std::optional<std::string> ReadLine();
|
||||
// Returns string from current iterator position of specified length
|
||||
// and advances iterator. May exceed max_read but will not read past end of buffer.
|
||||
std::string ReadLength(size_t len);
|
||||
// Returns remaining size of bytes in buffer
|
||||
size_t Left() const;
|
||||
};
|
||||
} // namespace util
|
||||
|
||||
#endif // BITCOIN_UTIL_STRING_H
|
||||
|
|
Loading…
Add table
Reference in a new issue