string: add LineReader

This is a helper struct to parse HTTP messages from data in buffers
from sockets. HTTP messages begin with headers, which are lines
terminated by CRLF (\r\n; a bare \n is also accepted), followed by an
arbitrary amount of body data. Leading and trailing whitespace is trimmed
from the header lines but not from the body.

https://httpwg.org/specs/rfc9110.html#rfc.section.5
This commit is contained in:
Matthew Zipkin 2024-09-27 15:22:17 -04:00 committed by Matthew Zipkin
parent 5c5de980b1
commit 43b4d0d2f3
No known key found for this signature in database
GPG key ID: E7E2984B6289C93A
5 changed files with 116 additions and 0 deletions

View file

@ -161,4 +161,47 @@ BOOST_AUTO_TEST_CASE(case_insensitive_comparator_test)
BOOST_CHECK(cmp("a", "\xe4"));
}
// Exercises LineReader: line splitting and trimming, the max_read limit of
// ReadLine(), and fixed-length reads with ReadLength().
BOOST_AUTO_TEST_CASE(line_reader_test)
{
    {
        // Check three lines terminated by \n, \r\n, and end of buffer, trimming whitespace
        const std::vector<std::byte> input{StringToBuffer("once upon a time\n there was a dog \r\nwho liked food")};
        LineReader reader(input, /*max_read=*/128);
        const std::optional<std::string> line1{reader.ReadLine()};
        BOOST_CHECK_EQUAL(reader.Left(), 33U);
        const std::optional<std::string> line2{reader.ReadLine()};
        BOOST_CHECK_EQUAL(reader.Left(), 14U);
        const std::optional<std::string> line3{reader.ReadLine()};
        // The last line runs to the end of the buffer, so nothing is left afterwards
        BOOST_CHECK_EQUAL(reader.Left(), 0U);
        const std::optional<std::string> line4{reader.ReadLine()};
        // REQUIRE instead of CHECK: the optionals are dereferenced below, and an
        // empty one would otherwise surface as an unrelated bad_optional_access.
        BOOST_REQUIRE(line1.has_value());
        BOOST_REQUIRE(line2.has_value());
        BOOST_REQUIRE(line3.has_value());
        BOOST_CHECK(!line4.has_value());
        BOOST_CHECK_EQUAL(line1.value(), "once upon a time");
        BOOST_CHECK_EQUAL(line2.value(), "there was a dog");
        BOOST_CHECK_EQUAL(line3.value(), "who liked food");
    }
    {
        // Do not exceed max_read while searching for EOL
        const std::vector<std::byte> input1{StringToBuffer("once upon a time there was a dog\nwho liked food")};
        LineReader reader1(input1, /*max_read=*/10);
        BOOST_CHECK_THROW(reader1.ReadLine(), std::runtime_error);
        // A \n that is exactly the max_read-th byte still terminates the line
        const std::vector<std::byte> input2{StringToBuffer("once upon\n a time there was a dog who liked food")};
        LineReader reader2(input2, /*max_read=*/10);
        BOOST_CHECK_EQUAL(reader2.ReadLine(), "once upon");
        BOOST_CHECK_THROW(reader2.ReadLine(), std::runtime_error);
    }
    {
        // Read specific number of bytes regardless of max_read or \n unless buffer is too short
        const std::vector<std::byte> input{StringToBuffer("once upon a time\n there was a dog \r\nwho liked food")};
        LineReader reader(input, /*max_read=*/1);
        BOOST_CHECK_EQUAL(reader.ReadLength(0), "");
        BOOST_CHECK_EQUAL(reader.ReadLength(3), "onc");
        BOOST_CHECK_EQUAL(reader.ReadLength(8), "e upon a");
        BOOST_CHECK_EQUAL(reader.ReadLength(8), " time\n t");
        BOOST_CHECK_THROW(reader.ReadLength(128), std::runtime_error);
        // A rejected read must not consume anything: 50 - (3 + 8 + 8) bytes remain
        BOOST_CHECK_EQUAL(reader.Left(), 31U);
    }
}
BOOST_AUTO_TEST_SUITE_END()

View file

@ -497,3 +497,9 @@ std::optional<uint64_t> ParseByteUnits(std::string_view str, ByteUnit default_mu
}
return *parsed_num * unit_amount;
}
std::vector<std::byte> StringToBuffer(const std::string& str)
{
auto span = std::as_bytes(std::span(str));
return {span.begin(), span.end()};
}

View file

@ -375,6 +375,15 @@ std::string Capitalize(std::string str);
*/
std::optional<uint64_t> ParseByteUnits(std::string_view str, ByteUnit default_multiplier);
/**
* Returns a byte vector filled with data from a string: one byte per
* character of the string, in the same order. Used to test string-encoded
* data from a socket, like HTTP headers.
*
* @param[in] str the string to convert into bytes
* @returns byte vector holding a copy of the string's characters
*/
std::vector<std::byte> StringToBuffer(const std::string& str);
namespace util {
/** consteval version of HexDigit() without the lookup table. */
consteval uint8_t ConstevalHexDigit(const char c)

View file

@ -13,4 +13,42 @@ void ReplaceAll(std::string& in_out, const std::string& search, const std::strin
if (search.empty()) return;
in_out = std::regex_replace(in_out, std::regex(search), substitute);
}
LineReader::LineReader(std::span<const std::byte> buffer, size_t max_read)
: start(buffer.begin()), end(buffer.end()), max_read(max_read), it(buffer.begin()) {}
/**
 * Read the next line from the buffer and advance past it.
 *
 * A line ends at the first \n, or at the end of the buffer if no \n is found
 * first. The consumed bytes (including the \n, if any) are passed through
 * TrimStringView(), so the result carries no surrounding whitespace and no
 * trailing \r or \n.
 *
 * @returns the trimmed line, or std::nullopt if the reader is already at the
 *          end of the buffer.
 * @throws std::runtime_error if max_read bytes have been consumed and the last
 *         of them is not a \n. The read position is put back to where it was
 *         before the call.
 */
std::optional<std::string> LineReader::ReadLine()
{
    if (it == end) {
        return std::nullopt;
    }

    const auto line_start = it;
    size_t count = 0;
    while (it != end) {
        const char c = static_cast<char>(*it);
        ++it;
        ++count;
        // The \n is consumed and counted, but finding it always wins over the limit check.
        if (c == '\n') break;
        if (count >= max_read) {
            // Undo the partial read so a failed call does not silently skip the
            // start of the line or leave Left() reporting a mid-line position.
            it = line_start;
            throw std::runtime_error("max_read exceeded by LineReader");
        }
    }

    // line_start is dereferenceable here: the early return above rules out an empty range.
    const std::string_view untrimmed_line(reinterpret_cast<const char*>(&*line_start), count);
    // Strips leading and trailing whitespace, which includes the terminating \r and/or \n.
    const std::string_view line = TrimStringView(untrimmed_line);
    return std::string(line);
}
// Copy exactly `len` bytes starting at the current position and step past them.
// max_read plays no part here; the only limit is the number of bytes left.
// Throws std::runtime_error when fewer than `len` bytes remain.
std::string LineReader::ReadLength(size_t len)
{
    std::string chunk;
    if (len > 0) {
        if (len > Left()) {
            throw std::runtime_error("Not enough data in buffer");
        }
        // len > 0 and len <= Left() guarantee that `it` can be dereferenced.
        const char* const first = reinterpret_cast<const char*>(&(*it));
        chunk.assign(first, len);
        it += len;
    }
    return chunk;
}
// Number of bytes between the current read position and the end of the buffer.
size_t LineReader::Left() const
{
    const auto remaining = std::distance(it, end);
    return remaining;
}
} // namespace util

View file

@ -11,6 +11,7 @@
#include <cstdint>
#include <cstring>
#include <locale>
#include <optional>
#include <sstream>
#include <string> // IWYU pragma: export
#include <string_view> // IWYU pragma: export
@ -248,6 +249,25 @@ template <typename T1, size_t PREFIX_LEN>
return obj.size() >= PREFIX_LEN &&
std::equal(std::begin(prefix), std::end(prefix), std::begin(obj));
}
// Cursor over a byte buffer that hands out its contents either line by line
// (ReadLine) or in fixed-size pieces (ReadLength). The reader keeps only
// iterators into the span it is given; it does not copy the buffer.
struct LineReader {
// First byte of the buffer passed to the constructor.
const std::span<const std::byte>::iterator start;
// One past the last byte of the buffer.
const std::span<const std::byte>::iterator end;
// Limit on how many bytes ReadLine() consumes while looking for a \n.
const size_t max_read;
// Current read position; moved forward by ReadLine() and ReadLength().
std::span<const std::byte>::iterator it;
// Creates a reader positioned at the beginning of buffer.
explicit LineReader(std::span<const std::byte> buffer, size_t max_read);
// Returns a string from the current iterator position up to the next \n
// (or up to the end of the buffer if there is no \n) and advances the
// iterator. The returned string is whitespace-trimmed and does not contain
// the trailing \n or \r. Returns std::nullopt if the iterator is already at
// the end of the buffer.
// Will not search for \n past max_read: throws std::runtime_error if
// max_read bytes are consumed without reaching a \n.
std::optional<std::string> ReadLine();
// Returns a string of exactly len bytes starting at the current iterator
// position and advances the iterator; len == 0 yields an empty string.
// May exceed max_read but will not read past the end of the buffer: throws
// std::runtime_error if fewer than len bytes remain.
std::string ReadLength(size_t len);
// Returns the number of bytes between the current iterator position and
// the end of the buffer.
size_t Left() const;
};
} // namespace util
#endif // BITCOIN_UTIL_STRING_H