epee: speed up word/number matching

Number matching semantics are slightly changed: the matcher is only used
as a pre-filter to check whether a number is signed and/or floating
point, so it can be made looser and therefore faster. The strto* functions
are called afterwards and will error out where necessary. We now also
accept numbers like .4, which were not accepted before.

The strto* calls on a boost::string_ref will not access unallocated
memory since the parsers always stop at the first bad character,
and the original string is zero terminated.

Timings (before -> after), in arbitrary time units, for an arbitrary test case:

match_number2: 235 -> 70
match_word2: 330 -> 108
This commit is contained in:
moneromooo-monero 2018-12-26 09:46:41 +00:00
parent 6285c43ffc
commit 21777daf6e
No known key found for this signature in database
GPG key ID: 686F07454D6CEFC3
3 changed files with 173 additions and 45 deletions

View file

@ -29,6 +29,7 @@
#pragma once #pragma once
#include <algorithm> #include <algorithm>
#include <boost/utility/string_ref.hpp>
namespace epee namespace epee
{ {
@ -36,6 +37,40 @@ namespace misc_utils
{ {
namespace parse namespace parse
{ {
// Bit flags stored in lut[]. Each table entry is the bitwise OR of every
// flag that applies to the corresponding byte value. The numeric values are
// part of the table's contract: other code in this file tests them with
// literal masks, so they must not be renumbered.
enum : uint8_t
{
  lut_flag_digit = 1,        // decimal digit '0'..'9'
  lut_flag_float = 2,        // '.', 'e' or 'E': seeing one marks a number as floating point
  lut_flag_alpha = 4,        // ASCII letter 'A'..'Z' or 'a'..'z'
  lut_flag_space = 8,        // whitespace: '\t', '\n', '\v', '\f', '\r' or ' '
  lut_flag_number_char = 16  // allowed inside a number; does not by itself mean it is a float
};

// Character classification table indexed by the character's unsigned value.
// The trailing comment on a row gives the index one past the end of that row.
// All bytes >= 128 carry no flags.
static const constexpr uint8_t lut[256]={
  0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 0, 0, // 16
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 32
  8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, 0, 16, 18, 0, // 48
  17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 0, 0, 0, 0, 0, 0, // 64
  0, 4, 4, 4, 4, 22, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, // 80
  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, // 96
  0, 4, 4, 4, 4, 22, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, // 112
  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, // 128
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};

// Compile-time spot checks that keep the table and the named flags in sync.
static_assert(lut[static_cast<uint8_t>('0')] == (lut_flag_digit | lut_flag_number_char), "lut: bad flags for '0'");
static_assert(lut[static_cast<uint8_t>('.')] == (lut_flag_float | lut_flag_number_char), "lut: bad flags for '.'");
static_assert(lut[static_cast<uint8_t>('e')] == (lut_flag_float | lut_flag_alpha | lut_flag_number_char), "lut: bad flags for 'e'");
static_assert(lut[static_cast<uint8_t>('-')] == lut_flag_number_char, "lut: bad flags for '-'");
static_assert(lut[static_cast<uint8_t>(' ')] == lut_flag_space, "lut: bad flags for ' '");

// Returns true if c is one of '\t', '\n', '\v', '\f', '\r' or ' '.
// The result comes from a fixed table and does not depend on the locale.
inline bool isspace(char c)
{
  // go through uint8_t so that a negative char cannot index before the table
  return (lut[static_cast<uint8_t>(c)] & lut_flag_space) != 0;
}

// Returns true if c is a decimal digit '0'..'9'.
// The result comes from a fixed table and does not depend on the locale.
inline bool isdigit(char c)
{
  // go through uint8_t so that a negative char cannot index before the table
  return (lut[static_cast<uint8_t>(c)] & lut_flag_digit) != 0;
}
inline std::string transform_to_escape_sequence(const std::string& src) inline std::string transform_to_escape_sequence(const std::string& src)
{ {
static const char escaped[] = "\b\f\n\r\t\v\"\\/"; static const char escaped[] = "\b\f\n\r\t\v\"\\/";
@ -159,25 +194,34 @@ namespace misc_utils
return false; return false;
} }
} }
inline void match_number2(std::string::const_iterator& star_end_string, std::string::const_iterator buf_end, std::string& val, bool& is_float_val, bool& is_signed_val) inline void match_number2(std::string::const_iterator& star_end_string, std::string::const_iterator buf_end, boost::string_ref& val, bool& is_float_val, bool& is_signed_val)
{ {
val.clear(); val.clear();
is_float_val = false; uint8_t float_flag = 0;
for(std::string::const_iterator it = star_end_string;it != buf_end;it++) is_signed_val = false;
size_t chars = 0;
std::string::const_iterator it = star_end_string;
if (it != buf_end && *it == '-')
{ {
if(isdigit(*it) || (it == star_end_string && *it == '-') || (val.size() && *it == '.' ) || (is_float_val && (*it == 'e' || *it == 'E' || *it == '-' || *it == '+' )) ) is_signed_val = true;
++chars;
++it;
}
for(;it != buf_end;it++)
{
const uint8_t flags = lut[(uint8_t)*it];
if (flags & 16)
{ {
if(!val.size() && *it == '-') float_flag |= flags;
is_signed_val = true; ++chars;
if(*it == '.' )
is_float_val = true;
val.push_back(*it);
} }
else else
{ {
val = boost::string_ref(&*star_end_string, chars);
if(val.size()) if(val.size())
{ {
star_end_string = --it; star_end_string = --it;
is_float_val = !!(float_flag & 2);
return; return;
} }
else else
@ -186,7 +230,7 @@ namespace misc_utils
} }
ASSERT_MES_AND_THROW("wrong number in json entry: " << std::string(star_end_string, buf_end)); ASSERT_MES_AND_THROW("wrong number in json entry: " << std::string(star_end_string, buf_end));
} }
inline bool match_number(std::string::const_iterator& star_end_string, std::string::const_iterator buf_end, std::string& val) inline bool match_number(std::string::const_iterator& star_end_string, std::string::const_iterator buf_end, boost::string_ref& val)
{ {
try try
{ {
@ -199,15 +243,15 @@ namespace misc_utils
return false; return false;
} }
} }
inline void match_word2(std::string::const_iterator& star_end_string, std::string::const_iterator buf_end, std::string& val) inline void match_word2(std::string::const_iterator& star_end_string, std::string::const_iterator buf_end, boost::string_ref& val)
{ {
val.clear(); val.clear();
for(std::string::const_iterator it = star_end_string;it != buf_end;it++) for(std::string::const_iterator it = star_end_string;it != buf_end;it++)
{ {
if(!isalpha(*it)) if (!(lut[(uint8_t)*it] & 4))
{ {
val.assign(star_end_string, it); val = boost::string_ref(&*star_end_string, std::distance(star_end_string, it));
if(val.size()) if(val.size())
{ {
star_end_string = --it; star_end_string = --it;
@ -218,7 +262,7 @@ namespace misc_utils
} }
ASSERT_MES_AND_THROW("failed to match word number in json entry: " << std::string(star_end_string, buf_end)); ASSERT_MES_AND_THROW("failed to match word number in json entry: " << std::string(star_end_string, buf_end));
} }
inline bool match_word(std::string::const_iterator& star_end_string, std::string::const_iterator buf_end, std::string& val) inline bool match_word(std::string::const_iterator& star_end_string, std::string::const_iterator buf_end, boost::string_ref& val)
{ {
try try
{ {

View file

@ -39,7 +39,7 @@ namespace epee
{ {
namespace json namespace json
{ {
#define CHECK_ISSPACE() if(!isspace(*it)){ ASSERT_MES_AND_THROW("Wrong JSON character at: " << std::string(it, buf_end));} #define CHECK_ISSPACE() if(!epee::misc_utils::parse::isspace(*it)){ ASSERT_MES_AND_THROW("Wrong JSON character at: " << std::string(it, buf_end));}
/*inline void parse_error() /*inline void parse_error()
{ {
@ -114,11 +114,11 @@ namespace epee
std::string val; std::string val;
match_string2(it, buf_end, val); match_string2(it, buf_end, val);
//insert text value //insert text value
stg.set_value(name, val, current_section); stg.set_value(name, std::move(val), current_section);
state = match_state_wonder_after_value; state = match_state_wonder_after_value;
}else if (isdigit(*it) || *it == '-') }else if (epee::misc_utils::parse::isdigit(*it) || *it == '-')
{//just a named number value started {//just a named number value started
std::string val; boost::string_ref val;
bool is_v_float = false;bool is_signed = false; bool is_v_float = false;bool is_signed = false;
match_number2(it, buf_end, val, is_v_float, is_signed); match_number2(it, buf_end, val, is_v_float, is_signed);
if(!is_v_float) if(!is_v_float)
@ -126,27 +126,27 @@ namespace epee
if(is_signed) if(is_signed)
{ {
errno = 0; errno = 0;
int64_t nval = strtoll(val.c_str(), NULL, 10); int64_t nval = strtoll(val.data(), NULL, 10);
if (errno) throw std::runtime_error("Invalid number: " + val); if (errno) throw std::runtime_error("Invalid number: " + std::string(val));
stg.set_value(name, nval, current_section); stg.set_value(name, nval, current_section);
}else }else
{ {
errno = 0; errno = 0;
uint64_t nval = strtoull(val.c_str(), NULL, 10); uint64_t nval = strtoull(val.data(), NULL, 10);
if (errno) throw std::runtime_error("Invalid number: " + val); if (errno) throw std::runtime_error("Invalid number: " + std::string(val));
stg.set_value(name, nval, current_section); stg.set_value(name, nval, current_section);
} }
}else }else
{ {
errno = 0; errno = 0;
double nval = strtod(val.c_str(), NULL); double nval = strtod(val.data(), NULL);
if (errno) throw std::runtime_error("Invalid number: " + val); if (errno) throw std::runtime_error("Invalid number: " + std::string(val));
stg.set_value(name, nval, current_section); stg.set_value(name, nval, current_section);
} }
state = match_state_wonder_after_value; state = match_state_wonder_after_value;
}else if(isalpha(*it) ) }else if(isalpha(*it) )
{// could be null, true or false {// could be null, true or false
std::string word; boost::string_ref word;
match_word2(it, buf_end, word); match_word2(it, buf_end, word);
if(boost::iequals(word, "null")) if(boost::iequals(word, "null"))
{ {
@ -203,13 +203,13 @@ namespace epee
//mean array of strings //mean array of strings
std::string val; std::string val;
match_string2(it, buf_end, val); match_string2(it, buf_end, val);
h_array = stg.insert_first_value(name, val, current_section); h_array = stg.insert_first_value(name, std::move(val), current_section);
CHECK_AND_ASSERT_THROW_MES(h_array, " failed to insert values entry"); CHECK_AND_ASSERT_THROW_MES(h_array, " failed to insert values entry");
state = match_state_array_after_value; state = match_state_array_after_value;
array_md = array_mode_string; array_md = array_mode_string;
}else if (isdigit(*it) || *it == '-') }else if (epee::misc_utils::parse::isdigit(*it) || *it == '-')
{//array of numbers value started {//array of numbers value started
std::string val; boost::string_ref val;
bool is_v_float = false;bool is_signed_val = false; bool is_v_float = false;bool is_signed_val = false;
match_number2(it, buf_end, val, is_v_float, is_signed_val); match_number2(it, buf_end, val, is_v_float, is_signed_val);
if(!is_v_float) if(!is_v_float)
@ -217,22 +217,22 @@ namespace epee
if (is_signed_val) if (is_signed_val)
{ {
errno = 0; errno = 0;
int64_t nval = strtoll(val.c_str(), NULL, 10); int64_t nval = strtoll(val.data(), NULL, 10);
if (errno) throw std::runtime_error("Invalid number: " + val); if (errno) throw std::runtime_error("Invalid number: " + std::string(val));
h_array = stg.insert_first_value(name, nval, current_section); h_array = stg.insert_first_value(name, nval, current_section);
}else }else
{ {
errno = 0; errno = 0;
uint64_t nval = strtoull(val.c_str(), NULL, 10); uint64_t nval = strtoull(val.data(), NULL, 10);
if (errno) throw std::runtime_error("Invalid number: " + val); if (errno) throw std::runtime_error("Invalid number: " + std::string(val));
h_array = stg.insert_first_value(name, nval, current_section); h_array = stg.insert_first_value(name, nval, current_section);
} }
CHECK_AND_ASSERT_THROW_MES(h_array, " failed to insert values section entry"); CHECK_AND_ASSERT_THROW_MES(h_array, " failed to insert values section entry");
}else }else
{ {
errno = 0; errno = 0;
double nval = strtod(val.c_str(), NULL); double nval = strtod(val.data(), NULL);
if (errno) throw std::runtime_error("Invalid number: " + val); if (errno) throw std::runtime_error("Invalid number: " + std::string(val));
h_array = stg.insert_first_value(name, nval, current_section); h_array = stg.insert_first_value(name, nval, current_section);
CHECK_AND_ASSERT_THROW_MES(h_array, " failed to insert values section entry"); CHECK_AND_ASSERT_THROW_MES(h_array, " failed to insert values section entry");
} }
@ -245,7 +245,7 @@ namespace epee
state = match_state_wonder_after_value; state = match_state_wonder_after_value;
}else if(isalpha(*it) ) }else if(isalpha(*it) )
{// array of booleans {// array of booleans
std::string word; boost::string_ref word;
match_word2(it, buf_end, word); match_word2(it, buf_end, word);
if(boost::iequals(word, "true")) if(boost::iequals(word, "true"))
{ {
@ -291,15 +291,15 @@ namespace epee
{ {
std::string val; std::string val;
match_string2(it, buf_end, val); match_string2(it, buf_end, val);
bool res = stg.insert_next_value(h_array, val); bool res = stg.insert_next_value(h_array, std::move(val));
CHECK_AND_ASSERT_THROW_MES(res, "failed to insert values"); CHECK_AND_ASSERT_THROW_MES(res, "failed to insert values");
state = match_state_array_after_value; state = match_state_array_after_value;
}else CHECK_ISSPACE(); }else CHECK_ISSPACE();
break; break;
case array_mode_numbers: case array_mode_numbers:
if (isdigit(*it) || *it == '-') if (epee::misc_utils::parse::isdigit(*it) || *it == '-')
{//array of numbers value started {//array of numbers value started
std::string val; boost::string_ref val;
bool is_v_float = false;bool is_signed_val = false; bool is_v_float = false;bool is_signed_val = false;
match_number2(it, buf_end, val, is_v_float, is_signed_val); match_number2(it, buf_end, val, is_v_float, is_signed_val);
bool insert_res = false; bool insert_res = false;
@ -308,21 +308,21 @@ namespace epee
if (is_signed_val) if (is_signed_val)
{ {
errno = 0; errno = 0;
int64_t nval = strtoll(val.c_str(), NULL, 10); int64_t nval = strtoll(val.data(), NULL, 10);
if (errno) throw std::runtime_error("Invalid number: " + val); if (errno) throw std::runtime_error("Invalid number: " + std::string(val));
insert_res = stg.insert_next_value(h_array, nval); insert_res = stg.insert_next_value(h_array, nval);
}else }else
{ {
errno = 0; errno = 0;
uint64_t nval = strtoull(val.c_str(), NULL, 10); uint64_t nval = strtoull(val.data(), NULL, 10);
if (errno) throw std::runtime_error("Invalid number: " + val); if (errno) throw std::runtime_error("Invalid number: " + std::string(val));
insert_res = stg.insert_next_value(h_array, nval); insert_res = stg.insert_next_value(h_array, nval);
} }
}else }else
{ {
errno = 0; errno = 0;
double nval = strtod(val.c_str(), NULL); double nval = strtod(val.data(), NULL);
if (errno) throw std::runtime_error("Invalid number: " + val); if (errno) throw std::runtime_error("Invalid number: " + std::string(val));
insert_res = stg.insert_next_value(h_array, nval); insert_res = stg.insert_next_value(h_array, nval);
} }
CHECK_AND_ASSERT_THROW_MES(insert_res, "Failed to insert next value"); CHECK_AND_ASSERT_THROW_MES(insert_res, "Failed to insert next value");
@ -333,7 +333,7 @@ namespace epee
case array_mode_booleans: case array_mode_booleans:
if(isalpha(*it) ) if(isalpha(*it) )
{// array of booleans {// array of booleans
std::string word; boost::string_ref word;
match_word2(it, buf_end, word); match_word2(it, buf_end, word);
if(boost::iequals(word, "true")) if(boost::iequals(word, "true"))
{ {

View file

@ -50,6 +50,7 @@
#include "p2p/net_peerlist_boost_serialization.h" #include "p2p/net_peerlist_boost_serialization.h"
#include "span.h" #include "span.h"
#include "string_tools.h" #include "string_tools.h"
#include "storages/parserse_base_utils.h"
namespace namespace
{ {
@ -833,3 +834,86 @@ TEST(net_buffer, move)
ASSERT_TRUE(!memcmp(span.data() + 1, std::string(4000, '0').c_str(), 4000)); ASSERT_TRUE(!memcmp(span.data() + 1, std::string(4000, '0').c_str(), 4000));
} }
TEST(parsing, isspace)
{
  // Every byte value that must be classified as whitespace.
  static const char whitespace[] = "\r\n\t\f\v ";

  // NUL is checked on its own: strchr() would match it against the
  // terminator of the reference set and wrongly report it as whitespace.
  ASSERT_FALSE(epee::misc_utils::parse::isspace(0));

  // Walk all remaining byte values and compare against the reference set.
  for (int c = 1; c < 256; ++c)
  {
    const bool expected = strchr(whitespace, c) != NULL;
    ASSERT_EQ(epee::misc_utils::parse::isspace(c), expected);
  }
}
TEST(parsing, isdigit)
{
  // Every byte value that must be classified as a digit.
  static const char digits[] = "0123456789";

  // NUL is checked on its own: strchr() would match it against the
  // terminator of the reference set and wrongly report it as a digit.
  ASSERT_FALSE(epee::misc_utils::parse::isdigit(0));

  // Walk all remaining byte values and compare against the reference set.
  for (int c = 1; c < 256; ++c)
  {
    const bool expected = strchr(digits, c) != NULL;
    ASSERT_EQ(epee::misc_utils::parse::isdigit(c), expected);
  }
}
TEST(parsing, number)
{
  // The parser expects another character to end the number, and it accepts
  // things that aren't numbers since it is meant as a pre-filter for the
  // strto* functions. So we only check that numbers get matched in full,
  // and deliberately don't test non numbers.
  static const struct
  {
    const char *input;    // a number followed by one terminating character
    const char *expected; // the text the matcher is expected to report
  } cases[] = {
    {"0 ", "0"},
    {"000 ", "000"},
    {"10x", "10"},
    {"10.09/", "10.09"},
    {"-1.r", "-1."},
    {"-49.;", "-49."},
    {"0.78/", "0.78"},
    {"33E9$", "33E9"},
    {".34e2=", ".34e2"},
    {"-9.34e-2=", "-9.34e-2"},
    {"+9.34e+03=", "+9.34e+03"},
  };

  for (const auto &tc : cases)
  {
    // val is a non-owning view into s, so s has to outlive the comparison
    const std::string s(tc.input);
    std::string::const_iterator i = s.begin();
    boost::string_ref val;
    // only the matched text is checked here; the return value is not part
    // of what this test verifies
    epee::misc_utils::parse::match_number(i, s.end(), val);
    // name the offending input, since all cases share this one assertion
    ASSERT_EQ(val, tc.expected) << "input: " << tc.input;
  }
}