small fixup, implement deduplication support

fixes potential binsearch+binfilter filters mis-ordering case.
implements optional filters deduplication except for regex filters.
adds -v flag for more verbose output.
This commit is contained in:
cathugger 2018-09-26 20:54:14 +03:00
parent 908d4957f6
commit abc08bc47b
No known key found for this signature in database
GPG key ID: 9BADDA2DAF6F01A8
4 changed files with 136 additions and 10 deletions

View file

@ -176,6 +176,13 @@ AC_COMPILE_IFELSE([AC_LANG_PROGRAM([])],
[cstd="$cstd -Wall"], [cstd="$cstd -Wall"],
[AC_MSG_RESULT([no])] [AC_MSG_RESULT([no])]
) )
CFLAGS="$cstd -Wno-maybe-uninitialized"
AC_MSG_CHECKING([whether CC supports -Wno-maybe-uninitialized])
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([])],
[AC_MSG_RESULT([yes])]
[cstd="$cstd -Wno-maybe-uninitialized"],
[AC_MSG_RESULT([no])]
)
if test "x$c99" = "xyes" -a "x$ed25519impl" != "xdonna" -a "x$enable_intfilter" != "x128" if test "x$c99" = "xyes" -a "x$ed25519impl" != "xdonna" -a "x$enable_intfilter" != "x128"
then then
CFLAGS="$cstd -pedantic" CFLAGS="$cstd -pedantic"

115
filters.h
View file

@ -31,14 +31,17 @@
#ifdef NEEDBINFILTER #ifdef NEEDBINFILTER
# ifndef BINFILTERLEN # ifndef BINFILTERLEN
# define BINFILTERLEN PUBLIC_LEN # define BINFILTERLEN PUBLIC_LEN
# endif # endif
struct binfilter { struct binfilter {
u8 f[BINFILTERLEN]; u8 f[BINFILTERLEN];
size_t len; // real len minus one size_t len; // real len minus one
u8 mask; u8 mask;
} ; } ;
#endif // NEEDBINFILTER #endif // NEEDBINFILTER
@ -117,6 +120,14 @@ static inline int filter_compare(const void *p1,const void *p2)
# ifdef EXPANDMASK # ifdef EXPANDMASK
/* /*
* for mask expansion, we need to figure out how many bits
* we need to fill in with different values.
* while on big endian machines this is quite easy,
* the representation we use for little endian ones may
* leave a gap of bits we don't want to touch.
*
* initial idea draft:
*
* raw representation -- FF.FF.F0.00 * raw representation -- FF.FF.F0.00
* big endian -- 0xFFFFF000 * big endian -- 0xFFFFF000
* little endian -- 0x00F0FFFF * little endian -- 0x00F0FFFF
@ -153,12 +164,24 @@ static inline int filter_compare(const void *p1,const void *p2)
* or.. * or..
* realmask <- (val & 0x000000dd) | ((val << relshiftval) & 0x0sss0000) * realmask <- (val & 0x000000dd) | ((val << relshiftval) & 0x0sss0000)
* ... * ...
*
* above method doesn't work in some cases. better way: * above method doesn't work in some cases. better way:
*
* l: 0x80ffFFff ^ 0x00f0FFff -> 0x800f0000 * l: 0x80ffFFff ^ 0x00f0FFff -> 0x800f0000
* 0x800f0000 >> 16 -> 0x0000800f * 0x800f0000 >> 16 -> 0x0000800f
* 0x0000800f + 1 -> 0x00008010 * 0x0000800f + 1 -> 0x00008010
* 0x0000800f & 0x00008010 -> 0x00008000 <- smask * 0x0000800f & 0x00008010 -> 0x00008000 <- smask
* 0x0000800f ^ 0x00008000 -> 0x0000000f <- dmask * 0x0000800f ^ 0x00008000 -> 0x0000000f <- dmask
*
* cross <- difference between the mask we desire and the mask we currently have
* shift cross to left a variable amount of times to eliminate zeros
* save the shift amount as ishift (initial shift)
* then, we eliminate the first area of ones; if there was no gap, the result is already all zeros
* save this thing as smask. it's only the higher bits.
* XOR smask and cross; the result is only the lower bits.
* shift smask to left a variable amount of times until the gap is eliminated.
* save the resulting mask as cmask;
* save the resulting shift value as rshift.
*/ */
static int flattened = 0; static int flattened = 0;
@ -289,18 +312,29 @@ static inline int filter_compare(const void *p1,const void *p2)
{ {
const struct binfilter *b1 = (const struct binfilter *)p1; const struct binfilter *b1 = (const struct binfilter *)p1;
const struct binfilter *b2 = (const struct binfilter *)p2; const struct binfilter *b2 = (const struct binfilter *)p2;
size_t l = b1->len <= b2->len ? b1->len : b2->len; size_t l = b1->len <= b2->len ? b1->len : b2->len;
int cmp = memcmp(b1->f,b2->f,l); int cmp = memcmp(b1->f,b2->f,l);
if (cmp) if (cmp != 0)
return cmp; return cmp;
if (b1->len < b2->len) if (b1->len < b2->len)
return -1; return -1;
if (b1->len > b2->len) if (b1->len > b2->len)
return 1; return +1;
u8 cmask = b1->mask & b2->mask;
if ((b1->f[l] & cmask) < (b2->f[l] & cmask))
return -1;
if ((b1->f[l] & cmask) > (b2->f[l] & cmask))
return +1;
if (b1->mask < b2->mask) if (b1->mask < b2->mask)
return -1; return -1;
if (b1->mask > b2->mask) if (b1->mask > b2->mask)
return 1; return +1;
return 0; return 0;
} }
@ -345,6 +379,7 @@ static void filters_add(const char *filter)
fprintf(stderr,"^\n"); fprintf(stderr,"^\n");
return; return;
} }
ret = BASE32_FROM_LEN(ret); ret = BASE32_FROM_LEN(ret);
if (!ret) if (!ret)
return; return;
@ -372,12 +407,14 @@ static void filters_add(const char *filter)
mc.b[bf.len] = bf.mask; mc.b[bf.len] = bf.mask;
memcpy(fc.b,bf.f,sizeof(fc.b)); memcpy(fc.b,bf.f,sizeof(fc.b));
fc.i &= mc.i; fc.i &= mc.i;
struct intfilter ifltr = { struct intfilter ifltr = {
.f = fc.i, .f = fc.i,
# ifndef OMITMASK # ifndef OMITMASK
.m = mc.i, .m = mc.i,
# endif # endif
}; };
# ifdef OMITMASK # ifdef OMITMASK
ifilter_addflatten(&ifltr,mc.i); ifilter_addflatten(&ifltr,mc.i);
# else // OMITMASK # else // OMITMASK
@ -394,6 +431,7 @@ static void filters_add(const char *filter)
int errornum; int errornum;
PCRE2_SIZE erroroffset; PCRE2_SIZE erroroffset;
pcre2_code *re; pcre2_code *re;
re = pcre2_compile((PCRE2_SPTR8)filter,PCRE2_ZERO_TERMINATED, re = pcre2_compile((PCRE2_SPTR8)filter,PCRE2_ZERO_TERMINATED,
PCRE2_NO_UTF_CHECK | PCRE2_ANCHORED,&errornum,&erroroffset,0); PCRE2_NO_UTF_CHECK | PCRE2_ANCHORED,&errornum,&erroroffset,0);
if (!re) { if (!re) {
@ -403,8 +441,10 @@ static void filters_add(const char *filter)
(size_t)erroroffset,buffer); (size_t)erroroffset,buffer);
return; return;
} }
// attempt to JIT. ignore error // attempt to JIT. ignore error
(void) pcre2_jit_compile(re,PCRE2_JIT_COMPLETE); (void) pcre2_jit_compile(re,PCRE2_JIT_COMPLETE);
struct pcre2filter f; struct pcre2filter f;
memset(&f,0,sizeof(f)); memset(&f,0,sizeof(f));
f.re = re; f.re = re;
@ -417,12 +457,73 @@ static void filters_add(const char *filter)
#endif // PCRE2FILTER #endif // PCRE2FILTER
} }
#ifdef NEEDBINFILTER #ifndef PCRE2FILTER
/*
 * returns nonzero when the filter stored at index a covers the filter
 * stored at index b, i.e. b is redundant while a is present.
 *
 * a, b -- indexes into the filters vector.
 *
 * INTFILTER builds: with OMITMASK the two values must be equal;
 * otherwise b's value masked with a's mask must equal a's value.
 *
 * binfilter builds: len holds the real length minus one, so f[len] is
 * the last byte of a filter and only the bits selected by mask count.
 * a covers b when a is not longer than b, all bytes before a's last
 * byte are identical, and b's byte at that position equals a's last
 * byte under a's mask.
 * assumes f[len] is stored already masked and that masks are contiguous
 * high-bit prefixes (so a numerically bigger mask selects more bits)
 * -- TODO confirm against filters_add.
 */
static inline int filters_a_includes_b(size_t a,size_t b)
{
# ifdef INTFILTER
#  ifdef OMITMASK
	return VEC_BUF(filters,a).f == VEC_BUF(filters,b).f;
#  else // OMITMASK
	return VEC_BUF(filters,a).f == (VEC_BUF(filters,b).f & VEC_BUF(filters,a).m);
#  endif // OMITMASK
# else // INTFILTER
	const struct binfilter *fa = &VEC_BUF(filters,a);
	const struct binfilter *fb = &VEC_BUF(filters,b);

	// longer filter is more specific; it can't cover a shorter one
	if (fa->len > fb->len)
		return 0;
	size_t l = fa->len;

	// bytes in front of a's last byte must match exactly
	if (memcmp(fa->f,fb->f,l) != 0)
		return 0;

	// same byte length: a must not select bits which b leaves unspecified
	if (fa->len == fb->len && fa->mask > fb->mask)
		return 0;

	// a's last byte must match b under a's mask. this has to be checked
	// even when b is longer, as memcmp above didn't look at byte l
	return fa->f[l] == (fb->f[l] & fa->mask);
# endif // INTFILTER
}
static void filters_dedup(void) static void filters_dedup(void)
{ {
//TODO size_t last = ~(size_t)0; // index after last matching element
size_t chk; // element to compare against
size_t st; // start of area to destroy
size_t len = VEC_LENGTH(filters);
for (size_t i = 1;i < len;++i) {
if (last != i) {
if (filters_a_includes_b(i - 1,i)) {
if (last != ~(size_t)0) {
memmove(&VEC_BUF(filters,st),
&VEC_BUF(filters,last),
(i - last) * VEC_ELSIZE(filters));
st += i - last;
}
else
st = i;
chk = i - 1;
last = i + 1;
}
}
else {
if (filters_a_includes_b(chk,i))
last = i + 1;
}
}
if (last != ~(size_t)0) {
memmove(&VEC_BUF(filters,st),
&VEC_BUF(filters,last),
(len - last) * VEC_ELSIZE(filters));
st += len - last;
VEC_SETLENGTH(filters,st);
}
} }
#endif // NEEDBINFILTER #endif // !PCRE2FILTER
static void filters_prepare(void) static void filters_prepare(void)
{ {
@ -643,7 +744,7 @@ static void filters_print(void)
u8 bufx[128]; u8 bufx[128];
#endif #endif
if (i >= 20) { if (!verboseflag && i >= 20) {
size_t notshown = l - i; size_t notshown = l - i;
fprintf(stderr,"[another " FSZ " %s not shown]\n", fprintf(stderr,"[another " FSZ " %s not shown]\n",
notshown,notshown == 1 ? "filter" : "filters"); notshown,notshown == 1 ? "filter" : "filters");

19
main.c
View file

@ -42,8 +42,10 @@ static char *workdir = 0;
static size_t workdirlen = 0; static size_t workdirlen = 0;
static int quietflag = 0; static int quietflag = 0;
//static int wantdedup = 0; static int verboseflag = 0;
#define wantdedup 0 #ifndef PCRE2FILTER
static int wantdedup = 0;
#endif
// 0, direndpos, onionendpos // 0, direndpos, onionendpos
// printstartpos = either 0 or direndpos // printstartpos = either 0 or direndpos
@ -402,9 +404,11 @@ static void printhelp(FILE *out,const char *progname)
" %s -f filterfile [options]\n" " %s -f filterfile [options]\n"
"Options:\n" "Options:\n"
"\t-h - print help to stdout and quit\n" "\t-h - print help to stdout and quit\n"
"\t-f - instead of specifying filter(s) via commandline, specify filter file which contains filters separated by newlines\n" "\t-f - specify filter file which contains filters separated by newlines\n"
"\t-D - deduplicate filters\n"
"\t-q - do not print diagnostic output to stderr\n" "\t-q - do not print diagnostic output to stderr\n"
"\t-x - do not print onion names\n" "\t-x - do not print onion names\n"
"\t-v - print more diagnostic data\n"
"\t-o filename - output onion names to specified file (append)\n" "\t-o filename - output onion names to specified file (append)\n"
"\t-O filename - output onion names to specified file (overwrite)\n" "\t-O filename - output onion names to specified file (overwrite)\n"
"\t-F - include directory names in onion names output\n" "\t-F - include directory names in onion names output\n"
@ -546,10 +550,19 @@ int main(int argc,char **argv)
else else
e_additional(); e_additional();
} }
else if (*arg == 'D') {
#ifndef PCRE2FILTER
wantdedup = 1;
#else
fprintf(stderr,"WARNING: deduplication isn't supported with regex filters\n");
#endif
}
else if (*arg == 'q') else if (*arg == 'q')
++quietflag; ++quietflag;
else if (*arg == 'x') else if (*arg == 'x')
fout = 0; fout = 0;
else if (*arg == 'v')
verboseflag = 1;
else if (*arg == 'o') { else if (*arg == 'o') {
outfileoverwrite = 0; outfileoverwrite = 0;
if (argc--) if (argc--)

5
vec.h
View file

@ -22,6 +22,11 @@ void vec_addn(struct vec_basestruct *ctl,size_t sz,size_t n);
#define VEC_ADDN(ctl,n) \ #define VEC_ADDN(ctl,n) \
vec_addn((struct vec_basestruct *)&(ctl),VEC_ELSIZE(ctl),(size_t)(n)) vec_addn((struct vec_basestruct *)&(ctl),VEC_ELSIZE(ctl),(size_t)(n))
/*
 * VEC_SETLENGTH(ctl,n): overwrite the element count kept in vector
 * control structure ctl with n.
 * only the len field is assigned; nothing else is touched here.
 * n is parenthesized so any expression can be passed safely.
 */
#define VEC_SETLENGTH(ctl,n) \
	do { \
		(ctl).len = (n); \
	} while (0)
#define VEC_REMOVEN(ctl,n,m) \ #define VEC_REMOVEN(ctl,n,m) \
do { \ do { \
(ctl).len -= m; \ (ctl).len -= m; \