mirror of
https://github.com/cathugger/mkp224o.git
synced 2025-01-09 11:07:19 -03:00
fix intfilter expansion logic
Also makes it simpler and probably faster. Someone should contribute a non-GNU-C popcount if they care; I ran out of patience with this (yes, I know it's simple) (it probably doesn't even need to be exactly popcount).
This commit is contained in:
parent
68a06c4ced
commit
f43c3b021e
2 changed files with 57 additions and 93 deletions
|
@ -5,6 +5,7 @@ static inline size_t S(filter_len)(size_t i)
|
||||||
# ifndef OMITMASK
|
# ifndef OMITMASK
|
||||||
const u8 *m = (const u8 *)&VEC_BUF(filters,i).m;
|
const u8 *m = (const u8 *)&VEC_BUF(filters,i).m;
|
||||||
# else // OMITMASK
|
# else // OMITMASK
|
||||||
|
(void) i;
|
||||||
const u8 *m = (const u8 *)&ifiltermask;
|
const u8 *m = (const u8 *)&ifiltermask;
|
||||||
# endif // OMITMASK
|
# endif // OMITMASK
|
||||||
size_t c = 0;
|
size_t c = 0;
|
||||||
|
|
|
@ -17,112 +17,81 @@ static inline int filter_compare(const void *p1,const void *p2)
|
||||||
# ifdef EXPANDMASK
|
# ifdef EXPANDMASK
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* for mask expansion, we need to figure out how many bits
|
* so we have 2 masks with basically random bits
|
||||||
* we need to fill in with different values.
|
* first we find where these masks have bits in common
|
||||||
* while in big endian machines this is quite easy,
|
* then we find where the new mask has more bits than the old one
|
||||||
* representation we use for little endian ones may
|
* common areas must be unchanged
|
||||||
* leave gap of bits we don't want to touch.
|
* gaps in both must be unchanged
|
||||||
*
|
* but new bits must be filled
|
||||||
* initial idea draft:
|
* therefore, let's just fill old gaps and common areas with 1s
|
||||||
*
|
* before add, OR with these 1s
|
||||||
* raw representation -- FF.FF.F0.00
|
* then perform the add. these 1s make the carry ripple through to the next 0 bit
|
||||||
* big endian -- 0xFFFFF000
|
* we already know how many new gaps we need to fill, so this won't overflow
|
||||||
* little endian -- 0x00F0FFFF
|
* after this addition, AND the result with the bitwise NOT of the combined mask, and OR it with the old value
|
||||||
* b: 0xFFffF000 ^ 0xFFff0000 -> 0x0000F000
|
* this will produce new proper value
|
||||||
* 0x0000F000 + 1 -> 0x0000F001
|
* we need to re-fill the 1s before every add to keep this structure working
|
||||||
* 0x0000F000 & 0x0000F001 -> 0x0000F000 <- shifted mask
|
|
||||||
* 0x0000F000 ^ 0x0000F000 -> 0x00000000 <- direct mask
|
|
||||||
* 0x0000F000 ^ 0x00000000 -> 0x0000F000 <- shifted mask
|
|
||||||
* l: 0x00f0FFff ^ 0x0000FFff -> 0x00f00000
|
|
||||||
* 0x00f00000 + 1 -> 0x00f00001
|
|
||||||
* 0x00f00000 & 0x00f00001 -> 0x00f00000 <- shifted mask
|
|
||||||
* 0x00f00000 ^ 0x00f00000 -> 0x00000000 <- direct mask
|
|
||||||
* 0x00f00000 ^ 0x00000000 -> 0x00f00000 <- shifted mask
|
|
||||||
*
|
|
||||||
* b: 0xFFffFFff ^ 0xF0000000 -> 0x0FffFFff
|
|
||||||
* 0x0FffFFff + 1 -> 0x10000000
|
|
||||||
* 0x0FffFFff & 0x10000000 -> 0x00000000 <- shifted mask
|
|
||||||
* 0x0FffFFff ^ 0x00000000 -> 0x0FffFFff <- direct mask
|
|
||||||
* 0x0FffFFff ^ 0x0FffFFff -> 0x00000000 <- shifted mask
|
|
||||||
* l: 0xFFffFFff ^ 0x000000f0 -> 0xFFffFF0f
|
|
||||||
* 0xFFffFF0f + 1 -> 0xFFffFF10
|
|
||||||
* 0xFFffFF0f & 0xFFffFF10 -> 0xFFffFF00 <- shifted mask
|
|
||||||
* 0xFFffFF0f ^ 0xFFffFF00 -> 0x0000000f <- direct mask
|
|
||||||
* 0xFFffFF0f ^ 0x0000000f -> 0xFFffFF00 <- shifted mask
|
|
||||||
*
|
|
||||||
* essentially, we have to make direct mask + shifted mask bits worth of information
|
|
||||||
* and then split it into 2 parts
|
|
||||||
* we do not need absolute shifted mask shifting value, just relative to direct mask
|
|
||||||
* 0x0sss00dd - shifted & direct mask combo
|
|
||||||
* 0x000sssdd - combined mask
|
|
||||||
* 8 - relshiftval
|
|
||||||
* generate values from 0x00000000 to 0x000sssdd
|
|
||||||
* for each value, realmask <- (val & 0x000000dd) | ((val & 0x000sss00) << relshiftval)
|
|
||||||
* or..
|
|
||||||
* realmask <- (val & 0x000000dd) | ((val << relshiftval) & 0x0sss0000)
|
|
||||||
* ...
|
|
||||||
*
|
|
||||||
* above method doesn't work in some cases. better way:
|
|
||||||
*
|
|
||||||
* l: 0x80ffFFff ^ 0x00f0FFff -> 0x800f0000
|
|
||||||
* 0x800f0000 >> 16 -> 0x0000800f
|
|
||||||
* 0x0000800f + 1 -> 0x00008010
|
|
||||||
* 0x0000800f & 0x00008010 -> 0x00008000 <- smask
|
|
||||||
* 0x0000800f ^ 0x00008000 -> 0x0000000f <- dmask
|
|
||||||
*
|
|
||||||
* cross <- difference between mask we desire and mask we currently have
|
|
||||||
* shift cross to left variable amount of times to eliminate zeros
|
|
||||||
* save shift amount as ishift (initial shift)
|
|
||||||
* then, we eliminate first area of ones; if there was no gap, result is already all zeros
|
|
||||||
* save this thing as smask. it's only higher bits.
|
|
||||||
* XOR smask and cross; result is only lower bits.
|
|
||||||
* shift smask to left variable amount of times until gap is eliminated.
|
|
||||||
* save resulting mask as cmask;
|
|
||||||
* save resulting shift value as rshift.
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
int flattened = 0;
|
int flattened = 0;
|
||||||
|
|
||||||
#define EXPVAL(init,j,dmask,smask,ishift,rshift) \
|
|
||||||
((init) | ((((j) & (dmask)) | (((j) << (rshift)) & (smask))) << (ishift)))
|
|
||||||
// add expanded set of values
|
// add expanded set of values
|
||||||
// allocates space on its own
|
// allocates space on its own
|
||||||
static void ifilter_addexpanded(
|
static void ifilter_addexpanded(
|
||||||
struct intfilter *ifltr,
|
struct intfilter *ifltr,
|
||||||
IFT dmask,IFT smask,IFT cmask,
|
register IFT newbits,
|
||||||
int ishift,int rshift)
|
register IFT notnewbits,
|
||||||
|
register IFT newbitsum)
|
||||||
{
|
{
|
||||||
flattened = 1;
|
flattened = 1;
|
||||||
size_t i = VEC_LENGTH(filters);
|
size_t i = VEC_LENGTH(filters);
|
||||||
VEC_ADDN(filters,cmask + 1);
|
VEC_ADDN(filters,newbitsum + 1);
|
||||||
|
register IFT x = ifltr->f;
|
||||||
|
register IFT y = 0;
|
||||||
for (size_t j = 0;;++j) {
|
for (size_t j = 0;;++j) {
|
||||||
VEC_BUF(filters,i + j).f =
|
VEC_BUF(filters,i + j).f = x | y;
|
||||||
EXPVAL(ifltr->f,j,dmask,smask,ishift,rshift);
|
if (j == newbitsum)
|
||||||
if (j == cmask)
|
|
||||||
break;
|
break;
|
||||||
|
y = ((y | notnewbits) + 1) & newbits;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// expand existing stuff
|
// expand existing stuff
|
||||||
// allocates needed stuff on its own
|
// allocates needed stuff on its own
|
||||||
static void ifilter_expand(IFT dmask,IFT smask,IFT cmask,int ishift,int rshift)
|
static void ifilter_expand(
|
||||||
|
register IFT newbits,
|
||||||
|
register IFT notnewbits,
|
||||||
|
register IFT newbitsum)
|
||||||
{
|
{
|
||||||
flattened = 1;
|
flattened = 1;
|
||||||
size_t len = VEC_LENGTH(filters);
|
size_t len = VEC_LENGTH(filters);
|
||||||
VEC_ADDN(filters,cmask * len);
|
VEC_ADDN(filters,newbitsum * len);
|
||||||
size_t esz = cmask + 1; // size of expanded elements
|
size_t esz = newbitsum + 1; // size of expanded elements
|
||||||
for (size_t i = len - 1;;--i) {
|
for (size_t i = len - 1;;--i) {
|
||||||
|
register IFT x = VEC_BUF(filters,i).f;
|
||||||
|
register IFT y = 0;
|
||||||
for (IFT j = 0;;++j) {
|
for (IFT j = 0;;++j) {
|
||||||
VEC_BUF(filters,i * esz + j).f =
|
VEC_BUF(filters,i * esz + j).f = x | y;
|
||||||
EXPVAL(VEC_BUF(filters,i).f,j,dmask,smask,ishift,rshift);
|
if (j == newbitsum)
|
||||||
if (j == cmask)
|
|
||||||
break;
|
break;
|
||||||
|
y = ((y | notnewbits) + 1) & newbits;
|
||||||
}
|
}
|
||||||
if (i == 0)
|
if (i == 0)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// count the set bits in x.
// uses the GCC/clang popcount builtins when available, otherwise falls back
// to a portable loop that clears the lowest set bit on every iteration.
// NOTE(review): assumes IFT is an unsigned integer type -- confirm against its typedef.
static unsigned ifilter_popcount(IFT x)
{
#if defined(__GNUC__) || defined(__clang__)
	// 128-bit IFT: count the upper and lower 64-bit halves separately
	if (sizeof(IFT) == 16)
		return (unsigned) __builtin_popcountll((unsigned long long) (x >> (sizeof(IFT) * 8 / 2))) +
			(unsigned) __builtin_popcountll((unsigned long long) x);
	if (sizeof(IFT) == 8)
		return (unsigned) __builtin_popcountll((unsigned long long) x);

	return (unsigned) __builtin_popcount((unsigned int) x);
#else
	unsigned n = 0;
	while (x != 0) {
		x &= x - 1; // drop the lowest set bit
		++n;
	}
	return n;
#endif
}

// returns (2 ^ popcount(x)) - 1, i.e. the highest counter value needed
// to enumerate every combination of the bits set in x.
static IFT ifilter_bitsum(IFT x)
{
	unsigned bits = ifilter_popcount(x);

	// shifting by the full width of the type is undefined behaviour;
	// when every bit of x is set the result is simply all ones.
	if (bits >= sizeof(IFT) * 8)
		return (IFT) ~(IFT) 0;

	return (IFT) ((((IFT) 1) << bits) - 1);
}
|
||||||
|
|
||||||
static inline void ifilter_addflatten(struct intfilter *ifltr,IFT mask)
|
static inline void ifilter_addflatten(struct intfilter *ifltr,IFT mask)
|
||||||
{
|
{
|
||||||
if (VEC_LENGTH(filters) == 0) {
|
if (VEC_LENGTH(filters) == 0) {
|
||||||
|
@ -136,27 +105,21 @@ static inline void ifilter_addflatten(struct intfilter *ifltr,IFT mask)
|
||||||
VEC_ADD(filters,*ifltr);
|
VEC_ADD(filters,*ifltr);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
IFT cross = ifiltermask ^ mask;
|
|
||||||
int ishift = 0;
|
IFT newbits = ifiltermask ^ mask;
|
||||||
while ((cross & 1) == 0) {
|
IFT notnewbits = ~newbits;
|
||||||
++ishift;
|
IFT newbitsum = ifilter_bitsum(newbits);
|
||||||
cross >>= 1;
|
|
||||||
}
|
|
||||||
IFT smask = cross & (cross + 1); // shift mask
|
|
||||||
IFT dmask = cross ^ smask; // direct mask
|
|
||||||
IFT cmask; // combined mask
|
|
||||||
int rshift = 0; // relative shift
|
|
||||||
while (cmask = (smask >> rshift) | dmask,(cmask & (cmask + 1)) != 0)
|
|
||||||
++rshift;
|
|
||||||
// preparations done
|
|
||||||
if (ifiltermask > mask) {
|
if (ifiltermask > mask) {
|
||||||
// already existing stuff has more precise mask than we
|
// current mask covers more bits
|
||||||
// so we need to expand our stuff
|
// expand new filter
|
||||||
ifilter_addexpanded(ifltr,dmask,smask,cmask,ishift,rshift);
|
ifilter_addexpanded(ifltr,newbits,notnewbits,newbitsum);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
// new filter mask covers more bits
|
||||||
|
// adjust current mask and expand current filters
|
||||||
ifiltermask = mask;
|
ifiltermask = mask;
|
||||||
ifilter_expand(dmask,smask,cmask,ishift,rshift);
|
ifilter_expand(newbits,notnewbits,newbitsum);
|
||||||
VEC_ADD(filters,*ifltr);
|
VEC_ADD(filters,*ifltr);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue