LUCENE-1443: Performance improvement for OpenBitSetDISI.inPlaceAnd()

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@712908 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Yonik Seeley 2008-11-11 01:54:49 +00:00
parent abe6ea0828
commit b179b038a1
4 changed files with 154 additions and 102 deletions

View File

@ -70,6 +70,9 @@ Optimizations
more efficient (single pass) by not creating & populating an
intermediate OpenBitSet (Paul Elschot, Mike McCandless)
2. LUCENE-1443: Performance improvement for OpenBitSetDISI.inPlaceAnd()
(Paul Elschot via yonik)
Documentation

View File

@ -688,7 +688,7 @@ public class BitUtil {
// Lookup table indexed by an unsigned byte value: entry i holds the number of
// trailing zero bits of i. Entry 0 holds 8 (an all-zero byte has no set bit).
public static final byte[] ntzTable = {8,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,6,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,7,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,6,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0};
/** Returns number of trailing zeros in the 64 bit long value. */
/** Returns number of trailing zeros in a 64 bit long value. */
public static int ntz(long val) {
// A full binary search to determine the low byte was slower than
// a linear search for nextSetBit(). This is most likely because
@ -728,6 +728,23 @@ public class BitUtil {
}
}
/**
 * Returns the number of trailing zero bits in a 32 bit int value.
 * A value of zero yields 32.
 */
public static int ntz(int val) {
  // Walk the bytes from least to most significant and let ntzTable resolve
  // the first non-zero one. The lowest byte is tested first because it is
  // the most common hit in dense bit arrays.
  final int byte0 = val & 0xff;
  if (byte0 != 0) {
    return ntzTable[byte0];
  }
  final int byte1 = (val >>> 8) & 0xff;
  if (byte1 != 0) {
    return 8 + ntzTable[byte1];
  }
  final int byte2 = (val >>> 16) & 0xff;
  if (byte2 != 0) {
    return 16 + ntzTable[byte2];
  }
  // The unsigned shift leaves only the top byte, so no mask is needed, and
  // no zero test is needed either: ntzTable[0] is 8, giving 32 for val == 0.
  return 24 + ntzTable[val >>> 24];
}
/** returns 0 based index of first set bit
* (only works for x!=0)
* <br/> This is an alternate implementation of ntz()

View File

@ -327,6 +327,43 @@ public class OpenBitSet extends DocIdSet implements Cloneable, Serializable {
bits[wordNum] &= ~bitmask;
}
/**
 * Clears every bit in the half-open range [startIndex, endIndex).
 * An empty or inverted range is a no-op, and clearing past the end
 * does not change the size of the set.
 *
 * @param startIndex index of the first bit to clear
 * @param endIndex one past the index of the last bit to clear
 */
public void clear(int startIndex, int endIndex) {
  if (startIndex >= endIndex) {
    return;
  }

  final int firstWord = startIndex >> 6;
  if (firstWord >= wlen) {
    // the whole range lies beyond the words in use
    return;
  }

  // endIndex is exclusive, so the last affected word is the one holding endIndex-1
  final int lastWord = (endIndex - 1) >> 6;

  // Masks of the bits to KEEP. Long shifts only use the low six bits of the
  // shift count, so -endIndex acts as 64-(endIndex&0x3f).
  final long keepBelowStart = ~(-1L << startIndex);
  final long keepFromEnd = ~(-1L >>> -endIndex);

  if (firstWord == lastWord) {
    // range sits inside a single word: keep both sides, drop the middle
    bits[firstWord] &= (keepBelowStart | keepFromEnd);
    return;
  }

  bits[firstWord] &= keepBelowStart;

  // zero the whole words strictly between the first and last word, capped at wlen
  Arrays.fill(bits, firstWord + 1, Math.min(wlen, lastWord), 0L);

  if (lastWord < wlen) {
    bits[lastWord] &= keepFromEnd;
  }
}
/** Clears a range of bits. Clearing past the end does not change the size of the set.
*
* @param startIndex lower index

View File

@ -59,20 +59,15 @@ public class OpenBitSetDISI extends OpenBitSet {
* constructor.
*/
public void inPlaceAnd(DocIdSetIterator disi) throws IOException {
// NOTE(review): this span is a rendered diff hunk whose +/- markers were stripped, so two
// implementations of inPlaceAnd() appear interleaved and the text is not compilable as shown.
// The index/lastNotCleared lines look like the removed version and the bitSetDoc/disiDoc
// lines like the added one -- confirm against the real diff before editing.

// Variant A (presumably removed): walks the set bits one by one, calling fastClear() on
// each bit that is below disi.doc().
int index = nextSetBit(0);
int lastNotCleared = -1;
while ((index != -1) && disi.skipTo(index)) {
while ((index != -1) && (index < disi.doc())) {
fastClear(index);
index = nextSetBit(index + 1);
// Variant B (presumably added): clears the whole range [bitSetDoc, disiDoc) with a single
// clear() call, then resumes from the first set bit after disiDoc, so bit disiDoc is kept.
int bitSetDoc = nextSetBit(0);
while ((bitSetDoc != -1) && disi.skipTo(bitSetDoc)) {
int disiDoc = disi.doc();
clear(bitSetDoc, disiDoc);
bitSetDoc = nextSetBit(disiDoc + 1);
}
// Variant A: a bit equal to disi.doc() is kept and remembered as the last survivor.
if (index == disi.doc()) {
lastNotCleared = index;
index++;
// Variant B: bitSetDoc != -1 here means the loop ended because skipTo() returned false;
// the remaining bits from bitSetDoc up to size() are cleared.
if (bitSetDoc != -1) {
clear(bitSetDoc, size());
}
assert (index == -1) || (index > disi.doc());
}
// Variant A: clears everything after the last bit that was kept.
clear(lastNotCleared+1, size());
}
/**