LUCENE-1467: Add nextDoc() and next(int) methods to OpenBitSetIterator.

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@720609 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael Busch 2008-11-25 21:39:47 +00:00
parent 129a630839
commit eb88fc88fd
3 changed files with 115 additions and 10 deletions

View File

@ -34,6 +34,10 @@ API Changes
and also to define custom Attributes. The new API has the same performance
as the old next(Token) approach. (Michael Busch)
5. LUCENE-1467: Add nextDoc() and next(int) methods to OpenBitSetIterator.
These methods can be used to avoid additional calls to doc().
(Michael Busch)
Bug fixes
1. LUCENE-1415: MultiPhraseQuery has incorrect hashCode() and equals()

View File

@ -133,6 +133,35 @@ public class OpenBitSetIterator extends DocIdSetIterator {
return true;
}
/** Moves iterator to the next doc and returns its id;
returns -1 when this iterator is exhausted. */
public int nextDoc() {
if (indexArray==0) {
if (word!=0) {
word >>>= 8;
wordShift += 8;
}
while (word==0) {
if (++i >= words) {
return curDocId = -1;
}
word = arr[i];
wordShift =-1; // loop invariant code motion should move this
}
// after the first time, should I go with a linear search, or
// stick with the binary search in shift?
shift();
}
int bitIndex = (indexArray & 0x0f) + wordShift;
indexArray >>>= 4;
// should i<<6 be cached as a separate variable?
// it would only save one cycle in the best circumstances.
return curDocId = (i<<6) + bitIndex;
}
public boolean skipTo(int target) {
indexArray=0;
i = target >> 6;
@ -166,6 +195,38 @@ public class OpenBitSetIterator extends DocIdSetIterator {
return true;
}
/** Behaves like {@link #skipTo(int)} and returns the docId the iterator
* skipped to; returns -1 if no valid document could be skipped to. */
public int next(int fromIndex) {
indexArray=0;
i = fromIndex >> 6;
if (i>=words) {
word =0; // setup so next() will also return -1
return curDocId = -1;
}
wordShift = fromIndex & 0x3f;
word = arr[i] >>> wordShift;
if (word !=0) {
wordShift--; // compensate for 1 based arrIndex
} else {
while (word ==0) {
if (++i >= words) {
return curDocId = -1;
}
word = arr[i];
}
wordShift =-1;
}
shift();
int bitIndex = (indexArray & 0x0f) + wordShift;
indexArray >>>= 4;
// should i<<6 be cached as a separate variable?
// it would only save one cycle in the best circumstances.
return curDocId = (i<<6) + bitIndex;
}
public int doc() {
return this.curDocId;
}

View File

@ -46,8 +46,13 @@ public class TestOpenBitSet extends TestCase {
} while (aa>=0);
}
// test interleaving different BitSetIterator.next()
void doIterate(BitSet a, OpenBitSet b) {
// test interleaving different OpenBitSetIterator.next()/skipTo()
void doIterate(BitSet a, OpenBitSet b, int mode) {
if (mode==1) doIterate1(a, b);
if (mode==2) doIterate2(a, b);
}
void doIterate1(BitSet a, OpenBitSet b) {
int aa=-1,bb=-1;
OpenBitSetIterator iterator = new OpenBitSetIterator(b);
do {
@ -61,8 +66,20 @@ public class TestOpenBitSet extends TestCase {
} while (aa>=0);
}
void doIterate2(BitSet a, OpenBitSet b) {
int aa=-1,bb=-1;
OpenBitSetIterator iterator = new OpenBitSetIterator(b);
do {
aa = a.nextSetBit(aa+1);
if (rand.nextBoolean())
bb = iterator.nextDoc();
else
bb = iterator.next(bb+1);
assertEquals(aa,bb);
} while (aa>=0);
}
void doRandomSets(int maxSize, int iter) {
void doRandomSets(int maxSize, int iter, int mode) {
BitSet a0=null;
OpenBitSet b0=null;
@ -110,7 +127,7 @@ public class TestOpenBitSet extends TestCase {
BitSet aa = (BitSet)a.clone(); aa.flip(fromIndex,toIndex);
OpenBitSet bb = (OpenBitSet)b.clone(); bb.flip(fromIndex,toIndex);
doIterate(aa,bb); // a problem here is from flip or doIterate
doIterate(aa,bb, mode); // a problem here is from flip or doIterate
fromIndex = rand.nextInt(sz+80);
toIndex = fromIndex + rand.nextInt((sz>>1)+1);
@ -142,10 +159,10 @@ public class TestOpenBitSet extends TestCase {
OpenBitSet b_xor = (OpenBitSet)b.clone(); b_xor.xor(b0);
OpenBitSet b_andn = (OpenBitSet)b.clone(); b_andn.andNot(b0);
doIterate(a_and,b_and);
doIterate(a_or,b_or);
doIterate(a_xor,b_xor);
doIterate(a_andn,b_andn);
doIterate(a_and,b_and, mode);
doIterate(a_or,b_or, mode);
doIterate(a_xor,b_xor, mode);
doIterate(a_andn,b_andn, mode);
assertEquals(a_and.cardinality(), b_and.cardinality());
assertEquals(a_or.cardinality(), b_or.cardinality());
@ -167,12 +184,14 @@ public class TestOpenBitSet extends TestCase {
// large enough to flush obvious bugs, small enough to run in <.5 sec as part of a
// larger testsuite.
public void testSmall() {
doRandomSets(1200,1000);
doRandomSets(1200,1000, 1);
doRandomSets(1200,1000, 2);
}
public void testBig() {
// uncomment to run a bigger test (~2 minutes).
// doRandomSets(2000,200000);
// doRandomSets(2000,200000, 1);
// doRandomSets(2000,200000, 2);
}
public void testEquals() {
@ -196,7 +215,28 @@ public class TestOpenBitSet extends TestCase {
// try different type of object
assertFalse(b1.equals(new Object()));
}
public void testBitUtils()
{
long num = 100000;
assertEquals( 5, BitUtil.ntz(num) );
assertEquals( 5, BitUtil.ntz2(num) );
assertEquals( 5, BitUtil.ntz3(num) );
num = 10;
assertEquals( 1, BitUtil.ntz(num) );
assertEquals( 1, BitUtil.ntz2(num) );
assertEquals( 1, BitUtil.ntz3(num) );
for (int i=0; i<64; i++) {
num = 1L << i;
assertEquals( i, BitUtil.ntz(num) );
assertEquals( i, BitUtil.ntz2(num) );
assertEquals( i, BitUtil.ntz3(num) );
}
}
}