LUCENE-1467: Add nextDoc() and next(int) methods to OpenBitSetIterator.

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@720609 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael Busch 2008-11-25 21:39:47 +00:00
parent 129a630839
commit eb88fc88fd
3 changed files with 115 additions and 10 deletions

View File

@ -34,6 +34,10 @@ API Changes
and also to define custom Attributes. The new API has the same performance and also to define custom Attributes. The new API has the same performance
as the old next(Token) approach. (Michael Busch) as the old next(Token) approach. (Michael Busch)
5. LUCENE-1467: Add nextDoc() and next(int) methods to OpenBitSetIterator.
These methods can be used to avoid additional calls to doc().
(Michael Busch)
Bug fixes Bug fixes
1. LUCENE-1415: MultiPhraseQuery has incorrect hashCode() and equals() 1. LUCENE-1415: MultiPhraseQuery has incorrect hashCode() and equals()

View File

@ -133,6 +133,35 @@ public class OpenBitSetIterator extends DocIdSetIterator {
return true; return true;
} }
/** Moves iterator to the next doc and returns its id;
returns -1 when this iterator is exhausted.
<p>
The returned value is also stored in curDocId, so a following call to
doc() yields the same id without the caller having to ask for it.

@return the id of the next set bit, or -1 if there is none */
public int nextDoc() {
// indexArray is consumed four bits at a time (see below); 0 means the
// entries produced by the last shift() call have all been handed out.
if (indexArray==0) {
if (word!=0) {
// Drop the low 8 bits of the current word and account for them in
// wordShift. Presumably those are the bits the previous shift() call
// described -- shift() is defined outside this view, TODO confirm.
word >>>= 8;
wordShift += 8;
}
// Skip over words that have no bits left, advancing i through arr.
while (word==0) {
if (++i >= words) {
// Ran past the last word: record and report exhaustion.
return curDocId = -1;
}
word = arr[i];
// A fresh word starts at -1 rather than 0; next(int) describes this
// offset as compensation for 1-based index entries.
wordShift =-1; // loop invariant code motion should move this
}
// after the first time, should I go with a linear search, or
// stick with the binary search in shift?
shift();
}
// Take the lowest 4-bit entry of indexArray and add the running offset
// to get the bit position inside the current word.
int bitIndex = (indexArray & 0x0f) + wordShift;
// Discard the entry just used so the next call sees the following one.
indexArray >>>= 4;
// i<<6 is the word index times 64, i.e. the id of the word's first bit.
// should i<<6 be cached as a separate variable?
// it would only save one cycle in the best circumstances.
return curDocId = (i<<6) + bitIndex;
}
public boolean skipTo(int target) { public boolean skipTo(int target) {
indexArray=0; indexArray=0;
i = target >> 6; i = target >> 6;
@ -166,6 +195,38 @@ public class OpenBitSetIterator extends DocIdSetIterator {
return true; return true;
} }
/** Behaves like {@link #skipTo(int)} and returns the docId the iterator
 * skipped to; returns -1 if no valid document could be skipped to.
 * <p>
 * The returned value is also stored in curDocId, so a following call to
 * doc() yields the same id.
 * <p>
 * NOTE(review): a negative fromIndex is not checked here; it would make
 * the word index negative before arr is read -- confirm callers never
 * pass one.
 *
 * @param fromIndex the bit index at which the search starts
 * @return the id of the first set bit found from fromIndex on, or -1 */
public int next(int fromIndex) {
// Forget any entries left over from a previous position.
indexArray=0;
// fromIndex / 64 selects the word that holds the starting bit.
i = fromIndex >> 6;
if (i>=words) {
word =0; // setup so next() will also return -1
return curDocId = -1;
}
// fromIndex % 64 is the starting bit inside that word; shifting it out
// removes every bit below the starting position.
wordShift = fromIndex & 0x3f;
word = arr[i] >>> wordShift;
if (word !=0) {
wordShift--; // compensate for 1 based arrIndex
} else {
// Nothing left in the starting word: scan forward for a non-zero word.
while (word ==0) {
if (++i >= words) {
// Ran past the last word: record and report exhaustion.
return curDocId = -1;
}
word = arr[i];
}
// A whole, unshifted word was loaded, so use the same -1 offset that
// nextDoc() applies to a fresh word.
wordShift =-1;
}
// shift() is defined outside this view; the lines below read its result
// from indexArray as 4-bit entries.
shift();
// Take the lowest 4-bit entry of indexArray and add the running offset
// to get the bit position inside the current word.
int bitIndex = (indexArray & 0x0f) + wordShift;
// Discard the entry just used so a later nextDoc() sees the following one.
indexArray >>>= 4;
// i<<6 is the word index times 64, i.e. the id of the word's first bit.
// should i<<6 be cached as a separate variable?
// it would only save one cycle in the best circumstances.
return curDocId = (i<<6) + bitIndex;
}
public int doc() { public int doc() {
return this.curDocId; return this.curDocId;
} }

View File

@ -46,8 +46,13 @@ public class TestOpenBitSet extends TestCase {
} while (aa>=0); } while (aa>=0);
} }
// test interleaving different BitSetIterator.next() // test interleaving different OpenBitSetIterator.next()/skipTo()
void doIterate(BitSet a, OpenBitSet b) { void doIterate(BitSet a, OpenBitSet b, int mode) {
if (mode==1) doIterate1(a, b);
if (mode==2) doIterate2(a, b);
}
void doIterate1(BitSet a, OpenBitSet b) {
int aa=-1,bb=-1; int aa=-1,bb=-1;
OpenBitSetIterator iterator = new OpenBitSetIterator(b); OpenBitSetIterator iterator = new OpenBitSetIterator(b);
do { do {
@ -61,8 +66,20 @@ public class TestOpenBitSet extends TestCase {
} while (aa>=0); } while (aa>=0);
} }
/**
 * Walks both bit sets in lock step and checks that they report the same
 * set bits. For each step the OpenBitSetIterator is advanced by a randomly
 * chosen method, either nextDoc() or next(int), so the two value-returning
 * calls are interleaved.
 */
void doIterate2(BitSet a, OpenBitSet b) {
  OpenBitSetIterator it = new OpenBitSetIterator(b);
  int expected = -1;
  int actual = -1;
  do {
    // Reference answer from java.util.BitSet.
    expected = a.nextSetBit(expected + 1);
    // Same step on the iterator under test, via a randomly picked method.
    actual = rand.nextBoolean() ? it.nextDoc() : it.next(actual + 1);
    assertEquals(expected, actual);
  } while (expected >= 0);
}
void doRandomSets(int maxSize, int iter) { void doRandomSets(int maxSize, int iter, int mode) {
BitSet a0=null; BitSet a0=null;
OpenBitSet b0=null; OpenBitSet b0=null;
@ -110,7 +127,7 @@ public class TestOpenBitSet extends TestCase {
BitSet aa = (BitSet)a.clone(); aa.flip(fromIndex,toIndex); BitSet aa = (BitSet)a.clone(); aa.flip(fromIndex,toIndex);
OpenBitSet bb = (OpenBitSet)b.clone(); bb.flip(fromIndex,toIndex); OpenBitSet bb = (OpenBitSet)b.clone(); bb.flip(fromIndex,toIndex);
doIterate(aa,bb); // a problem here is from flip or doIterate doIterate(aa,bb, mode); // a problem here is from flip or doIterate
fromIndex = rand.nextInt(sz+80); fromIndex = rand.nextInt(sz+80);
toIndex = fromIndex + rand.nextInt((sz>>1)+1); toIndex = fromIndex + rand.nextInt((sz>>1)+1);
@ -142,10 +159,10 @@ public class TestOpenBitSet extends TestCase {
OpenBitSet b_xor = (OpenBitSet)b.clone(); b_xor.xor(b0); OpenBitSet b_xor = (OpenBitSet)b.clone(); b_xor.xor(b0);
OpenBitSet b_andn = (OpenBitSet)b.clone(); b_andn.andNot(b0); OpenBitSet b_andn = (OpenBitSet)b.clone(); b_andn.andNot(b0);
doIterate(a_and,b_and); doIterate(a_and,b_and, mode);
doIterate(a_or,b_or); doIterate(a_or,b_or, mode);
doIterate(a_xor,b_xor); doIterate(a_xor,b_xor, mode);
doIterate(a_andn,b_andn); doIterate(a_andn,b_andn, mode);
assertEquals(a_and.cardinality(), b_and.cardinality()); assertEquals(a_and.cardinality(), b_and.cardinality());
assertEquals(a_or.cardinality(), b_or.cardinality()); assertEquals(a_or.cardinality(), b_or.cardinality());
@ -167,12 +184,14 @@ public class TestOpenBitSet extends TestCase {
// large enough to flush obvious bugs, small enough to run in <.5 sec as part of a // large enough to flush obvious bugs, small enough to run in <.5 sec as part of a
// larger testsuite. // larger testsuite.
public void testSmall() { public void testSmall() {
doRandomSets(1200,1000); doRandomSets(1200,1000, 1);
doRandomSets(1200,1000, 2);
} }
public void testBig() { public void testBig() {
// uncomment to run a bigger test (~2 minutes). // uncomment to run a bigger test (~2 minutes).
// doRandomSets(2000,200000); // doRandomSets(2000,200000, 1);
// doRandomSets(2000,200000, 2);
} }
public void testEquals() { public void testEquals() {
@ -197,6 +216,27 @@ public class TestOpenBitSet extends TestCase {
assertFalse(b1.equals(new Object())); assertFalse(b1.equals(new Object()));
} }
/**
 * Checks that the three BitUtil trailing-zero variants (ntz, ntz2, ntz3)
 * agree on a couple of fixed values and on every single-bit long.
 */
public void testBitUtils() {
  // 100000 = 3125 * 2^5 has five trailing zero bits; 10 = 5 * 2 has one.
  long[] samples = {100000L, 10L};
  int[] trailingZeros = {5, 1};
  for (int k = 0; k < samples.length; k++) {
    assertEquals(trailingZeros[k], BitUtil.ntz(samples[k]));
    assertEquals(trailingZeros[k], BitUtil.ntz2(samples[k]));
    assertEquals(trailingZeros[k], BitUtil.ntz3(samples[k]));
  }

  // A long with only bit 'bit' set has exactly 'bit' trailing zeros.
  for (int bit = 0; bit < 64; bit++) {
    long singleBit = 1L << bit;
    assertEquals(bit, BitUtil.ntz(singleBit));
    assertEquals(bit, BitUtil.ntz2(singleBit));
    assertEquals(bit, BitUtil.ntz3(singleBit));
  }
}
} }