mirror of https://github.com/apache/lucene.git
LUCENE-6024: Speed-up BitSet.or/and/andNot.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1634478 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
08c6f54346
commit
a25c4c77db
|
@ -18,6 +18,7 @@ package org.apache.lucene.util;
|
|||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Collections;
|
||||
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
|
||||
|
@ -58,37 +59,90 @@ public abstract class BitSet implements MutableBits, Accountable {
|
|||
*/
|
||||
public abstract int nextSetBit(int i);
|
||||
|
||||
/** Does in-place OR of the bits provided by the
|
||||
* iterator. */
|
||||
/** Assert that the current doc is -1. */
|
||||
protected final void assertUnpositioned(DocIdSetIterator iter) {
|
||||
if (iter.docID() != -1) {
|
||||
throw new IllegalStateException("This operation only works with an unpositioned iterator, got current position = " + iter.docID());
|
||||
}
|
||||
}
|
||||
|
||||
/** Does in-place OR of the bits provided by the iterator. The state of the
|
||||
* iterator after this operation terminates is undefined. */
|
||||
public void or(DocIdSetIterator iter) throws IOException {
|
||||
assertUnpositioned(iter);
|
||||
for (int doc = iter.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iter.nextDoc()) {
|
||||
set(doc);
|
||||
}
|
||||
}
|
||||
|
||||
/** Does in-place AND of the bits provided by the
|
||||
* iterator. */
|
||||
public void and(DocIdSetIterator iter) throws IOException {
|
||||
private static abstract class LeapFrogCallBack {
|
||||
abstract void onMatch(int doc);
|
||||
void finish() {}
|
||||
}
|
||||
|
||||
/** Performs a leap frog between this and the provided iterator in order to find common documents. */
|
||||
private void leapFrog(DocIdSetIterator iter, LeapFrogCallBack callback) throws IOException {
|
||||
final int length = length();
|
||||
if (length == 0) {
|
||||
return;
|
||||
}
|
||||
int disiDoc, bitSetDoc = nextSetBit(0);
|
||||
while (bitSetDoc != DocIdSetIterator.NO_MORE_DOCS && (disiDoc = iter.advance(bitSetDoc)) < length) {
|
||||
clear(bitSetDoc, disiDoc);
|
||||
disiDoc++;
|
||||
bitSetDoc = (disiDoc < length) ? nextSetBit(disiDoc) : DocIdSetIterator.NO_MORE_DOCS;
|
||||
}
|
||||
if (bitSetDoc != DocIdSetIterator.NO_MORE_DOCS) {
|
||||
clear(bitSetDoc, length);
|
||||
int bitSetDoc = -1;
|
||||
int disiDoc = iter.nextDoc();
|
||||
while (true) {
|
||||
// invariant: bitSetDoc <= disiDoc
|
||||
assert bitSetDoc <= disiDoc;
|
||||
if (disiDoc >= length) {
|
||||
callback.finish();
|
||||
return;
|
||||
}
|
||||
if (bitSetDoc < disiDoc) {
|
||||
bitSetDoc = nextSetBit(disiDoc);
|
||||
}
|
||||
if (bitSetDoc == disiDoc) {
|
||||
callback.onMatch(bitSetDoc);
|
||||
disiDoc = iter.nextDoc();
|
||||
} else {
|
||||
disiDoc = iter.advance(bitSetDoc);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** this = this AND NOT other */
|
||||
/** Does in-place AND of the bits provided by the iterator. The state of the
|
||||
* iterator after this operation terminates is undefined. */
|
||||
public void and(DocIdSetIterator iter) throws IOException {
|
||||
assertUnpositioned(iter);
|
||||
leapFrog(iter, new LeapFrogCallBack() {
|
||||
int previous = -1;
|
||||
|
||||
@Override
|
||||
public void onMatch(int doc) {
|
||||
clear(previous + 1, doc);
|
||||
previous = doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void finish() {
|
||||
if (previous + 1 < length()) {
|
||||
clear(previous + 1, length());
|
||||
}
|
||||
}
|
||||
|
||||
});
|
||||
}
|
||||
|
||||
/** this = this AND NOT other. The state of the iterator after this operation
|
||||
* terminates is undefined. */
|
||||
public void andNot(DocIdSetIterator iter) throws IOException {
|
||||
for (int doc = iter.nextDoc(), len = length(); doc < len; doc = iter.nextDoc()) {
|
||||
clear(doc);
|
||||
}
|
||||
assertUnpositioned(iter);
|
||||
leapFrog(iter, new LeapFrogCallBack() {
|
||||
|
||||
@Override
|
||||
public void onMatch(int doc) {
|
||||
clear(doc);
|
||||
}
|
||||
|
||||
});
|
||||
}
|
||||
|
||||
@Override
|
||||
public Iterable<? extends Accountable> getChildResources() {
|
||||
return Collections.emptyList();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -42,6 +42,11 @@ public class BitSetIterator extends DocIdSetIterator {
|
|||
return getBitSet(iterator, FixedBitSet.class);
|
||||
}
|
||||
|
||||
/** If the provided iterator wraps a {@link SparseFixedBitSet}, returns it, otherwise returns null. */
|
||||
public static SparseFixedBitSet getSparseFixedBitSetOrNull(DocIdSetIterator iterator) {
|
||||
return getBitSet(iterator, SparseFixedBitSet.class);
|
||||
}
|
||||
|
||||
private final BitSet bits;
|
||||
private final int length;
|
||||
private final long cost;
|
||||
|
|
|
@ -19,7 +19,6 @@ package org.apache.lucene.util;
|
|||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
|
||||
import org.apache.lucene.search.DocIdSet;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
|
@ -131,11 +130,6 @@ public final class FixedBitSet extends BitSet implements MutableBits, Accountabl
|
|||
return BASE_RAM_BYTES_USED + RamUsageEstimator.sizeOf(bits);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Iterable<? extends Accountable> getChildResources() {
|
||||
return Collections.emptyList();
|
||||
}
|
||||
|
||||
/** Expert. */
|
||||
public long[] getBits() {
|
||||
return bits;
|
||||
|
@ -234,12 +228,10 @@ public final class FixedBitSet extends BitSet implements MutableBits, Accountabl
|
|||
|
||||
@Override
|
||||
public void or(DocIdSetIterator iter) throws IOException {
|
||||
if (BitSetIterator.getFixedBitSetOrNull(iter) != null && iter.docID() == -1) {
|
||||
if (BitSetIterator.getFixedBitSetOrNull(iter) != null) {
|
||||
assertUnpositioned(iter);
|
||||
final FixedBitSet bits = BitSetIterator.getFixedBitSetOrNull(iter);
|
||||
or(bits);
|
||||
// advance after last doc that would be accepted if standard
|
||||
// iteration is used (to exhaust it):
|
||||
iter.advance(numBits);
|
||||
} else {
|
||||
super.or(iter);
|
||||
}
|
||||
|
@ -266,12 +258,10 @@ public final class FixedBitSet extends BitSet implements MutableBits, Accountabl
|
|||
|
||||
/** Does in-place XOR of the bits provided by the iterator. */
|
||||
public void xor(DocIdSetIterator iter) throws IOException {
|
||||
if (BitSetIterator.getFixedBitSetOrNull(iter) != null && iter.docID() == -1) {
|
||||
assertUnpositioned(iter);
|
||||
if (BitSetIterator.getFixedBitSetOrNull(iter) != null) {
|
||||
final FixedBitSet bits = BitSetIterator.getFixedBitSetOrNull(iter);
|
||||
xor(bits);
|
||||
// advance after last doc that would be accepted if standard
|
||||
// iteration is used (to exhaust it):
|
||||
iter.advance(numBits);
|
||||
} else {
|
||||
int doc;
|
||||
while ((doc = iter.nextDoc()) < numBits) {
|
||||
|
@ -291,12 +281,10 @@ public final class FixedBitSet extends BitSet implements MutableBits, Accountabl
|
|||
|
||||
@Override
|
||||
public void and(DocIdSetIterator iter) throws IOException {
|
||||
if (BitSetIterator.getFixedBitSetOrNull(iter) != null && iter.docID() == -1) {
|
||||
if (BitSetIterator.getFixedBitSetOrNull(iter) != null) {
|
||||
assertUnpositioned(iter);
|
||||
final FixedBitSet bits = BitSetIterator.getFixedBitSetOrNull(iter);
|
||||
and(bits);
|
||||
// advance after last doc that would be accepted if standard
|
||||
// iteration is used (to exhaust it):
|
||||
iter.advance(numBits);
|
||||
} else {
|
||||
super.and(iter);
|
||||
}
|
||||
|
@ -329,12 +317,10 @@ public final class FixedBitSet extends BitSet implements MutableBits, Accountabl
|
|||
|
||||
@Override
|
||||
public void andNot(DocIdSetIterator iter) throws IOException {
|
||||
if (BitSetIterator.getFixedBitSetOrNull(iter) != null && iter.docID() == -1) {
|
||||
if (BitSetIterator.getFixedBitSetOrNull(iter) != null) {
|
||||
assertUnpositioned(iter);
|
||||
final FixedBitSet bits = BitSetIterator.getFixedBitSetOrNull(iter);
|
||||
andNot(bits);
|
||||
// advance after last doc that would be accepted if standard
|
||||
// iteration is used (to exhaust it):
|
||||
iter.advance(numBits);
|
||||
} else {
|
||||
super.andNot(iter);
|
||||
}
|
||||
|
|
|
@ -18,7 +18,7 @@ package org.apache.lucene.util;
|
|||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Collections;
|
||||
import java.util.Arrays;
|
||||
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
|
||||
|
@ -156,7 +156,7 @@ public class SparseFixedBitSet extends BitSet implements Bits, Accountable {
|
|||
insertLong(i4096, i64, i, index);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private void insertBlock(int i4096, int i64, int i) {
|
||||
indices[i4096] = 1L << i64; // shifts are mod 64 in java
|
||||
assert bits[i4096] == null;
|
||||
|
@ -196,10 +196,10 @@ public class SparseFixedBitSet extends BitSet implements Bits, Accountable {
|
|||
assert consistent(i);
|
||||
final int i4096 = i >>> 12;
|
||||
final int i64 = i >>> 6;
|
||||
clearWithinLong(i4096, i64, ~(1L << i));
|
||||
and(i4096, i64, ~(1L << i));
|
||||
}
|
||||
|
||||
private void clearWithinLong(int i4096, int i64, long mask) {
|
||||
private void and(int i4096, int i64, long mask) {
|
||||
final long index = indices[i4096];
|
||||
if ((index & (1L << i64)) != 0) {
|
||||
// offset of the long bits we are interested in in the array
|
||||
|
@ -225,6 +225,7 @@ public class SparseFixedBitSet extends BitSet implements Bits, Accountable {
|
|||
System.arraycopy(bitArray, o + 1, bitArray, o, length - o);
|
||||
bitArray[length] = 0L;
|
||||
}
|
||||
nonZeroLongCount -= 1;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -241,6 +242,7 @@ public class SparseFixedBitSet extends BitSet implements Bits, Accountable {
|
|||
} else {
|
||||
clearWithinBlock(firstBlock, from & MASK_4096, MASK_4096);
|
||||
for (int i = firstBlock + 1; i < lastBlock; ++i) {
|
||||
nonZeroLongCount -= Long.bitCount(indices[i]);
|
||||
indices[i] = 0;
|
||||
bits[i] = null;
|
||||
}
|
||||
|
@ -258,14 +260,14 @@ public class SparseFixedBitSet extends BitSet implements Bits, Accountable {
|
|||
int lastLong = to >>> 6;
|
||||
|
||||
if (firstLong == lastLong) {
|
||||
clearWithinLong(i4096, firstLong, ~mask(from, to));
|
||||
and(i4096, firstLong, ~mask(from, to));
|
||||
} else {
|
||||
assert firstLong < lastLong;
|
||||
clearWithinLong(i4096, lastLong, ~mask(0, to));
|
||||
and(i4096, lastLong, ~mask(0, to));
|
||||
for (int i = lastLong - 1; i >= firstLong + 1; --i) {
|
||||
clearWithinLong(i4096, i, 0L);
|
||||
and(i4096, i, 0L);
|
||||
}
|
||||
clearWithinLong(i4096, firstLong, ~mask(from, 63));
|
||||
and(i4096, firstLong, ~mask(from, 63));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -343,11 +345,158 @@ public class SparseFixedBitSet extends BitSet implements Bits, Accountable {
|
|||
}
|
||||
}
|
||||
|
||||
/** Return the long bits at the given <code>i64</code> index. */
|
||||
private long longBits(long index, long[] bits, int i64) {
|
||||
if ((index & (1L << i64)) == 0) {
|
||||
return 0L;
|
||||
} else {
|
||||
return bits[Long.bitCount(index & ((1L << i64) - 1))];
|
||||
}
|
||||
}
|
||||
|
||||
private void or(final int i4096, final long index, long[] bits, int nonZeroLongCount) {
|
||||
assert Long.bitCount(index) == nonZeroLongCount;
|
||||
final long currentIndex = indices[i4096];
|
||||
if (currentIndex == 0) {
|
||||
// fast path: if we currently have nothing in the block, just copy the data
|
||||
// this especially happens all the time if you call OR on an empty set
|
||||
indices[i4096] = index;
|
||||
this.bits[i4096] = Arrays.copyOf(bits, nonZeroLongCount);
|
||||
this.nonZeroLongCount += nonZeroLongCount;
|
||||
return;
|
||||
}
|
||||
final long[] currentBits = this.bits[i4096];
|
||||
final long[] newBits;
|
||||
final long newIndex = currentIndex | index;
|
||||
final int requiredCapacity = Long.bitCount(newIndex);
|
||||
if (currentBits.length >= requiredCapacity) {
|
||||
newBits = currentBits;
|
||||
} else {
|
||||
newBits = new long[oversize(requiredCapacity)];
|
||||
}
|
||||
// we iterate backwards in order to not override data we might need on the next iteration if the
|
||||
// array is reused
|
||||
for (int i = Long.numberOfLeadingZeros(newIndex), newO = Long.bitCount(newIndex) - 1;
|
||||
i < 64;
|
||||
i += 1 + Long.numberOfLeadingZeros(newIndex << (i + 1)), newO -= 1) {
|
||||
// bitIndex is the index of a bit which is set in newIndex and newO is the number of 1 bits on its right
|
||||
final int bitIndex = 63 - i;
|
||||
assert newO == Long.bitCount(newIndex & ((1L << bitIndex) - 1));
|
||||
newBits[newO] = longBits(currentIndex, currentBits, bitIndex) | longBits(index, bits, bitIndex);
|
||||
}
|
||||
indices[i4096] = newIndex;
|
||||
this.bits[i4096] = newBits;
|
||||
this.nonZeroLongCount += nonZeroLongCount - Long.bitCount(currentIndex & index);
|
||||
}
|
||||
|
||||
private void or(SparseFixedBitSet other) {
|
||||
for (int i = 0; i < other.indices.length; ++i) {
|
||||
final long index = other.indices[i];
|
||||
if (index != 0) {
|
||||
or(i, index, other.bits[i], Long.bitCount(index));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* {@link #or(DocIdSetIterator)} impl that works best when <code>it</code> is dense
|
||||
*/
|
||||
private void orDense(DocIdSetIterator it) throws IOException {
|
||||
assertUnpositioned(it);
|
||||
// The goal here is to try to take advantage of the ordering of documents
|
||||
// to build the data-structure more efficiently
|
||||
// NOTE: this heavily relies on the fact that shifts are mod 64
|
||||
final int firstDoc = it.nextDoc();
|
||||
if (firstDoc == DocIdSetIterator.NO_MORE_DOCS) {
|
||||
return;
|
||||
}
|
||||
int i4096 = firstDoc >>> 12;
|
||||
int i64 = firstDoc >>> 6;
|
||||
long index = 1L << i64;
|
||||
long currentLong = 1L << firstDoc;
|
||||
// we store at most 64 longs per block so preallocate in order never to have to resize
|
||||
long[] longs = new long[64];
|
||||
int numLongs = 0;
|
||||
|
||||
for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
|
||||
final int doc64 = doc >>> 6;
|
||||
if (doc64 == i64) {
|
||||
// still in the same long, just set the bit
|
||||
currentLong |= 1L << doc;
|
||||
} else {
|
||||
longs[numLongs++] = currentLong;
|
||||
|
||||
final int doc4096 = doc >>> 12;
|
||||
if (doc4096 == i4096) {
|
||||
index |= 1L << doc64;
|
||||
} else {
|
||||
// we are on a new block, flush what we buffered
|
||||
or(i4096, index, longs, numLongs);
|
||||
// and reset state for the new block
|
||||
i4096 = doc4096;
|
||||
index = 1L << doc64;
|
||||
numLongs = 0;
|
||||
}
|
||||
|
||||
// we are on a new long, reset state
|
||||
i64 = doc64;
|
||||
currentLong = 1L << doc;
|
||||
}
|
||||
}
|
||||
|
||||
// flush
|
||||
longs[numLongs++] = currentLong;
|
||||
or(i4096, index, longs, numLongs);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void or(DocIdSetIterator it) throws IOException {
|
||||
for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
|
||||
set(doc);
|
||||
{
|
||||
// specialize union with another SparseFixedBitSet
|
||||
final SparseFixedBitSet other = BitSetIterator.getSparseFixedBitSetOrNull(it);
|
||||
if (other != null) {
|
||||
assertUnpositioned(it);
|
||||
or(other);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// We do not specialize the union with a FixedBitSet since FixedBitSets are
|
||||
// supposed to be used for dense data and sparse fixed bit sets for sparse
|
||||
// data, so a sparse set would likely get upgraded by DocIdSetBuilder before
|
||||
// being or'ed with a FixedBitSet
|
||||
|
||||
if (it.cost() < indices.length) {
|
||||
// the default impl is good for sparse iterators
|
||||
super.or(it);
|
||||
} else {
|
||||
orDense(it);
|
||||
}
|
||||
}
|
||||
|
||||
// AND and AND_NOT do not need much specialization here since this sparse set
|
||||
// is supposed to be used on sparse data and the default AND/AND_NOT impl
|
||||
// (leap frog) is efficient when at least one of the sets contains sparse data
|
||||
|
||||
@Override
|
||||
public void and(DocIdSetIterator it) throws IOException {
|
||||
final SparseFixedBitSet other = BitSetIterator.getSparseFixedBitSetOrNull(it);
|
||||
if (other != null) {
|
||||
// if we are merging with another SparseFixedBitSet, a quick win is
|
||||
// to clear up some blocks by only looking at their index. Then the set
|
||||
// is sparser and the leap-frog approach of the parent class is more
|
||||
// efficient. Since SparseFixedBitSet is supposed to be used for sparse
|
||||
// sets, the intersection of two SparseFixedBitSet is likely very sparse
|
||||
final int numCommonBlocks = Math.min(indices.length, other.indices.length);
|
||||
for (int i = 0; i < numCommonBlocks; ++i) {
|
||||
if ((indices[i] & other.indices[i]) == 0) {
|
||||
this.nonZeroLongCount -= Long.bitCount(this.indices[i]);
|
||||
this.indices[i] = 0;
|
||||
this.bits[i] = null;
|
||||
}
|
||||
}
|
||||
}
|
||||
super.and(it);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -355,8 +504,4 @@ public class SparseFixedBitSet extends BitSet implements Bits, Accountable {
|
|||
return ramBytesUsed;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Iterable<? extends Accountable> getChildResources() {
|
||||
return Collections.emptyList();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -32,6 +32,23 @@ public class TestSparseFixedBitSet extends BaseBitSetTestCase<SparseFixedBitSet>
|
|||
return set;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void assertEquals(BitSet set1, SparseFixedBitSet set2, int maxDoc) {
|
||||
super.assertEquals(set1, set2, maxDoc);
|
||||
// check invariants of the sparse set
|
||||
int nonZeroLongCount = 0;
|
||||
for (int i = 0; i < set2.indices.length; ++i) {
|
||||
final int n = Long.bitCount(set2.indices[i]);
|
||||
if (n != 0) {
|
||||
nonZeroLongCount += n;
|
||||
for (int j = n; j < set2.bits[i].length; ++j) {
|
||||
assertEquals(0, set2.bits[i][j]);
|
||||
}
|
||||
}
|
||||
}
|
||||
assertEquals(nonZeroLongCount, set2.nonZeroLongCount);
|
||||
}
|
||||
|
||||
public void testApproximateCardinality() {
|
||||
final SparseFixedBitSet set = new SparseFixedBitSet(10000);
|
||||
final int first = random().nextInt(1000);
|
||||
|
|
|
@ -58,7 +58,7 @@ public abstract class BaseBitSetTestCase<T extends BitSet> extends LuceneTestCas
|
|||
return randomSet(numBits, (int) (percentSet * numBits));
|
||||
}
|
||||
|
||||
private void assertEquals(BitSet set1, BitSet set2, int maxDoc) {
|
||||
protected void assertEquals(BitSet set1, T set2, int maxDoc) {
|
||||
for (int i = 0; i < maxDoc; ++i) {
|
||||
assertEquals("Different at " + i, set1.get(i), set2.get(i));
|
||||
}
|
||||
|
@ -77,11 +77,15 @@ public abstract class BaseBitSetTestCase<T extends BitSet> extends LuceneTestCas
|
|||
/** Test the {@link BitSet#set} method. */
|
||||
public void testSet() throws IOException {
|
||||
final int numBits = 1 + random().nextInt(100000);
|
||||
for (float percentSet : new float[] {0, 0.01f, 0.1f, 0.5f, 0.9f, 0.99f, 1f}) {
|
||||
BitSet set1 = new JavaUtilBitSet(randomSet(numBits, percentSet), numBits);
|
||||
T set2 = copyOf(set1, numBits);
|
||||
assertEquals(set1, set2, numBits);
|
||||
BitSet set1 = new JavaUtilBitSet(randomSet(numBits, 0), numBits);
|
||||
T set2 = copyOf(set1, numBits);
|
||||
final int iters = 10000 + random().nextInt(10000);
|
||||
for (int i = 0; i < iters; ++i) {
|
||||
final int index = random().nextInt(numBits);
|
||||
set1.set(index);
|
||||
set2.set(index);
|
||||
}
|
||||
assertEquals(set1, set2, numBits);
|
||||
}
|
||||
|
||||
/** Test the {@link BitSet#clear(int)} method. */
|
||||
|
@ -118,16 +122,28 @@ public abstract class BaseBitSetTestCase<T extends BitSet> extends LuceneTestCas
|
|||
}
|
||||
|
||||
private DocIdSet randomCopy(BitSet set, int numBits) throws IOException {
|
||||
if (random().nextBoolean()) {
|
||||
return new BitDocIdSet(copyOf(set, numBits), set.cardinality());
|
||||
} else if (random().nextBoolean()) {
|
||||
final RoaringDocIdSet.Builder builder = new RoaringDocIdSet.Builder(numBits);
|
||||
for (int i = set.nextSetBit(0); i != DocIdSetIterator.NO_MORE_DOCS; i = i + 1 >= numBits ? DocIdSetIterator.NO_MORE_DOCS : set.nextSetBit(i + 1)) {
|
||||
builder.add(i);
|
||||
}
|
||||
return builder.build();
|
||||
} else {
|
||||
return new BitDocIdSet(set, set.cardinality());
|
||||
switch (random().nextInt(5)) {
|
||||
case 0:
|
||||
return new BitDocIdSet(set, set.cardinality());
|
||||
case 1:
|
||||
return new BitDocIdSet(copyOf(set, numBits), set.cardinality());
|
||||
case 2:
|
||||
final RoaringDocIdSet.Builder builder = new RoaringDocIdSet.Builder(numBits);
|
||||
for (int i = set.nextSetBit(0); i != DocIdSetIterator.NO_MORE_DOCS; i = i + 1 >= numBits ? DocIdSetIterator.NO_MORE_DOCS : set.nextSetBit(i + 1)) {
|
||||
builder.add(i);
|
||||
}
|
||||
return builder.build();
|
||||
case 3:
|
||||
FixedBitSet fbs = new FixedBitSet(numBits);
|
||||
fbs.or(new BitSetIterator(set, 0));
|
||||
return new BitDocIdSet(fbs);
|
||||
case 4:
|
||||
SparseFixedBitSet sfbs = new SparseFixedBitSet(numBits);
|
||||
sfbs.or(new BitSetIterator(set, 0));
|
||||
return new BitDocIdSet(sfbs);
|
||||
default:
|
||||
fail();
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -136,10 +152,10 @@ public abstract class BaseBitSetTestCase<T extends BitSet> extends LuceneTestCas
|
|||
final int numBits = 1 + random().nextInt(100000);
|
||||
BitSet set1 = new JavaUtilBitSet(randomSet(numBits, 0), numBits);
|
||||
T set2 = copyOf(set1, numBits);
|
||||
final int iters = 10 + random().nextInt(100);
|
||||
final int iters = 50 + random().nextInt(50);
|
||||
for (int i = 0; i < iters; ++i) {
|
||||
// make extreme percents more likely
|
||||
float percentSet2 = (float) Math.pow(random().nextDouble(), 2);
|
||||
float percentSet2 = rarely() ? 0 : (float) Math.pow(random().nextDouble(), 2);
|
||||
if (random().nextBoolean()) {
|
||||
percentSet2 = 1 - percentSet2;
|
||||
}
|
||||
|
@ -150,18 +166,20 @@ public abstract class BaseBitSetTestCase<T extends BitSet> extends LuceneTestCas
|
|||
if (bulkSetCopy.iterator() == null) {
|
||||
continue;
|
||||
}
|
||||
DocIdSetIterator it1 = bulkSetCopy.iterator();
|
||||
DocIdSetIterator it2 = bulkSetCopy.iterator();
|
||||
switch (random().nextInt(3)) {
|
||||
case 0:
|
||||
set1.or(bulkSetCopy.iterator());
|
||||
set2.or(bulkSetCopy.iterator());
|
||||
set1.or(it1);
|
||||
set2.or(it2);
|
||||
break;
|
||||
case 1:
|
||||
set1.and(bulkSetCopy.iterator());
|
||||
set2.and(bulkSetCopy.iterator());
|
||||
set1.and(it1);
|
||||
set2.and(it2);
|
||||
break;
|
||||
default:
|
||||
set1.andNot(bulkSetCopy.iterator());
|
||||
set2.andNot(bulkSetCopy.iterator());
|
||||
set1.andNot(it1);
|
||||
set2.andNot(it2);
|
||||
break;
|
||||
}
|
||||
assertEquals(set1, set2, numBits);
|
||||
|
|
Loading…
Reference in New Issue