mirror of https://github.com/apache/lucene.git
SparseFixedBitSet#firstDoc: reduce number of `indices` iterations for a bit set that is not fully built yet. (#13559)
This commit is contained in:
parent
0a24769850
commit
e8eba4d455
|
@ -108,8 +108,11 @@ API Changes
|
|||
|
||||
* GITHUB#13410: Removed Scorer#getWeight (Sanjay Dutt, Adrien Grand)
|
||||
|
||||
* GITHUB#13559: Add BitSet#nextSetBit(int, int) to get the index of the first set bit in range. (Egor Potemkin)
|
||||
|
||||
* GITHUB#13499: Remove deprecated TopScoreDocCollector + TopFieldCollector methods (#create, #createSharedManager) (Jakub Slowinski)
|
||||
|
||||
|
||||
New Features
|
||||
---------------------
|
||||
|
||||
|
|
|
@ -92,6 +92,12 @@ public abstract class BitSet implements Bits, Accountable {
|
|||
*/
|
||||
public abstract int nextSetBit(int index);
|
||||
|
||||
/**
 * Returns the index of the first set bit in the range from {@code start} (inclusive) to {@code
 * end} (exclusive). {@link DocIdSetIterator#NO_MORE_DOCS} is returned if no bit is set within
 * that range, even if bits are set at or after {@code end}.
 */
|
||||
public abstract int nextSetBit(int start, int end);
|
||||
|
||||
/** Assert that the current doc is -1. */
|
||||
protected final void checkUnpositioned(DocIdSetIterator iter) {
|
||||
if (iter.docID() != -1) {
|
||||
|
|
|
@ -291,6 +291,32 @@ public final class FixedBitSet extends BitSet {
|
|||
return DocIdSetIterator.NO_MORE_DOCS;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextSetBit(int start, int upperBound) {
|
||||
// Depends on the ghost bits being clear!
|
||||
assert start >= 0 && start < numBits : "index=" + start + ", numBits=" + numBits;
|
||||
assert start < upperBound : "index=" + start + ", upperBound=" + upperBound;
|
||||
assert upperBound <= numBits : "upperBound=" + upperBound + ", numBits=" + numBits;
|
||||
int i = start >> 6;
|
||||
long word = bits[i] >> start; // skip all the bits to the right of index
|
||||
|
||||
if (word != 0) {
|
||||
int res = start + Long.numberOfTrailingZeros(word);
|
||||
return res < upperBound ? res : DocIdSetIterator.NO_MORE_DOCS;
|
||||
}
|
||||
|
||||
int limit = bits2words(upperBound);
|
||||
while (++i < limit) {
|
||||
word = bits[i];
|
||||
if (word != 0) {
|
||||
int res = (i << 6) + Long.numberOfTrailingZeros(word);
|
||||
return res < upperBound ? res : DocIdSetIterator.NO_MORE_DOCS;
|
||||
}
|
||||
}
|
||||
|
||||
return DocIdSetIterator.NO_MORE_DOCS;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int prevSetBit(int index) {
|
||||
assert index >= 0 && index < numBits : "index=" + index + " numBits=" + numBits;
|
||||
|
|
|
@ -320,9 +320,11 @@ public class SparseFixedBitSet extends BitSet {
|
|||
}
|
||||
|
||||
/**
 * Return the first document that occurs on or after the provided block index, looking only at
 * blocks before {@code i4096upper} (exclusive).
 */
|
||||
private int firstDoc(int i4096) {
|
||||
private int firstDoc(int i4096, int i4096upper) {
|
||||
assert i4096upper <= indices.length
|
||||
: "i4096upper=" + i4096 + ", indices.length=" + indices.length;
|
||||
long index = 0;
|
||||
while (i4096 < indices.length) {
|
||||
while (i4096 < i4096upper) {
|
||||
index = indices[i4096];
|
||||
if (index != 0) {
|
||||
final int i64 = Long.numberOfTrailingZeros(index);
|
||||
|
@ -353,7 +355,7 @@ public class SparseFixedBitSet extends BitSet {
|
|||
final long indexBits = index >>> i64 >>> 1;
|
||||
if (indexBits == 0) {
|
||||
// no more bits are set in the current block of 4096 bits, go to the next one
|
||||
return firstDoc(i4096 + 1);
|
||||
return firstDoc(i4096 + 1, indices.length);
|
||||
}
|
||||
// there are still set bits
|
||||
i64 += 1 + Long.numberOfTrailingZeros(indexBits);
|
||||
|
@ -361,6 +363,39 @@ public class SparseFixedBitSet extends BitSet {
|
|||
return (i64 << 6) | Long.numberOfTrailingZeros(bits);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextSetBit(int start, int upperBound) {
|
||||
assert start < length;
|
||||
assert upperBound > start;
|
||||
final int i4096 = start >>> 12;
|
||||
final long index = indices[i4096];
|
||||
final long[] bitArray = this.bits[i4096];
|
||||
int i64 = start >>> 6;
|
||||
final long i64bit = 1L << i64;
|
||||
int o = Long.bitCount(index & (i64bit - 1));
|
||||
if ((index & i64bit) != 0) {
|
||||
// There is at least one bit that is set in the current long, check if
|
||||
// one of them is after i
|
||||
final long bits = bitArray[o] >>> start; // shifts are mod 64
|
||||
if (bits != 0) {
|
||||
int res = start + Long.numberOfTrailingZeros(bits);
|
||||
return res < upperBound ? res : DocIdSetIterator.NO_MORE_DOCS;
|
||||
}
|
||||
o += 1;
|
||||
}
|
||||
final long indexBits = index >>> i64 >>> 1;
|
||||
if (indexBits == 0) {
|
||||
// no more bits are set in the current block of 4096 bits, go to the next one
|
||||
int res = firstDoc(i4096 + 1, blockCount(upperBound));
|
||||
return res < upperBound ? res : DocIdSetIterator.NO_MORE_DOCS;
|
||||
}
|
||||
// there are still set bits
|
||||
i64 += 1 + Long.numberOfTrailingZeros(indexBits);
|
||||
final long bits = bitArray[o];
|
||||
int res = (i64 << 6) | Long.numberOfTrailingZeros(bits);
|
||||
return res < upperBound ? res : DocIdSetIterator.NO_MORE_DOCS;
|
||||
}
|
||||
|
||||
/** Return the last document that occurs on or before the provided block index. */
|
||||
private int lastDoc(int i4096) {
|
||||
long index;
|
||||
|
|
|
@ -64,14 +64,20 @@ public class BlockJoinSelector {
|
|||
return false;
|
||||
}
|
||||
|
||||
final int firstChild = parents.prevSetBit(docID - 1) + 1;
|
||||
for (int child = children.nextSetBit(firstChild);
|
||||
child < docID;
|
||||
child = children.nextSetBit(child + 1)) {
|
||||
final int firstPotentialChild = parents.prevSetBit(docID - 1) + 1;
|
||||
assert firstPotentialChild <= docID;
|
||||
if (firstPotentialChild == docID) {
|
||||
// no children
|
||||
return false;
|
||||
}
|
||||
for (int child = children.nextSetBit(firstPotentialChild, docID);
|
||||
child != DocIdSetIterator.NO_MORE_DOCS;
|
||||
child = children.nextSetBit(child + 1, docID)) {
|
||||
if (docsWithValue.get(child)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
*/
|
||||
package org.apache.lucene.tests.util;
|
||||
|
||||
import com.carrotsearch.randomizedtesting.generators.RandomNumbers;
|
||||
import java.io.IOException;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
|
@ -104,6 +105,23 @@ public abstract class BaseBitSetTestCase<T extends BitSet> extends LuceneTestCas
|
|||
}
|
||||
}
|
||||
|
||||
/** Test {@link BitSet#nextSetBit(int, int)}. */
|
||||
public void testNextSetBitInRange() throws IOException {
|
||||
Random random = random();
|
||||
final int numBits = 1 + random().nextInt(100000);
|
||||
for (float percentSet : new float[] {0, 0.01f, 0.1f, 0.5f, 0.9f, 0.99f, 1f}) {
|
||||
BitSet set1 = new JavaUtilBitSet(randomSet(numBits, percentSet), numBits);
|
||||
T set2 = copyOf(set1, numBits);
|
||||
for (int start = 0; start < numBits; ++start) {
|
||||
int end = RandomNumbers.randomIntBetween(random, start + 1, numBits);
|
||||
assertEquals(
|
||||
"start=" + start + ", end=" + end + ", numBits=" + numBits,
|
||||
set1.nextSetBit(start, end),
|
||||
set2.nextSetBit(start, end));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** Test the {@link BitSet#set} method. */
|
||||
public void testSet() throws IOException {
|
||||
Random random = random();
|
||||
|
@ -330,5 +348,11 @@ public abstract class BaseBitSetTestCase<T extends BitSet> extends LuceneTestCas
|
|||
}
|
||||
return next;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextSetBit(int start, int upperBound) {
|
||||
int res = nextSetBit(start);
|
||||
return res < upperBound ? res : DocIdSetIterator.NO_MORE_DOCS;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue