SparseFixedBitSet#firstDoc: reduce number of `indices` iterations for a bit set that is not fully built yet. (#13559)

This commit is contained in:
Egor Potemkin 2024-08-01 15:11:58 +01:00 committed by GitHub
parent 0a24769850
commit e8eba4d455
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 107 additions and 7 deletions

View File

@ -108,8 +108,11 @@ API Changes
* GITHUB#13410: Removed Scorer#getWeight (Sanjay Dutt, Adrien Grand) * GITHUB#13410: Removed Scorer#getWeight (Sanjay Dutt, Adrien Grand)
* GITHUB#13559: Add BitSet#nextSetBit(int, int) to get the index of the first set bit in range. (Egor Potemkin)
* GITHUB#13499: Remove deprecated TopScoreDocCollector + TopFieldCollector methods (#create, #createSharedManager) (Jakub Slowinski) * GITHUB#13499: Remove deprecated TopScoreDocCollector + TopFieldCollector methods (#create, #createSharedManager) (Jakub Slowinski)
New Features New Features
--------------------- ---------------------

View File

@ -92,6 +92,12 @@ public abstract class BitSet implements Bits, Accountable {
*/ */
public abstract int nextSetBit(int index); public abstract int nextSetBit(int index);
/**
 * Returns the index of the first set bit in the range from {@code start} (inclusive) to {@code
 * end} (exclusive). {@link DocIdSetIterator#NO_MORE_DOCS} is returned if no bit is set within
 * that range, even when bits are set at or after {@code end}.
 *
 * @param start the first index to examine (inclusive)
 * @param end the index at which the search stops (exclusive)
 * @return the index of the first set bit in {@code [start, end)}, or {@link
 *     DocIdSetIterator#NO_MORE_DOCS} if there is none
 */
public abstract int nextSetBit(int start, int end);
/** Assert that the current doc is -1. */ /** Assert that the current doc is -1. */
protected final void checkUnpositioned(DocIdSetIterator iter) { protected final void checkUnpositioned(DocIdSetIterator iter) {
if (iter.docID() != -1) { if (iter.docID() != -1) {

View File

@ -291,6 +291,32 @@ public final class FixedBitSet extends BitSet {
return DocIdSetIterator.NO_MORE_DOCS; return DocIdSetIterator.NO_MORE_DOCS;
} }
/**
 * Finds the first set bit in {@code [start, upperBound)}.
 *
 * @param start first index to examine (inclusive)
 * @param upperBound index at which the search stops (exclusive)
 * @return the index of the first set bit in the range, or {@link DocIdSetIterator#NO_MORE_DOCS}
 *     when the range holds no set bit
 */
@Override
public int nextSetBit(int start, int upperBound) {
  // Relies on the ghost bits being clear.
  assert start >= 0 && start < numBits : "index=" + start + ", numBits=" + numBits;
  assert start < upperBound : "index=" + start + ", upperBound=" + upperBound;
  assert upperBound <= numBits : "upperBound=" + upperBound + ", numBits=" + numBits;

  int wordIdx = start >> 6;
  // A long shift only uses the low 6 bits of the distance, so this discards the bits that sit
  // below start inside the word containing it.
  final long firstWord = bits[wordIdx] >> start;

  int candidate;
  if (firstWord != 0) {
    candidate = start + Long.numberOfTrailingZeros(firstWord);
  } else {
    candidate = DocIdSetIterator.NO_MORE_DOCS;
    // Only scan the words that can contain an index below upperBound.
    final int wordLimit = bits2words(upperBound);
    for (wordIdx++; wordIdx < wordLimit; wordIdx++) {
      final long current = bits[wordIdx];
      if (current != 0) {
        candidate = (wordIdx << 6) + Long.numberOfTrailingZeros(current);
        break;
      }
    }
  }
  // A hit in the last scanned word may still lie at or past upperBound.
  return candidate < upperBound ? candidate : DocIdSetIterator.NO_MORE_DOCS;
}
@Override @Override
public int prevSetBit(int index) { public int prevSetBit(int index) {
assert index >= 0 && index < numBits : "index=" + index + " numBits=" + numBits; assert index >= 0 && index < numBits : "index=" + index + " numBits=" + numBits;

View File

@ -320,9 +320,11 @@ public class SparseFixedBitSet extends BitSet {
} }
/** Return the first document that occurs on or after the provided block index. */ /** Return the first document that occurs on or after the provided block index. */
private int firstDoc(int i4096) { private int firstDoc(int i4096, int i4096upper) {
// The upper block bound must not exceed the number of blocks; report the bound itself on failure.
assert i4096upper <= indices.length
    : "i4096upper=" + i4096upper + ", indices.length=" + indices.length;
long index = 0; long index = 0;
while (i4096 < indices.length) { while (i4096 < i4096upper) {
index = indices[i4096]; index = indices[i4096];
if (index != 0) { if (index != 0) {
final int i64 = Long.numberOfTrailingZeros(index); final int i64 = Long.numberOfTrailingZeros(index);
@ -353,7 +355,7 @@ public class SparseFixedBitSet extends BitSet {
final long indexBits = index >>> i64 >>> 1; final long indexBits = index >>> i64 >>> 1;
if (indexBits == 0) { if (indexBits == 0) {
// no more bits are set in the current block of 4096 bits, go to the next one // no more bits are set in the current block of 4096 bits, go to the next one
return firstDoc(i4096 + 1); return firstDoc(i4096 + 1, indices.length);
} }
// there are still set bits // there are still set bits
i64 += 1 + Long.numberOfTrailingZeros(indexBits); i64 += 1 + Long.numberOfTrailingZeros(indexBits);
@ -361,6 +363,39 @@ public class SparseFixedBitSet extends BitSet {
return (i64 << 6) | Long.numberOfTrailingZeros(bits); return (i64 << 6) | Long.numberOfTrailingZeros(bits);
} }
/**
 * Finds the first set bit in {@code [start, upperBound)}.
 *
 * @param start first index to examine (inclusive)
 * @param upperBound index at which the search stops (exclusive)
 * @return the index of the first set bit in the range, or {@link DocIdSetIterator#NO_MORE_DOCS}
 *     when the range holds no set bit
 */
@Override
public int nextSetBit(int start, int upperBound) {
  assert start < length;
  assert upperBound > start;

  final int block = start >>> 12;
  final long blockIndex = indices[block];
  final long[] blockWords = this.bits[block];
  final int startWord = start >>> 6;
  final long startWordBit = 1L << startWord; // shifts are mod 64
  // Position, inside blockWords, of the first stored long at or after the one containing start.
  int slot = Long.bitCount(blockIndex & (startWordBit - 1));

  if ((blockIndex & startWordBit) != 0) {
    // The long containing start is present; look for a set bit at or after start in it.
    final long remaining = blockWords[slot] >>> start; // shifts are mod 64
    if (remaining != 0) {
      final int hit = start + Long.numberOfTrailingZeros(remaining);
      if (hit >= upperBound) {
        return DocIdSetIterator.NO_MORE_DOCS;
      }
      return hit;
    }
    slot++;
  }

  final long laterWords = blockIndex >>> startWord >>> 1;
  final int candidate;
  if (laterWords == 0) {
    // Nothing else is set in this block of 4096 bits: continue with the following blocks,
    // visiting only those that can contain an index below upperBound.
    candidate = firstDoc(block + 1, blockCount(upperBound));
  } else {
    // Another long of this block is present; its lowest set bit is the next set bit.
    final int nextWord = startWord + 1 + Long.numberOfTrailingZeros(laterWords);
    candidate = (nextWord << 6) | Long.numberOfTrailingZeros(blockWords[slot]);
  }
  if (candidate >= upperBound) {
    return DocIdSetIterator.NO_MORE_DOCS;
  }
  return candidate;
}
/** Return the last document that occurs on or before the provided block index. */ /** Return the last document that occurs on or before the provided block index. */
private int lastDoc(int i4096) { private int lastDoc(int i4096) {
long index; long index;

View File

@ -64,14 +64,20 @@ public class BlockJoinSelector {
return false; return false;
} }
final int firstChild = parents.prevSetBit(docID - 1) + 1; final int firstPotentialChild = parents.prevSetBit(docID - 1) + 1;
for (int child = children.nextSetBit(firstChild); assert firstPotentialChild <= docID;
child < docID; if (firstPotentialChild == docID) {
child = children.nextSetBit(child + 1)) { // no children
return false;
}
for (int child = children.nextSetBit(firstPotentialChild, docID);
child != DocIdSetIterator.NO_MORE_DOCS;
child = children.nextSetBit(child + 1, docID)) {
if (docsWithValue.get(child)) { if (docsWithValue.get(child)) {
return true; return true;
} }
} }
return false; return false;
} }

View File

@ -16,6 +16,7 @@
*/ */
package org.apache.lucene.tests.util; package org.apache.lucene.tests.util;
import com.carrotsearch.randomizedtesting.generators.RandomNumbers;
import java.io.IOException; import java.io.IOException;
import java.util.Collection; import java.util.Collection;
import java.util.Collections; import java.util.Collections;
@ -104,6 +105,23 @@ public abstract class BaseBitSetTestCase<T extends BitSet> extends LuceneTestCas
} }
} }
/**
 * Test {@link BitSet#nextSetBit(int, int)}: for every start index and a random end, the bit set
 * under test must agree with the {@code JavaUtilBitSet} reference.
 */
public void testNextSetBitInRange() throws IOException {
  Random random = random();
  final int numBits = 1 + random().nextInt(100000);
  final float[] densities = {0, 0.01f, 0.1f, 0.5f, 0.9f, 0.99f, 1f};
  for (float percentSet : densities) {
    BitSet expected = new JavaUtilBitSet(randomSet(numBits, percentSet), numBits);
    T actual = copyOf(expected, numBits);
    int start = 0;
    while (start < numBits) {
      // end is exclusive and always strictly greater than start
      final int end = RandomNumbers.randomIntBetween(random, start + 1, numBits);
      final String message = "start=" + start + ", end=" + end + ", numBits=" + numBits;
      assertEquals(message, expected.nextSetBit(start, end), actual.nextSetBit(start, end));
      ++start;
    }
  }
}
/** Test the {@link BitSet#set} method. */ /** Test the {@link BitSet#set} method. */
public void testSet() throws IOException { public void testSet() throws IOException {
Random random = random(); Random random = random();
@ -330,5 +348,11 @@ public abstract class BaseBitSetTestCase<T extends BitSet> extends LuceneTestCas
} }
return next; return next;
} }
/**
 * Bounded variant built on {@link #nextSetBit(int)}: a hit at or beyond {@code upperBound} is
 * reported as {@link DocIdSetIterator#NO_MORE_DOCS}.
 */
@Override
public int nextSetBit(int start, int upperBound) {
  final int candidate = nextSetBit(start);
  if (candidate >= upperBound) {
    return DocIdSetIterator.NO_MORE_DOCS;
  }
  return candidate;
}
} }
} }