mirror of https://github.com/apache/lucene.git
SparseFixedBitSet#firstDoc: reduce number of `indices` iterations for a bit set that is not fully built yet. (#13559)
This commit is contained in:
parent
0a24769850
commit
e8eba4d455
|
@ -108,8 +108,11 @@ API Changes
|
||||||
|
|
||||||
* GITHUB#13410: Removed Scorer#getWeight (Sanjay Dutt, Adrien Grand)
|
* GITHUB#13410: Removed Scorer#getWeight (Sanjay Dutt, Adrien Grand)
|
||||||
|
|
||||||
|
* GITHUB#13559: Add BitSet#nextSetBit(int, int) to get the index of the first set bit in range. (Egor Potemkin)
|
||||||
|
|
||||||
* GITHUB#13499: Remove deprecated TopScoreDocCollector + TopFieldCollector methods (#create, #createSharedManager) (Jakub Slowinski)
|
* GITHUB#13499: Remove deprecated TopScoreDocCollector + TopFieldCollector methods (#create, #createSharedManager) (Jakub Slowinski)
|
||||||
|
|
||||||
|
|
||||||
New Features
|
New Features
|
||||||
---------------------
|
---------------------
|
||||||
|
|
||||||
|
|
|
@ -92,6 +92,12 @@ public abstract class BitSet implements Bits, Accountable {
|
||||||
*/
|
*/
|
||||||
public abstract int nextSetBit(int index);
|
public abstract int nextSetBit(int index);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the index of the first set bit from start (inclusive) until end (exclusive). {@link
|
||||||
|
* DocIdSetIterator#NO_MORE_DOCS} is returned if no bit is set in that range.
|
||||||
|
*/
|
||||||
|
public abstract int nextSetBit(int start, int end);
|
||||||
|
|
||||||
/** Assert that the current doc is -1. */
|
/** Assert that the current doc is -1. */
|
||||||
protected final void checkUnpositioned(DocIdSetIterator iter) {
|
protected final void checkUnpositioned(DocIdSetIterator iter) {
|
||||||
if (iter.docID() != -1) {
|
if (iter.docID() != -1) {
|
||||||
|
|
|
@ -291,6 +291,32 @@ public final class FixedBitSet extends BitSet {
|
||||||
return DocIdSetIterator.NO_MORE_DOCS;
|
return DocIdSetIterator.NO_MORE_DOCS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int nextSetBit(int start, int upperBound) {
|
||||||
|
// Depends on the ghost bits being clear!
|
||||||
|
assert start >= 0 && start < numBits : "index=" + start + ", numBits=" + numBits;
|
||||||
|
assert start < upperBound : "index=" + start + ", upperBound=" + upperBound;
|
||||||
|
assert upperBound <= numBits : "upperBound=" + upperBound + ", numBits=" + numBits;
|
||||||
|
int i = start >> 6;
|
||||||
|
long word = bits[i] >> start; // skip all the bits to the right of index
|
||||||
|
|
||||||
|
if (word != 0) {
|
||||||
|
int res = start + Long.numberOfTrailingZeros(word);
|
||||||
|
return res < upperBound ? res : DocIdSetIterator.NO_MORE_DOCS;
|
||||||
|
}
|
||||||
|
|
||||||
|
int limit = bits2words(upperBound);
|
||||||
|
while (++i < limit) {
|
||||||
|
word = bits[i];
|
||||||
|
if (word != 0) {
|
||||||
|
int res = (i << 6) + Long.numberOfTrailingZeros(word);
|
||||||
|
return res < upperBound ? res : DocIdSetIterator.NO_MORE_DOCS;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return DocIdSetIterator.NO_MORE_DOCS;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int prevSetBit(int index) {
|
public int prevSetBit(int index) {
|
||||||
assert index >= 0 && index < numBits : "index=" + index + " numBits=" + numBits;
|
assert index >= 0 && index < numBits : "index=" + index + " numBits=" + numBits;
|
||||||
|
|
|
@ -320,9 +320,11 @@ public class SparseFixedBitSet extends BitSet {
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Return the first document that occurs on or after the provided block index. */
|
/** Return the first document that occurs on or after the provided block index. */
|
||||||
private int firstDoc(int i4096) {
|
private int firstDoc(int i4096, int i4096upper) {
|
||||||
|
assert i4096upper <= indices.length
|
||||||
|
: "i4096upper=" + i4096upper + ", indices.length=" + indices.length;
|
||||||
long index = 0;
|
long index = 0;
|
||||||
while (i4096 < indices.length) {
|
while (i4096 < i4096upper) {
|
||||||
index = indices[i4096];
|
index = indices[i4096];
|
||||||
if (index != 0) {
|
if (index != 0) {
|
||||||
final int i64 = Long.numberOfTrailingZeros(index);
|
final int i64 = Long.numberOfTrailingZeros(index);
|
||||||
|
@ -353,7 +355,7 @@ public class SparseFixedBitSet extends BitSet {
|
||||||
final long indexBits = index >>> i64 >>> 1;
|
final long indexBits = index >>> i64 >>> 1;
|
||||||
if (indexBits == 0) {
|
if (indexBits == 0) {
|
||||||
// no more bits are set in the current block of 4096 bits, go to the next one
|
// no more bits are set in the current block of 4096 bits, go to the next one
|
||||||
return firstDoc(i4096 + 1);
|
return firstDoc(i4096 + 1, indices.length);
|
||||||
}
|
}
|
||||||
// there are still set bits
|
// there are still set bits
|
||||||
i64 += 1 + Long.numberOfTrailingZeros(indexBits);
|
i64 += 1 + Long.numberOfTrailingZeros(indexBits);
|
||||||
|
@ -361,6 +363,39 @@ public class SparseFixedBitSet extends BitSet {
|
||||||
return (i64 << 6) | Long.numberOfTrailingZeros(bits);
|
return (i64 << 6) | Long.numberOfTrailingZeros(bits);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int nextSetBit(int start, int upperBound) {
|
||||||
|
assert start < length;
|
||||||
|
assert upperBound > start;
|
||||||
|
final int i4096 = start >>> 12;
|
||||||
|
final long index = indices[i4096];
|
||||||
|
final long[] bitArray = this.bits[i4096];
|
||||||
|
int i64 = start >>> 6;
|
||||||
|
final long i64bit = 1L << i64;
|
||||||
|
int o = Long.bitCount(index & (i64bit - 1));
|
||||||
|
if ((index & i64bit) != 0) {
|
||||||
|
// There is at least one bit that is set in the current long, check if
|
||||||
|
// one of them is after i
|
||||||
|
final long bits = bitArray[o] >>> start; // shifts are mod 64
|
||||||
|
if (bits != 0) {
|
||||||
|
int res = start + Long.numberOfTrailingZeros(bits);
|
||||||
|
return res < upperBound ? res : DocIdSetIterator.NO_MORE_DOCS;
|
||||||
|
}
|
||||||
|
o += 1;
|
||||||
|
}
|
||||||
|
final long indexBits = index >>> i64 >>> 1;
|
||||||
|
if (indexBits == 0) {
|
||||||
|
// no more bits are set in the current block of 4096 bits, go to the next one
|
||||||
|
int res = firstDoc(i4096 + 1, blockCount(upperBound));
|
||||||
|
return res < upperBound ? res : DocIdSetIterator.NO_MORE_DOCS;
|
||||||
|
}
|
||||||
|
// there are still set bits
|
||||||
|
i64 += 1 + Long.numberOfTrailingZeros(indexBits);
|
||||||
|
final long bits = bitArray[o];
|
||||||
|
int res = (i64 << 6) | Long.numberOfTrailingZeros(bits);
|
||||||
|
return res < upperBound ? res : DocIdSetIterator.NO_MORE_DOCS;
|
||||||
|
}
|
||||||
|
|
||||||
/** Return the last document that occurs on or before the provided block index. */
|
/** Return the last document that occurs on or before the provided block index. */
|
||||||
private int lastDoc(int i4096) {
|
private int lastDoc(int i4096) {
|
||||||
long index;
|
long index;
|
||||||
|
|
|
@ -64,14 +64,20 @@ public class BlockJoinSelector {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
final int firstChild = parents.prevSetBit(docID - 1) + 1;
|
final int firstPotentialChild = parents.prevSetBit(docID - 1) + 1;
|
||||||
for (int child = children.nextSetBit(firstChild);
|
assert firstPotentialChild <= docID;
|
||||||
child < docID;
|
if (firstPotentialChild == docID) {
|
||||||
child = children.nextSetBit(child + 1)) {
|
// no children
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
for (int child = children.nextSetBit(firstPotentialChild, docID);
|
||||||
|
child != DocIdSetIterator.NO_MORE_DOCS;
|
||||||
|
child = children.nextSetBit(child + 1, docID)) {
|
||||||
if (docsWithValue.get(child)) {
|
if (docsWithValue.get(child)) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -16,6 +16,7 @@
|
||||||
*/
|
*/
|
||||||
package org.apache.lucene.tests.util;
|
package org.apache.lucene.tests.util;
|
||||||
|
|
||||||
|
import com.carrotsearch.randomizedtesting.generators.RandomNumbers;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
|
@ -104,6 +105,23 @@ public abstract class BaseBitSetTestCase<T extends BitSet> extends LuceneTestCas
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Test {@link BitSet#nextSetBit(int, int)}. */
|
||||||
|
public void testNextSetBitInRange() throws IOException {
|
||||||
|
Random random = random();
|
||||||
|
final int numBits = 1 + random().nextInt(100000);
|
||||||
|
for (float percentSet : new float[] {0, 0.01f, 0.1f, 0.5f, 0.9f, 0.99f, 1f}) {
|
||||||
|
BitSet set1 = new JavaUtilBitSet(randomSet(numBits, percentSet), numBits);
|
||||||
|
T set2 = copyOf(set1, numBits);
|
||||||
|
for (int start = 0; start < numBits; ++start) {
|
||||||
|
int end = RandomNumbers.randomIntBetween(random, start + 1, numBits);
|
||||||
|
assertEquals(
|
||||||
|
"start=" + start + ", end=" + end + ", numBits=" + numBits,
|
||||||
|
set1.nextSetBit(start, end),
|
||||||
|
set2.nextSetBit(start, end));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/** Test the {@link BitSet#set} method. */
|
/** Test the {@link BitSet#set} method. */
|
||||||
public void testSet() throws IOException {
|
public void testSet() throws IOException {
|
||||||
Random random = random();
|
Random random = random();
|
||||||
|
@ -330,5 +348,11 @@ public abstract class BaseBitSetTestCase<T extends BitSet> extends LuceneTestCas
|
||||||
}
|
}
|
||||||
return next;
|
return next;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int nextSetBit(int start, int upperBound) {
|
||||||
|
int res = nextSetBit(start);
|
||||||
|
return res < upperBound ? res : DocIdSetIterator.NO_MORE_DOCS;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue