diff --git a/lucene/core/src/java/org/apache/lucene/util/BitSet.java b/lucene/core/src/java/org/apache/lucene/util/BitSet.java index 4946957caf1..347e05d2e52 100644 --- a/lucene/core/src/java/org/apache/lucene/util/BitSet.java +++ b/lucene/core/src/java/org/apache/lucene/util/BitSet.java @@ -90,7 +90,11 @@ public abstract class BitSet implements Bits, Accountable { * Returns the index of the first set bit starting at the index specified. {@link * DocIdSetIterator#NO_MORE_DOCS} is returned if there are no more set bits. */ - public abstract int nextSetBit(int index); + public int nextSetBit(int index) { + // Default implementation. Subclasses may be able to override with a more performant + // implementation. + return nextSetBit(index, length()); + } /** * Returns the index of the first set bit from start (inclusive) until end (exclusive). {@link diff --git a/lucene/core/src/java/org/apache/lucene/util/FixedBitSet.java b/lucene/core/src/java/org/apache/lucene/util/FixedBitSet.java index 750c6684e95..5d1f394853c 100644 --- a/lucene/core/src/java/org/apache/lucene/util/FixedBitSet.java +++ b/lucene/core/src/java/org/apache/lucene/util/FixedBitSet.java @@ -272,27 +272,23 @@ public final class FixedBitSet extends BitSet { @Override public int nextSetBit(int index) { - // Depends on the ghost bits being clear! 
- assert index >= 0 && index < numBits : "index=" + index + ", numBits=" + numBits; - int i = index >> 6; - long word = bits[i] >> index; // skip all the bits to the right of index - - if (word != 0) { - return index + Long.numberOfTrailingZeros(word); - } - - while (++i < numWords) { - word = bits[i]; - if (word != 0) { - return (i << 6) + Long.numberOfTrailingZeros(word); - } - } - - return DocIdSetIterator.NO_MORE_DOCS; + // Override with a version that skips the bound check on the result since we know it will not + // go OOB: + return nextSetBitInRange(index, numBits); } @Override public int nextSetBit(int start, int upperBound) { + int res = nextSetBitInRange(start, upperBound); + return res < upperBound ? res : DocIdSetIterator.NO_MORE_DOCS; + } + + /** + * Returns the next set bit in the specified range, treating {@code upperBound} as a best-effort + * hint rather than a hard limit. The result may be {@code >= upperBound}, so callers that need a + * hard limit must check the result themselves. + */ + private int nextSetBitInRange(int start, int upperBound) { // Depends on the ghost bits being clear! assert start >= 0 && start < numBits : "index=" + start + ", numBits=" + numBits; assert start < upperBound : "index=" + start + ", upperBound=" + upperBound; @@ -301,16 +297,14 @@ public final class FixedBitSet extends BitSet { long word = bits[i] >> start; // skip all the bits to the right of index if (word != 0) { - int res = start + Long.numberOfTrailingZeros(word); - return res < upperBound ? res : DocIdSetIterator.NO_MORE_DOCS; + return start + Long.numberOfTrailingZeros(word); } - int limit = bits2words(upperBound); + int limit = upperBound == numBits ? numWords : bits2words(upperBound); while (++i < limit) { word = bits[i]; if (word != 0) { - int res = (i << 6) + Long.numberOfTrailingZeros(word); - return res < upperBound ? 
res : DocIdSetIterator.NO_MORE_DOCS; + return (i << 6) + Long.numberOfTrailingZeros(word); } } diff --git a/lucene/core/src/java/org/apache/lucene/util/SparseFixedBitSet.java b/lucene/core/src/java/org/apache/lucene/util/SparseFixedBitSet.java index 66805d53d94..3104948cc4c 100644 --- a/lucene/core/src/java/org/apache/lucene/util/SparseFixedBitSet.java +++ b/lucene/core/src/java/org/apache/lucene/util/SparseFixedBitSet.java @@ -337,36 +337,26 @@ public class SparseFixedBitSet extends BitSet { @Override public int nextSetBit(int i) { - assert i < length; - final int i4096 = i >>> 12; - final long index = indices[i4096]; - final long[] bitArray = this.bits[i4096]; - int i64 = i >>> 6; - int o = Long.bitCount(index & ((1L << i64) - 1)); - if ((index & (1L << i64)) != 0) { - // There is at least one bit that is set in the current long, check if - // one of them is after i - final long bits = bitArray[o] >>> i; // shifts are mod 64 - if (bits != 0) { - return i + Long.numberOfTrailingZeros(bits); - } - o += 1; - } - final long indexBits = index >>> i64 >>> 1; - if (indexBits == 0) { - // no more bits are set in the current block of 4096 bits, go to the next one - return firstDoc(i4096 + 1, indices.length); - } - // there are still set bits - i64 += 1 + Long.numberOfTrailingZeros(indexBits); - final long bits = bitArray[o]; - return (i64 << 6) | Long.numberOfTrailingZeros(bits); + // Override with a version that skips the bound check on the result since we know it will not + // go OOB: + return nextSetBitInRange(i, length); } @Override public int nextSetBit(int start, int upperBound) { + int res = nextSetBitInRange(start, upperBound); + return res < upperBound ? res : DocIdSetIterator.NO_MORE_DOCS; + } + + /** + * Returns the next set bit in the specified range, treating {@code upperBound} as a best-effort + * hint rather than a hard limit. 
The result may be {@code >= upperBound}, so callers that need a + * hard limit must check the result themselves. + */ + private int nextSetBitInRange(int start, int upperBound) { assert start < length; - assert upperBound > start; + assert upperBound > start && upperBound <= length + : "upperBound=" + upperBound + ", start=" + start + ", length=" + length; final int i4096 = start >>> 12; final long index = indices[i4096]; final long[] bitArray = this.bits[i4096]; @@ -378,22 +368,20 @@ public class SparseFixedBitSet extends BitSet { // one of them is after i final long bits = bitArray[o] >>> start; // shifts are mod 64 if (bits != 0) { - int res = start + Long.numberOfTrailingZeros(bits); - return res < upperBound ? res : DocIdSetIterator.NO_MORE_DOCS; + return start + Long.numberOfTrailingZeros(bits); } o += 1; } final long indexBits = index >>> i64 >>> 1; if (indexBits == 0) { // no more bits are set in the current block of 4096 bits, go to the next one - int res = firstDoc(i4096 + 1, blockCount(upperBound)); - return res < upperBound ? res : DocIdSetIterator.NO_MORE_DOCS; + int i4096upper = upperBound == length ? indices.length : blockCount(upperBound); + return firstDoc(i4096 + 1, i4096upper); } // there are still set bits i64 += 1 + Long.numberOfTrailingZeros(indexBits); final long bits = bitArray[o]; - int res = (i64 << 6) | Long.numberOfTrailingZeros(bits); - return res < upperBound ? res : DocIdSetIterator.NO_MORE_DOCS; + return (i64 << 6) | Long.numberOfTrailingZeros(bits); } /** Return the last document that occurs on or before the provided block index. 
*/ diff --git a/lucene/test-framework/src/java/org/apache/lucene/tests/util/BaseBitSetTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/tests/util/BaseBitSetTestCase.java index 095bcabbe4c..205d8861322 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/tests/util/BaseBitSetTestCase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/tests/util/BaseBitSetTestCase.java @@ -341,18 +341,12 @@ public abstract class BaseBitSetTestCase extends LuceneTestCas } @Override - public int nextSetBit(int i) { - int next = bitSet.nextSetBit(i); - if (next == -1) { + public int nextSetBit(int start, int upperBound) { + int next = bitSet.nextSetBit(start); + if (next == -1 || next >= upperBound) { next = DocIdSetIterator.NO_MORE_DOCS; } return next; } - - @Override - public int nextSetBit(int start, int upperBound) { - int res = nextSetBit(start); - return res < upperBound ? res : DocIdSetIterator.NO_MORE_DOCS; - } } }