diff --git a/lucene/core/src/java/org/apache/lucene/util/SparseFixedBitSet.java b/lucene/core/src/java/org/apache/lucene/util/SparseFixedBitSet.java index 7a971956342..946749fa461 100644 --- a/lucene/core/src/java/org/apache/lucene/util/SparseFixedBitSet.java +++ b/lucene/core/src/java/org/apache/lucene/util/SparseFixedBitSet.java @@ -47,8 +47,19 @@ public class SparseFixedBitSet extends DocIdSet implements Bits { return blockCount; } - final long[] indices; - final long[][] bits; + private static class Block { + private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(Block.class) + RamUsageEstimator.sizeOf(new long[1]); + + long index; + long[] bits; + + Block(int bit) { + this.index = 1L << (bit >>> 6); + this.bits = new long[] {1L << bit}; + } + } + + final Block[] blocks; final int length; int nonZeroLongCount; long ramBytesUsed; @@ -61,11 +72,9 @@ public class SparseFixedBitSet extends DocIdSet implements Bits { } this.length = length; final int blockCount = blockCount(length); - indices = new long[blockCount]; - bits = new long[blockCount][]; + blocks = new Block[blockCount]; ramBytesUsed = BASE_RAM_BYTES_USED - + RamUsageEstimator.shallowSizeOf(indices) - + RamUsageEstimator.shallowSizeOf(bits); + + RamUsageEstimator.shallowSizeOf(blocks); } @Override @@ -94,10 +103,11 @@ public class SparseFixedBitSet extends DocIdSet implements Bits { */ public int cardinality() { int cardinality = 0; - for (long[] bitArray : bits) { - if (bitArray != null) { - for (long bits : bitArray) { - cardinality += Long.bitCount(bits); + for (Block block : blocks) { + if (block != null) { + final int numLongs = Long.bitCount(block.index); + for (int i = 0; i < numLongs; ++i) { + cardinality += Long.bitCount(block.bits[i]); } } } @@ -122,18 +132,18 @@ public class SparseFixedBitSet extends DocIdSet implements Bits { public boolean get(int i) { assert consistent(i); final int i4096 = i >>> 12; - final long index = indices[i4096]; + final Block block = blocks[i4096]; final int i64 = i >>> 6; // first check the index, if the i64-th bit is not set, then i is not set // note: this relies on the fact that shifts are mod 64 in java - if ((index & (1L << i64)) == 0) { + if (block == null || (block.index & (1L << i64)) == 0) { return false; } // if it is set, then we count the number of bits that are set on the right // of i64, and that gives us the index of the long that stores the bits we // are interested in - final long bits = this.bits[i4096][Long.bitCount(index & ((1L << i64) - 1))]; + final long bits = block.bits[Long.bitCount(block.index & ((1L << i64) - 1))]; return (bits & (1L << i)) != 0; } @@ -151,53 +161,44 @@ public class SparseFixedBitSet extends DocIdSet implements Bits { public void set(int i) { assert consistent(i); final int i4096 = i >>> 12; - final long index = indices[i4096]; + final Block block = blocks[i4096]; final int i64 = i >>> 6; - if ((index & (1L << i64)) != 0) { + if (block == null) { + blocks[i4096] = new Block(i); + ++nonZeroLongCount; + ramBytesUsed += Block.BASE_RAM_BYTES_USED; + } else if ((block.index & (1L << i64)) != 0) { // in that case the sub 64-bits block we are interested in already exists, // we just need to set a bit in an existing long: the number of ones on // the right of i64 gives us the index of the long we need to update - bits[i4096][Long.bitCount(index & ((1L << i64) - 1))] |= 1L << i; // shifts are mod 64 in java - } else if (index == 0) { - // if the index is 0, it means that we just found a block of 4096 bits - // that has no bit that is set yet. So let's initialize a new block: - insertBlock(i4096, i64, i); + block.bits[Long.bitCount(block.index & ((1L << i64) - 1))] |= 1L << i; // shifts are mod 64 in java } else { // in that case we found a block of 4096 bits that has some values, but // the sub-block of 64 bits that we are interested in has no value yet, // so we need to insert a new long - insertLong(i4096, i64, i, index); + insertLong(block, i64, i); } } - - private void insertBlock(int i4096, int i64, int i) { - indices[i4096] = 1L << i64; // shifts are mod 64 in java - assert bits[i4096] == null; - bits[i4096] = new long[] { 1L << i }; // shifts are mod 64 in java - ++nonZeroLongCount; - ramBytesUsed += SINGLE_ELEMENT_ARRAY_BYTES_USED; - } - private void insertLong(int i4096, int i64, int i, long index) { - indices[i4096] |= 1L << i64; // shifts are mod 64 in java + private void insertLong(Block block, int i64, int i) { + block.index |= 1L << i64; // shifts are mod 64 in java // we count the number of bits that are set on the right of i64 // this gives us the index at which to perform the insertion - final int o = Long.bitCount(index & ((1L << i64) - 1)); - final long[] bitArray = bits[i4096]; - if (bitArray[bitArray.length - 1] == 0) { + final int o = Long.bitCount(block.index & ((1L << i64) - 1)); + if (block.bits[block.bits.length - 1] == 0) { // since we only store non-zero longs, if the last value is 0, it means // that we alreay have extra space, make use of it - System.arraycopy(bitArray, o, bitArray, o + 1, bitArray.length - o - 1); - bitArray[o] = 1L << i; + System.arraycopy(block.bits, o, block.bits, o + 1, block.bits.length - o - 1); + block.bits[o] = 1L << i; } else { // we don't have extra space so we need to resize to insert the new long - final int newSize = oversize(bitArray.length + 1); + final int newSize = oversize(block.bits.length + 1); final long[] newBitArray = new long[newSize]; - System.arraycopy(bitArray, 0, newBitArray, 0, o); + System.arraycopy(block.bits, 0, newBitArray, 0, o); newBitArray[o] = 1L << i; - System.arraycopy(bitArray, o, newBitArray, o + 1, bitArray.length - o); - bits[i4096] = newBitArray; - ramBytesUsed += (newSize - bitArray.length) * RamUsageEstimator.NUM_BYTES_LONG; + System.arraycopy(block.bits, o, newBitArray, o + 1, block.bits.length - o); + block.bits = newBitArray; + ramBytesUsed += (newSize - block.bits.length) * RamUsageEstimator.NUM_BYTES_LONG; } ++nonZeroLongCount; } @@ -234,12 +235,12 @@ public class SparseFixedBitSet extends DocIdSet implements Bits { /** Return the first document that occurs on or after the provided block index. */ private int firstDoc(int i4096) { - long index = 0; - while (i4096 < indices.length) { - index = indices[i4096]; - if (index != 0) { - final int i64 = Long.numberOfTrailingZeros(index); - return doc = (i4096 << 12) | (i64 << 6) | Long.numberOfTrailingZeros(bits[i4096][0]); + Block block = null; + while (i4096 < blocks.length) { + block = blocks[i4096]; + if (block != null) { + final int i64 = Long.numberOfTrailingZeros(block.index); + return doc = (i4096 << 12) | (i64 << 6) | Long.numberOfTrailingZeros(block.bits[0]); } i4096 += 1; } @@ -254,12 +255,15 @@ public class SparseFixedBitSet extends DocIdSet implements Bits { @Override public int advance(int target) throws IOException { final int i4096 = target >>> 12; - if (i4096 >= indices.length) { + if (i4096 >= blocks.length) { return doc = NO_MORE_DOCS; } - final long index = indices[i4096]; + final Block block = blocks[i4096]; + if (block == null) { + return firstDoc(i4096 + 1); + } int i64 = target >>> 6; - long indexBits = index >>> i64; + long indexBits = block.index >>> i64; if (indexBits == 0) { // if the index is zero, it means that there is no value in the // current block, so return the first document of the next block @@ -280,13 +284,12 @@ public class SparseFixedBitSet extends DocIdSet implements Bits { } // So now we are on a sub 64-bits block that has values - assert (index & (1L << i64)) != 0; + assert (block.index & (1L << i64)) != 0; // we count the number of ones on the left of i64 to figure out the // index of the long that contains the bits we are interested in - int longIndex = Long.bitCount(index & ((1L << i64) - 1)); // shifts are mod 64 in java - final long[] longArray = bits[i4096]; - assert longArray[longIndex] != 0; - long bits = longArray[longIndex] >>> i1; // shifts are mod 64 in java + int longIndex = Long.bitCount(block.index & ((1L << i64) - 1)); // shifts are mod 64 in java + assert block.bits[longIndex] != 0; + long bits = block.bits[longIndex] >>> i1; // shifts are mod 64 in java if (bits != 0L) { // hurray, we found some non-zero bits, this gives us the next document: i1 += Long.numberOfTrailingZeros(bits); @@ -296,7 +299,7 @@ public class SparseFixedBitSet extends DocIdSet implements Bits { // otherwise it means that although we were on a sub-64 block that contains // documents, all documents of this sub-block have already been consumed // so two cases: - indexBits = index >>> i64 >>> 1; // we don't shift by (i64+1) otherwise we might shift by a multiple of 64 which is a no-op + indexBits = block.index >>> i64 >>> 1; // we don't shift by (i64+1) otherwise we might shift by a multiple of 64 which is a no-op if (indexBits == 0) { // Case 1: this was the last long of the block of 4096 bits, then go // to the next block @@ -306,7 +309,7 @@ public class SparseFixedBitSet extends DocIdSet implements Bits { // by skipping trailing zeros of the index trailingZeros = Long.numberOfTrailingZeros(indexBits); i64 += 1 + trailingZeros; - bits = longArray[longIndex + 1]; + bits = block.bits[longIndex + 1]; assert bits != 0; i1 = Long.numberOfTrailingZeros(bits); return doc = (i4096 << 12) | ((i64 & 0x3F) << 6) | i1; diff --git a/lucene/queries/src/test/org/apache/lucene/queries/function/TestValueSources.java b/lucene/queries/src/test/org/apache/lucene/queries/function/TestValueSources.java index d5391f47b11..da8f50aa990 100644 --- a/lucene/queries/src/test/org/apache/lucene/queries/function/TestValueSources.java +++ b/lucene/queries/src/test/org/apache/lucene/queries/function/TestValueSources.java @@ -548,7 +548,7 @@ public class TestValueSources extends LuceneTestCase { expected.createWeight(context, searcher); actual.createWeight(context, searcher); - for (org.apache.lucene.index.LeafReaderContext leaf : reader.leaves()) { + for (org.apache.lucene.index.LeafReaderContext leaf : searcher.getIndexReader().leaves()) { final FunctionValues expectedVals = expected.getValues(context, leaf); final FunctionValues actualVals = actual.getValues(context, leaf);