From c1556a6136f8117b26e39c72428169447becb372 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Mon, 20 Oct 2014 08:25:31 +0000 Subject: [PATCH] Revert changes to SparseFixedBitSet. git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1633076 13f79535-47bb-0310-9956-ffa450edef68 --- .../apache/lucene/util/SparseFixedBitSet.java | 117 +++++++++--------- 1 file changed, 57 insertions(+), 60 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/util/SparseFixedBitSet.java b/lucene/core/src/java/org/apache/lucene/util/SparseFixedBitSet.java index 946749fa461..7a971956342 100644 --- a/lucene/core/src/java/org/apache/lucene/util/SparseFixedBitSet.java +++ b/lucene/core/src/java/org/apache/lucene/util/SparseFixedBitSet.java @@ -47,19 +47,8 @@ public class SparseFixedBitSet extends DocIdSet implements Bits { return blockCount; } - private static class Block { - private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(Block.class) + RamUsageEstimator.sizeOf(new long[1]); - - long index; - long[] bits; - - Block(int bit) { - this.index = 1L << (bit >>> 6); - this.bits = new long[] {1L << bit}; - } - } - - final Block[] blocks; + final long[] indices; + final long[][] bits; final int length; int nonZeroLongCount; long ramBytesUsed; @@ -72,9 +61,11 @@ public class SparseFixedBitSet extends DocIdSet implements Bits { } this.length = length; final int blockCount = blockCount(length); - blocks = new Block[blockCount]; + indices = new long[blockCount]; + bits = new long[blockCount][]; ramBytesUsed = BASE_RAM_BYTES_USED - + RamUsageEstimator.shallowSizeOf(blocks); + + RamUsageEstimator.shallowSizeOf(indices) + + RamUsageEstimator.shallowSizeOf(bits); } @Override @@ -103,11 +94,10 @@ public class SparseFixedBitSet extends DocIdSet implements Bits { */ public int cardinality() { int cardinality = 0; - for (Block block : blocks) { - if (block != null) { - final int numLongs = Long.bitCount(block.index); - for (int i = 0; i < numLongs; ++i) { - cardinality += Long.bitCount(block.bits[i]); + for (long[] bitArray : bits) { + if (bitArray != null) { + for (long bits : bitArray) { + cardinality += Long.bitCount(bits); } } } @@ -132,18 +122,18 @@ public class SparseFixedBitSet extends DocIdSet implements Bits { public boolean get(int i) { assert consistent(i); final int i4096 = i >>> 12; - final Block block = blocks[i4096]; + final long index = indices[i4096]; final int i64 = i >>> 6; // first check the index, if the i64-th bit is not set, then i is not set // note: this relies on the fact that shifts are mod 64 in java - if (block == null || (block.index & (1L << i64)) == 0) { + if ((index & (1L << i64)) == 0) { return false; } // if it is set, then we count the number of bits that are set on the right // of i64, and that gives us the index of the long that stores the bits we // are interested in - final long bits = block.bits[Long.bitCount(block.index & ((1L << i64) - 1))]; + final long bits = this.bits[i4096][Long.bitCount(index & ((1L << i64) - 1))]; return (bits & (1L << i)) != 0; } @@ -161,44 +151,53 @@ public class SparseFixedBitSet extends DocIdSet implements Bits { public void set(int i) { assert consistent(i); final int i4096 = i >>> 12; - final Block block = blocks[i4096]; + final long index = indices[i4096]; final int i64 = i >>> 6; - if (block == null) { - blocks[i4096] = new Block(i); - ++nonZeroLongCount; - ramBytesUsed += Block.BASE_RAM_BYTES_USED; - } else if ((block.index & (1L << i64)) != 0) { + if ((index & (1L << i64)) != 0) { // in that case the sub 64-bits block we are interested in already exists, // we just need to set a bit in an existing long: the number of ones on // the right of i64 gives us the index of the long we need to update - block.bits[Long.bitCount(block.index & ((1L << i64) - 1))] |= 1L << i; // shifts are mod 64 in java + bits[i4096][Long.bitCount(index & ((1L << i64) - 1))] |= 1L << i; // shifts are mod 64 in java + } else if (index == 0) { + // if the index is 0, it means that we just found a block of 4096 bits + // that has no bit that is set yet. So let's initialize a new block: + insertBlock(i4096, i64, i); } else { // in that case we found a block of 4096 bits that has some values, but // the sub-block of 64 bits that we are interested in has no value yet, // so we need to insert a new long - insertLong(block, i64, i); + insertLong(i4096, i64, i, index); } } + + private void insertBlock(int i4096, int i64, int i) { + indices[i4096] = 1L << i64; // shifts are mod 64 in java + assert bits[i4096] == null; + bits[i4096] = new long[] { 1L << i }; // shifts are mod 64 in java + ++nonZeroLongCount; + ramBytesUsed += SINGLE_ELEMENT_ARRAY_BYTES_USED; + } - private void insertLong(Block block, int i64, int i) { - block.index |= 1L << i64; // shifts are mod 64 in java + private void insertLong(int i4096, int i64, int i, long index) { + indices[i4096] |= 1L << i64; // shifts are mod 64 in java // we count the number of bits that are set on the right of i64 // this gives us the index at which to perform the insertion - final int o = Long.bitCount(block.index & ((1L << i64) - 1)); - if (block.bits[block.bits.length - 1] == 0) { + final int o = Long.bitCount(index & ((1L << i64) - 1)); + final long[] bitArray = bits[i4096]; + if (bitArray[bitArray.length - 1] == 0) { // since we only store non-zero longs, if the last value is 0, it means // that we alreay have extra space, make use of it - System.arraycopy(block.bits, o, block.bits, o + 1, block.bits.length - o - 1); - block.bits[o] = 1L << i; + System.arraycopy(bitArray, o, bitArray, o + 1, bitArray.length - o - 1); + bitArray[o] = 1L << i; } else { // we don't have extra space so we need to resize to insert the new long - final int newSize = oversize(block.bits.length + 1); + final int newSize = oversize(bitArray.length + 1); final long[] newBitArray = new long[newSize]; - System.arraycopy(block.bits, 0, newBitArray, 0, o); + System.arraycopy(bitArray, 0, newBitArray, 0, o); newBitArray[o] = 1L << i; - System.arraycopy(block.bits, o, newBitArray, o + 1, block.bits.length - o); - block.bits = newBitArray; - ramBytesUsed += (newSize - block.bits.length) * RamUsageEstimator.NUM_BYTES_LONG; + System.arraycopy(bitArray, o, newBitArray, o + 1, bitArray.length - o); + bits[i4096] = newBitArray; + ramBytesUsed += (newSize - bitArray.length) * RamUsageEstimator.NUM_BYTES_LONG; } ++nonZeroLongCount; } @@ -235,12 +234,12 @@ public class SparseFixedBitSet extends DocIdSet implements Bits { /** Return the first document that occurs on or after the provided block index. */ private int firstDoc(int i4096) { - Block block = null; - while (i4096 < blocks.length) { - block = blocks[i4096]; - if (block != null) { - final int i64 = Long.numberOfTrailingZeros(block.index); - return doc = (i4096 << 12) | (i64 << 6) | Long.numberOfTrailingZeros(block.bits[0]); + long index = 0; + while (i4096 < indices.length) { + index = indices[i4096]; + if (index != 0) { + final int i64 = Long.numberOfTrailingZeros(index); + return doc = (i4096 << 12) | (i64 << 6) | Long.numberOfTrailingZeros(bits[i4096][0]); } i4096 += 1; } @@ -255,15 +254,12 @@ public class SparseFixedBitSet extends DocIdSet implements Bits { @Override public int advance(int target) throws IOException { final int i4096 = target >>> 12; - if (i4096 >= blocks.length) { + if (i4096 >= indices.length) { return doc = NO_MORE_DOCS; } - final Block block = blocks[i4096]; - if (block == null) { - return firstDoc(i4096 + 1); - } + final long index = indices[i4096]; int i64 = target >>> 6; - long indexBits = block.index >>> i64; + long indexBits = index >>> i64; if (indexBits == 0) { // if the index is zero, it means that there is no value in the // current block, so return the first document of the next block @@ -284,12 +280,13 @@ public class SparseFixedBitSet extends DocIdSet implements Bits { } // So now we are on a sub 64-bits block that has values - assert (block.index & (1L << i64)) != 0; + assert (index & (1L << i64)) != 0; // we count the number of ones on the left of i64 to figure out the // index of the long that contains the bits we are interested in - int longIndex = Long.bitCount(block.index & ((1L << i64) - 1)); // shifts are mod 64 in java - assert block.bits[longIndex] != 0; - long bits = block.bits[longIndex] >>> i1; // shifts are mod 64 in java + int longIndex = Long.bitCount(index & ((1L << i64) - 1)); // shifts are mod 64 in java + final long[] longArray = bits[i4096]; + assert longArray[longIndex] != 0; + long bits = longArray[longIndex] >>> i1; // shifts are mod 64 in java if (bits != 0L) { // hurray, we found some non-zero bits, this gives us the next document: i1 += Long.numberOfTrailingZeros(bits); @@ -299,7 +296,7 @@ public class SparseFixedBitSet extends DocIdSet implements Bits { // otherwise it means that although we were on a sub-64 block that contains // documents, all documents of this sub-block have already been consumed // so two cases: - indexBits = block.index >>> i64 >>> 1; // we don't shift by (i64+1) otherwise we might shift by a multiple of 64 which is a no-op + indexBits = index >>> i64 >>> 1; // we don't shift by (i64+1) otherwise we might shift by a multiple of 64 which is a no-op if (indexBits == 0) { // Case 1: this was the last long of the block of 4096 bits, then go // to the next block @@ -309,7 +306,7 @@ public class SparseFixedBitSet extends DocIdSet implements Bits { // by skipping trailing zeros of the index trailingZeros = Long.numberOfTrailingZeros(indexBits); i64 += 1 + trailingZeros; - bits = block.bits[longIndex + 1]; + bits = longArray[longIndex + 1]; assert bits != 0; i1 = Long.numberOfTrailingZeros(bits); return doc = (i4096 << 12) | ((i64 & 0x3F) << 6) | i1;