diff --git a/lucene/core/src/java/org/apache/lucene/util/SparseFixedBitSet.java b/lucene/core/src/java/org/apache/lucene/util/SparseFixedBitSet.java index 02849c1585a..49d61614e86 100644 --- a/lucene/core/src/java/org/apache/lucene/util/SparseFixedBitSet.java +++ b/lucene/core/src/java/org/apache/lucene/util/SparseFixedBitSet.java @@ -408,7 +408,11 @@ public class SparseFixedBitSet extends BitSet { // fast path: if we currently have nothing in the block, just copy the data // this especially happens all the time if you call OR on an empty set indices[i4096] = index; - this.bits[i4096] = ArrayUtil.copyOfSubArray(bits, 0, nonZeroLongCount); + + long[] newBits = ArrayUtil.copyOfSubArray(bits, 0, nonZeroLongCount); + this.bits[i4096] = newBits; + // we may slightly overestimate size here, but keep it cheap + this.ramBytesUsed += SINGLE_ELEMENT_ARRAY_BYTES_USED + ((long) newBits.length - 1 << 3); this.nonZeroLongCount += nonZeroLongCount; return; } @@ -420,6 +424,8 @@ public class SparseFixedBitSet extends BitSet { newBits = currentBits; } else { newBits = new long[oversize(requiredCapacity)]; + // we may slightly overestimate size here, but keep it cheap + this.ramBytesUsed += (long) (newBits.length - currentBits.length) << 3; } // we iterate backwards in order to not override data we might need on the next iteration if the // array is reused diff --git a/lucene/core/src/test/org/apache/lucene/util/TestSparseFixedBitSet.java b/lucene/core/src/test/org/apache/lucene/util/TestSparseFixedBitSet.java index 594fbf57245..c17e0c9e3d9 100644 --- a/lucene/core/src/test/org/apache/lucene/util/TestSparseFixedBitSet.java +++ b/lucene/core/src/test/org/apache/lucene/util/TestSparseFixedBitSet.java @@ -71,4 +71,38 @@ public class TestSparseFixedBitSet extends BaseBitSetTestCase } assertEquals(numDocs, set.approximateCardinality()); } + + public void testRamBytesUsed() throws IOException { + int size = 1000 + random().nextInt(10000); + BitSet original = new SparseFixedBitSet(size); + for (int i = 0; i < 3; i++) { + original.set(random().nextInt(size)); + } + assertTrue(original.ramBytesUsed() > 0); + + // Take union with a random sparse iterator, then check memory usage + BitSet copy = copyOf(original, size); + BitSet otherBitSet = new SparseFixedBitSet(size); + int interval = 10 + random().nextInt(100); + for (int i = 0; i < size; i += interval) { + otherBitSet.set(i); + } + copy.or(new BitSetIterator(otherBitSet, size)); + assertTrue(copy.ramBytesUsed() > original.ramBytesUsed()); + + // Take union with a dense iterator, then check memory usage + copy = copyOf(original, size); + copy.or(DocIdSetIterator.all(size)); + assertTrue(copy.ramBytesUsed() > original.ramBytesUsed()); + assertTrue(copy.ramBytesUsed() > size / Byte.SIZE); + + // Check that both "copy" strategies result in bit sets with + // (roughly) same memory usage as original + BitSet setCopy = copyOf(original, size); + assertEquals(setCopy.ramBytesUsed(), original.ramBytesUsed()); + + BitSet orCopy = new SparseFixedBitSet(size); + orCopy.or(new BitSetIterator(original, size)); + assertEquals(orCopy.ramBytesUsed(), original.ramBytesUsed(), 64L); + } }