LUCENE-10564: Make sure SparseFixedBitSet#or updates memory usage (#882)

Previously, SparseFixedBitSet#or did not update the estimated memory usage, so
calls to ramBytesUsed could be far off from the actual value.
This commit is contained in:
Julie Tibshirani 2022-05-12 13:29:07 -07:00 committed by GitHub
parent ea5c40686f
commit 3afc9fa966
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 41 additions and 1 deletions

View File

@ -408,7 +408,11 @@ public class SparseFixedBitSet extends BitSet {
// fast path: if we currently have nothing in the block, just copy the data // fast path: if we currently have nothing in the block, just copy the data
// this especially happens all the time if you call OR on an empty set // this especially happens all the time if you call OR on an empty set
indices[i4096] = index; indices[i4096] = index;
this.bits[i4096] = ArrayUtil.copyOfSubArray(bits, 0, nonZeroLongCount);
long[] newBits = ArrayUtil.copyOfSubArray(bits, 0, nonZeroLongCount);
this.bits[i4096] = newBits;
// we may slightly overestimate size here, but keep it cheap
this.ramBytesUsed += SINGLE_ELEMENT_ARRAY_BYTES_USED + ((long) newBits.length - 1 << 3);
this.nonZeroLongCount += nonZeroLongCount; this.nonZeroLongCount += nonZeroLongCount;
return; return;
} }
@ -420,6 +424,8 @@ public class SparseFixedBitSet extends BitSet {
newBits = currentBits; newBits = currentBits;
} else { } else {
newBits = new long[oversize(requiredCapacity)]; newBits = new long[oversize(requiredCapacity)];
// we may slightly overestimate size here, but keep it cheap
this.ramBytesUsed += (long) (newBits.length - currentBits.length) << 3;
} }
// we iterate backwards in order to not override data we might need on the next iteration if the // we iterate backwards in order to not override data we might need on the next iteration if the
// array is reused // array is reused

View File

@ -71,4 +71,38 @@ public class TestSparseFixedBitSet extends BaseBitSetTestCase<SparseFixedBitSet>
} }
assertEquals(numDocs, set.approximateCardinality()); assertEquals(numDocs, set.approximateCardinality());
} }
/**
 * Checks that {@code ramBytesUsed()} grows after {@code or} is applied with a sparse and with a
 * dense iterator, and that a set rebuilt via {@code copyOf} or via {@code or} on an empty set
 * reports (roughly) the same memory usage as the set it was built from.
 */
public void testRamBytesUsed() throws IOException {
  final int numBits = 1000 + random().nextInt(10000);
  final BitSet original = new SparseFixedBitSet(numBits);
  // Seed the reference set with three randomly chosen bits
  int remaining = 3;
  while (remaining-- > 0) {
    original.set(random().nextInt(numBits));
  }
  assertTrue(original.ramBytesUsed() > 0);

  // Union with a sparse iterator (one bit every "step" positions): reported usage must increase
  final BitSet sparseUnion = copyOf(original, numBits);
  final BitSet everyNth = new SparseFixedBitSet(numBits);
  final int step = 10 + random().nextInt(100);
  int bit = 0;
  while (bit < numBits) {
    everyNth.set(bit);
    bit += step;
  }
  sparseUnion.or(new BitSetIterator(everyNth, numBits));
  assertTrue(sparseUnion.ramBytesUsed() > original.ramBytesUsed());

  // Union with a dense iterator: reported usage must increase and exceed one bit per position
  final BitSet denseUnion = copyOf(original, numBits);
  denseUnion.or(DocIdSetIterator.all(numBits));
  assertTrue(denseUnion.ramBytesUsed() > original.ramBytesUsed());
  assertTrue(denseUnion.ramBytesUsed() > numBits / Byte.SIZE);

  // Both ways of duplicating the original should report (roughly) the original's usage:
  // copyOf must match exactly, while or-ing into an empty set is allowed a delta of 64
  final BitSet viaCopyOf = copyOf(original, numBits);
  assertEquals(viaCopyOf.ramBytesUsed(), original.ramBytesUsed());

  final BitSet viaOr = new SparseFixedBitSet(numBits);
  viaOr.or(new BitSetIterator(original, numBits));
  assertEquals(viaOr.ramBytesUsed(), original.ramBytesUsed(), 64L);
}
} }