mirror of https://github.com/apache/lucene.git
LUCENE-10564: Make sure SparseFixedBitSet#or updates memory usage (#882)
Previously, SparseFixedBitSet#or did not update the estimated memory usage when it allocated or grew a block, so ramBytesUsed() could report a value far from the actual footprint.
This commit is contained in:
parent
ea5c40686f
commit
3afc9fa966
|
@ -408,7 +408,11 @@ public class SparseFixedBitSet extends BitSet {
|
||||||
// fast path: if we currently have nothing in the block, just copy the data
|
// fast path: if we currently have nothing in the block, just copy the data
|
||||||
// this especially happens all the time if you call OR on an empty set
|
// this especially happens all the time if you call OR on an empty set
|
||||||
indices[i4096] = index;
|
indices[i4096] = index;
|
||||||
this.bits[i4096] = ArrayUtil.copyOfSubArray(bits, 0, nonZeroLongCount);
|
|
||||||
|
long[] newBits = ArrayUtil.copyOfSubArray(bits, 0, nonZeroLongCount);
|
||||||
|
this.bits[i4096] = newBits;
|
||||||
|
// we may slightly overestimate size here, but keep it cheap
|
||||||
|
this.ramBytesUsed += SINGLE_ELEMENT_ARRAY_BYTES_USED + ((long) newBits.length - 1 << 3);
|
||||||
this.nonZeroLongCount += nonZeroLongCount;
|
this.nonZeroLongCount += nonZeroLongCount;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -420,6 +424,8 @@ public class SparseFixedBitSet extends BitSet {
|
||||||
newBits = currentBits;
|
newBits = currentBits;
|
||||||
} else {
|
} else {
|
||||||
newBits = new long[oversize(requiredCapacity)];
|
newBits = new long[oversize(requiredCapacity)];
|
||||||
|
// we may slightly overestimate size here, but keep it cheap
|
||||||
|
this.ramBytesUsed += (long) (newBits.length - currentBits.length) << 3;
|
||||||
}
|
}
|
||||||
// we iterate backwards in order to not override data we might need on the next iteration if the
|
// we iterate backwards in order to not override data we might need on the next iteration if the
|
||||||
// array is reused
|
// array is reused
|
||||||
|
|
|
@ -71,4 +71,38 @@ public class TestSparseFixedBitSet extends BaseBitSetTestCase<SparseFixedBitSet>
|
||||||
}
|
}
|
||||||
assertEquals(numDocs, set.approximateCardinality());
|
assertEquals(numDocs, set.approximateCardinality());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testRamBytesUsed() throws IOException {
|
||||||
|
int size = 1000 + random().nextInt(10000);
|
||||||
|
BitSet original = new SparseFixedBitSet(size);
|
||||||
|
for (int i = 0; i < 3; i++) {
|
||||||
|
original.set(random().nextInt(size));
|
||||||
|
}
|
||||||
|
assertTrue(original.ramBytesUsed() > 0);
|
||||||
|
|
||||||
|
// Take union with a random sparse iterator, then check memory usage
|
||||||
|
BitSet copy = copyOf(original, size);
|
||||||
|
BitSet otherBitSet = new SparseFixedBitSet(size);
|
||||||
|
int interval = 10 + random().nextInt(100);
|
||||||
|
for (int i = 0; i < size; i += interval) {
|
||||||
|
otherBitSet.set(i);
|
||||||
|
}
|
||||||
|
copy.or(new BitSetIterator(otherBitSet, size));
|
||||||
|
assertTrue(copy.ramBytesUsed() > original.ramBytesUsed());
|
||||||
|
|
||||||
|
// Take union with a dense iterator, then check memory usage
|
||||||
|
copy = copyOf(original, size);
|
||||||
|
copy.or(DocIdSetIterator.all(size));
|
||||||
|
assertTrue(copy.ramBytesUsed() > original.ramBytesUsed());
|
||||||
|
assertTrue(copy.ramBytesUsed() > size / Byte.SIZE);
|
||||||
|
|
||||||
|
// Check that both "copy" strategies result in bit sets with
|
||||||
|
// (roughly) same memory usage as original
|
||||||
|
BitSet setCopy = copyOf(original, size);
|
||||||
|
assertEquals(setCopy.ramBytesUsed(), original.ramBytesUsed());
|
||||||
|
|
||||||
|
BitSet orCopy = new SparseFixedBitSet(size);
|
||||||
|
orCopy.or(new BitSetIterator(original, size));
|
||||||
|
assertEquals(orCopy.ramBytesUsed(), original.ramBytesUsed(), 64L);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue