LUCENE-5764: Add tests to DocIdSet.ramBytesUsed.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1602876 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Adrien Grand 2014-06-16 14:25:12 +00:00
parent 631e55046f
commit 67d1a72c55
9 changed files with 91 additions and 21 deletions

View File

@ -25,6 +25,12 @@ import org.apache.lucene.search.DocIdSetIterator;
/** Simple DocIdSet and DocIdSetIterator backed by a BitSet */
public class DocIdBitSet extends DocIdSet implements Bits {
private static final long BASE_RAM_BYTES_USED =
RamUsageEstimator.shallowSizeOfInstance(DocIdBitSet.class)
+ RamUsageEstimator.shallowSizeOfInstance(BitSet.class)
+ RamUsageEstimator.NUM_BYTES_ARRAY_HEADER; // the array that stores the bits
private final BitSet bitSet;
public DocIdBitSet(BitSet bitSet) {
@ -67,7 +73,9 @@ public class DocIdBitSet extends DocIdSet implements Bits {
@Override
public long ramBytesUsed() {
return RamUsageEstimator.NUM_BYTES_OBJECT_REF + (bitSet.size() + 7) >>> 3;
// unfortunately this is likely underestimated if the Bitset implementation
// over-sizes the array that stores the bits
return BASE_RAM_BYTES_USED + (bitSet.size() + 7) >>> 3;
}
private static class DocIdBitSetIterator extends DocIdSetIterator {

View File

@ -33,6 +33,8 @@ import org.apache.lucene.search.DocIdSetIterator;
*/
public final class FixedBitSet extends DocIdSet implements Bits {
private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(FixedBitSet.class);
/**
* A {@link DocIdSetIterator} which iterates over set bits in a
* {@link FixedBitSet}.
@ -220,10 +222,7 @@ public final class FixedBitSet extends DocIdSet implements Bits {
@Override
public long ramBytesUsed() {
return RamUsageEstimator.alignObjectSize(
RamUsageEstimator.NUM_BYTES_OBJECT_REF // the reference to the long[]
+ RamUsageEstimator.NUM_BYTES_INT * 2) // numBits and numWords
+ RamUsageEstimator.sizeOf(bits); // the bits
return BASE_RAM_BYTES_USED + RamUsageEstimator.sizeOf(bits);
}
/** Expert. */

View File

@ -75,6 +75,9 @@ Test system: AMD Opteron, 64 bit linux, Sun Java 1.5_06 -server -Xbatch -Xmx64M
*/
public class OpenBitSet extends DocIdSet implements Bits, Cloneable {
private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(OpenBitSet.class);
protected long[] bits;
protected int wlen; // number of words (elements) used in the array
@ -133,11 +136,7 @@ public class OpenBitSet extends DocIdSet implements Bits, Cloneable {
@Override
public long ramBytesUsed() {
return RamUsageEstimator.alignObjectSize(
RamUsageEstimator.NUM_BYTES_OBJECT_REF
+ RamUsageEstimator.NUM_BYTES_LONG
+ RamUsageEstimator.NUM_BYTES_INT)
+ RamUsageEstimator.sizeOf(bits);
return BASE_RAM_BYTES_USED + RamUsageEstimator.sizeOf(bits);
}
/** Returns the current capacity in bits (1 greater than the index of the last bit) */

View File

@ -35,6 +35,8 @@ import org.apache.lucene.util.packed.PackedInts;
*/
public final class PForDeltaDocIdSet extends DocIdSet implements Accountable {
private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(PForDeltaDocIdSet.class);
static final int BLOCK_SIZE = 128;
static final int MAX_EXCEPTIONS = 24; // no more than 24 exceptions per block
static final PackedInts.Decoder[] DECODERS = new PackedInts.Decoder[32];
@ -513,7 +515,17 @@ public final class PForDeltaDocIdSet extends DocIdSet implements Accountable {
@Override
public long ramBytesUsed() {
return RamUsageEstimator.alignObjectSize(3 * RamUsageEstimator.NUM_BYTES_OBJECT_REF) + docIDs.ramBytesUsed() + offsets.ramBytesUsed();
if (this == EMPTY) {
return 0L;
}
long ramBytesUsed = BASE_RAM_BYTES_USED + RamUsageEstimator.sizeOf(data);
if (docIDs != SINGLE_ZERO_BUFFER) {
ramBytesUsed += docIDs.ramBytesUsed();
}
if (offsets != SINGLE_ZERO_BUFFER) {
ramBytesUsed += offsets.ramBytesUsed();
}
return ramBytesUsed;
}
}

View File

@ -76,6 +76,8 @@ import org.apache.lucene.util.packed.PackedInts;
*/
public final class WAH8DocIdSet extends DocIdSet implements Accountable {
private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(WAH8DocIdSet.class);
// Minimum index interval, intervals below this value can't guarantee anymore
// that this set implementation won't be significantly larger than a FixedBitSet
// The reason is that a single sequence saves at least one byte and an index
@ -738,10 +740,17 @@ public final class WAH8DocIdSet extends DocIdSet implements Accountable {
@Override
public long ramBytesUsed() {
return RamUsageEstimator.alignObjectSize(3 * RamUsageEstimator.NUM_BYTES_OBJECT_REF + 2 * RamUsageEstimator.NUM_BYTES_INT)
+ RamUsageEstimator.sizeOf(data)
+ positions.ramBytesUsed()
+ wordNums.ramBytesUsed();
if (this == EMPTY) {
return 0L;
}
long ramBytesUsed = BASE_RAM_BYTES_USED + RamUsageEstimator.sizeOf(data);
if (positions != SINGLE_ZERO_BUFFER) {
ramBytesUsed += positions.ramBytesUsed();
}
if (wordNums != SINGLE_ZERO_BUFFER) {
ramBytesUsed += wordNums.ramBytesUsed();
}
return ramBytesUsed;
}
}

View File

@ -29,6 +29,9 @@ import org.apache.lucene.util.RamUsageEstimator;
* @lucene.internal
*/
public class EliasFanoDocIdSet extends DocIdSet {
private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(EliasFanoDocIdSet.class);
final EliasFanoEncoder efEncoder;
/**
@ -129,7 +132,7 @@ public class EliasFanoDocIdSet extends DocIdSet {
@Override
public long ramBytesUsed() {
return RamUsageEstimator.alignObjectSize(RamUsageEstimator.NUM_BYTES_OBJECT_REF) + efEncoder.ramBytesUsed();
return BASE_RAM_BYTES_USED + efEncoder.ramBytesUsed();
}
}

View File

@ -85,6 +85,9 @@ import org.apache.lucene.util.ToStringUtils;
*/
public class EliasFanoEncoder implements Accountable {
private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(EliasFanoEncoder.class);
final long numValues;
private final long upperBound;
final int numLowBits;
@ -354,10 +357,7 @@ public class EliasFanoEncoder implements Accountable {
@Override
public long ramBytesUsed() {
return RamUsageEstimator.alignObjectSize(
RamUsageEstimator.NUM_BYTES_OBJECT_REF * 3
+ RamUsageEstimator.NUM_BYTES_LONG * 8
+ RamUsageEstimator.NUM_BYTES_INT * 2)
return BASE_RAM_BYTES_USED
+ RamUsageEstimator.sizeOf(lowerLongs)
+ RamUsageEstimator.sizeOf(upperLongs)
+ RamUsageEstimator.sizeOf(upperZeroBitPositionIndex);

View File

@ -3,6 +3,8 @@ package org.apache.lucene.util;
import java.io.IOException;
import java.util.BitSet;
import org.junit.Ignore;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@ -26,5 +28,11 @@ public class TestDocIdBitSet extends BaseDocIdSetTestCase<DocIdBitSet> {
public DocIdBitSet copyOf(BitSet bs, int length) throws IOException {
return new DocIdBitSet((BitSet) bs.clone());
}
@Override
@Ignore("no access to the internals of this impl")
public void testRamBytesUsed() throws IOException {
super.testRamBytesUsed();
}
}

View File

@ -113,6 +113,21 @@ public abstract class BaseDocIdSetTestCase<T extends DocIdSet> extends LuceneTes
}
}
/** Test ram usage estimation. */
public void testRamBytesUsed() throws IOException {
final int iters = 100;
for (int i = 0; i < iters; ++i) {
final int pow = random().nextInt(20);
final int maxDoc = TestUtil.nextInt(random(), 1, 1 << pow);
final int numDocs = TestUtil.nextInt(random(), 0, Math.min(maxDoc, 1 << TestUtil.nextInt(random(), 0, pow)));
final BitSet set = randomSet(maxDoc, numDocs);
final DocIdSet copy = copyOf(set, maxDoc);
final long actualBytes = ramBytesUsed(copy, maxDoc);
final long expectedBytes = copy.ramBytesUsed();
assertEquals(expectedBytes, actualBytes);
}
}
/** Assert that the content of the {@link DocIdSet} is the same as the content of the {@link BitSet}. */
public void assertEquals(int numBits, BitSet ds1, T ds2) throws IOException {
// nextDoc
@ -172,4 +187,21 @@ public abstract class BaseDocIdSetTestCase<T extends DocIdSet> extends LuceneTes
}
}
private static class Dummy {
@SuppressWarnings("unused")
Object o1, o2;
}
// same as RamUsageTester.sizeOf but tries to not take into account resources
// that might be shared across instances
private long ramBytesUsed(DocIdSet set, int length) throws IOException {
Dummy dummy = new Dummy();
dummy.o1 = copyOf(new BitSet(length), length);
dummy.o2 = set;
long bytes1 = RamUsageTester.sizeOf(dummy);
dummy.o2 = null;
long bytes2 = RamUsageTester.sizeOf(dummy);
return bytes1 - bytes2;
}
}