mirror of https://github.com/apache/lucene.git
LUCENE-5764: Add tests to DocIdSet.ramBytesUsed.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1602876 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
631e55046f
commit
67d1a72c55
|
@ -25,6 +25,12 @@ import org.apache.lucene.search.DocIdSetIterator;
|
|||
|
||||
/** Simple DocIdSet and DocIdSetIterator backed by a BitSet */
|
||||
public class DocIdBitSet extends DocIdSet implements Bits {
|
||||
|
||||
private static final long BASE_RAM_BYTES_USED =
|
||||
RamUsageEstimator.shallowSizeOfInstance(DocIdBitSet.class)
|
||||
+ RamUsageEstimator.shallowSizeOfInstance(BitSet.class)
|
||||
+ RamUsageEstimator.NUM_BYTES_ARRAY_HEADER; // the array that stores the bits
|
||||
|
||||
private final BitSet bitSet;
|
||||
|
||||
public DocIdBitSet(BitSet bitSet) {
|
||||
|
@ -67,7 +73,9 @@ public class DocIdBitSet extends DocIdSet implements Bits {
|
|||
|
||||
@Override
|
||||
public long ramBytesUsed() {
|
||||
return RamUsageEstimator.NUM_BYTES_OBJECT_REF + (bitSet.size() + 7) >>> 3;
|
||||
// unfortunately this is likely underestimated if the Bitset implementation
|
||||
// over-sizes the array that stores the bits
|
||||
return BASE_RAM_BYTES_USED + (bitSet.size() + 7) >>> 3;
|
||||
}
|
||||
|
||||
private static class DocIdBitSetIterator extends DocIdSetIterator {
|
||||
|
|
|
@ -33,6 +33,8 @@ import org.apache.lucene.search.DocIdSetIterator;
|
|||
*/
|
||||
public final class FixedBitSet extends DocIdSet implements Bits {
|
||||
|
||||
private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(FixedBitSet.class);
|
||||
|
||||
/**
|
||||
* A {@link DocIdSetIterator} which iterates over set bits in a
|
||||
* {@link FixedBitSet}.
|
||||
|
@ -220,10 +222,7 @@ public final class FixedBitSet extends DocIdSet implements Bits {
|
|||
|
||||
@Override
|
||||
public long ramBytesUsed() {
|
||||
return RamUsageEstimator.alignObjectSize(
|
||||
RamUsageEstimator.NUM_BYTES_OBJECT_REF // the reference to the long[]
|
||||
+ RamUsageEstimator.NUM_BYTES_INT * 2) // numBits and numWords
|
||||
+ RamUsageEstimator.sizeOf(bits); // the bits
|
||||
return BASE_RAM_BYTES_USED + RamUsageEstimator.sizeOf(bits);
|
||||
}
|
||||
|
||||
/** Expert. */
|
||||
|
|
|
@ -75,6 +75,9 @@ Test system: AMD Opteron, 64 bit linux, Sun Java 1.5_06 -server -Xbatch -Xmx64M
|
|||
*/
|
||||
|
||||
public class OpenBitSet extends DocIdSet implements Bits, Cloneable {
|
||||
|
||||
private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(OpenBitSet.class);
|
||||
|
||||
protected long[] bits;
|
||||
protected int wlen; // number of words (elements) used in the array
|
||||
|
||||
|
@ -133,11 +136,7 @@ public class OpenBitSet extends DocIdSet implements Bits, Cloneable {
|
|||
|
||||
@Override
|
||||
public long ramBytesUsed() {
|
||||
return RamUsageEstimator.alignObjectSize(
|
||||
RamUsageEstimator.NUM_BYTES_OBJECT_REF
|
||||
+ RamUsageEstimator.NUM_BYTES_LONG
|
||||
+ RamUsageEstimator.NUM_BYTES_INT)
|
||||
+ RamUsageEstimator.sizeOf(bits);
|
||||
return BASE_RAM_BYTES_USED + RamUsageEstimator.sizeOf(bits);
|
||||
}
|
||||
|
||||
/** Returns the current capacity in bits (1 greater than the index of the last bit) */
|
||||
|
|
|
@ -35,6 +35,8 @@ import org.apache.lucene.util.packed.PackedInts;
|
|||
*/
|
||||
public final class PForDeltaDocIdSet extends DocIdSet implements Accountable {
|
||||
|
||||
private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(PForDeltaDocIdSet.class);
|
||||
|
||||
static final int BLOCK_SIZE = 128;
|
||||
static final int MAX_EXCEPTIONS = 24; // no more than 24 exceptions per block
|
||||
static final PackedInts.Decoder[] DECODERS = new PackedInts.Decoder[32];
|
||||
|
@ -513,7 +515,17 @@ public final class PForDeltaDocIdSet extends DocIdSet implements Accountable {
|
|||
|
||||
@Override
|
||||
public long ramBytesUsed() {
|
||||
return RamUsageEstimator.alignObjectSize(3 * RamUsageEstimator.NUM_BYTES_OBJECT_REF) + docIDs.ramBytesUsed() + offsets.ramBytesUsed();
|
||||
if (this == EMPTY) {
|
||||
return 0L;
|
||||
}
|
||||
long ramBytesUsed = BASE_RAM_BYTES_USED + RamUsageEstimator.sizeOf(data);
|
||||
if (docIDs != SINGLE_ZERO_BUFFER) {
|
||||
ramBytesUsed += docIDs.ramBytesUsed();
|
||||
}
|
||||
if (offsets != SINGLE_ZERO_BUFFER) {
|
||||
ramBytesUsed += offsets.ramBytesUsed();
|
||||
}
|
||||
return ramBytesUsed;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -76,6 +76,8 @@ import org.apache.lucene.util.packed.PackedInts;
|
|||
*/
|
||||
public final class WAH8DocIdSet extends DocIdSet implements Accountable {
|
||||
|
||||
private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(WAH8DocIdSet.class);
|
||||
|
||||
// Minimum index interval, intervals below this value can't guarantee anymore
|
||||
// that this set implementation won't be significantly larger than a FixedBitSet
|
||||
// The reason is that a single sequence saves at least one byte and an index
|
||||
|
@ -738,10 +740,17 @@ public final class WAH8DocIdSet extends DocIdSet implements Accountable {
|
|||
|
||||
@Override
|
||||
public long ramBytesUsed() {
|
||||
return RamUsageEstimator.alignObjectSize(3 * RamUsageEstimator.NUM_BYTES_OBJECT_REF + 2 * RamUsageEstimator.NUM_BYTES_INT)
|
||||
+ RamUsageEstimator.sizeOf(data)
|
||||
+ positions.ramBytesUsed()
|
||||
+ wordNums.ramBytesUsed();
|
||||
if (this == EMPTY) {
|
||||
return 0L;
|
||||
}
|
||||
long ramBytesUsed = BASE_RAM_BYTES_USED + RamUsageEstimator.sizeOf(data);
|
||||
if (positions != SINGLE_ZERO_BUFFER) {
|
||||
ramBytesUsed += positions.ramBytesUsed();
|
||||
}
|
||||
if (wordNums != SINGLE_ZERO_BUFFER) {
|
||||
ramBytesUsed += wordNums.ramBytesUsed();
|
||||
}
|
||||
return ramBytesUsed;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -29,6 +29,9 @@ import org.apache.lucene.util.RamUsageEstimator;
|
|||
* @lucene.internal
|
||||
*/
|
||||
public class EliasFanoDocIdSet extends DocIdSet {
|
||||
|
||||
private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(EliasFanoDocIdSet.class);
|
||||
|
||||
final EliasFanoEncoder efEncoder;
|
||||
|
||||
/**
|
||||
|
@ -129,7 +132,7 @@ public class EliasFanoDocIdSet extends DocIdSet {
|
|||
|
||||
@Override
|
||||
public long ramBytesUsed() {
|
||||
return RamUsageEstimator.alignObjectSize(RamUsageEstimator.NUM_BYTES_OBJECT_REF) + efEncoder.ramBytesUsed();
|
||||
return BASE_RAM_BYTES_USED + efEncoder.ramBytesUsed();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -85,6 +85,9 @@ import org.apache.lucene.util.ToStringUtils;
|
|||
*/
|
||||
|
||||
public class EliasFanoEncoder implements Accountable {
|
||||
|
||||
private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(EliasFanoEncoder.class);
|
||||
|
||||
final long numValues;
|
||||
private final long upperBound;
|
||||
final int numLowBits;
|
||||
|
@ -354,10 +357,7 @@ public class EliasFanoEncoder implements Accountable {
|
|||
|
||||
@Override
|
||||
public long ramBytesUsed() {
|
||||
return RamUsageEstimator.alignObjectSize(
|
||||
RamUsageEstimator.NUM_BYTES_OBJECT_REF * 3
|
||||
+ RamUsageEstimator.NUM_BYTES_LONG * 8
|
||||
+ RamUsageEstimator.NUM_BYTES_INT * 2)
|
||||
return BASE_RAM_BYTES_USED
|
||||
+ RamUsageEstimator.sizeOf(lowerLongs)
|
||||
+ RamUsageEstimator.sizeOf(upperLongs)
|
||||
+ RamUsageEstimator.sizeOf(upperZeroBitPositionIndex);
|
||||
|
|
|
@ -3,6 +3,8 @@ package org.apache.lucene.util;
|
|||
import java.io.IOException;
|
||||
import java.util.BitSet;
|
||||
|
||||
import org.junit.Ignore;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
|
@ -26,5 +28,11 @@ public class TestDocIdBitSet extends BaseDocIdSetTestCase<DocIdBitSet> {
|
|||
public DocIdBitSet copyOf(BitSet bs, int length) throws IOException {
|
||||
return new DocIdBitSet((BitSet) bs.clone());
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
@Ignore("no access to the internals of this impl")
|
||||
public void testRamBytesUsed() throws IOException {
|
||||
super.testRamBytesUsed();
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -113,6 +113,21 @@ public abstract class BaseDocIdSetTestCase<T extends DocIdSet> extends LuceneTes
|
|||
}
|
||||
}
|
||||
|
||||
/** Test ram usage estimation. */
|
||||
public void testRamBytesUsed() throws IOException {
|
||||
final int iters = 100;
|
||||
for (int i = 0; i < iters; ++i) {
|
||||
final int pow = random().nextInt(20);
|
||||
final int maxDoc = TestUtil.nextInt(random(), 1, 1 << pow);
|
||||
final int numDocs = TestUtil.nextInt(random(), 0, Math.min(maxDoc, 1 << TestUtil.nextInt(random(), 0, pow)));
|
||||
final BitSet set = randomSet(maxDoc, numDocs);
|
||||
final DocIdSet copy = copyOf(set, maxDoc);
|
||||
final long actualBytes = ramBytesUsed(copy, maxDoc);
|
||||
final long expectedBytes = copy.ramBytesUsed();
|
||||
assertEquals(expectedBytes, actualBytes);
|
||||
}
|
||||
}
|
||||
|
||||
/** Assert that the content of the {@link DocIdSet} is the same as the content of the {@link BitSet}. */
|
||||
public void assertEquals(int numBits, BitSet ds1, T ds2) throws IOException {
|
||||
// nextDoc
|
||||
|
@ -172,4 +187,21 @@ public abstract class BaseDocIdSetTestCase<T extends DocIdSet> extends LuceneTes
|
|||
}
|
||||
}
|
||||
|
||||
private static class Dummy {
|
||||
@SuppressWarnings("unused")
|
||||
Object o1, o2;
|
||||
}
|
||||
|
||||
// same as RamUsageTester.sizeOf but tries to not take into account resources
|
||||
// that might be shared across instances
|
||||
private long ramBytesUsed(DocIdSet set, int length) throws IOException {
|
||||
Dummy dummy = new Dummy();
|
||||
dummy.o1 = copyOf(new BitSet(length), length);
|
||||
dummy.o2 = set;
|
||||
long bytes1 = RamUsageTester.sizeOf(dummy);
|
||||
dummy.o2 = null;
|
||||
long bytes2 = RamUsageTester.sizeOf(dummy);
|
||||
return bytes1 - bytes2;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue