LUCENE-5765: Add tests to OrdinalMap.ramBytesUsed.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1602880 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Adrien Grand 2014-06-16 14:41:15 +00:00
parent 67d1a72c55
commit a54f8a2e4e
7 changed files with 143 additions and 19 deletions

View File

@ -112,9 +112,9 @@ class FrozenBufferedUpdates {
} }
binaryDVUpdates = allBinaryUpdates.toArray(new BinaryDocValuesUpdate[allBinaryUpdates.size()]); binaryDVUpdates = allBinaryUpdates.toArray(new BinaryDocValuesUpdate[allBinaryUpdates.size()]);
bytesUsed = (int) terms.ramBytesUsed() + queries.length * BYTES_PER_DEL_QUERY bytesUsed = (int) (terms.ramBytesUsed() + queries.length * BYTES_PER_DEL_QUERY
+ numericUpdatesSize + numericDVUpdates.length * RamUsageEstimator.NUM_BYTES_OBJECT_REF + numericUpdatesSize + RamUsageEstimator.shallowSizeOf(numericDVUpdates)
+ binaryUpdatesSize + binaryDVUpdates.length * RamUsageEstimator.NUM_BYTES_OBJECT_REF; + binaryUpdatesSize + RamUsageEstimator.shallowSizeOf(binaryDVUpdates));
numTermDeletes = deletes.numTermDeletes.get(); numTermDeletes = deletes.numTermDeletes.get();
} }

View File

@ -25,6 +25,7 @@ import org.apache.lucene.index.MultiTermsEnum.TermsEnumWithSlice;
import org.apache.lucene.util.Accountable; import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Bits; import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.packed.AppendingPackedLongBuffer; import org.apache.lucene.util.packed.AppendingPackedLongBuffer;
import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer; import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;
import org.apache.lucene.util.packed.PackedInts; import org.apache.lucene.util.packed.PackedInts;
@ -377,6 +378,9 @@ public class MultiDocValues {
// TODO: use more efficient packed ints structures? // TODO: use more efficient packed ints structures?
// TODO: pull this out? its pretty generic (maps between N ord()-enabled TermsEnums) // TODO: pull this out? its pretty generic (maps between N ord()-enabled TermsEnums)
public static class OrdinalMap implements Accountable { public static class OrdinalMap implements Accountable {
private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(OrdinalMap.class);
// cache key of whoever asked for this awful thing // cache key of whoever asked for this awful thing
final Object owner; final Object owner;
// globalOrd -> (globalOrd - segmentOrd) where segmentOrd is the the ordinal in the first segment that contains this term // globalOrd -> (globalOrd - segmentOrd) where segmentOrd is the the ordinal in the first segment that contains this term
@ -473,7 +477,7 @@ public class MultiDocValues {
@Override @Override
public long ramBytesUsed() { public long ramBytesUsed() {
long size = globalOrdDeltas.ramBytesUsed() + firstSegments.ramBytesUsed(); long size = BASE_RAM_BYTES_USED + globalOrdDeltas.ramBytesUsed() + firstSegments.ramBytesUsed() + RamUsageEstimator.shallowSizeOf(ordDeltas);
for (int i = 0; i < ordDeltas.length; i++) { for (int i = 0; i < ordDeltas.length; i++) {
size += ordDeltas[i].ramBytesUsed(); size += ordDeltas[i].ramBytesUsed();
} }

View File

@ -307,6 +307,12 @@ public final class RamUsageEstimator {
return alignObjectSize((long) NUM_BYTES_ARRAY_HEADER + (long) NUM_BYTES_DOUBLE * arr.length); return alignObjectSize((long) NUM_BYTES_ARRAY_HEADER + (long) NUM_BYTES_DOUBLE * arr.length);
} }
/** Returns the shallow size in bytes of the Object[] object. */
// Use this method instead of #shallowSizeOf(Object) to avoid costly reflection
public static long shallowSizeOf(Object[] arr) {
return alignObjectSize((long) NUM_BYTES_ARRAY_HEADER + (long) NUM_BYTES_OBJECT_REF * arr.length);
}
/** /**
* Estimates a "shallow" memory usage of the given object. For arrays, this will be the * Estimates a "shallow" memory usage of the given object. For arrays, this will be the
* memory taken by array storage (no subreferences will be followed). For objects, this * memory taken by array storage (no subreferences will be followed). For objects, this

View File

@ -194,7 +194,7 @@ abstract class AbstractAppendingLongBuffer extends LongValues implements Account
// TODO: this is called per-doc-per-norms/dv-field, can we optimize this? // TODO: this is called per-doc-per-norms/dv-field, can we optimize this?
long bytesUsed = RamUsageEstimator.alignObjectSize(baseRamBytesUsed()) long bytesUsed = RamUsageEstimator.alignObjectSize(baseRamBytesUsed())
+ (pending != null ? RamUsageEstimator.sizeOf(pending) : 0L) + (pending != null ? RamUsageEstimator.sizeOf(pending) : 0L)
+ RamUsageEstimator.alignObjectSize(RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + (long) RamUsageEstimator.NUM_BYTES_OBJECT_REF * values.length); // values + RamUsageEstimator.shallowSizeOf(values);
return bytesUsed + valuesBytes; return bytesUsed + valuesBytes;
} }

View File

@ -107,7 +107,7 @@ abstract class AbstractPagedMutable<T extends AbstractPagedMutable<T>> extends L
/** Return the number of bytes used by this object. */ /** Return the number of bytes used by this object. */
public long ramBytesUsed() { public long ramBytesUsed() {
long bytesUsed = RamUsageEstimator.alignObjectSize(baseRamBytesUsed()); long bytesUsed = RamUsageEstimator.alignObjectSize(baseRamBytesUsed());
bytesUsed += RamUsageEstimator.alignObjectSize(RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + (long) RamUsageEstimator.NUM_BYTES_OBJECT_REF * subMutables.length); bytesUsed += RamUsageEstimator.alignObjectSize(RamUsageEstimator.shallowSizeOf(subMutables));
for (PackedInts.Mutable gw : subMutables) { for (PackedInts.Mutable gw : subMutables) {
bytesUsed += gw.ramBytesUsed(); bytesUsed += gw.ramBytesUsed();
} }

View File

@ -0,0 +1,87 @@
package org.apache.lucene.index;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.lang.reflect.Field;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.lucene49.Lucene49DocValuesFormat;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.index.MultiDocValues.MultiSortedDocValues;
import org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues;
import org.apache.lucene.index.MultiDocValues.OrdinalMap;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.RamUsageTester;
import org.apache.lucene.util.TestUtil;
public class TestOrdinalMap extends LuceneTestCase {
private static final RamUsageTester.Filter ORDINAL_MAP_FILTER = new RamUsageTester.Filter() {
@Override
public boolean accept(Field field) {
if (field.getDeclaringClass().equals(OrdinalMap.class) && field.getName().equals("owner")) {
return false;
}
return true;
}
};
public void testRamBytesUsed() throws IOException {
Directory dir = newDirectory();
IndexWriterConfig cfg = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())).setCodec(TestUtil.alwaysDocValuesFormat(new Lucene49DocValuesFormat()));
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, cfg);
final int maxDoc = TestUtil.nextInt(random(), 10, 1000);
final int maxTermLength = TestUtil.nextInt(random(), 1, 4);
for (int i = 0; i < maxDoc; ++i) {
Document d = new Document();
if (random().nextBoolean()) {
d.add(new SortedDocValuesField("sdv", new BytesRef(TestUtil.randomSimpleString(random(), maxTermLength))));
}
final int numSortedSet = random().nextInt(3);
for (int j = 0; j < numSortedSet; ++j) {
d.add(new SortedSetDocValuesField("ssdv", new BytesRef(TestUtil.randomSimpleString(random(), maxTermLength))));
}
iw.addDocument(d);
if (rarely()) {
iw.getReader().close();
}
}
iw.commit();
DirectoryReader r = iw.getReader();
AtomicReader ar = SlowCompositeReaderWrapper.wrap(r);
SortedDocValues sdv = ar.getSortedDocValues("sdv");
if (sdv instanceof MultiSortedDocValues) {
OrdinalMap map = ((MultiSortedDocValues) sdv).mapping;
assertEquals(RamUsageTester.sizeOf(map, ORDINAL_MAP_FILTER), map.ramBytesUsed());
}
SortedSetDocValues ssdv = ar.getSortedSetDocValues("ssdv");
if (ssdv instanceof MultiSortedSetDocValues) {
OrdinalMap map = ((MultiSortedSetDocValues) ssdv).mapping;
assertEquals(RamUsageTester.sizeOf(map, ORDINAL_MAP_FILTER), map.ramBytesUsed());
}
iw.close();
r.close();
dir.close();
}
}

View File

@ -29,19 +29,44 @@ import java.util.NoSuchElementException;
/** Crawls object graph to collect RAM usage for testing */ /** Crawls object graph to collect RAM usage for testing */
public final class RamUsageTester { public final class RamUsageTester {
/** /**
* A {@link Filter} that accepts all fields.
*/
private static final Filter DEFAULT_FILTER = new Filter() {
@Override
public boolean accept(Field field) {
return true;
}
};
/** A filter that allows to decide on what to take into account when measuring RAM usage. */
public static interface Filter {
/** Whether the provided field should be taken into account when measuring RAM usage. */
boolean accept(Field field);
}
/**
* Estimates the RAM usage by the given object. It will * Estimates the RAM usage by the given object. It will
* walk the object tree and sum up all referenced objects. * walk the object tree and sum up all referenced objects.
* *
* <p><b>Resource Usage:</b> This method internally uses a set of * <p><b>Resource Usage:</b> This method internally uses a set of
* every object seen during traversals so it does allocate memory * every object seen during traversals so it does allocate memory
* (it isn't side-effect free). After the method exits, this memory * (it isn't side-effect free). After the method exits, this memory
* should be GCed.</p> * should be GCed.</p>
*/ */
public static long sizeOf(Object obj) { public static long sizeOf(Object obj, Filter filter) {
return measureObjectSize(obj); return measureObjectSize(obj, filter);
} }
/** Same as calling <code>sizeOf(obj, DEFAULT_FILTER)</code>. */
public static long sizeOf(Object obj) {
return sizeOf(obj, DEFAULT_FILTER);
}
/** /**
* Return a human-readable size of a given object. * Return a human-readable size of a given object.
* @see #sizeOf(Object) * @see #sizeOf(Object)
@ -50,14 +75,14 @@ public final class RamUsageTester {
public static String humanSizeOf(Object object) { public static String humanSizeOf(Object object) {
return RamUsageEstimator.humanReadableUnits(sizeOf(object)); return RamUsageEstimator.humanReadableUnits(sizeOf(object));
} }
/* /*
* Non-recursive version of object descend. This consumes more memory than recursive in-depth * Non-recursive version of object descend. This consumes more memory than recursive in-depth
* traversal but prevents stack overflows on long chains of objects * traversal but prevents stack overflows on long chains of objects
* or complex graphs (a max. recursion depth on my machine was ~5000 objects linked in a chain * or complex graphs (a max. recursion depth on my machine was ~5000 objects linked in a chain
* so not too much). * so not too much).
*/ */
private static long measureObjectSize(Object root) { private static long measureObjectSize(Object root, Filter filter) {
// Objects seen so far. // Objects seen so far.
final IdentityHashSet<Object> seen = new IdentityHashSet<>(); final IdentityHashSet<Object> seen = new IdentityHashSet<>();
// Class cache with reference Field and precalculated shallow size. // Class cache with reference Field and precalculated shallow size.
@ -113,10 +138,12 @@ public final class RamUsageTester {
} }
for (Field f : cachedInfo.referenceFields) { for (Field f : cachedInfo.referenceFields) {
// Fast path to eliminate redundancies. if (filter.accept(f)) {
final Object o = f.get(ob); // Fast path to eliminate redundancies.
if (o != null && !seen.contains(o)) { final Object o = f.get(ob);
stack.add(o); if (o != null && !seen.contains(o)) {
stack.add(o);
}
} }
} }