mirror of https://github.com/apache/lucene.git

LUCENE-5765: Add tests to OrdinalMap.ramBytesUsed.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1602880 13f79535-47bb-0310-9956-ffa450edef68

parent 67d1a72c55
commit a54f8a2e4e
```diff
@@ -112,9 +112,9 @@ class FrozenBufferedUpdates {
     }
     binaryDVUpdates = allBinaryUpdates.toArray(new BinaryDocValuesUpdate[allBinaryUpdates.size()]);
 
-    bytesUsed = (int) terms.ramBytesUsed() + queries.length * BYTES_PER_DEL_QUERY
-        + numericUpdatesSize + numericDVUpdates.length * RamUsageEstimator.NUM_BYTES_OBJECT_REF
-        + binaryUpdatesSize + binaryDVUpdates.length * RamUsageEstimator.NUM_BYTES_OBJECT_REF;
+    bytesUsed = (int) (terms.ramBytesUsed() + queries.length * BYTES_PER_DEL_QUERY
+        + numericUpdatesSize + RamUsageEstimator.shallowSizeOf(numericDVUpdates)
+        + binaryUpdatesSize + RamUsageEstimator.shallowSizeOf(binaryDVUpdates));
 
     numTermDeletes = deletes.numTermDeletes.get();
   }
```
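This hunk swaps the hand-rolled `array.length * NUM_BYTES_OBJECT_REF` accounting for `RamUsageEstimator.shallowSizeOf(array)` (the overload added further down in this commit), which also counts the array header and rounds up to the JVM's object alignment; it additionally moves the `(int)` cast outside the parentheses so the sum is computed in `long` arithmetic before narrowing. A minimal sketch of the difference, assuming a Lucene core JAR of this era on the classpath (exact byte counts vary by JVM and compressed-oops settings):

```java
import org.apache.lucene.util.RamUsageEstimator;

public class ShallowSizeDemo {
  public static void main(String[] args) {
    Object[] updates = new Object[100];

    // Old-style accounting: references only, ignoring the array object itself.
    long refsOnly = (long) updates.length * RamUsageEstimator.NUM_BYTES_OBJECT_REF;

    // New accounting: array header + references, aligned to the object size.
    long shallow = RamUsageEstimator.shallowSizeOf(updates);

    // shallow >= refsOnly; the gap is the header plus alignment padding.
    System.out.println(refsOnly + " vs " + shallow);
  }
}
```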
```diff
@@ -25,6 +25,7 @@ import org.apache.lucene.index.MultiTermsEnum.TermsEnumWithSlice;
 import org.apache.lucene.util.Accountable;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.RamUsageEstimator;
 import org.apache.lucene.util.packed.AppendingPackedLongBuffer;
 import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;
 import org.apache.lucene.util.packed.PackedInts;
```
```diff
@@ -377,6 +378,9 @@ public class MultiDocValues {
   // TODO: use more efficient packed ints structures?
   // TODO: pull this out? its pretty generic (maps between N ord()-enabled TermsEnums)
   public static class OrdinalMap implements Accountable {
+
+    private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(OrdinalMap.class);
+
     // cache key of whoever asked for this awful thing
     final Object owner;
     // globalOrd -> (globalOrd - segmentOrd) where segmentOrd is the ordinal in the first segment that contains this term
```
```diff
@@ -473,7 +477,7 @@ public class MultiDocValues {
 
     @Override
     public long ramBytesUsed() {
-      long size = globalOrdDeltas.ramBytesUsed() + firstSegments.ramBytesUsed();
+      long size = BASE_RAM_BYTES_USED + globalOrdDeltas.ramBytesUsed() + firstSegments.ramBytesUsed() + RamUsageEstimator.shallowSizeOf(ordDeltas);
       for (int i = 0; i < ordDeltas.length; i++) {
         size += ordDeltas[i].ramBytesUsed();
       }
```
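Taken together, the three MultiDocValues hunks follow the usual `Accountable` recipe: precompute the shallow size of the instance once (`BASE_RAM_BYTES_USED`), then at call time add the shallow size of any owned arrays plus the deep `ramBytesUsed()` of each owned structure. A sketch of that recipe on a hypothetical class (`OrdTable` is invented for illustration, not part of Lucene):

```java
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;

// Hypothetical Accountable following the same pattern as OrdinalMap.
class OrdTable implements Accountable {
  private static final long BASE_RAM_BYTES_USED =
      RamUsageEstimator.shallowSizeOfInstance(OrdTable.class);

  final MonotonicAppendingLongBuffer deltas = new MonotonicAppendingLongBuffer();
  final MonotonicAppendingLongBuffer[] perSegment = new MonotonicAppendingLongBuffer[4];

  @Override
  public long ramBytesUsed() {
    // Shallow instance size, plus the shallow size of the array object itself...
    long size = BASE_RAM_BYTES_USED + RamUsageEstimator.shallowSizeOf(perSegment);
    // ...plus the deep size of every owned structure.
    size += deltas.ramBytesUsed();
    for (MonotonicAppendingLongBuffer b : perSegment) {
      if (b != null) {
        size += b.ramBytesUsed();
      }
    }
    return size;
  }
}
```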
```diff
@@ -307,6 +307,12 @@ public final class RamUsageEstimator {
     return alignObjectSize((long) NUM_BYTES_ARRAY_HEADER + (long) NUM_BYTES_DOUBLE * arr.length);
   }
 
+  /** Returns the shallow size in bytes of the Object[] object. */
+  // Use this method instead of #shallowSizeOf(Object) to avoid costly reflection
+  public static long shallowSizeOf(Object[] arr) {
+    return alignObjectSize((long) NUM_BYTES_ARRAY_HEADER + (long) NUM_BYTES_OBJECT_REF * arr.length);
+  }
+
   /**
    * Estimates a "shallow" memory usage of the given object. For arrays, this will be the
    * memory taken by array storage (no subreferences will be followed). For objects, this
```
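The new overload is a reflection-free twin of the general `shallowSizeOf(Object)`: for an `Object[]`, the shallow size is fully determined by two constants and the array length. A worked example under one common assumption (64-bit HotSpot with compressed oops: 16-byte array header, 4-byte references, 8-byte alignment); the code reads the constants at runtime, so the equality also holds on other JVMs:

```java
import org.apache.lucene.util.RamUsageEstimator;

public class ArrayShallowSizeMath {
  public static void main(String[] args) {
    Object[] arr = new Object[10];

    // shallowSizeOf(Object[]) == align(header + ref * length).
    long header = RamUsageEstimator.NUM_BYTES_ARRAY_HEADER; // 16 under the assumption above
    long ref = RamUsageEstimator.NUM_BYTES_OBJECT_REF;      // 4 under the assumption above
    long expected = RamUsageEstimator.alignObjectSize(header + ref * arr.length);

    // e.g. align(16 + 4 * 10) = align(56) = 56 with 8-byte alignment.
    System.out.println(expected == RamUsageEstimator.shallowSizeOf(arr)); // true
  }
}
```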
```diff
@@ -194,7 +194,7 @@ abstract class AbstractAppendingLongBuffer extends LongValues implements Accountable {
     // TODO: this is called per-doc-per-norms/dv-field, can we optimize this?
     long bytesUsed = RamUsageEstimator.alignObjectSize(baseRamBytesUsed())
         + (pending != null ? RamUsageEstimator.sizeOf(pending) : 0L)
-        + RamUsageEstimator.alignObjectSize(RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + (long) RamUsageEstimator.NUM_BYTES_OBJECT_REF * values.length); // values
+        + RamUsageEstimator.shallowSizeOf(values);
 
     return bytesUsed + valuesBytes;
   }
```
```diff
@@ -107,7 +107,7 @@ abstract class AbstractPagedMutable<T extends AbstractPagedMutable<T>> extends LongValues {
   /** Return the number of bytes used by this object. */
   public long ramBytesUsed() {
     long bytesUsed = RamUsageEstimator.alignObjectSize(baseRamBytesUsed());
-    bytesUsed += RamUsageEstimator.alignObjectSize(RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + (long) RamUsageEstimator.NUM_BYTES_OBJECT_REF * subMutables.length);
+    bytesUsed += RamUsageEstimator.alignObjectSize(RamUsageEstimator.shallowSizeOf(subMutables));
     for (PackedInts.Mutable gw : subMutables) {
       bytesUsed += gw.ramBytesUsed();
     }
```
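Note that AbstractPagedMutable still wraps the new call in `alignObjectSize` even though `shallowSizeOf` already returns an aligned value. That is harmless because alignment is idempotent: rounding an already-aligned size up to the alignment boundary is a no-op. A quick check, assuming the same Lucene core JAR:

```java
import org.apache.lucene.util.RamUsageEstimator;

public class AlignIdempotent {
  public static void main(String[] args) {
    Object[] pages = new Object[13];
    long once = RamUsageEstimator.shallowSizeOf(pages);    // already aligned
    long twice = RamUsageEstimator.alignObjectSize(once);  // re-aligning changes nothing
    System.out.println(once == twice); // true
  }
}
```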
```diff
@@ -0,0 +1,87 @@
+package org.apache.lucene.index;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.lang.reflect.Field;
+
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.codecs.lucene49.Lucene49DocValuesFormat;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.SortedDocValuesField;
+import org.apache.lucene.document.SortedSetDocValuesField;
+import org.apache.lucene.index.MultiDocValues.MultiSortedDocValues;
+import org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues;
+import org.apache.lucene.index.MultiDocValues.OrdinalMap;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.RamUsageTester;
+import org.apache.lucene.util.TestUtil;
+
+public class TestOrdinalMap extends LuceneTestCase {
+
+  private static final RamUsageTester.Filter ORDINAL_MAP_FILTER = new RamUsageTester.Filter() {
+    @Override
+    public boolean accept(Field field) {
+      if (field.getDeclaringClass().equals(OrdinalMap.class) && field.getName().equals("owner")) {
+        return false;
+      }
+      return true;
+    }
+  };
+
+  public void testRamBytesUsed() throws IOException {
+    Directory dir = newDirectory();
+    IndexWriterConfig cfg = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())).setCodec(TestUtil.alwaysDocValuesFormat(new Lucene49DocValuesFormat()));
+    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, cfg);
+    final int maxDoc = TestUtil.nextInt(random(), 10, 1000);
+    final int maxTermLength = TestUtil.nextInt(random(), 1, 4);
+    for (int i = 0; i < maxDoc; ++i) {
+      Document d = new Document();
+      if (random().nextBoolean()) {
+        d.add(new SortedDocValuesField("sdv", new BytesRef(TestUtil.randomSimpleString(random(), maxTermLength))));
+      }
+      final int numSortedSet = random().nextInt(3);
+      for (int j = 0; j < numSortedSet; ++j) {
+        d.add(new SortedSetDocValuesField("ssdv", new BytesRef(TestUtil.randomSimpleString(random(), maxTermLength))));
+      }
+      iw.addDocument(d);
+      if (rarely()) {
+        iw.getReader().close();
+      }
+    }
+    iw.commit();
+    DirectoryReader r = iw.getReader();
+    AtomicReader ar = SlowCompositeReaderWrapper.wrap(r);
+    SortedDocValues sdv = ar.getSortedDocValues("sdv");
+    if (sdv instanceof MultiSortedDocValues) {
+      OrdinalMap map = ((MultiSortedDocValues) sdv).mapping;
+      assertEquals(RamUsageTester.sizeOf(map, ORDINAL_MAP_FILTER), map.ramBytesUsed());
+    }
+    SortedSetDocValues ssdv = ar.getSortedSetDocValues("ssdv");
+    if (ssdv instanceof MultiSortedSetDocValues) {
+      OrdinalMap map = ((MultiSortedSetDocValues) ssdv).mapping;
+      assertEquals(RamUsageTester.sizeOf(map, ORDINAL_MAP_FILTER), map.ramBytesUsed());
+    }
+    iw.close();
+    r.close();
+    dir.close();
+  }
+
+}
```
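The one subtlety in the new test is the filter: `OrdinalMap.owner` is a cache key (the source comment calls it the "cache key of whoever asked for this awful thing") that the map references but does not own, so a naive reflective walk would charge the map for its whole owner. The filter excludes that single field before comparing against `ramBytesUsed()`. A stripped-down, hypothetical illustration of the same idea (`Holder` and its fields are invented for this example; only `RamUsageTester` is Lucene's):

```java
import java.lang.reflect.Field;
import org.apache.lucene.util.RamUsageTester;

public class FilterDemo {
  static class Holder {
    final Object owner;                   // referenced but not owned; exclude it
    final long[] payload = new long[128]; // owned; count it
    Holder(Object owner) { this.owner = owner; }
  }

  public static void main(String[] args) {
    Holder h = new Holder(new byte[1 << 20]); // a 1 MB "owner" we don't own

    RamUsageTester.Filter skipOwner = new RamUsageTester.Filter() {
      @Override
      public boolean accept(Field field) {
        return !(field.getDeclaringClass() == Holder.class
            && field.getName().equals("owner"));
      }
    };

    long all = RamUsageTester.sizeOf(h);              // includes the 1 MB array
    long owned = RamUsageTester.sizeOf(h, skipOwner); // excludes it
    System.out.println(owned + " <= " + all);
  }
}
```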
```diff
@@ -29,19 +29,44 @@ import java.util.NoSuchElementException;
 /** Crawls object graph to collect RAM usage for testing */
 public final class RamUsageTester {
 
   /**
+   * A {@link Filter} that accepts all fields.
+   */
+  private static final Filter DEFAULT_FILTER = new Filter() {
+
+    @Override
+    public boolean accept(Field field) {
+      return true;
+    }
+
+  };
+
+  /** A filter that allows to decide on what to take into account when measuring RAM usage. */
+  public static interface Filter {
+
+    /** Whether the provided field should be taken into account when measuring RAM usage. */
+    boolean accept(Field field);
+
+  }
+
+  /**
    * Estimates the RAM usage by the given object. It will
    * walk the object tree and sum up all referenced objects.
    *
    * <p><b>Resource Usage:</b> This method internally uses a set of
    * every object seen during traversals so it does allocate memory
    * (it isn't side-effect free). After the method exits, this memory
    * should be GCed.</p>
    */
-  public static long sizeOf(Object obj) {
-    return measureObjectSize(obj);
+  public static long sizeOf(Object obj, Filter filter) {
+    return measureObjectSize(obj, filter);
   }
 
+  /** Same as calling <code>sizeOf(obj, DEFAULT_FILTER)</code>. */
+  public static long sizeOf(Object obj) {
+    return sizeOf(obj, DEFAULT_FILTER);
+  }
+
   /**
    * Return a human-readable size of a given object.
    * @see #sizeOf(Object)
```
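The refactoring keeps the old one-argument `sizeOf` as a thin convenience wrapper that delegates with `DEFAULT_FILTER`, so existing callers are untouched while tests can pass a custom filter. Hypothetical usage of both entry points (the measured object here is arbitrary):

```java
import java.lang.reflect.Field;
import org.apache.lucene.util.RamUsageTester;

public class SizeOfUsage {
  public static void main(String[] args) {
    Object measured = new long[][] { new long[8], new long[16] };

    // Old entry point, still available: walks every reference field.
    long full = RamUsageTester.sizeOf(measured);

    // New entry point: a filter can prune fields from the walk.
    long filtered = RamUsageTester.sizeOf(measured, new RamUsageTester.Filter() {
      @Override
      public boolean accept(Field field) {
        return !field.getName().equals("owner"); // skip fields named "owner"
      }
    });

    // Equal here, since arrays have no reference *fields* to filter out.
    System.out.println(full + " / " + filtered);
  }
}
```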
```diff
@@ -50,14 +75,14 @@ public final class RamUsageTester {
   public static String humanSizeOf(Object object) {
     return RamUsageEstimator.humanReadableUnits(sizeOf(object));
   }
 
   /*
    * Non-recursive version of object descend. This consumes more memory than recursive in-depth
    * traversal but prevents stack overflows on long chains of objects
    * or complex graphs (a max. recursion depth on my machine was ~5000 objects linked in a chain
    * so not too much).
    */
-  private static long measureObjectSize(Object root) {
+  private static long measureObjectSize(Object root, Filter filter) {
     // Objects seen so far.
     final IdentityHashSet<Object> seen = new IdentityHashSet<>();
     // Class cache with reference Field and precalculated shallow size.
```
```diff
@@ -113,10 +138,12 @@ public final class RamUsageTester {
         }
 
         for (Field f : cachedInfo.referenceFields) {
-          // Fast path to eliminate redundancies.
-          final Object o = f.get(ob);
-          if (o != null && !seen.contains(o)) {
-            stack.add(o);
+          if (filter.accept(f)) {
+            // Fast path to eliminate redundancies.
+            final Object o = f.get(ob);
+            if (o != null && !seen.contains(o)) {
+              stack.add(o);
+            }
           }
         }
 
```
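These last two hunks thread the filter through `measureObjectSize`, whose shape the source comment describes: an explicit stack plus an identity-based "seen" set instead of recursion, so deep chains and cyclic graphs cannot overflow the call stack. A self-contained sketch of that traversal pattern under simplifying assumptions (it only counts reachable objects rather than summing sizes, ignores superclass fields and array elements, and `FieldFilter` is invented here to stand in for `RamUsageTester.Filter`):

```java
import java.lang.reflect.Field;
import java.lang.reflect.Modifier;
import java.util.ArrayList;
import java.util.Collections;
import java.util.IdentityHashMap;
import java.util.Set;

public class GraphWalkSketch {
  interface FieldFilter { boolean accept(Field f); }

  static int countReachable(Object root, FieldFilter filter) throws IllegalAccessException {
    // Identity-based "seen" set: equals()/hashCode() must not affect traversal.
    Set<Object> seen = Collections.newSetFromMap(new IdentityHashMap<Object, Boolean>());
    ArrayList<Object> stack = new ArrayList<>();
    stack.add(root);
    int count = 0;
    while (!stack.isEmpty()) {
      Object ob = stack.remove(stack.size() - 1);
      if (ob == null || !seen.add(ob)) continue; // skip nulls and already-visited objects
      count++;
      for (Field f : ob.getClass().getDeclaredFields()) {
        if (f.getType().isPrimitive() || Modifier.isStatic(f.getModifiers())) continue;
        if (!filter.accept(f)) continue; // the hook the patch adds
        f.setAccessible(true);
        Object o = f.get(ob);
        if (o != null && !seen.contains(o)) stack.add(o);
      }
    }
    return count;
  }
}
```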