diff --git a/lucene/core/src/java/org/apache/lucene/index/FrozenBufferedUpdates.java b/lucene/core/src/java/org/apache/lucene/index/FrozenBufferedUpdates.java index 99648d9fd50..59e5525f1e2 100644 --- a/lucene/core/src/java/org/apache/lucene/index/FrozenBufferedUpdates.java +++ b/lucene/core/src/java/org/apache/lucene/index/FrozenBufferedUpdates.java @@ -112,9 +112,9 @@ class FrozenBufferedUpdates { } binaryDVUpdates = allBinaryUpdates.toArray(new BinaryDocValuesUpdate[allBinaryUpdates.size()]); - bytesUsed = (int) terms.ramBytesUsed() + queries.length * BYTES_PER_DEL_QUERY - + numericUpdatesSize + numericDVUpdates.length * RamUsageEstimator.NUM_BYTES_OBJECT_REF - + binaryUpdatesSize + binaryDVUpdates.length * RamUsageEstimator.NUM_BYTES_OBJECT_REF; + bytesUsed = (int) (terms.ramBytesUsed() + queries.length * BYTES_PER_DEL_QUERY + + numericUpdatesSize + RamUsageEstimator.shallowSizeOf(numericDVUpdates) + + binaryUpdatesSize + RamUsageEstimator.shallowSizeOf(binaryDVUpdates)); numTermDeletes = deletes.numTermDeletes.get(); } diff --git a/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java b/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java index 9513bae9700..207d6a0a869 100644 --- a/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java +++ b/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java @@ -25,6 +25,7 @@ import org.apache.lucene.index.MultiTermsEnum.TermsEnumWithSlice; import org.apache.lucene.util.Accountable; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.RamUsageEstimator; import org.apache.lucene.util.packed.AppendingPackedLongBuffer; import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer; import org.apache.lucene.util.packed.PackedInts; @@ -377,6 +378,9 @@ public class MultiDocValues { // TODO: use more efficient packed ints structures? // TODO: pull this out? its pretty generic (maps between N ord()-enabled TermsEnums) public static class OrdinalMap implements Accountable { + + private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(OrdinalMap.class); + // cache key of whoever asked for this awful thing final Object owner; // globalOrd -> (globalOrd - segmentOrd) where segmentOrd is the the ordinal in the first segment that contains this term @@ -473,7 +477,7 @@ public class MultiDocValues { @Override public long ramBytesUsed() { - long size = globalOrdDeltas.ramBytesUsed() + firstSegments.ramBytesUsed(); + long size = BASE_RAM_BYTES_USED + globalOrdDeltas.ramBytesUsed() + firstSegments.ramBytesUsed() + RamUsageEstimator.shallowSizeOf(ordDeltas); for (int i = 0; i < ordDeltas.length; i++) { size += ordDeltas[i].ramBytesUsed(); } diff --git a/lucene/core/src/java/org/apache/lucene/util/RamUsageEstimator.java b/lucene/core/src/java/org/apache/lucene/util/RamUsageEstimator.java index 51e583494df..66fe7933d56 100644 --- a/lucene/core/src/java/org/apache/lucene/util/RamUsageEstimator.java +++ b/lucene/core/src/java/org/apache/lucene/util/RamUsageEstimator.java @@ -307,6 +307,12 @@ public final class RamUsageEstimator { return alignObjectSize((long) NUM_BYTES_ARRAY_HEADER + (long) NUM_BYTES_DOUBLE * arr.length); } + /** Returns the shallow size in bytes of the Object[] object. */ + // Use this method instead of #shallowSizeOf(Object) to avoid costly reflection + public static long shallowSizeOf(Object[] arr) { + return alignObjectSize((long) NUM_BYTES_ARRAY_HEADER + (long) NUM_BYTES_OBJECT_REF * arr.length); + } + /** * Estimates a "shallow" memory usage of the given object. For arrays, this will be the * memory taken by array storage (no subreferences will be followed). For objects, this diff --git a/lucene/core/src/java/org/apache/lucene/util/packed/AbstractAppendingLongBuffer.java b/lucene/core/src/java/org/apache/lucene/util/packed/AbstractAppendingLongBuffer.java index 54f524b473e..d36f705cde1 100644 --- a/lucene/core/src/java/org/apache/lucene/util/packed/AbstractAppendingLongBuffer.java +++ b/lucene/core/src/java/org/apache/lucene/util/packed/AbstractAppendingLongBuffer.java @@ -194,7 +194,7 @@ abstract class AbstractAppendingLongBuffer extends LongValues implements Account // TODO: this is called per-doc-per-norms/dv-field, can we optimize this? long bytesUsed = RamUsageEstimator.alignObjectSize(baseRamBytesUsed()) + (pending != null ? RamUsageEstimator.sizeOf(pending) : 0L) - + RamUsageEstimator.alignObjectSize(RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + (long) RamUsageEstimator.NUM_BYTES_OBJECT_REF * values.length); // values + + RamUsageEstimator.shallowSizeOf(values); return bytesUsed + valuesBytes; } diff --git a/lucene/core/src/java/org/apache/lucene/util/packed/AbstractPagedMutable.java b/lucene/core/src/java/org/apache/lucene/util/packed/AbstractPagedMutable.java index 30ace931b09..914cc5375b5 100644 --- a/lucene/core/src/java/org/apache/lucene/util/packed/AbstractPagedMutable.java +++ b/lucene/core/src/java/org/apache/lucene/util/packed/AbstractPagedMutable.java @@ -107,7 +107,7 @@ abstract class AbstractPagedMutable> extends L /** Return the number of bytes used by this object. */ public long ramBytesUsed() { long bytesUsed = RamUsageEstimator.alignObjectSize(baseRamBytesUsed()); - bytesUsed += RamUsageEstimator.alignObjectSize(RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + (long) RamUsageEstimator.NUM_BYTES_OBJECT_REF * subMutables.length); + bytesUsed += RamUsageEstimator.alignObjectSize(RamUsageEstimator.shallowSizeOf(subMutables)); for (PackedInts.Mutable gw : subMutables) { bytesUsed += gw.ramBytesUsed(); } diff --git a/lucene/core/src/test/org/apache/lucene/index/TestOrdinalMap.java b/lucene/core/src/test/org/apache/lucene/index/TestOrdinalMap.java new file mode 100644 index 00000000000..83520e848e8 --- /dev/null +++ b/lucene/core/src/test/org/apache/lucene/index/TestOrdinalMap.java @@ -0,0 +1,87 @@ +package org.apache.lucene.index; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.lang.reflect.Field; + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.codecs.lucene49.Lucene49DocValuesFormat; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.SortedDocValuesField; +import org.apache.lucene.document.SortedSetDocValuesField; +import org.apache.lucene.index.MultiDocValues.MultiSortedDocValues; +import org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues; +import org.apache.lucene.index.MultiDocValues.OrdinalMap; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.RamUsageTester; +import org.apache.lucene.util.TestUtil; + +public class TestOrdinalMap extends LuceneTestCase { + + private static final RamUsageTester.Filter ORDINAL_MAP_FILTER = new RamUsageTester.Filter() { + @Override + public boolean accept(Field field) { + if (field.getDeclaringClass().equals(OrdinalMap.class) && field.getName().equals("owner")) { + return false; + } + return true; + } + }; + + public void testRamBytesUsed() throws IOException { + Directory dir = newDirectory(); + IndexWriterConfig cfg = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())).setCodec(TestUtil.alwaysDocValuesFormat(new Lucene49DocValuesFormat())); + RandomIndexWriter iw = new RandomIndexWriter(random(), dir, cfg); + final int maxDoc = TestUtil.nextInt(random(), 10, 1000); + final int maxTermLength = TestUtil.nextInt(random(), 1, 4); + for (int i = 0; i < maxDoc; ++i) { + Document d = new Document(); + if (random().nextBoolean()) { + d.add(new SortedDocValuesField("sdv", new BytesRef(TestUtil.randomSimpleString(random(), maxTermLength)))); + } + final int numSortedSet = random().nextInt(3); + for (int j = 0; j < numSortedSet; ++j) { + d.add(new SortedSetDocValuesField("ssdv", new BytesRef(TestUtil.randomSimpleString(random(), maxTermLength)))); + } + iw.addDocument(d); + if (rarely()) { + iw.getReader().close(); + } + } + iw.commit(); + DirectoryReader r = iw.getReader(); + AtomicReader ar = SlowCompositeReaderWrapper.wrap(r); + SortedDocValues sdv = ar.getSortedDocValues("sdv"); + if (sdv instanceof MultiSortedDocValues) { + OrdinalMap map = ((MultiSortedDocValues) sdv).mapping; + assertEquals(RamUsageTester.sizeOf(map, ORDINAL_MAP_FILTER), map.ramBytesUsed()); + } + SortedSetDocValues ssdv = ar.getSortedSetDocValues("ssdv"); + if (ssdv instanceof MultiSortedSetDocValues) { + OrdinalMap map = ((MultiSortedSetDocValues) ssdv).mapping; + assertEquals(RamUsageTester.sizeOf(map, ORDINAL_MAP_FILTER), map.ramBytesUsed()); + } + iw.close(); + r.close(); + dir.close(); + } + +} diff --git a/lucene/test-framework/src/java/org/apache/lucene/util/RamUsageTester.java b/lucene/test-framework/src/java/org/apache/lucene/util/RamUsageTester.java index be02eb60736..5a3d96dbcc1 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/util/RamUsageTester.java +++ b/lucene/test-framework/src/java/org/apache/lucene/util/RamUsageTester.java @@ -29,19 +29,44 @@ import java.util.NoSuchElementException; /** Crawls object graph to collect RAM usage for testing */ public final class RamUsageTester { - /** + /** + * A {@link Filter} that accepts all fields. + */ + private static final Filter DEFAULT_FILTER = new Filter() { + + @Override + public boolean accept(Field field) { + return true; + } + + }; + + /** A filter that allows to decide on what to take into account when measuring RAM usage. */ + public static interface Filter { + + /** Whether the provided field should be taken into account when measuring RAM usage. */ + boolean accept(Field field); + + } + + /** * Estimates the RAM usage by the given object. It will * walk the object tree and sum up all referenced objects. - * + * *

Resource Usage: This method internally uses a set of * every object seen during traversals so it does allocate memory * (it isn't side-effect free). After the method exits, this memory * should be GCed.

*/ - public static long sizeOf(Object obj) { - return measureObjectSize(obj); + public static long sizeOf(Object obj, Filter filter) { + return measureObjectSize(obj, filter); } - + + /** Same as calling sizeOf(obj, DEFAULT_FILTER). */ + public static long sizeOf(Object obj) { + return sizeOf(obj, DEFAULT_FILTER); + } + /** * Return a human-readable size of a given object. * @see #sizeOf(Object) @@ -50,14 +75,14 @@ public final class RamUsageTester { public static String humanSizeOf(Object object) { return RamUsageEstimator.humanReadableUnits(sizeOf(object)); } - + /* - * Non-recursive version of object descend. This consumes more memory than recursive in-depth + * Non-recursive version of object descend. This consumes more memory than recursive in-depth * traversal but prevents stack overflows on long chains of objects * or complex graphs (a max. recursion depth on my machine was ~5000 objects linked in a chain - * so not too much). + * so not too much). */ - private static long measureObjectSize(Object root) { + private static long measureObjectSize(Object root, Filter filter) { // Objects seen so far. final IdentityHashSet seen = new IdentityHashSet<>(); // Class cache with reference Field and precalculated shallow size. @@ -113,10 +138,12 @@ public final class RamUsageTester { } for (Field f : cachedInfo.referenceFields) { - // Fast path to eliminate redundancies. - final Object o = f.get(ob); - if (o != null && !seen.contains(o)) { - stack.add(o); + if (filter.accept(f)) { + // Fast path to eliminate redundancies. + final Object o = f.get(ob); + if (o != null && !seen.contains(o)) { + stack.add(o); + } } }