mirror of https://github.com/apache/lucene.git

LUCENE-5765: Add tests to OrdinalMap.ramBytesUsed.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1602880 13f79535-47bb-0310-9956-ffa450edef68

parent 67d1a72c55
commit a54f8a2e4e
```diff
@@ -112,9 +112,9 @@ class FrozenBufferedUpdates {
     }
     binaryDVUpdates = allBinaryUpdates.toArray(new BinaryDocValuesUpdate[allBinaryUpdates.size()]);
 
-    bytesUsed = (int) terms.ramBytesUsed() + queries.length * BYTES_PER_DEL_QUERY
-        + numericUpdatesSize + numericDVUpdates.length * RamUsageEstimator.NUM_BYTES_OBJECT_REF
-        + binaryUpdatesSize + binaryDVUpdates.length * RamUsageEstimator.NUM_BYTES_OBJECT_REF;
+    bytesUsed = (int) (terms.ramBytesUsed() + queries.length * BYTES_PER_DEL_QUERY
+        + numericUpdatesSize + RamUsageEstimator.shallowSizeOf(numericDVUpdates)
+        + binaryUpdatesSize + RamUsageEstimator.shallowSizeOf(binaryDVUpdates));
 
     numTermDeletes = deletes.numTermDeletes.get();
   }
```
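This hunk swaps the hand-rolled `array.length * NUM_BYTES_OBJECT_REF` accounting for `RamUsageEstimator.shallowSizeOf(array)` (the overload added further down in this commit), which also counts the array header and rounds up to the JVM's object alignment; it additionally moves the `(int)` cast outside the parentheses so the sum is computed in `long` arithmetic before narrowing. A minimal sketch of the difference, assuming a Lucene core JAR of this era on the classpath (exact byte counts vary by JVM and compressed-oops settings):

```java
import org.apache.lucene.util.RamUsageEstimator;

public class ShallowSizeDemo {
  public static void main(String[] args) {
    Object[] updates = new Object[100];

    // Old-style accounting: references only, ignoring the array object itself.
    long refsOnly = (long) updates.length * RamUsageEstimator.NUM_BYTES_OBJECT_REF;

    // New accounting: array header + references, aligned to the object size.
    long shallow = RamUsageEstimator.shallowSizeOf(updates);

    // shallow >= refsOnly; the gap is the header plus alignment padding.
    System.out.println(refsOnly + " vs " + shallow);
  }
}
```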
```diff
@@ -25,6 +25,7 @@ import org.apache.lucene.index.MultiTermsEnum.TermsEnumWithSlice;
 import org.apache.lucene.util.Accountable;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.RamUsageEstimator;
 import org.apache.lucene.util.packed.AppendingPackedLongBuffer;
 import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;
 import org.apache.lucene.util.packed.PackedInts;
```
```diff
@@ -377,6 +378,9 @@ public class MultiDocValues {
   // TODO: use more efficient packed ints structures?
   // TODO: pull this out? its pretty generic (maps between N ord()-enabled TermsEnums)
   public static class OrdinalMap implements Accountable {
+
+    private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(OrdinalMap.class);
+
     // cache key of whoever asked for this awful thing
     final Object owner;
     // globalOrd -> (globalOrd - segmentOrd) where segmentOrd is the ordinal in the first segment that contains this term
```
```diff
@@ -473,7 +477,7 @@ public class MultiDocValues {
 
     @Override
     public long ramBytesUsed() {
-      long size = globalOrdDeltas.ramBytesUsed() + firstSegments.ramBytesUsed();
+      long size = BASE_RAM_BYTES_USED + globalOrdDeltas.ramBytesUsed() + firstSegments.ramBytesUsed() + RamUsageEstimator.shallowSizeOf(ordDeltas);
       for (int i = 0; i < ordDeltas.length; i++) {
         size += ordDeltas[i].ramBytesUsed();
       }
```
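Taken together, the three MultiDocValues hunks follow the usual `Accountable` recipe: precompute the shallow size of the instance once (`BASE_RAM_BYTES_USED`), then at call time add the shallow size of any owned arrays plus the deep `ramBytesUsed()` of each owned structure. A sketch of that recipe on a hypothetical class (`OrdTable` is invented for illustration, not part of Lucene):

```java
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;

// Hypothetical Accountable following the same pattern as OrdinalMap.
class OrdTable implements Accountable {
  private static final long BASE_RAM_BYTES_USED =
      RamUsageEstimator.shallowSizeOfInstance(OrdTable.class);

  final MonotonicAppendingLongBuffer deltas = new MonotonicAppendingLongBuffer();
  final MonotonicAppendingLongBuffer[] perSegment = new MonotonicAppendingLongBuffer[4];

  @Override
  public long ramBytesUsed() {
    // Shallow instance size, plus the shallow size of the array object itself...
    long size = BASE_RAM_BYTES_USED + RamUsageEstimator.shallowSizeOf(perSegment);
    // ...plus the deep size of every owned structure.
    size += deltas.ramBytesUsed();
    for (MonotonicAppendingLongBuffer b : perSegment) {
      if (b != null) {
        size += b.ramBytesUsed();
      }
    }
    return size;
  }
}
```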
```diff
@@ -307,6 +307,12 @@ public final class RamUsageEstimator {
     return alignObjectSize((long) NUM_BYTES_ARRAY_HEADER + (long) NUM_BYTES_DOUBLE * arr.length);
   }
 
+  /** Returns the shallow size in bytes of the Object[] object. */
+  // Use this method instead of #shallowSizeOf(Object) to avoid costly reflection
+  public static long shallowSizeOf(Object[] arr) {
+    return alignObjectSize((long) NUM_BYTES_ARRAY_HEADER + (long) NUM_BYTES_OBJECT_REF * arr.length);
+  }
+
   /**
    * Estimates a "shallow" memory usage of the given object. For arrays, this will be the
    * memory taken by array storage (no subreferences will be followed). For objects, this
```
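The new overload is a reflection-free twin of the general `shallowSizeOf(Object)`: for an `Object[]`, the shallow size is fully determined by two constants and the array length. A worked example under one common assumption (64-bit HotSpot with compressed oops: 16-byte array header, 4-byte references, 8-byte alignment); the code reads the constants at runtime, so the equality also holds on other JVMs:

```java
import org.apache.lucene.util.RamUsageEstimator;

public class ArrayShallowSizeMath {
  public static void main(String[] args) {
    Object[] arr = new Object[10];

    // shallowSizeOf(Object[]) == align(header + ref * length).
    long header = RamUsageEstimator.NUM_BYTES_ARRAY_HEADER; // 16 under the assumption above
    long ref = RamUsageEstimator.NUM_BYTES_OBJECT_REF;      // 4 under the assumption above
    long expected = RamUsageEstimator.alignObjectSize(header + ref * arr.length);

    // e.g. align(16 + 4 * 10) = align(56) = 56 with 8-byte alignment.
    System.out.println(expected == RamUsageEstimator.shallowSizeOf(arr)); // true
  }
}
```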
```diff
@@ -194,7 +194,7 @@ abstract class AbstractAppendingLongBuffer extends LongValues implements Accountable {
     // TODO: this is called per-doc-per-norms/dv-field, can we optimize this?
     long bytesUsed = RamUsageEstimator.alignObjectSize(baseRamBytesUsed())
         + (pending != null ? RamUsageEstimator.sizeOf(pending) : 0L)
-        + RamUsageEstimator.alignObjectSize(RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + (long) RamUsageEstimator.NUM_BYTES_OBJECT_REF * values.length); // values
+        + RamUsageEstimator.shallowSizeOf(values);
 
     return bytesUsed + valuesBytes;
   }
```
```diff
@@ -107,7 +107,7 @@ abstract class AbstractPagedMutable<T extends AbstractPagedMutable<T>> extends LongValues {
   /** Return the number of bytes used by this object. */
   public long ramBytesUsed() {
     long bytesUsed = RamUsageEstimator.alignObjectSize(baseRamBytesUsed());
-    bytesUsed += RamUsageEstimator.alignObjectSize(RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + (long) RamUsageEstimator.NUM_BYTES_OBJECT_REF * subMutables.length);
+    bytesUsed += RamUsageEstimator.alignObjectSize(RamUsageEstimator.shallowSizeOf(subMutables));
     for (PackedInts.Mutable gw : subMutables) {
       bytesUsed += gw.ramBytesUsed();
     }
```
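Note that AbstractPagedMutable still wraps the new call in `alignObjectSize` even though `shallowSizeOf` already returns an aligned value. That is harmless because alignment is idempotent: rounding an already-aligned size up to the alignment boundary is a no-op. A quick check, assuming the same Lucene core JAR:

```java
import org.apache.lucene.util.RamUsageEstimator;

public class AlignIdempotent {
  public static void main(String[] args) {
    Object[] pages = new Object[13];
    long once = RamUsageEstimator.shallowSizeOf(pages);    // already aligned
    long twice = RamUsageEstimator.alignObjectSize(once);  // re-aligning changes nothing
    System.out.println(once == twice); // true
  }
}
```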
```diff
@@ -0,0 +1,87 @@
+package org.apache.lucene.index;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.lang.reflect.Field;
+
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.codecs.lucene49.Lucene49DocValuesFormat;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.SortedDocValuesField;
+import org.apache.lucene.document.SortedSetDocValuesField;
+import org.apache.lucene.index.MultiDocValues.MultiSortedDocValues;
+import org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues;
+import org.apache.lucene.index.MultiDocValues.OrdinalMap;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.RamUsageTester;
+import org.apache.lucene.util.TestUtil;
+
+public class TestOrdinalMap extends LuceneTestCase {
+
+  private static final RamUsageTester.Filter ORDINAL_MAP_FILTER = new RamUsageTester.Filter() {
+    @Override
+    public boolean accept(Field field) {
+      if (field.getDeclaringClass().equals(OrdinalMap.class) && field.getName().equals("owner")) {
+        return false;
+      }
+      return true;
+    }
+  };
+
+  public void testRamBytesUsed() throws IOException {
+    Directory dir = newDirectory();
+    IndexWriterConfig cfg = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())).setCodec(TestUtil.alwaysDocValuesFormat(new Lucene49DocValuesFormat()));
+    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, cfg);
+    final int maxDoc = TestUtil.nextInt(random(), 10, 1000);
+    final int maxTermLength = TestUtil.nextInt(random(), 1, 4);
+    for (int i = 0; i < maxDoc; ++i) {
+      Document d = new Document();
+      if (random().nextBoolean()) {
+        d.add(new SortedDocValuesField("sdv", new BytesRef(TestUtil.randomSimpleString(random(), maxTermLength))));
+      }
+      final int numSortedSet = random().nextInt(3);
+      for (int j = 0; j < numSortedSet; ++j) {
+        d.add(new SortedSetDocValuesField("ssdv", new BytesRef(TestUtil.randomSimpleString(random(), maxTermLength))));
+      }
+      iw.addDocument(d);
+      if (rarely()) {
+        iw.getReader().close();
+      }
+    }
+    iw.commit();
+    DirectoryReader r = iw.getReader();
+    AtomicReader ar = SlowCompositeReaderWrapper.wrap(r);
+    SortedDocValues sdv = ar.getSortedDocValues("sdv");
+    if (sdv instanceof MultiSortedDocValues) {
+      OrdinalMap map = ((MultiSortedDocValues) sdv).mapping;
+      assertEquals(RamUsageTester.sizeOf(map, ORDINAL_MAP_FILTER), map.ramBytesUsed());
+    }
+    SortedSetDocValues ssdv = ar.getSortedSetDocValues("ssdv");
+    if (ssdv instanceof MultiSortedSetDocValues) {
+      OrdinalMap map = ((MultiSortedSetDocValues) ssdv).mapping;
+      assertEquals(RamUsageTester.sizeOf(map, ORDINAL_MAP_FILTER), map.ramBytesUsed());
+    }
+    iw.close();
+    r.close();
+    dir.close();
+  }
+
+}
```
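The one subtlety in the new test is the filter: `OrdinalMap.owner` is a cache key (the source comment calls it the "cache key of whoever asked for this awful thing") that the map references but does not own, so a naive reflective walk would charge the map for its whole owner. The filter excludes that single field before comparing against `ramBytesUsed()`. A stripped-down, hypothetical illustration of the same idea (`Holder` and its fields are invented for this example; only `RamUsageTester` is Lucene's):

```java
import java.lang.reflect.Field;
import org.apache.lucene.util.RamUsageTester;

public class FilterDemo {
  static class Holder {
    final Object owner;                   // referenced but not owned; exclude it
    final long[] payload = new long[128]; // owned; count it
    Holder(Object owner) { this.owner = owner; }
  }

  public static void main(String[] args) {
    Holder h = new Holder(new byte[1 << 20]); // a 1 MB "owner" we don't own

    RamUsageTester.Filter skipOwner = new RamUsageTester.Filter() {
      @Override
      public boolean accept(Field field) {
        return !(field.getDeclaringClass() == Holder.class
            && field.getName().equals("owner"));
      }
    };

    long all = RamUsageTester.sizeOf(h);              // includes the 1 MB array
    long owned = RamUsageTester.sizeOf(h, skipOwner); // excludes it
    System.out.println(owned + " <= " + all);
  }
}
```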
```diff
@@ -29,19 +29,44 @@ import java.util.NoSuchElementException;
 /** Crawls object graph to collect RAM usage for testing */
 public final class RamUsageTester {
 
   /**
+   * A {@link Filter} that accepts all fields.
+   */
+  private static final Filter DEFAULT_FILTER = new Filter() {
+
+    @Override
+    public boolean accept(Field field) {
+      return true;
+    }
+
+  };
+
+  /** A filter that allows to decide on what to take into account when measuring RAM usage. */
+  public static interface Filter {
+
+    /** Whether the provided field should be taken into account when measuring RAM usage. */
+    boolean accept(Field field);
+
+  }
+
+  /**
    * Estimates the RAM usage by the given object. It will
    * walk the object tree and sum up all referenced objects.
    *
    * <p><b>Resource Usage:</b> This method internally uses a set of
    * every object seen during traversals so it does allocate memory
    * (it isn't side-effect free). After the method exits, this memory
    * should be GCed.</p>
    */
-  public static long sizeOf(Object obj) {
-    return measureObjectSize(obj);
+  public static long sizeOf(Object obj, Filter filter) {
+    return measureObjectSize(obj, filter);
   }
 
+  /** Same as calling <code>sizeOf(obj, DEFAULT_FILTER)</code>. */
+  public static long sizeOf(Object obj) {
+    return sizeOf(obj, DEFAULT_FILTER);
+  }
+
   /**
    * Return a human-readable size of a given object.
    * @see #sizeOf(Object)
```
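The refactoring keeps the old one-argument `sizeOf` as a thin convenience wrapper that delegates with `DEFAULT_FILTER`, so existing callers are untouched while tests can pass a custom filter. Hypothetical usage of both entry points (the measured object here is arbitrary):

```java
import java.lang.reflect.Field;
import org.apache.lucene.util.RamUsageTester;

public class SizeOfUsage {
  public static void main(String[] args) {
    Object measured = new long[][] { new long[8], new long[16] };

    // Old entry point, still available: walks every reference field.
    long full = RamUsageTester.sizeOf(measured);

    // New entry point: a filter can prune fields from the walk.
    long filtered = RamUsageTester.sizeOf(measured, new RamUsageTester.Filter() {
      @Override
      public boolean accept(Field field) {
        return !field.getName().equals("owner"); // skip fields named "owner"
      }
    });

    // Equal here, since arrays have no reference *fields* to filter out.
    System.out.println(full + " / " + filtered);
  }
}
```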
```diff
@@ -50,14 +75,14 @@ public final class RamUsageTester {
   public static String humanSizeOf(Object object) {
     return RamUsageEstimator.humanReadableUnits(sizeOf(object));
   }
 
   /*
    * Non-recursive version of object descend. This consumes more memory than recursive in-depth
    * traversal but prevents stack overflows on long chains of objects
    * or complex graphs (a max. recursion depth on my machine was ~5000 objects linked in a chain
    * so not too much).
    */
-  private static long measureObjectSize(Object root) {
+  private static long measureObjectSize(Object root, Filter filter) {
     // Objects seen so far.
     final IdentityHashSet<Object> seen = new IdentityHashSet<>();
     // Class cache with reference Field and precalculated shallow size.
```
```diff
@@ -113,10 +138,12 @@ public final class RamUsageTester {
         }
 
         for (Field f : cachedInfo.referenceFields) {
-          // Fast path to eliminate redundancies.
-          final Object o = f.get(ob);
-          if (o != null && !seen.contains(o)) {
-            stack.add(o);
+          if (filter.accept(f)) {
+            // Fast path to eliminate redundancies.
+            final Object o = f.get(ob);
+            if (o != null && !seen.contains(o)) {
+              stack.add(o);
+            }
           }
         }
 
```
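These last two hunks thread the filter through `measureObjectSize`, whose shape the source comment describes: an explicit stack plus an identity-based "seen" set instead of recursion, so deep chains and cyclic graphs cannot overflow the call stack. A self-contained sketch of that traversal pattern under simplifying assumptions (it only counts reachable objects rather than summing sizes, ignores superclass fields and array elements, and `FieldFilter` is invented here to stand in for `RamUsageTester.Filter`):

```java
import java.lang.reflect.Field;
import java.lang.reflect.Modifier;
import java.util.ArrayList;
import java.util.Collections;
import java.util.IdentityHashMap;
import java.util.Set;

public class GraphWalkSketch {
  interface FieldFilter { boolean accept(Field f); }

  static int countReachable(Object root, FieldFilter filter) throws IllegalAccessException {
    // Identity-based "seen" set: equals()/hashCode() must not affect traversal.
    Set<Object> seen = Collections.newSetFromMap(new IdentityHashMap<Object, Boolean>());
    ArrayList<Object> stack = new ArrayList<>();
    stack.add(root);
    int count = 0;
    while (!stack.isEmpty()) {
      Object ob = stack.remove(stack.size() - 1);
      if (ob == null || !seen.add(ob)) continue; // skip nulls and already-visited objects
      count++;
      for (Field f : ob.getClass().getDeclaredFields()) {
        if (f.getType().isPrimitive() || Modifier.isStatic(f.getModifiers())) continue;
        if (!filter.accept(f)) continue; // the hook the patch adds
        f.setAccessible(true);
        Object o = f.get(ob);
        if (o != null && !seen.contains(o)) stack.add(o);
      }
    }
    return count;
  }
}
```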