From db8ea25a09000d8d90318e930b6763e0a960949c Mon Sep 17 00:00:00 2001
From: Adrien Grand
Date: Wed, 18 Jun 2014 23:43:19 +0000
Subject: [PATCH] LUCENE-5773: Improve test by measuring deltas instead of
 absolute values.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1603676 13f79535-47bb-0310-9956-ffa450edef68
---
 .../org/apache/lucene/util/PagedBytes.java    |  7 +-
 .../TestCompressingStoredFieldsFormat.java    |  1 -
 .../apache/lucene/util/TestPagedBytes.java    |  2 +-
 .../index/BaseIndexFileFormatTestCase.java    | 68 +++++++++++--------
 .../org/apache/lucene/index/RandomCodec.java  |  3 +
 5 files changed, 48 insertions(+), 33 deletions(-)

diff --git a/lucene/core/src/java/org/apache/lucene/util/PagedBytes.java b/lucene/core/src/java/org/apache/lucene/util/PagedBytes.java
index 5329a62ea20..5327f55b85b 100644
--- a/lucene/core/src/java/org/apache/lucene/util/PagedBytes.java
+++ b/lucene/core/src/java/org/apache/lucene/util/PagedBytes.java
@@ -236,8 +236,11 @@ public final class PagedBytes implements Accountable {
 
   @Override
   public long ramBytesUsed() {
-    long size = BASE_RAM_BYTES_USED + RamUsageEstimator.shallowSizeOf(blocks)
-        + bytesUsedPerBlock * numBlocks;
+    long size = BASE_RAM_BYTES_USED + RamUsageEstimator.shallowSizeOf(blocks);
+    if (numBlocks > 0) {
+      size += (numBlocks - 1) * bytesUsedPerBlock;
+      size += RamUsageEstimator.sizeOf(blocks[numBlocks - 1]);
+    }
     if (currentBlock != null) {
       size += RamUsageEstimator.sizeOf(currentBlock);
     }
diff --git a/lucene/core/src/test/org/apache/lucene/codecs/compressing/TestCompressingStoredFieldsFormat.java b/lucene/core/src/test/org/apache/lucene/codecs/compressing/TestCompressingStoredFieldsFormat.java
index 4348484c9bf..1e8f62f57af 100644
--- a/lucene/core/src/test/org/apache/lucene/codecs/compressing/TestCompressingStoredFieldsFormat.java
+++ b/lucene/core/src/test/org/apache/lucene/codecs/compressing/TestCompressingStoredFieldsFormat.java
@@ -29,7 +29,6 @@ import org.apache.lucene.document.IntField;
 import org.apache.lucene.index.BaseStoredFieldsFormatTestCase;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
-import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.store.Directory;
 import org.junit.Test;
 
diff --git a/lucene/core/src/test/org/apache/lucene/util/TestPagedBytes.java b/lucene/core/src/test/org/apache/lucene/util/TestPagedBytes.java
index ff875ec24e6..a2a0cccc58d 100644
--- a/lucene/core/src/test/org/apache/lucene/util/TestPagedBytes.java
+++ b/lucene/core/src/test/org/apache/lucene/util/TestPagedBytes.java
@@ -27,7 +27,6 @@ import org.apache.lucene.store.IOContext;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.store.MockDirectoryWrapper;
-import org.apache.lucene.util.PagedBytes.Reader;
 import org.junit.Ignore;
 
 public class TestPagedBytes extends LuceneTestCase {
@@ -196,6 +195,7 @@ public class TestPagedBytes extends LuceneTestCase {
     }
     assertEquals(RamUsageTester.sizeOf(b), b.ramBytesUsed());
     final PagedBytes.Reader reader = b.freeze(random().nextBoolean());
+    assertEquals(RamUsageTester.sizeOf(b), b.ramBytesUsed());
     assertEquals(RamUsageTester.sizeOf(reader), reader.ramBytesUsed());
   }
 
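The PagedBytes hunk above stops charging every block at the full shared block
size: only the first numBlocks - 1 blocks are assumed full, and the last block
is measured individually, since freezing can leave it smaller than the others.
The new assertEquals after freeze() in TestPagedBytes exercises exactly that
case. A minimal self-contained sketch of the accounting idea (hypothetical
BlockAccounting class and ARRAY_HEADER constant, not Lucene APIs):

    // Sketch of the accounting idea, assuming only the last block may be
    // smaller than the shared block size. Names here are illustrative.
    final class BlockAccounting {
      private static final long ARRAY_HEADER = 16; // assumed per-array overhead

      static long ramBytesUsed(byte[][] blocks, int numBlocks, long bytesUsedPerBlock) {
        long size = 0;
        if (numBlocks > 0) {
          // every block but the last is full-sized, so a multiply is exact
          size += (numBlocks - 1) * bytesUsedPerBlock;
          // the last block may have been trimmed, so measure it directly
          size += ARRAY_HEADER + blocks[numBlocks - 1].length;
        }
        return size;
      }
    }

The old formula, bytesUsedPerBlock * numBlocks, presumably over-counted
whenever the final block had been trimmed below the block size.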
diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/BaseIndexFileFormatTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/index/BaseIndexFileFormatTestCase.java
index 8bb8b321e0f..a4a1181dbe1 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/index/BaseIndexFileFormatTestCase.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/index/BaseIndexFileFormatTestCase.java
@@ -25,11 +25,13 @@ import java.util.Collections;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.IdentityHashMap;
+import java.util.List;
 import java.util.Map;
 import java.util.Set;
 
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.codecs.mockrandom.MockRandomPostingsFormat;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IndexInput;
@@ -38,7 +40,6 @@ import org.apache.lucene.util.InfoStream;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.RamUsageEstimator;
 import org.apache.lucene.util.RamUsageTester;
-import org.apache.lucene.util.packed.PackedInts;
 
 /**
  * Common tests to all index formats.
@@ -48,9 +49,6 @@ abstract class BaseIndexFileFormatTestCase extends LuceneTestCase {
   // metadata or Directory-level objects
   private static final Set<Class<?>> EXCLUDED_CLASSES = Collections.newSetFromMap(new IdentityHashMap<Class<?>,Boolean>());
 
-  // Notorious singletons
-  private static final Set<Object> EXCLUDED_OBJECTS = Collections.newSetFromMap(new IdentityHashMap<Object,Boolean>());
-
   static {
     // Directory objects, don't take into account eg. the NIO buffers
     EXCLUDED_CLASSES.add(Directory.class);
@@ -58,6 +56,7 @@ abstract class BaseIndexFileFormatTestCase extends LuceneTestCase {
 
     // used for thread management, not by the index
     EXCLUDED_CLASSES.add(CloseableThreadLocal.class);
+    EXCLUDED_CLASSES.add(ThreadLocal.class);
 
     // don't follow references to the top-level reader
     EXCLUDED_CLASSES.add(IndexReader.class);
@@ -70,16 +69,9 @@ abstract class BaseIndexFileFormatTestCase extends LuceneTestCase {
     EXCLUDED_CLASSES.add(SegmentCommitInfo.class);
     EXCLUDED_CLASSES.add(FieldInfo.class);
 
-    // singletons
-    EXCLUDED_CLASSES.add(Codec.class);
-    EXCLUDED_CLASSES.add(Enum.class);
-    for (PackedInts.Format format : PackedInts.Format.values()) {
-      for (int i = 1; i <= 64; ++i) {
-        if (format.isSupported(i)) {
-          EXCLUDED_OBJECTS.add(PackedInts.getDecoder(format, PackedInts.VERSION_CURRENT, i));
-        }
-      }
-    }
+    // constant overhead is typically due to strings
+    // TODO: can we remove this and still pass the test consistently
+    EXCLUDED_CLASSES.add(String.class);
   }
 
   static class Accumulator extends RamUsageTester.Accumulator {
@@ -91,9 +83,6 @@ abstract class BaseIndexFileFormatTestCase extends LuceneTestCase {
     }
 
    public long accumulateObject(Object o, long shallowSize, java.util.Map<java.lang.reflect.Field,Object> fieldValues, java.util.Collection<Object> queue) {
-      if (EXCLUDED_OBJECTS.contains(o)) {
-        return 0L;
-      }
       for (Class<?> clazz = o.getClass(); clazz != null; clazz = clazz.getSuperclass()) {
         if (EXCLUDED_CLASSES.contains(clazz) && o != root) {
           return 0;
@@ -118,6 +107,14 @@ abstract class BaseIndexFileFormatTestCase extends LuceneTestCase {
       return v;
     }
 
+    @Override
+    public long accumulateArray(Object array, long shallowSize,
+        List<Object> values, Collection<Object> queue) {
+      long v = super.accumulateArray(array, shallowSize, values, queue);
+      // System.out.println(array.getClass() + "=" + v);
+      return v;
+    }
+
   };
 
   /** Returns the codec to run tests against */
@@ -204,35 +201,48 @@
 
   /** Test the accuracy of the ramBytesUsed estimations. */
   public void testRamBytesUsed() throws IOException {
+    if (Codec.getDefault() instanceof RandomCodec) {
+      // this test relies on the fact that two segments will be written with
+      // the same codec so we need to disable MockRandomPF
+      final Set<String> avoidCodecs = new HashSet<>(((RandomCodec) Codec.getDefault()).avoidCodecs);
+      avoidCodecs.add(new MockRandomPostingsFormat().getName());
+      Codec.setDefault(new RandomCodec(random(), avoidCodecs));
+    }
     Directory dir = newDirectory();
     IndexWriterConfig cfg = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
     IndexWriter w = new IndexWriter(dir, cfg);
     // we need to index enough documents so that constant overhead doesn't dominate
     final int numDocs = atLeast(10000);
+    AtomicReader reader1 = null;
     for (int i = 0; i < numDocs; ++i) {
       Document d = new Document();
       addRandomFields(d);
       w.addDocument(d);
+      if (i == 100) {
+        w.forceMerge(1);
+        w.commit();
+        reader1 = getOnlySegmentReader(DirectoryReader.open(dir));
+      }
     }
     w.forceMerge(1);
     w.commit();
     w.close();
-    IndexReader reader = DirectoryReader.open(dir);
+    AtomicReader reader2 = getOnlySegmentReader(DirectoryReader.open(dir));
 
-    for (AtomicReaderContext context : reader.leaves()) {
-      final AtomicReader r = context.reader();
-      // beware of lazy-loaded stuff
-      new SimpleMergedSegmentWarmer(InfoStream.NO_OUTPUT).warm(r);
-      final long actualBytes = RamUsageTester.sizeOf(r, new Accumulator(r));
-      final long expectedBytes = ((SegmentReader) r).ramBytesUsed();
-      final long absoluteError = actualBytes - expectedBytes;
-      final double relativeError = (double) absoluteError / actualBytes;
-      final String message = "Actual RAM usage " + actualBytes + ", but got " + expectedBytes + ", " + 100*relativeError + "% error";
-      assertTrue(message, Math.abs(relativeError) < 0.20d || Math.abs(absoluteError) < 1000);
+    for (AtomicReader reader : Arrays.asList(reader1, reader2)) {
+      new SimpleMergedSegmentWarmer(InfoStream.NO_OUTPUT).warm(reader);
     }
-    reader.close();
+
+    final long actualBytes = RamUsageTester.sizeOf(reader2, new Accumulator(reader2)) - RamUsageTester.sizeOf(reader1, new Accumulator(reader1));
+    final long expectedBytes = ((SegmentReader) reader2).ramBytesUsed() - ((SegmentReader) reader1).ramBytesUsed();
+    final long absoluteError = actualBytes - expectedBytes;
+    final double relativeError = (double) absoluteError / actualBytes;
+    final String message = "Actual RAM usage " + actualBytes + ", but got " + expectedBytes + ", " + 100*relativeError + "% error";
+    assertTrue(message, Math.abs(relativeError) < 0.20d || Math.abs(absoluteError) < 1000);
+
+    reader1.close();
+    reader2.close();
     dir.close();
   }
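The test rewrite above is the heart of LUCENE-5773: instead of asserting the
absolute RAM estimate of one big segment, it snapshots a small segment (after
about 100 docs) and the final large one, and asserts on the difference, so any
constant overhead shared by both segments (strings, thread locals, codec
internals) cancels out. A short sketch of the delta comparison, reusing the
same tolerance rule as the test (hypothetical DeltaAssert helper, not part of
Lucene):

    // Hypothetical helper restating the test's delta-based acceptance rule:
    // subtracting a small-segment measurement from a large-segment one
    // cancels constant overhead that both measurements share.
    final class DeltaAssert {
      static boolean deltasAgree(long actualSmall, long actualLarge,
                                 long expectedSmall, long expectedLarge) {
        final long actualDelta = actualLarge - actualSmall;       // constants cancel
        final long expectedDelta = expectedLarge - expectedSmall; // constants cancel
        final long absoluteError = actualDelta - expectedDelta;
        final double relativeError = (double) absoluteError / actualDelta;
        // accept within 20% relative error or 1000 bytes absolute error,
        // mirroring the assertTrue(...) in the patched test
        return Math.abs(relativeError) < 0.20d || Math.abs(absoluteError) < 1000;
      }
    }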
diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java b/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java
index e4deb36b8cd..0cc4cc5f8b0 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java
@@ -75,6 +75,8 @@ public class RandomCodec extends Lucene49Codec {
   /** unique set of docvalues format names this codec knows about */
   public Set<String> dvFormatNames = new HashSet<>();
 
+  public final Set<String> avoidCodecs;
+
   /** memorized field->postingsformat mappings */
   // note: we have to sync this map even though its just for debugging/toString,
   // otherwise DWPT's .toString() calls that iterate over the map can
@@ -117,6 +119,7 @@ public RandomCodec(Random random, Set<String> avoidCodecs) {
     this.perFieldSeed = random.nextInt();
+    this.avoidCodecs = avoidCodecs;
     // TODO: make it possible to specify min/max iterms per
     // block via CL:
     int minItemsPerBlock = TestUtil.nextInt(random, 2, 100);
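The RandomCodec change simply keeps a reference to the avoidCodecs set so
tests can read and extend it. Per the comment in the test hunk, the delta
comparison only works if both segments are written with the same codec, so
MockRandomPF (which randomizes per-field formats) has to be excluded. The
idiom, as it appears in the patched testRamBytesUsed() setup above:

    // Excerpt from the patched test setup: widen the current RandomCodec's
    // avoid set and install a fresh codec so the two segments being diffed
    // are written with the same formats.
    if (Codec.getDefault() instanceof RandomCodec) {
      final Set<String> avoidCodecs = new HashSet<>(((RandomCodec) Codec.getDefault()).avoidCodecs);
      avoidCodecs.add(new MockRandomPostingsFormat().getName());
      Codec.setDefault(new RandomCodec(random(), avoidCodecs));
    }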