From db8ea25a09000d8d90318e930b6763e0a960949c Mon Sep 17 00:00:00 2001
From: Adrien Grand
Date: Wed, 18 Jun 2014 23:43:19 +0000
Subject: [PATCH] LUCENE-5773: Improve test by measuring deltas instead of
 absolute values.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1603676 13f79535-47bb-0310-9956-ffa450edef68
---
 .../org/apache/lucene/util/PagedBytes.java    |  7 +-
 .../TestCompressingStoredFieldsFormat.java    |  1 -
 .../apache/lucene/util/TestPagedBytes.java    |  2 +-
 .../index/BaseIndexFileFormatTestCase.java    | 68 +++++++++++--------
 .../org/apache/lucene/index/RandomCodec.java  |  3 +
 5 files changed, 48 insertions(+), 33 deletions(-)

diff --git a/lucene/core/src/java/org/apache/lucene/util/PagedBytes.java b/lucene/core/src/java/org/apache/lucene/util/PagedBytes.java
index 5329a62ea20..5327f55b85b 100644
--- a/lucene/core/src/java/org/apache/lucene/util/PagedBytes.java
+++ b/lucene/core/src/java/org/apache/lucene/util/PagedBytes.java
@@ -236,8 +236,11 @@ public final class PagedBytes implements Accountable {
 
   @Override
   public long ramBytesUsed() {
-    long size = BASE_RAM_BYTES_USED + RamUsageEstimator.shallowSizeOf(blocks)
-        + bytesUsedPerBlock * numBlocks;
+    long size = BASE_RAM_BYTES_USED + RamUsageEstimator.shallowSizeOf(blocks);
+    if (numBlocks > 0) {
+      size += (numBlocks - 1) * bytesUsedPerBlock;
+      size += RamUsageEstimator.sizeOf(blocks[numBlocks - 1]);
+    }
     if (currentBlock != null) {
       size += RamUsageEstimator.sizeOf(currentBlock);
     }
diff --git a/lucene/core/src/test/org/apache/lucene/codecs/compressing/TestCompressingStoredFieldsFormat.java b/lucene/core/src/test/org/apache/lucene/codecs/compressing/TestCompressingStoredFieldsFormat.java
index 4348484c9bf..1e8f62f57af 100644
--- a/lucene/core/src/test/org/apache/lucene/codecs/compressing/TestCompressingStoredFieldsFormat.java
+++ b/lucene/core/src/test/org/apache/lucene/codecs/compressing/TestCompressingStoredFieldsFormat.java
@@ -29,7 +29,6 @@ import org.apache.lucene.document.IntField;
 import org.apache.lucene.index.BaseStoredFieldsFormatTestCase;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
-import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.store.Directory;
 import org.junit.Test;
 
diff --git a/lucene/core/src/test/org/apache/lucene/util/TestPagedBytes.java b/lucene/core/src/test/org/apache/lucene/util/TestPagedBytes.java
index ff875ec24e6..a2a0cccc58d 100644
--- a/lucene/core/src/test/org/apache/lucene/util/TestPagedBytes.java
+++ b/lucene/core/src/test/org/apache/lucene/util/TestPagedBytes.java
@@ -27,7 +27,6 @@ import org.apache.lucene.store.IOContext;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.store.MockDirectoryWrapper;
-import org.apache.lucene.util.PagedBytes.Reader;
 import org.junit.Ignore;
 
 public class TestPagedBytes extends LuceneTestCase {
@@ -196,6 +195,7 @@ public class TestPagedBytes extends LuceneTestCase {
     }
     assertEquals(RamUsageTester.sizeOf(b), b.ramBytesUsed());
     final PagedBytes.Reader reader = b.freeze(random().nextBoolean());
+    assertEquals(RamUsageTester.sizeOf(b), b.ramBytesUsed());
     assertEquals(RamUsageTester.sizeOf(reader), reader.ramBytesUsed());
   }
 
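The PagedBytes hunk above stops charging every block at the full shared block
size: only the first numBlocks - 1 blocks are assumed full, and the last block
is measured individually, since freezing can leave it smaller than the others.
The new assertEquals after freeze() in TestPagedBytes exercises exactly that
case. A minimal self-contained sketch of the accounting idea (hypothetical
BlockAccounting class and ARRAY_HEADER constant, not Lucene APIs):

    // Sketch of the accounting idea, assuming only the last block may be
    // smaller than the shared block size. Names here are illustrative.
    final class BlockAccounting {
      private static final long ARRAY_HEADER = 16; // assumed per-array overhead

      static long ramBytesUsed(byte[][] blocks, int numBlocks, long bytesUsedPerBlock) {
        long size = 0;
        if (numBlocks > 0) {
          // every block but the last is full-sized, so a multiply is exact
          size += (numBlocks - 1) * bytesUsedPerBlock;
          // the last block may have been trimmed, so measure it directly
          size += ARRAY_HEADER + blocks[numBlocks - 1].length;
        }
        return size;
      }
    }

The old formula, bytesUsedPerBlock * numBlocks, presumably over-counted
whenever the final block had been trimmed below the block size.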
diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/BaseIndexFileFormatTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/index/BaseIndexFileFormatTestCase.java
index 8bb8b321e0f..a4a1181dbe1 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/index/BaseIndexFileFormatTestCase.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/index/BaseIndexFileFormatTestCase.java
@@ -25,11 +25,13 @@ import java.util.Collections;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.IdentityHashMap;
+import java.util.List;
 import java.util.Map;
 import java.util.Set;
 
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.codecs.mockrandom.MockRandomPostingsFormat;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IndexInput;
@@ -38,7 +40,6 @@ import org.apache.lucene.util.InfoStream;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.RamUsageEstimator;
 import org.apache.lucene.util.RamUsageTester;
-import org.apache.lucene.util.packed.PackedInts;
 
 /**
  * Common tests to all index formats.
@@ -48,9 +49,6 @@ abstract class BaseIndexFileFormatTestCase extends LuceneTestCase {
   // metadata or Directory-level objects
   private static final Set<Class<?>> EXCLUDED_CLASSES = Collections.newSetFromMap(new IdentityHashMap<Class<?>,Boolean>());
 
-  // Notorious singletons
-  private static final Set<Object> EXCLUDED_OBJECTS = Collections.newSetFromMap(new IdentityHashMap<Object,Boolean>());
-
   static {
     // Directory objects, don't take into account eg. the NIO buffers
     EXCLUDED_CLASSES.add(Directory.class);
@@ -58,6 +56,7 @@ abstract class BaseIndexFileFormatTestCase extends LuceneTestCase {
 
     // used for thread management, not by the index
     EXCLUDED_CLASSES.add(CloseableThreadLocal.class);
+    EXCLUDED_CLASSES.add(ThreadLocal.class);
 
     // don't follow references to the top-level reader
     EXCLUDED_CLASSES.add(IndexReader.class);
@@ -70,16 +69,9 @@ abstract class BaseIndexFileFormatTestCase extends LuceneTestCase {
     EXCLUDED_CLASSES.add(SegmentCommitInfo.class);
     EXCLUDED_CLASSES.add(FieldInfo.class);
 
-    // singletons
-    EXCLUDED_CLASSES.add(Codec.class);
-    EXCLUDED_CLASSES.add(Enum.class);
-    for (PackedInts.Format format : PackedInts.Format.values()) {
-      for (int i = 1; i <= 64; ++i) {
-        if (format.isSupported(i)) {
-          EXCLUDED_OBJECTS.add(PackedInts.getDecoder(format, PackedInts.VERSION_CURRENT, i));
-        }
-      }
-    }
+    // constant overhead is typically due to strings
+    // TODO: can we remove this and still pass the test consistently
+    EXCLUDED_CLASSES.add(String.class);
   }
 
   static class Accumulator extends RamUsageTester.Accumulator {
@@ -91,9 +83,6 @@ abstract class BaseIndexFileFormatTestCase extends LuceneTestCase {
     }
 
    public long accumulateObject(Object o, long shallowSize, java.util.Map<java.lang.reflect.Field,Object> fieldValues, java.util.Collection<Object> queue) {
-      if (EXCLUDED_OBJECTS.contains(o)) {
-        return 0L;
-      }
       for (Class<?> clazz = o.getClass(); clazz != null; clazz = clazz.getSuperclass()) {
         if (EXCLUDED_CLASSES.contains(clazz) && o != root) {
           return 0;
@@ -118,6 +107,14 @@ abstract class BaseIndexFileFormatTestCase extends LuceneTestCase {
       return v;
     }
 
+    @Override
+    public long accumulateArray(Object array, long shallowSize,
+        List<Object> values, Collection<Object> queue) {
+      long v = super.accumulateArray(array, shallowSize, values, queue);
+      // System.out.println(array.getClass() + "=" + v);
+      return v;
+    }
+
   };
 
   /** Returns the codec to run tests against */
@@ -204,35 +201,48 @@
 
   /** Test the accuracy of the ramBytesUsed estimations. */
   public void testRamBytesUsed() throws IOException {
+    if (Codec.getDefault() instanceof RandomCodec) {
+      // this test relies on the fact that two segments will be written with
+      // the same codec so we need to disable MockRandomPF
+      final Set<String> avoidCodecs = new HashSet<>(((RandomCodec) Codec.getDefault()).avoidCodecs);
+      avoidCodecs.add(new MockRandomPostingsFormat().getName());
+      Codec.setDefault(new RandomCodec(random(), avoidCodecs));
+    }
     Directory dir = newDirectory();
     IndexWriterConfig cfg = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
     IndexWriter w = new IndexWriter(dir, cfg);
     // we need to index enough documents so that constant overhead doesn't dominate
     final int numDocs = atLeast(10000);
+    AtomicReader reader1 = null;
     for (int i = 0; i < numDocs; ++i) {
       Document d = new Document();
       addRandomFields(d);
       w.addDocument(d);
+      if (i == 100) {
+        w.forceMerge(1);
+        w.commit();
+        reader1 = getOnlySegmentReader(DirectoryReader.open(dir));
+      }
     }
     w.forceMerge(1);
     w.commit();
     w.close();
-    IndexReader reader = DirectoryReader.open(dir);
+    AtomicReader reader2 = getOnlySegmentReader(DirectoryReader.open(dir));
 
-    for (AtomicReaderContext context : reader.leaves()) {
-      final AtomicReader r = context.reader();
-      // beware of lazy-loaded stuff
-      new SimpleMergedSegmentWarmer(InfoStream.NO_OUTPUT).warm(r);
-      final long actualBytes = RamUsageTester.sizeOf(r, new Accumulator(r));
-      final long expectedBytes = ((SegmentReader) r).ramBytesUsed();
-      final long absoluteError = actualBytes - expectedBytes;
-      final double relativeError = (double) absoluteError / actualBytes;
-      final String message = "Actual RAM usage " + actualBytes + ", but got " + expectedBytes + ", " + 100*relativeError + "% error";
-      assertTrue(message, Math.abs(relativeError) < 0.20d || Math.abs(absoluteError) < 1000);
+    for (AtomicReader reader : Arrays.asList(reader1, reader2)) {
+      new SimpleMergedSegmentWarmer(InfoStream.NO_OUTPUT).warm(reader);
     }
-    reader.close();
+
+    final long actualBytes = RamUsageTester.sizeOf(reader2, new Accumulator(reader2)) - RamUsageTester.sizeOf(reader1, new Accumulator(reader1));
+    final long expectedBytes = ((SegmentReader) reader2).ramBytesUsed() - ((SegmentReader) reader1).ramBytesUsed();
+    final long absoluteError = actualBytes - expectedBytes;
+    final double relativeError = (double) absoluteError / actualBytes;
+    final String message = "Actual RAM usage " + actualBytes + ", but got " + expectedBytes + ", " + 100*relativeError + "% error";
+    assertTrue(message, Math.abs(relativeError) < 0.20d || Math.abs(absoluteError) < 1000);
+
+    reader1.close();
+    reader2.close();
     dir.close();
   }
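The test rewrite above is the heart of LUCENE-5773: instead of asserting the
absolute RAM estimate of one big segment, it snapshots a small segment (after
about 100 docs) and the final large one, and asserts on the difference, so any
constant overhead shared by both segments (strings, thread locals, codec
internals) cancels out. A short sketch of the delta comparison, reusing the
same tolerance rule as the test (hypothetical DeltaAssert helper, not part of
Lucene):

    // Hypothetical helper restating the test's delta-based acceptance rule:
    // subtracting a small-segment measurement from a large-segment one
    // cancels constant overhead that both measurements share.
    final class DeltaAssert {
      static boolean deltasAgree(long actualSmall, long actualLarge,
                                 long expectedSmall, long expectedLarge) {
        final long actualDelta = actualLarge - actualSmall;       // constants cancel
        final long expectedDelta = expectedLarge - expectedSmall; // constants cancel
        final long absoluteError = actualDelta - expectedDelta;
        final double relativeError = (double) absoluteError / actualDelta;
        // accept within 20% relative error or 1000 bytes absolute error,
        // mirroring the assertTrue(...) in the patched test
        return Math.abs(relativeError) < 0.20d || Math.abs(absoluteError) < 1000;
      }
    }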
diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java b/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java
index e4deb36b8cd..0cc4cc5f8b0 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java
@@ -75,6 +75,8 @@ public class RandomCodec extends Lucene49Codec {
   /** unique set of docvalues format names this codec knows about */
   public Set<String> dvFormatNames = new HashSet<>();
 
+  public final Set<String> avoidCodecs;
+
   /** memorized field->postingsformat mappings */
   // note: we have to sync this map even though its just for debugging/toString,
   // otherwise DWPT's .toString() calls that iterate over the map can
@@ -117,6 +119,7 @@ public RandomCodec(Random random, Set<String> avoidCodecs) {
     this.perFieldSeed = random.nextInt();
+    this.avoidCodecs = avoidCodecs;
     // TODO: make it possible to specify min/max iterms per
     // block via CL:
     int minItemsPerBlock = TestUtil.nextInt(random, 2, 100);
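The RandomCodec change simply keeps a reference to the avoidCodecs set so
tests can read and extend it. Per the comment in the test hunk, the delta
comparison only works if both segments are written with the same codec, so
MockRandomPF (which randomizes per-field formats) has to be excluded. The
idiom, as it appears in the patched testRamBytesUsed() setup above:

    // Excerpt from the patched test setup: widen the current RandomCodec's
    // avoid set and install a fresh codec so the two segments being diffed
    // are written with the same formats.
    if (Codec.getDefault() instanceof RandomCodec) {
      final Set<String> avoidCodecs = new HashSet<>(((RandomCodec) Codec.getDefault()).avoidCodecs);
      avoidCodecs.add(new MockRandomPostingsFormat().getName());
      Codec.setDefault(new RandomCodec(random(), avoidCodecs));
    }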