From fab92b83d0dfa8a55fd4ac39fba11d695c2b1201 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Mon, 24 Feb 2014 18:28:27 +0000 Subject: [PATCH] LUCENE-5463: RUE.(human)sizeOf(Object) is now a forbidden API. git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1571384 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/CHANGES.txt | 5 ++++ lucene/common-build.xml | 18 +++++++++++--- lucene/core/build.xml | 6 +++++ .../lucene/index/FilterAtomicReader.java | 9 +++++++ .../apache/lucene/util/RamUsageEstimator.java | 5 ++++ .../lucene/util/TestRamUsageEstimator.java | 6 +++++ .../facet/taxonomy/CachedOrdinalsReader.java | 10 +++++++- lucene/memory/build.xml | 4 ++++ .../analyzing/AnalyzingInfixSuggester.java | 13 +++++++++- .../suggest/fst/FSTCompletionLookup.java | 10 +++++++- .../search/suggest/jaspell/JaspellLookup.java | 2 +- .../jaspell/JaspellTernarySearchTrie.java | 24 ++++++++++++++++++- .../lucene/search/suggest/tst/TSTLookup.java | 6 ++++- .../search/suggest/tst/TernaryTreeNode.java | 20 ++++++++++++++++ lucene/test-framework/build.xml | 1 + lucene/tools/build.xml | 1 + lucene/tools/forbiddenApis/rue.txt | 19 +++++++++++++++ 17 files changed, 150 insertions(+), 9 deletions(-) create mode 100644 lucene/tools/forbiddenApis/rue.txt diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 10ec410d1d1..90c41998581 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -88,6 +88,11 @@ Test Framework * LUCENE-5449: Rename _TestUtil and _TestHelper to remove the leading _. +Build + +* LUCENE-5463: RamUsageEstimator.(human)sizeOf(Object) is now a forbidden API. 
+ (Adrien Grand, Robert Muir) + ======================= Lucene 4.7.0 ======================= New Features diff --git a/lucene/common-build.xml b/lucene/common-build.xml index ee3a3dd0585..45d030fb8bc 100644 --- a/lucene/common-build.xml +++ b/lucene/common-build.xml @@ -2192,6 +2192,7 @@ ${ant.project.name}.test.dependencies=${test.classpath.list} + - + - + + @@ -2223,12 +2225,22 @@ ${ant.project.name}.test.dependencies=${test.classpath.list} + - + + + + + + + + + + diff --git a/lucene/core/build.xml b/lucene/core/build.xml index c114bc7abe9..7aa1f1eba10 100644 --- a/lucene/core/build.xml +++ b/lucene/core/build.xml @@ -30,6 +30,12 @@ org/apache/lucene/util/PrintStreamInfoStream.class "/> + + diff --git a/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java b/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java index 8a618ec5905..9b3b214495c 100644 --- a/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java +++ b/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java @@ -46,6 +46,15 @@ import org.apache.lucene.util.BytesRef; */ public class FilterAtomicReader extends AtomicReader { + /** Get the wrapped instance by {@code reader} as long as this reader is + * an instance of {@link FilterAtomicReader}. */ + public static AtomicReader unwrap(AtomicReader reader) { + while (reader instanceof FilterAtomicReader) { + reader = ((FilterAtomicReader) reader).in; + } + return reader; + } + /** Base class for filtering {@link Fields} * implementations. 
*/ public static class FilterFields extends Fields { diff --git a/lucene/core/src/java/org/apache/lucene/util/RamUsageEstimator.java b/lucene/core/src/java/org/apache/lucene/util/RamUsageEstimator.java index 06cad08ec68..88ab5f9cc48 100644 --- a/lucene/core/src/java/org/apache/lucene/util/RamUsageEstimator.java +++ b/lucene/core/src/java/org/apache/lucene/util/RamUsageEstimator.java @@ -320,6 +320,11 @@ public final class RamUsageEstimator { return alignObjectSize((long) NUM_BYTES_ARRAY_HEADER + (long) NUM_BYTES_DOUBLE * arr.length); } + /** Returns the size in bytes of the String object: its shallow size plus the aligned size of a char array of {@code s.length()} elements. {@code s} must not be null. */ + public static long sizeOf(String s) { + return shallowSizeOf(s) + alignObjectSize((long) NUM_BYTES_ARRAY_HEADER + (long) NUM_BYTES_CHAR * s.length()); // computed in long, like the array overloads, so very long strings cannot overflow int + } + /** * Estimates the RAM usage by the given object. It will * walk the object tree and sum up all referenced objects. diff --git a/lucene/core/src/test/org/apache/lucene/util/TestRamUsageEstimator.java b/lucene/core/src/test/org/apache/lucene/util/TestRamUsageEstimator.java index caab70b0d00..cb5442a4f93 100644 --- a/lucene/core/src/test/org/apache/lucene/util/TestRamUsageEstimator.java +++ b/lucene/core/src/test/org/apache/lucene/util/TestRamUsageEstimator.java @@ -21,6 +21,8 @@ import static org.apache.lucene.util.RamUsageEstimator.*; import java.util.Random; +import com.carrotsearch.randomizedtesting.generators.RandomStrings; + public class TestRamUsageEstimator extends LuceneTestCase { public void testSanity() { assertTrue(sizeOf(new String("test string")) > shallowSizeOfInstance(String.class)); @@ -84,6 +86,10 @@ public class TestRamUsageEstimator extends LuceneTestCase { double[] array = new double[rnd.nextInt(1024)]; assertEquals(sizeOf(array), sizeOf((Object) array)); } + { + String s = RandomStrings.randomUnicodeOfCodepointLength(random(), random().nextInt(10)); + assertEquals(sizeOf(s), sizeOf((Object) s)); + } } public void testReferenceSize() { diff --git 
a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/CachedOrdinalsReader.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/CachedOrdinalsReader.java index 0abb5b936b3..8789deecc49 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/CachedOrdinalsReader.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/CachedOrdinalsReader.java @@ -136,13 +136,21 @@ public class CachedOrdinalsReader extends OrdinalsReader { this.ordinals = ords; } } + + /** Returns the number of bytes used by this cache entry: the shallow size of this object plus {@code offsets}, plus {@code ordinals} unless it is the very same object as {@code offsets}. */ public long ramBytesUsed() { + long mem = RamUsageEstimator.shallowSizeOf(this) + RamUsageEstimator.sizeOf(offsets); + if (offsets != ordinals) { + mem += RamUsageEstimator.sizeOf(ordinals); + } + return mem; + } } /** How many bytes is this cache using? */ public synchronized long ramBytesUsed() { long bytes = 0; for(CachedOrds ords : ordsCache.values()) { - bytes += RamUsageEstimator.sizeOf(ords); + bytes += ords.ramBytesUsed(); } return bytes; diff --git a/lucene/memory/build.xml b/lucene/memory/build.xml index cae567785f8..cae96627cca 100644 --- a/lucene/memory/build.xml +++ b/lucene/memory/build.xml @@ -19,6 +19,10 @@ + + Single-document in-memory index implementation diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java index 164819b7918..0944587a109 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java @@ -44,11 +44,13 @@ import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.BinaryDocValues; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.FieldInfo.IndexOptions; +import org.apache.lucene.index.FilterAtomicReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import 
org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.MultiDocValues; import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.index.SegmentReader; import org.apache.lucene.index.SlowCompositeReaderWrapper; import org.apache.lucene.index.Term; import org.apache.lucene.index.sorter.Sorter; @@ -640,7 +642,16 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable { @Override public long sizeInBytes() { - return RamUsageEstimator.sizeOf(this); + long mem = RamUsageEstimator.shallowSizeOf(this); + if (searcher != null) { + for (AtomicReaderContext context : searcher.getIndexReader().leaves()) { + AtomicReader reader = FilterAtomicReader.unwrap(context.reader()); + if (reader instanceof SegmentReader) { + mem += ((SegmentReader) reader).ramBytesUsed(); // cast the unwrapped reader; context.reader() may itself be a FilterAtomicReader + } + } + } + return mem; } @Override diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionLookup.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionLookup.java index 38132cad444..912c1ee6bbc 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionLookup.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionLookup.java @@ -291,7 +291,15 @@ public class FSTCompletionLookup extends Lookup { @Override public long sizeInBytes() { - return RamUsageEstimator.sizeOf(this); + long mem = RamUsageEstimator.shallowSizeOf(this) + RamUsageEstimator.shallowSizeOf(normalCompletion) + RamUsageEstimator.shallowSizeOf(higherWeightsCompletion); + if (normalCompletion != null) { + mem += normalCompletion.getFST().sizeInBytes(); + } + if (higherWeightsCompletion != null && (normalCompletion == null || normalCompletion.getFST() != higherWeightsCompletion.getFST())) { + // the fst should be shared between the 2 completion instances, don't count it twice + mem += higherWeightsCompletion.getFST().sizeInBytes(); + } + return mem; } @Override diff --git 
a/lucene/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java index 58207ef8217..941df306aea 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java @@ -198,7 +198,7 @@ public class JaspellLookup extends Lookup { /** Returns byte size of the underlying TST. */ @Override public long sizeInBytes() { - return RamUsageEstimator.sizeOf(trie); + return trie.sizeInBytes(); } @Override diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellTernarySearchTrie.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellTernarySearchTrie.java index 3ec84fe274d..44d9412f32a 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellTernarySearchTrie.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellTernarySearchTrie.java @@ -40,6 +40,7 @@ import java.util.Vector; import java.util.zip.GZIPInputStream; import org.apache.lucene.util.IOUtils; +import org.apache.lucene.util.RamUsageEstimator; /** * Implementation of a Ternary Search Trie, a data structure for storing @@ -75,7 +76,7 @@ public class JaspellTernarySearchTrie { protected Object data; /** The relative nodes. */ - protected TSTNode[] relatives = new TSTNode[4]; + protected final TSTNode[] relatives = new TSTNode[4]; /** The char used in the split. 
*/ protected char splitchar; @@ -92,6 +93,17 @@ public class JaspellTernarySearchTrie { this.splitchar = splitchar; relatives[PARENT] = parent; } + + /** Return an approximate memory usage for this node and the nodes reachable through its non-parent relatives. */ public long sizeInBytes() { + long mem = RamUsageEstimator.shallowSizeOf(this) + RamUsageEstimator.shallowSizeOf(relatives); + for (TSTNode node : relatives) { + if (node != null && node != relatives[PARENT]) { // skip the parent link: following it would recurse back into this node forever + mem += node.sizeInBytes(); + } + } + return mem; + } + } /** @@ -873,4 +885,14 @@ public class JaspellTernarySearchTrie { sortKeysNumReturnValues, sortKeysResult); } + /** Return an approximate memory usage for this trie. */ + public long sizeInBytes() { + long mem = RamUsageEstimator.shallowSizeOf(this); + final TSTNode root = getRoot(); + if (root != null) { + mem += root.sizeInBytes(); + } + return mem; + } + } diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java index 95fc389480a..f47c80875b7 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java @@ -216,7 +216,11 @@ public class TSTLookup extends Lookup { /** Returns byte size of the underlying TST */ @Override public long sizeInBytes() { - return RamUsageEstimator.sizeOf(autocomplete); + long mem = RamUsageEstimator.shallowSizeOf(this); + if (root != null) { + mem += root.sizeInBytes(); + } + return mem; } @Override diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/tst/TernaryTreeNode.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/tst/TernaryTreeNode.java index 3a2d34de10f..9af35cac236 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/tst/TernaryTreeNode.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/tst/TernaryTreeNode.java @@ -1,5 +1,7 @@ package org.apache.lucene.search.suggest.tst; +import org.apache.lucene.util.RamUsageEstimator; + /* * Licensed to the Apache Software 
Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -42,4 +44,22 @@ public class TernaryTreeNode { */ String token; Object val; + + /** Returns an estimate of the memory used by this node and everything below it: the node's shallow size, the recursive sizes of the lo/eq/hi kids that are present, the token string if set, and the shallow size of {@code val}. */ long sizeInBytes() { + long mem = RamUsageEstimator.shallowSizeOf(this); + if (loKid != null) { + mem += loKid.sizeInBytes(); + } + if (eqKid != null) { + mem += eqKid.sizeInBytes(); + } + if (hiKid != null) { + mem += hiKid.sizeInBytes(); + } + if (token != null) { + mem += RamUsageEstimator.sizeOf(token); + } + mem += RamUsageEstimator.shallowSizeOf(val); + return mem; + } } diff --git a/lucene/test-framework/build.xml b/lucene/test-framework/build.xml index a4abbe608b1..f8a93086d56 100644 --- a/lucene/test-framework/build.xml +++ b/lucene/test-framework/build.xml @@ -51,6 +51,7 @@ + diff --git a/lucene/tools/build.xml b/lucene/tools/build.xml index 4eed862d6a5..c300edbc493 100644 --- a/lucene/tools/build.xml +++ b/lucene/tools/build.xml @@ -39,6 +39,7 @@ +