From 470dbb3858633b03425688c89f14809523dc4d30 Mon Sep 17 00:00:00 2001 From: Michael McCandless Date: Mon, 4 Nov 2013 12:31:33 +0000 Subject: [PATCH] LUCENE-5323: add Lookup.sizeInBytes git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1538578 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/CHANGES.txt | 3 +++ .../java/org/apache/lucene/search/suggest/Lookup.java | 5 +++++ .../suggest/analyzing/AnalyzingInfixSuggester.java | 6 ++++++ .../search/suggest/analyzing/AnalyzingSuggester.java | 1 + .../search/suggest/analyzing/FreeTextSuggester.java | 3 ++- .../lucene/search/suggest/fst/FSTCompletionLookup.java | 5 +++++ .../search/suggest/fst/WFSTCompletionLookup.java | 6 ++++++ .../lucene/search/suggest/jaspell/JaspellLookup.java | 7 +++++++ .../apache/lucene/search/suggest/tst/TSTLookup.java | 7 +++++++ .../lucene/search/suggest/LookupBenchmarkTest.java | 10 +--------- 10 files changed, 43 insertions(+), 10 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 30d0a6fb7dc..7d0bfa9a4d8 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -139,6 +139,9 @@ New Features a ValueSource, such as a NumericDocValuesField or an expression. (Shai Erera) +* LUCENE-5323: Add .sizeInBytes method to all suggesters (Lookup). + (Areek Zillur via Mike McCandless) + Bug Fixes * LUCENE-4998: Fixed a few places to pass IOContext.READONCE instead diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/Lookup.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/Lookup.java index dd35d850a1e..3b4e09ce4fa 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/Lookup.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/Lookup.java @@ -201,4 +201,9 @@ public abstract class Lookup { */ public abstract boolean load(InputStream input) throws IOException; + /** + * Get the size of the underlying lookup implementation in memory + * @return ram size of the lookup implementation in bytes + */ + public abstract long sizeInBytes(); } diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java index 7d388aa26b1..5d4dc52574b 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java @@ -72,6 +72,7 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.IOUtils; +import org.apache.lucene.util.RamUsageEstimator; import org.apache.lucene.util.Version; // TODO: @@ -599,4 +600,9 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable { dir = null; } } + + @Override + public long sizeInBytes() { + return RamUsageEstimator.sizeOf(this); + } }; diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java index 3187845cadd..4278440e219 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java @@ -250,6 +250,7 @@ public class AnalyzingSuggester extends Lookup { } /** Returns byte size of the underlying FST. */ + @Override public long sizeInBytes() { return fst == null ? 0 : fst.sizeInBytes(); } diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FreeTextSuggester.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FreeTextSuggester.java index 36c22a74239..797acaea959 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FreeTextSuggester.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FreeTextSuggester.java @@ -198,7 +198,8 @@ public class FreeTextSuggester extends Lookup { this.separator = separator; } - /** Returns byte size of the underlying FST. */ + /** Returns byte size of the underlying FST. */ + @Override public long sizeInBytes() { if (fst == null) { return 0; diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionLookup.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionLookup.java index 3dbf66d98ce..7c88a3e2b99 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionLookup.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionLookup.java @@ -288,4 +288,9 @@ public class FSTCompletionLookup extends Lookup { } return true; } + + @Override + public long sizeInBytes() { + return RamUsageEstimator.sizeOf(this); + } } diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/WFSTCompletionLookup.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/WFSTCompletionLookup.java index eb073568ee9..eaff4049342 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/WFSTCompletionLookup.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/WFSTCompletionLookup.java @@ -287,4 +287,10 @@ public class WFSTCompletionLookup extends Lookup { return left.compareTo(right); } }; + + /** Returns byte size of the underlying FST. */ + @Override + public long sizeInBytes() { + return (fst == null) ? 0 : fst.sizeInBytes(); + } } diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java index 0482e526f17..65c453249de 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java @@ -31,6 +31,7 @@ import org.apache.lucene.search.suggest.jaspell.JaspellTernarySearchTrie.TSTNode import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.CharsRef; import org.apache.lucene.util.IOUtils; +import org.apache.lucene.util.RamUsageEstimator; import org.apache.lucene.util.UnicodeUtil; /** @@ -200,4 +201,10 @@ public class JaspellLookup extends Lookup { } return true; } + + /** Returns byte size of the underlying TST. */ + @Override + public long sizeInBytes() { + return RamUsageEstimator.sizeOf(trie); + } } diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java index 21ed3adcfdf..59cd5f99df6 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java @@ -31,6 +31,7 @@ import org.apache.lucene.search.suggest.SortedInputIterator; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.CharsRef; import org.apache.lucene.util.IOUtils; +import org.apache.lucene.util.RamUsageEstimator; import org.apache.lucene.util.UnicodeUtil; /** @@ -219,5 +220,11 @@ public class TSTLookup extends Lookup { } return true; } + + /** Returns byte size of the underlying TST */ + @Override + public long sizeInBytes() { + return RamUsageEstimator.sizeOf(autocomplete); + } } diff --git a/lucene/suggest/src/test/org/apache/lucene/search/suggest/LookupBenchmarkTest.java b/lucene/suggest/src/test/org/apache/lucene/search/suggest/LookupBenchmarkTest.java index 7858a239aef..48742342fe6 100644 --- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/LookupBenchmarkTest.java +++ b/lucene/suggest/src/test/org/apache/lucene/search/suggest/LookupBenchmarkTest.java @@ -144,15 +144,7 @@ public class LookupBenchmarkTest extends LuceneTestCase { System.err.println("-- RAM consumption"); for (Class cls : benchmarkClasses) { Lookup lookup = buildLookup(cls, dictionaryInput); - long sizeInBytes; - if (lookup instanceof AnalyzingSuggester) { - // Just get size of FST: else we are also measuring - // size of MockAnalyzer which is non-trivial and - // varies depending on test seed: - sizeInBytes = ((AnalyzingSuggester) lookup).sizeInBytes(); - } else { - sizeInBytes = RamUsageEstimator.sizeOf(lookup); - } + long sizeInBytes = lookup.sizeInBytes(); System.err.println( String.format(Locale.ROOT, "%-15s size[B]:%,13d", lookup.getClass().getSimpleName(),