From 84c114d0bcde3344f5f8f92cd495571eb885d018 Mon Sep 17 00:00:00 2001 From: Michael McCandless Date: Thu, 14 Aug 2014 13:39:29 +0000 Subject: [PATCH] LUCENE-5884: optimize FST.ramBytesUsed git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1617940 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/CHANGES.txt | 3 +++ .../lucene/util/fst/ByteSequenceOutputs.java | 4 +++- .../lucene/util/fst/CharSequenceOutputs.java | 8 +++++++ .../java/org/apache/lucene/util/fst/FST.java | 7 +++++-- .../lucene/util/fst/IntSequenceOutputs.java | 8 +++++++ .../org/apache/lucene/util/fst/NoOutputs.java | 5 +++++ .../org/apache/lucene/util/fst/Outputs.java | 5 +---- .../apache/lucene/util/fst/PairOutputs.java | 5 ++++- .../apache/lucene/util/fst/ListOfOutputs.java | 21 +++++++++++++++++++ .../util/fst/UpToTwoPositiveIntOutputs.java | 13 ++++++++++++ 10 files changed, 71 insertions(+), 8 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 41307c585da..23d1f4f5875 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -214,6 +214,9 @@ Optimizations * LUCENE-5856: Optimize Fixed/Open/LongBitSet to remove unnecessary AND. (Robert Muir) +* LUCENE-5884: Optimize FST.ramBytesUsed. 
(Adrien Grand, Robert Muir, + Mike McCandless) + Bug Fixes * LUCENE-5796: Fixes the Scorer.getChildren() method for two combinations diff --git a/lucene/core/src/java/org/apache/lucene/util/fst/ByteSequenceOutputs.java b/lucene/core/src/java/org/apache/lucene/util/fst/ByteSequenceOutputs.java index 38860b7c2b2..f6e206b30d3 100644 --- a/lucene/core/src/java/org/apache/lucene/util/fst/ByteSequenceOutputs.java +++ b/lucene/core/src/java/org/apache/lucene/util/fst/ByteSequenceOutputs.java @@ -151,8 +151,10 @@ public final class ByteSequenceOutputs extends Outputs { return output.toString(); } + private static final long BASE_NUM_BYTES = RamUsageEstimator.shallowSizeOf(NO_OUTPUT); + @Override public long ramBytesUsed(BytesRef output) { - return super.ramBytesUsed(output) + RamUsageEstimator.sizeOf(output.bytes); + return BASE_NUM_BYTES + RamUsageEstimator.sizeOf(output.bytes); } } diff --git a/lucene/core/src/java/org/apache/lucene/util/fst/CharSequenceOutputs.java b/lucene/core/src/java/org/apache/lucene/util/fst/CharSequenceOutputs.java index 3435c1dbeae..c7b18a189c5 100644 --- a/lucene/core/src/java/org/apache/lucene/util/fst/CharSequenceOutputs.java +++ b/lucene/core/src/java/org/apache/lucene/util/fst/CharSequenceOutputs.java @@ -22,6 +22,7 @@ import java.io.IOException; import org.apache.lucene.store.DataInput; import org.apache.lucene.store.DataOutput; import org.apache.lucene.util.CharsRef; +import org.apache.lucene.util.RamUsageEstimator; /** * An FST {@link Outputs} implementation where each output @@ -150,4 +151,11 @@ public final class CharSequenceOutputs extends Outputs { public String outputToString(CharsRef output) { return output.toString(); } + + private static final long BASE_NUM_BYTES = RamUsageEstimator.shallowSizeOf(NO_OUTPUT); + + @Override + public long ramBytesUsed(CharsRef output) { + return BASE_NUM_BYTES + RamUsageEstimator.sizeOf(output.chars); + } } diff --git a/lucene/core/src/java/org/apache/lucene/util/fst/FST.java 
b/lucene/core/src/java/org/apache/lucene/util/fst/FST.java index ba610639eb8..990e3760d77 100644 --- a/lucene/core/src/java/org/apache/lucene/util/fst/FST.java +++ b/lucene/core/src/java/org/apache/lucene/util/fst/FST.java @@ -428,6 +428,8 @@ public final class FST implements Accountable { return size; } + private int cachedArcsBytesUsed; + @Override public long ramBytesUsed() { long size = BASE_RAM_BYTES_USED; @@ -438,8 +440,7 @@ public final class FST implements Accountable { size += nodeAddress.ramBytesUsed(); size += inCounts.ramBytesUsed(); } - size += ramBytesUsed(cachedRootArcs); - size += ramBytesUsed(assertingCachedRootArcs); + size += cachedArcsBytesUsed; size += RamUsageEstimator.sizeOf(bytesPerArc); return size; } @@ -472,6 +473,7 @@ public final class FST implements Accountable { private void cacheRootArcs() throws IOException { cachedRootArcs = (Arc[]) new Arc[0x80]; readRootArcs(cachedRootArcs); + cachedArcsBytesUsed += ramBytesUsed(cachedRootArcs); assert setAssertingRootArcs(cachedRootArcs); assert assertRootArcs(); @@ -502,6 +504,7 @@ public final class FST implements Accountable { private boolean setAssertingRootArcs(Arc[] arcs) throws IOException { assertingCachedRootArcs = (Arc[]) new Arc[arcs.length]; readRootArcs(assertingCachedRootArcs); + cachedArcsBytesUsed *= 2; return true; } diff --git a/lucene/core/src/java/org/apache/lucene/util/fst/IntSequenceOutputs.java b/lucene/core/src/java/org/apache/lucene/util/fst/IntSequenceOutputs.java index 82482183097..136d3c1289c 100644 --- a/lucene/core/src/java/org/apache/lucene/util/fst/IntSequenceOutputs.java +++ b/lucene/core/src/java/org/apache/lucene/util/fst/IntSequenceOutputs.java @@ -22,6 +22,7 @@ import java.io.IOException; import org.apache.lucene.store.DataInput; import org.apache.lucene.store.DataOutput; import org.apache.lucene.util.IntsRef; +import org.apache.lucene.util.RamUsageEstimator; /** * An FST {@link Outputs} implementation where each output @@ -152,4 +153,11 @@ public final class 
IntSequenceOutputs extends Outputs { public String outputToString(IntsRef output) { return output.toString(); } + + private static final long BASE_NUM_BYTES = RamUsageEstimator.shallowSizeOf(NO_OUTPUT); + + @Override + public long ramBytesUsed(IntsRef output) { + return BASE_NUM_BYTES + RamUsageEstimator.sizeOf(output.ints); + } } diff --git a/lucene/core/src/java/org/apache/lucene/util/fst/NoOutputs.java b/lucene/core/src/java/org/apache/lucene/util/fst/NoOutputs.java index 39d2330cc19..1d05126bc6c 100644 --- a/lucene/core/src/java/org/apache/lucene/util/fst/NoOutputs.java +++ b/lucene/core/src/java/org/apache/lucene/util/fst/NoOutputs.java @@ -101,4 +101,9 @@ public final class NoOutputs extends Outputs { public String outputToString(Object output) { return ""; } + + @Override + public long ramBytesUsed(Object output) { + return 0; + } } diff --git a/lucene/core/src/java/org/apache/lucene/util/fst/Outputs.java b/lucene/core/src/java/org/apache/lucene/util/fst/Outputs.java index e2efd626221..ac69300d066 100644 --- a/lucene/core/src/java/org/apache/lucene/util/fst/Outputs.java +++ b/lucene/core/src/java/org/apache/lucene/util/fst/Outputs.java @@ -22,7 +22,6 @@ import java.io.IOException; import org.apache.lucene.store.DataInput; import org.apache.lucene.store.DataOutput; import org.apache.lucene.util.Accountable; -import org.apache.lucene.util.RamUsageEstimator; /** * Represents the outputs for an FST, providing the basic @@ -100,7 +99,5 @@ public abstract class Outputs { /** Return memory usage for the provided output. 
* @see Accountable */ - public long ramBytesUsed(T output) { - return RamUsageEstimator.shallowSizeOf(output); - } + public abstract long ramBytesUsed(T output); } diff --git a/lucene/core/src/java/org/apache/lucene/util/fst/PairOutputs.java b/lucene/core/src/java/org/apache/lucene/util/fst/PairOutputs.java index 8682ebf2b29..1358e8235f8 100644 --- a/lucene/core/src/java/org/apache/lucene/util/fst/PairOutputs.java +++ b/lucene/core/src/java/org/apache/lucene/util/fst/PairOutputs.java @@ -21,6 +21,7 @@ import java.io.IOException; import org.apache.lucene.store.DataInput; import org.apache.lucene.store.DataOutput; +import org.apache.lucene.util.RamUsageEstimator; /** * An FST {@link Outputs} implementation, holding two other outputs. @@ -176,9 +177,11 @@ public class PairOutputs<A,B> extends Outputs<PairOutputs.Pair<A,B>> { return "PairOutputs<" + outputs1 + "," + outputs2 + ">"; } + private static final long BASE_NUM_BYTES = RamUsageEstimator.shallowSizeOf(new Pair(null, null)); + @Override public long ramBytesUsed(Pair<A,B> output) { - long ramBytesUsed = super.ramBytesUsed(output); + long ramBytesUsed = BASE_NUM_BYTES; if (output.output1 != null) { ramBytesUsed += outputs1.ramBytesUsed(output.output1); } diff --git a/lucene/misc/src/java/org/apache/lucene/util/fst/ListOfOutputs.java b/lucene/misc/src/java/org/apache/lucene/util/fst/ListOfOutputs.java index 88fc5432d49..a8a72491b51 100644 --- a/lucene/misc/src/java/org/apache/lucene/util/fst/ListOfOutputs.java +++ b/lucene/misc/src/java/org/apache/lucene/util/fst/ListOfOutputs.java @@ -24,6 +24,7 @@ import java.util.List; import org.apache.lucene.store.DataInput; import org.apache.lucene.store.DataOutput; import org.apache.lucene.util.IntsRef; // javadocs +import org.apache.lucene.util.RamUsageEstimator; /** * Wraps another Outputs implementation and encodes one or @@ -208,4 +209,24 @@ public final class ListOfOutputs<T> extends Outputs<Object> { return (List) output; } } + + private static final long BASE_LIST_NUM_BYTES = 
RamUsageEstimator.shallowSizeOf(new ArrayList()); + + @Override + public long ramBytesUsed(Object output) { + long bytes = 0; + if (output instanceof List) { + bytes += BASE_LIST_NUM_BYTES; + List<T> outputList = (List<T>) output; + for(T _output : outputList) { + bytes += outputs.ramBytesUsed(_output); + } + // 2 * to allow for ArrayList's oversizing: + bytes += 2 * outputList.size() * RamUsageEstimator.NUM_BYTES_OBJECT_REF; + } else { + bytes += outputs.ramBytesUsed((T) output); + } + + return bytes; + } } diff --git a/lucene/misc/src/java/org/apache/lucene/util/fst/UpToTwoPositiveIntOutputs.java b/lucene/misc/src/java/org/apache/lucene/util/fst/UpToTwoPositiveIntOutputs.java index 78e2715b56f..e2fb5984d3b 100644 --- a/lucene/misc/src/java/org/apache/lucene/util/fst/UpToTwoPositiveIntOutputs.java +++ b/lucene/misc/src/java/org/apache/lucene/util/fst/UpToTwoPositiveIntOutputs.java @@ -21,6 +21,7 @@ import java.io.IOException; import org.apache.lucene.store.DataInput; import org.apache.lucene.store.DataOutput; +import org.apache.lucene.util.RamUsageEstimator; /** * An FST {@link Outputs} implementation where each output @@ -232,4 +233,16 @@ public final class UpToTwoPositiveIntOutputs extends Outputs<Object> { assert valid(second, false); return new TwoLongs((Long) first, (Long) second); } + + private static final long TWO_LONGS_NUM_BYTES = RamUsageEstimator.shallowSizeOf(new TwoLongs(0, 0)); + + @Override + public long ramBytesUsed(Object o) { + if (o instanceof Long) { + return RamUsageEstimator.sizeOf((Long) o); + } else { + assert o instanceof TwoLongs; + return TWO_LONGS_NUM_BYTES; + } + } }