LUCENE-5884: optimize FST.ramBytesUsed

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1617940 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2014-08-14 13:39:29 +00:00
parent 8d260c3bc0
commit 84c114d0bc
10 changed files with 71 additions and 8 deletions

View File

@ -214,6 +214,9 @@ Optimizations
* LUCENE-5856: Optimize Fixed/Open/LongBitSet to remove unnecessary AND.
(Robert Muir)
* LUCENE-5884: Optimize FST.ramBytesUsed. (Adrien Grand, Robert Muir,
Mike McCandless)
Bug Fixes
* LUCENE-5796: Fixes the Scorer.getChildren() method for two combinations

View File

@ -151,8 +151,10 @@ public final class ByteSequenceOutputs extends Outputs<BytesRef> {
return output.toString();
}
// Shallow size of the NO_OUTPUT instance, computed once and reused as a fixed per-output overhead.
private static final long BASE_NUM_BYTES = RamUsageEstimator.shallowSizeOf(NO_OUTPUT);
/**
 * Returns the estimated memory used by {@code output}: a per-instance overhead
 * plus the size of its backing {@code bytes} array.
 */
@Override
public long ramBytesUsed(BytesRef output) {
// NOTE(review): two consecutive return statements cannot both be live code; this view
// appears to show the replaced line followed by its replacement -- confirm against the real file.
return super.ramBytesUsed(output) + RamUsageEstimator.sizeOf(output.bytes);
return BASE_NUM_BYTES + RamUsageEstimator.sizeOf(output.bytes);
}
}

View File

@ -22,6 +22,7 @@ import java.io.IOException;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.RamUsageEstimator;
/**
* An FST {@link Outputs} implementation where each output
@ -150,4 +151,11 @@ public final class CharSequenceOutputs extends Outputs<CharsRef> {
public String outputToString(CharsRef output) {
  // The printable form is whatever the CharsRef itself reports via toString().
  final String text = output.toString();
  return text;
}
/** Shallow size of the NO_OUTPUT instance, measured once and used as the fixed per-output overhead. */
private static final long BASE_NUM_BYTES = RamUsageEstimator.shallowSizeOf(NO_OUTPUT);

/**
 * Estimates the memory used by {@code output} as the fixed per-instance
 * overhead plus the size of its backing {@code chars} array.
 */
@Override
public long ramBytesUsed(CharsRef output) {
  final long charsBytes = RamUsageEstimator.sizeOf(output.chars);
  return BASE_NUM_BYTES + charsBytes;
}
}

View File

@ -428,6 +428,8 @@ public final class FST<T> implements Accountable {
return size;
}
// Memory estimate for the cached root arcs, filled in when the cache is built so that
// ramBytesUsed() can add it without re-measuring. Declared long so the compound
// assignments that update it never silently narrow a size estimate down to int.
private long cachedArcsBytesUsed;
@Override
public long ramBytesUsed() {
long size = BASE_RAM_BYTES_USED;
@ -438,8 +440,7 @@ public final class FST<T> implements Accountable {
size += nodeAddress.ramBytesUsed();
size += inCounts.ramBytesUsed();
}
size += ramBytesUsed(cachedRootArcs);
size += ramBytesUsed(assertingCachedRootArcs);
size += cachedArcsBytesUsed;
size += RamUsageEstimator.sizeOf(bytesPerArc);
return size;
}
@ -472,6 +473,7 @@ public final class FST<T> implements Accountable {
private void cacheRootArcs() throws IOException {
cachedRootArcs = (Arc<T>[]) new Arc[0x80];
readRootArcs(cachedRootArcs);
cachedArcsBytesUsed += ramBytesUsed(cachedRootArcs);
assert setAssertingRootArcs(cachedRootArcs);
assert assertRootArcs();
@ -502,6 +504,7 @@ public final class FST<T> implements Accountable {
private boolean setAssertingRootArcs(Arc<T>[] arcs) throws IOException {
  // Build a second root-arc array of the same length and fill it independently.
  final int arcCount = arcs.length;
  assertingCachedRootArcs = (Arc<T>[]) new Arc[arcCount];
  readRootArcs(assertingCachedRootArcs);
  // Double the recorded byte count; presumably this accounts for the second array
  // holding the same arcs as the first.
  cachedArcsBytesUsed = cachedArcsBytesUsed * 2;
  // Always true, so the call can sit inside an assert statement.
  return true;
}

View File

@ -22,6 +22,7 @@ import java.io.IOException;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.RamUsageEstimator;
/**
* An FST {@link Outputs} implementation where each output
@ -152,4 +153,11 @@ public final class IntSequenceOutputs extends Outputs<IntsRef> {
public String outputToString(IntsRef output) {
  // The printable form is whatever the IntsRef itself reports via toString().
  final String text = output.toString();
  return text;
}
/** Shallow size of the NO_OUTPUT instance, measured once and used as the fixed per-output overhead. */
private static final long BASE_NUM_BYTES = RamUsageEstimator.shallowSizeOf(NO_OUTPUT);

/**
 * Estimates the memory used by {@code output} as the fixed per-instance
 * overhead plus the size of its backing {@code ints} array.
 */
@Override
public long ramBytesUsed(IntsRef output) {
  final long intsBytes = RamUsageEstimator.sizeOf(output.ints);
  return BASE_NUM_BYTES + intsBytes;
}
}

View File

@ -101,4 +101,9 @@ public final class NoOutputs extends Outputs<Object> {
public String outputToString(Object output) {
  // The argument is never inspected; the rendering is always the empty string.
  final String rendered = "";
  return rendered;
}
/**
 * Reports zero bytes for every output; the argument is not inspected.
 */
@Override
public long ramBytesUsed(Object output) {
  final long noBytes = 0L;
  return noBytes;
}
}

View File

@ -22,7 +22,6 @@ import java.io.IOException;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.RamUsageEstimator;
/**
* Represents the outputs for an FST, providing the basic
@ -100,7 +99,5 @@ public abstract class Outputs<T> {
/**
 * Return memory usage for the provided output.
 *
 * @param output the output value whose memory footprint is estimated
 * @return the estimated memory usage of {@code output}
 * @see Accountable
 */
// NOTE(review): the concrete method below and the abstract declaration after it share one
// signature and cannot coexist in a class; this view appears to show the replaced form
// followed by its replacement -- confirm against the real file.
public long ramBytesUsed(T output) {
return RamUsageEstimator.shallowSizeOf(output);
}
public abstract long ramBytesUsed(T output);
}

View File

@ -21,6 +21,7 @@ import java.io.IOException;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.util.RamUsageEstimator;
/**
* An FST {@link Outputs} implementation, holding two other outputs.
@ -176,9 +177,11 @@ public class PairOutputs<A,B> extends Outputs<PairOutputs.Pair<A,B>> {
return "PairOutputs<" + outputs1 + "," + outputs2 + ">";
}
// Shallow size of a Pair instance, measured once on a pair whose two components are both null.
private static final long BASE_NUM_BYTES = RamUsageEstimator.shallowSizeOf(new Pair<Object,Object>(null, null));
@Override
public long ramBytesUsed(Pair<A,B> output) {
long ramBytesUsed = super.ramBytesUsed(output);
long ramBytesUsed = BASE_NUM_BYTES;
if (output.output1 != null) {
ramBytesUsed += outputs1.ramBytesUsed(output.output1);
}

View File

@ -24,6 +24,7 @@ import java.util.List;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.util.IntsRef; // javadocs
import org.apache.lucene.util.RamUsageEstimator;
/**
* Wraps another Outputs implementation and encodes one or
@ -208,4 +209,24 @@ public final class ListOfOutputs<T> extends Outputs<Object> {
return (List<T>) output;
}
}
/**
 * Shallow size of an empty {@link ArrayList}, measured once. The per-element
 * reference slots are accounted for separately in {@link #ramBytesUsed(Object)}.
 */
private static final long BASE_LIST_NUM_BYTES = RamUsageEstimator.shallowSizeOf(new ArrayList<Object>());

/**
 * Estimates the memory used by {@code output}, which is either a {@link List}
 * of wrapped outputs or a single wrapped output.
 *
 * <p>For a list, the estimate is the shallow list size, plus the wrapped
 * {@code Outputs}' estimate for each element, plus two object references per
 * element. For anything else, the wrapped {@code Outputs}' estimate is
 * returned unchanged.
 *
 * @param output a {@code List<T>} or a single {@code T}
 * @return the estimated memory usage of {@code output}
 */
@Override
public long ramBytesUsed(Object output) {
  if (!(output instanceof List)) {
    // Not a list: treat it as one output of the wrapped Outputs.
    return outputs.ramBytesUsed((T) output);
  }

  List<T> outputList = (List<T>) output;
  long bytes = BASE_LIST_NUM_BYTES;
  for (T element : outputList) {
    bytes += outputs.ramBytesUsed(element);
  }
  // 2 * to allow for ArrayList's oversizing. The long literal forces the whole
  // product into 64-bit arithmetic so it cannot wrap around as an int.
  bytes += 2L * outputList.size() * RamUsageEstimator.NUM_BYTES_OBJECT_REF;
  return bytes;
}
}

View File

@ -21,6 +21,7 @@ import java.io.IOException;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.util.RamUsageEstimator;
/**
* An FST {@link Outputs} implementation where each output
@ -232,4 +233,16 @@ public final class UpToTwoPositiveIntOutputs extends Outputs<Object> {
assert valid(second, false);
return new TwoLongs((Long) first, (Long) second);
}
/** Shallow size of a {@code TwoLongs} instance, measured once on a (0, 0) sample. */
private static final long TWO_LONGS_NUM_BYTES = RamUsageEstimator.shallowSizeOf(new TwoLongs(0, 0));

/**
 * Estimates the memory used by an output that is either a single {@link Long}
 * or a {@code TwoLongs}.
 *
 * @param o the output to measure
 * @return the {@code RamUsageEstimator} size of the {@code Long}, or the
 *         precomputed {@code TwoLongs} size for anything else
 */
@Override
public long ramBytesUsed(Object o) {
  if (!(o instanceof Long)) {
    // Anything that is not a Long is expected to be a TwoLongs.
    assert o instanceof TwoLongs;
    return TWO_LONGS_NUM_BYTES;
  }
  final Long single = (Long) o;
  return RamUsageEstimator.sizeOf(single);
}
}