mirror of https://github.com/apache/lucene.git
LUCENE-4120: FST.pack: Use packed integer arrays for improved memory efficiency.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1349826 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
9182f9f907
commit
4104901e64
|
@ -37,6 +37,7 @@ import org.apache.lucene.util.IntsRef;
|
||||||
import org.apache.lucene.util.fst.Builder;
|
import org.apache.lucene.util.fst.Builder;
|
||||||
import org.apache.lucene.util.fst.FST;
|
import org.apache.lucene.util.fst.FST;
|
||||||
import org.apache.lucene.util.fst.PositiveIntOutputs;
|
import org.apache.lucene.util.fst.PositiveIntOutputs;
|
||||||
|
import org.apache.lucene.util.packed.PackedInts;
|
||||||
|
|
||||||
import com.ibm.icu.text.Normalizer2;
|
import com.ibm.icu.text.Normalizer2;
|
||||||
|
|
||||||
|
@ -161,7 +162,7 @@ public class TokenInfoDictionaryBuilder {
|
||||||
offset = next;
|
offset = next;
|
||||||
}
|
}
|
||||||
|
|
||||||
final FST<Long> fst = fstBuilder.finish().pack(2, 100000);
|
final FST<Long> fst = fstBuilder.finish().pack(2, 100000, PackedInts.DEFAULT);
|
||||||
|
|
||||||
System.out.print(" " + fst.getNodeCount() + " nodes, " + fst.getArcCount() + " arcs, " + fst.sizeInBytes() + " bytes... ");
|
System.out.print(" " + fst.getNodeCount() + " nodes, " + fst.getArcCount() + " arcs, " + fst.sizeInBytes() + " bytes... ");
|
||||||
dictionary.setFST(fst);
|
dictionary.setFST(fst);
|
||||||
|
|
|
@ -54,6 +54,7 @@ import org.apache.lucene.util.fst.ByteSequenceOutputs;
|
||||||
import org.apache.lucene.util.fst.BytesRefFSTEnum;
|
import org.apache.lucene.util.fst.BytesRefFSTEnum;
|
||||||
import org.apache.lucene.util.fst.FST;
|
import org.apache.lucene.util.fst.FST;
|
||||||
import org.apache.lucene.util.fst.Util;
|
import org.apache.lucene.util.fst.Util;
|
||||||
|
import org.apache.lucene.util.packed.PackedInts;
|
||||||
|
|
||||||
// TODO: would be nice to somehow allow this to act like
|
// TODO: would be nice to somehow allow this to act like
|
||||||
// InstantiatedIndex, by never writing to disk; ie you write
|
// InstantiatedIndex, by never writing to disk; ie you write
|
||||||
|
@ -81,14 +82,16 @@ import org.apache.lucene.util.fst.Util;
|
||||||
public class MemoryPostingsFormat extends PostingsFormat {
|
public class MemoryPostingsFormat extends PostingsFormat {
|
||||||
|
|
||||||
private final boolean doPackFST;
|
private final boolean doPackFST;
|
||||||
|
private final float acceptableOverheadRatio;
|
||||||
|
|
||||||
public MemoryPostingsFormat() {
|
public MemoryPostingsFormat() {
|
||||||
this(false);
|
this(false, PackedInts.DEFAULT);
|
||||||
}
|
}
|
||||||
|
|
||||||
public MemoryPostingsFormat(boolean doPackFST) {
|
public MemoryPostingsFormat(boolean doPackFST, float acceptableOverheadRatio) {
|
||||||
super("Memory");
|
super("Memory");
|
||||||
this.doPackFST = doPackFST;
|
this.doPackFST = doPackFST;
|
||||||
|
this.acceptableOverheadRatio = acceptableOverheadRatio;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -102,13 +105,15 @@ public class MemoryPostingsFormat extends PostingsFormat {
|
||||||
private final Builder<BytesRef> builder;
|
private final Builder<BytesRef> builder;
|
||||||
private final ByteSequenceOutputs outputs = ByteSequenceOutputs.getSingleton();
|
private final ByteSequenceOutputs outputs = ByteSequenceOutputs.getSingleton();
|
||||||
private final boolean doPackFST;
|
private final boolean doPackFST;
|
||||||
|
private final float acceptableOverheadRatio;
|
||||||
private int termCount;
|
private int termCount;
|
||||||
|
|
||||||
public TermsWriter(IndexOutput out, FieldInfo field, boolean doPackFST) {
|
public TermsWriter(IndexOutput out, FieldInfo field, boolean doPackFST, float acceptableOverheadRatio) {
|
||||||
this.out = out;
|
this.out = out;
|
||||||
this.field = field;
|
this.field = field;
|
||||||
this.doPackFST = doPackFST;
|
this.doPackFST = doPackFST;
|
||||||
builder = new Builder<BytesRef>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs, null, doPackFST);
|
this.acceptableOverheadRatio = acceptableOverheadRatio;
|
||||||
|
builder = new Builder<BytesRef>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs, null, doPackFST, acceptableOverheadRatio);
|
||||||
}
|
}
|
||||||
|
|
||||||
private class PostingsWriter extends PostingsConsumer {
|
private class PostingsWriter extends PostingsConsumer {
|
||||||
|
@ -265,7 +270,7 @@ public class MemoryPostingsFormat extends PostingsFormat {
|
||||||
out.writeVInt(docCount);
|
out.writeVInt(docCount);
|
||||||
FST<BytesRef> fst = builder.finish();
|
FST<BytesRef> fst = builder.finish();
|
||||||
if (doPackFST) {
|
if (doPackFST) {
|
||||||
fst = fst.pack(3, Math.max(10, fst.getNodeCount()/4));
|
fst = fst.pack(3, Math.max(10, fst.getNodeCount()/4), acceptableOverheadRatio);
|
||||||
}
|
}
|
||||||
fst.save(out);
|
fst.save(out);
|
||||||
//System.out.println("finish field=" + field.name + " fp=" + out.getFilePointer());
|
//System.out.println("finish field=" + field.name + " fp=" + out.getFilePointer());
|
||||||
|
@ -290,7 +295,7 @@ public class MemoryPostingsFormat extends PostingsFormat {
|
||||||
@Override
|
@Override
|
||||||
public TermsConsumer addField(FieldInfo field) {
|
public TermsConsumer addField(FieldInfo field) {
|
||||||
//System.out.println("\naddField field=" + field.name);
|
//System.out.println("\naddField field=" + field.name);
|
||||||
return new TermsWriter(out, field, doPackFST);
|
return new TermsWriter(out, field, doPackFST, acceptableOverheadRatio);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -32,6 +32,7 @@ import org.apache.lucene.document.PackedLongDocValuesField; // javadocs
|
||||||
import org.apache.lucene.document.ShortDocValuesField; // javadocs
|
import org.apache.lucene.document.ShortDocValuesField; // javadocs
|
||||||
import org.apache.lucene.document.SortedBytesDocValuesField; // javadocs
|
import org.apache.lucene.document.SortedBytesDocValuesField; // javadocs
|
||||||
import org.apache.lucene.document.StraightBytesDocValuesField; // javadocs
|
import org.apache.lucene.document.StraightBytesDocValuesField; // javadocs
|
||||||
|
import org.apache.lucene.store.DataOutput;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.util.packed.PackedInts;
|
import org.apache.lucene.util.packed.PackedInts;
|
||||||
|
|
||||||
|
@ -411,6 +412,11 @@ public abstract class DocValues implements Closeable {
|
||||||
Arrays.fill(arr, off, off+len, 0);
|
Arrays.fill(arr, off, off+len, 0);
|
||||||
return len;
|
return len;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long ramBytesUsed() {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
return new SortedSource(type, BytesRef.getUTF8SortedAsUnicodeComparator()) {
|
return new SortedSource(type, BytesRef.getUTF8SortedAsUnicodeComparator()) {
|
||||||
|
|
|
@ -23,6 +23,7 @@ import org.apache.lucene.util.ArrayUtil;
|
||||||
import org.apache.lucene.util.IntsRef;
|
import org.apache.lucene.util.IntsRef;
|
||||||
import org.apache.lucene.util.RamUsageEstimator;
|
import org.apache.lucene.util.RamUsageEstimator;
|
||||||
import org.apache.lucene.util.fst.FST.INPUT_TYPE; // javadoc
|
import org.apache.lucene.util.fst.FST.INPUT_TYPE; // javadoc
|
||||||
|
import org.apache.lucene.util.packed.PackedInts;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Builds a minimal FST (maps an IntsRef term to an arbitrary
|
* Builds a minimal FST (maps an IntsRef term to an arbitrary
|
||||||
|
@ -83,7 +84,18 @@ public class Builder<T> {
|
||||||
* pruning options turned off.
|
* pruning options turned off.
|
||||||
*/
|
*/
|
||||||
public Builder(FST.INPUT_TYPE inputType, Outputs<T> outputs) {
|
public Builder(FST.INPUT_TYPE inputType, Outputs<T> outputs) {
|
||||||
this(inputType, 0, 0, true, true, Integer.MAX_VALUE, outputs, null, false);
|
this(inputType, 0, 0, true, true, Integer.MAX_VALUE, outputs, null, false, PackedInts.COMPACT);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Instantiates an FST/FSA builder with {@link PackedInts#DEFAULT}
|
||||||
|
* <code>acceptableOverheadRatio</code>.
|
||||||
|
*/
|
||||||
|
public Builder(FST.INPUT_TYPE inputType, int minSuffixCount1, int minSuffixCount2, boolean doShareSuffix,
|
||||||
|
boolean doShareNonSingletonNodes, int shareMaxTailLength, Outputs<T> outputs,
|
||||||
|
FreezeTail<T> freezeTail, boolean willPackFST) {
|
||||||
|
this(inputType, minSuffixCount1, minSuffixCount2, doShareSuffix, doShareNonSingletonNodes,
|
||||||
|
shareMaxTailLength, outputs, freezeTail, willPackFST, PackedInts.DEFAULT);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -126,17 +138,20 @@ public class Builder<T> {
|
||||||
* @param willPackFST Pass true if you will pack the FST before saving. This
|
* @param willPackFST Pass true if you will pack the FST before saving. This
|
||||||
* causes the FST to create additional data structures internally to facilitate packing, but
|
* causes the FST to create additional data structures internally to facilitate packing, but
|
||||||
* it means the resulting FST cannot be saved: it must
|
* it means the resulting FST cannot be saved: it must
|
||||||
* first be packed using {@link FST#pack(int, int)}}.
|
* first be packed using {@link FST#pack(int, int, float)}.
|
||||||
|
*
|
||||||
|
* @param acceptableOverheadRatio How to trade speed for space when building the FST. This option
|
||||||
|
* is only relevant when willPackFST is true. See {@link PackedInts#getMutable(int, int, float)}.
|
||||||
*/
|
*/
|
||||||
public Builder(FST.INPUT_TYPE inputType, int minSuffixCount1, int minSuffixCount2, boolean doShareSuffix,
|
public Builder(FST.INPUT_TYPE inputType, int minSuffixCount1, int minSuffixCount2, boolean doShareSuffix,
|
||||||
boolean doShareNonSingletonNodes, int shareMaxTailLength, Outputs<T> outputs,
|
boolean doShareNonSingletonNodes, int shareMaxTailLength, Outputs<T> outputs,
|
||||||
FreezeTail<T> freezeTail, boolean willPackFST) {
|
FreezeTail<T> freezeTail, boolean willPackFST, float acceptableOverheadRatio) {
|
||||||
this.minSuffixCount1 = minSuffixCount1;
|
this.minSuffixCount1 = minSuffixCount1;
|
||||||
this.minSuffixCount2 = minSuffixCount2;
|
this.minSuffixCount2 = minSuffixCount2;
|
||||||
this.freezeTail = freezeTail;
|
this.freezeTail = freezeTail;
|
||||||
this.doShareNonSingletonNodes = doShareNonSingletonNodes;
|
this.doShareNonSingletonNodes = doShareNonSingletonNodes;
|
||||||
this.shareMaxTailLength = shareMaxTailLength;
|
this.shareMaxTailLength = shareMaxTailLength;
|
||||||
fst = new FST<T>(inputType, outputs, willPackFST);
|
fst = new FST<T>(inputType, outputs, willPackFST, acceptableOverheadRatio);
|
||||||
if (doShareSuffix) {
|
if (doShareSuffix) {
|
||||||
dedupHash = new NodeHash<T>(fst);
|
dedupHash = new NodeHash<T>(fst);
|
||||||
} else {
|
} else {
|
||||||
|
|
|
@ -37,8 +37,9 @@ import org.apache.lucene.util.CodecUtil;
|
||||||
import org.apache.lucene.util.IOUtils;
|
import org.apache.lucene.util.IOUtils;
|
||||||
import org.apache.lucene.util.IntsRef;
|
import org.apache.lucene.util.IntsRef;
|
||||||
import org.apache.lucene.util.PriorityQueue;
|
import org.apache.lucene.util.PriorityQueue;
|
||||||
import org.apache.lucene.util.RamUsageEstimator;
|
|
||||||
import org.apache.lucene.util.fst.Builder.UnCompiledNode;
|
import org.apache.lucene.util.fst.Builder.UnCompiledNode;
|
||||||
|
import org.apache.lucene.util.packed.GrowableWriter;
|
||||||
|
import org.apache.lucene.util.packed.PackedInts;
|
||||||
|
|
||||||
// TODO: break this into WritableFST and ReadOnlyFST.. then
|
// TODO: break this into WritableFST and ReadOnlyFST.. then
|
||||||
// we can have subclasses of ReadOnlyFST to handle the
|
// we can have subclasses of ReadOnlyFST to handle the
|
||||||
|
@ -155,7 +156,7 @@ public final class FST<T> {
|
||||||
public int arcWithOutputCount;
|
public int arcWithOutputCount;
|
||||||
|
|
||||||
private final boolean packed;
|
private final boolean packed;
|
||||||
private final int[] nodeRefToAddress;
|
private PackedInts.Reader nodeRefToAddress;
|
||||||
|
|
||||||
// If arc has this label then that arc is final/accepted
|
// If arc has this label then that arc is final/accepted
|
||||||
public static final int END_LABEL = -1;
|
public static final int END_LABEL = -1;
|
||||||
|
@ -252,25 +253,23 @@ public final class FST<T> {
|
||||||
|
|
||||||
private final BytesWriter writer;
|
private final BytesWriter writer;
|
||||||
|
|
||||||
// TODO: we can save RAM here by using growable packed
|
private GrowableWriter nodeAddress;
|
||||||
// ints...:
|
|
||||||
private int[] nodeAddress;
|
|
||||||
|
|
||||||
// TODO: we could be smarter here, and prune periodically
|
// TODO: we could be smarter here, and prune periodically
|
||||||
// as we go; high in-count nodes will "usually" become
|
// as we go; high in-count nodes will "usually" become
|
||||||
// clear early on:
|
// clear early on:
|
||||||
private int[] inCounts;
|
private GrowableWriter inCounts;
|
||||||
|
|
||||||
// make a new empty FST, for building; Builder invokes
|
// make a new empty FST, for building; Builder invokes
|
||||||
// this ctor
|
// this ctor
|
||||||
FST(INPUT_TYPE inputType, Outputs<T> outputs, boolean willPackFST) {
|
FST(INPUT_TYPE inputType, Outputs<T> outputs, boolean willPackFST, float acceptableOverheadRatio) {
|
||||||
this.inputType = inputType;
|
this.inputType = inputType;
|
||||||
this.outputs = outputs;
|
this.outputs = outputs;
|
||||||
bytes = new byte[128];
|
bytes = new byte[128];
|
||||||
NO_OUTPUT = outputs.getNoOutput();
|
NO_OUTPUT = outputs.getNoOutput();
|
||||||
if (willPackFST) {
|
if (willPackFST) {
|
||||||
nodeAddress = new int[8];
|
nodeAddress = new GrowableWriter(PackedInts.bitsRequired(bytes.length - 1), 8, acceptableOverheadRatio);
|
||||||
inCounts = new int[8];
|
inCounts = new GrowableWriter(1, 8, acceptableOverheadRatio);
|
||||||
} else {
|
} else {
|
||||||
nodeAddress = null;
|
nodeAddress = null;
|
||||||
inCounts = null;
|
inCounts = null;
|
||||||
|
@ -320,11 +319,7 @@ public final class FST<T> {
|
||||||
throw new IllegalStateException("invalid input type " + t);
|
throw new IllegalStateException("invalid input type " + t);
|
||||||
}
|
}
|
||||||
if (packed) {
|
if (packed) {
|
||||||
final int nodeRefCount = in.readVInt();
|
nodeRefToAddress = PackedInts.getReader(in);
|
||||||
nodeRefToAddress = new int[nodeRefCount];
|
|
||||||
for(int idx=0;idx<nodeRefCount;idx++) {
|
|
||||||
nodeRefToAddress[idx] = in.readVInt();
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
nodeRefToAddress = null;
|
nodeRefToAddress = null;
|
||||||
}
|
}
|
||||||
|
@ -348,10 +343,10 @@ public final class FST<T> {
|
||||||
public int sizeInBytes() {
|
public int sizeInBytes() {
|
||||||
int size = bytes.length;
|
int size = bytes.length;
|
||||||
if (packed) {
|
if (packed) {
|
||||||
size += nodeRefToAddress.length * RamUsageEstimator.NUM_BYTES_INT;
|
size += nodeRefToAddress.ramBytesUsed();
|
||||||
} else if (nodeAddress != null) {
|
} else if (nodeAddress != null) {
|
||||||
size += nodeAddress.length * RamUsageEstimator.NUM_BYTES_INT;
|
size += nodeAddress.ramBytesUsed();
|
||||||
size += inCounts.length * RamUsageEstimator.NUM_BYTES_INT;
|
size += inCounts.ramBytesUsed();
|
||||||
}
|
}
|
||||||
return size;
|
return size;
|
||||||
}
|
}
|
||||||
|
@ -374,7 +369,7 @@ public final class FST<T> {
|
||||||
private int getNodeAddress(int node) {
|
private int getNodeAddress(int node) {
|
||||||
if (nodeAddress != null) {
|
if (nodeAddress != null) {
|
||||||
// Deref
|
// Deref
|
||||||
return nodeAddress[node];
|
return (int) nodeAddress.get(node);
|
||||||
} else {
|
} else {
|
||||||
// Straight
|
// Straight
|
||||||
return node;
|
return node;
|
||||||
|
@ -444,6 +439,9 @@ public final class FST<T> {
|
||||||
if (nodeAddress != null) {
|
if (nodeAddress != null) {
|
||||||
throw new IllegalStateException("cannot save an FST pre-packed FST; it must first be packed");
|
throw new IllegalStateException("cannot save an FST pre-packed FST; it must first be packed");
|
||||||
}
|
}
|
||||||
|
if (packed && !(nodeRefToAddress instanceof PackedInts.Mutable)) {
|
||||||
|
throw new IllegalStateException("cannot save a FST which has been loaded from disk ");
|
||||||
|
}
|
||||||
CodecUtil.writeHeader(out, FILE_FORMAT_NAME, VERSION_CURRENT);
|
CodecUtil.writeHeader(out, FILE_FORMAT_NAME, VERSION_CURRENT);
|
||||||
if (packed) {
|
if (packed) {
|
||||||
out.writeByte((byte) 1);
|
out.writeByte((byte) 1);
|
||||||
|
@ -469,11 +467,7 @@ public final class FST<T> {
|
||||||
}
|
}
|
||||||
out.writeByte(t);
|
out.writeByte(t);
|
||||||
if (packed) {
|
if (packed) {
|
||||||
assert nodeRefToAddress != null;
|
((PackedInts.Mutable) nodeRefToAddress).save(out);
|
||||||
out.writeVInt(nodeRefToAddress.length);
|
|
||||||
for(int idx=0;idx<nodeRefToAddress.length;idx++) {
|
|
||||||
out.writeVInt(nodeRefToAddress[idx]);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
out.writeVInt(startNode);
|
out.writeVInt(startNode);
|
||||||
out.writeVInt(nodeCount);
|
out.writeVInt(nodeCount);
|
||||||
|
@ -624,7 +618,7 @@ public final class FST<T> {
|
||||||
if (!targetHasArcs) {
|
if (!targetHasArcs) {
|
||||||
flags += BIT_STOP_NODE;
|
flags += BIT_STOP_NODE;
|
||||||
} else if (inCounts != null) {
|
} else if (inCounts != null) {
|
||||||
inCounts[target.node]++;
|
inCounts.set(target.node, inCounts.get(target.node) + 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (arc.output != NO_OUTPUT) {
|
if (arc.output != NO_OUTPUT) {
|
||||||
|
@ -715,11 +709,11 @@ public final class FST<T> {
|
||||||
final int node;
|
final int node;
|
||||||
if (nodeAddress != null) {
|
if (nodeAddress != null) {
|
||||||
// Nodes are addressed by 1+ord:
|
// Nodes are addressed by 1+ord:
|
||||||
if (nodeCount == nodeAddress.length) {
|
if (nodeCount == nodeAddress.size()) {
|
||||||
nodeAddress = ArrayUtil.grow(nodeAddress);
|
nodeAddress = nodeAddress.resize(ArrayUtil.oversize(nodeAddress.size() + 1, nodeAddress.getBitsPerValue()));
|
||||||
inCounts = ArrayUtil.grow(inCounts);
|
inCounts = inCounts.resize(ArrayUtil.oversize(inCounts.size() + 1, inCounts.getBitsPerValue()));
|
||||||
}
|
}
|
||||||
nodeAddress[nodeCount] = endAddress;
|
nodeAddress.set(nodeCount, endAddress);
|
||||||
// System.out.println(" write nodeAddress[" + nodeCount + "] = " + endAddress);
|
// System.out.println(" write nodeAddress[" + nodeCount + "] = " + endAddress);
|
||||||
node = nodeCount;
|
node = nodeCount;
|
||||||
} else {
|
} else {
|
||||||
|
@ -1005,9 +999,9 @@ public final class FST<T> {
|
||||||
// Address is delta-coded from current address:
|
// Address is delta-coded from current address:
|
||||||
arc.target = pos + code;
|
arc.target = pos + code;
|
||||||
//System.out.println(" delta pos=" + pos + " delta=" + code + " target=" + arc.target);
|
//System.out.println(" delta pos=" + pos + " delta=" + code + " target=" + arc.target);
|
||||||
} else if (code < nodeRefToAddress.length) {
|
} else if (code < nodeRefToAddress.size()) {
|
||||||
// Deref
|
// Deref
|
||||||
arc.target = nodeRefToAddress[code];
|
arc.target = (int) nodeRefToAddress.get(code);
|
||||||
//System.out.println(" deref code=" + code + " target=" + arc.target);
|
//System.out.println(" deref code=" + code + " target=" + arc.target);
|
||||||
} else {
|
} else {
|
||||||
// Absolute
|
// Absolute
|
||||||
|
@ -1420,7 +1414,7 @@ public final class FST<T> {
|
||||||
*/
|
*/
|
||||||
|
|
||||||
// Creates a packed FST
|
// Creates a packed FST
|
||||||
private FST(INPUT_TYPE inputType, int[] nodeRefToAddress, Outputs<T> outputs) {
|
private FST(INPUT_TYPE inputType, PackedInts.Reader nodeRefToAddress, Outputs<T> outputs) {
|
||||||
packed = true;
|
packed = true;
|
||||||
this.inputType = inputType;
|
this.inputType = inputType;
|
||||||
bytes = new byte[128];
|
bytes = new byte[128];
|
||||||
|
@ -1432,8 +1426,10 @@ public final class FST<T> {
|
||||||
|
|
||||||
/** Expert: creates an FST by packing this one. This
|
/** Expert: creates an FST by packing this one. This
|
||||||
* process requires substantial additional RAM (currently
|
* process requires substantial additional RAM (currently
|
||||||
* ~8 bytes per node), but then should produce a smaller FST. */
|
* up to ~8 bytes per node depending on
|
||||||
public FST<T> pack(int minInCountDeref, int maxDerefNodes) throws IOException {
|
* <code>acceptableOverheadRatio</code>), but then should
|
||||||
|
* produce a smaller FST. */
|
||||||
|
public FST<T> pack(int minInCountDeref, int maxDerefNodes, float acceptableOverheadRatio) throws IOException {
|
||||||
|
|
||||||
// TODO: other things to try
|
// TODO: other things to try
|
||||||
// - renumber the nodes to get more next / better locality?
|
// - renumber the nodes to get more next / better locality?
|
||||||
|
@ -1454,22 +1450,22 @@ public final class FST<T> {
|
||||||
|
|
||||||
final BytesReader r = getBytesReader(0);
|
final BytesReader r = getBytesReader(0);
|
||||||
|
|
||||||
final int topN = Math.min(maxDerefNodes, inCounts.length);
|
final int topN = Math.min(maxDerefNodes, inCounts.size());
|
||||||
|
|
||||||
// Find top nodes with highest number of incoming arcs:
|
// Find top nodes with highest number of incoming arcs:
|
||||||
NodeQueue q = new NodeQueue(topN);
|
NodeQueue q = new NodeQueue(topN);
|
||||||
|
|
||||||
// TODO: we could use more RAM efficient selection algo here...
|
// TODO: we could use more RAM efficient selection algo here...
|
||||||
NodeAndInCount bottom = null;
|
NodeAndInCount bottom = null;
|
||||||
for(int node=0;node<inCounts.length;node++) {
|
for(int node=0; node<inCounts.size(); node++) {
|
||||||
if (inCounts[node] >= minInCountDeref) {
|
if (inCounts.get(node) >= minInCountDeref) {
|
||||||
if (bottom == null) {
|
if (bottom == null) {
|
||||||
q.add(new NodeAndInCount(node, inCounts[node]));
|
q.add(new NodeAndInCount(node, (int) inCounts.get(node)));
|
||||||
if (q.size() == topN) {
|
if (q.size() == topN) {
|
||||||
bottom = q.top();
|
bottom = q.top();
|
||||||
}
|
}
|
||||||
} else if (inCounts[node] > bottom.count) {
|
} else if (inCounts.get(node) > bottom.count) {
|
||||||
q.insertWithOverflow(new NodeAndInCount(node, inCounts[node]));
|
q.insertWithOverflow(new NodeAndInCount(node, (int) inCounts.get(node)));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1484,20 +1480,17 @@ public final class FST<T> {
|
||||||
//System.out.println("map node=" + n.node + " inCount=" + n.count + " to newID=" + downTo);
|
//System.out.println("map node=" + n.node + " inCount=" + n.count + " to newID=" + downTo);
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: we can use packed ints:
|
final FST<T> fst = new FST<T>(inputType, null, outputs);
|
||||||
// +1 because node ords start at 1 (0 is reserved as
|
|
||||||
// stop node):
|
|
||||||
final int[] nodeRefToAddressIn = new int[topNodeMap.size()];
|
|
||||||
|
|
||||||
final FST<T> fst = new FST<T>(inputType, nodeRefToAddressIn, outputs);
|
|
||||||
|
|
||||||
final BytesWriter writer = fst.writer;
|
final BytesWriter writer = fst.writer;
|
||||||
|
|
||||||
final int[] newNodeAddress = new int[1+nodeCount];
|
// +1 because node ords start at 1 (0 is reserved as stop node):
|
||||||
|
final GrowableWriter newNodeAddress = new GrowableWriter(
|
||||||
|
PackedInts.bitsRequired(bytes.length), 1 + nodeCount, acceptableOverheadRatio);
|
||||||
|
|
||||||
// Fill initial coarse guess:
|
// Fill initial coarse guess:
|
||||||
for(int node=1;node<=nodeCount;node++) {
|
for(int node=1;node<=nodeCount;node++) {
|
||||||
newNodeAddress[node] = 1 + bytes.length - nodeAddress[node];
|
newNodeAddress.set(node, 1 + bytes.length - nodeAddress.get(node));
|
||||||
}
|
}
|
||||||
|
|
||||||
int absCount;
|
int absCount;
|
||||||
|
@ -1537,11 +1530,11 @@ public final class FST<T> {
|
||||||
fst.nodeCount++;
|
fst.nodeCount++;
|
||||||
final int address = writer.posWrite;
|
final int address = writer.posWrite;
|
||||||
//System.out.println(" node: " + node + " address=" + address);
|
//System.out.println(" node: " + node + " address=" + address);
|
||||||
if (address != newNodeAddress[node]) {
|
if (address != newNodeAddress.get(node)) {
|
||||||
addressError = address - newNodeAddress[node];
|
addressError = address - (int) newNodeAddress.get(node);
|
||||||
//System.out.println(" change: " + (address - newNodeAddress[node]));
|
//System.out.println(" change: " + (address - newNodeAddress[node]));
|
||||||
changed = true;
|
changed = true;
|
||||||
newNodeAddress[node] = address;
|
newNodeAddress.set(node, address);
|
||||||
changedCount++;
|
changedCount++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1621,10 +1614,10 @@ public final class FST<T> {
|
||||||
if (ptr != null) {
|
if (ptr != null) {
|
||||||
absPtr = ptr;
|
absPtr = ptr;
|
||||||
} else {
|
} else {
|
||||||
absPtr = topNodeMap.size() + newNodeAddress[arc.target] + addressError;
|
absPtr = topNodeMap.size() + (int) newNodeAddress.get(arc.target) + addressError;
|
||||||
}
|
}
|
||||||
|
|
||||||
int delta = newNodeAddress[arc.target] + addressError - writer.posWrite - 2;
|
int delta = (int) newNodeAddress.get(arc.target) + addressError - writer.posWrite - 2;
|
||||||
if (delta < 0) {
|
if (delta < 0) {
|
||||||
//System.out.println("neg: " + delta);
|
//System.out.println("neg: " + delta);
|
||||||
anyNegDelta = true;
|
anyNegDelta = true;
|
||||||
|
@ -1654,7 +1647,7 @@ public final class FST<T> {
|
||||||
|
|
||||||
if (doWriteTarget) {
|
if (doWriteTarget) {
|
||||||
|
|
||||||
int delta = newNodeAddress[arc.target] + addressError - writer.posWrite;
|
int delta = (int) newNodeAddress.get(arc.target) + addressError - writer.posWrite;
|
||||||
if (delta < 0) {
|
if (delta < 0) {
|
||||||
anyNegDelta = true;
|
anyNegDelta = true;
|
||||||
//System.out.println("neg: " + delta);
|
//System.out.println("neg: " + delta);
|
||||||
|
@ -1745,11 +1738,20 @@ public final class FST<T> {
|
||||||
//System.out.println(" " + changedCount + " of " + fst.nodeCount + " changed; retry");
|
//System.out.println(" " + changedCount + " of " + fst.nodeCount + " changed; retry");
|
||||||
}
|
}
|
||||||
|
|
||||||
for(Map.Entry<Integer,Integer> ent : topNodeMap.entrySet()) {
|
long maxAddress = 0;
|
||||||
nodeRefToAddressIn[ent.getValue()] = newNodeAddress[ent.getKey()];
|
for (int key : topNodeMap.keySet()) {
|
||||||
|
maxAddress = Math.max(maxAddress, newNodeAddress.get(key));
|
||||||
}
|
}
|
||||||
|
|
||||||
fst.startNode = newNodeAddress[startNode];
|
PackedInts.Mutable nodeRefToAddressIn = PackedInts.getMutable(topNodeMap.size(),
|
||||||
|
PackedInts.bitsRequired(maxAddress), acceptableOverheadRatio);
|
||||||
|
for(Map.Entry<Integer,Integer> ent : topNodeMap.entrySet()) {
|
||||||
|
nodeRefToAddressIn.set(ent.getValue(), newNodeAddress.get(ent.getKey()));
|
||||||
|
}
|
||||||
|
fst.nodeRefToAddress = nodeRefToAddressIn;
|
||||||
|
|
||||||
|
|
||||||
|
fst.startNode = (int) newNodeAddress.get(startNode);
|
||||||
//System.out.println("new startNode=" + fst.startNode + " old startNode=" + startNode);
|
//System.out.println("new startNode=" + fst.startNode + " old startNode=" + startNode);
|
||||||
|
|
||||||
if (emptyOutput != null) {
|
if (emptyOutput != null) {
|
||||||
|
|
|
@ -51,4 +51,9 @@ final class DirectPacked64SingleBlockReader extends PackedInts.ReaderImpl {
|
||||||
throw new IllegalStateException("failed", e);
|
throw new IllegalStateException("failed", e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long ramBytesUsed() {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -73,4 +73,9 @@ final class DirectPackedReader extends PackedInts.ReaderImpl {
|
||||||
throw new IllegalStateException("failed", ioe);
|
throw new IllegalStateException("failed", ioe);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long ramBytesUsed() {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -17,6 +17,10 @@ package org.apache.lucene.util.packed;
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import org.apache.lucene.store.DataOutput;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Implements {@link PackedInts.Mutable}, but grows the
|
* Implements {@link PackedInts.Mutable}, but grows the
|
||||||
* bit count of the underlying packed ints on-demand.
|
* bit count of the underlying packed ints on-demand.
|
||||||
|
@ -111,4 +115,14 @@ public class GrowableWriter implements PackedInts.Mutable {
|
||||||
current.fill(fromIndex, toIndex, val);
|
current.fill(fromIndex, toIndex, val);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long ramBytesUsed() {
|
||||||
|
return current.ramBytesUsed();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void save(DataOutput out) throws IOException {
|
||||||
|
current.save(out);
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -284,6 +284,11 @@ abstract class Packed64SingleBlock extends PackedInts.MutableImpl {
|
||||||
return RamUsageEstimator.sizeOf(blocks);
|
return RamUsageEstimator.sizeOf(blocks);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected int getFormat() {
|
||||||
|
return PackedInts.PACKED_SINGLE_BLOCK;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return getClass().getSimpleName() + "(bitsPerValue=" + bitsPerValue
|
return getClass().getSimpleName() + "(bitsPerValue=" + bitsPerValue
|
||||||
|
|
|
@ -100,6 +100,11 @@ public class PackedInts {
|
||||||
*/
|
*/
|
||||||
int size();
|
int size();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return the in-memory size in bytes.
|
||||||
|
*/
|
||||||
|
long ramBytesUsed();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Expert: if the bit-width of this reader matches one of
|
* Expert: if the bit-width of this reader matches one of
|
||||||
* java's native types, returns the underlying array
|
* java's native types, returns the underlying array
|
||||||
|
@ -118,6 +123,7 @@ public class PackedInts {
|
||||||
* @see #getArray
|
* @see #getArray
|
||||||
*/
|
*/
|
||||||
boolean hasArray();
|
boolean hasArray();
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -171,6 +177,7 @@ public class PackedInts {
|
||||||
* @lucene.internal
|
* @lucene.internal
|
||||||
*/
|
*/
|
||||||
public static interface Mutable extends Reader {
|
public static interface Mutable extends Reader {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Set the value at the given index in the array.
|
* Set the value at the given index in the array.
|
||||||
* @param index where the value should be positioned.
|
* @param index where the value should be positioned.
|
||||||
|
@ -197,6 +204,13 @@ public class PackedInts {
|
||||||
*/
|
*/
|
||||||
void clear();
|
void clear();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Save this mutable into <code>out</code>. Instantiating a reader from
|
||||||
|
* the generated data will return a reader with the same number of bits
|
||||||
|
* per value.
|
||||||
|
*/
|
||||||
|
void save(DataOutput out) throws IOException;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -239,6 +253,7 @@ public class PackedInts {
|
||||||
}
|
}
|
||||||
return gets;
|
return gets;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public static abstract class MutableImpl extends ReaderImpl implements Mutable {
|
public static abstract class MutableImpl extends ReaderImpl implements Mutable {
|
||||||
|
@ -267,6 +282,18 @@ public class PackedInts {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
protected int getFormat() {
|
||||||
|
return PACKED;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void save(DataOutput out) throws IOException {
|
||||||
|
Writer writer = getWriterByFormat(out, valueCount, bitsPerValue, getFormat());
|
||||||
|
for (int i = 0; i < valueCount; ++i) {
|
||||||
|
writer.add(get(i));
|
||||||
|
}
|
||||||
|
writer.finish();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/** A write-once Writer.
|
/** A write-once Writer.
|
||||||
|
@ -470,28 +497,40 @@ public class PackedInts {
|
||||||
int maxBitsPerValue = bitsPerValue + (int) acceptableOverheadPerValue;
|
int maxBitsPerValue = bitsPerValue + (int) acceptableOverheadPerValue;
|
||||||
|
|
||||||
if (bitsPerValue <= 8 && maxBitsPerValue >= 8) {
|
if (bitsPerValue <= 8 && maxBitsPerValue >= 8) {
|
||||||
return new PackedWriter(out, valueCount, 8);
|
return getWriterByFormat(out, valueCount, 8, PACKED);
|
||||||
} else if (bitsPerValue <= 16 && maxBitsPerValue >= 16) {
|
} else if (bitsPerValue <= 16 && maxBitsPerValue >= 16) {
|
||||||
return new PackedWriter(out, valueCount, 16);
|
return getWriterByFormat(out, valueCount, 16, PACKED);
|
||||||
} else if (bitsPerValue <= 32 && maxBitsPerValue >= 32) {
|
} else if (bitsPerValue <= 32 && maxBitsPerValue >= 32) {
|
||||||
return new PackedWriter(out, valueCount, 32);
|
return getWriterByFormat(out, valueCount, 32, PACKED);
|
||||||
} else if (bitsPerValue <= 64 && maxBitsPerValue >= 64) {
|
} else if (bitsPerValue <= 64 && maxBitsPerValue >= 64) {
|
||||||
return new PackedWriter(out, valueCount, 64);
|
return getWriterByFormat(out, valueCount, 64, PACKED);
|
||||||
} else if (valueCount <= Packed8ThreeBlocks.MAX_SIZE && bitsPerValue <= 24 && maxBitsPerValue >= 24) {
|
} else if (valueCount <= Packed8ThreeBlocks.MAX_SIZE && bitsPerValue <= 24 && maxBitsPerValue >= 24) {
|
||||||
return new PackedWriter(out, valueCount, 24);
|
return getWriterByFormat(out, valueCount, 24, PACKED);
|
||||||
} else if (valueCount <= Packed16ThreeBlocks.MAX_SIZE && bitsPerValue <= 48 && maxBitsPerValue >= 48) {
|
} else if (valueCount <= Packed16ThreeBlocks.MAX_SIZE && bitsPerValue <= 48 && maxBitsPerValue >= 48) {
|
||||||
return new PackedWriter(out, valueCount, bitsPerValue);
|
return getWriterByFormat(out, valueCount, 48, PACKED);
|
||||||
} else {
|
} else {
|
||||||
for (int bpv = bitsPerValue; bpv <= maxBitsPerValue; ++bpv) {
|
for (int bpv = bitsPerValue; bpv <= maxBitsPerValue; ++bpv) {
|
||||||
if (Packed64SingleBlock.isSupported(bpv)) {
|
if (Packed64SingleBlock.isSupported(bpv)) {
|
||||||
float overhead = Packed64SingleBlock.overheadPerValue(bpv);
|
float overhead = Packed64SingleBlock.overheadPerValue(bpv);
|
||||||
float acceptableOverhead = acceptableOverheadPerValue + bitsPerValue - bpv;
|
float acceptableOverhead = acceptableOverheadPerValue + bitsPerValue - bpv;
|
||||||
if (overhead <= acceptableOverhead) {
|
if (overhead <= acceptableOverhead) {
|
||||||
return new Packed64SingleBlockWriter(out, valueCount, bpv);
|
return getWriterByFormat(out, valueCount, bpv, PACKED_SINGLE_BLOCK);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
return getWriterByFormat(out, valueCount, bitsPerValue, PACKED);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Writer getWriterByFormat(DataOutput out,
|
||||||
|
int valueCount, int bitsPerValue, int format) throws IOException {
|
||||||
|
switch (format) {
|
||||||
|
case PACKED:
|
||||||
return new PackedWriter(out, valueCount, bitsPerValue);
|
return new PackedWriter(out, valueCount, bitsPerValue);
|
||||||
|
case PACKED_SINGLE_BLOCK:
|
||||||
|
return new Packed64SingleBlockWriter(out, valueCount, bitsPerValue);
|
||||||
|
default:
|
||||||
|
throw new IllegalArgumentException("Unknown format " + format);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -41,7 +41,7 @@ public class TestRollingUpdates extends LuceneTestCase {
|
||||||
|
|
||||||
//provider.register(new MemoryCodec());
|
//provider.register(new MemoryCodec());
|
||||||
if (random().nextBoolean()) {
|
if (random().nextBoolean()) {
|
||||||
Codec.setDefault(_TestUtil.alwaysPostingsFormat(new MemoryPostingsFormat(random().nextBoolean())));
|
Codec.setDefault(_TestUtil.alwaysPostingsFormat(new MemoryPostingsFormat(random().nextBoolean(), random.nextFloat())));
|
||||||
}
|
}
|
||||||
|
|
||||||
final IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
|
final IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
|
||||||
|
|
|
@ -64,6 +64,7 @@ import org.apache.lucene.util.fst.BytesRefFSTEnum.InputOutput;
|
||||||
import org.apache.lucene.util.fst.FST.Arc;
|
import org.apache.lucene.util.fst.FST.Arc;
|
||||||
import org.apache.lucene.util.fst.FST.BytesReader;
|
import org.apache.lucene.util.fst.FST.BytesReader;
|
||||||
import org.apache.lucene.util.fst.PairOutputs.Pair;
|
import org.apache.lucene.util.fst.PairOutputs.Pair;
|
||||||
|
import org.apache.lucene.util.packed.PackedInts;
|
||||||
|
|
||||||
@SuppressCodecs({ "SimpleText", "Memory" })
|
@SuppressCodecs({ "SimpleText", "Memory" })
|
||||||
public class TestFSTs extends LuceneTestCase {
|
public class TestFSTs extends LuceneTestCase {
|
||||||
|
@ -536,7 +537,7 @@ public class TestFSTs extends LuceneTestCase {
|
||||||
if (VERBOSE) {
|
if (VERBOSE) {
|
||||||
System.out.println("TEST: now rewrite");
|
System.out.println("TEST: now rewrite");
|
||||||
}
|
}
|
||||||
final FST<T> packed = fst.pack(_TestUtil.nextInt(random, 1, 10), _TestUtil.nextInt(random, 0, 10000000));
|
final FST<T> packed = fst.pack(_TestUtil.nextInt(random, 1, 10), _TestUtil.nextInt(random, 0, 10000000), random.nextFloat());
|
||||||
if (VERBOSE) {
|
if (VERBOSE) {
|
||||||
System.out.println("TEST: now verify packed FST");
|
System.out.println("TEST: now verify packed FST");
|
||||||
}
|
}
|
||||||
|
@ -1182,7 +1183,7 @@ public class TestFSTs extends LuceneTestCase {
|
||||||
if (rewriteIter == 1) {
|
if (rewriteIter == 1) {
|
||||||
if (doRewrite) {
|
if (doRewrite) {
|
||||||
// Verify again, with packed FST:
|
// Verify again, with packed FST:
|
||||||
fst = fst.pack(_TestUtil.nextInt(random, 1, 10), _TestUtil.nextInt(random, 0, 10000000));
|
fst = fst.pack(_TestUtil.nextInt(random, 1, 10), _TestUtil.nextInt(random, 0, 10000000), random.nextFloat());
|
||||||
} else {
|
} else {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -1324,7 +1325,7 @@ public class TestFSTs extends LuceneTestCase {
|
||||||
|
|
||||||
if (doPack) {
|
if (doPack) {
|
||||||
System.out.println("Pack...");
|
System.out.println("Pack...");
|
||||||
fst = fst.pack(4, 100000000);
|
fst = fst.pack(4, 100000000, random().nextFloat());
|
||||||
System.out.println("New size " + fst.sizeInBytes() + " bytes");
|
System.out.println("New size " + fst.sizeInBytes() + " bytes");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1927,7 +1928,7 @@ public class TestFSTs extends LuceneTestCase {
|
||||||
final Long nothing = outputs.getNoOutput();
|
final Long nothing = outputs.getNoOutput();
|
||||||
final Builder<Long> b = new Builder<Long>(FST.INPUT_TYPE.BYTE1, outputs);
|
final Builder<Long> b = new Builder<Long>(FST.INPUT_TYPE.BYTE1, outputs);
|
||||||
|
|
||||||
final FST<Long> fst = new FST<Long>(FST.INPUT_TYPE.BYTE1, outputs, false);
|
final FST<Long> fst = new FST<Long>(FST.INPUT_TYPE.BYTE1, outputs, false, PackedInts.COMPACT);
|
||||||
|
|
||||||
final Builder.UnCompiledNode<Long> rootNode = new Builder.UnCompiledNode<Long>(b, 0);
|
final Builder.UnCompiledNode<Long> rootNode = new Builder.UnCompiledNode<Long>(b, 0);
|
||||||
|
|
||||||
|
|
|
@ -560,4 +560,40 @@ public class TestPackedInts extends LuceneTestCase {
|
||||||
assertEquals(1 << 10, wrt.get(valueCount - 1));
|
assertEquals(1 << 10, wrt.get(valueCount - 1));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testSave() throws IOException {
|
||||||
|
final int valueCount = _TestUtil.nextInt(random(), 1, 2048);
|
||||||
|
for (int bpv = 1; bpv <= 64; ++bpv) {
|
||||||
|
final int maxValue = (int) Math.min(PackedInts.maxValue(31), PackedInts.maxValue(bpv));
|
||||||
|
final RAMDirectory directory = new RAMDirectory();
|
||||||
|
List<PackedInts.Mutable> packedInts = createPackedInts(valueCount, bpv);
|
||||||
|
for (PackedInts.Mutable mutable : packedInts) {
|
||||||
|
for (int i = 0; i < mutable.size(); ++i) {
|
||||||
|
mutable.set(i, random().nextInt(maxValue));
|
||||||
|
}
|
||||||
|
|
||||||
|
IndexOutput out = directory.createOutput("packed-ints.bin", IOContext.DEFAULT);
|
||||||
|
mutable.save(out);
|
||||||
|
out.close();
|
||||||
|
|
||||||
|
IndexInput in = directory.openInput("packed-ints.bin", IOContext.DEFAULT);
|
||||||
|
PackedInts.Reader reader = PackedInts.getReader(in);
|
||||||
|
assertEquals(mutable.getBitsPerValue(), reader.getBitsPerValue());
|
||||||
|
assertEquals(valueCount, reader.size());
|
||||||
|
if (mutable instanceof Packed64SingleBlock) {
|
||||||
|
// make sure that we used the right format so that the reader has
|
||||||
|
// the same performance characteristics as the mutable that has been
|
||||||
|
// serialized
|
||||||
|
assertTrue(reader instanceof Packed64SingleBlock);
|
||||||
|
} else {
|
||||||
|
assertFalse(reader instanceof Packed64SingleBlock);
|
||||||
|
}
|
||||||
|
for (int i = 0; i < valueCount; ++i) {
|
||||||
|
assertEquals(mutable.get(i), reader.get(i));
|
||||||
|
}
|
||||||
|
in.close();
|
||||||
|
directory.deleteFile("packed-ints.bin");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -99,8 +99,8 @@ public class RandomCodec extends Lucene40Codec {
|
||||||
new NestedPulsingPostingsFormat(),
|
new NestedPulsingPostingsFormat(),
|
||||||
new Lucene40WithOrds(),
|
new Lucene40WithOrds(),
|
||||||
new SimpleTextPostingsFormat(),
|
new SimpleTextPostingsFormat(),
|
||||||
new MemoryPostingsFormat(true),
|
new MemoryPostingsFormat(true, random.nextFloat()),
|
||||||
new MemoryPostingsFormat(false));
|
new MemoryPostingsFormat(false, random.nextFloat()));
|
||||||
|
|
||||||
Collections.shuffle(formats, random);
|
Collections.shuffle(formats, random);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue