mirror of https://github.com/apache/lucene.git

LUCENE-3289: add options to FST Builder to tradeoff RAM/CPU used during build vs how small the resulting FST is

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1145292 13f79535-47bb-0310-9956-ffa450edef68

commit fbf9f4ccad
parent 94a47f4415
@@ -532,6 +532,11 @@ Optimizations
   directly if possible and merges separately written files on the fly instead
   of during close. (Simon Willnauer, Robert Muir)
 
+* LUCENE-3289: When building an FST you can now tune how aggressively
+  the FST should try to share common suffixes. Typically you can
+  greatly reduce RAM required during building, and CPU consumed, at
+  the cost of a somewhat larger FST. (Mike McCandless)
+
 ======================= Lucene 3.3.0 =======================
 
 Changes in backwards compatibility policy
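Not part of the patch itself: a hypothetical sketch of what this tuning looks like from a caller's side, assuming the seven-argument Builder constructor introduced further down in this diff. The class name BuilderTuningSketch and the concrete parameter values (e.g. the tail length of 4) are invented for illustration only.

  import org.apache.lucene.util.fst.Builder;
  import org.apache.lucene.util.fst.FST;
  import org.apache.lucene.util.fst.PositiveIntOutputs;

  class BuilderTuningSketch {
    static Builder<Long> newBuilder(boolean favorSmallFST) {
      PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
      if (favorSmallFST) {
        // Same defaults the two-argument shortcut constructor now delegates to:
        // share every suffix, no tail-length limit -> fully minimal FST.
        return new Builder<Long>(FST.INPUT_TYPE.BYTE1, 0, 0, true,
                                 true, Integer.MAX_VALUE, outputs);
      } else {
        // Cheaper build: only single-arc nodes with short remaining tails are
        // shared, so the dedup hash stays small; the FST comes out somewhat larger.
        return new Builder<Long>(FST.INPUT_TYPE.BYTE1, 0, 0, true,
                                 false, 4, outputs);
      }
    }
  }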
@@ -190,7 +190,7 @@ public class VariableGapTermsIndexReader extends TermsIndexReaderBase {
       if (indexDivisor > 1) {
         // subsample
         final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
-        final Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, 0, 0, true, outputs);
+        final Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, outputs);
         final BytesRefFSTEnum<Long> fstEnum = new BytesRefFSTEnum<Long>(fst);
         BytesRefFSTEnum.InputOutput<Long> result;
         int count = indexDivisor;
@@ -222,9 +222,7 @@ public class VariableGapTermsIndexWriter extends TermsIndexWriterBase {
     public FSTFieldWriter(FieldInfo fieldInfo, long termsFilePointer) throws IOException {
       this.fieldInfo = fieldInfo;
       fstOutputs = PositiveIntOutputs.getSingleton(true);
-      fstBuilder = new Builder<Long>(FST.INPUT_TYPE.BYTE1,
-                                     0, 0, true,
-                                     fstOutputs);
+      fstBuilder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, fstOutputs);
       indexStart = out.getFilePointer();
       ////System.out.println("VGW: field=" + fieldInfo.name);
 
@@ -478,9 +478,6 @@ class SimpleTextFieldsReader extends FieldsProducer {
       PositiveIntOutputs posIntOutputs = PositiveIntOutputs.getSingleton(false);
       final Builder<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>> b;
       b = new Builder<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>>(FST.INPUT_TYPE.BYTE1,
-                                                                          0,
-                                                                          0,
-                                                                          true,
                                                                           new PairOutputs<Long,PairOutputs.Pair<Long,Long>>(posIntOutputs,
                                                                                                                             new PairOutputs<Long,Long>(posIntOutputs, posIntOutputs)));
       IndexInput in = (IndexInput) SimpleTextFieldsReader.this.in.clone();
@@ -62,6 +62,9 @@ public class Builder<T> {
   // terms go through it:
   private final int minSuffixCount2;
 
+  private final boolean doShareNonSingletonNodes;
+  private final int shareMaxTailLength;
+
   private final IntsRef lastInput = new IntsRef();
 
   // NOTE: cutting this over to ArrayList instead loses ~6%
@@ -72,12 +75,11 @@ public class Builder<T> {
 
   /**
    * Instantiates an FST/FSA builder without any pruning. A shortcut
-   * to {@link #Builder(FST.INPUT_TYPE, int, int, boolean, Outputs)} with
+   * to {@link #Builder(FST.INPUT_TYPE, int, int, boolean, boolean, int, Outputs)} with
    * pruning options turned off.
    */
-  public Builder(FST.INPUT_TYPE inputType, Outputs<T> outputs)
-  {
-    this(inputType, 0, 0, true, outputs);
+  public Builder(FST.INPUT_TYPE inputType, Outputs<T> outputs) {
+    this(inputType, 0, 0, true, true, Integer.MAX_VALUE, outputs);
   }
 
   /**
@@ -97,20 +99,34 @@ public class Builder<T> {
    * @param minSuffixCount2
    *    (Note: only Mike McCandless knows what this one is really doing...)
    *
-   * @param doMinSuffix
+   * @param doShareSuffix
    *    If <code>true</code>, the shared suffixes will be compacted into unique paths.
    *    This requires an additional hash map for lookups in memory. Setting this parameter to
    *    <code>false</code> creates a single path for all input sequences. This will result in a larger
    *    graph, but may require less memory and will speed up construction.
+   *
+   * @param doShareNonSingletonNodes
+   *    Only used if doShareSuffix is true.  Set this to
+   *    true to ensure FST is fully minimal, at cost of more
+   *    CPU and more RAM during building.
+   *
+   * @param shareMaxTailLength
+   *    Only used if doShareSuffix is true.  Set this to
+   *    Integer.MAX_VALUE to ensure FST is fully minimal, at cost of more
+   *    CPU and more RAM during building.
+   *
    * @param outputs The output type for each input sequence. Applies only if building an FST. For
    *    FSA, use {@link NoOutputs#getSingleton()} and {@link NoOutputs#getNoOutput()} as the
    *    singleton output object.
    */
-  public Builder(FST.INPUT_TYPE inputType, int minSuffixCount1, int minSuffixCount2, boolean doMinSuffix, Outputs<T> outputs) {
+  public Builder(FST.INPUT_TYPE inputType, int minSuffixCount1, int minSuffixCount2, boolean doShareSuffix,
+                 boolean doShareNonSingletonNodes, int shareMaxTailLength, Outputs<T> outputs) {
     this.minSuffixCount1 = minSuffixCount1;
     this.minSuffixCount2 = minSuffixCount2;
+    this.doShareNonSingletonNodes = doShareNonSingletonNodes;
+    this.shareMaxTailLength = shareMaxTailLength;
     fst = new FST<T>(inputType, outputs);
-    if (doMinSuffix) {
+    if (doShareSuffix) {
       dedupHash = new NodeHash<T>(fst);
     } else {
       dedupHash = null;
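As a companion to the javadoc above, a hypothetical demo (not in this patch) that builds the same pre-sorted input twice and compares node/arc counts, using only APIs that appear elsewhere in this diff (Builder.add, Builder.finish, FST.getNodeCount, FST.getArcCount). SuffixSharingDemo and the shareMaxTailLength of 2 are made up for illustration; less sharing should give more nodes but a cheaper build.

  import org.apache.lucene.util.BytesRef;
  import org.apache.lucene.util.fst.Builder;
  import org.apache.lucene.util.fst.FST;
  import org.apache.lucene.util.fst.NoOutputs;
  import org.apache.lucene.util.fst.Outputs;

  class SuffixSharingDemo {
    static void compare(String[] sortedTerms) throws java.io.IOException {
      Outputs<Object> outputs = NoOutputs.getSingleton();
      // Fully minimal: the same settings the two-argument shortcut constructor uses.
      Builder<Object> full = new Builder<Object>(FST.INPUT_TYPE.BYTE1, 0, 0, true,
                                                 true, Integer.MAX_VALUE, outputs);
      // Limited sharing: only singleton nodes with tails of length <= 2 are deduped.
      Builder<Object> limited = new Builder<Object>(FST.INPUT_TYPE.BYTE1, 0, 0, true,
                                                    false, 2, outputs);
      for (String term : sortedTerms) {           // inputs must be added in sorted order
        full.add(new BytesRef(term), outputs.getNoOutput());
        limited.add(new BytesRef(term), outputs.getNoOutput());
      }
      FST<Object> minimal = full.finish();
      FST<Object> larger = limited.finish();
      // Less sharing means more nodes/arcs, but the NodeHash did less work.
      System.out.println("minimal: " + minimal.getNodeCount() + " nodes, "
                         + minimal.getArcCount() + " arcs; limited: "
                         + larger.getNodeCount() + " nodes, " + larger.getArcCount() + " arcs");
    }
  }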
@@ -143,10 +159,9 @@ public class Builder<T> {
     fst.setAllowArrayArcs(b);
   }
 
-  private CompiledNode compileNode(UnCompiledNode<T> n) throws IOException {
-
+  private CompiledNode compileNode(UnCompiledNode<T> n, int tailLength) throws IOException {
     final int address;
-    if (dedupHash != null) {
+    if (dedupHash != null && (doShareNonSingletonNodes || n.numArcs <= 1) && tailLength <= shareMaxTailLength) {
       if (n.numArcs == 0) {
         address = fst.addNode(n);
       } else {
@@ -221,7 +236,7 @@ public class Builder<T> {
       } else {
 
         if (minSuffixCount2 != 0) {
-          compileAllTargets(node);
+          compileAllTargets(node, lastInput.length-idx);
         }
         final T nextFinalOutput = node.output;
 
@@ -237,7 +252,7 @@ public class Builder<T> {
         // compile any targets that were previously
         // undecided:
         parent.replaceLast(lastInput.ints[lastInput.offset + idx-1],
-                           compileNode(node),
+                           compileNode(node, 1+lastInput.length-idx),
                            nextFinalOutput,
                            isFinal);
       } else {
@@ -428,22 +443,28 @@ public class Builder<T> {
         // empty string got pruned
         return null;
       } else {
-        fst.finish(compileNode(frontier[0]).address);
+        fst.finish(compileNode(frontier[0], lastInput.length).address);
         //System.out.println("compile addr = " + fst.getStartNode());
         return fst;
       }
     } else {
       if (minSuffixCount2 != 0) {
-        compileAllTargets(frontier[0]);
+        compileAllTargets(frontier[0], lastInput.length);
       }
       //System.out.println("NOW: " + frontier[0].numArcs);
-      fst.finish(compileNode(frontier[0]).address);
+      fst.finish(compileNode(frontier[0], lastInput.length).address);
     }
 
+    /*
+    if (dedupHash != null) {
+      System.out.println("NH: " + dedupHash.count());
+    }
+    */
+
     return fst;
   }
 
-  private void compileAllTargets(UnCompiledNode<T> node) throws IOException {
+  private void compileAllTargets(UnCompiledNode<T> node, int tailLength) throws IOException {
     for(int arcIdx=0;arcIdx<node.numArcs;arcIdx++) {
       final Arc<T> arc = node.arcs[arcIdx];
       if (!arc.target.isCompiled()) {
@@ -453,7 +474,7 @@ public class Builder<T> {
           //System.out.println("seg=" + segment + " FORCE final arc=" + (char) arc.label);
           arc.isFinal = n.isFinal = true;
         }
-        arc.target = compileNode(n);
+        arc.target = compileNode(n, tailLength-1);
       }
     }
   }
@@ -25,8 +25,12 @@ import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.CodecUtil;
 import org.apache.lucene.util.fst.Builder.UnCompiledNode;
 
+// TODO: if FST is pure prefix trie we can do a more compact
+// job, ie, once we are at a 'suffix only', just store the
+// completion labels as a string not as a series of arcs.
+
 // NOTE: while the FST is able to represent a non-final
-// dead-end state (NON_FINAL_END_NODE=0), the layres above
+// dead-end state (NON_FINAL_END_NODE=0), the layers above
 // (FSTEnum, Util) have problems with this!!
 
 /** Represents an FST using a compact byte[] format.
@@ -164,4 +164,8 @@ final class NodeHash<T> {
       }
     }
   }
+
+  public int count() {
+    return count;
+  }
 }
@@ -150,7 +150,7 @@ public class TestFSTs extends LuceneTestCase {
       for(IntsRef term : terms2) {
         pairs.add(new FSTTester.InputOutput<Object>(term, NO_OUTPUT));
       }
-      FST<Object> fst = new FSTTester<Object>(random, dir, inputMode, pairs, outputs).doTest(0, 0);
+      FST<Object> fst = new FSTTester<Object>(random, dir, inputMode, pairs, outputs).doTest(0, 0, false);
       assertNotNull(fst);
       assertEquals(22, fst.getNodeCount());
       assertEquals(27, fst.getArcCount());
@@ -163,7 +163,7 @@ public class TestFSTs extends LuceneTestCase {
       for(int idx=0;idx<terms2.length;idx++) {
         pairs.add(new FSTTester.InputOutput<Long>(terms2[idx], outputs.get(idx)));
       }
-      final FST<Long> fst = new FSTTester<Long>(random, dir, inputMode, pairs, outputs).doTest(0, 0);
+      final FST<Long> fst = new FSTTester<Long>(random, dir, inputMode, pairs, outputs).doTest(0, 0, false);
       assertNotNull(fst);
       assertEquals(22, fst.getNodeCount());
       assertEquals(27, fst.getArcCount());
@@ -178,7 +178,7 @@ public class TestFSTs extends LuceneTestCase {
         final BytesRef output = random.nextInt(30) == 17 ? NO_OUTPUT : new BytesRef(Integer.toString(idx));
         pairs.add(new FSTTester.InputOutput<BytesRef>(terms2[idx], output));
       }
-      final FST<BytesRef> fst = new FSTTester<BytesRef>(random, dir, inputMode, pairs, outputs).doTest(0, 0);
+      final FST<BytesRef> fst = new FSTTester<BytesRef>(random, dir, inputMode, pairs, outputs).doTest(0, 0, false);
       assertNotNull(fst);
       assertEquals(24, fst.getNodeCount());
       assertEquals(30, fst.getArcCount());
@@ -359,14 +359,14 @@ public class TestFSTs extends LuceneTestCase {
 
     public void doTest() throws IOException {
       // no pruning
-      doTest(0, 0);
+      doTest(0, 0, true);
 
       if (!(outputs instanceof UpToTwoPositiveIntOutputs)) {
         // simple pruning
-        doTest(_TestUtil.nextInt(random, 1, 1+pairs.size()), 0);
+        doTest(_TestUtil.nextInt(random, 1, 1+pairs.size()), 0, true);
 
         // leafy pruning
-        doTest(0, _TestUtil.nextInt(random, 1, 1+pairs.size()));
+        doTest(0, _TestUtil.nextInt(random, 1, 1+pairs.size()), true);
       }
     }
 
@@ -446,14 +446,17 @@ public class TestFSTs extends LuceneTestCase {
     }
 
 
-    FST<T> doTest(int prune1, int prune2) throws IOException {
+    FST<T> doTest(int prune1, int prune2, boolean allowRandomSuffixSharing) throws IOException {
       if (VERBOSE) {
         System.out.println("TEST: prune1=" + prune1 + " prune2=" + prune2);
       }
 
      final Builder<T> builder = new Builder<T>(inputMode == 0 ? FST.INPUT_TYPE.BYTE1 : FST.INPUT_TYPE.BYTE4,
                                                prune1, prune2,
-                                               prune1==0 && prune2==0, outputs);
+                                               prune1==0 && prune2==0,
+                                               allowRandomSuffixSharing ? random.nextBoolean() : true,
+                                               allowRandomSuffixSharing ? _TestUtil.nextInt(random, 1, 10) : Integer.MAX_VALUE,
+                                               outputs);
 
       for(InputOutput<T> pair : pairs) {
         if (pair.output instanceof UpToTwoPositiveIntOutputs.TwoLongs) {
@@ -1017,7 +1020,7 @@ public class TestFSTs extends LuceneTestCase {
     IndexReader r = IndexReader.open(writer, true);
     writer.close();
     final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(random.nextBoolean());
-    Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, 0, 0, true, outputs);
+    Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, outputs);
 
     boolean storeOrd = random.nextBoolean();
     if (VERBOSE) {
@@ -1145,7 +1148,7 @@ public class TestFSTs extends LuceneTestCase {
       this.inputMode = inputMode;
       this.outputs = outputs;
 
-      builder = new Builder<T>(inputMode == 0 ? FST.INPUT_TYPE.BYTE1 : FST.INPUT_TYPE.BYTE4, 0, prune, prune == 0, outputs);
+      builder = new Builder<T>(inputMode == 0 ? FST.INPUT_TYPE.BYTE1 : FST.INPUT_TYPE.BYTE4, 0, prune, prune == 0, true, Integer.MAX_VALUE, outputs);
     }
 
     protected abstract T getOutput(IntsRef input, int ord) throws IOException;
@@ -1245,7 +1248,7 @@ public class TestFSTs extends LuceneTestCase {
     }
   }
 
-  // java -cp build/classes/test:build/classes/java:lib/junit-4.7.jar org.apache.lucene.util.fst.TestFSTs /x/tmp/allTerms3.txt out
+  // java -cp build/classes/test:build/classes/java:build/classes/test-framework:lib/junit-4.7.jar org.apache.lucene.util.fst.TestFSTs /x/tmp/allTerms3.txt out
   public static void main(String[] args) throws IOException {
     int prune = 0;
     int limit = Integer.MAX_VALUE;
@@ -1351,7 +1354,7 @@ public class TestFSTs extends LuceneTestCase {
 
   public void testSingleString() throws Exception {
     final Outputs<Object> outputs = NoOutputs.getSingleton();
-    final Builder<Object> b = new Builder<Object>(FST.INPUT_TYPE.BYTE1, 0, 0, true, outputs);
+    final Builder<Object> b = new Builder<Object>(FST.INPUT_TYPE.BYTE1, outputs);
     b.add(new BytesRef("foobar"), outputs.getNoOutput());
     final BytesRefFSTEnum<Object> fstEnum = new BytesRefFSTEnum<Object>(b.finish());
     assertNull(fstEnum.seekFloor(new BytesRef("foo")));
@@ -1368,7 +1371,7 @@ public class TestFSTs extends LuceneTestCase {
     final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
 
     // Build an FST mapping BytesRef -> Long
-    final Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, 0, 0, true, outputs);
+    final Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, outputs);
 
     final BytesRef a = new BytesRef("a");
     final BytesRef b = new BytesRef("b");
@@ -1413,7 +1416,7 @@ public class TestFSTs extends LuceneTestCase {
   FST<Object> compile(String[] lines) throws IOException {
     final NoOutputs outputs = NoOutputs.getSingleton();
     final Object nothing = outputs.getNoOutput();
-    final Builder<Object> b = new Builder<Object>(FST.INPUT_TYPE.BYTE1, 0, 0, true, outputs);
+    final Builder<Object> b = new Builder<Object>(FST.INPUT_TYPE.BYTE1, outputs);
 
     int line = 0;
     final BytesRef term = new BytesRef();
@@ -1488,7 +1491,7 @@ public class TestFSTs extends LuceneTestCase {
   public void testNonFinalStopNodes() throws Exception {
     final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
     final Long nothing = outputs.getNoOutput();
-    final Builder<Long> b = new Builder<Long>(FST.INPUT_TYPE.BYTE1, 0, 0, true, outputs);
+    final Builder<Long> b = new Builder<Long>(FST.INPUT_TYPE.BYTE1, outputs);
 
     final FST<Long> fst = new FST<Long>(FST.INPUT_TYPE.BYTE1, outputs);
 
@@ -244,7 +244,7 @@ public class SynonymMap {
     ByteSequenceOutputs outputs = ByteSequenceOutputs.getSingleton();
     // TODO: are we using the best sharing options?
     org.apache.lucene.util.fst.Builder<BytesRef> builder =
-      new org.apache.lucene.util.fst.Builder<BytesRef>(FST.INPUT_TYPE.BYTE4, 0, 0, true, outputs);
+      new org.apache.lucene.util.fst.Builder<BytesRef>(FST.INPUT_TYPE.BYTE4, outputs);
 
     BytesRef scratch = new BytesRef(64);
     ByteArrayDataOutput scratchOutput = new ByteArrayDataOutput();
@@ -450,8 +450,7 @@ public class FSTLookup extends Lookup {
     // Build the automaton.
     final Outputs<Object> outputs = NoOutputs.getSingleton();
     final Object empty = outputs.getNoOutput();
-    final Builder<Object> builder =
-      new Builder<Object>(FST.INPUT_TYPE.BYTE4, 0, 0, true, outputs);
+    final Builder<Object> builder = new Builder<Object>(FST.INPUT_TYPE.BYTE4, outputs);
     final IntsRef scratchIntsRef = new IntsRef(10);
     for (Entry e : entries) {
       final int termLength = scratchIntsRef.length = e.term.length;