diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 895b99cb23e..53618d71c9c 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -21,8 +21,12 @@ Changes in backwards compatibility policy Robert Muir) ======================= Lucene 4.1.0 ======================= +New Features -(No Changes) +* LUCENE-4404: New ListOfOutputs (in lucene/misc) for FSTs wraps + another Outputs implementation, allowing you to store more than one + output for a single input. UpToTwoPositiveIntsOutputs was moved + from lucene/core to lucene/misc. (Mike McCandless) ======================= Lucene 4.0.0 ======================= diff --git a/lucene/core/src/java/org/apache/lucene/util/fst/Builder.java b/lucene/core/src/java/org/apache/lucene/util/fst/Builder.java index ea9fd45b358..9c25f468926 100644 --- a/lucene/core/src/java/org/apache/lucene/util/fst/Builder.java +++ b/lucene/core/src/java/org/apache/lucene/util/fst/Builder.java @@ -399,8 +399,10 @@ public class Builder { } final UnCompiledNode lastNode = frontier[input.length]; - lastNode.isFinal = true; - lastNode.output = NO_OUTPUT; + if (lastInput.length != input.length || prefixLenPlus1 != input.length + 1) { + lastNode.isFinal = true; + lastNode.output = NO_OUTPUT; + } // push conflicting outputs forward, only as far as // needed diff --git a/lucene/core/src/java/org/apache/lucene/util/fst/FST.java b/lucene/core/src/java/org/apache/lucene/util/fst/FST.java index cd97d7086e4..4375f55f219 100644 --- a/lucene/core/src/java/org/apache/lucene/util/fst/FST.java +++ b/lucene/core/src/java/org/apache/lucene/util/fst/FST.java @@ -296,11 +296,13 @@ public final class FST { // messy bytes = new byte[numBytes]; in.readBytes(bytes, 0, numBytes); + BytesReader reader; if (packed) { - emptyOutput = outputs.read(getBytesReader(0)); + reader = getBytesReader(0); } else { - emptyOutput = outputs.read(getBytesReader(numBytes-1)); + reader = getBytesReader(numBytes-1); } + emptyOutput = outputs.readFinalOutput(reader); } else { emptyOutput = 
null; } @@ -414,7 +416,7 @@ public final class FST { // TODO: this is messy -- replace with sillyBytesWriter; maybe make // bytes private final int posSave = writer.posWrite; - outputs.write(emptyOutput, writer); + outputs.writeFinalOutput(emptyOutput, writer); emptyOutputBytes = new byte[writer.posWrite-posSave]; if (!packed) { @@ -638,7 +640,7 @@ public final class FST { if (arc.nextFinalOutput != NO_OUTPUT) { //System.out.println(" write final output"); - outputs.write(arc.nextFinalOutput, writer); + outputs.writeFinalOutput(arc.nextFinalOutput, writer); } if (targetHasArcs && (flags & BIT_TARGET_NEXT) == 0) { @@ -788,7 +790,7 @@ public final class FST { outputs.read(in); } if (arc.flag(BIT_ARC_HAS_FINAL_OUTPUT)) { - outputs.read(in); + outputs.readFinalOutput(in); } if (arc.flag(BIT_STOP_NODE)) { } else if (arc.flag(BIT_TARGET_NEXT)) { @@ -963,7 +965,7 @@ public final class FST { } if (arc.flag(BIT_ARC_HAS_FINAL_OUTPUT)) { - arc.nextFinalOutput = outputs.read(in); + arc.nextFinalOutput = outputs.readFinalOutput(in); } else { arc.nextFinalOutput = outputs.getNoOutput(); } @@ -1127,7 +1129,7 @@ public final class FST { } if (flag(flags, BIT_ARC_HAS_FINAL_OUTPUT)) { - outputs.read(in); + outputs.readFinalOutput(in); } if (!flag(flags, BIT_STOP_NODE) && !flag(flags, BIT_TARGET_NEXT)) { @@ -1221,6 +1223,14 @@ public final class FST { } } + /** Returns a {@link BytesReader} for this FST, positioned at + * position 0. */ + public BytesReader getBytesReader() { + return getBytesReader(0); + } + + /** Returns a {@link BytesReader} for this FST, positioned at + * the provided position. */ public BytesReader getBytesReader(int pos) { // TODO: maybe re-use via ThreadLocal? 
if (packed) { @@ -1654,7 +1664,7 @@ public final class FST { } } if (arc.nextFinalOutput != NO_OUTPUT) { - outputs.write(arc.nextFinalOutput, writer); + outputs.writeFinalOutput(arc.nextFinalOutput, writer); } if (doWriteTarget) { diff --git a/lucene/core/src/java/org/apache/lucene/util/fst/Outputs.java b/lucene/core/src/java/org/apache/lucene/util/fst/Outputs.java index 7d54c150851..a06d53b80b8 100644 --- a/lucene/core/src/java/org/apache/lucene/util/fst/Outputs.java +++ b/lucene/core/src/java/org/apache/lucene/util/fst/Outputs.java @@ -49,10 +49,27 @@ public abstract class Outputs { /** Eg add("foo", "bar") -> "foobar" */ public abstract T add(T prefix, T output); + /** Encode an output value into a {@link DataOutput}. */ public abstract void write(T output, DataOutput out) throws IOException; + /** Encode a final node output value into a {@link + * DataOutput}. By default this just calls {@link #write(Object, + * DataOutput)}. */ + public void writeFinalOutput(T output, DataOutput out) throws IOException { + write(output, out); + } + + /** Decode an output value previously written with {@link + * #write(Object, DataOutput)}. */ public abstract T read(DataInput in) throws IOException; + /** Decode an output value previously written with {@link + * #writeFinalOutput(Object, DataOutput)}. By default this + * just calls {@link #read(DataInput)}. 
*/ + public T readFinalOutput(DataInput in) throws IOException { + return read(in); + } + /** NOTE: this output is compared with == so you must * ensure that all methods return the single object if * it's really no output */ diff --git a/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java b/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java index 1e38b9a9d5e..27326b97734 100644 --- a/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java +++ b/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java @@ -29,8 +29,6 @@ import java.io.Writer; import java.util.*; import org.apache.lucene.analysis.MockAnalyzer; -import org.apache.lucene.codecs.Codec; -import org.apache.lucene.codecs.lucene40.Lucene40PostingsFormat; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.DirectoryReader; @@ -56,7 +54,6 @@ import org.apache.lucene.util.LineFileDocs; import org.apache.lucene.util.LuceneTestCase.Slow; import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; import org.apache.lucene.util.LuceneTestCase; -import org.apache.lucene.util.UnicodeUtil; import org.apache.lucene.util._TestUtil; import org.apache.lucene.util.automaton.Automaton; import org.apache.lucene.util.automaton.CompiledAutomaton; @@ -67,6 +64,10 @@ import org.apache.lucene.util.fst.FST.BytesReader; import org.apache.lucene.util.fst.PairOutputs.Pair; import org.apache.lucene.util.packed.PackedInts; +import static org.apache.lucene.util.fst.FSTTester.getRandomString; +import static org.apache.lucene.util.fst.FSTTester.simpleRandomString; +import static org.apache.lucene.util.fst.FSTTester.toIntsRef; + @SuppressCodecs({ "SimpleText", "Memory", "Direct" }) @Slow public class TestFSTs extends LuceneTestCase { @@ -87,59 +88,6 @@ public class TestFSTs extends LuceneTestCase { super.tearDown(); } - private static BytesRef toBytesRef(IntsRef ir) { - BytesRef br = new BytesRef(ir.length); - for(int i=0;i= 0 && x <= 255; - 
br.bytes[i] = (byte) x; - } - br.length = ir.length; - return br; - } - - static IntsRef toIntsRef(String s, int inputMode) { - return toIntsRef(s, inputMode, new IntsRef(10)); - } - - static IntsRef toIntsRef(String s, int inputMode, IntsRef ir) { - if (inputMode == 0) { - // utf8 - return toIntsRef(new BytesRef(s), ir); - } else { - // utf32 - return toIntsRefUTF32(s, ir); - } - } - - static IntsRef toIntsRefUTF32(String s, IntsRef ir) { - final int charLength = s.length(); - int charIdx = 0; - int intIdx = 0; - while(charIdx < charLength) { - if (intIdx == ir.ints.length) { - ir.grow(intIdx+1); - } - final int utf32 = s.codePointAt(charIdx); - ir.ints[intIdx] = utf32; - charIdx += Character.charCount(utf32); - intIdx++; - } - ir.length = intIdx; - return ir; - } - - static IntsRef toIntsRef(BytesRef br, IntsRef ir) { - if (br.length > ir.ints.length) { - ir.grow(br.length); - } - for(int i=0;i(term, NO_OUTPUT)); } - new FSTTester(random(), dir, inputMode, pairs, outputs, false).doTest(); + new FSTTester(random(), dir, inputMode, pairs, outputs, false).doTest(true); } // PositiveIntOutput (ord) @@ -241,7 +176,7 @@ public class TestFSTs extends LuceneTestCase { for(int idx=0;idx(terms[idx], (long) idx)); } - new FSTTester(random(), dir, inputMode, pairs, outputs, true).doTest(); + new FSTTester(random(), dir, inputMode, pairs, outputs, true).doTest(true); } // PositiveIntOutput (random monotonically increasing positive number) @@ -255,7 +190,7 @@ public class TestFSTs extends LuceneTestCase { lastOutput = value; pairs.add(new FSTTester.InputOutput(terms[idx], value)); } - new FSTTester(random(), dir, inputMode, pairs, outputs, doShare).doTest(); + new FSTTester(random(), dir, inputMode, pairs, outputs, doShare).doTest(true); } // PositiveIntOutput (random positive number) @@ -265,7 +200,7 @@ public class TestFSTs extends LuceneTestCase { for(int idx=0;idx(terms[idx], _TestUtil.nextLong(random(), 0, Long.MAX_VALUE))); } - new FSTTester(random(), dir, inputMode, 
pairs, outputs, false).doTest(); + new FSTTester(random(), dir, inputMode, pairs, outputs, false).doTest(true); } // Pair @@ -281,7 +216,7 @@ public class TestFSTs extends LuceneTestCase { pairs.add(new FSTTester.InputOutput>(terms[idx], outputs.newPair((long) idx, value))); } - new FSTTester>(random(), dir, inputMode, pairs, outputs, false).doTest(); + new FSTTester>(random(), dir, inputMode, pairs, outputs, false).doTest(true); } // Sequence-of-bytes @@ -293,7 +228,7 @@ public class TestFSTs extends LuceneTestCase { final BytesRef output = random().nextInt(30) == 17 ? NO_OUTPUT : new BytesRef(Integer.toString(idx)); pairs.add(new FSTTester.InputOutput(terms[idx], output)); } - new FSTTester(random(), dir, inputMode, pairs, outputs, false).doTest(); + new FSTTester(random(), dir, inputMode, pairs, outputs, false).doTest(true); } // Sequence-of-ints @@ -309,722 +244,11 @@ public class TestFSTs extends LuceneTestCase { } pairs.add(new FSTTester.InputOutput(terms[idx], output)); } - new FSTTester(random(), dir, inputMode, pairs, outputs, false).doTest(); + new FSTTester(random(), dir, inputMode, pairs, outputs, false).doTest(true); } - // Up to two positive ints, shared, generally but not - // monotonically increasing - { - if (VERBOSE) { - System.out.println("TEST: now test UpToTwoPositiveIntOutputs"); - } - final UpToTwoPositiveIntOutputs outputs = UpToTwoPositiveIntOutputs.getSingleton(true); - final List> pairs = new ArrayList>(terms.length); - long lastOutput = 0; - for(int idx=0;idx(terms[idx], output)); - } - new FSTTester(random(), dir, inputMode, pairs, outputs, false).doTest(); - } } - private static class FSTTester { - - final Random random; - final List> pairs; - final int inputMode; - final Outputs outputs; - final Directory dir; - final boolean doReverseLookup; - - public FSTTester(Random random, Directory dir, int inputMode, List> pairs, Outputs outputs, boolean doReverseLookup) { - this.random = random; - this.dir = dir; - this.inputMode = inputMode; 
- this.pairs = pairs; - this.outputs = outputs; - this.doReverseLookup = doReverseLookup; - } - - private static class InputOutput implements Comparable> { - public final IntsRef input; - public final T output; - - public InputOutput(IntsRef input, T output) { - this.input = input; - this.output = output; - } - - public int compareTo(InputOutput other) { - if (other instanceof InputOutput) { - return input.compareTo((other).input); - } else { - throw new IllegalArgumentException(); - } - } - } - - public void doTest() throws IOException { - // no pruning - doTest(0, 0, true); - - if (!(outputs instanceof UpToTwoPositiveIntOutputs)) { - // simple pruning - doTest(_TestUtil.nextInt(random, 1, 1+pairs.size()), 0, true); - - // leafy pruning - doTest(0, _TestUtil.nextInt(random, 1, 1+pairs.size()), true); - } - } - - // runs the term, returning the output, or null if term - // isn't accepted. if prefixLength is non-null it must be - // length 1 int array; prefixLength[0] is set to the length - // of the term prefix that matches - private T run(FST fst, IntsRef term, int[] prefixLength) throws IOException { - assert prefixLength == null || prefixLength.length == 1; - final FST.Arc arc = fst.getFirstArc(new FST.Arc()); - final T NO_OUTPUT = fst.outputs.getNoOutput(); - T output = NO_OUTPUT; - final FST.BytesReader fstReader = fst.getBytesReader(0); - - for(int i=0;i<=term.length;i++) { - final int label; - if (i == term.length) { - label = FST.END_LABEL; - } else { - label = term.ints[term.offset+i]; - } - // System.out.println(" loop i=" + i + " label=" + label + " output=" + fst.outputs.outputToString(output) + " curArc: target=" + arc.target + " isFinal?=" + arc.isFinal()); - if (fst.findTargetArc(label, arc, arc, fstReader) == null) { - // System.out.println(" not found"); - if (prefixLength != null) { - prefixLength[0] = i; - return output; - } else { - return null; - } - } - output = fst.outputs.add(output, arc.output); - } - - if (prefixLength != null) { - 
prefixLength[0] = term.length; - } - - return output; - } - - private T randomAcceptedWord(FST fst, IntsRef in) throws IOException { - FST.Arc arc = fst.getFirstArc(new FST.Arc()); - - final List> arcs = new ArrayList>(); - in.length = 0; - in.offset = 0; - final T NO_OUTPUT = fst.outputs.getNoOutput(); - T output = NO_OUTPUT; - final FST.BytesReader fstReader = fst.getBytesReader(0); - - while(true) { - // read all arcs: - fst.readFirstTargetArc(arc, arc, fstReader); - arcs.add(new FST.Arc().copyFrom(arc)); - while(!arc.isLast()) { - fst.readNextArc(arc, fstReader); - arcs.add(new FST.Arc().copyFrom(arc)); - } - - // pick one - arc = arcs.get(random.nextInt(arcs.size())); - arcs.clear(); - - // accumulate output - output = fst.outputs.add(output, arc.output); - - // append label - if (arc.label == FST.END_LABEL) { - break; - } - - if (in.ints.length == in.length) { - in.grow(1+in.length); - } - in.ints[in.length++] = arc.label; - } - - return output; - } - - - FST doTest(int prune1, int prune2, boolean allowRandomSuffixSharing) throws IOException { - if (VERBOSE) { - System.out.println("\nTEST: prune1=" + prune1 + " prune2=" + prune2); - } - - final boolean willRewrite = random.nextBoolean(); - - final Builder builder = new Builder(inputMode == 0 ? FST.INPUT_TYPE.BYTE1 : FST.INPUT_TYPE.BYTE4, - prune1, prune2, - prune1==0 && prune2==0, - allowRandomSuffixSharing ? random.nextBoolean() : true, - allowRandomSuffixSharing ? 
_TestUtil.nextInt(random, 1, 10) : Integer.MAX_VALUE, - outputs, - null, - willRewrite); - - for(InputOutput pair : pairs) { - if (pair.output instanceof UpToTwoPositiveIntOutputs.TwoLongs) { - final UpToTwoPositiveIntOutputs _outputs = (UpToTwoPositiveIntOutputs) outputs; - final UpToTwoPositiveIntOutputs.TwoLongs twoLongs = (UpToTwoPositiveIntOutputs.TwoLongs) pair.output; - @SuppressWarnings("unchecked") final Builder builderObject = (Builder) builder; - builderObject.add(pair.input, _outputs.get(twoLongs.first)); - builderObject.add(pair.input, _outputs.get(twoLongs.second)); - } else { - builder.add(pair.input, pair.output); - } - } - FST fst = builder.finish(); - - if (random.nextBoolean() && fst != null && !willRewrite) { - IOContext context = LuceneTestCase.newIOContext(random); - IndexOutput out = dir.createOutput("fst.bin", context); - fst.save(out); - out.close(); - IndexInput in = dir.openInput("fst.bin", context); - try { - fst = new FST(in, outputs); - } finally { - in.close(); - dir.deleteFile("fst.bin"); - } - } - - if (VERBOSE && pairs.size() <= 20 && fst != null) { - Writer w = new OutputStreamWriter(new FileOutputStream("out.dot"), "UTF-8"); - Util.toDot(fst, w, false, false); - w.close(); - System.out.println("SAVED out.dot"); - } - - if (VERBOSE) { - if (fst == null) { - System.out.println(" fst has 0 nodes (fully pruned)"); - } else { - System.out.println(" fst has " + fst.getNodeCount() + " nodes and " + fst.getArcCount() + " arcs"); - } - } - - if (prune1 == 0 && prune2 == 0) { - verifyUnPruned(inputMode, fst); - } else { - verifyPruned(inputMode, fst, prune1, prune2); - } - - if (willRewrite && fst != null) { - if (VERBOSE) { - System.out.println("TEST: now rewrite"); - } - final FST packed = fst.pack(_TestUtil.nextInt(random, 1, 10), _TestUtil.nextInt(random, 0, 10000000), random.nextFloat()); - if (VERBOSE) { - System.out.println("TEST: now verify packed FST"); - } - if (prune1 == 0 && prune2 == 0) { - verifyUnPruned(inputMode, packed); - 
} else { - verifyPruned(inputMode, packed, prune1, prune2); - } - } - - return fst; - } - - // FST is complete - private void verifyUnPruned(int inputMode, FST fst) throws IOException { - - final FST fstLong; - final Set validOutputs; - long minLong = Long.MAX_VALUE; - long maxLong = Long.MIN_VALUE; - - if (doReverseLookup) { - @SuppressWarnings("unchecked") FST fstLong0 = (FST) fst; - fstLong = fstLong0; - validOutputs = new HashSet(); - for(InputOutput pair: pairs) { - Long output = (Long) pair.output; - maxLong = Math.max(maxLong, output); - minLong = Math.min(minLong, output); - validOutputs.add(output); - } - } else { - fstLong = null; - validOutputs = null; - } - - if (pairs.size() == 0) { - assertNull(fst); - return; - } - - if (VERBOSE) { - System.out.println("TEST: now verify " + pairs.size() + " terms"); - for(InputOutput pair : pairs) { - assertNotNull(pair); - assertNotNull(pair.input); - assertNotNull(pair.output); - System.out.println(" " + inputToString(inputMode, pair.input) + ": " + outputs.outputToString(pair.output)); - } - } - - assertNotNull(fst); - - // visit valid pairs in order -- make sure all words - // are accepted, and FSTEnum's next() steps through - // them correctly - if (VERBOSE) { - System.out.println("TEST: check valid terms/next()"); - } - { - IntsRefFSTEnum fstEnum = new IntsRefFSTEnum(fst); - for(InputOutput pair : pairs) { - IntsRef term = pair.input; - if (VERBOSE) { - System.out.println("TEST: check term=" + inputToString(inputMode, term) + " output=" + fst.outputs.outputToString(pair.output)); - } - Object output = run(fst, term, null); - assertNotNull("term " + inputToString(inputMode, term) + " is not accepted", output); - assertEquals(pair.output, output); - - // verify enum's next - IntsRefFSTEnum.InputOutput t = fstEnum.next(); - assertNotNull(t); - assertEquals("expected input=" + inputToString(inputMode, term) + " but fstEnum returned " + inputToString(inputMode, t.input), term, t.input); - assertEquals(pair.output, 
t.output); - } - assertNull(fstEnum.next()); - } - - final Map termsMap = new HashMap(); - for(InputOutput pair : pairs) { - termsMap.put(pair.input, pair.output); - } - - if (doReverseLookup && maxLong > minLong) { - // Do random lookups so we test null (output doesn't - // exist) case: - assertNull(Util.getByOutput(fstLong, minLong-7)); - assertNull(Util.getByOutput(fstLong, maxLong+7)); - - final int num = atLeast(100); - for(int iter=0;iter fstEnum = new IntsRefFSTEnum(fst); - num = atLeast(100); - for(int iter=0;iter seekResult; - if (random.nextInt(3) == 0) { - if (VERBOSE) { - System.out.println(" do non-exist seekExact term=" + inputToString(inputMode, term)); - } - seekResult = fstEnum.seekExact(term); - pos = -1; - } else if (random.nextBoolean()) { - if (VERBOSE) { - System.out.println(" do non-exist seekFloor term=" + inputToString(inputMode, term)); - } - seekResult = fstEnum.seekFloor(term); - pos--; - } else { - if (VERBOSE) { - System.out.println(" do non-exist seekCeil term=" + inputToString(inputMode, term)); - } - seekResult = fstEnum.seekCeil(term); - } - - if (pos != -1 && pos < pairs.size()) { - //System.out.println(" got " + inputToString(inputMode,seekResult.input) + " output=" + fst.outputs.outputToString(seekResult.output)); - assertNotNull("got null but expected term=" + inputToString(inputMode, pairs.get(pos).input), seekResult); - if (VERBOSE) { - System.out.println(" got " + inputToString(inputMode, seekResult.input)); - } - assertEquals("expected " + inputToString(inputMode, pairs.get(pos).input) + " but got " + inputToString(inputMode, seekResult.input), pairs.get(pos).input, seekResult.input); - assertEquals(pairs.get(pos).output, seekResult.output); - } else { - // seeked before start or beyond end - //System.out.println("seek=" + seekTerm); - assertNull("expected null but got " + (seekResult==null ? 
"null" : inputToString(inputMode, seekResult.input)), seekResult); - if (VERBOSE) { - System.out.println(" got null"); - } - } - - break; - } - } - } else { - // seek to term that does exist: - InputOutput pair = pairs.get(random.nextInt(pairs.size())); - final IntsRefFSTEnum.InputOutput seekResult; - if (random.nextInt(3) == 2) { - if (VERBOSE) { - System.out.println(" do exists seekExact term=" + inputToString(inputMode, pair.input)); - } - seekResult = fstEnum.seekExact(pair.input); - } else if (random.nextBoolean()) { - if (VERBOSE) { - System.out.println(" do exists seekFloor " + inputToString(inputMode, pair.input)); - } - seekResult = fstEnum.seekFloor(pair.input); - } else { - if (VERBOSE) { - System.out.println(" do exists seekCeil " + inputToString(inputMode, pair.input)); - } - seekResult = fstEnum.seekCeil(pair.input); - } - assertNotNull(seekResult); - assertEquals("got " + inputToString(inputMode, seekResult.input) + " but expected " + inputToString(inputMode, pair.input), pair.input, seekResult.input); - assertEquals(pair.output, seekResult.output); - } - } - - if (VERBOSE) { - System.out.println("TEST: mixed next/seek"); - } - - // test mixed next/seek - num = atLeast(100); - for(int iter=0;iter(fst); - int upto = -1; - while(true) { - boolean isDone = false; - if (upto == pairs.size()-1 || random.nextBoolean()) { - // next - upto++; - if (VERBOSE) { - System.out.println(" do next"); - } - isDone = fstEnum.next() == null; - } else if (upto != -1 && upto < 0.75 * pairs.size() && random.nextBoolean()) { - int attempt = 0; - for(;attempt<10;attempt++) { - IntsRef term = toIntsRef(getRandomString(random), inputMode); - if (!termsMap.containsKey(term) && term.compareTo(pairs.get(upto).input) > 0) { - int pos = Collections.binarySearch(pairs, new InputOutput(term, null)); - assert pos < 0; - upto = -(pos+1); - - if (random.nextBoolean()) { - upto--; - assertTrue(upto != -1); - if (VERBOSE) { - System.out.println(" do non-exist seekFloor(" + 
inputToString(inputMode, term) + ")"); - } - isDone = fstEnum.seekFloor(term) == null; - } else { - if (VERBOSE) { - System.out.println(" do non-exist seekCeil(" + inputToString(inputMode, term) + ")"); - } - isDone = fstEnum.seekCeil(term) == null; - } - - break; - } - } - if (attempt == 10) { - continue; - } - - } else { - final int inc = random.nextInt(pairs.size() - upto - 1); - upto += inc; - if (upto == -1) { - upto = 0; - } - - if (random.nextBoolean()) { - if (VERBOSE) { - System.out.println(" do seekCeil(" + inputToString(inputMode, pairs.get(upto).input) + ")"); - } - isDone = fstEnum.seekCeil(pairs.get(upto).input) == null; - } else { - if (VERBOSE) { - System.out.println(" do seekFloor(" + inputToString(inputMode, pairs.get(upto).input) + ")"); - } - isDone = fstEnum.seekFloor(pairs.get(upto).input) == null; - } - } - if (VERBOSE) { - if (!isDone) { - System.out.println(" got " + inputToString(inputMode, fstEnum.current().input)); - } else { - System.out.println(" got null"); - } - } - - if (upto == pairs.size()) { - assertTrue(isDone); - break; - } else { - assertFalse(isDone); - assertEquals(pairs.get(upto).input, fstEnum.current().input); - assertEquals(pairs.get(upto).output, fstEnum.current().output); - - /* - if (upto < pairs.size()-1) { - int tryCount = 0; - while(tryCount < 10) { - final IntsRef t = toIntsRef(getRandomString(), inputMode); - if (pairs.get(upto).input.compareTo(t) < 0) { - final boolean expected = t.compareTo(pairs.get(upto+1).input) < 0; - if (VERBOSE) { - System.out.println("TEST: call beforeNext(" + inputToString(inputMode, t) + "); current=" + inputToString(inputMode, pairs.get(upto).input) + " next=" + inputToString(inputMode, pairs.get(upto+1).input) + " expected=" + expected); - } - assertEquals(expected, fstEnum.beforeNext(t)); - break; - } - tryCount++; - } - } - */ - } - } - } - } - - private static class CountMinOutput { - int count; - T output; - T finalOutput; - boolean isLeaf = true; - boolean isFinal; - } - - // 
FST is pruned - private void verifyPruned(int inputMode, FST fst, int prune1, int prune2) throws IOException { - - if (VERBOSE) { - System.out.println("TEST: now verify pruned " + pairs.size() + " terms; outputs=" + outputs); - for(InputOutput pair : pairs) { - System.out.println(" " + inputToString(inputMode, pair.input) + ": " + outputs.outputToString(pair.output)); - } - } - - // To validate the FST, we brute-force compute all prefixes - // in the terms, matched to their "common" outputs, prune that - // set according to the prune thresholds, then assert the FST - // matches that same set. - - // NOTE: Crazy RAM intensive!! - - //System.out.println("TEST: tally prefixes"); - - // build all prefixes - final Map> prefixes = new HashMap>(); - final IntsRef scratch = new IntsRef(10); - for(InputOutput pair: pairs) { - scratch.copyInts(pair.input); - for(int idx=0;idx<=pair.input.length;idx++) { - scratch.length = idx; - CountMinOutput cmo = prefixes.get(scratch); - if (cmo == null) { - cmo = new CountMinOutput(); - cmo.count = 1; - cmo.output = pair.output; - prefixes.put(IntsRef.deepCopyOf(scratch), cmo); - } else { - cmo.count++; - T output1 = cmo.output; - if (output1.equals(outputs.getNoOutput())) { - output1 = outputs.getNoOutput(); - } - T output2 = pair.output; - if (output2.equals(outputs.getNoOutput())) { - output2 = outputs.getNoOutput(); - } - cmo.output = outputs.common(output1, output2); - } - if (idx == pair.input.length) { - cmo.isFinal = true; - cmo.finalOutput = cmo.output; - } - } - } - - if (VERBOSE) { - System.out.println("TEST: now prune"); - } - - // prune 'em - final Iterator>> it = prefixes.entrySet().iterator(); - while(it.hasNext()) { - Map.Entry> ent = it.next(); - final IntsRef prefix = ent.getKey(); - final CountMinOutput cmo = ent.getValue(); - if (VERBOSE) { - System.out.println(" term prefix=" + inputToString(inputMode, prefix, false) + " count=" + cmo.count + " isLeaf=" + cmo.isLeaf + " output=" + outputs.outputToString(cmo.output) + 
" isFinal=" + cmo.isFinal); - } - final boolean keep; - if (prune1 > 0) { - keep = cmo.count >= prune1; - } else { - assert prune2 > 0; - if (prune2 > 1 && cmo.count >= prune2) { - keep = true; - } else if (prefix.length > 0) { - // consult our parent - scratch.length = prefix.length-1; - System.arraycopy(prefix.ints, prefix.offset, scratch.ints, 0, scratch.length); - final CountMinOutput cmo2 = prefixes.get(scratch); - //System.out.println(" parent count = " + (cmo2 == null ? -1 : cmo2.count)); - keep = cmo2 != null && ((prune2 > 1 && cmo2.count >= prune2) || (prune2 == 1 && (cmo2.count >= 2 || prefix.length <= 1))); - } else if (cmo.count >= prune2) { - keep = true; - } else { - keep = false; - } - } - - if (!keep) { - it.remove(); - //System.out.println(" remove"); - } else { - // clear isLeaf for all ancestors - //System.out.println(" keep"); - scratch.copyInts(prefix); - scratch.length--; - while(scratch.length >= 0) { - final CountMinOutput cmo2 = prefixes.get(scratch); - if (cmo2 != null) { - //System.out.println(" clear isLeaf " + inputToString(inputMode, scratch)); - cmo2.isLeaf = false; - } - scratch.length--; - } - } - } - - if (VERBOSE) { - System.out.println("TEST: after prune"); - for(Map.Entry> ent : prefixes.entrySet()) { - System.out.println(" " + inputToString(inputMode, ent.getKey(), false) + ": isLeaf=" + ent.getValue().isLeaf + " isFinal=" + ent.getValue().isFinal); - if (ent.getValue().isFinal) { - System.out.println(" finalOutput=" + outputs.outputToString(ent.getValue().finalOutput)); - } - } - } - - if (prefixes.size() <= 1) { - assertNull(fst); - return; - } - - assertNotNull(fst); - - // make sure FST only enums valid prefixes - if (VERBOSE) { - System.out.println("TEST: check pruned enum"); - } - IntsRefFSTEnum fstEnum = new IntsRefFSTEnum(fst); - IntsRefFSTEnum.InputOutput current; - while((current = fstEnum.next()) != null) { - if (VERBOSE) { - System.out.println(" fstEnum.next prefix=" + inputToString(inputMode, current.input, false) 
+ " output=" + outputs.outputToString(current.output)); - } - final CountMinOutput cmo = prefixes.get(current.input); - assertNotNull(cmo); - assertTrue(cmo.isLeaf || cmo.isFinal); - //if (cmo.isFinal && !cmo.isLeaf) { - if (cmo.isFinal) { - assertEquals(cmo.finalOutput, current.output); - } else { - assertEquals(cmo.output, current.output); - } - } - - // make sure all non-pruned prefixes are present in the FST - if (VERBOSE) { - System.out.println("TEST: verify all prefixes"); - } - final int[] stopNode = new int[1]; - for(Map.Entry> ent : prefixes.entrySet()) { - if (ent.getKey().length > 0) { - final CountMinOutput cmo = ent.getValue(); - final T output = run(fst, ent.getKey(), stopNode); - if (VERBOSE) { - System.out.println("TEST: verify prefix=" + inputToString(inputMode, ent.getKey(), false) + " output=" + outputs.outputToString(cmo.output)); - } - // if (cmo.isFinal && !cmo.isLeaf) { - if (cmo.isFinal) { - assertEquals(cmo.finalOutput, output); - } else { - assertEquals(cmo.output, output); - } - assertEquals(ent.getKey().length, stopNode[0]); - } - } - } - } public void testRandomWords() throws IOException { testRandomWords(1000, atLeast(2)); @@ -1058,40 +282,11 @@ public class TestFSTs extends LuceneTestCase { } } - static String getRandomString(Random random) { - final String term; - if (random.nextBoolean()) { - term = _TestUtil.randomRealisticUnicodeString(random); - } else { - // we want to mix in limited-alphabet symbols so - // we get more sharing of the nodes given how few - // terms we are testing... 
- term = simpleRandomString(random); - } - return term; - } - @Nightly public void testBigSet() throws IOException { testRandomWords(_TestUtil.nextInt(random(), 50000, 60000), 1); } - static String inputToString(int inputMode, IntsRef term) { - return inputToString(inputMode, term, true); - } - - private static String inputToString(int inputMode, IntsRef term, boolean isValidUnicode) { - if (!isValidUnicode) { - return term.toString(); - } else if (inputMode == 0) { - // utf8 - return toBytesRef(term).utf8ToString() + " " + term; - } else { - // utf32 - return UnicodeUtil.newString(term.ints, term.offset, term.length) + " " + term; - } - } - // Build FST for all unique terms in the test line docs // file, up until a time limit public void testRealTerms() throws Exception { diff --git a/lucene/misc/src/java/org/apache/lucene/util/fst/ListOfOutputs.java b/lucene/misc/src/java/org/apache/lucene/util/fst/ListOfOutputs.java new file mode 100644 index 00000000000..bdcf20c8bbe --- /dev/null +++ b/lucene/misc/src/java/org/apache/lucene/util/fst/ListOfOutputs.java @@ -0,0 +1,188 @@ +package org.apache.lucene.util.fst; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import org.apache.lucene.store.DataInput; +import org.apache.lucene.store.DataOutput; + +/** + * Wraps another Outputs implementation and encodes one or + * more of its output values. You can use this when a single + * input may need to map to more than one output, + * maintaining order: pass the same input with a different + * output by calling {@link Builder#add(IntsRef,Object)} multiple + * times. The builder will then combine the outputs using + * the {@link Outputs#merge(Object,Object)} method. + * + *

<p>The resulting FST may not be minimal when an input has + * more than one output, as this requires pushing all + * multi-output values to a final state. + * + *

NOTE: this cannot wrap itself (ie you cannot make an + * FST with List<List<Object>> outputs using this). + * + * @lucene.experimental + */ + + +// NOTE: i think we could get a more compact FST if, instead +// of adding the same input multiple times with a different +// output each time, we added it only once with a +// pre-constructed List output. This way the "multiple +// values" is fully opaque to the Builder/FST. It would +// require implementing the full algebra using set +// arithmetic (I think?); maybe SetOfOutputs is a good name. + +@SuppressWarnings("unchecked") +public final class ListOfOutputs extends Outputs { + + private final Outputs outputs; + + public ListOfOutputs(Outputs outputs) { + this.outputs = outputs; + } + + @Override + public Object common(Object output1, Object output2) { + // These will never be a list: + return outputs.common((T) output1, (T) output2); + } + + @Override + public Object subtract(Object object, Object inc) { + // These will never be a list: + return outputs.subtract((T) object, (T) inc); + } + + @Override + public Object add(Object prefix, Object output) { + assert !(prefix instanceof List); + if (!(output instanceof List)) { + return outputs.add((T) prefix, (T) output); + } else { + List outputList = (List) output; + List addedList = new ArrayList(outputList.size()); + for(T _output : outputList) { + addedList.add(outputs.add((T) prefix, _output)); + } + return addedList; + } + } + + @Override + public void write(Object output, DataOutput out) throws IOException { + assert !(output instanceof List); + outputs.write((T) output, out); + } + + @Override + public void writeFinalOutput(Object output, DataOutput out) throws IOException { + if (!(output instanceof List)) { + out.writeVInt(1); + outputs.write((T) output, out); + } else { + List outputList = (List) output; + out.writeVInt(outputList.size()); + for(T eachOutput : outputList) { + outputs.write(eachOutput, out); + } + } + } + + @Override + public Object 
read(DataInput in) throws IOException { + return outputs.read(in); + } + + @Override + public Object readFinalOutput(DataInput in) throws IOException { + int count = in.readVInt(); + if (count == 1) { + return outputs.read(in); + } else { + List outputList = new ArrayList(count); + for(int i=0;i outputList = (List) output; + + StringBuilder b = new StringBuilder(); + b.append('['); + + for(int i=0;i 0) { + b.append(", "); + } + b.append(outputs.outputToString(outputList.get(i))); + } + b.append(']'); + return b.toString(); + } + } + + @Override + public Object merge(Object first, Object second) { + List outputList = new ArrayList(); + if (!(first instanceof List)) { + outputList.add((T) first); + } else { + outputList.addAll((List) first); + } + if (!(second instanceof List)) { + outputList.add((T) second); + } else { + outputList.addAll((List) second); + } + //System.out.println("MERGE: now " + outputList.size() + " first=" + outputToString(first) + " second=" + outputToString(second)); + //System.out.println(" return " + outputToString(outputList)); + return outputList; + } + + @Override + public String toString() { + return "OneOrMoreOutputs(" + outputs + ")"; + } + + public List asList(Object output) { + if (!(output instanceof List)) { + List result = new ArrayList(1); + result.add((T) output); + return result; + } else { + return (List) output; + } + } +} diff --git a/lucene/core/src/java/org/apache/lucene/util/fst/UpToTwoPositiveIntOutputs.java b/lucene/misc/src/java/org/apache/lucene/util/fst/UpToTwoPositiveIntOutputs.java similarity index 92% rename from lucene/core/src/java/org/apache/lucene/util/fst/UpToTwoPositiveIntOutputs.java rename to lucene/misc/src/java/org/apache/lucene/util/fst/UpToTwoPositiveIntOutputs.java index 8da8fd1b449..04cbbf16ad6 100644 --- a/lucene/core/src/java/org/apache/lucene/util/fst/UpToTwoPositiveIntOutputs.java +++ b/lucene/misc/src/java/org/apache/lucene/util/fst/UpToTwoPositiveIntOutputs.java @@ -17,6 +17,21 @@ package 
org.apache.lucene.util.fst; * limitations under the License. */ +/** + * An FST {@link Outputs} implementation where each output + * is one or two non-negative long values. If it's a + * single output, Long is returned; else, TwoLongs. Order + * is preserved in the TwoLongs case, ie .first is the first + * input/output added to Builder, and .second is the + * second. You cannot store 0 output with this (that's + * reserved to mean "no output")! + * + * NOTE: the resulting FST is not guaranteed to be minimal! + * See {@link Builder}. + * + * @lucene.experimental + */ + import java.io.IOException; import org.apache.lucene.store.DataInput; @@ -36,7 +51,6 @@ import org.apache.lucene.store.DataOutput; * * @lucene.experimental */ - public final class UpToTwoPositiveIntOutputs extends Outputs { /** Holds two long outputs. */ diff --git a/lucene/misc/src/java/org/apache/lucene/util/fst/package.html b/lucene/misc/src/java/org/apache/lucene/util/fst/package.html new file mode 100644 index 00000000000..bc878ba8159 --- /dev/null +++ b/lucene/misc/src/java/org/apache/lucene/util/fst/package.html @@ -0,0 +1,21 @@ + + + +Misc FST classes. + + diff --git a/lucene/misc/src/test/org/apache/lucene/util/fst/TestFSTsMisc.java b/lucene/misc/src/test/org/apache/lucene/util/fst/TestFSTsMisc.java new file mode 100644 index 00000000000..ec395250bc1 --- /dev/null +++ b/lucene/misc/src/test/org/apache/lucene/util/fst/TestFSTsMisc.java @@ -0,0 +1,237 @@ +package org.apache.lucene.util.fst; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashSet; +import java.util.List; +import java.util.Random; +import java.util.Set; + +import org.apache.lucene.store.MockDirectoryWrapper; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.IntsRef; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util._TestUtil; +import org.apache.lucene.util.fst.UpToTwoPositiveIntOutputs.TwoLongs; + +import static org.apache.lucene.util.fst.FSTTester.getRandomString; +import static org.apache.lucene.util.fst.FSTTester.toIntsRef; + +public class TestFSTsMisc extends LuceneTestCase { + + private MockDirectoryWrapper dir; + + @Override + public void setUp() throws Exception { + super.setUp(); + dir = newMockDirectory(); + dir.setPreventDoubleWrite(false); + } + + @Override + public void tearDown() throws Exception { + // can be null if we force simpletext (funky, some kind of bug in test runner maybe) + if (dir != null) dir.close(); + super.tearDown(); + } + + public void testRandomWords() throws IOException { + testRandomWords(1000, LuceneTestCase.atLeast(random(), 2)); + //testRandomWords(100, 1); + } + + private void testRandomWords(int maxNumWords, int numIter) throws IOException { + Random random = new Random(random().nextLong()); + for(int iter=0;iter termsSet = new HashSet(); + IntsRef[] terms = new IntsRef[numWords]; + while(termsSet.size() < numWords) { + final String term = getRandomString(random); + termsSet.add(toIntsRef(term, inputMode)); + } + 
doTest(inputMode, termsSet.toArray(new IntsRef[termsSet.size()])); + } + } + } + + private void doTest(int inputMode, IntsRef[] terms) throws IOException { + Arrays.sort(terms); + + // Up to two positive ints, shared, generally but not + // monotonically increasing + { + if (VERBOSE) { + System.out.println("TEST: now test UpToTwoPositiveIntOutputs"); + } + final UpToTwoPositiveIntOutputs outputs = UpToTwoPositiveIntOutputs.getSingleton(true); + final List> pairs = new ArrayList>(terms.length); + long lastOutput = 0; + for(int idx=0;idx values = new ArrayList(); + values.add(value); + values.add(value2); + output = values; + } else { + output = outputs.get(value); + } + pairs.add(new FSTTester.InputOutput(terms[idx], output)); + } + new FSTTester(random(), dir, inputMode, pairs, outputs, false) { + @Override + protected boolean outputsEqual(Object output1, Object output2) { + if (output1 instanceof TwoLongs && output2 instanceof List) { + TwoLongs twoLongs1 = (TwoLongs) output1; + return Arrays.asList(new Long[] {twoLongs1.first, twoLongs1.second}).equals(output2); + } else if (output2 instanceof TwoLongs && output1 instanceof List) { + TwoLongs twoLongs2 = (TwoLongs) output2; + return Arrays.asList(new Long[] {twoLongs2.first, twoLongs2.second}).equals(output1); + } + return output1.equals(output2); + } + }.doTest(false); + } + + // ListOfOutputs(PositiveIntOutputs), generally but not + // monotonically increasing + { + if (VERBOSE) { + System.out.println("TEST: now test OneOrMoreOutputs"); + } + final PositiveIntOutputs _outputs = PositiveIntOutputs.getSingleton(); + final ListOfOutputs outputs = new ListOfOutputs(_outputs); + final List> pairs = new ArrayList>(terms.length); + long lastOutput = 0; + for(int idx=0;idx values = new ArrayList(); + for(int i=0;i(terms[idx], output)); + } + new FSTTester(random(), dir, inputMode, pairs, outputs, false).doTest(false); + } + } + + public void testListOfOutputs() throws Exception { + PositiveIntOutputs _outputs = 
PositiveIntOutputs.getSingleton(); + ListOfOutputs outputs = new ListOfOutputs(_outputs); + final Builder builder = new Builder(FST.INPUT_TYPE.BYTE1, outputs); + + final IntsRef scratch = new IntsRef(); + // Add the same input more than once and the outputs + // are merged: + builder.add(Util.toIntsRef(new BytesRef("a"), scratch), 1L); + builder.add(Util.toIntsRef(new BytesRef("a"), scratch), 3L); + builder.add(Util.toIntsRef(new BytesRef("a"), scratch), 0L); + builder.add(Util.toIntsRef(new BytesRef("b"), scratch), 17L); + final FST fst = builder.finish(); + + Object output = Util.get(fst, new BytesRef("a")); + assertNotNull(output); + List outputList = outputs.asList(output); + assertEquals(3, outputList.size()); + assertEquals(1L, outputList.get(0).longValue()); + assertEquals(3L, outputList.get(1).longValue()); + assertEquals(0L, outputList.get(2).longValue()); + + output = Util.get(fst, new BytesRef("b")); + assertNotNull(output); + outputList = outputs.asList(output); + assertEquals(1, outputList.size()); + assertEquals(17L, outputList.get(0).longValue()); + } + + public void testListOfOutputsEmptyString() throws Exception { + PositiveIntOutputs _outputs = PositiveIntOutputs.getSingleton(); + ListOfOutputs outputs = new ListOfOutputs(_outputs); + final Builder builder = new Builder(FST.INPUT_TYPE.BYTE1, outputs); + + final IntsRef scratch = new IntsRef(); + builder.add(scratch, 0L); + builder.add(scratch, 1L); + builder.add(scratch, 17L); + builder.add(scratch, 1L); + + builder.add(Util.toIntsRef(new BytesRef("a"), scratch), 1L); + builder.add(Util.toIntsRef(new BytesRef("a"), scratch), 3L); + builder.add(Util.toIntsRef(new BytesRef("a"), scratch), 0L); + builder.add(Util.toIntsRef(new BytesRef("b"), scratch), 0L); + + final FST fst = builder.finish(); + + Object output = Util.get(fst, new BytesRef("")); + assertNotNull(output); + List outputList = outputs.asList(output); + assertEquals(4, outputList.size()); + assertEquals(0L, outputList.get(0).longValue()); 
+ assertEquals(1L, outputList.get(1).longValue()); + assertEquals(17L, outputList.get(2).longValue()); + assertEquals(1L, outputList.get(3).longValue()); + + output = Util.get(fst, new BytesRef("a")); + assertNotNull(output); + outputList = outputs.asList(output); + assertEquals(3, outputList.size()); + assertEquals(1L, outputList.get(0).longValue()); + assertEquals(3L, outputList.get(1).longValue()); + assertEquals(0L, outputList.get(2).longValue()); + + output = Util.get(fst, new BytesRef("b")); + assertNotNull(output); + outputList = outputs.asList(output); + assertEquals(1, outputList.size()); + assertEquals(0L, outputList.get(0).longValue()); + } +} + + diff --git a/lucene/test-framework/src/java/org/apache/lucene/util/fst/FSTTester.java b/lucene/test-framework/src/java/org/apache/lucene/util/fst/FSTTester.java new file mode 100644 index 00000000000..2277b3b9b2a --- /dev/null +++ b/lucene/test-framework/src/java/org/apache/lucene/util/fst/FSTTester.java @@ -0,0 +1,832 @@ +package org.apache.lucene.util.fst; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.OutputStreamWriter; +import java.io.Writer; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Random; +import java.util.Set; + +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IOContext; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.IntsRef; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.UnicodeUtil; +import org.apache.lucene.util._TestUtil; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; + +/** Helper class to test FSTs. 
*/ +public class FSTTester { + + final Random random; + final List> pairs; + final int inputMode; + final Outputs outputs; + final Directory dir; + final boolean doReverseLookup; + + public FSTTester(Random random, Directory dir, int inputMode, List> pairs, Outputs outputs, boolean doReverseLookup) { + this.random = random; + this.dir = dir; + this.inputMode = inputMode; + this.pairs = pairs; + this.outputs = outputs; + this.doReverseLookup = doReverseLookup; + } + + static String inputToString(int inputMode, IntsRef term) { + return inputToString(inputMode, term, true); + } + + static String inputToString(int inputMode, IntsRef term, boolean isValidUnicode) { + if (!isValidUnicode) { + return term.toString(); + } else if (inputMode == 0) { + // utf8 + return toBytesRef(term).utf8ToString() + " " + term; + } else { + // utf32 + return UnicodeUtil.newString(term.ints, term.offset, term.length) + " " + term; + } + } + + private static BytesRef toBytesRef(IntsRef ir) { + BytesRef br = new BytesRef(ir.length); + for(int i=0;i= 0 && x <= 255; + br.bytes[i] = (byte) x; + } + br.length = ir.length; + return br; + } + + static String getRandomString(Random random) { + final String term; + if (random.nextBoolean()) { + term = _TestUtil.randomRealisticUnicodeString(random); + } else { + // we want to mix in limited-alphabet symbols so + // we get more sharing of the nodes given how few + // terms we are testing... 
+ term = simpleRandomString(random); + } + return term; + } + + static String simpleRandomString(Random r) { + final int end = r.nextInt(10); + if (end == 0) { + // allow 0 length + return ""; + } + final char[] buffer = new char[end]; + for (int i = 0; i < end; i++) { + buffer[i] = (char) _TestUtil.nextInt(r, 97, 102); + } + return new String(buffer, 0, end); + } + + static IntsRef toIntsRef(String s, int inputMode) { + return toIntsRef(s, inputMode, new IntsRef(10)); + } + + static IntsRef toIntsRef(String s, int inputMode, IntsRef ir) { + if (inputMode == 0) { + // utf8 + return toIntsRef(new BytesRef(s), ir); + } else { + // utf32 + return toIntsRefUTF32(s, ir); + } + } + + static IntsRef toIntsRefUTF32(String s, IntsRef ir) { + final int charLength = s.length(); + int charIdx = 0; + int intIdx = 0; + while(charIdx < charLength) { + if (intIdx == ir.ints.length) { + ir.grow(intIdx+1); + } + final int utf32 = s.codePointAt(charIdx); + ir.ints[intIdx] = utf32; + charIdx += Character.charCount(utf32); + intIdx++; + } + ir.length = intIdx; + return ir; + } + + static IntsRef toIntsRef(BytesRef br, IntsRef ir) { + if (br.length > ir.ints.length) { + ir.grow(br.length); + } + for(int i=0;i implements Comparable> { + public final IntsRef input; + public final T output; + + public InputOutput(IntsRef input, T output) { + this.input = input; + this.output = output; + } + + public int compareTo(InputOutput other) { + if (other instanceof InputOutput) { + return input.compareTo((other).input); + } else { + throw new IllegalArgumentException(); + } + } + } + + public void doTest(boolean testPruning) throws IOException { + // no pruning + doTest(0, 0, true); + + if (testPruning) { + // simple pruning + doTest(_TestUtil.nextInt(random, 1, 1+pairs.size()), 0, true); + + // leafy pruning + doTest(0, _TestUtil.nextInt(random, 1, 1+pairs.size()), true); + } + } + + // runs the term, returning the output, or null if term + // isn't accepted. 
if prefixLength is non-null it must be + // length 1 int array; prefixLength[0] is set to the length + // of the term prefix that matches + private T run(FST fst, IntsRef term, int[] prefixLength) throws IOException { + assert prefixLength == null || prefixLength.length == 1; + final FST.Arc arc = fst.getFirstArc(new FST.Arc()); + final T NO_OUTPUT = fst.outputs.getNoOutput(); + T output = NO_OUTPUT; + final FST.BytesReader fstReader = fst.getBytesReader(0); + + for(int i=0;i<=term.length;i++) { + final int label; + if (i == term.length) { + label = FST.END_LABEL; + } else { + label = term.ints[term.offset+i]; + } + // System.out.println(" loop i=" + i + " label=" + label + " output=" + fst.outputs.outputToString(output) + " curArc: target=" + arc.target + " isFinal?=" + arc.isFinal()); + if (fst.findTargetArc(label, arc, arc, fstReader) == null) { + // System.out.println(" not found"); + if (prefixLength != null) { + prefixLength[0] = i; + return output; + } else { + return null; + } + } + output = fst.outputs.add(output, arc.output); + } + + if (prefixLength != null) { + prefixLength[0] = term.length; + } + + return output; + } + + private T randomAcceptedWord(FST fst, IntsRef in) throws IOException { + FST.Arc arc = fst.getFirstArc(new FST.Arc()); + + final List> arcs = new ArrayList>(); + in.length = 0; + in.offset = 0; + final T NO_OUTPUT = fst.outputs.getNoOutput(); + T output = NO_OUTPUT; + final FST.BytesReader fstReader = fst.getBytesReader(0); + + while(true) { + // read all arcs: + fst.readFirstTargetArc(arc, arc, fstReader); + arcs.add(new FST.Arc().copyFrom(arc)); + while(!arc.isLast()) { + fst.readNextArc(arc, fstReader); + arcs.add(new FST.Arc().copyFrom(arc)); + } + + // pick one + arc = arcs.get(random.nextInt(arcs.size())); + arcs.clear(); + + // accumulate output + output = fst.outputs.add(output, arc.output); + + // append label + if (arc.label == FST.END_LABEL) { + break; + } + + if (in.ints.length == in.length) { + in.grow(1+in.length); + } + 
in.ints[in.length++] = arc.label; + } + + return output; + } + + + FST doTest(int prune1, int prune2, boolean allowRandomSuffixSharing) throws IOException { + if (LuceneTestCase.VERBOSE) { + System.out.println("\nTEST: prune1=" + prune1 + " prune2=" + prune2); + } + + final boolean willRewrite = random.nextBoolean(); + + final Builder builder = new Builder(inputMode == 0 ? FST.INPUT_TYPE.BYTE1 : FST.INPUT_TYPE.BYTE4, + prune1, prune2, + prune1==0 && prune2==0, + allowRandomSuffixSharing ? random.nextBoolean() : true, + allowRandomSuffixSharing ? _TestUtil.nextInt(random, 1, 10) : Integer.MAX_VALUE, + outputs, + null, + willRewrite); + + for(InputOutput pair : pairs) { + if (pair.output instanceof List) { + @SuppressWarnings("unchecked") List longValues = (List) pair.output; + @SuppressWarnings("unchecked") final Builder builderObject = (Builder) builder; + for(Long value : longValues) { + builderObject.add(pair.input, value); + } + } else { + builder.add(pair.input, pair.output); + } + } + FST fst = builder.finish(); + + if (random.nextBoolean() && fst != null && !willRewrite) { + IOContext context = LuceneTestCase.newIOContext(random); + IndexOutput out = dir.createOutput("fst.bin", context); + fst.save(out); + out.close(); + IndexInput in = dir.openInput("fst.bin", context); + try { + fst = new FST(in, outputs); + } finally { + in.close(); + dir.deleteFile("fst.bin"); + } + } + + if (LuceneTestCase.VERBOSE && pairs.size() <= 20 && fst != null) { + Writer w = new OutputStreamWriter(new FileOutputStream("out.dot"), "UTF-8"); + Util.toDot(fst, w, false, false); + w.close(); + System.out.println("SAVED out.dot"); + } + + if (LuceneTestCase.VERBOSE) { + if (fst == null) { + System.out.println(" fst has 0 nodes (fully pruned)"); + } else { + System.out.println(" fst has " + fst.getNodeCount() + " nodes and " + fst.getArcCount() + " arcs"); + } + } + + if (prune1 == 0 && prune2 == 0) { + verifyUnPruned(inputMode, fst); + } else { + verifyPruned(inputMode, fst, prune1, 
prune2); + } + + if (willRewrite && fst != null) { + if (LuceneTestCase.VERBOSE) { + System.out.println("TEST: now rewrite"); + } + final FST packed = fst.pack(_TestUtil.nextInt(random, 1, 10), _TestUtil.nextInt(random, 0, 10000000), random.nextFloat()); + if (LuceneTestCase.VERBOSE) { + System.out.println("TEST: now verify packed FST"); + } + if (prune1 == 0 && prune2 == 0) { + verifyUnPruned(inputMode, packed); + } else { + verifyPruned(inputMode, packed, prune1, prune2); + } + } + + return fst; + } + + protected boolean outputsEqual(T a, T b) { + return a.equals(b); + } + + // FST is complete + private void verifyUnPruned(int inputMode, FST fst) throws IOException { + + final FST fstLong; + final Set validOutputs; + long minLong = Long.MAX_VALUE; + long maxLong = Long.MIN_VALUE; + + if (doReverseLookup) { + @SuppressWarnings("unchecked") FST fstLong0 = (FST) fst; + fstLong = fstLong0; + validOutputs = new HashSet(); + for(InputOutput pair: pairs) { + Long output = (Long) pair.output; + maxLong = Math.max(maxLong, output); + minLong = Math.min(minLong, output); + validOutputs.add(output); + } + } else { + fstLong = null; + validOutputs = null; + } + + if (pairs.size() == 0) { + assertNull(fst); + return; + } + + if (LuceneTestCase.VERBOSE) { + System.out.println("TEST: now verify " + pairs.size() + " terms"); + for(InputOutput pair : pairs) { + assertNotNull(pair); + assertNotNull(pair.input); + assertNotNull(pair.output); + System.out.println(" " + inputToString(inputMode, pair.input) + ": " + outputs.outputToString(pair.output)); + } + } + + assertNotNull(fst); + + // visit valid pairs in order -- make sure all words + // are accepted, and FSTEnum's next() steps through + // them correctly + if (LuceneTestCase.VERBOSE) { + System.out.println("TEST: check valid terms/next()"); + } + { + IntsRefFSTEnum fstEnum = new IntsRefFSTEnum(fst); + for(InputOutput pair : pairs) { + IntsRef term = pair.input; + if (LuceneTestCase.VERBOSE) { + System.out.println("TEST: check 
term=" + inputToString(inputMode, term) + " output=" + fst.outputs.outputToString(pair.output)); + } + T output = run(fst, term, null); + assertNotNull("term " + inputToString(inputMode, term) + " is not accepted", output); + assertTrue(outputsEqual(pair.output, output)); + + // verify enum's next + IntsRefFSTEnum.InputOutput t = fstEnum.next(); + assertNotNull(t); + assertEquals("expected input=" + inputToString(inputMode, term) + " but fstEnum returned " + inputToString(inputMode, t.input), term, t.input); + assertTrue(outputsEqual(pair.output, t.output)); + } + assertNull(fstEnum.next()); + } + + final Map termsMap = new HashMap(); + for(InputOutput pair : pairs) { + termsMap.put(pair.input, pair.output); + } + + if (doReverseLookup && maxLong > minLong) { + // Do random lookups so we test null (output doesn't + // exist) case: + assertNull(Util.getByOutput(fstLong, minLong-7)); + assertNull(Util.getByOutput(fstLong, maxLong+7)); + + final int num = LuceneTestCase.atLeast(random, 100); + for(int iter=0;iter fstEnum = new IntsRefFSTEnum(fst); + num = LuceneTestCase.atLeast(random, 100); + for(int iter=0;iter seekResult; + if (random.nextInt(3) == 0) { + if (LuceneTestCase.VERBOSE) { + System.out.println(" do non-exist seekExact term=" + inputToString(inputMode, term)); + } + seekResult = fstEnum.seekExact(term); + pos = -1; + } else if (random.nextBoolean()) { + if (LuceneTestCase.VERBOSE) { + System.out.println(" do non-exist seekFloor term=" + inputToString(inputMode, term)); + } + seekResult = fstEnum.seekFloor(term); + pos--; + } else { + if (LuceneTestCase.VERBOSE) { + System.out.println(" do non-exist seekCeil term=" + inputToString(inputMode, term)); + } + seekResult = fstEnum.seekCeil(term); + } + + if (pos != -1 && pos < pairs.size()) { + //System.out.println(" got " + inputToString(inputMode,seekResult.input) + " output=" + fst.outputs.outputToString(seekResult.output)); + assertNotNull("got null but expected term=" + inputToString(inputMode, 
pairs.get(pos).input), seekResult); + if (LuceneTestCase.VERBOSE) { + System.out.println(" got " + inputToString(inputMode, seekResult.input)); + } + assertEquals("expected " + inputToString(inputMode, pairs.get(pos).input) + " but got " + inputToString(inputMode, seekResult.input), pairs.get(pos).input, seekResult.input); + assertTrue(outputsEqual(pairs.get(pos).output, seekResult.output)); + } else { + // seeked before start or beyond end + //System.out.println("seek=" + seekTerm); + assertNull("expected null but got " + (seekResult==null ? "null" : inputToString(inputMode, seekResult.input)), seekResult); + if (LuceneTestCase.VERBOSE) { + System.out.println(" got null"); + } + } + + break; + } + } + } else { + // seek to term that does exist: + InputOutput pair = pairs.get(random.nextInt(pairs.size())); + final IntsRefFSTEnum.InputOutput seekResult; + if (random.nextInt(3) == 2) { + if (LuceneTestCase.VERBOSE) { + System.out.println(" do exists seekExact term=" + inputToString(inputMode, pair.input)); + } + seekResult = fstEnum.seekExact(pair.input); + } else if (random.nextBoolean()) { + if (LuceneTestCase.VERBOSE) { + System.out.println(" do exists seekFloor " + inputToString(inputMode, pair.input)); + } + seekResult = fstEnum.seekFloor(pair.input); + } else { + if (LuceneTestCase.VERBOSE) { + System.out.println(" do exists seekCeil " + inputToString(inputMode, pair.input)); + } + seekResult = fstEnum.seekCeil(pair.input); + } + assertNotNull(seekResult); + assertEquals("got " + inputToString(inputMode, seekResult.input) + " but expected " + inputToString(inputMode, pair.input), pair.input, seekResult.input); + assertTrue(outputsEqual(pair.output, seekResult.output)); + } + } + + if (LuceneTestCase.VERBOSE) { + System.out.println("TEST: mixed next/seek"); + } + + // test mixed next/seek + num = LuceneTestCase.atLeast(random, 100); + for(int iter=0;iter(fst); + int upto = -1; + while(true) { + boolean isDone = false; + if (upto == pairs.size()-1 || 
random.nextBoolean()) { + // next + upto++; + if (LuceneTestCase.VERBOSE) { + System.out.println(" do next"); + } + isDone = fstEnum.next() == null; + } else if (upto != -1 && upto < 0.75 * pairs.size() && random.nextBoolean()) { + int attempt = 0; + for(;attempt<10;attempt++) { + IntsRef term = toIntsRef(getRandomString(random), inputMode); + if (!termsMap.containsKey(term) && term.compareTo(pairs.get(upto).input) > 0) { + int pos = Collections.binarySearch(pairs, new InputOutput(term, null)); + assert pos < 0; + upto = -(pos+1); + + if (random.nextBoolean()) { + upto--; + assertTrue(upto != -1); + if (LuceneTestCase.VERBOSE) { + System.out.println(" do non-exist seekFloor(" + inputToString(inputMode, term) + ")"); + } + isDone = fstEnum.seekFloor(term) == null; + } else { + if (LuceneTestCase.VERBOSE) { + System.out.println(" do non-exist seekCeil(" + inputToString(inputMode, term) + ")"); + } + isDone = fstEnum.seekCeil(term) == null; + } + + break; + } + } + if (attempt == 10) { + continue; + } + + } else { + final int inc = random.nextInt(pairs.size() - upto - 1); + upto += inc; + if (upto == -1) { + upto = 0; + } + + if (random.nextBoolean()) { + if (LuceneTestCase.VERBOSE) { + System.out.println(" do seekCeil(" + inputToString(inputMode, pairs.get(upto).input) + ")"); + } + isDone = fstEnum.seekCeil(pairs.get(upto).input) == null; + } else { + if (LuceneTestCase.VERBOSE) { + System.out.println(" do seekFloor(" + inputToString(inputMode, pairs.get(upto).input) + ")"); + } + isDone = fstEnum.seekFloor(pairs.get(upto).input) == null; + } + } + if (LuceneTestCase.VERBOSE) { + if (!isDone) { + System.out.println(" got " + inputToString(inputMode, fstEnum.current().input)); + } else { + System.out.println(" got null"); + } + } + + if (upto == pairs.size()) { + assertTrue(isDone); + break; + } else { + assertFalse(isDone); + assertEquals(pairs.get(upto).input, fstEnum.current().input); + assertTrue(outputsEqual(pairs.get(upto).output, fstEnum.current().output)); + 
+ /* + if (upto < pairs.size()-1) { + int tryCount = 0; + while(tryCount < 10) { + final IntsRef t = toIntsRef(getRandomString(), inputMode); + if (pairs.get(upto).input.compareTo(t) < 0) { + final boolean expected = t.compareTo(pairs.get(upto+1).input) < 0; + if (LuceneTestCase.VERBOSE) { + System.out.println("TEST: call beforeNext(" + inputToString(inputMode, t) + "); current=" + inputToString(inputMode, pairs.get(upto).input) + " next=" + inputToString(inputMode, pairs.get(upto+1).input) + " expected=" + expected); + } + assertEquals(expected, fstEnum.beforeNext(t)); + break; + } + tryCount++; + } + } + */ + } + } + } + } + + /** Per-prefix bookkeeping for verifyPruned: count is the number of terms sharing the prefix; output is the outputs.common() of those terms' outputs; isFinal is set when a term ends exactly at this prefix, and finalOutput is the value of output captured at that moment; isLeaf starts true and is cleared when a longer prefix is kept during pruning. */ + private static class CountMinOutput { + int count; + T output; + T finalOutput; + boolean isLeaf = true; + boolean isFinal; + } + + // FST is pruned + /** Checks a pruned FST against a brute-force model: tallies every prefix of every input in pairs, applies the prune1/prune2 thresholds to that tally, then asserts that the FST enumerates only surviving prefixes and that every surviving non-empty prefix resolves through run() to the expected output. When at most one prefix survives, asserts that fst is null instead. */ + private void verifyPruned(int inputMode, FST fst, int prune1, int prune2) throws IOException { + + if (LuceneTestCase.VERBOSE) { + System.out.println("TEST: now verify pruned " + pairs.size() + " terms; outputs=" + outputs); + for(InputOutput pair : pairs) { + System.out.println(" " + inputToString(inputMode, pair.input) + ": " + outputs.outputToString(pair.output)); + } + } + + // To validate the FST, we brute-force compute all prefixes + // in the terms, matched to their "common" outputs, prune that + // set according to the prune thresholds, then assert the FST + // matches that same set. + + // NOTE: Crazy RAM intensive!!
+ + //System.out.println("TEST: tally prefixes"); + + // build all prefixes + final Map> prefixes = new HashMap>(); + final IntsRef scratch = new IntsRef(10); + for(InputOutput pair: pairs) { + scratch.copyInts(pair.input); + for(int idx=0;idx<=pair.input.length;idx++) { + scratch.length = idx; + CountMinOutput cmo = prefixes.get(scratch); + if (cmo == null) { + cmo = new CountMinOutput(); + cmo.count = 1; + cmo.output = pair.output; + prefixes.put(IntsRef.deepCopyOf(scratch), cmo); + } else { + cmo.count++; + /* any output that equals() the no-output value is swapped for the instance returned by getNoOutput() before common() is called */ + T output1 = cmo.output; + if (output1.equals(outputs.getNoOutput())) { + output1 = outputs.getNoOutput(); + } + T output2 = pair.output; + if (output2.equals(outputs.getNoOutput())) { + output2 = outputs.getNoOutput(); + } + cmo.output = outputs.common(output1, output2); + } + if (idx == pair.input.length) { + cmo.isFinal = true; + cmo.finalOutput = cmo.output; + } + } + } + + if (LuceneTestCase.VERBOSE) { + System.out.println("TEST: now prune"); + } + + // prune 'em + final Iterator>> it = prefixes.entrySet().iterator(); + while(it.hasNext()) { + Map.Entry> ent = it.next(); + final IntsRef prefix = ent.getKey(); + final CountMinOutput cmo = ent.getValue(); + if (LuceneTestCase.VERBOSE) { + System.out.println(" term prefix=" + inputToString(inputMode, prefix, false) + " count=" + cmo.count + " isLeaf=" + cmo.isLeaf + " output=" + outputs.outputToString(cmo.output) + " isFinal=" + cmo.isFinal); + } + /* keep rule: with prune1 > 0 a prefix survives when at least prune1 terms share it; otherwise prune2 applies, and a non-empty prefix whose own count is below prune2 can still survive based on its parent prefix's count */ + final boolean keep; + if (prune1 > 0) { + keep = cmo.count >= prune1; + } else { + assert prune2 > 0; + if (prune2 > 1 && cmo.count >= prune2) { + keep = true; + } else if (prefix.length > 0) { + // consult our parent + scratch.length = prefix.length-1; + System.arraycopy(prefix.ints, prefix.offset, scratch.ints, 0, scratch.length); + final CountMinOutput cmo2 = prefixes.get(scratch); + //System.out.println(" parent count = " + (cmo2 == null ?
-1 : cmo2.count)); + keep = cmo2 != null && ((prune2 > 1 && cmo2.count >= prune2) || (prune2 == 1 && (cmo2.count >= 2 || prefix.length <= 1))); + } else if (cmo.count >= prune2) { + keep = true; + } else { + keep = false; + } + } + + if (!keep) { + it.remove(); + //System.out.println(" remove"); + } else { + // clear isLeaf for all ancestors + //System.out.println(" keep"); + scratch.copyInts(prefix); + scratch.length--; + while(scratch.length >= 0) { + final CountMinOutput cmo2 = prefixes.get(scratch); + if (cmo2 != null) { + //System.out.println(" clear isLeaf " + inputToString(inputMode, scratch)); + cmo2.isLeaf = false; + } + scratch.length--; + } + } + } + + if (LuceneTestCase.VERBOSE) { + System.out.println("TEST: after prune"); + for(Map.Entry> ent : prefixes.entrySet()) { + System.out.println(" " + inputToString(inputMode, ent.getKey(), false) + ": isLeaf=" + ent.getValue().isLeaf + " isFinal=" + ent.getValue().isFinal); + if (ent.getValue().isFinal) { + System.out.println(" finalOutput=" + outputs.outputToString(ent.getValue().finalOutput)); + } + } + } + + /* when at most one prefix survived pruning, the test expects that no FST was produced at all */ + if (prefixes.size() <= 1) { + assertNull(fst); + return; + } + + assertNotNull(fst); + + // make sure FST only enums valid prefixes + if (LuceneTestCase.VERBOSE) { + System.out.println("TEST: check pruned enum"); + } + IntsRefFSTEnum fstEnum = new IntsRefFSTEnum(fst); + IntsRefFSTEnum.InputOutput current; + while((current = fstEnum.next()) != null) { + if (LuceneTestCase.VERBOSE) { + System.out.println(" fstEnum.next prefix=" + inputToString(inputMode, current.input, false) + " output=" + outputs.outputToString(current.output)); + } + final CountMinOutput cmo = prefixes.get(current.input); + assertNotNull(cmo); + assertTrue(cmo.isLeaf || cmo.isFinal); + //if (cmo.isFinal && !cmo.isLeaf) { + if (cmo.isFinal) { + assertEquals(cmo.finalOutput, current.output); + } else { + assertEquals(cmo.output, current.output); + } + } + + // make sure all non-pruned prefixes are present in the FST + if
(LuceneTestCase.VERBOSE) { + System.out.println("TEST: verify all prefixes"); + } + final int[] stopNode = new int[1]; + for(Map.Entry> ent : prefixes.entrySet()) { + if (ent.getKey().length > 0) { + final CountMinOutput cmo = ent.getValue(); + final T output = run(fst, ent.getKey(), stopNode); + if (LuceneTestCase.VERBOSE) { + System.out.println("TEST: verify prefix=" + inputToString(inputMode, ent.getKey(), false) + " output=" + outputs.outputToString(cmo.output)); + } + // if (cmo.isFinal && !cmo.isLeaf) { + if (cmo.isFinal) { + assertEquals(cmo.finalOutput, output); + } else { + assertEquals(cmo.output, output); + } + /* NOTE(review): run() presumably stores in stopNode[0] how many input ints it consumed, so this asserts the whole prefix was matched -- confirm against run() */ + assertEquals(ent.getKey().length, stopNode[0]); + } + } + } +} diff --git a/lucene/test-framework/src/java/org/apache/lucene/util/fst/package.html b/lucene/test-framework/src/java/org/apache/lucene/util/fst/package.html new file mode 100644 index 00000000000..26289284b56 --- /dev/null +++ b/lucene/test-framework/src/java/org/apache/lucene/util/fst/package.html @@ -0,0 +1,25 @@ + + + + + + + +Support for FST testing. +