mirror of https://github.com/apache/lucene.git
LUCENE-8638: remove deprecated FST get by output
This commit is contained in:
parent
a37844aedd
commit
666c7a2590
|
@ -22,6 +22,8 @@ System Requirements
|
||||||
|
|
||||||
API Changes
|
API Changes
|
||||||
|
|
||||||
|
* LUCENE-8638: Remove deprecated methods in FST for lookup by output.
|
||||||
|
|
||||||
* LUCENE-8638: haversin() expressions function now returns its result in meters rather than
|
* LUCENE-8638: haversin() expressions function now returns its result in meters rather than
|
||||||
kilometers.
|
kilometers.
|
||||||
|
|
||||||
|
|
|
@ -45,7 +45,7 @@ public final class Util {
|
||||||
public static <T> T get(FST<T> fst, IntsRef input) throws IOException {
|
public static <T> T get(FST<T> fst, IntsRef input) throws IOException {
|
||||||
|
|
||||||
// TODO: would be nice not to alloc this on every lookup
|
// TODO: would be nice not to alloc this on every lookup
|
||||||
final FST.Arc<T> arc = fst.getFirstArc(new FST.Arc<T>());
|
final FST.Arc<T> arc = fst.getFirstArc(new FST.Arc<>());
|
||||||
|
|
||||||
final BytesReader fstReader = fst.getBytesReader();
|
final BytesReader fstReader = fst.getBytesReader();
|
||||||
|
|
||||||
|
@ -92,167 +92,6 @@ public final class Util {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Reverse lookup (lookup by output instead of by input), in the special case when your FSTs
|
|
||||||
* outputs are strictly ascending. This locates the input/output pair where the output is equal to
|
|
||||||
* the target, and will return null if that output does not exist.
|
|
||||||
*
|
|
||||||
* <p>NOTE: this only works with {@code FST<Long>}, only works when the outputs are ascending in
|
|
||||||
* order with the inputs. For example, simple ordinals (0, 1, 2, ...), or file offsets (when
|
|
||||||
* appending to a file) fit this.
|
|
||||||
*/
|
|
||||||
@Deprecated
|
|
||||||
public static IntsRef getByOutput(FST<Long> fst, long targetOutput) throws IOException {
|
|
||||||
|
|
||||||
final BytesReader in = fst.getBytesReader();
|
|
||||||
|
|
||||||
// TODO: would be nice not to alloc this on every lookup
|
|
||||||
FST.Arc<Long> arc = fst.getFirstArc(new FST.Arc<Long>());
|
|
||||||
|
|
||||||
FST.Arc<Long> scratchArc = new FST.Arc<>();
|
|
||||||
|
|
||||||
final IntsRefBuilder result = new IntsRefBuilder();
|
|
||||||
return getByOutput(fst, targetOutput, in, arc, scratchArc, result);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Expert: like {@link Util#getByOutput(FST, long)} except reusing BytesReader, initial and
|
|
||||||
* scratch Arc, and result.
|
|
||||||
*/
|
|
||||||
@Deprecated
|
|
||||||
public static IntsRef getByOutput(
|
|
||||||
FST<Long> fst,
|
|
||||||
long targetOutput,
|
|
||||||
BytesReader in,
|
|
||||||
Arc<Long> arc,
|
|
||||||
Arc<Long> scratchArc,
|
|
||||||
IntsRefBuilder result)
|
|
||||||
throws IOException {
|
|
||||||
long output = arc.output();
|
|
||||||
int upto = 0;
|
|
||||||
|
|
||||||
// System.out.println("reverseLookup output=" + targetOutput);
|
|
||||||
|
|
||||||
while (true) {
|
|
||||||
// System.out.println("loop: output=" + output + " upto=" + upto + " arc=" + arc);
|
|
||||||
if (arc.isFinal()) {
|
|
||||||
final long finalOutput = output + arc.nextFinalOutput();
|
|
||||||
// System.out.println(" isFinal finalOutput=" + finalOutput);
|
|
||||||
if (finalOutput == targetOutput) {
|
|
||||||
result.setLength(upto);
|
|
||||||
// System.out.println(" found!");
|
|
||||||
return result.get();
|
|
||||||
} else if (finalOutput > targetOutput) {
|
|
||||||
// System.out.println(" not found!");
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (FST.targetHasArcs(arc)) {
|
|
||||||
// System.out.println(" targetHasArcs");
|
|
||||||
result.grow(1 + upto);
|
|
||||||
|
|
||||||
fst.readFirstRealTargetArc(arc.target(), arc, in);
|
|
||||||
|
|
||||||
if (arc.bytesPerArc() != 0 && arc.nodeFlags() == FST.ARCS_FOR_BINARY_SEARCH) {
|
|
||||||
|
|
||||||
int low = 0;
|
|
||||||
int high = arc.numArcs() - 1;
|
|
||||||
int mid = 0;
|
|
||||||
// System.out.println("bsearch: numArcs=" + arc.numArcs + " target=" + targetOutput + "
|
|
||||||
// output=" + output);
|
|
||||||
boolean exact = false;
|
|
||||||
while (low <= high) {
|
|
||||||
mid = (low + high) >>> 1;
|
|
||||||
in.setPosition(arc.posArcsStart());
|
|
||||||
in.skipBytes(arc.bytesPerArc() * mid);
|
|
||||||
final byte flags = in.readByte();
|
|
||||||
fst.readLabel(in);
|
|
||||||
final long minArcOutput;
|
|
||||||
if ((flags & FST.BIT_ARC_HAS_OUTPUT) != 0) {
|
|
||||||
final long arcOutput = fst.outputs.read(in);
|
|
||||||
minArcOutput = output + arcOutput;
|
|
||||||
} else {
|
|
||||||
minArcOutput = output;
|
|
||||||
}
|
|
||||||
// System.out.println(" cycle mid=" + mid + " output=" + minArcOutput);
|
|
||||||
if (minArcOutput == targetOutput) {
|
|
||||||
exact = true;
|
|
||||||
break;
|
|
||||||
} else if (minArcOutput < targetOutput) {
|
|
||||||
low = mid + 1;
|
|
||||||
} else {
|
|
||||||
high = mid - 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
int idx;
|
|
||||||
if (high == -1) {
|
|
||||||
return null;
|
|
||||||
} else if (exact) {
|
|
||||||
idx = mid;
|
|
||||||
} else {
|
|
||||||
idx = low - 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
fst.readArcByIndex(arc, in, idx);
|
|
||||||
result.setIntAt(upto++, arc.label());
|
|
||||||
output += arc.output();
|
|
||||||
|
|
||||||
} else {
|
|
||||||
|
|
||||||
FST.Arc<Long> prevArc = null;
|
|
||||||
|
|
||||||
while (true) {
|
|
||||||
// System.out.println(" cycle label=" + arc.label + " output=" + arc.output);
|
|
||||||
|
|
||||||
// This is the min output we'd hit if we follow
|
|
||||||
// this arc:
|
|
||||||
final long minArcOutput = output + arc.output();
|
|
||||||
|
|
||||||
if (minArcOutput == targetOutput) {
|
|
||||||
// Recurse on this arc:
|
|
||||||
// System.out.println(" match! break");
|
|
||||||
output = minArcOutput;
|
|
||||||
result.setIntAt(upto++, arc.label());
|
|
||||||
break;
|
|
||||||
} else if (minArcOutput > targetOutput) {
|
|
||||||
if (prevArc == null) {
|
|
||||||
// Output doesn't exist
|
|
||||||
return null;
|
|
||||||
} else {
|
|
||||||
// Recurse on previous arc:
|
|
||||||
arc.copyFrom(prevArc);
|
|
||||||
result.setIntAt(upto++, arc.label());
|
|
||||||
output += arc.output();
|
|
||||||
// System.out.println(" recurse prev label=" + (char) arc.label + " output=" +
|
|
||||||
// output);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
} else if (arc.isLast()) {
|
|
||||||
// Recurse on this arc:
|
|
||||||
output = minArcOutput;
|
|
||||||
// System.out.println(" recurse last label=" + (char) arc.label + " output=" +
|
|
||||||
// output);
|
|
||||||
result.setIntAt(upto++, arc.label());
|
|
||||||
break;
|
|
||||||
} else {
|
|
||||||
// Read next arc in this node:
|
|
||||||
prevArc = scratchArc;
|
|
||||||
prevArc.copyFrom(arc);
|
|
||||||
// System.out.println(" after copy label=" + (char) prevArc.label + " vs " +
|
|
||||||
// (char) arc.label);
|
|
||||||
fst.readNextRealArc(arc, in);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// System.out.println(" no target arcs; not found!");
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Represents a path in TopNSearcher.
|
* Represents a path in TopNSearcher.
|
||||||
*
|
*
|
||||||
|
@ -899,7 +738,7 @@ public final class Util {
|
||||||
scratch.setLength(charLimit);
|
scratch.setLength(charLimit);
|
||||||
scratch.grow(charLimit);
|
scratch.grow(charLimit);
|
||||||
for (int idx = 0; idx < charLimit; idx++) {
|
for (int idx = 0; idx < charLimit; idx++) {
|
||||||
scratch.setIntAt(idx, (int) s.charAt(idx));
|
scratch.setIntAt(idx, s.charAt(idx));
|
||||||
}
|
}
|
||||||
return scratch.get();
|
return scratch.get();
|
||||||
}
|
}
|
||||||
|
@ -1033,9 +872,6 @@ public final class Util {
|
||||||
|
|
||||||
while (true) {
|
while (true) {
|
||||||
// System.out.println(" non-bs cycle");
|
// System.out.println(" non-bs cycle");
|
||||||
// TODO: we should fix this code to not have to create
|
|
||||||
// object for the output of every arc we scan... only
|
|
||||||
// for the matching arc, if found
|
|
||||||
if (arc.label() >= label) {
|
if (arc.label() >= label) {
|
||||||
// System.out.println(" found!");
|
// System.out.println(" found!");
|
||||||
return arc;
|
return arc;
|
||||||
|
@ -1067,12 +903,12 @@ public final class Util {
|
||||||
+ ")";
|
+ ")";
|
||||||
BytesReader in = fst.getBytesReader();
|
BytesReader in = fst.getBytesReader();
|
||||||
int low = arc.arcIdx();
|
int low = arc.arcIdx();
|
||||||
int mid = 0;
|
int mid;
|
||||||
int high = arc.numArcs() - 1;
|
int high = arc.numArcs() - 1;
|
||||||
while (low <= high) {
|
while (low <= high) {
|
||||||
mid = (low + high) >>> 1;
|
mid = (low + high) >>> 1;
|
||||||
in.setPosition(arc.posArcsStart());
|
in.setPosition(arc.posArcsStart());
|
||||||
in.skipBytes(arc.bytesPerArc() * mid + 1);
|
in.skipBytes((long) arc.bytesPerArc() * mid + 1);
|
||||||
final int midLabel = fst.readLabel(in);
|
final int midLabel = fst.readLabel(in);
|
||||||
final int cmp = midLabel - targetLabel;
|
final int cmp = midLabel - targetLabel;
|
||||||
if (cmp < 0) {
|
if (cmp < 0) {
|
||||||
|
|
|
@ -25,8 +25,6 @@
|
||||||
* <li>Fast and low memory overhead construction of the minimal FST (but inputs must be provided
|
* <li>Fast and low memory overhead construction of the minimal FST (but inputs must be provided
|
||||||
* in sorted order)
|
* in sorted order)
|
||||||
* <li>Low object overhead and quick deserialization (byte[] representation)
|
* <li>Low object overhead and quick deserialization (byte[] representation)
|
||||||
* <li>{@link org.apache.lucene.util.fst.Util#getByOutput Lookup-by-output} when the outputs are
|
|
||||||
* in sorted order (e.g., ordinals or file pointers)
|
|
||||||
* <li>Pluggable {@link org.apache.lucene.util.fst.Outputs Outputs} representation
|
* <li>Pluggable {@link org.apache.lucene.util.fst.Outputs Outputs} representation
|
||||||
* <li>{@link org.apache.lucene.util.fst.Util#shortestPaths N-shortest-paths} search by weight
|
* <li>{@link org.apache.lucene.util.fst.Util#shortestPaths N-shortest-paths} search by weight
|
||||||
* <li>Enumerators ({@link org.apache.lucene.util.fst.IntsRefFSTEnum IntsRef} and {@link
|
* <li>Enumerators ({@link org.apache.lucene.util.fst.IntsRefFSTEnum IntsRef} and {@link
|
||||||
|
|
|
@ -277,12 +277,8 @@ public class Test2BFST extends LuceneTestCase {
|
||||||
System.out.println(i + "...: ");
|
System.out.println(i + "...: ");
|
||||||
}
|
}
|
||||||
|
|
||||||
// forward lookup:
|
|
||||||
assertEquals(output, Util.get(fst, input).longValue());
|
assertEquals(output, Util.get(fst, input).longValue());
|
||||||
// reverse lookup:
|
|
||||||
@SuppressWarnings("deprecation")
|
|
||||||
IntsRef inputResult = Util.getByOutput(fst, output);
|
|
||||||
assertEquals(input, inputResult);
|
|
||||||
output += 1 + r.nextInt(10);
|
output += 1 + r.nextInt(10);
|
||||||
nextInput(r, ints);
|
nextInput(r, ints);
|
||||||
}
|
}
|
||||||
|
|
|
@ -92,7 +92,9 @@ public class TestFSTs extends LuceneTestCase {
|
||||||
@Override
|
@Override
|
||||||
public void tearDown() throws Exception {
|
public void tearDown() throws Exception {
|
||||||
// can be null if we force simpletext (funky, some kind of bug in test runner maybe)
|
// can be null if we force simpletext (funky, some kind of bug in test runner maybe)
|
||||||
if (dir != null) dir.close();
|
if (dir != null) {
|
||||||
|
dir.close();
|
||||||
|
}
|
||||||
super.tearDown();
|
super.tearDown();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -133,7 +135,7 @@ public class TestFSTs extends LuceneTestCase {
|
||||||
for (IntsRef term : terms2) {
|
for (IntsRef term : terms2) {
|
||||||
pairs.add(new FSTTester.InputOutput<>(term, NO_OUTPUT));
|
pairs.add(new FSTTester.InputOutput<>(term, NO_OUTPUT));
|
||||||
}
|
}
|
||||||
FSTTester<Object> tester = new FSTTester<>(random(), dir, inputMode, pairs, outputs, false);
|
FSTTester<Object> tester = new FSTTester<>(random(), dir, inputMode, pairs, outputs);
|
||||||
FST<Object> fst = tester.doTest(0, 0, false);
|
FST<Object> fst = tester.doTest(0, 0, false);
|
||||||
assertNotNull(fst);
|
assertNotNull(fst);
|
||||||
assertEquals(22, tester.nodeCount);
|
assertEquals(22, tester.nodeCount);
|
||||||
|
@ -147,7 +149,7 @@ public class TestFSTs extends LuceneTestCase {
|
||||||
for (int idx = 0; idx < terms2.length; idx++) {
|
for (int idx = 0; idx < terms2.length; idx++) {
|
||||||
pairs.add(new FSTTester.InputOutput<>(terms2[idx], (long) idx));
|
pairs.add(new FSTTester.InputOutput<>(terms2[idx], (long) idx));
|
||||||
}
|
}
|
||||||
FSTTester<Long> tester = new FSTTester<>(random(), dir, inputMode, pairs, outputs, true);
|
FSTTester<Long> tester = new FSTTester<>(random(), dir, inputMode, pairs, outputs);
|
||||||
final FST<Long> fst = tester.doTest(0, 0, false);
|
final FST<Long> fst = tester.doTest(0, 0, false);
|
||||||
assertNotNull(fst);
|
assertNotNull(fst);
|
||||||
assertEquals(22, tester.nodeCount);
|
assertEquals(22, tester.nodeCount);
|
||||||
|
@ -157,14 +159,12 @@ public class TestFSTs extends LuceneTestCase {
|
||||||
// FST byte sequence ord
|
// FST byte sequence ord
|
||||||
{
|
{
|
||||||
final ByteSequenceOutputs outputs = ByteSequenceOutputs.getSingleton();
|
final ByteSequenceOutputs outputs = ByteSequenceOutputs.getSingleton();
|
||||||
final BytesRef NO_OUTPUT = outputs.getNoOutput();
|
|
||||||
final List<FSTTester.InputOutput<BytesRef>> pairs = new ArrayList<>(terms2.length);
|
final List<FSTTester.InputOutput<BytesRef>> pairs = new ArrayList<>(terms2.length);
|
||||||
for (int idx = 0; idx < terms2.length; idx++) {
|
for (int idx = 0; idx < terms2.length; idx++) {
|
||||||
final BytesRef output = idx == 17 ? NO_OUTPUT : new BytesRef(Integer.toString(idx));
|
final BytesRef output = new BytesRef(Integer.toString(idx));
|
||||||
pairs.add(new FSTTester.InputOutput<>(terms2[idx], output));
|
pairs.add(new FSTTester.InputOutput<>(terms2[idx], output));
|
||||||
}
|
}
|
||||||
FSTTester<BytesRef> tester =
|
FSTTester<BytesRef> tester = new FSTTester<>(random(), dir, inputMode, pairs, outputs);
|
||||||
new FSTTester<>(random(), dir, inputMode, pairs, outputs, false);
|
|
||||||
final FST<BytesRef> fst = tester.doTest(0, 0, false);
|
final FST<BytesRef> fst = tester.doTest(0, 0, false);
|
||||||
assertNotNull(fst);
|
assertNotNull(fst);
|
||||||
assertEquals(24, tester.nodeCount);
|
assertEquals(24, tester.nodeCount);
|
||||||
|
@ -185,7 +185,7 @@ public class TestFSTs extends LuceneTestCase {
|
||||||
for (IntsRef term : terms) {
|
for (IntsRef term : terms) {
|
||||||
pairs.add(new FSTTester.InputOutput<>(term, NO_OUTPUT));
|
pairs.add(new FSTTester.InputOutput<>(term, NO_OUTPUT));
|
||||||
}
|
}
|
||||||
new FSTTester<>(random(), dir, inputMode, pairs, outputs, false).doTest(true);
|
new FSTTester<>(random(), dir, inputMode, pairs, outputs).doTest(true);
|
||||||
}
|
}
|
||||||
|
|
||||||
// PositiveIntOutput (ord)
|
// PositiveIntOutput (ord)
|
||||||
|
@ -195,7 +195,7 @@ public class TestFSTs extends LuceneTestCase {
|
||||||
for (int idx = 0; idx < terms.length; idx++) {
|
for (int idx = 0; idx < terms.length; idx++) {
|
||||||
pairs.add(new FSTTester.InputOutput<>(terms[idx], (long) idx));
|
pairs.add(new FSTTester.InputOutput<>(terms[idx], (long) idx));
|
||||||
}
|
}
|
||||||
new FSTTester<>(random(), dir, inputMode, pairs, outputs, true).doTest(true);
|
new FSTTester<>(random(), dir, inputMode, pairs, outputs).doTest(true);
|
||||||
}
|
}
|
||||||
|
|
||||||
// PositiveIntOutput (random monotonically increasing positive number)
|
// PositiveIntOutput (random monotonically increasing positive number)
|
||||||
|
@ -203,24 +203,23 @@ public class TestFSTs extends LuceneTestCase {
|
||||||
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
|
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
|
||||||
final List<FSTTester.InputOutput<Long>> pairs = new ArrayList<>(terms.length);
|
final List<FSTTester.InputOutput<Long>> pairs = new ArrayList<>(terms.length);
|
||||||
long lastOutput = 0;
|
long lastOutput = 0;
|
||||||
for (int idx = 0; idx < terms.length; idx++) {
|
for (IntsRef term : terms) {
|
||||||
final long value = lastOutput + TestUtil.nextInt(random(), 1, 1000);
|
final long value = lastOutput + TestUtil.nextInt(random(), 1, 1000);
|
||||||
lastOutput = value;
|
lastOutput = value;
|
||||||
pairs.add(new FSTTester.InputOutput<>(terms[idx], value));
|
pairs.add(new FSTTester.InputOutput<>(term, value));
|
||||||
}
|
}
|
||||||
new FSTTester<>(random(), dir, inputMode, pairs, outputs, true).doTest(true);
|
new FSTTester<>(random(), dir, inputMode, pairs, outputs).doTest(true);
|
||||||
}
|
}
|
||||||
|
|
||||||
// PositiveIntOutput (random positive number)
|
// PositiveIntOutput (random positive number)
|
||||||
{
|
{
|
||||||
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
|
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
|
||||||
final List<FSTTester.InputOutput<Long>> pairs = new ArrayList<>(terms.length);
|
final List<FSTTester.InputOutput<Long>> pairs = new ArrayList<>(terms.length);
|
||||||
for (int idx = 0; idx < terms.length; idx++) {
|
for (IntsRef term : terms) {
|
||||||
pairs.add(
|
pairs.add(
|
||||||
new FSTTester.InputOutput<>(
|
new FSTTester.InputOutput<>(term, TestUtil.nextLong(random(), 0, Long.MAX_VALUE)));
|
||||||
terms[idx], TestUtil.nextLong(random(), 0, Long.MAX_VALUE)));
|
|
||||||
}
|
}
|
||||||
new FSTTester<>(random(), dir, inputMode, pairs, outputs, false).doTest(true);
|
new FSTTester<>(random(), dir, inputMode, pairs, outputs).doTest(true);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Pair<ord, (random monotonically increasing positive number>
|
// Pair<ord, (random monotonically increasing positive number>
|
||||||
|
@ -236,7 +235,7 @@ public class TestFSTs extends LuceneTestCase {
|
||||||
lastOutput = value;
|
lastOutput = value;
|
||||||
pairs.add(new FSTTester.InputOutput<>(terms[idx], outputs.newPair((long) idx, value)));
|
pairs.add(new FSTTester.InputOutput<>(terms[idx], outputs.newPair((long) idx, value)));
|
||||||
}
|
}
|
||||||
new FSTTester<>(random(), dir, inputMode, pairs, outputs, false).doTest(true);
|
new FSTTester<>(random(), dir, inputMode, pairs, outputs).doTest(true);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Sequence-of-bytes
|
// Sequence-of-bytes
|
||||||
|
@ -249,7 +248,7 @@ public class TestFSTs extends LuceneTestCase {
|
||||||
random().nextInt(30) == 17 ? NO_OUTPUT : new BytesRef(Integer.toString(idx));
|
random().nextInt(30) == 17 ? NO_OUTPUT : new BytesRef(Integer.toString(idx));
|
||||||
pairs.add(new FSTTester.InputOutput<>(terms[idx], output));
|
pairs.add(new FSTTester.InputOutput<>(terms[idx], output));
|
||||||
}
|
}
|
||||||
new FSTTester<>(random(), dir, inputMode, pairs, outputs, false).doTest(true);
|
new FSTTester<>(random(), dir, inputMode, pairs, outputs).doTest(true);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Sequence-of-ints
|
// Sequence-of-ints
|
||||||
|
@ -265,7 +264,7 @@ public class TestFSTs extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
pairs.add(new FSTTester.InputOutput<>(terms[idx], output));
|
pairs.add(new FSTTester.InputOutput<>(terms[idx], output));
|
||||||
}
|
}
|
||||||
new FSTTester<>(random(), dir, inputMode, pairs, outputs, false).doTest(true);
|
new FSTTester<>(random(), dir, inputMode, pairs, outputs).doTest(true);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -298,7 +297,7 @@ public class TestFSTs extends LuceneTestCase {
|
||||||
final String term = getRandomString(random);
|
final String term = getRandomString(random);
|
||||||
termsSet.add(toIntsRef(term, inputMode));
|
termsSet.add(toIntsRef(term, inputMode));
|
||||||
}
|
}
|
||||||
doTest(inputMode, termsSet.toArray(new IntsRef[termsSet.size()]));
|
doTest(inputMode, termsSet.toArray(new IntsRef[0]));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -497,7 +496,7 @@ public class TestFSTs extends LuceneTestCase {
|
||||||
private abstract static class VisitTerms<T> {
|
private abstract static class VisitTerms<T> {
|
||||||
private final Path dirOut;
|
private final Path dirOut;
|
||||||
private final Path wordsFileIn;
|
private final Path wordsFileIn;
|
||||||
private int inputMode;
|
private final int inputMode;
|
||||||
private final Outputs<T> outputs;
|
private final Outputs<T> outputs;
|
||||||
private final FSTCompiler<T> fstCompiler;
|
private final FSTCompiler<T> fstCompiler;
|
||||||
|
|
||||||
|
@ -524,7 +523,7 @@ public class TestFSTs extends LuceneTestCase {
|
||||||
|
|
||||||
protected abstract T getOutput(IntsRef input, int ord) throws IOException;
|
protected abstract T getOutput(IntsRef input, int ord) throws IOException;
|
||||||
|
|
||||||
public void run(int limit, boolean verify, boolean verifyByOutput) throws IOException {
|
public void run(int limit, boolean verify) throws IOException {
|
||||||
|
|
||||||
BufferedReader is = Files.newBufferedReader(wordsFileIn, StandardCharsets.UTF_8);
|
BufferedReader is = Files.newBufferedReader(wordsFileIn, StandardCharsets.UTF_8);
|
||||||
try {
|
try {
|
||||||
|
@ -541,12 +540,11 @@ public class TestFSTs extends LuceneTestCase {
|
||||||
|
|
||||||
ord++;
|
ord++;
|
||||||
if (ord % 500000 == 0) {
|
if (ord % 500000 == 0) {
|
||||||
System.out.println(
|
System.out.printf(
|
||||||
String.format(
|
Locale.ROOT,
|
||||||
Locale.ROOT,
|
"%6.2fs: %9d...",
|
||||||
"%6.2fs: %9d...",
|
((System.currentTimeMillis() - tStart) / 1000.0),
|
||||||
((System.currentTimeMillis() - tStart) / 1000.0),
|
ord);
|
||||||
ord));
|
|
||||||
}
|
}
|
||||||
if (ord >= limit) {
|
if (ord >= limit) {
|
||||||
break;
|
break;
|
||||||
|
@ -594,90 +592,51 @@ public class TestFSTs extends LuceneTestCase {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
IndexInput in = dir.openInput("fst.bin", IOContext.DEFAULT);
|
|
||||||
fst = new FST<T>(in, outputs);
|
|
||||||
in.close();
|
|
||||||
*/
|
|
||||||
|
|
||||||
System.out.println("\nNow verify...");
|
System.out.println("\nNow verify...");
|
||||||
|
|
||||||
|
is.close();
|
||||||
|
is = Files.newBufferedReader(wordsFileIn, StandardCharsets.UTF_8);
|
||||||
|
|
||||||
|
ord = 0;
|
||||||
|
tStart = System.currentTimeMillis();
|
||||||
while (true) {
|
while (true) {
|
||||||
for (int iter = 0; iter < 2; iter++) {
|
String w = is.readLine();
|
||||||
is.close();
|
if (w == null) {
|
||||||
is = Files.newBufferedReader(wordsFileIn, StandardCharsets.UTF_8);
|
break;
|
||||||
|
}
|
||||||
ord = 0;
|
toIntsRef(w, inputMode, intsRefBuilder);
|
||||||
tStart = System.currentTimeMillis();
|
T expected = getOutput(intsRefBuilder.get(), ord);
|
||||||
while (true) {
|
T actual = Util.get(fst, intsRefBuilder.get());
|
||||||
String w = is.readLine();
|
if (actual == null) {
|
||||||
if (w == null) {
|
throw new RuntimeException("unexpected null output on input=" + w);
|
||||||
break;
|
}
|
||||||
}
|
if (!actual.equals(expected)) {
|
||||||
toIntsRef(w, inputMode, intsRefBuilder);
|
throw new RuntimeException(
|
||||||
if (iter == 0) {
|
"wrong output (got "
|
||||||
T expected = getOutput(intsRefBuilder.get(), ord);
|
+ outputs.outputToString(actual)
|
||||||
T actual = Util.get(fst, intsRefBuilder.get());
|
+ " but expected "
|
||||||
if (actual == null) {
|
+ outputs.outputToString(expected)
|
||||||
throw new RuntimeException("unexpected null output on input=" + w);
|
+ ") on input="
|
||||||
}
|
+ w);
|
||||||
if (!actual.equals(expected)) {
|
}
|
||||||
throw new RuntimeException(
|
ord++;
|
||||||
"wrong output (got "
|
if (ord % 500000 == 0) {
|
||||||
+ outputs.outputToString(actual)
|
System.out.println(
|
||||||
+ " but expected "
|
((System.currentTimeMillis() - tStart) / 1000.0) + "s: " + ord + "...");
|
||||||
+ outputs.outputToString(expected)
|
}
|
||||||
+ ") on input="
|
if (ord >= limit) {
|
||||||
+ w);
|
break;
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// Get by output
|
|
||||||
final Long output = (Long) getOutput(intsRefBuilder.get(), ord);
|
|
||||||
@SuppressWarnings({"unchecked", "deprecation"})
|
|
||||||
final IntsRef actual = Util.getByOutput((FST<Long>) fst, output.longValue());
|
|
||||||
if (actual == null) {
|
|
||||||
throw new RuntimeException("unexpected null input from output=" + output);
|
|
||||||
}
|
|
||||||
if (!actual.equals(intsRefBuilder.get())) {
|
|
||||||
throw new RuntimeException(
|
|
||||||
"wrong input (got "
|
|
||||||
+ actual
|
|
||||||
+ " but expected "
|
|
||||||
+ intsRefBuilder
|
|
||||||
+ " from output="
|
|
||||||
+ output);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
ord++;
|
|
||||||
if (ord % 500000 == 0) {
|
|
||||||
System.out.println(
|
|
||||||
((System.currentTimeMillis() - tStart) / 1000.0) + "s: " + ord + "...");
|
|
||||||
}
|
|
||||||
if (ord >= limit) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
double totSec = ((System.currentTimeMillis() - tStart) / 1000.0);
|
|
||||||
System.out.println(
|
|
||||||
"Verify "
|
|
||||||
+ (iter == 1 ? "(by output) " : "")
|
|
||||||
+ "took "
|
|
||||||
+ totSec
|
|
||||||
+ " sec + ("
|
|
||||||
+ (int) ((totSec * 1000000000 / ord))
|
|
||||||
+ " nsec per lookup)");
|
|
||||||
|
|
||||||
if (!verifyByOutput) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// NOTE: comment out to profile lookup...
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
double totSec = ((System.currentTimeMillis() - tStart) / 1000.0);
|
||||||
|
System.out.println(
|
||||||
|
"Verify took "
|
||||||
|
+ totSec
|
||||||
|
+ " sec + ("
|
||||||
|
+ (int) ((totSec * 1000000000 / ord))
|
||||||
|
+ " nsec per lookup)");
|
||||||
|
|
||||||
} finally {
|
} finally {
|
||||||
is.close();
|
is.close();
|
||||||
}
|
}
|
||||||
|
@ -762,7 +721,7 @@ public class TestFSTs extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
return outputs.newPair((long) ord, (long) TestUtil.nextInt(rand, 1, 5000));
|
return outputs.newPair((long) ord, (long) TestUtil.nextInt(rand, 1, 5000));
|
||||||
}
|
}
|
||||||
}.run(limit, verify, false);
|
}.run(limit, verify);
|
||||||
} else if (storeOrds) {
|
} else if (storeOrds) {
|
||||||
// Store only ords
|
// Store only ords
|
||||||
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
|
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
|
||||||
|
@ -771,7 +730,7 @@ public class TestFSTs extends LuceneTestCase {
|
||||||
public Long getOutput(IntsRef input, int ord) {
|
public Long getOutput(IntsRef input, int ord) {
|
||||||
return (long) ord;
|
return (long) ord;
|
||||||
}
|
}
|
||||||
}.run(limit, verify, true);
|
}.run(limit, verify);
|
||||||
} else if (storeDocFreqs) {
|
} else if (storeDocFreqs) {
|
||||||
// Store only docFreq
|
// Store only docFreq
|
||||||
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
|
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
|
||||||
|
@ -785,7 +744,7 @@ public class TestFSTs extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
return (long) TestUtil.nextInt(rand, 1, 5000);
|
return (long) TestUtil.nextInt(rand, 1, 5000);
|
||||||
}
|
}
|
||||||
}.run(limit, verify, false);
|
}.run(limit, verify);
|
||||||
} else {
|
} else {
|
||||||
// Store nothing
|
// Store nothing
|
||||||
final NoOutputs outputs = NoOutputs.getSingleton();
|
final NoOutputs outputs = NoOutputs.getSingleton();
|
||||||
|
@ -795,7 +754,7 @@ public class TestFSTs extends LuceneTestCase {
|
||||||
public Object getOutput(IntsRef input, int ord) {
|
public Object getOutput(IntsRef input, int ord) {
|
||||||
return NO_OUTPUT;
|
return NO_OUTPUT;
|
||||||
}
|
}
|
||||||
}.run(limit, verify, false);
|
}.run(limit, verify);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -913,22 +872,6 @@ public class TestFSTs extends LuceneTestCase {
|
||||||
assertNotNull(seekResult);
|
assertNotNull(seekResult);
|
||||||
assertEquals(b, seekResult.input);
|
assertEquals(b, seekResult.input);
|
||||||
assertEquals(42, (long) seekResult.output);
|
assertEquals(42, (long) seekResult.output);
|
||||||
|
|
||||||
@SuppressWarnings("deprecation")
|
|
||||||
IntsRef byOutput = Util.getByOutput(fst, 13824324872317238L);
|
|
||||||
assertEquals(Util.toIntsRef(new BytesRef("c"), new IntsRefBuilder()), byOutput);
|
|
||||||
|
|
||||||
@SuppressWarnings("deprecation")
|
|
||||||
IntsRef byOutput47 = Util.getByOutput(fst, 47);
|
|
||||||
assertNull(byOutput47);
|
|
||||||
|
|
||||||
@SuppressWarnings("deprecation")
|
|
||||||
IntsRef byOutput42 = Util.getByOutput(fst, 42);
|
|
||||||
assertEquals(Util.toIntsRef(new BytesRef("b"), new IntsRefBuilder()), byOutput42);
|
|
||||||
|
|
||||||
@SuppressWarnings("deprecation")
|
|
||||||
IntsRef byOutput17 = Util.getByOutput(fst, 17);
|
|
||||||
assertEquals(Util.toIntsRef(new BytesRef("a"), new IntsRefBuilder()), byOutput17);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testPrimaryKeys() throws Exception {
|
public void testPrimaryKeys() throws Exception {
|
||||||
|
@ -991,12 +934,9 @@ public class TestFSTs extends LuceneTestCase {
|
||||||
if (cycle == 0) {
|
if (cycle == 0) {
|
||||||
idString = String.format(Locale.ROOT, "%07d", (NUM_IDS + idx));
|
idString = String.format(Locale.ROOT, "%07d", (NUM_IDS + idx));
|
||||||
} else {
|
} else {
|
||||||
while (true) {
|
do {
|
||||||
idString = Long.toString(random().nextLong());
|
idString = Long.toString(random().nextLong());
|
||||||
if (!allIDs.contains(idString)) {
|
} while (allIDs.contains(idString));
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
outOfBounds.add(idString);
|
outOfBounds.add(idString);
|
||||||
allIDsList.add(idString);
|
allIDsList.add(idString);
|
||||||
|
@ -1063,8 +1003,7 @@ public class TestFSTs extends LuceneTestCase {
|
||||||
new BytesRef(nextID),
|
new BytesRef(nextID),
|
||||||
termsEnum.term());
|
termsEnum.term());
|
||||||
} else if (!exists) {
|
} else if (!exists) {
|
||||||
assertTrue(
|
assertEquals(TermsEnum.SeekStatus.NOT_FOUND, status);
|
||||||
status == TermsEnum.SeekStatus.NOT_FOUND || status == TermsEnum.SeekStatus.END);
|
|
||||||
} else {
|
} else {
|
||||||
assertEquals(TermsEnum.SeekStatus.FOUND, status);
|
assertEquals(TermsEnum.SeekStatus.FOUND, status);
|
||||||
}
|
}
|
||||||
|
@ -1204,7 +1143,7 @@ public class TestFSTs extends LuceneTestCase {
|
||||||
ArrayList<String> out = new ArrayList<>();
|
ArrayList<String> out = new ArrayList<>();
|
||||||
StringBuilder b = new StringBuilder();
|
StringBuilder b = new StringBuilder();
|
||||||
s.generate(out, b, 'a', 'i', 10);
|
s.generate(out, b, 'a', 'i', 10);
|
||||||
String[] input = out.toArray(new String[out.size()]);
|
String[] input = out.toArray(new String[0]);
|
||||||
Arrays.sort(input);
|
Arrays.sort(input);
|
||||||
FST<Object> fst = s.compile(input);
|
FST<Object> fst = s.compile(input);
|
||||||
FST.Arc<Object> arc = fst.getFirstArc(new FST.Arc<>());
|
FST.Arc<Object> arc = fst.getFirstArc(new FST.Arc<>());
|
||||||
|
@ -1224,7 +1163,7 @@ public class TestFSTs extends LuceneTestCase {
|
||||||
Util.toDot(fst, w, false, false);
|
Util.toDot(fst, w, false, false);
|
||||||
w.close();
|
w.close();
|
||||||
// System.out.println(w.toString());
|
// System.out.println(w.toString());
|
||||||
assertTrue(w.toString().indexOf("label=\"t/[7]\"") != -1);
|
assertTrue(w.toString().contains("label=\"t/[7]\""));
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testInternalFinalState() throws Exception {
|
public void testInternalFinalState() throws Exception {
|
||||||
|
@ -1242,9 +1181,9 @@ public class TestFSTs extends LuceneTestCase {
|
||||||
// System.out.println(w.toString());
|
// System.out.println(w.toString());
|
||||||
|
|
||||||
// check for accept state at label t
|
// check for accept state at label t
|
||||||
assertTrue(w.toString().indexOf("[label=\"t\" style=\"bold\"") != -1);
|
assertTrue(w.toString().contains("[label=\"t\" style=\"bold\""));
|
||||||
// check for accept state at label n
|
// check for accept state at label n
|
||||||
assertTrue(w.toString().indexOf("[label=\"n\" style=\"bold\"") != -1);
|
assertTrue(w.toString().contains("[label=\"n\" style=\"bold\""));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Make sure raw FST can differentiate between final vs
|
// Make sure raw FST can differentiate between final vs
|
||||||
|
@ -1253,9 +1192,6 @@ public class TestFSTs extends LuceneTestCase {
|
||||||
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
|
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
|
||||||
final Long nothing = outputs.getNoOutput();
|
final Long nothing = outputs.getNoOutput();
|
||||||
final FSTCompiler<Long> fstCompiler = new FSTCompiler<>(FST.INPUT_TYPE.BYTE1, outputs);
|
final FSTCompiler<Long> fstCompiler = new FSTCompiler<>(FST.INPUT_TYPE.BYTE1, outputs);
|
||||||
|
|
||||||
// final FST<Long> fst = new FST<>(FST.INPUT_TYPE.BYTE1, outputs, false, PackedInts.COMPACT,
|
|
||||||
// 15);
|
|
||||||
final FST<Long> fst = fstCompiler.fst;
|
final FST<Long> fst = fstCompiler.fst;
|
||||||
|
|
||||||
final FSTCompiler.UnCompiledNode<Long> rootNode =
|
final FSTCompiler.UnCompiledNode<Long> rootNode =
|
||||||
|
@ -1311,11 +1247,11 @@ public class TestFSTs extends LuceneTestCase {
|
||||||
|
|
||||||
private void checkStopNodes(FST<Long> fst, PositiveIntOutputs outputs) throws Exception {
|
private void checkStopNodes(FST<Long> fst, PositiveIntOutputs outputs) throws Exception {
|
||||||
final Long nothing = outputs.getNoOutput();
|
final Long nothing = outputs.getNoOutput();
|
||||||
FST.Arc<Long> startArc = fst.getFirstArc(new FST.Arc<Long>());
|
FST.Arc<Long> startArc = fst.getFirstArc(new FST.Arc<>());
|
||||||
assertEquals(nothing, startArc.output());
|
assertEquals(nothing, startArc.output());
|
||||||
assertEquals(nothing, startArc.nextFinalOutput());
|
assertEquals(nothing, startArc.nextFinalOutput());
|
||||||
|
|
||||||
FST.Arc<Long> arc = fst.readFirstTargetArc(startArc, new FST.Arc<Long>(), fst.getBytesReader());
|
FST.Arc<Long> arc = fst.readFirstTargetArc(startArc, new FST.Arc<>(), fst.getBytesReader());
|
||||||
assertEquals('a', arc.label());
|
assertEquals('a', arc.label());
|
||||||
assertEquals(17, arc.nextFinalOutput().longValue());
|
assertEquals(17, arc.nextFinalOutput().longValue());
|
||||||
assertTrue(arc.isFinal());
|
assertTrue(arc.isFinal());
|
||||||
|
@ -1326,13 +1262,7 @@ public class TestFSTs extends LuceneTestCase {
|
||||||
assertEquals(42, arc.output().longValue());
|
assertEquals(42, arc.output().longValue());
|
||||||
}
|
}
|
||||||
|
|
||||||
static final Comparator<Long> minLongComparator =
|
static final Comparator<Long> minLongComparator = Comparator.naturalOrder();
|
||||||
new Comparator<Long>() {
|
|
||||||
@Override
|
|
||||||
public int compare(Long left, Long right) {
|
|
||||||
return left.compareTo(right);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
public void testShortestPaths() throws Exception {
|
public void testShortestPaths() throws Exception {
|
||||||
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
|
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
|
||||||
|
@ -1350,7 +1280,7 @@ public class TestFSTs extends LuceneTestCase {
|
||||||
Util.TopResults<Long> res =
|
Util.TopResults<Long> res =
|
||||||
Util.shortestPaths(
|
Util.shortestPaths(
|
||||||
fst,
|
fst,
|
||||||
fst.getFirstArc(new FST.Arc<Long>()),
|
fst.getFirstArc(new FST.Arc<>()),
|
||||||
outputs.getNoOutput(),
|
outputs.getNoOutput(),
|
||||||
minLongComparator,
|
minLongComparator,
|
||||||
3,
|
3,
|
||||||
|
@ -1369,7 +1299,7 @@ public class TestFSTs extends LuceneTestCase {
|
||||||
|
|
||||||
public void testRejectNoLimits() throws IOException {
|
public void testRejectNoLimits() throws IOException {
|
||||||
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
|
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
|
||||||
final FSTCompiler<Long> fstCompiler = new FSTCompiler<Long>(FST.INPUT_TYPE.BYTE1, outputs);
|
final FSTCompiler<Long> fstCompiler = new FSTCompiler<>(FST.INPUT_TYPE.BYTE1, outputs);
|
||||||
|
|
||||||
final IntsRefBuilder scratch = new IntsRefBuilder();
|
final IntsRefBuilder scratch = new IntsRefBuilder();
|
||||||
fstCompiler.add(Util.toIntsRef(new BytesRef("aab"), scratch), 22L);
|
fstCompiler.add(Util.toIntsRef(new BytesRef("aab"), scratch), 22L);
|
||||||
|
@ -1381,7 +1311,7 @@ public class TestFSTs extends LuceneTestCase {
|
||||||
final FST<Long> fst = fstCompiler.compile();
|
final FST<Long> fst = fstCompiler.compile();
|
||||||
final AtomicInteger rejectCount = new AtomicInteger();
|
final AtomicInteger rejectCount = new AtomicInteger();
|
||||||
Util.TopNSearcher<Long> searcher =
|
Util.TopNSearcher<Long> searcher =
|
||||||
new Util.TopNSearcher<Long>(fst, 2, 6, minLongComparator) {
|
new Util.TopNSearcher<>(fst, 2, 6, minLongComparator) {
|
||||||
@Override
|
@Override
|
||||||
protected boolean acceptResult(IntsRef input, Long output) {
|
protected boolean acceptResult(IntsRef input, Long output) {
|
||||||
boolean accept = output.intValue() == 7;
|
boolean accept = output.intValue() == 7;
|
||||||
|
@ -1393,7 +1323,7 @@ public class TestFSTs extends LuceneTestCase {
|
||||||
};
|
};
|
||||||
|
|
||||||
searcher.addStartPaths(
|
searcher.addStartPaths(
|
||||||
fst.getFirstArc(new FST.Arc<Long>()), outputs.getNoOutput(), true, new IntsRefBuilder());
|
fst.getFirstArc(new FST.Arc<>()), outputs.getNoOutput(), true, new IntsRefBuilder());
|
||||||
Util.TopResults<Long> res = searcher.search();
|
Util.TopResults<Long> res = searcher.search();
|
||||||
assertEquals(rejectCount.get(), 4);
|
assertEquals(rejectCount.get(), 4);
|
||||||
assertTrue(res.isComplete); // rejected(4) + topN(2) <= maxQueueSize(6)
|
assertTrue(res.isComplete); // rejected(4) + topN(2) <= maxQueueSize(6)
|
||||||
|
@ -1403,7 +1333,7 @@ public class TestFSTs extends LuceneTestCase {
|
||||||
assertEquals(7L, res.topN.get(0).output.longValue());
|
assertEquals(7L, res.topN.get(0).output.longValue());
|
||||||
rejectCount.set(0);
|
rejectCount.set(0);
|
||||||
searcher =
|
searcher =
|
||||||
new Util.TopNSearcher<Long>(fst, 2, 5, minLongComparator) {
|
new Util.TopNSearcher<>(fst, 2, 5, minLongComparator) {
|
||||||
@Override
|
@Override
|
||||||
protected boolean acceptResult(IntsRef input, Long output) {
|
protected boolean acceptResult(IntsRef input, Long output) {
|
||||||
boolean accept = output.intValue() == 7;
|
boolean accept = output.intValue() == 7;
|
||||||
|
@ -1415,7 +1345,7 @@ public class TestFSTs extends LuceneTestCase {
|
||||||
};
|
};
|
||||||
|
|
||||||
searcher.addStartPaths(
|
searcher.addStartPaths(
|
||||||
fst.getFirstArc(new FST.Arc<Long>()), outputs.getNoOutput(), true, new IntsRefBuilder());
|
fst.getFirstArc(new FST.Arc<>()), outputs.getNoOutput(), true, new IntsRefBuilder());
|
||||||
res = searcher.search();
|
res = searcher.search();
|
||||||
assertEquals(rejectCount.get(), 4);
|
assertEquals(rejectCount.get(), 4);
|
||||||
assertFalse(res.isComplete); // rejected(4) + topN(2) > maxQueueSize(5)
|
assertFalse(res.isComplete); // rejected(4) + topN(2) > maxQueueSize(5)
|
||||||
|
@ -1423,12 +1353,7 @@ public class TestFSTs extends LuceneTestCase {
|
||||||
|
|
||||||
// compares just the weight side of the pair
|
// compares just the weight side of the pair
|
||||||
static final Comparator<Pair<Long, Long>> minPairWeightComparator =
|
static final Comparator<Pair<Long, Long>> minPairWeightComparator =
|
||||||
new Comparator<Pair<Long, Long>>() {
|
Comparator.comparing(left -> left.output1);
|
||||||
@Override
|
|
||||||
public int compare(Pair<Long, Long> left, Pair<Long, Long> right) {
|
|
||||||
return left.output1.compareTo(right.output1);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
/** like testShortestPaths, but uses pairoutputs so we have both a weight and an output */
|
/** like testShortestPaths, but uses pairoutputs so we have both a weight and an output */
|
||||||
public void testShortestPathsWFST() throws Exception {
|
public void testShortestPathsWFST() throws Exception {
|
||||||
|
@ -1454,7 +1379,7 @@ public class TestFSTs extends LuceneTestCase {
|
||||||
Util.TopResults<Pair<Long, Long>> res =
|
Util.TopResults<Pair<Long, Long>> res =
|
||||||
Util.shortestPaths(
|
Util.shortestPaths(
|
||||||
fst,
|
fst,
|
||||||
fst.getFirstArc(new FST.Arc<Pair<Long, Long>>()),
|
fst.getFirstArc(new FST.Arc<>()),
|
||||||
outputs.getNoOutput(),
|
outputs.getNoOutput(),
|
||||||
minPairWeightComparator,
|
minPairWeightComparator,
|
||||||
3,
|
3,
|
||||||
|
@ -1488,12 +1413,9 @@ public class TestFSTs extends LuceneTestCase {
|
||||||
|
|
||||||
for (int i = 0; i < numWords; i++) {
|
for (int i = 0; i < numWords; i++) {
|
||||||
String s;
|
String s;
|
||||||
while (true) {
|
do {
|
||||||
s = TestUtil.randomSimpleString(random);
|
s = TestUtil.randomSimpleString(random);
|
||||||
if (!slowCompletor.containsKey(s)) {
|
} while (slowCompletor.containsKey(s));
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for (int j = 1; j < s.length(); j++) {
|
for (int j = 1; j < s.length(); j++) {
|
||||||
allPrefixes.add(s.substring(0, j));
|
allPrefixes.add(s.substring(0, j));
|
||||||
|
@ -1521,9 +1443,9 @@ public class TestFSTs extends LuceneTestCase {
|
||||||
// System.out.println("TEST: " + prefix);
|
// System.out.println("TEST: " + prefix);
|
||||||
|
|
||||||
long prefixOutput = 0;
|
long prefixOutput = 0;
|
||||||
FST.Arc<Long> arc = fst.getFirstArc(new FST.Arc<Long>());
|
FST.Arc<Long> arc = fst.getFirstArc(new FST.Arc<>());
|
||||||
for (int idx = 0; idx < prefix.length(); idx++) {
|
for (int idx = 0; idx < prefix.length(); idx++) {
|
||||||
if (fst.findTargetArc((int) prefix.charAt(idx), arc, arc, reader) == null) {
|
if (fst.findTargetArc(prefix.charAt(idx), arc, arc, reader) == null) {
|
||||||
fail();
|
fail();
|
||||||
}
|
}
|
||||||
prefixOutput += arc.output();
|
prefixOutput += arc.output();
|
||||||
|
@ -1551,7 +1473,7 @@ public class TestFSTs extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
assertTrue(matches.size() > 0);
|
assertTrue(matches.size() > 0);
|
||||||
Collections.sort(matches, new TieBreakByInputComparator<>(minLongComparator));
|
matches.sort(new TieBreakByInputComparator<>(minLongComparator));
|
||||||
if (matches.size() > topN) {
|
if (matches.size() > topN) {
|
||||||
matches.subList(topN, matches.size()).clear();
|
matches.subList(topN, matches.size()).clear();
|
||||||
}
|
}
|
||||||
|
@ -1614,12 +1536,9 @@ public class TestFSTs extends LuceneTestCase {
|
||||||
Random random = random();
|
Random random = random();
|
||||||
for (int i = 0; i < numWords; i++) {
|
for (int i = 0; i < numWords; i++) {
|
||||||
String s;
|
String s;
|
||||||
while (true) {
|
do {
|
||||||
s = TestUtil.randomSimpleString(random);
|
s = TestUtil.randomSimpleString(random);
|
||||||
if (!slowCompletor.containsKey(s)) {
|
} while (slowCompletor.containsKey(s));
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for (int j = 1; j < s.length(); j++) {
|
for (int j = 1; j < s.length(); j++) {
|
||||||
allPrefixes.add(s.substring(0, j));
|
allPrefixes.add(s.substring(0, j));
|
||||||
|
@ -1651,9 +1570,9 @@ public class TestFSTs extends LuceneTestCase {
|
||||||
// System.out.println("TEST: " + prefix);
|
// System.out.println("TEST: " + prefix);
|
||||||
|
|
||||||
Pair<Long, Long> prefixOutput = outputs.getNoOutput();
|
Pair<Long, Long> prefixOutput = outputs.getNoOutput();
|
||||||
FST.Arc<Pair<Long, Long>> arc = fst.getFirstArc(new FST.Arc<Pair<Long, Long>>());
|
FST.Arc<Pair<Long, Long>> arc = fst.getFirstArc(new FST.Arc<>());
|
||||||
for (int idx = 0; idx < prefix.length(); idx++) {
|
for (int idx = 0; idx < prefix.length(); idx++) {
|
||||||
if (fst.findTargetArc((int) prefix.charAt(idx), arc, arc, reader) == null) {
|
if (fst.findTargetArc(prefix.charAt(idx), arc, arc, reader) == null) {
|
||||||
fail();
|
fail();
|
||||||
}
|
}
|
||||||
prefixOutput = outputs.add(prefixOutput, arc.output());
|
prefixOutput = outputs.add(prefixOutput, arc.output());
|
||||||
|
@ -1683,7 +1602,7 @@ public class TestFSTs extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
assertTrue(matches.size() > 0);
|
assertTrue(matches.size() > 0);
|
||||||
Collections.sort(matches, new TieBreakByInputComparator<>(minPairWeightComparator));
|
matches.sort(new TieBreakByInputComparator<>(minPairWeightComparator));
|
||||||
if (matches.size() > topN) {
|
if (matches.size() > topN) {
|
||||||
matches.subList(topN, matches.size()).clear();
|
matches.subList(topN, matches.size()).clear();
|
||||||
}
|
}
|
||||||
|
@ -1758,7 +1677,7 @@ public class TestFSTs extends LuceneTestCase {
|
||||||
Arc<BytesRef> arc = new FST.Arc<>();
|
Arc<BytesRef> arc = new FST.Arc<>();
|
||||||
fst.getFirstArc(arc);
|
fst.getFirstArc(arc);
|
||||||
FST.BytesReader reader = fst.getBytesReader();
|
FST.BytesReader reader = fst.getBytesReader();
|
||||||
arc = fst.findTargetArc((int) 'm', arc, arc, reader);
|
arc = fst.findTargetArc('m', arc, arc, reader);
|
||||||
assertNotNull(arc);
|
assertNotNull(arc);
|
||||||
assertEquals(new BytesRef("m"), arc.output());
|
assertEquals(new BytesRef("m"), arc.output());
|
||||||
|
|
||||||
|
@ -1767,7 +1686,7 @@ public class TestFSTs extends LuceneTestCase {
|
||||||
|
|
||||||
fst.getFirstArc(arc);
|
fst.getFirstArc(arc);
|
||||||
try {
|
try {
|
||||||
arc = fst.findTargetArc((int) 'm', arc, arc, reader);
|
fst.findTargetArc((int) 'm', arc, arc, reader);
|
||||||
} catch (
|
} catch (
|
||||||
@SuppressWarnings("unused")
|
@SuppressWarnings("unused")
|
||||||
AssertionError ae) {
|
AssertionError ae) {
|
||||||
|
|
|
@ -109,7 +109,7 @@ public class TestFSTsMisc extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
pairs.add(new FSTTester.InputOutput<>(terms[idx], output));
|
pairs.add(new FSTTester.InputOutput<>(terms[idx], output));
|
||||||
}
|
}
|
||||||
new FSTTester<Object>(random(), dir, inputMode, pairs, outputs, false) {
|
new FSTTester<Object>(random(), dir, inputMode, pairs, outputs) {
|
||||||
@Override
|
@Override
|
||||||
protected boolean outputsEqual(Object output1, Object output2) {
|
protected boolean outputsEqual(Object output1, Object output2) {
|
||||||
if (output1 instanceof TwoLongs && output2 instanceof List) {
|
if (output1 instanceof TwoLongs && output2 instanceof List) {
|
||||||
|
@ -157,7 +157,7 @@ public class TestFSTsMisc extends LuceneTestCase {
|
||||||
|
|
||||||
pairs.add(new FSTTester.InputOutput<>(terms[idx], output));
|
pairs.add(new FSTTester.InputOutput<>(terms[idx], output));
|
||||||
}
|
}
|
||||||
new FSTTester<>(random(), dir, inputMode, pairs, outputs, false).doTest(false);
|
new FSTTester<>(random(), dir, inputMode, pairs, outputs).doTest(false);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -29,12 +29,10 @@ import java.nio.charset.Charset;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.HashSet;
|
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Random;
|
import java.util.Random;
|
||||||
import java.util.Set;
|
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
import org.apache.lucene.store.IOContext;
|
import org.apache.lucene.store.IOContext;
|
||||||
import org.apache.lucene.store.IndexInput;
|
import org.apache.lucene.store.IndexInput;
|
||||||
|
@ -54,23 +52,16 @@ public class FSTTester<T> {
|
||||||
final int inputMode;
|
final int inputMode;
|
||||||
final Outputs<T> outputs;
|
final Outputs<T> outputs;
|
||||||
final Directory dir;
|
final Directory dir;
|
||||||
final boolean doReverseLookup;
|
|
||||||
long nodeCount;
|
long nodeCount;
|
||||||
long arcCount;
|
long arcCount;
|
||||||
|
|
||||||
public FSTTester(
|
public FSTTester(
|
||||||
Random random,
|
Random random, Directory dir, int inputMode, List<InputOutput<T>> pairs, Outputs<T> outputs) {
|
||||||
Directory dir,
|
|
||||||
int inputMode,
|
|
||||||
List<InputOutput<T>> pairs,
|
|
||||||
Outputs<T> outputs,
|
|
||||||
boolean doReverseLookup) {
|
|
||||||
this.random = random;
|
this.random = random;
|
||||||
this.dir = dir;
|
this.dir = dir;
|
||||||
this.inputMode = inputMode;
|
this.inputMode = inputMode;
|
||||||
this.pairs = pairs;
|
this.pairs = pairs;
|
||||||
this.outputs = outputs;
|
this.outputs = outputs;
|
||||||
this.doReverseLookup = doReverseLookup;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static String inputToString(int inputMode, IntsRef term) {
|
static String inputToString(int inputMode, IntsRef term) {
|
||||||
|
@ -181,11 +172,7 @@ public class FSTTester<T> {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int compareTo(InputOutput<T> other) {
|
public int compareTo(InputOutput<T> other) {
|
||||||
if (other instanceof InputOutput) {
|
return input.compareTo(other.input);
|
||||||
return input.compareTo((other).input);
|
|
||||||
} else {
|
|
||||||
throw new IllegalArgumentException();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -208,9 +195,8 @@ public class FSTTester<T> {
|
||||||
// of the term prefix that matches
|
// of the term prefix that matches
|
||||||
private T run(FST<T> fst, IntsRef term, int[] prefixLength) throws IOException {
|
private T run(FST<T> fst, IntsRef term, int[] prefixLength) throws IOException {
|
||||||
assert prefixLength == null || prefixLength.length == 1;
|
assert prefixLength == null || prefixLength.length == 1;
|
||||||
final FST.Arc<T> arc = fst.getFirstArc(new FST.Arc<T>());
|
final FST.Arc<T> arc = fst.getFirstArc(new FST.Arc<>());
|
||||||
final T NO_OUTPUT = fst.outputs.getNoOutput();
|
T output = fst.outputs.getNoOutput();
|
||||||
T output = NO_OUTPUT;
|
|
||||||
final FST.BytesReader fstReader = fst.getBytesReader();
|
final FST.BytesReader fstReader = fst.getBytesReader();
|
||||||
|
|
||||||
for (int i = 0; i <= term.length; i++) {
|
for (int i = 0; i <= term.length; i++) {
|
||||||
|
@ -243,12 +229,11 @@ public class FSTTester<T> {
|
||||||
}
|
}
|
||||||
|
|
||||||
private T randomAcceptedWord(FST<T> fst, IntsRefBuilder in) throws IOException {
|
private T randomAcceptedWord(FST<T> fst, IntsRefBuilder in) throws IOException {
|
||||||
FST.Arc<T> arc = fst.getFirstArc(new FST.Arc<T>());
|
FST.Arc<T> arc = fst.getFirstArc(new FST.Arc<>());
|
||||||
|
|
||||||
final List<FST.Arc<T>> arcs = new ArrayList<>();
|
final List<FST.Arc<T>> arcs = new ArrayList<>();
|
||||||
in.clear();
|
in.clear();
|
||||||
final T NO_OUTPUT = fst.outputs.getNoOutput();
|
T output = fst.outputs.getNoOutput();
|
||||||
T output = NO_OUTPUT;
|
|
||||||
final FST.BytesReader fstReader = fst.getBytesReader();
|
final FST.BytesReader fstReader = fst.getBytesReader();
|
||||||
|
|
||||||
while (true) {
|
while (true) {
|
||||||
|
@ -311,14 +296,12 @@ public class FSTTester<T> {
|
||||||
|
|
||||||
if (random.nextBoolean() && fst != null) {
|
if (random.nextBoolean() && fst != null) {
|
||||||
IOContext context = LuceneTestCase.newIOContext(random);
|
IOContext context = LuceneTestCase.newIOContext(random);
|
||||||
IndexOutput out = dir.createOutput("fst.bin", context);
|
try (IndexOutput out = dir.createOutput("fst.bin", context)) {
|
||||||
fst.save(out, out);
|
fst.save(out, out);
|
||||||
out.close();
|
}
|
||||||
IndexInput in = dir.openInput("fst.bin", context);
|
try (IndexInput in = dir.openInput("fst.bin", context)) {
|
||||||
try {
|
fst = new FST<>(in, in, outputs);
|
||||||
fst = new FST<T>(in, in, outputs);
|
|
||||||
} finally {
|
} finally {
|
||||||
in.close();
|
|
||||||
dir.deleteFile("fst.bin");
|
dir.deleteFile("fst.bin");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -361,30 +344,8 @@ public class FSTTester<T> {
|
||||||
}
|
}
|
||||||
|
|
||||||
// FST is complete
|
// FST is complete
|
||||||
@SuppressWarnings("deprecation")
|
|
||||||
private void verifyUnPruned(int inputMode, FST<T> fst) throws IOException {
|
private void verifyUnPruned(int inputMode, FST<T> fst) throws IOException {
|
||||||
|
|
||||||
final FST<Long> fstLong;
|
|
||||||
final Set<Long> validOutputs;
|
|
||||||
long minLong = Long.MAX_VALUE;
|
|
||||||
long maxLong = Long.MIN_VALUE;
|
|
||||||
|
|
||||||
if (doReverseLookup) {
|
|
||||||
@SuppressWarnings("unchecked")
|
|
||||||
FST<Long> fstLong0 = (FST<Long>) fst;
|
|
||||||
fstLong = fstLong0;
|
|
||||||
validOutputs = new HashSet<>();
|
|
||||||
for (InputOutput<T> pair : pairs) {
|
|
||||||
Long output = (Long) pair.output;
|
|
||||||
maxLong = Math.max(maxLong, output);
|
|
||||||
minLong = Math.min(minLong, output);
|
|
||||||
validOutputs.add(output);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
fstLong = null;
|
|
||||||
validOutputs = null;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (pairs.size() == 0) {
|
if (pairs.size() == 0) {
|
||||||
assertNull(fst);
|
assertNull(fst);
|
||||||
return;
|
return;
|
||||||
|
@ -447,20 +408,6 @@ public class FSTTester<T> {
|
||||||
termsMap.put(pair.input, pair.output);
|
termsMap.put(pair.input, pair.output);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (doReverseLookup && maxLong > minLong) {
|
|
||||||
// Do random lookups so we test null (output doesn't
|
|
||||||
// exist) case:
|
|
||||||
assertNull(Util.getByOutput(fstLong, minLong - 7));
|
|
||||||
assertNull(Util.getByOutput(fstLong, maxLong + 7));
|
|
||||||
|
|
||||||
final int num = LuceneTestCase.atLeast(random, 100);
|
|
||||||
for (int iter = 0; iter < num; iter++) {
|
|
||||||
Long v = TestUtil.nextLong(random, minLong, maxLong);
|
|
||||||
IntsRef input = Util.getByOutput(fstLong, v);
|
|
||||||
assertTrue(validOutputs.contains(v) || input == null);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// find random matching word and make sure it's valid
|
// find random matching word and make sure it's valid
|
||||||
if (LuceneTestCase.VERBOSE) {
|
if (LuceneTestCase.VERBOSE) {
|
||||||
System.out.println("TEST: verify random accepted terms");
|
System.out.println("TEST: verify random accepted terms");
|
||||||
|
@ -473,14 +420,6 @@ public class FSTTester<T> {
|
||||||
"accepted word " + inputToString(inputMode, scratch.get()) + " is not valid",
|
"accepted word " + inputToString(inputMode, scratch.get()) + " is not valid",
|
||||||
termsMap.containsKey(scratch.get()));
|
termsMap.containsKey(scratch.get()));
|
||||||
assertTrue(outputsEqual(termsMap.get(scratch.get()), output));
|
assertTrue(outputsEqual(termsMap.get(scratch.get()), output));
|
||||||
|
|
||||||
if (doReverseLookup) {
|
|
||||||
// System.out.println("lookup output=" + output + " outs=" + fst.outputs);
|
|
||||||
IntsRef input = Util.getByOutput(fstLong, (Long) output);
|
|
||||||
assertNotNull(input);
|
|
||||||
// System.out.println(" got " + Util.toBytesRef(input, new BytesRef()).utf8ToString());
|
|
||||||
assertEquals(scratch.get(), input);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// test IntsRefFSTEnum.seek:
|
// test IntsRefFSTEnum.seek:
|
||||||
|
@ -497,7 +436,7 @@ public class FSTTester<T> {
|
||||||
// seek to term that doesn't exist:
|
// seek to term that doesn't exist:
|
||||||
while (true) {
|
while (true) {
|
||||||
final IntsRef term = toIntsRef(getRandomString(random), inputMode);
|
final IntsRef term = toIntsRef(getRandomString(random), inputMode);
|
||||||
int pos = Collections.binarySearch(pairs, new InputOutput<T>(term, null));
|
int pos = Collections.binarySearch(pairs, new InputOutput<>(term, null));
|
||||||
if (pos < 0) {
|
if (pos < 0) {
|
||||||
pos = -(pos + 1);
|
pos = -(pos + 1);
|
||||||
// ok doesn't exist
|
// ok doesn't exist
|
||||||
|
@ -617,7 +556,7 @@ public class FSTTester<T> {
|
||||||
for (; attempt < 10; attempt++) {
|
for (; attempt < 10; attempt++) {
|
||||||
IntsRef term = toIntsRef(getRandomString(random), inputMode);
|
IntsRef term = toIntsRef(getRandomString(random), inputMode);
|
||||||
if (!termsMap.containsKey(term) && term.compareTo(pairs.get(upto).input) > 0) {
|
if (!termsMap.containsKey(term) && term.compareTo(pairs.get(upto).input) > 0) {
|
||||||
int pos = Collections.binarySearch(pairs, new InputOutput<T>(term, null));
|
int pos = Collections.binarySearch(pairs, new InputOutput<>(term, null));
|
||||||
assert pos < 0;
|
assert pos < 0;
|
||||||
upto = -(pos + 1);
|
upto = -(pos + 1);
|
||||||
|
|
||||||
|
@ -806,10 +745,8 @@ public class FSTTester<T> {
|
||||||
cmo2 != null
|
cmo2 != null
|
||||||
&& ((prune2 > 1 && cmo2.count >= prune2)
|
&& ((prune2 > 1 && cmo2.count >= prune2)
|
||||||
|| (prune2 == 1 && (cmo2.count >= 2 || prefix.length <= 1)));
|
|| (prune2 == 1 && (cmo2.count >= 2 || prefix.length <= 1)));
|
||||||
} else if (cmo.count >= prune2) {
|
|
||||||
keep = true;
|
|
||||||
} else {
|
} else {
|
||||||
keep = false;
|
keep = cmo.count >= prune2;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue