LUCENE-8638: remove deprecated FST get by output

This commit is contained in:
Michael Sokolov 2021-08-18 08:15:31 -04:00 committed by GitHub
parent a37844aedd
commit 666c7a2590
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 122 additions and 434 deletions

View File

@ -22,6 +22,8 @@ System Requirements
API Changes API Changes
* LUCENE-8638: Remove deprecated methods in FST for lookup by output.
* LUCENE-8638: haversin() expressions function now returns its result in meters rather than * LUCENE-8638: haversin() expressions function now returns its result in meters rather than
kilometers. kilometers.

View File

@ -45,7 +45,7 @@ public final class Util {
public static <T> T get(FST<T> fst, IntsRef input) throws IOException { public static <T> T get(FST<T> fst, IntsRef input) throws IOException {
// TODO: would be nice not to alloc this on every lookup // TODO: would be nice not to alloc this on every lookup
final FST.Arc<T> arc = fst.getFirstArc(new FST.Arc<T>()); final FST.Arc<T> arc = fst.getFirstArc(new FST.Arc<>());
final BytesReader fstReader = fst.getBytesReader(); final BytesReader fstReader = fst.getBytesReader();
@ -92,167 +92,6 @@ public final class Util {
} }
} }
/**
 * Reverse lookup: finds the input whose output equals {@code targetOutput}, instead of the usual
 * lookup of an output by its input. Returns {@code null} when no input maps to that output.
 *
 * <p>NOTE: this only works with {@code FST<Long>}, and only when the outputs are strictly
 * ascending in the same order as the inputs. Simple ordinals (0, 1, 2, ...) or file offsets
 * (when appending to a file) satisfy this.
 *
 * <p>This convenience overload allocates a fresh reader, two arcs and a result builder on every
 * call, then delegates to the six-argument overload.
 *
 * @param fst the FST to search
 * @param targetOutput the output value to locate
 * @return the input mapped to {@code targetOutput}, or {@code null} if there is none
 * @throws IOException declared because reading the FST may fail
 * @deprecated lookup by output is deprecated (see LUCENE-8638)
 */
@Deprecated
public static IntsRef getByOutput(FST<Long> fst, long targetOutput) throws IOException {
  // TODO: would be nice not to alloc this on every lookup
  final BytesReader reader = fst.getBytesReader();
  final FST.Arc<Long> rootArc = fst.getFirstArc(new FST.Arc<>());
  return getByOutput(fst, targetOutput, reader, rootArc, new FST.Arc<>(), new IntsRefBuilder());
}
/**
 * Expert: like {@link Util#getByOutput(FST, long)} except reusing BytesReader, initial and
 * scratch Arc, and result.
 *
 * <p>Starting from {@code arc}, the search repeatedly descends into the last outgoing arc whose
 * accumulated output does not exceed {@code targetOutput}, appending that arc's label to {@code
 * result}. It stops when a final state whose total output equals the target is reached. This
 * relies on the outputs being ascending in input order.
 *
 * @param fst the FST to search
 * @param targetOutput the output value to locate
 * @param in reader positioned and repositioned by this method while scanning arcs
 * @param arc the arc to start from; it is overwritten as the search descends
 * @param scratchArc scratch space used to remember the previous arc during a linear arc scan
 * @param result builder that receives the labels of the matched input
 * @return the input mapped to {@code targetOutput}, or {@code null} if there is none
 * @throws IOException declared because reading the FST may fail
 * @deprecated lookup by output is deprecated (see LUCENE-8638)
 */
@Deprecated
public static IntsRef getByOutput(
    FST<Long> fst,
    long targetOutput,
    BytesReader in,
    Arc<Long> arc,
    Arc<Long> scratchArc,
    IntsRefBuilder result)
    throws IOException {

  // Output accumulated along the path taken so far
  long output = arc.output();
  // Number of labels written into result so far
  int upto = 0;

  while (true) {
    if (arc.isFinal()) {
      final long finalOutput = output + arc.nextFinalOutput();
      if (finalOutput == targetOutput) {
        result.setLength(upto);
        return result.get();
      } else if (finalOutput > targetOutput) {
        // Already past the target: it cannot appear further down this path
        return null;
      }
    }

    if (FST.targetHasArcs(arc)) {
      result.grow(1 + upto);

      fst.readFirstRealTargetArc(arc.target(), arc, in);

      if (arc.bytesPerArc() != 0 && arc.nodeFlags() == FST.ARCS_FOR_BINARY_SEARCH) {
        // Fixed-width arcs: binary search for the arc whose min output brackets the target

        int low = 0;
        int high = arc.numArcs() - 1;
        int mid = 0;
        boolean exact = false;
        while (low <= high) {
          mid = (low + high) >>> 1;
          in.setPosition(arc.posArcsStart());
          // Widen to long before multiplying so the byte offset cannot overflow int
          in.skipBytes((long) arc.bytesPerArc() * mid);
          final byte flags = in.readByte();
          // Consume the label; only the arc's output matters for the comparison
          fst.readLabel(in);
          final long minArcOutput;
          if ((flags & FST.BIT_ARC_HAS_OUTPUT) != 0) {
            final long arcOutput = fst.outputs.read(in);
            minArcOutput = output + arcOutput;
          } else {
            minArcOutput = output;
          }
          if (minArcOutput == targetOutput) {
            exact = true;
            break;
          } else if (minArcOutput < targetOutput) {
            low = mid + 1;
          } else {
            high = mid - 1;
          }
        }

        int idx;
        if (high == -1) {
          // Even the first arc's min output exceeds the target
          return null;
        } else if (exact) {
          idx = mid;
        } else {
          // Last arc whose min output is below the target
          idx = low - 1;
        }

        fst.readArcByIndex(arc, in, idx);
        result.setIntAt(upto++, arc.label());
        output += arc.output();

      } else {
        // Linear scan over the node's arcs, remembering the previous arc in scratchArc

        FST.Arc<Long> prevArc = null;

        while (true) {
          // This is the min output we'd hit if we follow
          // this arc:
          final long minArcOutput = output + arc.output();

          if (minArcOutput == targetOutput) {
            // Recurse on this arc:
            output = minArcOutput;
            result.setIntAt(upto++, arc.label());
            break;
          } else if (minArcOutput > targetOutput) {
            if (prevArc == null) {
              // Output doesn't exist
              return null;
            } else {
              // Recurse on previous arc:
              arc.copyFrom(prevArc);
              result.setIntAt(upto++, arc.label());
              output += arc.output();
              break;
            }
          } else if (arc.isLast()) {
            // Recurse on this arc:
            output = minArcOutput;
            result.setIntAt(upto++, arc.label());
            break;
          } else {
            // Read next arc in this node:
            prevArc = scratchArc;
            prevArc.copyFrom(arc);
            fst.readNextRealArc(arc, in);
          }
        }
      }
    } else {
      // Dead end without having matched the target
      return null;
    }
  }
}
/** /**
* Represents a path in TopNSearcher. * Represents a path in TopNSearcher.
* *
@ -899,7 +738,7 @@ public final class Util {
scratch.setLength(charLimit); scratch.setLength(charLimit);
scratch.grow(charLimit); scratch.grow(charLimit);
for (int idx = 0; idx < charLimit; idx++) { for (int idx = 0; idx < charLimit; idx++) {
scratch.setIntAt(idx, (int) s.charAt(idx)); scratch.setIntAt(idx, s.charAt(idx));
} }
return scratch.get(); return scratch.get();
} }
@ -1033,9 +872,6 @@ public final class Util {
while (true) { while (true) {
// System.out.println(" non-bs cycle"); // System.out.println(" non-bs cycle");
// TODO: we should fix this code to not have to create
// object for the output of every arc we scan... only
// for the matching arc, if found
if (arc.label() >= label) { if (arc.label() >= label) {
// System.out.println(" found!"); // System.out.println(" found!");
return arc; return arc;
@ -1067,12 +903,12 @@ public final class Util {
+ ")"; + ")";
BytesReader in = fst.getBytesReader(); BytesReader in = fst.getBytesReader();
int low = arc.arcIdx(); int low = arc.arcIdx();
int mid = 0; int mid;
int high = arc.numArcs() - 1; int high = arc.numArcs() - 1;
while (low <= high) { while (low <= high) {
mid = (low + high) >>> 1; mid = (low + high) >>> 1;
in.setPosition(arc.posArcsStart()); in.setPosition(arc.posArcsStart());
in.skipBytes(arc.bytesPerArc() * mid + 1); in.skipBytes((long) arc.bytesPerArc() * mid + 1);
final int midLabel = fst.readLabel(in); final int midLabel = fst.readLabel(in);
final int cmp = midLabel - targetLabel; final int cmp = midLabel - targetLabel;
if (cmp < 0) { if (cmp < 0) {

View File

@ -25,8 +25,6 @@
* <li>Fast and low memory overhead construction of the minimal FST (but inputs must be provided * <li>Fast and low memory overhead construction of the minimal FST (but inputs must be provided
* in sorted order) * in sorted order)
* <li>Low object overhead and quick deserialization (byte[] representation) * <li>Low object overhead and quick deserialization (byte[] representation)
* <li>{@link org.apache.lucene.util.fst.Util#getByOutput Lookup-by-output} when the outputs are
* in sorted order (e.g., ordinals or file pointers)
* <li>Pluggable {@link org.apache.lucene.util.fst.Outputs Outputs} representation * <li>Pluggable {@link org.apache.lucene.util.fst.Outputs Outputs} representation
* <li>{@link org.apache.lucene.util.fst.Util#shortestPaths N-shortest-paths} search by weight * <li>{@link org.apache.lucene.util.fst.Util#shortestPaths N-shortest-paths} search by weight
* <li>Enumerators ({@link org.apache.lucene.util.fst.IntsRefFSTEnum IntsRef} and {@link * <li>Enumerators ({@link org.apache.lucene.util.fst.IntsRefFSTEnum IntsRef} and {@link

View File

@ -277,12 +277,8 @@ public class Test2BFST extends LuceneTestCase {
System.out.println(i + "...: "); System.out.println(i + "...: ");
} }
// forward lookup:
assertEquals(output, Util.get(fst, input).longValue()); assertEquals(output, Util.get(fst, input).longValue());
// reverse lookup:
@SuppressWarnings("deprecation")
IntsRef inputResult = Util.getByOutput(fst, output);
assertEquals(input, inputResult);
output += 1 + r.nextInt(10); output += 1 + r.nextInt(10);
nextInput(r, ints); nextInput(r, ints);
} }

View File

@ -92,7 +92,9 @@ public class TestFSTs extends LuceneTestCase {
@Override @Override
public void tearDown() throws Exception { public void tearDown() throws Exception {
// can be null if we force simpletext (funky, some kind of bug in test runner maybe) // can be null if we force simpletext (funky, some kind of bug in test runner maybe)
if (dir != null) dir.close(); if (dir != null) {
dir.close();
}
super.tearDown(); super.tearDown();
} }
@ -133,7 +135,7 @@ public class TestFSTs extends LuceneTestCase {
for (IntsRef term : terms2) { for (IntsRef term : terms2) {
pairs.add(new FSTTester.InputOutput<>(term, NO_OUTPUT)); pairs.add(new FSTTester.InputOutput<>(term, NO_OUTPUT));
} }
FSTTester<Object> tester = new FSTTester<>(random(), dir, inputMode, pairs, outputs, false); FSTTester<Object> tester = new FSTTester<>(random(), dir, inputMode, pairs, outputs);
FST<Object> fst = tester.doTest(0, 0, false); FST<Object> fst = tester.doTest(0, 0, false);
assertNotNull(fst); assertNotNull(fst);
assertEquals(22, tester.nodeCount); assertEquals(22, tester.nodeCount);
@ -147,7 +149,7 @@ public class TestFSTs extends LuceneTestCase {
for (int idx = 0; idx < terms2.length; idx++) { for (int idx = 0; idx < terms2.length; idx++) {
pairs.add(new FSTTester.InputOutput<>(terms2[idx], (long) idx)); pairs.add(new FSTTester.InputOutput<>(terms2[idx], (long) idx));
} }
FSTTester<Long> tester = new FSTTester<>(random(), dir, inputMode, pairs, outputs, true); FSTTester<Long> tester = new FSTTester<>(random(), dir, inputMode, pairs, outputs);
final FST<Long> fst = tester.doTest(0, 0, false); final FST<Long> fst = tester.doTest(0, 0, false);
assertNotNull(fst); assertNotNull(fst);
assertEquals(22, tester.nodeCount); assertEquals(22, tester.nodeCount);
@ -157,14 +159,12 @@ public class TestFSTs extends LuceneTestCase {
// FST byte sequence ord // FST byte sequence ord
{ {
final ByteSequenceOutputs outputs = ByteSequenceOutputs.getSingleton(); final ByteSequenceOutputs outputs = ByteSequenceOutputs.getSingleton();
final BytesRef NO_OUTPUT = outputs.getNoOutput();
final List<FSTTester.InputOutput<BytesRef>> pairs = new ArrayList<>(terms2.length); final List<FSTTester.InputOutput<BytesRef>> pairs = new ArrayList<>(terms2.length);
for (int idx = 0; idx < terms2.length; idx++) { for (int idx = 0; idx < terms2.length; idx++) {
final BytesRef output = idx == 17 ? NO_OUTPUT : new BytesRef(Integer.toString(idx)); final BytesRef output = new BytesRef(Integer.toString(idx));
pairs.add(new FSTTester.InputOutput<>(terms2[idx], output)); pairs.add(new FSTTester.InputOutput<>(terms2[idx], output));
} }
FSTTester<BytesRef> tester = FSTTester<BytesRef> tester = new FSTTester<>(random(), dir, inputMode, pairs, outputs);
new FSTTester<>(random(), dir, inputMode, pairs, outputs, false);
final FST<BytesRef> fst = tester.doTest(0, 0, false); final FST<BytesRef> fst = tester.doTest(0, 0, false);
assertNotNull(fst); assertNotNull(fst);
assertEquals(24, tester.nodeCount); assertEquals(24, tester.nodeCount);
@ -185,7 +185,7 @@ public class TestFSTs extends LuceneTestCase {
for (IntsRef term : terms) { for (IntsRef term : terms) {
pairs.add(new FSTTester.InputOutput<>(term, NO_OUTPUT)); pairs.add(new FSTTester.InputOutput<>(term, NO_OUTPUT));
} }
new FSTTester<>(random(), dir, inputMode, pairs, outputs, false).doTest(true); new FSTTester<>(random(), dir, inputMode, pairs, outputs).doTest(true);
} }
// PositiveIntOutput (ord) // PositiveIntOutput (ord)
@ -195,7 +195,7 @@ public class TestFSTs extends LuceneTestCase {
for (int idx = 0; idx < terms.length; idx++) { for (int idx = 0; idx < terms.length; idx++) {
pairs.add(new FSTTester.InputOutput<>(terms[idx], (long) idx)); pairs.add(new FSTTester.InputOutput<>(terms[idx], (long) idx));
} }
new FSTTester<>(random(), dir, inputMode, pairs, outputs, true).doTest(true); new FSTTester<>(random(), dir, inputMode, pairs, outputs).doTest(true);
} }
// PositiveIntOutput (random monotonically increasing positive number) // PositiveIntOutput (random monotonically increasing positive number)
@ -203,24 +203,23 @@ public class TestFSTs extends LuceneTestCase {
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(); final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
final List<FSTTester.InputOutput<Long>> pairs = new ArrayList<>(terms.length); final List<FSTTester.InputOutput<Long>> pairs = new ArrayList<>(terms.length);
long lastOutput = 0; long lastOutput = 0;
for (int idx = 0; idx < terms.length; idx++) { for (IntsRef term : terms) {
final long value = lastOutput + TestUtil.nextInt(random(), 1, 1000); final long value = lastOutput + TestUtil.nextInt(random(), 1, 1000);
lastOutput = value; lastOutput = value;
pairs.add(new FSTTester.InputOutput<>(terms[idx], value)); pairs.add(new FSTTester.InputOutput<>(term, value));
} }
new FSTTester<>(random(), dir, inputMode, pairs, outputs, true).doTest(true); new FSTTester<>(random(), dir, inputMode, pairs, outputs).doTest(true);
} }
// PositiveIntOutput (random positive number) // PositiveIntOutput (random positive number)
{ {
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(); final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
final List<FSTTester.InputOutput<Long>> pairs = new ArrayList<>(terms.length); final List<FSTTester.InputOutput<Long>> pairs = new ArrayList<>(terms.length);
for (int idx = 0; idx < terms.length; idx++) { for (IntsRef term : terms) {
pairs.add( pairs.add(
new FSTTester.InputOutput<>( new FSTTester.InputOutput<>(term, TestUtil.nextLong(random(), 0, Long.MAX_VALUE)));
terms[idx], TestUtil.nextLong(random(), 0, Long.MAX_VALUE)));
} }
new FSTTester<>(random(), dir, inputMode, pairs, outputs, false).doTest(true); new FSTTester<>(random(), dir, inputMode, pairs, outputs).doTest(true);
} }
// Pair<ord, (random monotonically increasing positive number> // Pair<ord, (random monotonically increasing positive number>
@ -236,7 +235,7 @@ public class TestFSTs extends LuceneTestCase {
lastOutput = value; lastOutput = value;
pairs.add(new FSTTester.InputOutput<>(terms[idx], outputs.newPair((long) idx, value))); pairs.add(new FSTTester.InputOutput<>(terms[idx], outputs.newPair((long) idx, value)));
} }
new FSTTester<>(random(), dir, inputMode, pairs, outputs, false).doTest(true); new FSTTester<>(random(), dir, inputMode, pairs, outputs).doTest(true);
} }
// Sequence-of-bytes // Sequence-of-bytes
@ -249,7 +248,7 @@ public class TestFSTs extends LuceneTestCase {
random().nextInt(30) == 17 ? NO_OUTPUT : new BytesRef(Integer.toString(idx)); random().nextInt(30) == 17 ? NO_OUTPUT : new BytesRef(Integer.toString(idx));
pairs.add(new FSTTester.InputOutput<>(terms[idx], output)); pairs.add(new FSTTester.InputOutput<>(terms[idx], output));
} }
new FSTTester<>(random(), dir, inputMode, pairs, outputs, false).doTest(true); new FSTTester<>(random(), dir, inputMode, pairs, outputs).doTest(true);
} }
// Sequence-of-ints // Sequence-of-ints
@ -265,7 +264,7 @@ public class TestFSTs extends LuceneTestCase {
} }
pairs.add(new FSTTester.InputOutput<>(terms[idx], output)); pairs.add(new FSTTester.InputOutput<>(terms[idx], output));
} }
new FSTTester<>(random(), dir, inputMode, pairs, outputs, false).doTest(true); new FSTTester<>(random(), dir, inputMode, pairs, outputs).doTest(true);
} }
} }
@ -298,7 +297,7 @@ public class TestFSTs extends LuceneTestCase {
final String term = getRandomString(random); final String term = getRandomString(random);
termsSet.add(toIntsRef(term, inputMode)); termsSet.add(toIntsRef(term, inputMode));
} }
doTest(inputMode, termsSet.toArray(new IntsRef[termsSet.size()])); doTest(inputMode, termsSet.toArray(new IntsRef[0]));
} }
} }
} }
@ -497,7 +496,7 @@ public class TestFSTs extends LuceneTestCase {
private abstract static class VisitTerms<T> { private abstract static class VisitTerms<T> {
private final Path dirOut; private final Path dirOut;
private final Path wordsFileIn; private final Path wordsFileIn;
private int inputMode; private final int inputMode;
private final Outputs<T> outputs; private final Outputs<T> outputs;
private final FSTCompiler<T> fstCompiler; private final FSTCompiler<T> fstCompiler;
@ -524,7 +523,7 @@ public class TestFSTs extends LuceneTestCase {
protected abstract T getOutput(IntsRef input, int ord) throws IOException; protected abstract T getOutput(IntsRef input, int ord) throws IOException;
public void run(int limit, boolean verify, boolean verifyByOutput) throws IOException { public void run(int limit, boolean verify) throws IOException {
BufferedReader is = Files.newBufferedReader(wordsFileIn, StandardCharsets.UTF_8); BufferedReader is = Files.newBufferedReader(wordsFileIn, StandardCharsets.UTF_8);
try { try {
@ -541,12 +540,11 @@ public class TestFSTs extends LuceneTestCase {
ord++; ord++;
if (ord % 500000 == 0) { if (ord % 500000 == 0) {
System.out.println( System.out.printf(
String.format( Locale.ROOT,
Locale.ROOT, "%6.2fs: %9d...",
"%6.2fs: %9d...", ((System.currentTimeMillis() - tStart) / 1000.0),
((System.currentTimeMillis() - tStart) / 1000.0), ord);
ord));
} }
if (ord >= limit) { if (ord >= limit) {
break; break;
@ -594,90 +592,51 @@ public class TestFSTs extends LuceneTestCase {
return; return;
} }
/*
IndexInput in = dir.openInput("fst.bin", IOContext.DEFAULT);
fst = new FST<T>(in, outputs);
in.close();
*/
System.out.println("\nNow verify..."); System.out.println("\nNow verify...");
is.close();
is = Files.newBufferedReader(wordsFileIn, StandardCharsets.UTF_8);
ord = 0;
tStart = System.currentTimeMillis();
while (true) { while (true) {
for (int iter = 0; iter < 2; iter++) { String w = is.readLine();
is.close(); if (w == null) {
is = Files.newBufferedReader(wordsFileIn, StandardCharsets.UTF_8); break;
}
ord = 0; toIntsRef(w, inputMode, intsRefBuilder);
tStart = System.currentTimeMillis(); T expected = getOutput(intsRefBuilder.get(), ord);
while (true) { T actual = Util.get(fst, intsRefBuilder.get());
String w = is.readLine(); if (actual == null) {
if (w == null) { throw new RuntimeException("unexpected null output on input=" + w);
break; }
} if (!actual.equals(expected)) {
toIntsRef(w, inputMode, intsRefBuilder); throw new RuntimeException(
if (iter == 0) { "wrong output (got "
T expected = getOutput(intsRefBuilder.get(), ord); + outputs.outputToString(actual)
T actual = Util.get(fst, intsRefBuilder.get()); + " but expected "
if (actual == null) { + outputs.outputToString(expected)
throw new RuntimeException("unexpected null output on input=" + w); + ") on input="
} + w);
if (!actual.equals(expected)) { }
throw new RuntimeException( ord++;
"wrong output (got " if (ord % 500000 == 0) {
+ outputs.outputToString(actual) System.out.println(
+ " but expected " ((System.currentTimeMillis() - tStart) / 1000.0) + "s: " + ord + "...");
+ outputs.outputToString(expected) }
+ ") on input=" if (ord >= limit) {
+ w); break;
}
} else {
// Get by output
final Long output = (Long) getOutput(intsRefBuilder.get(), ord);
@SuppressWarnings({"unchecked", "deprecation"})
final IntsRef actual = Util.getByOutput((FST<Long>) fst, output.longValue());
if (actual == null) {
throw new RuntimeException("unexpected null input from output=" + output);
}
if (!actual.equals(intsRefBuilder.get())) {
throw new RuntimeException(
"wrong input (got "
+ actual
+ " but expected "
+ intsRefBuilder
+ " from output="
+ output);
}
}
ord++;
if (ord % 500000 == 0) {
System.out.println(
((System.currentTimeMillis() - tStart) / 1000.0) + "s: " + ord + "...");
}
if (ord >= limit) {
break;
}
}
double totSec = ((System.currentTimeMillis() - tStart) / 1000.0);
System.out.println(
"Verify "
+ (iter == 1 ? "(by output) " : "")
+ "took "
+ totSec
+ " sec + ("
+ (int) ((totSec * 1000000000 / ord))
+ " nsec per lookup)");
if (!verifyByOutput) {
break;
}
} }
// NOTE: comment out to profile lookup...
break;
} }
double totSec = ((System.currentTimeMillis() - tStart) / 1000.0);
System.out.println(
"Verify took "
+ totSec
+ " sec + ("
+ (int) ((totSec * 1000000000 / ord))
+ " nsec per lookup)");
} finally { } finally {
is.close(); is.close();
} }
@ -762,7 +721,7 @@ public class TestFSTs extends LuceneTestCase {
} }
return outputs.newPair((long) ord, (long) TestUtil.nextInt(rand, 1, 5000)); return outputs.newPair((long) ord, (long) TestUtil.nextInt(rand, 1, 5000));
} }
}.run(limit, verify, false); }.run(limit, verify);
} else if (storeOrds) { } else if (storeOrds) {
// Store only ords // Store only ords
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(); final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
@ -771,7 +730,7 @@ public class TestFSTs extends LuceneTestCase {
public Long getOutput(IntsRef input, int ord) { public Long getOutput(IntsRef input, int ord) {
return (long) ord; return (long) ord;
} }
}.run(limit, verify, true); }.run(limit, verify);
} else if (storeDocFreqs) { } else if (storeDocFreqs) {
// Store only docFreq // Store only docFreq
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(); final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
@ -785,7 +744,7 @@ public class TestFSTs extends LuceneTestCase {
} }
return (long) TestUtil.nextInt(rand, 1, 5000); return (long) TestUtil.nextInt(rand, 1, 5000);
} }
}.run(limit, verify, false); }.run(limit, verify);
} else { } else {
// Store nothing // Store nothing
final NoOutputs outputs = NoOutputs.getSingleton(); final NoOutputs outputs = NoOutputs.getSingleton();
@ -795,7 +754,7 @@ public class TestFSTs extends LuceneTestCase {
public Object getOutput(IntsRef input, int ord) { public Object getOutput(IntsRef input, int ord) {
return NO_OUTPUT; return NO_OUTPUT;
} }
}.run(limit, verify, false); }.run(limit, verify);
} }
} }
@ -913,22 +872,6 @@ public class TestFSTs extends LuceneTestCase {
assertNotNull(seekResult); assertNotNull(seekResult);
assertEquals(b, seekResult.input); assertEquals(b, seekResult.input);
assertEquals(42, (long) seekResult.output); assertEquals(42, (long) seekResult.output);
@SuppressWarnings("deprecation")
IntsRef byOutput = Util.getByOutput(fst, 13824324872317238L);
assertEquals(Util.toIntsRef(new BytesRef("c"), new IntsRefBuilder()), byOutput);
@SuppressWarnings("deprecation")
IntsRef byOutput47 = Util.getByOutput(fst, 47);
assertNull(byOutput47);
@SuppressWarnings("deprecation")
IntsRef byOutput42 = Util.getByOutput(fst, 42);
assertEquals(Util.toIntsRef(new BytesRef("b"), new IntsRefBuilder()), byOutput42);
@SuppressWarnings("deprecation")
IntsRef byOutput17 = Util.getByOutput(fst, 17);
assertEquals(Util.toIntsRef(new BytesRef("a"), new IntsRefBuilder()), byOutput17);
} }
public void testPrimaryKeys() throws Exception { public void testPrimaryKeys() throws Exception {
@ -991,12 +934,9 @@ public class TestFSTs extends LuceneTestCase {
if (cycle == 0) { if (cycle == 0) {
idString = String.format(Locale.ROOT, "%07d", (NUM_IDS + idx)); idString = String.format(Locale.ROOT, "%07d", (NUM_IDS + idx));
} else { } else {
while (true) { do {
idString = Long.toString(random().nextLong()); idString = Long.toString(random().nextLong());
if (!allIDs.contains(idString)) { } while (allIDs.contains(idString));
break;
}
}
} }
outOfBounds.add(idString); outOfBounds.add(idString);
allIDsList.add(idString); allIDsList.add(idString);
@ -1063,8 +1003,7 @@ public class TestFSTs extends LuceneTestCase {
new BytesRef(nextID), new BytesRef(nextID),
termsEnum.term()); termsEnum.term());
} else if (!exists) { } else if (!exists) {
assertTrue( assertEquals(TermsEnum.SeekStatus.NOT_FOUND, status);
status == TermsEnum.SeekStatus.NOT_FOUND || status == TermsEnum.SeekStatus.END);
} else { } else {
assertEquals(TermsEnum.SeekStatus.FOUND, status); assertEquals(TermsEnum.SeekStatus.FOUND, status);
} }
@ -1204,7 +1143,7 @@ public class TestFSTs extends LuceneTestCase {
ArrayList<String> out = new ArrayList<>(); ArrayList<String> out = new ArrayList<>();
StringBuilder b = new StringBuilder(); StringBuilder b = new StringBuilder();
s.generate(out, b, 'a', 'i', 10); s.generate(out, b, 'a', 'i', 10);
String[] input = out.toArray(new String[out.size()]); String[] input = out.toArray(new String[0]);
Arrays.sort(input); Arrays.sort(input);
FST<Object> fst = s.compile(input); FST<Object> fst = s.compile(input);
FST.Arc<Object> arc = fst.getFirstArc(new FST.Arc<>()); FST.Arc<Object> arc = fst.getFirstArc(new FST.Arc<>());
@ -1224,7 +1163,7 @@ public class TestFSTs extends LuceneTestCase {
Util.toDot(fst, w, false, false); Util.toDot(fst, w, false, false);
w.close(); w.close();
// System.out.println(w.toString()); // System.out.println(w.toString());
assertTrue(w.toString().indexOf("label=\"t/[7]\"") != -1); assertTrue(w.toString().contains("label=\"t/[7]\""));
} }
public void testInternalFinalState() throws Exception { public void testInternalFinalState() throws Exception {
@ -1242,9 +1181,9 @@ public class TestFSTs extends LuceneTestCase {
// System.out.println(w.toString()); // System.out.println(w.toString());
// check for accept state at label t // check for accept state at label t
assertTrue(w.toString().indexOf("[label=\"t\" style=\"bold\"") != -1); assertTrue(w.toString().contains("[label=\"t\" style=\"bold\""));
// check for accept state at label n // check for accept state at label n
assertTrue(w.toString().indexOf("[label=\"n\" style=\"bold\"") != -1); assertTrue(w.toString().contains("[label=\"n\" style=\"bold\""));
} }
// Make sure raw FST can differentiate between final vs // Make sure raw FST can differentiate between final vs
@ -1253,9 +1192,6 @@ public class TestFSTs extends LuceneTestCase {
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(); final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
final Long nothing = outputs.getNoOutput(); final Long nothing = outputs.getNoOutput();
final FSTCompiler<Long> fstCompiler = new FSTCompiler<>(FST.INPUT_TYPE.BYTE1, outputs); final FSTCompiler<Long> fstCompiler = new FSTCompiler<>(FST.INPUT_TYPE.BYTE1, outputs);
// final FST<Long> fst = new FST<>(FST.INPUT_TYPE.BYTE1, outputs, false, PackedInts.COMPACT,
// 15);
final FST<Long> fst = fstCompiler.fst; final FST<Long> fst = fstCompiler.fst;
final FSTCompiler.UnCompiledNode<Long> rootNode = final FSTCompiler.UnCompiledNode<Long> rootNode =
@ -1311,11 +1247,11 @@ public class TestFSTs extends LuceneTestCase {
private void checkStopNodes(FST<Long> fst, PositiveIntOutputs outputs) throws Exception { private void checkStopNodes(FST<Long> fst, PositiveIntOutputs outputs) throws Exception {
final Long nothing = outputs.getNoOutput(); final Long nothing = outputs.getNoOutput();
FST.Arc<Long> startArc = fst.getFirstArc(new FST.Arc<Long>()); FST.Arc<Long> startArc = fst.getFirstArc(new FST.Arc<>());
assertEquals(nothing, startArc.output()); assertEquals(nothing, startArc.output());
assertEquals(nothing, startArc.nextFinalOutput()); assertEquals(nothing, startArc.nextFinalOutput());
FST.Arc<Long> arc = fst.readFirstTargetArc(startArc, new FST.Arc<Long>(), fst.getBytesReader()); FST.Arc<Long> arc = fst.readFirstTargetArc(startArc, new FST.Arc<>(), fst.getBytesReader());
assertEquals('a', arc.label()); assertEquals('a', arc.label());
assertEquals(17, arc.nextFinalOutput().longValue()); assertEquals(17, arc.nextFinalOutput().longValue());
assertTrue(arc.isFinal()); assertTrue(arc.isFinal());
@ -1326,13 +1262,7 @@ public class TestFSTs extends LuceneTestCase {
assertEquals(42, arc.output().longValue()); assertEquals(42, arc.output().longValue());
} }
static final Comparator<Long> minLongComparator = static final Comparator<Long> minLongComparator = Comparator.naturalOrder();
new Comparator<Long>() {
@Override
public int compare(Long left, Long right) {
return left.compareTo(right);
}
};
public void testShortestPaths() throws Exception { public void testShortestPaths() throws Exception {
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(); final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
@ -1350,7 +1280,7 @@ public class TestFSTs extends LuceneTestCase {
Util.TopResults<Long> res = Util.TopResults<Long> res =
Util.shortestPaths( Util.shortestPaths(
fst, fst,
fst.getFirstArc(new FST.Arc<Long>()), fst.getFirstArc(new FST.Arc<>()),
outputs.getNoOutput(), outputs.getNoOutput(),
minLongComparator, minLongComparator,
3, 3,
@ -1369,7 +1299,7 @@ public class TestFSTs extends LuceneTestCase {
public void testRejectNoLimits() throws IOException { public void testRejectNoLimits() throws IOException {
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(); final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
final FSTCompiler<Long> fstCompiler = new FSTCompiler<Long>(FST.INPUT_TYPE.BYTE1, outputs); final FSTCompiler<Long> fstCompiler = new FSTCompiler<>(FST.INPUT_TYPE.BYTE1, outputs);
final IntsRefBuilder scratch = new IntsRefBuilder(); final IntsRefBuilder scratch = new IntsRefBuilder();
fstCompiler.add(Util.toIntsRef(new BytesRef("aab"), scratch), 22L); fstCompiler.add(Util.toIntsRef(new BytesRef("aab"), scratch), 22L);
@ -1381,7 +1311,7 @@ public class TestFSTs extends LuceneTestCase {
final FST<Long> fst = fstCompiler.compile(); final FST<Long> fst = fstCompiler.compile();
final AtomicInteger rejectCount = new AtomicInteger(); final AtomicInteger rejectCount = new AtomicInteger();
Util.TopNSearcher<Long> searcher = Util.TopNSearcher<Long> searcher =
new Util.TopNSearcher<Long>(fst, 2, 6, minLongComparator) { new Util.TopNSearcher<>(fst, 2, 6, minLongComparator) {
@Override @Override
protected boolean acceptResult(IntsRef input, Long output) { protected boolean acceptResult(IntsRef input, Long output) {
boolean accept = output.intValue() == 7; boolean accept = output.intValue() == 7;
@ -1393,7 +1323,7 @@ public class TestFSTs extends LuceneTestCase {
}; };
searcher.addStartPaths( searcher.addStartPaths(
fst.getFirstArc(new FST.Arc<Long>()), outputs.getNoOutput(), true, new IntsRefBuilder()); fst.getFirstArc(new FST.Arc<>()), outputs.getNoOutput(), true, new IntsRefBuilder());
Util.TopResults<Long> res = searcher.search(); Util.TopResults<Long> res = searcher.search();
assertEquals(rejectCount.get(), 4); assertEquals(rejectCount.get(), 4);
assertTrue(res.isComplete); // rejected(4) + topN(2) <= maxQueueSize(6) assertTrue(res.isComplete); // rejected(4) + topN(2) <= maxQueueSize(6)
@ -1403,7 +1333,7 @@ public class TestFSTs extends LuceneTestCase {
assertEquals(7L, res.topN.get(0).output.longValue()); assertEquals(7L, res.topN.get(0).output.longValue());
rejectCount.set(0); rejectCount.set(0);
searcher = searcher =
new Util.TopNSearcher<Long>(fst, 2, 5, minLongComparator) { new Util.TopNSearcher<>(fst, 2, 5, minLongComparator) {
@Override @Override
protected boolean acceptResult(IntsRef input, Long output) { protected boolean acceptResult(IntsRef input, Long output) {
boolean accept = output.intValue() == 7; boolean accept = output.intValue() == 7;
@ -1415,7 +1345,7 @@ public class TestFSTs extends LuceneTestCase {
}; };
searcher.addStartPaths( searcher.addStartPaths(
fst.getFirstArc(new FST.Arc<Long>()), outputs.getNoOutput(), true, new IntsRefBuilder()); fst.getFirstArc(new FST.Arc<>()), outputs.getNoOutput(), true, new IntsRefBuilder());
res = searcher.search(); res = searcher.search();
assertEquals(rejectCount.get(), 4); assertEquals(rejectCount.get(), 4);
assertFalse(res.isComplete); // rejected(4) + topN(2) > maxQueueSize(5) assertFalse(res.isComplete); // rejected(4) + topN(2) > maxQueueSize(5)
@ -1423,12 +1353,7 @@ public class TestFSTs extends LuceneTestCase {
// compares just the weight side of the pair // compares just the weight side of the pair
static final Comparator<Pair<Long, Long>> minPairWeightComparator = static final Comparator<Pair<Long, Long>> minPairWeightComparator =
new Comparator<Pair<Long, Long>>() { Comparator.comparing(left -> left.output1);
@Override
public int compare(Pair<Long, Long> left, Pair<Long, Long> right) {
return left.output1.compareTo(right.output1);
}
};
/** like testShortestPaths, but uses pairoutputs so we have both a weight and an output */ /** like testShortestPaths, but uses pairoutputs so we have both a weight and an output */
public void testShortestPathsWFST() throws Exception { public void testShortestPathsWFST() throws Exception {
@ -1454,7 +1379,7 @@ public class TestFSTs extends LuceneTestCase {
Util.TopResults<Pair<Long, Long>> res = Util.TopResults<Pair<Long, Long>> res =
Util.shortestPaths( Util.shortestPaths(
fst, fst,
fst.getFirstArc(new FST.Arc<Pair<Long, Long>>()), fst.getFirstArc(new FST.Arc<>()),
outputs.getNoOutput(), outputs.getNoOutput(),
minPairWeightComparator, minPairWeightComparator,
3, 3,
@ -1488,12 +1413,9 @@ public class TestFSTs extends LuceneTestCase {
for (int i = 0; i < numWords; i++) { for (int i = 0; i < numWords; i++) {
String s; String s;
while (true) { do {
s = TestUtil.randomSimpleString(random); s = TestUtil.randomSimpleString(random);
if (!slowCompletor.containsKey(s)) { } while (slowCompletor.containsKey(s));
break;
}
}
for (int j = 1; j < s.length(); j++) { for (int j = 1; j < s.length(); j++) {
allPrefixes.add(s.substring(0, j)); allPrefixes.add(s.substring(0, j));
@ -1521,9 +1443,9 @@ public class TestFSTs extends LuceneTestCase {
// System.out.println("TEST: " + prefix); // System.out.println("TEST: " + prefix);
long prefixOutput = 0; long prefixOutput = 0;
FST.Arc<Long> arc = fst.getFirstArc(new FST.Arc<Long>()); FST.Arc<Long> arc = fst.getFirstArc(new FST.Arc<>());
for (int idx = 0; idx < prefix.length(); idx++) { for (int idx = 0; idx < prefix.length(); idx++) {
if (fst.findTargetArc((int) prefix.charAt(idx), arc, arc, reader) == null) { if (fst.findTargetArc(prefix.charAt(idx), arc, arc, reader) == null) {
fail(); fail();
} }
prefixOutput += arc.output(); prefixOutput += arc.output();
@ -1551,7 +1473,7 @@ public class TestFSTs extends LuceneTestCase {
} }
assertTrue(matches.size() > 0); assertTrue(matches.size() > 0);
Collections.sort(matches, new TieBreakByInputComparator<>(minLongComparator)); matches.sort(new TieBreakByInputComparator<>(minLongComparator));
if (matches.size() > topN) { if (matches.size() > topN) {
matches.subList(topN, matches.size()).clear(); matches.subList(topN, matches.size()).clear();
} }
@ -1614,12 +1536,9 @@ public class TestFSTs extends LuceneTestCase {
Random random = random(); Random random = random();
for (int i = 0; i < numWords; i++) { for (int i = 0; i < numWords; i++) {
String s; String s;
while (true) { do {
s = TestUtil.randomSimpleString(random); s = TestUtil.randomSimpleString(random);
if (!slowCompletor.containsKey(s)) { } while (slowCompletor.containsKey(s));
break;
}
}
for (int j = 1; j < s.length(); j++) { for (int j = 1; j < s.length(); j++) {
allPrefixes.add(s.substring(0, j)); allPrefixes.add(s.substring(0, j));
@ -1651,9 +1570,9 @@ public class TestFSTs extends LuceneTestCase {
// System.out.println("TEST: " + prefix); // System.out.println("TEST: " + prefix);
Pair<Long, Long> prefixOutput = outputs.getNoOutput(); Pair<Long, Long> prefixOutput = outputs.getNoOutput();
FST.Arc<Pair<Long, Long>> arc = fst.getFirstArc(new FST.Arc<Pair<Long, Long>>()); FST.Arc<Pair<Long, Long>> arc = fst.getFirstArc(new FST.Arc<>());
for (int idx = 0; idx < prefix.length(); idx++) { for (int idx = 0; idx < prefix.length(); idx++) {
if (fst.findTargetArc((int) prefix.charAt(idx), arc, arc, reader) == null) { if (fst.findTargetArc(prefix.charAt(idx), arc, arc, reader) == null) {
fail(); fail();
} }
prefixOutput = outputs.add(prefixOutput, arc.output()); prefixOutput = outputs.add(prefixOutput, arc.output());
@ -1683,7 +1602,7 @@ public class TestFSTs extends LuceneTestCase {
} }
assertTrue(matches.size() > 0); assertTrue(matches.size() > 0);
Collections.sort(matches, new TieBreakByInputComparator<>(minPairWeightComparator)); matches.sort(new TieBreakByInputComparator<>(minPairWeightComparator));
if (matches.size() > topN) { if (matches.size() > topN) {
matches.subList(topN, matches.size()).clear(); matches.subList(topN, matches.size()).clear();
} }
@ -1758,7 +1677,7 @@ public class TestFSTs extends LuceneTestCase {
Arc<BytesRef> arc = new FST.Arc<>(); Arc<BytesRef> arc = new FST.Arc<>();
fst.getFirstArc(arc); fst.getFirstArc(arc);
FST.BytesReader reader = fst.getBytesReader(); FST.BytesReader reader = fst.getBytesReader();
arc = fst.findTargetArc((int) 'm', arc, arc, reader); arc = fst.findTargetArc('m', arc, arc, reader);
assertNotNull(arc); assertNotNull(arc);
assertEquals(new BytesRef("m"), arc.output()); assertEquals(new BytesRef("m"), arc.output());
@ -1767,7 +1686,7 @@ public class TestFSTs extends LuceneTestCase {
fst.getFirstArc(arc); fst.getFirstArc(arc);
try { try {
arc = fst.findTargetArc((int) 'm', arc, arc, reader); fst.findTargetArc((int) 'm', arc, arc, reader);
} catch ( } catch (
@SuppressWarnings("unused") @SuppressWarnings("unused")
AssertionError ae) { AssertionError ae) {

View File

@ -109,7 +109,7 @@ public class TestFSTsMisc extends LuceneTestCase {
} }
pairs.add(new FSTTester.InputOutput<>(terms[idx], output)); pairs.add(new FSTTester.InputOutput<>(terms[idx], output));
} }
new FSTTester<Object>(random(), dir, inputMode, pairs, outputs, false) { new FSTTester<Object>(random(), dir, inputMode, pairs, outputs) {
@Override @Override
protected boolean outputsEqual(Object output1, Object output2) { protected boolean outputsEqual(Object output1, Object output2) {
if (output1 instanceof TwoLongs && output2 instanceof List) { if (output1 instanceof TwoLongs && output2 instanceof List) {
@ -157,7 +157,7 @@ public class TestFSTsMisc extends LuceneTestCase {
pairs.add(new FSTTester.InputOutput<>(terms[idx], output)); pairs.add(new FSTTester.InputOutput<>(terms[idx], output));
} }
new FSTTester<>(random(), dir, inputMode, pairs, outputs, false).doTest(false); new FSTTester<>(random(), dir, inputMode, pairs, outputs).doTest(false);
} }
} }

View File

@ -29,12 +29,10 @@ import java.nio.charset.Charset;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collections; import java.util.Collections;
import java.util.HashMap; import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator; import java.util.Iterator;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Random; import java.util.Random;
import java.util.Set;
import org.apache.lucene.store.Directory; import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexInput;
@ -54,23 +52,16 @@ public class FSTTester<T> {
final int inputMode; final int inputMode;
final Outputs<T> outputs; final Outputs<T> outputs;
final Directory dir; final Directory dir;
final boolean doReverseLookup;
long nodeCount; long nodeCount;
long arcCount; long arcCount;
public FSTTester( public FSTTester(
Random random, Random random, Directory dir, int inputMode, List<InputOutput<T>> pairs, Outputs<T> outputs) {
Directory dir,
int inputMode,
List<InputOutput<T>> pairs,
Outputs<T> outputs,
boolean doReverseLookup) {
this.random = random; this.random = random;
this.dir = dir; this.dir = dir;
this.inputMode = inputMode; this.inputMode = inputMode;
this.pairs = pairs; this.pairs = pairs;
this.outputs = outputs; this.outputs = outputs;
this.doReverseLookup = doReverseLookup;
} }
static String inputToString(int inputMode, IntsRef term) { static String inputToString(int inputMode, IntsRef term) {
@ -181,11 +172,7 @@ public class FSTTester<T> {
@Override @Override
public int compareTo(InputOutput<T> other) { public int compareTo(InputOutput<T> other) {
if (other instanceof InputOutput) { return input.compareTo(other.input);
return input.compareTo((other).input);
} else {
throw new IllegalArgumentException();
}
} }
} }
@ -208,9 +195,8 @@ public class FSTTester<T> {
// of the term prefix that matches // of the term prefix that matches
private T run(FST<T> fst, IntsRef term, int[] prefixLength) throws IOException { private T run(FST<T> fst, IntsRef term, int[] prefixLength) throws IOException {
assert prefixLength == null || prefixLength.length == 1; assert prefixLength == null || prefixLength.length == 1;
final FST.Arc<T> arc = fst.getFirstArc(new FST.Arc<T>()); final FST.Arc<T> arc = fst.getFirstArc(new FST.Arc<>());
final T NO_OUTPUT = fst.outputs.getNoOutput(); T output = fst.outputs.getNoOutput();
T output = NO_OUTPUT;
final FST.BytesReader fstReader = fst.getBytesReader(); final FST.BytesReader fstReader = fst.getBytesReader();
for (int i = 0; i <= term.length; i++) { for (int i = 0; i <= term.length; i++) {
@ -243,12 +229,11 @@ public class FSTTester<T> {
} }
private T randomAcceptedWord(FST<T> fst, IntsRefBuilder in) throws IOException { private T randomAcceptedWord(FST<T> fst, IntsRefBuilder in) throws IOException {
FST.Arc<T> arc = fst.getFirstArc(new FST.Arc<T>()); FST.Arc<T> arc = fst.getFirstArc(new FST.Arc<>());
final List<FST.Arc<T>> arcs = new ArrayList<>(); final List<FST.Arc<T>> arcs = new ArrayList<>();
in.clear(); in.clear();
final T NO_OUTPUT = fst.outputs.getNoOutput(); T output = fst.outputs.getNoOutput();
T output = NO_OUTPUT;
final FST.BytesReader fstReader = fst.getBytesReader(); final FST.BytesReader fstReader = fst.getBytesReader();
while (true) { while (true) {
@ -311,14 +296,12 @@ public class FSTTester<T> {
if (random.nextBoolean() && fst != null) { if (random.nextBoolean() && fst != null) {
IOContext context = LuceneTestCase.newIOContext(random); IOContext context = LuceneTestCase.newIOContext(random);
IndexOutput out = dir.createOutput("fst.bin", context); try (IndexOutput out = dir.createOutput("fst.bin", context)) {
fst.save(out, out); fst.save(out, out);
out.close(); }
IndexInput in = dir.openInput("fst.bin", context); try (IndexInput in = dir.openInput("fst.bin", context)) {
try { fst = new FST<>(in, in, outputs);
fst = new FST<T>(in, in, outputs);
} finally { } finally {
in.close();
dir.deleteFile("fst.bin"); dir.deleteFile("fst.bin");
} }
} }
@ -361,30 +344,8 @@ public class FSTTester<T> {
} }
// FST is complete // FST is complete
@SuppressWarnings("deprecation")
private void verifyUnPruned(int inputMode, FST<T> fst) throws IOException { private void verifyUnPruned(int inputMode, FST<T> fst) throws IOException {
final FST<Long> fstLong;
final Set<Long> validOutputs;
long minLong = Long.MAX_VALUE;
long maxLong = Long.MIN_VALUE;
if (doReverseLookup) {
@SuppressWarnings("unchecked")
FST<Long> fstLong0 = (FST<Long>) fst;
fstLong = fstLong0;
validOutputs = new HashSet<>();
for (InputOutput<T> pair : pairs) {
Long output = (Long) pair.output;
maxLong = Math.max(maxLong, output);
minLong = Math.min(minLong, output);
validOutputs.add(output);
}
} else {
fstLong = null;
validOutputs = null;
}
if (pairs.size() == 0) { if (pairs.size() == 0) {
assertNull(fst); assertNull(fst);
return; return;
@ -447,20 +408,6 @@ public class FSTTester<T> {
termsMap.put(pair.input, pair.output); termsMap.put(pair.input, pair.output);
} }
if (doReverseLookup && maxLong > minLong) {
// Do random lookups so we test null (output doesn't
// exist) case:
assertNull(Util.getByOutput(fstLong, minLong - 7));
assertNull(Util.getByOutput(fstLong, maxLong + 7));
final int num = LuceneTestCase.atLeast(random, 100);
for (int iter = 0; iter < num; iter++) {
Long v = TestUtil.nextLong(random, minLong, maxLong);
IntsRef input = Util.getByOutput(fstLong, v);
assertTrue(validOutputs.contains(v) || input == null);
}
}
// find random matching word and make sure it's valid // find random matching word and make sure it's valid
if (LuceneTestCase.VERBOSE) { if (LuceneTestCase.VERBOSE) {
System.out.println("TEST: verify random accepted terms"); System.out.println("TEST: verify random accepted terms");
@ -473,14 +420,6 @@ public class FSTTester<T> {
"accepted word " + inputToString(inputMode, scratch.get()) + " is not valid", "accepted word " + inputToString(inputMode, scratch.get()) + " is not valid",
termsMap.containsKey(scratch.get())); termsMap.containsKey(scratch.get()));
assertTrue(outputsEqual(termsMap.get(scratch.get()), output)); assertTrue(outputsEqual(termsMap.get(scratch.get()), output));
if (doReverseLookup) {
// System.out.println("lookup output=" + output + " outs=" + fst.outputs);
IntsRef input = Util.getByOutput(fstLong, (Long) output);
assertNotNull(input);
// System.out.println(" got " + Util.toBytesRef(input, new BytesRef()).utf8ToString());
assertEquals(scratch.get(), input);
}
} }
// test IntsRefFSTEnum.seek: // test IntsRefFSTEnum.seek:
@ -497,7 +436,7 @@ public class FSTTester<T> {
// seek to term that doesn't exist: // seek to term that doesn't exist:
while (true) { while (true) {
final IntsRef term = toIntsRef(getRandomString(random), inputMode); final IntsRef term = toIntsRef(getRandomString(random), inputMode);
int pos = Collections.binarySearch(pairs, new InputOutput<T>(term, null)); int pos = Collections.binarySearch(pairs, new InputOutput<>(term, null));
if (pos < 0) { if (pos < 0) {
pos = -(pos + 1); pos = -(pos + 1);
// ok doesn't exist // ok doesn't exist
@ -617,7 +556,7 @@ public class FSTTester<T> {
for (; attempt < 10; attempt++) { for (; attempt < 10; attempt++) {
IntsRef term = toIntsRef(getRandomString(random), inputMode); IntsRef term = toIntsRef(getRandomString(random), inputMode);
if (!termsMap.containsKey(term) && term.compareTo(pairs.get(upto).input) > 0) { if (!termsMap.containsKey(term) && term.compareTo(pairs.get(upto).input) > 0) {
int pos = Collections.binarySearch(pairs, new InputOutput<T>(term, null)); int pos = Collections.binarySearch(pairs, new InputOutput<>(term, null));
assert pos < 0; assert pos < 0;
upto = -(pos + 1); upto = -(pos + 1);
@ -806,10 +745,8 @@ public class FSTTester<T> {
cmo2 != null cmo2 != null
&& ((prune2 > 1 && cmo2.count >= prune2) && ((prune2 > 1 && cmo2.count >= prune2)
|| (prune2 == 1 && (cmo2.count >= 2 || prefix.length <= 1))); || (prune2 == 1 && (cmo2.count >= 2 || prefix.length <= 1)));
} else if (cmo.count >= prune2) {
keep = true;
} else { } else {
keep = false; keep = cmo.count >= prune2;
} }
} }