LUCENE-8638: remove deprecated FST get by output

This commit is contained in:
Michael Sokolov 2021-08-18 08:15:31 -04:00 committed by GitHub
parent a37844aedd
commit 666c7a2590
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 122 additions and 434 deletions

View File

@ -22,6 +22,8 @@ System Requirements
API Changes
* LUCENE-8638: Remove deprecated methods in FST for lookup by output.
* LUCENE-8638: haversin() expressions function now returns its result in meters rather than
kilometers.

View File

@ -45,7 +45,7 @@ public final class Util {
public static <T> T get(FST<T> fst, IntsRef input) throws IOException {
// TODO: would be nice not to alloc this on every lookup
final FST.Arc<T> arc = fst.getFirstArc(new FST.Arc<T>());
final FST.Arc<T> arc = fst.getFirstArc(new FST.Arc<>());
final BytesReader fstReader = fst.getBytesReader();
@ -92,167 +92,6 @@ public final class Util {
}
}
/**
* Reverse lookup (lookup by output instead of by input), in the special case when your FST's
* outputs are strictly ascending. This locates the input/output pair where the output is equal to
* the target, and will return null if that output does not exist.
*
* <p>NOTE: this only works with {@code FST<Long>}, and only when the outputs are ascending in
* order with the inputs. For example, simple ordinals (0, 1, 2, ...), or file offsets (when
* appending to a file) fit this.
*
* <p>This convenience overload allocates a new {@link BytesReader}, two {@link FST.Arc}s and an
* {@link IntsRefBuilder} on every call and then delegates to the six-argument overload, which
* lets callers reuse those objects.
*
* @param fst the FST to search
* @param targetOutput the output value to locate
* @return the input whose output equals {@code targetOutput}, or null if no such input exists
* @throws IOException declared because the lookup reads from the FST
* @deprecated lookup by output is deprecated (see LUCENE-8638)
*/
@Deprecated
public static IntsRef getByOutput(FST<Long> fst, long targetOutput) throws IOException {
final BytesReader in = fst.getBytesReader();
// TODO: would be nice not to alloc this on every lookup
FST.Arc<Long> arc = fst.getFirstArc(new FST.Arc<Long>());
// Second arc is working storage for the delegate's linear scan over a node's arcs.
FST.Arc<Long> scratchArc = new FST.Arc<>();
final IntsRefBuilder result = new IntsRefBuilder();
return getByOutput(fst, targetOutput, in, arc, scratchArc, result);
}
/**
* Expert: like {@link Util#getByOutput(FST, long)} except reusing BytesReader, initial and
* scratch Arc, and result.
*
* <p>The search walks the FST from {@code arc}, summing arc outputs into a running total. At
* each node it follows the arc whose accumulated output equals the target, or otherwise the last
* arc whose accumulated output is still below it; this is only meaningful when outputs ascend
* with the inputs. The labels of the followed arcs are written into {@code result}.
*
* @param fst the FST to search
* @param targetOutput the output value to locate
* @param in reader used for all arc reads; it is repositioned during the search
* @param arc the arc to start from (the two-argument overload passes the FST's first arc); it
*     is overwritten in place as the search descends
* @param scratchArc working storage that holds the previously visited arc during the linear
*     scan of a node; its contents are overwritten
* @param result receives the labels of the matching input
* @return {@code result.get()} when an input with exactly {@code targetOutput} is found,
*     otherwise null
* @throws IOException declared because the lookup reads from the FST
* @deprecated lookup by output is deprecated (see LUCENE-8638)
*/
@Deprecated
public static IntsRef getByOutput(
FST<Long> fst,
long targetOutput,
BytesReader in,
Arc<Long> arc,
Arc<Long> scratchArc,
IntsRefBuilder result)
throws IOException {
// Running sum of the outputs on the path followed so far.
long output = arc.output();
// Number of labels written into result so far.
int upto = 0;
// System.out.println("reverseLookup output=" + targetOutput);
while (true) {
// System.out.println("loop: output=" + output + " upto=" + upto + " arc=" + arc);
if (arc.isFinal()) {
// The path so far is a complete input; its total output includes the final output.
final long finalOutput = output + arc.nextFinalOutput();
// System.out.println(" isFinal finalOutput=" + finalOutput);
if (finalOutput == targetOutput) {
result.setLength(upto);
// System.out.println(" found!");
return result.get();
} else if (finalOutput > targetOutput) {
// Already past the target; with ascending outputs going deeper cannot match.
// System.out.println(" not found!");
return null;
}
}
if (FST.targetHasArcs(arc)) {
// System.out.println(" targetHasArcs");
// Make room for the label of the arc chosen at this node.
result.grow(1 + upto);
fst.readFirstRealTargetArc(arc.target(), arc, in);
if (arc.bytesPerArc() != 0 && arc.nodeFlags() == FST.ARCS_FOR_BINARY_SEARCH) {
// Fixed-width arcs: binary search on the accumulated output of each arc.
int low = 0;
int high = arc.numArcs() - 1;
int mid = 0;
// System.out.println("bsearch: numArcs=" + arc.numArcs + " target=" + targetOutput + "
// output=" + output);
boolean exact = false;
while (low <= high) {
mid = (low + high) >>> 1;
// Seek to the start of arc number mid.
// NOTE(review): bytesPerArc() * mid is an int multiplication; confirm it cannot overflow.
in.setPosition(arc.posArcsStart());
in.skipBytes(arc.bytesPerArc() * mid);
final byte flags = in.readByte();
// Consume the label to advance the reader; its value is not needed here.
fst.readLabel(in);
final long minArcOutput;
if ((flags & FST.BIT_ARC_HAS_OUTPUT) != 0) {
final long arcOutput = fst.outputs.read(in);
minArcOutput = output + arcOutput;
} else {
minArcOutput = output;
}
// System.out.println(" cycle mid=" + mid + " output=" + minArcOutput);
if (minArcOutput == targetOutput) {
exact = true;
break;
} else if (minArcOutput < targetOutput) {
low = mid + 1;
} else {
high = mid - 1;
}
}
int idx;
if (high == -1) {
// Even arc 0 has an accumulated output above the target.
return null;
} else if (exact) {
idx = mid;
} else {
// low is the first arc above the target, so low - 1 is the last arc below it.
idx = low - 1;
}
fst.readArcByIndex(arc, in, idx);
result.setIntAt(upto++, arc.label());
output += arc.output();
} else {
// Other node encodings: scan the arcs in order, remembering the previous one.
FST.Arc<Long> prevArc = null;
while (true) {
// System.out.println(" cycle label=" + arc.label + " output=" + arc.output);
// This is the min output we'd hit if we follow
// this arc:
final long minArcOutput = output + arc.output();
if (minArcOutput == targetOutput) {
// Recurse on this arc:
// System.out.println(" match! break");
output = minArcOutput;
result.setIntAt(upto++, arc.label());
break;
} else if (minArcOutput > targetOutput) {
if (prevArc == null) {
// Output doesn't exist
return null;
} else {
// Recurse on previous arc:
arc.copyFrom(prevArc);
result.setIntAt(upto++, arc.label());
output += arc.output();
// System.out.println(" recurse prev label=" + (char) arc.label + " output=" +
// output);
break;
}
} else if (arc.isLast()) {
// Recurse on this arc:
output = minArcOutput;
// System.out.println(" recurse last label=" + (char) arc.label + " output=" +
// output);
result.setIntAt(upto++, arc.label());
break;
} else {
// Read next arc in this node:
// Save the current arc in scratchArc before arc is overwritten by the next read.
prevArc = scratchArc;
prevArc.copyFrom(arc);
// System.out.println(" after copy label=" + (char) prevArc.label + " vs " +
// (char) arc.label);
fst.readNextRealArc(arc, in);
}
}
}
} else {
// Dead end: nothing left to follow and no final arc matched the target.
// System.out.println(" no target arcs; not found!");
return null;
}
}
}
/**
* Represents a path in TopNSearcher.
*
@ -899,7 +738,7 @@ public final class Util {
scratch.setLength(charLimit);
scratch.grow(charLimit);
for (int idx = 0; idx < charLimit; idx++) {
scratch.setIntAt(idx, (int) s.charAt(idx));
scratch.setIntAt(idx, s.charAt(idx));
}
return scratch.get();
}
@ -1033,9 +872,6 @@ public final class Util {
while (true) {
// System.out.println(" non-bs cycle");
// TODO: we should fix this code to not have to create
// object for the output of every arc we scan... only
// for the matching arc, if found
if (arc.label() >= label) {
// System.out.println(" found!");
return arc;
@ -1067,12 +903,12 @@ public final class Util {
+ ")";
BytesReader in = fst.getBytesReader();
int low = arc.arcIdx();
int mid = 0;
int mid;
int high = arc.numArcs() - 1;
while (low <= high) {
mid = (low + high) >>> 1;
in.setPosition(arc.posArcsStart());
in.skipBytes(arc.bytesPerArc() * mid + 1);
in.skipBytes((long) arc.bytesPerArc() * mid + 1);
final int midLabel = fst.readLabel(in);
final int cmp = midLabel - targetLabel;
if (cmp < 0) {

View File

@ -25,8 +25,6 @@
* <li>Fast and low memory overhead construction of the minimal FST (but inputs must be provided
* in sorted order)
* <li>Low object overhead and quick deserialization (byte[] representation)
* <li>{@link org.apache.lucene.util.fst.Util#getByOutput Lookup-by-output} when the outputs are
* in sorted order (e.g., ordinals or file pointers)
* <li>Pluggable {@link org.apache.lucene.util.fst.Outputs Outputs} representation
* <li>{@link org.apache.lucene.util.fst.Util#shortestPaths N-shortest-paths} search by weight
* <li>Enumerators ({@link org.apache.lucene.util.fst.IntsRefFSTEnum IntsRef} and {@link

View File

@ -277,12 +277,8 @@ public class Test2BFST extends LuceneTestCase {
System.out.println(i + "...: ");
}
// forward lookup:
assertEquals(output, Util.get(fst, input).longValue());
// reverse lookup:
@SuppressWarnings("deprecation")
IntsRef inputResult = Util.getByOutput(fst, output);
assertEquals(input, inputResult);
output += 1 + r.nextInt(10);
nextInput(r, ints);
}

View File

@ -92,7 +92,9 @@ public class TestFSTs extends LuceneTestCase {
@Override
public void tearDown() throws Exception {
// can be null if we force simpletext (funky, some kind of bug in test runner maybe)
if (dir != null) dir.close();
if (dir != null) {
dir.close();
}
super.tearDown();
}
@ -133,7 +135,7 @@ public class TestFSTs extends LuceneTestCase {
for (IntsRef term : terms2) {
pairs.add(new FSTTester.InputOutput<>(term, NO_OUTPUT));
}
FSTTester<Object> tester = new FSTTester<>(random(), dir, inputMode, pairs, outputs, false);
FSTTester<Object> tester = new FSTTester<>(random(), dir, inputMode, pairs, outputs);
FST<Object> fst = tester.doTest(0, 0, false);
assertNotNull(fst);
assertEquals(22, tester.nodeCount);
@ -147,7 +149,7 @@ public class TestFSTs extends LuceneTestCase {
for (int idx = 0; idx < terms2.length; idx++) {
pairs.add(new FSTTester.InputOutput<>(terms2[idx], (long) idx));
}
FSTTester<Long> tester = new FSTTester<>(random(), dir, inputMode, pairs, outputs, true);
FSTTester<Long> tester = new FSTTester<>(random(), dir, inputMode, pairs, outputs);
final FST<Long> fst = tester.doTest(0, 0, false);
assertNotNull(fst);
assertEquals(22, tester.nodeCount);
@ -157,14 +159,12 @@ public class TestFSTs extends LuceneTestCase {
// FST byte sequence ord
{
final ByteSequenceOutputs outputs = ByteSequenceOutputs.getSingleton();
final BytesRef NO_OUTPUT = outputs.getNoOutput();
final List<FSTTester.InputOutput<BytesRef>> pairs = new ArrayList<>(terms2.length);
for (int idx = 0; idx < terms2.length; idx++) {
final BytesRef output = idx == 17 ? NO_OUTPUT : new BytesRef(Integer.toString(idx));
final BytesRef output = new BytesRef(Integer.toString(idx));
pairs.add(new FSTTester.InputOutput<>(terms2[idx], output));
}
FSTTester<BytesRef> tester =
new FSTTester<>(random(), dir, inputMode, pairs, outputs, false);
FSTTester<BytesRef> tester = new FSTTester<>(random(), dir, inputMode, pairs, outputs);
final FST<BytesRef> fst = tester.doTest(0, 0, false);
assertNotNull(fst);
assertEquals(24, tester.nodeCount);
@ -185,7 +185,7 @@ public class TestFSTs extends LuceneTestCase {
for (IntsRef term : terms) {
pairs.add(new FSTTester.InputOutput<>(term, NO_OUTPUT));
}
new FSTTester<>(random(), dir, inputMode, pairs, outputs, false).doTest(true);
new FSTTester<>(random(), dir, inputMode, pairs, outputs).doTest(true);
}
// PositiveIntOutput (ord)
@ -195,7 +195,7 @@ public class TestFSTs extends LuceneTestCase {
for (int idx = 0; idx < terms.length; idx++) {
pairs.add(new FSTTester.InputOutput<>(terms[idx], (long) idx));
}
new FSTTester<>(random(), dir, inputMode, pairs, outputs, true).doTest(true);
new FSTTester<>(random(), dir, inputMode, pairs, outputs).doTest(true);
}
// PositiveIntOutput (random monotonically increasing positive number)
@ -203,24 +203,23 @@ public class TestFSTs extends LuceneTestCase {
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
final List<FSTTester.InputOutput<Long>> pairs = new ArrayList<>(terms.length);
long lastOutput = 0;
for (int idx = 0; idx < terms.length; idx++) {
for (IntsRef term : terms) {
final long value = lastOutput + TestUtil.nextInt(random(), 1, 1000);
lastOutput = value;
pairs.add(new FSTTester.InputOutput<>(terms[idx], value));
pairs.add(new FSTTester.InputOutput<>(term, value));
}
new FSTTester<>(random(), dir, inputMode, pairs, outputs, true).doTest(true);
new FSTTester<>(random(), dir, inputMode, pairs, outputs).doTest(true);
}
// PositiveIntOutput (random positive number)
{
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
final List<FSTTester.InputOutput<Long>> pairs = new ArrayList<>(terms.length);
for (int idx = 0; idx < terms.length; idx++) {
for (IntsRef term : terms) {
pairs.add(
new FSTTester.InputOutput<>(
terms[idx], TestUtil.nextLong(random(), 0, Long.MAX_VALUE)));
new FSTTester.InputOutput<>(term, TestUtil.nextLong(random(), 0, Long.MAX_VALUE)));
}
new FSTTester<>(random(), dir, inputMode, pairs, outputs, false).doTest(true);
new FSTTester<>(random(), dir, inputMode, pairs, outputs).doTest(true);
}
// Pair<ord, (random monotonically increasing positive number>
@ -236,7 +235,7 @@ public class TestFSTs extends LuceneTestCase {
lastOutput = value;
pairs.add(new FSTTester.InputOutput<>(terms[idx], outputs.newPair((long) idx, value)));
}
new FSTTester<>(random(), dir, inputMode, pairs, outputs, false).doTest(true);
new FSTTester<>(random(), dir, inputMode, pairs, outputs).doTest(true);
}
// Sequence-of-bytes
@ -249,7 +248,7 @@ public class TestFSTs extends LuceneTestCase {
random().nextInt(30) == 17 ? NO_OUTPUT : new BytesRef(Integer.toString(idx));
pairs.add(new FSTTester.InputOutput<>(terms[idx], output));
}
new FSTTester<>(random(), dir, inputMode, pairs, outputs, false).doTest(true);
new FSTTester<>(random(), dir, inputMode, pairs, outputs).doTest(true);
}
// Sequence-of-ints
@ -265,7 +264,7 @@ public class TestFSTs extends LuceneTestCase {
}
pairs.add(new FSTTester.InputOutput<>(terms[idx], output));
}
new FSTTester<>(random(), dir, inputMode, pairs, outputs, false).doTest(true);
new FSTTester<>(random(), dir, inputMode, pairs, outputs).doTest(true);
}
}
@ -298,7 +297,7 @@ public class TestFSTs extends LuceneTestCase {
final String term = getRandomString(random);
termsSet.add(toIntsRef(term, inputMode));
}
doTest(inputMode, termsSet.toArray(new IntsRef[termsSet.size()]));
doTest(inputMode, termsSet.toArray(new IntsRef[0]));
}
}
}
@ -497,7 +496,7 @@ public class TestFSTs extends LuceneTestCase {
private abstract static class VisitTerms<T> {
private final Path dirOut;
private final Path wordsFileIn;
private int inputMode;
private final int inputMode;
private final Outputs<T> outputs;
private final FSTCompiler<T> fstCompiler;
@ -524,7 +523,7 @@ public class TestFSTs extends LuceneTestCase {
protected abstract T getOutput(IntsRef input, int ord) throws IOException;
public void run(int limit, boolean verify, boolean verifyByOutput) throws IOException {
public void run(int limit, boolean verify) throws IOException {
BufferedReader is = Files.newBufferedReader(wordsFileIn, StandardCharsets.UTF_8);
try {
@ -541,12 +540,11 @@ public class TestFSTs extends LuceneTestCase {
ord++;
if (ord % 500000 == 0) {
System.out.println(
String.format(
Locale.ROOT,
"%6.2fs: %9d...",
((System.currentTimeMillis() - tStart) / 1000.0),
ord));
System.out.printf(
Locale.ROOT,
"%6.2fs: %9d...",
((System.currentTimeMillis() - tStart) / 1000.0),
ord);
}
if (ord >= limit) {
break;
@ -594,90 +592,51 @@ public class TestFSTs extends LuceneTestCase {
return;
}
/*
IndexInput in = dir.openInput("fst.bin", IOContext.DEFAULT);
fst = new FST<T>(in, outputs);
in.close();
*/
System.out.println("\nNow verify...");
is.close();
is = Files.newBufferedReader(wordsFileIn, StandardCharsets.UTF_8);
ord = 0;
tStart = System.currentTimeMillis();
while (true) {
for (int iter = 0; iter < 2; iter++) {
is.close();
is = Files.newBufferedReader(wordsFileIn, StandardCharsets.UTF_8);
ord = 0;
tStart = System.currentTimeMillis();
while (true) {
String w = is.readLine();
if (w == null) {
break;
}
toIntsRef(w, inputMode, intsRefBuilder);
if (iter == 0) {
T expected = getOutput(intsRefBuilder.get(), ord);
T actual = Util.get(fst, intsRefBuilder.get());
if (actual == null) {
throw new RuntimeException("unexpected null output on input=" + w);
}
if (!actual.equals(expected)) {
throw new RuntimeException(
"wrong output (got "
+ outputs.outputToString(actual)
+ " but expected "
+ outputs.outputToString(expected)
+ ") on input="
+ w);
}
} else {
// Get by output
final Long output = (Long) getOutput(intsRefBuilder.get(), ord);
@SuppressWarnings({"unchecked", "deprecation"})
final IntsRef actual = Util.getByOutput((FST<Long>) fst, output.longValue());
if (actual == null) {
throw new RuntimeException("unexpected null input from output=" + output);
}
if (!actual.equals(intsRefBuilder.get())) {
throw new RuntimeException(
"wrong input (got "
+ actual
+ " but expected "
+ intsRefBuilder
+ " from output="
+ output);
}
}
ord++;
if (ord % 500000 == 0) {
System.out.println(
((System.currentTimeMillis() - tStart) / 1000.0) + "s: " + ord + "...");
}
if (ord >= limit) {
break;
}
}
double totSec = ((System.currentTimeMillis() - tStart) / 1000.0);
System.out.println(
"Verify "
+ (iter == 1 ? "(by output) " : "")
+ "took "
+ totSec
+ " sec + ("
+ (int) ((totSec * 1000000000 / ord))
+ " nsec per lookup)");
if (!verifyByOutput) {
break;
}
String w = is.readLine();
if (w == null) {
break;
}
toIntsRef(w, inputMode, intsRefBuilder);
T expected = getOutput(intsRefBuilder.get(), ord);
T actual = Util.get(fst, intsRefBuilder.get());
if (actual == null) {
throw new RuntimeException("unexpected null output on input=" + w);
}
if (!actual.equals(expected)) {
throw new RuntimeException(
"wrong output (got "
+ outputs.outputToString(actual)
+ " but expected "
+ outputs.outputToString(expected)
+ ") on input="
+ w);
}
ord++;
if (ord % 500000 == 0) {
System.out.println(
((System.currentTimeMillis() - tStart) / 1000.0) + "s: " + ord + "...");
}
if (ord >= limit) {
break;
}
// NOTE: comment out to profile lookup...
break;
}
double totSec = ((System.currentTimeMillis() - tStart) / 1000.0);
System.out.println(
"Verify took "
+ totSec
+ " sec + ("
+ (int) ((totSec * 1000000000 / ord))
+ " nsec per lookup)");
} finally {
is.close();
}
@ -762,7 +721,7 @@ public class TestFSTs extends LuceneTestCase {
}
return outputs.newPair((long) ord, (long) TestUtil.nextInt(rand, 1, 5000));
}
}.run(limit, verify, false);
}.run(limit, verify);
} else if (storeOrds) {
// Store only ords
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
@ -771,7 +730,7 @@ public class TestFSTs extends LuceneTestCase {
public Long getOutput(IntsRef input, int ord) {
return (long) ord;
}
}.run(limit, verify, true);
}.run(limit, verify);
} else if (storeDocFreqs) {
// Store only docFreq
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
@ -785,7 +744,7 @@ public class TestFSTs extends LuceneTestCase {
}
return (long) TestUtil.nextInt(rand, 1, 5000);
}
}.run(limit, verify, false);
}.run(limit, verify);
} else {
// Store nothing
final NoOutputs outputs = NoOutputs.getSingleton();
@ -795,7 +754,7 @@ public class TestFSTs extends LuceneTestCase {
public Object getOutput(IntsRef input, int ord) {
return NO_OUTPUT;
}
}.run(limit, verify, false);
}.run(limit, verify);
}
}
@ -913,22 +872,6 @@ public class TestFSTs extends LuceneTestCase {
assertNotNull(seekResult);
assertEquals(b, seekResult.input);
assertEquals(42, (long) seekResult.output);
@SuppressWarnings("deprecation")
IntsRef byOutput = Util.getByOutput(fst, 13824324872317238L);
assertEquals(Util.toIntsRef(new BytesRef("c"), new IntsRefBuilder()), byOutput);
@SuppressWarnings("deprecation")
IntsRef byOutput47 = Util.getByOutput(fst, 47);
assertNull(byOutput47);
@SuppressWarnings("deprecation")
IntsRef byOutput42 = Util.getByOutput(fst, 42);
assertEquals(Util.toIntsRef(new BytesRef("b"), new IntsRefBuilder()), byOutput42);
@SuppressWarnings("deprecation")
IntsRef byOutput17 = Util.getByOutput(fst, 17);
assertEquals(Util.toIntsRef(new BytesRef("a"), new IntsRefBuilder()), byOutput17);
}
public void testPrimaryKeys() throws Exception {
@ -991,12 +934,9 @@ public class TestFSTs extends LuceneTestCase {
if (cycle == 0) {
idString = String.format(Locale.ROOT, "%07d", (NUM_IDS + idx));
} else {
while (true) {
do {
idString = Long.toString(random().nextLong());
if (!allIDs.contains(idString)) {
break;
}
}
} while (allIDs.contains(idString));
}
outOfBounds.add(idString);
allIDsList.add(idString);
@ -1063,8 +1003,7 @@ public class TestFSTs extends LuceneTestCase {
new BytesRef(nextID),
termsEnum.term());
} else if (!exists) {
assertTrue(
status == TermsEnum.SeekStatus.NOT_FOUND || status == TermsEnum.SeekStatus.END);
assertEquals(TermsEnum.SeekStatus.NOT_FOUND, status);
} else {
assertEquals(TermsEnum.SeekStatus.FOUND, status);
}
@ -1204,7 +1143,7 @@ public class TestFSTs extends LuceneTestCase {
ArrayList<String> out = new ArrayList<>();
StringBuilder b = new StringBuilder();
s.generate(out, b, 'a', 'i', 10);
String[] input = out.toArray(new String[out.size()]);
String[] input = out.toArray(new String[0]);
Arrays.sort(input);
FST<Object> fst = s.compile(input);
FST.Arc<Object> arc = fst.getFirstArc(new FST.Arc<>());
@ -1224,7 +1163,7 @@ public class TestFSTs extends LuceneTestCase {
Util.toDot(fst, w, false, false);
w.close();
// System.out.println(w.toString());
assertTrue(w.toString().indexOf("label=\"t/[7]\"") != -1);
assertTrue(w.toString().contains("label=\"t/[7]\""));
}
public void testInternalFinalState() throws Exception {
@ -1242,9 +1181,9 @@ public class TestFSTs extends LuceneTestCase {
// System.out.println(w.toString());
// check for accept state at label t
assertTrue(w.toString().indexOf("[label=\"t\" style=\"bold\"") != -1);
assertTrue(w.toString().contains("[label=\"t\" style=\"bold\""));
// check for accept state at label n
assertTrue(w.toString().indexOf("[label=\"n\" style=\"bold\"") != -1);
assertTrue(w.toString().contains("[label=\"n\" style=\"bold\""));
}
// Make sure raw FST can differentiate between final vs
@ -1253,9 +1192,6 @@ public class TestFSTs extends LuceneTestCase {
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
final Long nothing = outputs.getNoOutput();
final FSTCompiler<Long> fstCompiler = new FSTCompiler<>(FST.INPUT_TYPE.BYTE1, outputs);
// final FST<Long> fst = new FST<>(FST.INPUT_TYPE.BYTE1, outputs, false, PackedInts.COMPACT,
// 15);
final FST<Long> fst = fstCompiler.fst;
final FSTCompiler.UnCompiledNode<Long> rootNode =
@ -1311,11 +1247,11 @@ public class TestFSTs extends LuceneTestCase {
private void checkStopNodes(FST<Long> fst, PositiveIntOutputs outputs) throws Exception {
final Long nothing = outputs.getNoOutput();
FST.Arc<Long> startArc = fst.getFirstArc(new FST.Arc<Long>());
FST.Arc<Long> startArc = fst.getFirstArc(new FST.Arc<>());
assertEquals(nothing, startArc.output());
assertEquals(nothing, startArc.nextFinalOutput());
FST.Arc<Long> arc = fst.readFirstTargetArc(startArc, new FST.Arc<Long>(), fst.getBytesReader());
FST.Arc<Long> arc = fst.readFirstTargetArc(startArc, new FST.Arc<>(), fst.getBytesReader());
assertEquals('a', arc.label());
assertEquals(17, arc.nextFinalOutput().longValue());
assertTrue(arc.isFinal());
@ -1326,13 +1262,7 @@ public class TestFSTs extends LuceneTestCase {
assertEquals(42, arc.output().longValue());
}
static final Comparator<Long> minLongComparator =
new Comparator<Long>() {
@Override
public int compare(Long left, Long right) {
return left.compareTo(right);
}
};
static final Comparator<Long> minLongComparator = Comparator.naturalOrder();
public void testShortestPaths() throws Exception {
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
@ -1350,7 +1280,7 @@ public class TestFSTs extends LuceneTestCase {
Util.TopResults<Long> res =
Util.shortestPaths(
fst,
fst.getFirstArc(new FST.Arc<Long>()),
fst.getFirstArc(new FST.Arc<>()),
outputs.getNoOutput(),
minLongComparator,
3,
@ -1369,7 +1299,7 @@ public class TestFSTs extends LuceneTestCase {
public void testRejectNoLimits() throws IOException {
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
final FSTCompiler<Long> fstCompiler = new FSTCompiler<Long>(FST.INPUT_TYPE.BYTE1, outputs);
final FSTCompiler<Long> fstCompiler = new FSTCompiler<>(FST.INPUT_TYPE.BYTE1, outputs);
final IntsRefBuilder scratch = new IntsRefBuilder();
fstCompiler.add(Util.toIntsRef(new BytesRef("aab"), scratch), 22L);
@ -1381,7 +1311,7 @@ public class TestFSTs extends LuceneTestCase {
final FST<Long> fst = fstCompiler.compile();
final AtomicInteger rejectCount = new AtomicInteger();
Util.TopNSearcher<Long> searcher =
new Util.TopNSearcher<Long>(fst, 2, 6, minLongComparator) {
new Util.TopNSearcher<>(fst, 2, 6, minLongComparator) {
@Override
protected boolean acceptResult(IntsRef input, Long output) {
boolean accept = output.intValue() == 7;
@ -1393,7 +1323,7 @@ public class TestFSTs extends LuceneTestCase {
};
searcher.addStartPaths(
fst.getFirstArc(new FST.Arc<Long>()), outputs.getNoOutput(), true, new IntsRefBuilder());
fst.getFirstArc(new FST.Arc<>()), outputs.getNoOutput(), true, new IntsRefBuilder());
Util.TopResults<Long> res = searcher.search();
assertEquals(rejectCount.get(), 4);
assertTrue(res.isComplete); // rejected(4) + topN(2) <= maxQueueSize(6)
@ -1403,7 +1333,7 @@ public class TestFSTs extends LuceneTestCase {
assertEquals(7L, res.topN.get(0).output.longValue());
rejectCount.set(0);
searcher =
new Util.TopNSearcher<Long>(fst, 2, 5, minLongComparator) {
new Util.TopNSearcher<>(fst, 2, 5, minLongComparator) {
@Override
protected boolean acceptResult(IntsRef input, Long output) {
boolean accept = output.intValue() == 7;
@ -1415,7 +1345,7 @@ public class TestFSTs extends LuceneTestCase {
};
searcher.addStartPaths(
fst.getFirstArc(new FST.Arc<Long>()), outputs.getNoOutput(), true, new IntsRefBuilder());
fst.getFirstArc(new FST.Arc<>()), outputs.getNoOutput(), true, new IntsRefBuilder());
res = searcher.search();
assertEquals(rejectCount.get(), 4);
assertFalse(res.isComplete); // rejected(4) + topN(2) > maxQueueSize(5)
@ -1423,12 +1353,7 @@ public class TestFSTs extends LuceneTestCase {
// compares just the weight side of the pair
static final Comparator<Pair<Long, Long>> minPairWeightComparator =
new Comparator<Pair<Long, Long>>() {
@Override
public int compare(Pair<Long, Long> left, Pair<Long, Long> right) {
return left.output1.compareTo(right.output1);
}
};
Comparator.comparing(left -> left.output1);
/** like testShortestPaths, but uses pairoutputs so we have both a weight and an output */
public void testShortestPathsWFST() throws Exception {
@ -1454,7 +1379,7 @@ public class TestFSTs extends LuceneTestCase {
Util.TopResults<Pair<Long, Long>> res =
Util.shortestPaths(
fst,
fst.getFirstArc(new FST.Arc<Pair<Long, Long>>()),
fst.getFirstArc(new FST.Arc<>()),
outputs.getNoOutput(),
minPairWeightComparator,
3,
@ -1488,12 +1413,9 @@ public class TestFSTs extends LuceneTestCase {
for (int i = 0; i < numWords; i++) {
String s;
while (true) {
do {
s = TestUtil.randomSimpleString(random);
if (!slowCompletor.containsKey(s)) {
break;
}
}
} while (slowCompletor.containsKey(s));
for (int j = 1; j < s.length(); j++) {
allPrefixes.add(s.substring(0, j));
@ -1521,9 +1443,9 @@ public class TestFSTs extends LuceneTestCase {
// System.out.println("TEST: " + prefix);
long prefixOutput = 0;
FST.Arc<Long> arc = fst.getFirstArc(new FST.Arc<Long>());
FST.Arc<Long> arc = fst.getFirstArc(new FST.Arc<>());
for (int idx = 0; idx < prefix.length(); idx++) {
if (fst.findTargetArc((int) prefix.charAt(idx), arc, arc, reader) == null) {
if (fst.findTargetArc(prefix.charAt(idx), arc, arc, reader) == null) {
fail();
}
prefixOutput += arc.output();
@ -1551,7 +1473,7 @@ public class TestFSTs extends LuceneTestCase {
}
assertTrue(matches.size() > 0);
Collections.sort(matches, new TieBreakByInputComparator<>(minLongComparator));
matches.sort(new TieBreakByInputComparator<>(minLongComparator));
if (matches.size() > topN) {
matches.subList(topN, matches.size()).clear();
}
@ -1614,12 +1536,9 @@ public class TestFSTs extends LuceneTestCase {
Random random = random();
for (int i = 0; i < numWords; i++) {
String s;
while (true) {
do {
s = TestUtil.randomSimpleString(random);
if (!slowCompletor.containsKey(s)) {
break;
}
}
} while (slowCompletor.containsKey(s));
for (int j = 1; j < s.length(); j++) {
allPrefixes.add(s.substring(0, j));
@ -1651,9 +1570,9 @@ public class TestFSTs extends LuceneTestCase {
// System.out.println("TEST: " + prefix);
Pair<Long, Long> prefixOutput = outputs.getNoOutput();
FST.Arc<Pair<Long, Long>> arc = fst.getFirstArc(new FST.Arc<Pair<Long, Long>>());
FST.Arc<Pair<Long, Long>> arc = fst.getFirstArc(new FST.Arc<>());
for (int idx = 0; idx < prefix.length(); idx++) {
if (fst.findTargetArc((int) prefix.charAt(idx), arc, arc, reader) == null) {
if (fst.findTargetArc(prefix.charAt(idx), arc, arc, reader) == null) {
fail();
}
prefixOutput = outputs.add(prefixOutput, arc.output());
@ -1683,7 +1602,7 @@ public class TestFSTs extends LuceneTestCase {
}
assertTrue(matches.size() > 0);
Collections.sort(matches, new TieBreakByInputComparator<>(minPairWeightComparator));
matches.sort(new TieBreakByInputComparator<>(minPairWeightComparator));
if (matches.size() > topN) {
matches.subList(topN, matches.size()).clear();
}
@ -1758,7 +1677,7 @@ public class TestFSTs extends LuceneTestCase {
Arc<BytesRef> arc = new FST.Arc<>();
fst.getFirstArc(arc);
FST.BytesReader reader = fst.getBytesReader();
arc = fst.findTargetArc((int) 'm', arc, arc, reader);
arc = fst.findTargetArc('m', arc, arc, reader);
assertNotNull(arc);
assertEquals(new BytesRef("m"), arc.output());
@ -1767,7 +1686,7 @@ public class TestFSTs extends LuceneTestCase {
fst.getFirstArc(arc);
try {
arc = fst.findTargetArc((int) 'm', arc, arc, reader);
fst.findTargetArc((int) 'm', arc, arc, reader);
} catch (
@SuppressWarnings("unused")
AssertionError ae) {

View File

@ -109,7 +109,7 @@ public class TestFSTsMisc extends LuceneTestCase {
}
pairs.add(new FSTTester.InputOutput<>(terms[idx], output));
}
new FSTTester<Object>(random(), dir, inputMode, pairs, outputs, false) {
new FSTTester<Object>(random(), dir, inputMode, pairs, outputs) {
@Override
protected boolean outputsEqual(Object output1, Object output2) {
if (output1 instanceof TwoLongs && output2 instanceof List) {
@ -157,7 +157,7 @@ public class TestFSTsMisc extends LuceneTestCase {
pairs.add(new FSTTester.InputOutput<>(terms[idx], output));
}
new FSTTester<>(random(), dir, inputMode, pairs, outputs, false).doTest(false);
new FSTTester<>(random(), dir, inputMode, pairs, outputs).doTest(false);
}
}

View File

@ -29,12 +29,10 @@ import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
@ -54,23 +52,16 @@ public class FSTTester<T> {
final int inputMode;
final Outputs<T> outputs;
final Directory dir;
final boolean doReverseLookup;
long nodeCount;
long arcCount;
public FSTTester(
Random random,
Directory dir,
int inputMode,
List<InputOutput<T>> pairs,
Outputs<T> outputs,
boolean doReverseLookup) {
Random random, Directory dir, int inputMode, List<InputOutput<T>> pairs, Outputs<T> outputs) {
this.random = random;
this.dir = dir;
this.inputMode = inputMode;
this.pairs = pairs;
this.outputs = outputs;
this.doReverseLookup = doReverseLookup;
}
static String inputToString(int inputMode, IntsRef term) {
@ -181,11 +172,7 @@ public class FSTTester<T> {
@Override
public int compareTo(InputOutput<T> other) {
if (other instanceof InputOutput) {
return input.compareTo((other).input);
} else {
throw new IllegalArgumentException();
}
return input.compareTo(other.input);
}
}
@ -208,9 +195,8 @@ public class FSTTester<T> {
// of the term prefix that matches
private T run(FST<T> fst, IntsRef term, int[] prefixLength) throws IOException {
assert prefixLength == null || prefixLength.length == 1;
final FST.Arc<T> arc = fst.getFirstArc(new FST.Arc<T>());
final T NO_OUTPUT = fst.outputs.getNoOutput();
T output = NO_OUTPUT;
final FST.Arc<T> arc = fst.getFirstArc(new FST.Arc<>());
T output = fst.outputs.getNoOutput();
final FST.BytesReader fstReader = fst.getBytesReader();
for (int i = 0; i <= term.length; i++) {
@ -243,12 +229,11 @@ public class FSTTester<T> {
}
private T randomAcceptedWord(FST<T> fst, IntsRefBuilder in) throws IOException {
FST.Arc<T> arc = fst.getFirstArc(new FST.Arc<T>());
FST.Arc<T> arc = fst.getFirstArc(new FST.Arc<>());
final List<FST.Arc<T>> arcs = new ArrayList<>();
in.clear();
final T NO_OUTPUT = fst.outputs.getNoOutput();
T output = NO_OUTPUT;
T output = fst.outputs.getNoOutput();
final FST.BytesReader fstReader = fst.getBytesReader();
while (true) {
@ -311,14 +296,12 @@ public class FSTTester<T> {
if (random.nextBoolean() && fst != null) {
IOContext context = LuceneTestCase.newIOContext(random);
IndexOutput out = dir.createOutput("fst.bin", context);
fst.save(out, out);
out.close();
IndexInput in = dir.openInput("fst.bin", context);
try {
fst = new FST<T>(in, in, outputs);
try (IndexOutput out = dir.createOutput("fst.bin", context)) {
fst.save(out, out);
}
try (IndexInput in = dir.openInput("fst.bin", context)) {
fst = new FST<>(in, in, outputs);
} finally {
in.close();
dir.deleteFile("fst.bin");
}
}
@ -361,30 +344,8 @@ public class FSTTester<T> {
}
// FST is complete
@SuppressWarnings("deprecation")
private void verifyUnPruned(int inputMode, FST<T> fst) throws IOException {
final FST<Long> fstLong;
final Set<Long> validOutputs;
long minLong = Long.MAX_VALUE;
long maxLong = Long.MIN_VALUE;
if (doReverseLookup) {
@SuppressWarnings("unchecked")
FST<Long> fstLong0 = (FST<Long>) fst;
fstLong = fstLong0;
validOutputs = new HashSet<>();
for (InputOutput<T> pair : pairs) {
Long output = (Long) pair.output;
maxLong = Math.max(maxLong, output);
minLong = Math.min(minLong, output);
validOutputs.add(output);
}
} else {
fstLong = null;
validOutputs = null;
}
if (pairs.size() == 0) {
assertNull(fst);
return;
@ -447,20 +408,6 @@ public class FSTTester<T> {
termsMap.put(pair.input, pair.output);
}
if (doReverseLookup && maxLong > minLong) {
// Do random lookups so we test null (output doesn't
// exist) case:
assertNull(Util.getByOutput(fstLong, minLong - 7));
assertNull(Util.getByOutput(fstLong, maxLong + 7));
final int num = LuceneTestCase.atLeast(random, 100);
for (int iter = 0; iter < num; iter++) {
Long v = TestUtil.nextLong(random, minLong, maxLong);
IntsRef input = Util.getByOutput(fstLong, v);
assertTrue(validOutputs.contains(v) || input == null);
}
}
// find random matching word and make sure it's valid
if (LuceneTestCase.VERBOSE) {
System.out.println("TEST: verify random accepted terms");
@ -473,14 +420,6 @@ public class FSTTester<T> {
"accepted word " + inputToString(inputMode, scratch.get()) + " is not valid",
termsMap.containsKey(scratch.get()));
assertTrue(outputsEqual(termsMap.get(scratch.get()), output));
if (doReverseLookup) {
// System.out.println("lookup output=" + output + " outs=" + fst.outputs);
IntsRef input = Util.getByOutput(fstLong, (Long) output);
assertNotNull(input);
// System.out.println(" got " + Util.toBytesRef(input, new BytesRef()).utf8ToString());
assertEquals(scratch.get(), input);
}
}
// test IntsRefFSTEnum.seek:
@ -497,7 +436,7 @@ public class FSTTester<T> {
// seek to term that doesn't exist:
while (true) {
final IntsRef term = toIntsRef(getRandomString(random), inputMode);
int pos = Collections.binarySearch(pairs, new InputOutput<T>(term, null));
int pos = Collections.binarySearch(pairs, new InputOutput<>(term, null));
if (pos < 0) {
pos = -(pos + 1);
// ok doesn't exist
@ -617,7 +556,7 @@ public class FSTTester<T> {
for (; attempt < 10; attempt++) {
IntsRef term = toIntsRef(getRandomString(random), inputMode);
if (!termsMap.containsKey(term) && term.compareTo(pairs.get(upto).input) > 0) {
int pos = Collections.binarySearch(pairs, new InputOutput<T>(term, null));
int pos = Collections.binarySearch(pairs, new InputOutput<>(term, null));
assert pos < 0;
upto = -(pos + 1);
@ -806,10 +745,8 @@ public class FSTTester<T> {
cmo2 != null
&& ((prune2 > 1 && cmo2.count >= prune2)
|| (prune2 == 1 && (cmo2.count >= 2 || prefix.length <= 1)));
} else if (cmo.count >= prune2) {
keep = true;
} else {
keep = false;
keep = cmo.count >= prune2;
}
}