mirror of https://github.com/apache/lucene.git
LUCENE-8638: remove deprecated FST get by output
This commit is contained in:
parent
a37844aedd
commit
666c7a2590
|
@ -22,6 +22,8 @@ System Requirements
|
|||
|
||||
API Changes
|
||||
|
||||
* LUCENE-8638: Remove deprecated methods in FST for lookup by output.
|
||||
|
||||
* LUCENE-8638: haversin() expressions function now returns its result in meters rather than
|
||||
kilometers.
|
||||
|
||||
|
|
|
@ -45,7 +45,7 @@ public final class Util {
|
|||
public static <T> T get(FST<T> fst, IntsRef input) throws IOException {
|
||||
|
||||
// TODO: would be nice not to alloc this on every lookup
|
||||
final FST.Arc<T> arc = fst.getFirstArc(new FST.Arc<T>());
|
||||
final FST.Arc<T> arc = fst.getFirstArc(new FST.Arc<>());
|
||||
|
||||
final BytesReader fstReader = fst.getBytesReader();
|
||||
|
||||
|
@ -92,167 +92,6 @@ public final class Util {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Reverse lookup (lookup by output instead of by input), in the special case when your FST's
|
||||
* outputs are strictly ascending. This locates the input/output pair where the output is equal to
|
||||
* the target, and will return null if that output does not exist.
|
||||
*
|
||||
* <p>NOTE: this only works with {@code FST<Long>}, and only when the outputs are ascending in
|
||||
* order with the inputs. For example, simple ordinals (0, 1, 2, ...), or file offsets (when
|
||||
* appending to a file) fit this.
|
||||
*/
|
||||
@Deprecated
|
||||
public static IntsRef getByOutput(FST<Long> fst, long targetOutput) throws IOException {
|
||||
|
||||
final BytesReader in = fst.getBytesReader();
|
||||
|
||||
// TODO: would be nice not to alloc this on every lookup
|
||||
FST.Arc<Long> arc = fst.getFirstArc(new FST.Arc<Long>());
|
||||
|
||||
FST.Arc<Long> scratchArc = new FST.Arc<>();
|
||||
|
||||
final IntsRefBuilder result = new IntsRefBuilder();
|
||||
return getByOutput(fst, targetOutput, in, arc, scratchArc, result);
|
||||
}
|
||||
|
||||
/**
|
||||
* Expert: like {@link Util#getByOutput(FST, long)} except reusing BytesReader, initial and
|
||||
* scratch Arc, and result.
|
||||
*/
|
||||
@Deprecated
|
||||
public static IntsRef getByOutput(
|
||||
FST<Long> fst,
|
||||
long targetOutput,
|
||||
BytesReader in,
|
||||
Arc<Long> arc,
|
||||
Arc<Long> scratchArc,
|
||||
IntsRefBuilder result)
|
||||
throws IOException {
|
||||
long output = arc.output();
|
||||
int upto = 0;
|
||||
|
||||
// System.out.println("reverseLookup output=" + targetOutput);
|
||||
|
||||
while (true) {
|
||||
// System.out.println("loop: output=" + output + " upto=" + upto + " arc=" + arc);
|
||||
if (arc.isFinal()) {
|
||||
final long finalOutput = output + arc.nextFinalOutput();
|
||||
// System.out.println(" isFinal finalOutput=" + finalOutput);
|
||||
if (finalOutput == targetOutput) {
|
||||
result.setLength(upto);
|
||||
// System.out.println(" found!");
|
||||
return result.get();
|
||||
} else if (finalOutput > targetOutput) {
|
||||
// System.out.println(" not found!");
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
if (FST.targetHasArcs(arc)) {
|
||||
// System.out.println(" targetHasArcs");
|
||||
result.grow(1 + upto);
|
||||
|
||||
fst.readFirstRealTargetArc(arc.target(), arc, in);
|
||||
|
||||
if (arc.bytesPerArc() != 0 && arc.nodeFlags() == FST.ARCS_FOR_BINARY_SEARCH) {
|
||||
|
||||
int low = 0;
|
||||
int high = arc.numArcs() - 1;
|
||||
int mid = 0;
|
||||
// System.out.println("bsearch: numArcs=" + arc.numArcs + " target=" + targetOutput + "
|
||||
// output=" + output);
|
||||
boolean exact = false;
|
||||
while (low <= high) {
|
||||
mid = (low + high) >>> 1;
|
||||
in.setPosition(arc.posArcsStart());
|
||||
in.skipBytes(arc.bytesPerArc() * mid);
|
||||
final byte flags = in.readByte();
|
||||
fst.readLabel(in);
|
||||
final long minArcOutput;
|
||||
if ((flags & FST.BIT_ARC_HAS_OUTPUT) != 0) {
|
||||
final long arcOutput = fst.outputs.read(in);
|
||||
minArcOutput = output + arcOutput;
|
||||
} else {
|
||||
minArcOutput = output;
|
||||
}
|
||||
// System.out.println(" cycle mid=" + mid + " output=" + minArcOutput);
|
||||
if (minArcOutput == targetOutput) {
|
||||
exact = true;
|
||||
break;
|
||||
} else if (minArcOutput < targetOutput) {
|
||||
low = mid + 1;
|
||||
} else {
|
||||
high = mid - 1;
|
||||
}
|
||||
}
|
||||
|
||||
int idx;
|
||||
if (high == -1) {
|
||||
return null;
|
||||
} else if (exact) {
|
||||
idx = mid;
|
||||
} else {
|
||||
idx = low - 1;
|
||||
}
|
||||
|
||||
fst.readArcByIndex(arc, in, idx);
|
||||
result.setIntAt(upto++, arc.label());
|
||||
output += arc.output();
|
||||
|
||||
} else {
|
||||
|
||||
FST.Arc<Long> prevArc = null;
|
||||
|
||||
while (true) {
|
||||
// System.out.println(" cycle label=" + arc.label + " output=" + arc.output);
|
||||
|
||||
// This is the min output we'd hit if we follow
|
||||
// this arc:
|
||||
final long minArcOutput = output + arc.output();
|
||||
|
||||
if (minArcOutput == targetOutput) {
|
||||
// Recurse on this arc:
|
||||
// System.out.println(" match! break");
|
||||
output = minArcOutput;
|
||||
result.setIntAt(upto++, arc.label());
|
||||
break;
|
||||
} else if (minArcOutput > targetOutput) {
|
||||
if (prevArc == null) {
|
||||
// Output doesn't exist
|
||||
return null;
|
||||
} else {
|
||||
// Recurse on previous arc:
|
||||
arc.copyFrom(prevArc);
|
||||
result.setIntAt(upto++, arc.label());
|
||||
output += arc.output();
|
||||
// System.out.println(" recurse prev label=" + (char) arc.label + " output=" +
|
||||
// output);
|
||||
break;
|
||||
}
|
||||
} else if (arc.isLast()) {
|
||||
// Recurse on this arc:
|
||||
output = minArcOutput;
|
||||
// System.out.println(" recurse last label=" + (char) arc.label + " output=" +
|
||||
// output);
|
||||
result.setIntAt(upto++, arc.label());
|
||||
break;
|
||||
} else {
|
||||
// Read next arc in this node:
|
||||
prevArc = scratchArc;
|
||||
prevArc.copyFrom(arc);
|
||||
// System.out.println(" after copy label=" + (char) prevArc.label + " vs " +
|
||||
// (char) arc.label);
|
||||
fst.readNextRealArc(arc, in);
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// System.out.println(" no target arcs; not found!");
|
||||
return null;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Represents a path in TopNSearcher.
|
||||
*
|
||||
|
@ -899,7 +738,7 @@ public final class Util {
|
|||
scratch.setLength(charLimit);
|
||||
scratch.grow(charLimit);
|
||||
for (int idx = 0; idx < charLimit; idx++) {
|
||||
scratch.setIntAt(idx, (int) s.charAt(idx));
|
||||
scratch.setIntAt(idx, s.charAt(idx));
|
||||
}
|
||||
return scratch.get();
|
||||
}
|
||||
|
@ -1033,9 +872,6 @@ public final class Util {
|
|||
|
||||
while (true) {
|
||||
// System.out.println(" non-bs cycle");
|
||||
// TODO: we should fix this code to not have to create
|
||||
// object for the output of every arc we scan... only
|
||||
// for the matching arc, if found
|
||||
if (arc.label() >= label) {
|
||||
// System.out.println(" found!");
|
||||
return arc;
|
||||
|
@ -1067,12 +903,12 @@ public final class Util {
|
|||
+ ")";
|
||||
BytesReader in = fst.getBytesReader();
|
||||
int low = arc.arcIdx();
|
||||
int mid = 0;
|
||||
int mid;
|
||||
int high = arc.numArcs() - 1;
|
||||
while (low <= high) {
|
||||
mid = (low + high) >>> 1;
|
||||
in.setPosition(arc.posArcsStart());
|
||||
in.skipBytes(arc.bytesPerArc() * mid + 1);
|
||||
in.skipBytes((long) arc.bytesPerArc() * mid + 1);
|
||||
final int midLabel = fst.readLabel(in);
|
||||
final int cmp = midLabel - targetLabel;
|
||||
if (cmp < 0) {
|
||||
|
|
|
@ -25,8 +25,6 @@
|
|||
* <li>Fast and low memory overhead construction of the minimal FST (but inputs must be provided
|
||||
* in sorted order)
|
||||
* <li>Low object overhead and quick deserialization (byte[] representation)
|
||||
* <li>{@link org.apache.lucene.util.fst.Util#getByOutput Lookup-by-output} when the outputs are
|
||||
* in sorted order (e.g., ordinals or file pointers)
|
||||
* <li>Pluggable {@link org.apache.lucene.util.fst.Outputs Outputs} representation
|
||||
* <li>{@link org.apache.lucene.util.fst.Util#shortestPaths N-shortest-paths} search by weight
|
||||
* <li>Enumerators ({@link org.apache.lucene.util.fst.IntsRefFSTEnum IntsRef} and {@link
|
||||
|
|
|
@ -277,12 +277,8 @@ public class Test2BFST extends LuceneTestCase {
|
|||
System.out.println(i + "...: ");
|
||||
}
|
||||
|
||||
// forward lookup:
|
||||
assertEquals(output, Util.get(fst, input).longValue());
|
||||
// reverse lookup:
|
||||
@SuppressWarnings("deprecation")
|
||||
IntsRef inputResult = Util.getByOutput(fst, output);
|
||||
assertEquals(input, inputResult);
|
||||
|
||||
output += 1 + r.nextInt(10);
|
||||
nextInput(r, ints);
|
||||
}
|
||||
|
|
|
@ -92,7 +92,9 @@ public class TestFSTs extends LuceneTestCase {
|
|||
@Override
|
||||
public void tearDown() throws Exception {
|
||||
// can be null if we force simpletext (funky, some kind of bug in test runner maybe)
|
||||
if (dir != null) dir.close();
|
||||
if (dir != null) {
|
||||
dir.close();
|
||||
}
|
||||
super.tearDown();
|
||||
}
|
||||
|
||||
|
@ -133,7 +135,7 @@ public class TestFSTs extends LuceneTestCase {
|
|||
for (IntsRef term : terms2) {
|
||||
pairs.add(new FSTTester.InputOutput<>(term, NO_OUTPUT));
|
||||
}
|
||||
FSTTester<Object> tester = new FSTTester<>(random(), dir, inputMode, pairs, outputs, false);
|
||||
FSTTester<Object> tester = new FSTTester<>(random(), dir, inputMode, pairs, outputs);
|
||||
FST<Object> fst = tester.doTest(0, 0, false);
|
||||
assertNotNull(fst);
|
||||
assertEquals(22, tester.nodeCount);
|
||||
|
@ -147,7 +149,7 @@ public class TestFSTs extends LuceneTestCase {
|
|||
for (int idx = 0; idx < terms2.length; idx++) {
|
||||
pairs.add(new FSTTester.InputOutput<>(terms2[idx], (long) idx));
|
||||
}
|
||||
FSTTester<Long> tester = new FSTTester<>(random(), dir, inputMode, pairs, outputs, true);
|
||||
FSTTester<Long> tester = new FSTTester<>(random(), dir, inputMode, pairs, outputs);
|
||||
final FST<Long> fst = tester.doTest(0, 0, false);
|
||||
assertNotNull(fst);
|
||||
assertEquals(22, tester.nodeCount);
|
||||
|
@ -157,14 +159,12 @@ public class TestFSTs extends LuceneTestCase {
|
|||
// FST byte sequence ord
|
||||
{
|
||||
final ByteSequenceOutputs outputs = ByteSequenceOutputs.getSingleton();
|
||||
final BytesRef NO_OUTPUT = outputs.getNoOutput();
|
||||
final List<FSTTester.InputOutput<BytesRef>> pairs = new ArrayList<>(terms2.length);
|
||||
for (int idx = 0; idx < terms2.length; idx++) {
|
||||
final BytesRef output = idx == 17 ? NO_OUTPUT : new BytesRef(Integer.toString(idx));
|
||||
final BytesRef output = new BytesRef(Integer.toString(idx));
|
||||
pairs.add(new FSTTester.InputOutput<>(terms2[idx], output));
|
||||
}
|
||||
FSTTester<BytesRef> tester =
|
||||
new FSTTester<>(random(), dir, inputMode, pairs, outputs, false);
|
||||
FSTTester<BytesRef> tester = new FSTTester<>(random(), dir, inputMode, pairs, outputs);
|
||||
final FST<BytesRef> fst = tester.doTest(0, 0, false);
|
||||
assertNotNull(fst);
|
||||
assertEquals(24, tester.nodeCount);
|
||||
|
@ -185,7 +185,7 @@ public class TestFSTs extends LuceneTestCase {
|
|||
for (IntsRef term : terms) {
|
||||
pairs.add(new FSTTester.InputOutput<>(term, NO_OUTPUT));
|
||||
}
|
||||
new FSTTester<>(random(), dir, inputMode, pairs, outputs, false).doTest(true);
|
||||
new FSTTester<>(random(), dir, inputMode, pairs, outputs).doTest(true);
|
||||
}
|
||||
|
||||
// PositiveIntOutput (ord)
|
||||
|
@ -195,7 +195,7 @@ public class TestFSTs extends LuceneTestCase {
|
|||
for (int idx = 0; idx < terms.length; idx++) {
|
||||
pairs.add(new FSTTester.InputOutput<>(terms[idx], (long) idx));
|
||||
}
|
||||
new FSTTester<>(random(), dir, inputMode, pairs, outputs, true).doTest(true);
|
||||
new FSTTester<>(random(), dir, inputMode, pairs, outputs).doTest(true);
|
||||
}
|
||||
|
||||
// PositiveIntOutput (random monotonically increasing positive number)
|
||||
|
@ -203,24 +203,23 @@ public class TestFSTs extends LuceneTestCase {
|
|||
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
|
||||
final List<FSTTester.InputOutput<Long>> pairs = new ArrayList<>(terms.length);
|
||||
long lastOutput = 0;
|
||||
for (int idx = 0; idx < terms.length; idx++) {
|
||||
for (IntsRef term : terms) {
|
||||
final long value = lastOutput + TestUtil.nextInt(random(), 1, 1000);
|
||||
lastOutput = value;
|
||||
pairs.add(new FSTTester.InputOutput<>(terms[idx], value));
|
||||
pairs.add(new FSTTester.InputOutput<>(term, value));
|
||||
}
|
||||
new FSTTester<>(random(), dir, inputMode, pairs, outputs, true).doTest(true);
|
||||
new FSTTester<>(random(), dir, inputMode, pairs, outputs).doTest(true);
|
||||
}
|
||||
|
||||
// PositiveIntOutput (random positive number)
|
||||
{
|
||||
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
|
||||
final List<FSTTester.InputOutput<Long>> pairs = new ArrayList<>(terms.length);
|
||||
for (int idx = 0; idx < terms.length; idx++) {
|
||||
for (IntsRef term : terms) {
|
||||
pairs.add(
|
||||
new FSTTester.InputOutput<>(
|
||||
terms[idx], TestUtil.nextLong(random(), 0, Long.MAX_VALUE)));
|
||||
new FSTTester.InputOutput<>(term, TestUtil.nextLong(random(), 0, Long.MAX_VALUE)));
|
||||
}
|
||||
new FSTTester<>(random(), dir, inputMode, pairs, outputs, false).doTest(true);
|
||||
new FSTTester<>(random(), dir, inputMode, pairs, outputs).doTest(true);
|
||||
}
|
||||
|
||||
// Pair<ord, (random monotonically increasing positive number)>
|
||||
|
@ -236,7 +235,7 @@ public class TestFSTs extends LuceneTestCase {
|
|||
lastOutput = value;
|
||||
pairs.add(new FSTTester.InputOutput<>(terms[idx], outputs.newPair((long) idx, value)));
|
||||
}
|
||||
new FSTTester<>(random(), dir, inputMode, pairs, outputs, false).doTest(true);
|
||||
new FSTTester<>(random(), dir, inputMode, pairs, outputs).doTest(true);
|
||||
}
|
||||
|
||||
// Sequence-of-bytes
|
||||
|
@ -249,7 +248,7 @@ public class TestFSTs extends LuceneTestCase {
|
|||
random().nextInt(30) == 17 ? NO_OUTPUT : new BytesRef(Integer.toString(idx));
|
||||
pairs.add(new FSTTester.InputOutput<>(terms[idx], output));
|
||||
}
|
||||
new FSTTester<>(random(), dir, inputMode, pairs, outputs, false).doTest(true);
|
||||
new FSTTester<>(random(), dir, inputMode, pairs, outputs).doTest(true);
|
||||
}
|
||||
|
||||
// Sequence-of-ints
|
||||
|
@ -265,7 +264,7 @@ public class TestFSTs extends LuceneTestCase {
|
|||
}
|
||||
pairs.add(new FSTTester.InputOutput<>(terms[idx], output));
|
||||
}
|
||||
new FSTTester<>(random(), dir, inputMode, pairs, outputs, false).doTest(true);
|
||||
new FSTTester<>(random(), dir, inputMode, pairs, outputs).doTest(true);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -298,7 +297,7 @@ public class TestFSTs extends LuceneTestCase {
|
|||
final String term = getRandomString(random);
|
||||
termsSet.add(toIntsRef(term, inputMode));
|
||||
}
|
||||
doTest(inputMode, termsSet.toArray(new IntsRef[termsSet.size()]));
|
||||
doTest(inputMode, termsSet.toArray(new IntsRef[0]));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -497,7 +496,7 @@ public class TestFSTs extends LuceneTestCase {
|
|||
private abstract static class VisitTerms<T> {
|
||||
private final Path dirOut;
|
||||
private final Path wordsFileIn;
|
||||
private int inputMode;
|
||||
private final int inputMode;
|
||||
private final Outputs<T> outputs;
|
||||
private final FSTCompiler<T> fstCompiler;
|
||||
|
||||
|
@ -524,7 +523,7 @@ public class TestFSTs extends LuceneTestCase {
|
|||
|
||||
protected abstract T getOutput(IntsRef input, int ord) throws IOException;
|
||||
|
||||
public void run(int limit, boolean verify, boolean verifyByOutput) throws IOException {
|
||||
public void run(int limit, boolean verify) throws IOException {
|
||||
|
||||
BufferedReader is = Files.newBufferedReader(wordsFileIn, StandardCharsets.UTF_8);
|
||||
try {
|
||||
|
@ -541,12 +540,11 @@ public class TestFSTs extends LuceneTestCase {
|
|||
|
||||
ord++;
|
||||
if (ord % 500000 == 0) {
|
||||
System.out.println(
|
||||
String.format(
|
||||
Locale.ROOT,
|
||||
"%6.2fs: %9d...",
|
||||
((System.currentTimeMillis() - tStart) / 1000.0),
|
||||
ord));
|
||||
System.out.printf(
|
||||
Locale.ROOT,
|
||||
"%6.2fs: %9d...",
|
||||
((System.currentTimeMillis() - tStart) / 1000.0),
|
||||
ord);
|
||||
}
|
||||
if (ord >= limit) {
|
||||
break;
|
||||
|
@ -594,90 +592,51 @@ public class TestFSTs extends LuceneTestCase {
|
|||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
IndexInput in = dir.openInput("fst.bin", IOContext.DEFAULT);
|
||||
fst = new FST<T>(in, outputs);
|
||||
in.close();
|
||||
*/
|
||||
|
||||
System.out.println("\nNow verify...");
|
||||
|
||||
is.close();
|
||||
is = Files.newBufferedReader(wordsFileIn, StandardCharsets.UTF_8);
|
||||
|
||||
ord = 0;
|
||||
tStart = System.currentTimeMillis();
|
||||
while (true) {
|
||||
for (int iter = 0; iter < 2; iter++) {
|
||||
is.close();
|
||||
is = Files.newBufferedReader(wordsFileIn, StandardCharsets.UTF_8);
|
||||
|
||||
ord = 0;
|
||||
tStart = System.currentTimeMillis();
|
||||
while (true) {
|
||||
String w = is.readLine();
|
||||
if (w == null) {
|
||||
break;
|
||||
}
|
||||
toIntsRef(w, inputMode, intsRefBuilder);
|
||||
if (iter == 0) {
|
||||
T expected = getOutput(intsRefBuilder.get(), ord);
|
||||
T actual = Util.get(fst, intsRefBuilder.get());
|
||||
if (actual == null) {
|
||||
throw new RuntimeException("unexpected null output on input=" + w);
|
||||
}
|
||||
if (!actual.equals(expected)) {
|
||||
throw new RuntimeException(
|
||||
"wrong output (got "
|
||||
+ outputs.outputToString(actual)
|
||||
+ " but expected "
|
||||
+ outputs.outputToString(expected)
|
||||
+ ") on input="
|
||||
+ w);
|
||||
}
|
||||
} else {
|
||||
// Get by output
|
||||
final Long output = (Long) getOutput(intsRefBuilder.get(), ord);
|
||||
@SuppressWarnings({"unchecked", "deprecation"})
|
||||
final IntsRef actual = Util.getByOutput((FST<Long>) fst, output.longValue());
|
||||
if (actual == null) {
|
||||
throw new RuntimeException("unexpected null input from output=" + output);
|
||||
}
|
||||
if (!actual.equals(intsRefBuilder.get())) {
|
||||
throw new RuntimeException(
|
||||
"wrong input (got "
|
||||
+ actual
|
||||
+ " but expected "
|
||||
+ intsRefBuilder
|
||||
+ " from output="
|
||||
+ output);
|
||||
}
|
||||
}
|
||||
|
||||
ord++;
|
||||
if (ord % 500000 == 0) {
|
||||
System.out.println(
|
||||
((System.currentTimeMillis() - tStart) / 1000.0) + "s: " + ord + "...");
|
||||
}
|
||||
if (ord >= limit) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
double totSec = ((System.currentTimeMillis() - tStart) / 1000.0);
|
||||
System.out.println(
|
||||
"Verify "
|
||||
+ (iter == 1 ? "(by output) " : "")
|
||||
+ "took "
|
||||
+ totSec
|
||||
+ " sec + ("
|
||||
+ (int) ((totSec * 1000000000 / ord))
|
||||
+ " nsec per lookup)");
|
||||
|
||||
if (!verifyByOutput) {
|
||||
break;
|
||||
}
|
||||
String w = is.readLine();
|
||||
if (w == null) {
|
||||
break;
|
||||
}
|
||||
toIntsRef(w, inputMode, intsRefBuilder);
|
||||
T expected = getOutput(intsRefBuilder.get(), ord);
|
||||
T actual = Util.get(fst, intsRefBuilder.get());
|
||||
if (actual == null) {
|
||||
throw new RuntimeException("unexpected null output on input=" + w);
|
||||
}
|
||||
if (!actual.equals(expected)) {
|
||||
throw new RuntimeException(
|
||||
"wrong output (got "
|
||||
+ outputs.outputToString(actual)
|
||||
+ " but expected "
|
||||
+ outputs.outputToString(expected)
|
||||
+ ") on input="
|
||||
+ w);
|
||||
}
|
||||
ord++;
|
||||
if (ord % 500000 == 0) {
|
||||
System.out.println(
|
||||
((System.currentTimeMillis() - tStart) / 1000.0) + "s: " + ord + "...");
|
||||
}
|
||||
if (ord >= limit) {
|
||||
break;
|
||||
}
|
||||
|
||||
// NOTE: comment out to profile lookup...
|
||||
break;
|
||||
}
|
||||
|
||||
double totSec = ((System.currentTimeMillis() - tStart) / 1000.0);
|
||||
System.out.println(
|
||||
"Verify took "
|
||||
+ totSec
|
||||
+ " sec + ("
|
||||
+ (int) ((totSec * 1000000000 / ord))
|
||||
+ " nsec per lookup)");
|
||||
|
||||
} finally {
|
||||
is.close();
|
||||
}
|
||||
|
@ -762,7 +721,7 @@ public class TestFSTs extends LuceneTestCase {
|
|||
}
|
||||
return outputs.newPair((long) ord, (long) TestUtil.nextInt(rand, 1, 5000));
|
||||
}
|
||||
}.run(limit, verify, false);
|
||||
}.run(limit, verify);
|
||||
} else if (storeOrds) {
|
||||
// Store only ords
|
||||
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
|
||||
|
@ -771,7 +730,7 @@ public class TestFSTs extends LuceneTestCase {
|
|||
public Long getOutput(IntsRef input, int ord) {
|
||||
return (long) ord;
|
||||
}
|
||||
}.run(limit, verify, true);
|
||||
}.run(limit, verify);
|
||||
} else if (storeDocFreqs) {
|
||||
// Store only docFreq
|
||||
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
|
||||
|
@ -785,7 +744,7 @@ public class TestFSTs extends LuceneTestCase {
|
|||
}
|
||||
return (long) TestUtil.nextInt(rand, 1, 5000);
|
||||
}
|
||||
}.run(limit, verify, false);
|
||||
}.run(limit, verify);
|
||||
} else {
|
||||
// Store nothing
|
||||
final NoOutputs outputs = NoOutputs.getSingleton();
|
||||
|
@ -795,7 +754,7 @@ public class TestFSTs extends LuceneTestCase {
|
|||
public Object getOutput(IntsRef input, int ord) {
|
||||
return NO_OUTPUT;
|
||||
}
|
||||
}.run(limit, verify, false);
|
||||
}.run(limit, verify);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -913,22 +872,6 @@ public class TestFSTs extends LuceneTestCase {
|
|||
assertNotNull(seekResult);
|
||||
assertEquals(b, seekResult.input);
|
||||
assertEquals(42, (long) seekResult.output);
|
||||
|
||||
@SuppressWarnings("deprecation")
|
||||
IntsRef byOutput = Util.getByOutput(fst, 13824324872317238L);
|
||||
assertEquals(Util.toIntsRef(new BytesRef("c"), new IntsRefBuilder()), byOutput);
|
||||
|
||||
@SuppressWarnings("deprecation")
|
||||
IntsRef byOutput47 = Util.getByOutput(fst, 47);
|
||||
assertNull(byOutput47);
|
||||
|
||||
@SuppressWarnings("deprecation")
|
||||
IntsRef byOutput42 = Util.getByOutput(fst, 42);
|
||||
assertEquals(Util.toIntsRef(new BytesRef("b"), new IntsRefBuilder()), byOutput42);
|
||||
|
||||
@SuppressWarnings("deprecation")
|
||||
IntsRef byOutput17 = Util.getByOutput(fst, 17);
|
||||
assertEquals(Util.toIntsRef(new BytesRef("a"), new IntsRefBuilder()), byOutput17);
|
||||
}
|
||||
|
||||
public void testPrimaryKeys() throws Exception {
|
||||
|
@ -991,12 +934,9 @@ public class TestFSTs extends LuceneTestCase {
|
|||
if (cycle == 0) {
|
||||
idString = String.format(Locale.ROOT, "%07d", (NUM_IDS + idx));
|
||||
} else {
|
||||
while (true) {
|
||||
do {
|
||||
idString = Long.toString(random().nextLong());
|
||||
if (!allIDs.contains(idString)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
} while (allIDs.contains(idString));
|
||||
}
|
||||
outOfBounds.add(idString);
|
||||
allIDsList.add(idString);
|
||||
|
@ -1063,8 +1003,7 @@ public class TestFSTs extends LuceneTestCase {
|
|||
new BytesRef(nextID),
|
||||
termsEnum.term());
|
||||
} else if (!exists) {
|
||||
assertTrue(
|
||||
status == TermsEnum.SeekStatus.NOT_FOUND || status == TermsEnum.SeekStatus.END);
|
||||
assertEquals(TermsEnum.SeekStatus.NOT_FOUND, status);
|
||||
} else {
|
||||
assertEquals(TermsEnum.SeekStatus.FOUND, status);
|
||||
}
|
||||
|
@ -1204,7 +1143,7 @@ public class TestFSTs extends LuceneTestCase {
|
|||
ArrayList<String> out = new ArrayList<>();
|
||||
StringBuilder b = new StringBuilder();
|
||||
s.generate(out, b, 'a', 'i', 10);
|
||||
String[] input = out.toArray(new String[out.size()]);
|
||||
String[] input = out.toArray(new String[0]);
|
||||
Arrays.sort(input);
|
||||
FST<Object> fst = s.compile(input);
|
||||
FST.Arc<Object> arc = fst.getFirstArc(new FST.Arc<>());
|
||||
|
@ -1224,7 +1163,7 @@ public class TestFSTs extends LuceneTestCase {
|
|||
Util.toDot(fst, w, false, false);
|
||||
w.close();
|
||||
// System.out.println(w.toString());
|
||||
assertTrue(w.toString().indexOf("label=\"t/[7]\"") != -1);
|
||||
assertTrue(w.toString().contains("label=\"t/[7]\""));
|
||||
}
|
||||
|
||||
public void testInternalFinalState() throws Exception {
|
||||
|
@ -1242,9 +1181,9 @@ public class TestFSTs extends LuceneTestCase {
|
|||
// System.out.println(w.toString());
|
||||
|
||||
// check for accept state at label t
|
||||
assertTrue(w.toString().indexOf("[label=\"t\" style=\"bold\"") != -1);
|
||||
assertTrue(w.toString().contains("[label=\"t\" style=\"bold\""));
|
||||
// check for accept state at label n
|
||||
assertTrue(w.toString().indexOf("[label=\"n\" style=\"bold\"") != -1);
|
||||
assertTrue(w.toString().contains("[label=\"n\" style=\"bold\""));
|
||||
}
|
||||
|
||||
// Make sure raw FST can differentiate between final vs
|
||||
|
@ -1253,9 +1192,6 @@ public class TestFSTs extends LuceneTestCase {
|
|||
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
|
||||
final Long nothing = outputs.getNoOutput();
|
||||
final FSTCompiler<Long> fstCompiler = new FSTCompiler<>(FST.INPUT_TYPE.BYTE1, outputs);
|
||||
|
||||
// final FST<Long> fst = new FST<>(FST.INPUT_TYPE.BYTE1, outputs, false, PackedInts.COMPACT,
|
||||
// 15);
|
||||
final FST<Long> fst = fstCompiler.fst;
|
||||
|
||||
final FSTCompiler.UnCompiledNode<Long> rootNode =
|
||||
|
@ -1311,11 +1247,11 @@ public class TestFSTs extends LuceneTestCase {
|
|||
|
||||
private void checkStopNodes(FST<Long> fst, PositiveIntOutputs outputs) throws Exception {
|
||||
final Long nothing = outputs.getNoOutput();
|
||||
FST.Arc<Long> startArc = fst.getFirstArc(new FST.Arc<Long>());
|
||||
FST.Arc<Long> startArc = fst.getFirstArc(new FST.Arc<>());
|
||||
assertEquals(nothing, startArc.output());
|
||||
assertEquals(nothing, startArc.nextFinalOutput());
|
||||
|
||||
FST.Arc<Long> arc = fst.readFirstTargetArc(startArc, new FST.Arc<Long>(), fst.getBytesReader());
|
||||
FST.Arc<Long> arc = fst.readFirstTargetArc(startArc, new FST.Arc<>(), fst.getBytesReader());
|
||||
assertEquals('a', arc.label());
|
||||
assertEquals(17, arc.nextFinalOutput().longValue());
|
||||
assertTrue(arc.isFinal());
|
||||
|
@ -1326,13 +1262,7 @@ public class TestFSTs extends LuceneTestCase {
|
|||
assertEquals(42, arc.output().longValue());
|
||||
}
|
||||
|
||||
static final Comparator<Long> minLongComparator =
|
||||
new Comparator<Long>() {
|
||||
@Override
|
||||
public int compare(Long left, Long right) {
|
||||
return left.compareTo(right);
|
||||
}
|
||||
};
|
||||
static final Comparator<Long> minLongComparator = Comparator.naturalOrder();
|
||||
|
||||
public void testShortestPaths() throws Exception {
|
||||
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
|
||||
|
@ -1350,7 +1280,7 @@ public class TestFSTs extends LuceneTestCase {
|
|||
Util.TopResults<Long> res =
|
||||
Util.shortestPaths(
|
||||
fst,
|
||||
fst.getFirstArc(new FST.Arc<Long>()),
|
||||
fst.getFirstArc(new FST.Arc<>()),
|
||||
outputs.getNoOutput(),
|
||||
minLongComparator,
|
||||
3,
|
||||
|
@ -1369,7 +1299,7 @@ public class TestFSTs extends LuceneTestCase {
|
|||
|
||||
public void testRejectNoLimits() throws IOException {
|
||||
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
|
||||
final FSTCompiler<Long> fstCompiler = new FSTCompiler<Long>(FST.INPUT_TYPE.BYTE1, outputs);
|
||||
final FSTCompiler<Long> fstCompiler = new FSTCompiler<>(FST.INPUT_TYPE.BYTE1, outputs);
|
||||
|
||||
final IntsRefBuilder scratch = new IntsRefBuilder();
|
||||
fstCompiler.add(Util.toIntsRef(new BytesRef("aab"), scratch), 22L);
|
||||
|
@ -1381,7 +1311,7 @@ public class TestFSTs extends LuceneTestCase {
|
|||
final FST<Long> fst = fstCompiler.compile();
|
||||
final AtomicInteger rejectCount = new AtomicInteger();
|
||||
Util.TopNSearcher<Long> searcher =
|
||||
new Util.TopNSearcher<Long>(fst, 2, 6, minLongComparator) {
|
||||
new Util.TopNSearcher<>(fst, 2, 6, minLongComparator) {
|
||||
@Override
|
||||
protected boolean acceptResult(IntsRef input, Long output) {
|
||||
boolean accept = output.intValue() == 7;
|
||||
|
@ -1393,7 +1323,7 @@ public class TestFSTs extends LuceneTestCase {
|
|||
};
|
||||
|
||||
searcher.addStartPaths(
|
||||
fst.getFirstArc(new FST.Arc<Long>()), outputs.getNoOutput(), true, new IntsRefBuilder());
|
||||
fst.getFirstArc(new FST.Arc<>()), outputs.getNoOutput(), true, new IntsRefBuilder());
|
||||
Util.TopResults<Long> res = searcher.search();
|
||||
assertEquals(rejectCount.get(), 4);
|
||||
assertTrue(res.isComplete); // rejected(4) + topN(2) <= maxQueueSize(6)
|
||||
|
@ -1403,7 +1333,7 @@ public class TestFSTs extends LuceneTestCase {
|
|||
assertEquals(7L, res.topN.get(0).output.longValue());
|
||||
rejectCount.set(0);
|
||||
searcher =
|
||||
new Util.TopNSearcher<Long>(fst, 2, 5, minLongComparator) {
|
||||
new Util.TopNSearcher<>(fst, 2, 5, minLongComparator) {
|
||||
@Override
|
||||
protected boolean acceptResult(IntsRef input, Long output) {
|
||||
boolean accept = output.intValue() == 7;
|
||||
|
@ -1415,7 +1345,7 @@ public class TestFSTs extends LuceneTestCase {
|
|||
};
|
||||
|
||||
searcher.addStartPaths(
|
||||
fst.getFirstArc(new FST.Arc<Long>()), outputs.getNoOutput(), true, new IntsRefBuilder());
|
||||
fst.getFirstArc(new FST.Arc<>()), outputs.getNoOutput(), true, new IntsRefBuilder());
|
||||
res = searcher.search();
|
||||
assertEquals(rejectCount.get(), 4);
|
||||
assertFalse(res.isComplete); // rejected(4) + topN(2) > maxQueueSize(5)
|
||||
|
@ -1423,12 +1353,7 @@ public class TestFSTs extends LuceneTestCase {
|
|||
|
||||
// compares just the weight side of the pair
|
||||
static final Comparator<Pair<Long, Long>> minPairWeightComparator =
|
||||
new Comparator<Pair<Long, Long>>() {
|
||||
@Override
|
||||
public int compare(Pair<Long, Long> left, Pair<Long, Long> right) {
|
||||
return left.output1.compareTo(right.output1);
|
||||
}
|
||||
};
|
||||
Comparator.comparing(left -> left.output1);
|
||||
|
||||
/** like testShortestPaths, but uses pairoutputs so we have both a weight and an output */
|
||||
public void testShortestPathsWFST() throws Exception {
|
||||
|
@ -1454,7 +1379,7 @@ public class TestFSTs extends LuceneTestCase {
|
|||
Util.TopResults<Pair<Long, Long>> res =
|
||||
Util.shortestPaths(
|
||||
fst,
|
||||
fst.getFirstArc(new FST.Arc<Pair<Long, Long>>()),
|
||||
fst.getFirstArc(new FST.Arc<>()),
|
||||
outputs.getNoOutput(),
|
||||
minPairWeightComparator,
|
||||
3,
|
||||
|
@ -1488,12 +1413,9 @@ public class TestFSTs extends LuceneTestCase {
|
|||
|
||||
for (int i = 0; i < numWords; i++) {
|
||||
String s;
|
||||
while (true) {
|
||||
do {
|
||||
s = TestUtil.randomSimpleString(random);
|
||||
if (!slowCompletor.containsKey(s)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
} while (slowCompletor.containsKey(s));
|
||||
|
||||
for (int j = 1; j < s.length(); j++) {
|
||||
allPrefixes.add(s.substring(0, j));
|
||||
|
@ -1521,9 +1443,9 @@ public class TestFSTs extends LuceneTestCase {
|
|||
// System.out.println("TEST: " + prefix);
|
||||
|
||||
long prefixOutput = 0;
|
||||
FST.Arc<Long> arc = fst.getFirstArc(new FST.Arc<Long>());
|
||||
FST.Arc<Long> arc = fst.getFirstArc(new FST.Arc<>());
|
||||
for (int idx = 0; idx < prefix.length(); idx++) {
|
||||
if (fst.findTargetArc((int) prefix.charAt(idx), arc, arc, reader) == null) {
|
||||
if (fst.findTargetArc(prefix.charAt(idx), arc, arc, reader) == null) {
|
||||
fail();
|
||||
}
|
||||
prefixOutput += arc.output();
|
||||
|
@ -1551,7 +1473,7 @@ public class TestFSTs extends LuceneTestCase {
|
|||
}
|
||||
|
||||
assertTrue(matches.size() > 0);
|
||||
Collections.sort(matches, new TieBreakByInputComparator<>(minLongComparator));
|
||||
matches.sort(new TieBreakByInputComparator<>(minLongComparator));
|
||||
if (matches.size() > topN) {
|
||||
matches.subList(topN, matches.size()).clear();
|
||||
}
|
||||
|
@ -1614,12 +1536,9 @@ public class TestFSTs extends LuceneTestCase {
|
|||
Random random = random();
|
||||
for (int i = 0; i < numWords; i++) {
|
||||
String s;
|
||||
while (true) {
|
||||
do {
|
||||
s = TestUtil.randomSimpleString(random);
|
||||
if (!slowCompletor.containsKey(s)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
} while (slowCompletor.containsKey(s));
|
||||
|
||||
for (int j = 1; j < s.length(); j++) {
|
||||
allPrefixes.add(s.substring(0, j));
|
||||
|
@ -1651,9 +1570,9 @@ public class TestFSTs extends LuceneTestCase {
|
|||
// System.out.println("TEST: " + prefix);
|
||||
|
||||
Pair<Long, Long> prefixOutput = outputs.getNoOutput();
|
||||
FST.Arc<Pair<Long, Long>> arc = fst.getFirstArc(new FST.Arc<Pair<Long, Long>>());
|
||||
FST.Arc<Pair<Long, Long>> arc = fst.getFirstArc(new FST.Arc<>());
|
||||
for (int idx = 0; idx < prefix.length(); idx++) {
|
||||
if (fst.findTargetArc((int) prefix.charAt(idx), arc, arc, reader) == null) {
|
||||
if (fst.findTargetArc(prefix.charAt(idx), arc, arc, reader) == null) {
|
||||
fail();
|
||||
}
|
||||
prefixOutput = outputs.add(prefixOutput, arc.output());
|
||||
|
@ -1683,7 +1602,7 @@ public class TestFSTs extends LuceneTestCase {
|
|||
}
|
||||
|
||||
assertTrue(matches.size() > 0);
|
||||
Collections.sort(matches, new TieBreakByInputComparator<>(minPairWeightComparator));
|
||||
matches.sort(new TieBreakByInputComparator<>(minPairWeightComparator));
|
||||
if (matches.size() > topN) {
|
||||
matches.subList(topN, matches.size()).clear();
|
||||
}
|
||||
|
@ -1758,7 +1677,7 @@ public class TestFSTs extends LuceneTestCase {
|
|||
Arc<BytesRef> arc = new FST.Arc<>();
|
||||
fst.getFirstArc(arc);
|
||||
FST.BytesReader reader = fst.getBytesReader();
|
||||
arc = fst.findTargetArc((int) 'm', arc, arc, reader);
|
||||
arc = fst.findTargetArc('m', arc, arc, reader);
|
||||
assertNotNull(arc);
|
||||
assertEquals(new BytesRef("m"), arc.output());
|
||||
|
||||
|
@ -1767,7 +1686,7 @@ public class TestFSTs extends LuceneTestCase {
|
|||
|
||||
fst.getFirstArc(arc);
|
||||
try {
|
||||
arc = fst.findTargetArc((int) 'm', arc, arc, reader);
|
||||
fst.findTargetArc((int) 'm', arc, arc, reader);
|
||||
} catch (
|
||||
@SuppressWarnings("unused")
|
||||
AssertionError ae) {
|
||||
|
|
|
@ -109,7 +109,7 @@ public class TestFSTsMisc extends LuceneTestCase {
|
|||
}
|
||||
pairs.add(new FSTTester.InputOutput<>(terms[idx], output));
|
||||
}
|
||||
new FSTTester<Object>(random(), dir, inputMode, pairs, outputs, false) {
|
||||
new FSTTester<Object>(random(), dir, inputMode, pairs, outputs) {
|
||||
@Override
|
||||
protected boolean outputsEqual(Object output1, Object output2) {
|
||||
if (output1 instanceof TwoLongs && output2 instanceof List) {
|
||||
|
@ -157,7 +157,7 @@ public class TestFSTsMisc extends LuceneTestCase {
|
|||
|
||||
pairs.add(new FSTTester.InputOutput<>(terms[idx], output));
|
||||
}
|
||||
new FSTTester<>(random(), dir, inputMode, pairs, outputs, false).doTest(false);
|
||||
new FSTTester<>(random(), dir, inputMode, pairs, outputs).doTest(false);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -29,12 +29,10 @@ import java.nio.charset.Charset;
|
|||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Random;
|
||||
import java.util.Set;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
|
@ -54,23 +52,16 @@ public class FSTTester<T> {
|
|||
final int inputMode;
|
||||
final Outputs<T> outputs;
|
||||
final Directory dir;
|
||||
final boolean doReverseLookup;
|
||||
long nodeCount;
|
||||
long arcCount;
|
||||
|
||||
public FSTTester(
|
||||
Random random,
|
||||
Directory dir,
|
||||
int inputMode,
|
||||
List<InputOutput<T>> pairs,
|
||||
Outputs<T> outputs,
|
||||
boolean doReverseLookup) {
|
||||
Random random, Directory dir, int inputMode, List<InputOutput<T>> pairs, Outputs<T> outputs) {
|
||||
this.random = random;
|
||||
this.dir = dir;
|
||||
this.inputMode = inputMode;
|
||||
this.pairs = pairs;
|
||||
this.outputs = outputs;
|
||||
this.doReverseLookup = doReverseLookup;
|
||||
}
|
||||
|
||||
static String inputToString(int inputMode, IntsRef term) {
|
||||
|
@ -181,11 +172,7 @@ public class FSTTester<T> {
|
|||
|
||||
@Override
|
||||
public int compareTo(InputOutput<T> other) {
|
||||
if (other instanceof InputOutput) {
|
||||
return input.compareTo((other).input);
|
||||
} else {
|
||||
throw new IllegalArgumentException();
|
||||
}
|
||||
return input.compareTo(other.input);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -208,9 +195,8 @@ public class FSTTester<T> {
|
|||
// of the term prefix that matches
|
||||
private T run(FST<T> fst, IntsRef term, int[] prefixLength) throws IOException {
|
||||
assert prefixLength == null || prefixLength.length == 1;
|
||||
final FST.Arc<T> arc = fst.getFirstArc(new FST.Arc<T>());
|
||||
final T NO_OUTPUT = fst.outputs.getNoOutput();
|
||||
T output = NO_OUTPUT;
|
||||
final FST.Arc<T> arc = fst.getFirstArc(new FST.Arc<>());
|
||||
T output = fst.outputs.getNoOutput();
|
||||
final FST.BytesReader fstReader = fst.getBytesReader();
|
||||
|
||||
for (int i = 0; i <= term.length; i++) {
|
||||
|
@ -243,12 +229,11 @@ public class FSTTester<T> {
|
|||
}
|
||||
|
||||
private T randomAcceptedWord(FST<T> fst, IntsRefBuilder in) throws IOException {
|
||||
FST.Arc<T> arc = fst.getFirstArc(new FST.Arc<T>());
|
||||
FST.Arc<T> arc = fst.getFirstArc(new FST.Arc<>());
|
||||
|
||||
final List<FST.Arc<T>> arcs = new ArrayList<>();
|
||||
in.clear();
|
||||
final T NO_OUTPUT = fst.outputs.getNoOutput();
|
||||
T output = NO_OUTPUT;
|
||||
T output = fst.outputs.getNoOutput();
|
||||
final FST.BytesReader fstReader = fst.getBytesReader();
|
||||
|
||||
while (true) {
|
||||
|
@ -311,14 +296,12 @@ public class FSTTester<T> {
|
|||
|
||||
if (random.nextBoolean() && fst != null) {
|
||||
IOContext context = LuceneTestCase.newIOContext(random);
|
||||
IndexOutput out = dir.createOutput("fst.bin", context);
|
||||
fst.save(out, out);
|
||||
out.close();
|
||||
IndexInput in = dir.openInput("fst.bin", context);
|
||||
try {
|
||||
fst = new FST<T>(in, in, outputs);
|
||||
try (IndexOutput out = dir.createOutput("fst.bin", context)) {
|
||||
fst.save(out, out);
|
||||
}
|
||||
try (IndexInput in = dir.openInput("fst.bin", context)) {
|
||||
fst = new FST<>(in, in, outputs);
|
||||
} finally {
|
||||
in.close();
|
||||
dir.deleteFile("fst.bin");
|
||||
}
|
||||
}
|
||||
|
@ -361,30 +344,8 @@ public class FSTTester<T> {
|
|||
}
|
||||
|
||||
// FST is complete
|
||||
@SuppressWarnings("deprecation")
|
||||
private void verifyUnPruned(int inputMode, FST<T> fst) throws IOException {
|
||||
|
||||
final FST<Long> fstLong;
|
||||
final Set<Long> validOutputs;
|
||||
long minLong = Long.MAX_VALUE;
|
||||
long maxLong = Long.MIN_VALUE;
|
||||
|
||||
if (doReverseLookup) {
|
||||
@SuppressWarnings("unchecked")
|
||||
FST<Long> fstLong0 = (FST<Long>) fst;
|
||||
fstLong = fstLong0;
|
||||
validOutputs = new HashSet<>();
|
||||
for (InputOutput<T> pair : pairs) {
|
||||
Long output = (Long) pair.output;
|
||||
maxLong = Math.max(maxLong, output);
|
||||
minLong = Math.min(minLong, output);
|
||||
validOutputs.add(output);
|
||||
}
|
||||
} else {
|
||||
fstLong = null;
|
||||
validOutputs = null;
|
||||
}
|
||||
|
||||
if (pairs.size() == 0) {
|
||||
assertNull(fst);
|
||||
return;
|
||||
|
@ -447,20 +408,6 @@ public class FSTTester<T> {
|
|||
termsMap.put(pair.input, pair.output);
|
||||
}
|
||||
|
||||
if (doReverseLookup && maxLong > minLong) {
|
||||
// Do random lookups so we test null (output doesn't
|
||||
// exist) case:
|
||||
assertNull(Util.getByOutput(fstLong, minLong - 7));
|
||||
assertNull(Util.getByOutput(fstLong, maxLong + 7));
|
||||
|
||||
final int num = LuceneTestCase.atLeast(random, 100);
|
||||
for (int iter = 0; iter < num; iter++) {
|
||||
Long v = TestUtil.nextLong(random, minLong, maxLong);
|
||||
IntsRef input = Util.getByOutput(fstLong, v);
|
||||
assertTrue(validOutputs.contains(v) || input == null);
|
||||
}
|
||||
}
|
||||
|
||||
// find random matching word and make sure it's valid
|
||||
if (LuceneTestCase.VERBOSE) {
|
||||
System.out.println("TEST: verify random accepted terms");
|
||||
|
@ -473,14 +420,6 @@ public class FSTTester<T> {
|
|||
"accepted word " + inputToString(inputMode, scratch.get()) + " is not valid",
|
||||
termsMap.containsKey(scratch.get()));
|
||||
assertTrue(outputsEqual(termsMap.get(scratch.get()), output));
|
||||
|
||||
if (doReverseLookup) {
|
||||
// System.out.println("lookup output=" + output + " outs=" + fst.outputs);
|
||||
IntsRef input = Util.getByOutput(fstLong, (Long) output);
|
||||
assertNotNull(input);
|
||||
// System.out.println(" got " + Util.toBytesRef(input, new BytesRef()).utf8ToString());
|
||||
assertEquals(scratch.get(), input);
|
||||
}
|
||||
}
|
||||
|
||||
// test IntsRefFSTEnum.seek:
|
||||
|
@ -497,7 +436,7 @@ public class FSTTester<T> {
|
|||
// seek to term that doesn't exist:
|
||||
while (true) {
|
||||
final IntsRef term = toIntsRef(getRandomString(random), inputMode);
|
||||
int pos = Collections.binarySearch(pairs, new InputOutput<T>(term, null));
|
||||
int pos = Collections.binarySearch(pairs, new InputOutput<>(term, null));
|
||||
if (pos < 0) {
|
||||
pos = -(pos + 1);
|
||||
// ok doesn't exist
|
||||
|
@ -617,7 +556,7 @@ public class FSTTester<T> {
|
|||
for (; attempt < 10; attempt++) {
|
||||
IntsRef term = toIntsRef(getRandomString(random), inputMode);
|
||||
if (!termsMap.containsKey(term) && term.compareTo(pairs.get(upto).input) > 0) {
|
||||
int pos = Collections.binarySearch(pairs, new InputOutput<T>(term, null));
|
||||
int pos = Collections.binarySearch(pairs, new InputOutput<>(term, null));
|
||||
assert pos < 0;
|
||||
upto = -(pos + 1);
|
||||
|
||||
|
@ -806,10 +745,8 @@ public class FSTTester<T> {
|
|||
cmo2 != null
|
||||
&& ((prune2 > 1 && cmo2.count >= prune2)
|
||||
|| (prune2 == 1 && (cmo2.count >= 2 || prefix.length <= 1)));
|
||||
} else if (cmo.count >= prune2) {
|
||||
keep = true;
|
||||
} else {
|
||||
keep = false;
|
||||
keep = cmo.count >= prune2;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue