From 666c7a25907336f3abb285f7f75dea8015a98ac7 Mon Sep 17 00:00:00 2001 From: Michael Sokolov Date: Wed, 18 Aug 2021 08:15:31 -0400 Subject: [PATCH] LUCENE-8638: remove deprecated FST get by output --- lucene/CHANGES.txt | 2 + .../java/org/apache/lucene/util/fst/Util.java | 172 +---------- .../apache/lucene/util/fst/package-info.java | 2 - .../org/apache/lucene/util/fst/Test2BFST.java | 6 +- .../org/apache/lucene/util/fst/TestFSTs.java | 279 +++++++----------- .../lucene/misc/util/fst/TestFSTsMisc.java | 4 +- .../org/apache/lucene/util/fst/FSTTester.java | 91 +----- 7 files changed, 122 insertions(+), 434 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 20f847e39ac..7ab6d40951b 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -22,6 +22,8 @@ System Requirements API Changes +* LUCENE-8638: Remove deprecated methods in FST for lookup by output. + * LUCENE-8638: haversin() expressions function now returns its result in meters rather than kilometers. diff --git a/lucene/core/src/java/org/apache/lucene/util/fst/Util.java b/lucene/core/src/java/org/apache/lucene/util/fst/Util.java index 5087bff9b63..f8d82cbe855 100644 --- a/lucene/core/src/java/org/apache/lucene/util/fst/Util.java +++ b/lucene/core/src/java/org/apache/lucene/util/fst/Util.java @@ -45,7 +45,7 @@ public final class Util { public static T get(FST fst, IntsRef input) throws IOException { // TODO: would be nice not to alloc this on every lookup - final FST.Arc arc = fst.getFirstArc(new FST.Arc()); + final FST.Arc arc = fst.getFirstArc(new FST.Arc<>()); final BytesReader fstReader = fst.getBytesReader(); @@ -92,167 +92,6 @@ public final class Util { } } - /** - * Reverse lookup (lookup by output instead of by input), in the special case when your FSTs - * outputs are strictly ascending. This locates the input/output pair where the output is equal to - * the target, and will return null if that output does not exist. - * - *

NOTE: this only works with {@code FST}, only works when the outputs are ascending in - * order with the inputs. For example, simple ordinals (0, 1, 2, ...), or file offsets (when - * appending to a file) fit this. - */ - @Deprecated - public static IntsRef getByOutput(FST fst, long targetOutput) throws IOException { - - final BytesReader in = fst.getBytesReader(); - - // TODO: would be nice not to alloc this on every lookup - FST.Arc arc = fst.getFirstArc(new FST.Arc()); - - FST.Arc scratchArc = new FST.Arc<>(); - - final IntsRefBuilder result = new IntsRefBuilder(); - return getByOutput(fst, targetOutput, in, arc, scratchArc, result); - } - - /** - * Expert: like {@link Util#getByOutput(FST, long)} except reusing BytesReader, initial and - * scratch Arc, and result. - */ - @Deprecated - public static IntsRef getByOutput( - FST fst, - long targetOutput, - BytesReader in, - Arc arc, - Arc scratchArc, - IntsRefBuilder result) - throws IOException { - long output = arc.output(); - int upto = 0; - - // System.out.println("reverseLookup output=" + targetOutput); - - while (true) { - // System.out.println("loop: output=" + output + " upto=" + upto + " arc=" + arc); - if (arc.isFinal()) { - final long finalOutput = output + arc.nextFinalOutput(); - // System.out.println(" isFinal finalOutput=" + finalOutput); - if (finalOutput == targetOutput) { - result.setLength(upto); - // System.out.println(" found!"); - return result.get(); - } else if (finalOutput > targetOutput) { - // System.out.println(" not found!"); - return null; - } - } - - if (FST.targetHasArcs(arc)) { - // System.out.println(" targetHasArcs"); - result.grow(1 + upto); - - fst.readFirstRealTargetArc(arc.target(), arc, in); - - if (arc.bytesPerArc() != 0 && arc.nodeFlags() == FST.ARCS_FOR_BINARY_SEARCH) { - - int low = 0; - int high = arc.numArcs() - 1; - int mid = 0; - // System.out.println("bsearch: numArcs=" + arc.numArcs + " target=" + targetOutput + " - // output=" + output); - boolean exact = false; - while (low <= high) { - mid = (low + high) >>> 1; - in.setPosition(arc.posArcsStart()); - in.skipBytes(arc.bytesPerArc() * mid); - final byte flags = in.readByte(); - fst.readLabel(in); - final long minArcOutput; - if ((flags & FST.BIT_ARC_HAS_OUTPUT) != 0) { - final long arcOutput = fst.outputs.read(in); - minArcOutput = output + arcOutput; - } else { - minArcOutput = output; - } - // System.out.println(" cycle mid=" + mid + " output=" + minArcOutput); - if (minArcOutput == targetOutput) { - exact = true; - break; - } else if (minArcOutput < targetOutput) { - low = mid + 1; - } else { - high = mid - 1; - } - } - - int idx; - if (high == -1) { - return null; - } else if (exact) { - idx = mid; - } else { - idx = low - 1; - } - - fst.readArcByIndex(arc, in, idx); - result.setIntAt(upto++, arc.label()); - output += arc.output(); - - } else { - - FST.Arc prevArc = null; - - while (true) { - // System.out.println(" cycle label=" + arc.label + " output=" + arc.output); - - // This is the min output we'd hit if we follow - // this arc: - final long minArcOutput = output + arc.output(); - - if (minArcOutput == targetOutput) { - // Recurse on this arc: - // System.out.println(" match! break"); - output = minArcOutput; - result.setIntAt(upto++, arc.label()); - break; - } else if (minArcOutput > targetOutput) { - if (prevArc == null) { - // Output doesn't exist - return null; - } else { - // Recurse on previous arc: - arc.copyFrom(prevArc); - result.setIntAt(upto++, arc.label()); - output += arc.output(); - // System.out.println(" recurse prev label=" + (char) arc.label + " output=" + - // output); - break; - } - } else if (arc.isLast()) { - // Recurse on this arc: - output = minArcOutput; - // System.out.println(" recurse last label=" + (char) arc.label + " output=" + - // output); - result.setIntAt(upto++, arc.label()); - break; - } else { - // Read next arc in this node: - prevArc = scratchArc; - prevArc.copyFrom(arc); - // System.out.println(" after copy label=" + (char) prevArc.label + " vs " + - // (char) arc.label); - fst.readNextRealArc(arc, in); - } - } - } - } else { - // System.out.println(" no target arcs; not found!"); - return null; - } - } - } - /** * Represents a path in TopNSearcher. * @@ -899,7 +738,7 @@ public final class Util { scratch.setLength(charLimit); scratch.grow(charLimit); for (int idx = 0; idx < charLimit; idx++) { - scratch.setIntAt(idx, (int) s.charAt(idx)); + scratch.setIntAt(idx, s.charAt(idx)); } return scratch.get(); } @@ -1033,9 +872,6 @@ public final class Util { while (true) { // System.out.println(" non-bs cycle"); - // TODO: we should fix this code to not have to create - // object for the output of every arc we scan... only - // for the matching arc, if found if (arc.label() >= label) { // System.out.println(" found!"); return arc; @@ -1067,12 +903,12 @@ public final class Util { + ")"; BytesReader in = fst.getBytesReader(); int low = arc.arcIdx(); - int mid = 0; + int mid; int high = arc.numArcs() - 1; while (low <= high) { mid = (low + high) >>> 1; in.setPosition(arc.posArcsStart()); - in.skipBytes(arc.bytesPerArc() * mid + 1); + in.skipBytes((long) arc.bytesPerArc() * mid + 1); final int midLabel = fst.readLabel(in); final int cmp = midLabel - targetLabel; if (cmp < 0) { diff --git a/lucene/core/src/java/org/apache/lucene/util/fst/package-info.java b/lucene/core/src/java/org/apache/lucene/util/fst/package-info.java index dd130984d11..aa7bd91f8ec 100644 --- a/lucene/core/src/java/org/apache/lucene/util/fst/package-info.java +++ b/lucene/core/src/java/org/apache/lucene/util/fst/package-info.java @@ -25,8 +25,6 @@ *

  • Fast and low memory overhead construction of the minimal FST (but inputs must be provided * in sorted order) *
  • Low object overhead and quick deserialization (byte[] representation) - *
  • {@link org.apache.lucene.util.fst.Util#getByOutput Lookup-by-output} when the outputs are - * in sorted order (e.g., ordinals or file pointers) *
  • Pluggable {@link org.apache.lucene.util.fst.Outputs Outputs} representation *
  • {@link org.apache.lucene.util.fst.Util#shortestPaths N-shortest-paths} search by weight *
  • Enumerators ({@link org.apache.lucene.util.fst.IntsRefFSTEnum IntsRef} and {@link diff --git a/lucene/core/src/test/org/apache/lucene/util/fst/Test2BFST.java b/lucene/core/src/test/org/apache/lucene/util/fst/Test2BFST.java index 6ab4a5c9208..350f8558583 100644 --- a/lucene/core/src/test/org/apache/lucene/util/fst/Test2BFST.java +++ b/lucene/core/src/test/org/apache/lucene/util/fst/Test2BFST.java @@ -277,12 +277,8 @@ public class Test2BFST extends LuceneTestCase { System.out.println(i + "...: "); } - // forward lookup: assertEquals(output, Util.get(fst, input).longValue()); - // reverse lookup: - @SuppressWarnings("deprecation") - IntsRef inputResult = Util.getByOutput(fst, output); - assertEquals(input, inputResult); + output += 1 + r.nextInt(10); nextInput(r, ints); } diff --git a/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java b/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java index 6608f0f15d1..58eeb2d4537 100644 --- a/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java +++ b/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java @@ -92,7 +92,9 @@ public class TestFSTs extends LuceneTestCase { @Override public void tearDown() throws Exception { // can be null if we force simpletext (funky, some kind of bug in test runner maybe) - if (dir != null) dir.close(); + if (dir != null) { + dir.close(); + } super.tearDown(); } @@ -133,7 +135,7 @@ public class TestFSTs extends LuceneTestCase { for (IntsRef term : terms2) { pairs.add(new FSTTester.InputOutput<>(term, NO_OUTPUT)); } - FSTTester tester = new FSTTester<>(random(), dir, inputMode, pairs, outputs, false); + FSTTester tester = new FSTTester<>(random(), dir, inputMode, pairs, outputs); FST fst = tester.doTest(0, 0, false); assertNotNull(fst); assertEquals(22, tester.nodeCount); @@ -147,7 +149,7 @@ public class TestFSTs extends LuceneTestCase { for (int idx = 0; idx < terms2.length; idx++) { pairs.add(new FSTTester.InputOutput<>(terms2[idx], (long) idx)); } - FSTTester tester = new FSTTester<>(random(), dir, inputMode, pairs, outputs, true); + FSTTester tester = new FSTTester<>(random(), dir, inputMode, pairs, outputs); final FST fst = tester.doTest(0, 0, false); assertNotNull(fst); assertEquals(22, tester.nodeCount); @@ -157,14 +159,12 @@ public class TestFSTs extends LuceneTestCase { // FST byte sequence ord { final ByteSequenceOutputs outputs = ByteSequenceOutputs.getSingleton(); - final BytesRef NO_OUTPUT = outputs.getNoOutput(); final List> pairs = new ArrayList<>(terms2.length); for (int idx = 0; idx < terms2.length; idx++) { - final BytesRef output = idx == 17 ? NO_OUTPUT : new BytesRef(Integer.toString(idx)); + final BytesRef output = new BytesRef(Integer.toString(idx)); pairs.add(new FSTTester.InputOutput<>(terms2[idx], output)); } - FSTTester tester = - new FSTTester<>(random(), dir, inputMode, pairs, outputs, false); + FSTTester tester = new FSTTester<>(random(), dir, inputMode, pairs, outputs); final FST fst = tester.doTest(0, 0, false); assertNotNull(fst); assertEquals(24, tester.nodeCount); @@ -185,7 +185,7 @@ public class TestFSTs extends LuceneTestCase { for (IntsRef term : terms) { pairs.add(new FSTTester.InputOutput<>(term, NO_OUTPUT)); } - new FSTTester<>(random(), dir, inputMode, pairs, outputs, false).doTest(true); + new FSTTester<>(random(), dir, inputMode, pairs, outputs).doTest(true); } // PositiveIntOutput (ord) @@ -195,7 +195,7 @@ public class TestFSTs extends LuceneTestCase { for (int idx = 0; idx < terms.length; idx++) { pairs.add(new FSTTester.InputOutput<>(terms[idx], (long) idx)); } - new FSTTester<>(random(), dir, inputMode, pairs, outputs, true).doTest(true); + new FSTTester<>(random(), dir, inputMode, pairs, outputs).doTest(true); } // PositiveIntOutput (random monotonically increasing positive number) @@ -203,24 +203,23 @@ public class TestFSTs extends LuceneTestCase { final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(); final List> pairs = new ArrayList<>(terms.length); long lastOutput = 0; - for (int idx = 0; idx < terms.length; idx++) { + for (IntsRef term : terms) { final long value = lastOutput + TestUtil.nextInt(random(), 1, 1000); lastOutput = value; - pairs.add(new FSTTester.InputOutput<>(terms[idx], value)); + pairs.add(new FSTTester.InputOutput<>(term, value)); } - new FSTTester<>(random(), dir, inputMode, pairs, outputs, true).doTest(true); + new FSTTester<>(random(), dir, inputMode, pairs, outputs).doTest(true); } // PositiveIntOutput (random positive number) { final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(); final List> pairs = new ArrayList<>(terms.length); - for (int idx = 0; idx < terms.length; idx++) { + for (IntsRef term : terms) { pairs.add( - new FSTTester.InputOutput<>( - terms[idx], TestUtil.nextLong(random(), 0, Long.MAX_VALUE))); + new FSTTester.InputOutput<>(term, TestUtil.nextLong(random(), 0, Long.MAX_VALUE))); } - new FSTTester<>(random(), dir, inputMode, pairs, outputs, false).doTest(true); + new FSTTester<>(random(), dir, inputMode, pairs, outputs).doTest(true); } // Pair @@ -236,7 +235,7 @@ public class TestFSTs extends LuceneTestCase { lastOutput = value; pairs.add(new FSTTester.InputOutput<>(terms[idx], outputs.newPair((long) idx, value))); } - new FSTTester<>(random(), dir, inputMode, pairs, outputs, false).doTest(true); + new FSTTester<>(random(), dir, inputMode, pairs, outputs).doTest(true); } // Sequence-of-bytes @@ -249,7 +248,7 @@ public class TestFSTs extends LuceneTestCase { random().nextInt(30) == 17 ? NO_OUTPUT : new BytesRef(Integer.toString(idx)); pairs.add(new FSTTester.InputOutput<>(terms[idx], output)); } - new FSTTester<>(random(), dir, inputMode, pairs, outputs, false).doTest(true); + new FSTTester<>(random(), dir, inputMode, pairs, outputs).doTest(true); } // Sequence-of-ints @@ -265,7 +264,7 @@ public class TestFSTs extends LuceneTestCase { } pairs.add(new FSTTester.InputOutput<>(terms[idx], output)); } - new FSTTester<>(random(), dir, inputMode, pairs, outputs, false).doTest(true); + new FSTTester<>(random(), dir, inputMode, pairs, outputs).doTest(true); } } @@ -298,7 +297,7 @@ public class TestFSTs extends LuceneTestCase { final String term = getRandomString(random); termsSet.add(toIntsRef(term, inputMode)); } - doTest(inputMode, termsSet.toArray(new IntsRef[termsSet.size()])); + doTest(inputMode, termsSet.toArray(new IntsRef[0])); } } } @@ -497,7 +496,7 @@ public class TestFSTs extends LuceneTestCase { private abstract static class VisitTerms { private final Path dirOut; private final Path wordsFileIn; - private int inputMode; + private final int inputMode; private final Outputs outputs; private final FSTCompiler fstCompiler; @@ -524,7 +523,7 @@ public class TestFSTs extends LuceneTestCase { protected abstract T getOutput(IntsRef input, int ord) throws IOException; - public void run(int limit, boolean verify, boolean verifyByOutput) throws IOException { + public void run(int limit, boolean verify) throws IOException { BufferedReader is = Files.newBufferedReader(wordsFileIn, StandardCharsets.UTF_8); try { @@ -541,12 +540,11 @@ public class TestFSTs extends LuceneTestCase { ord++; if (ord % 500000 == 0) { - System.out.println( - String.format( - Locale.ROOT, - "%6.2fs: %9d...", - ((System.currentTimeMillis() - tStart) / 1000.0), - ord)); + System.out.printf( + Locale.ROOT, + "%6.2fs: %9d...", + ((System.currentTimeMillis() - tStart) / 1000.0), + ord); } if (ord >= limit) { break; @@ -594,90 +592,51 @@ public class TestFSTs extends LuceneTestCase { return; } - /* - IndexInput in = dir.openInput("fst.bin", IOContext.DEFAULT); - fst = new FST(in, outputs); - in.close(); - */ - System.out.println("\nNow verify..."); + is.close(); + is = Files.newBufferedReader(wordsFileIn, StandardCharsets.UTF_8); + + ord = 0; + tStart = System.currentTimeMillis(); while (true) { - for (int iter = 0; iter < 2; iter++) { - is.close(); - is = Files.newBufferedReader(wordsFileIn, StandardCharsets.UTF_8); - - ord = 0; - tStart = System.currentTimeMillis(); - while (true) { - String w = is.readLine(); - if (w == null) { - break; - } - toIntsRef(w, inputMode, intsRefBuilder); - if (iter == 0) { - T expected = getOutput(intsRefBuilder.get(), ord); - T actual = Util.get(fst, intsRefBuilder.get()); - if (actual == null) { - throw new RuntimeException("unexpected null output on input=" + w); - } - if (!actual.equals(expected)) { - throw new RuntimeException( - "wrong output (got " - + outputs.outputToString(actual) - + " but expected " - + outputs.outputToString(expected) - + ") on input=" - + w); - } - } else { - // Get by output - final Long output = (Long) getOutput(intsRefBuilder.get(), ord); - @SuppressWarnings({"unchecked", "deprecation"}) - final IntsRef actual = Util.getByOutput((FST) fst, output.longValue()); - if (actual == null) { - throw new RuntimeException("unexpected null input from output=" + output); - } - if (!actual.equals(intsRefBuilder.get())) { - throw new RuntimeException( - "wrong input (got " - + actual - + " but expected " - + intsRefBuilder - + " from output=" - + output); - } - } - - ord++; - if (ord % 500000 == 0) { - System.out.println( - ((System.currentTimeMillis() - tStart) / 1000.0) + "s: " + ord + "..."); - } - if (ord >= limit) { - break; - } - } - - double totSec = ((System.currentTimeMillis() - tStart) / 1000.0); - System.out.println( - "Verify " - + (iter == 1 ? "(by output) " : "") - + "took " - + totSec - + " sec + (" - + (int) ((totSec * 1000000000 / ord)) - + " nsec per lookup)"); - - if (!verifyByOutput) { - break; - } + String w = is.readLine(); + if (w == null) { + break; + } + toIntsRef(w, inputMode, intsRefBuilder); + T expected = getOutput(intsRefBuilder.get(), ord); + T actual = Util.get(fst, intsRefBuilder.get()); + if (actual == null) { + throw new RuntimeException("unexpected null output on input=" + w); + } + if (!actual.equals(expected)) { + throw new RuntimeException( + "wrong output (got " + + outputs.outputToString(actual) + + " but expected " + + outputs.outputToString(expected) + + ") on input=" + + w); + } + ord++; + if (ord % 500000 == 0) { + System.out.println( + ((System.currentTimeMillis() - tStart) / 1000.0) + "s: " + ord + "..."); + } + if (ord >= limit) { + break; } - - // NOTE: comment out to profile lookup... - break; } + double totSec = ((System.currentTimeMillis() - tStart) / 1000.0); + System.out.println( + "Verify took " + + totSec + + " sec + (" + + (int) ((totSec * 1000000000 / ord)) + + " nsec per lookup)"); + } finally { is.close(); } @@ -762,7 +721,7 @@ public class TestFSTs extends LuceneTestCase { } return outputs.newPair((long) ord, (long) TestUtil.nextInt(rand, 1, 5000)); } - }.run(limit, verify, false); + }.run(limit, verify); } else if (storeOrds) { // Store only ords final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(); @@ -771,7 +730,7 @@ public class TestFSTs extends LuceneTestCase { public Long getOutput(IntsRef input, int ord) { return (long) ord; } - }.run(limit, verify, true); + }.run(limit, verify); } else if (storeDocFreqs) { // Store only docFreq final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(); @@ -785,7 +744,7 @@ public class TestFSTs extends LuceneTestCase { } return (long) TestUtil.nextInt(rand, 1, 5000); } - }.run(limit, verify, false); + }.run(limit, verify); } else { // Store nothing final NoOutputs outputs = NoOutputs.getSingleton(); @@ -795,7 +754,7 @@ public class TestFSTs extends LuceneTestCase { public Object getOutput(IntsRef input, int ord) { return NO_OUTPUT; } - }.run(limit, verify, false); + }.run(limit, verify); } } @@ -913,22 +872,6 @@ public class TestFSTs extends LuceneTestCase { assertNotNull(seekResult); assertEquals(b, seekResult.input); assertEquals(42, (long) seekResult.output); - - @SuppressWarnings("deprecation") - IntsRef byOutput = Util.getByOutput(fst, 13824324872317238L); - assertEquals(Util.toIntsRef(new BytesRef("c"), new IntsRefBuilder()), byOutput); - - @SuppressWarnings("deprecation") - IntsRef byOutput47 = Util.getByOutput(fst, 47); - assertNull(byOutput47); - - @SuppressWarnings("deprecation") - IntsRef byOutput42 = Util.getByOutput(fst, 42); - assertEquals(Util.toIntsRef(new BytesRef("b"), new IntsRefBuilder()), byOutput42); - - @SuppressWarnings("deprecation") - IntsRef byOutput17 = Util.getByOutput(fst, 17); - assertEquals(Util.toIntsRef(new BytesRef("a"), new IntsRefBuilder()), byOutput17); } public void testPrimaryKeys() throws Exception { @@ -991,12 +934,9 @@ public class TestFSTs extends LuceneTestCase { if (cycle == 0) { idString = String.format(Locale.ROOT, "%07d", (NUM_IDS + idx)); } else { - while (true) { + do { idString = Long.toString(random().nextLong()); - if (!allIDs.contains(idString)) { - break; - } - } + } while (allIDs.contains(idString)); } outOfBounds.add(idString); allIDsList.add(idString); @@ -1063,8 +1003,7 @@ public class TestFSTs extends LuceneTestCase { new BytesRef(nextID), termsEnum.term()); } else if (!exists) { - assertTrue( - status == TermsEnum.SeekStatus.NOT_FOUND || status == TermsEnum.SeekStatus.END); + assertEquals(TermsEnum.SeekStatus.NOT_FOUND, status); } else { assertEquals(TermsEnum.SeekStatus.FOUND, status); } @@ -1204,7 +1143,7 @@ public class TestFSTs extends LuceneTestCase { ArrayList out = new ArrayList<>(); StringBuilder b = new StringBuilder(); s.generate(out, b, 'a', 'i', 10); - String[] input = out.toArray(new String[out.size()]); + String[] input = out.toArray(new String[0]); Arrays.sort(input); FST fst = s.compile(input); FST.Arc arc = fst.getFirstArc(new FST.Arc<>()); @@ -1224,7 +1163,7 @@ public class TestFSTs extends LuceneTestCase { Util.toDot(fst, w, false, false); w.close(); // System.out.println(w.toString()); - assertTrue(w.toString().indexOf("label=\"t/[7]\"") != -1); + assertTrue(w.toString().contains("label=\"t/[7]\"")); } public void testInternalFinalState() throws Exception { @@ -1242,9 +1181,9 @@ public class TestFSTs extends LuceneTestCase { // System.out.println(w.toString()); // check for accept state at label t - assertTrue(w.toString().indexOf("[label=\"t\" style=\"bold\"") != -1); + assertTrue(w.toString().contains("[label=\"t\" style=\"bold\"")); // check for accept state at label n - assertTrue(w.toString().indexOf("[label=\"n\" style=\"bold\"") != -1); + assertTrue(w.toString().contains("[label=\"n\" style=\"bold\"")); } // Make sure raw FST can differentiate between final vs @@ -1253,9 +1192,6 @@ public class TestFSTs extends LuceneTestCase { final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(); final Long nothing = outputs.getNoOutput(); final FSTCompiler fstCompiler = new FSTCompiler<>(FST.INPUT_TYPE.BYTE1, outputs); - - // final FST fst = new FST<>(FST.INPUT_TYPE.BYTE1, outputs, false, PackedInts.COMPACT, - // 15); final FST fst = fstCompiler.fst; final FSTCompiler.UnCompiledNode rootNode = @@ -1311,11 +1247,11 @@ public class TestFSTs extends LuceneTestCase { private void checkStopNodes(FST fst, PositiveIntOutputs outputs) throws Exception { final Long nothing = outputs.getNoOutput(); - FST.Arc startArc = fst.getFirstArc(new FST.Arc()); + FST.Arc startArc = fst.getFirstArc(new FST.Arc<>()); assertEquals(nothing, startArc.output()); assertEquals(nothing, startArc.nextFinalOutput()); - FST.Arc arc = fst.readFirstTargetArc(startArc, new FST.Arc(), fst.getBytesReader()); + FST.Arc arc = fst.readFirstTargetArc(startArc, new FST.Arc<>(), fst.getBytesReader()); assertEquals('a', arc.label()); assertEquals(17, arc.nextFinalOutput().longValue()); assertTrue(arc.isFinal()); @@ -1326,13 +1262,7 @@ public class TestFSTs extends LuceneTestCase { assertEquals(42, arc.output().longValue()); } - static final Comparator minLongComparator = - new Comparator() { - @Override - public int compare(Long left, Long right) { - return left.compareTo(right); - } - }; + static final Comparator minLongComparator = Comparator.naturalOrder(); public void testShortestPaths() throws Exception { final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(); @@ -1350,7 +1280,7 @@ public class TestFSTs extends LuceneTestCase { Util.TopResults res = Util.shortestPaths( fst, - fst.getFirstArc(new FST.Arc()), + fst.getFirstArc(new FST.Arc<>()), outputs.getNoOutput(), minLongComparator, 3, @@ -1369,7 +1299,7 @@ public class TestFSTs extends LuceneTestCase { public void testRejectNoLimits() throws IOException { final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(); - final FSTCompiler fstCompiler = new FSTCompiler(FST.INPUT_TYPE.BYTE1, outputs); + final FSTCompiler fstCompiler = new FSTCompiler<>(FST.INPUT_TYPE.BYTE1, outputs); final IntsRefBuilder scratch = new IntsRefBuilder(); fstCompiler.add(Util.toIntsRef(new BytesRef("aab"), scratch), 22L); @@ -1381,7 +1311,7 @@ public class TestFSTs extends LuceneTestCase { final FST fst = fstCompiler.compile(); final AtomicInteger rejectCount = new AtomicInteger(); Util.TopNSearcher searcher = - new Util.TopNSearcher(fst, 2, 6, minLongComparator) { + new Util.TopNSearcher<>(fst, 2, 6, minLongComparator) { @Override protected boolean acceptResult(IntsRef input, Long output) { boolean accept = output.intValue() == 7; @@ -1393,7 +1323,7 @@ public class TestFSTs extends LuceneTestCase { }; searcher.addStartPaths( - fst.getFirstArc(new FST.Arc()), outputs.getNoOutput(), true, new IntsRefBuilder()); + fst.getFirstArc(new FST.Arc<>()), outputs.getNoOutput(), true, new IntsRefBuilder()); Util.TopResults res = searcher.search(); assertEquals(rejectCount.get(), 4); assertTrue(res.isComplete); // rejected(4) + topN(2) <= maxQueueSize(6) @@ -1403,7 +1333,7 @@ public class TestFSTs extends LuceneTestCase { assertEquals(7L, res.topN.get(0).output.longValue()); rejectCount.set(0); searcher = - new Util.TopNSearcher(fst, 2, 5, minLongComparator) { + new Util.TopNSearcher<>(fst, 2, 5, minLongComparator) { @Override protected boolean acceptResult(IntsRef input, Long output) { boolean accept = output.intValue() == 7; @@ -1415,7 +1345,7 @@ public class TestFSTs extends LuceneTestCase { }; searcher.addStartPaths( - fst.getFirstArc(new FST.Arc()), outputs.getNoOutput(), true, new IntsRefBuilder()); + fst.getFirstArc(new FST.Arc<>()), outputs.getNoOutput(), true, new IntsRefBuilder()); res = searcher.search(); assertEquals(rejectCount.get(), 4); assertFalse(res.isComplete); // rejected(4) + topN(2) > maxQueueSize(5) @@ -1423,12 +1353,7 @@ public class TestFSTs extends LuceneTestCase { // compares just the weight side of the pair static final Comparator> minPairWeightComparator = - new Comparator>() { - @Override - public int compare(Pair left, Pair right) { - return left.output1.compareTo(right.output1); - } - }; + Comparator.comparing(left -> left.output1); /** like testShortestPaths, but uses pairoutputs so we have both a weight and an output */ public void testShortestPathsWFST() throws Exception { @@ -1454,7 +1379,7 @@ public class TestFSTs extends LuceneTestCase { Util.TopResults> res = Util.shortestPaths( fst, - fst.getFirstArc(new FST.Arc>()), + fst.getFirstArc(new FST.Arc<>()), outputs.getNoOutput(), minPairWeightComparator, 3, @@ -1488,12 +1413,9 @@ public class TestFSTs extends LuceneTestCase { for (int i = 0; i < numWords; i++) { String s; - while (true) { + do { s = TestUtil.randomSimpleString(random); - if (!slowCompletor.containsKey(s)) { - break; - } - } + } while (slowCompletor.containsKey(s)); for (int j = 1; j < s.length(); j++) { allPrefixes.add(s.substring(0, j)); @@ -1521,9 +1443,9 @@ public class TestFSTs extends LuceneTestCase { // System.out.println("TEST: " + prefix); long prefixOutput = 0; - FST.Arc arc = fst.getFirstArc(new FST.Arc()); + FST.Arc arc = fst.getFirstArc(new FST.Arc<>()); for (int idx = 0; idx < prefix.length(); idx++) { - if (fst.findTargetArc((int) prefix.charAt(idx), arc, arc, reader) == null) { + if (fst.findTargetArc(prefix.charAt(idx), arc, arc, reader) == null) { fail(); } prefixOutput += arc.output(); @@ -1551,7 +1473,7 @@ public class TestFSTs extends LuceneTestCase { } assertTrue(matches.size() > 0); - Collections.sort(matches, new TieBreakByInputComparator<>(minLongComparator)); + matches.sort(new TieBreakByInputComparator<>(minLongComparator)); if (matches.size() > topN) { matches.subList(topN, matches.size()).clear(); } @@ -1614,12 +1536,9 @@ public class TestFSTs extends LuceneTestCase { Random random = random(); for (int i = 0; i < numWords; i++) { String s; - while (true) { + do { s = TestUtil.randomSimpleString(random); - if (!slowCompletor.containsKey(s)) { - break; - } - } + } while (slowCompletor.containsKey(s)); for (int j = 1; j < s.length(); j++) { allPrefixes.add(s.substring(0, j)); @@ -1651,9 +1570,9 @@ public class TestFSTs extends LuceneTestCase { // System.out.println("TEST: " + prefix); Pair prefixOutput = outputs.getNoOutput(); - FST.Arc> arc = fst.getFirstArc(new FST.Arc>()); + FST.Arc> arc = fst.getFirstArc(new FST.Arc<>()); for (int idx = 0; idx < prefix.length(); idx++) { - if (fst.findTargetArc((int) prefix.charAt(idx), arc, arc, reader) == null) { + if (fst.findTargetArc(prefix.charAt(idx), arc, arc, reader) == null) { fail(); } prefixOutput = outputs.add(prefixOutput, arc.output()); @@ -1683,7 +1602,7 @@ public class TestFSTs extends LuceneTestCase { } assertTrue(matches.size() > 0); - Collections.sort(matches, new TieBreakByInputComparator<>(minPairWeightComparator)); + matches.sort(new TieBreakByInputComparator<>(minPairWeightComparator)); if (matches.size() > topN) { matches.subList(topN, matches.size()).clear(); } @@ -1758,7 +1677,7 @@ public class TestFSTs extends LuceneTestCase { Arc arc = new FST.Arc<>(); fst.getFirstArc(arc); FST.BytesReader reader = fst.getBytesReader(); - arc = fst.findTargetArc((int) 'm', arc, arc, reader); + arc = fst.findTargetArc('m', arc, arc, reader); assertNotNull(arc); assertEquals(new BytesRef("m"), arc.output()); @@ -1767,7 +1686,7 @@ public class TestFSTs extends LuceneTestCase { fst.getFirstArc(arc); try { - arc = fst.findTargetArc((int) 'm', arc, arc, reader); + fst.findTargetArc((int) 'm', arc, arc, reader); } catch ( @SuppressWarnings("unused") AssertionError ae) { diff --git a/lucene/misc/src/test/org/apache/lucene/misc/util/fst/TestFSTsMisc.java b/lucene/misc/src/test/org/apache/lucene/misc/util/fst/TestFSTsMisc.java index 55c1675a3d0..b1888b77582 100644 --- a/lucene/misc/src/test/org/apache/lucene/misc/util/fst/TestFSTsMisc.java +++ b/lucene/misc/src/test/org/apache/lucene/misc/util/fst/TestFSTsMisc.java @@ -109,7 +109,7 @@ public class TestFSTsMisc extends LuceneTestCase { } pairs.add(new FSTTester.InputOutput<>(terms[idx], output)); } - new FSTTester(random(), dir, inputMode, pairs, outputs, false) { + new FSTTester(random(), dir, inputMode, pairs, outputs) { @Override protected boolean outputsEqual(Object output1, Object output2) { if (output1 instanceof TwoLongs && output2 instanceof List) { @@ -157,7 +157,7 @@ public class TestFSTsMisc extends LuceneTestCase { pairs.add(new FSTTester.InputOutput<>(terms[idx], output)); } - new FSTTester<>(random(), dir, inputMode, pairs, outputs, false).doTest(false); + new FSTTester<>(random(), dir, inputMode, pairs, outputs).doTest(false); } } diff --git a/lucene/test-framework/src/java/org/apache/lucene/util/fst/FSTTester.java b/lucene/test-framework/src/java/org/apache/lucene/util/fst/FSTTester.java index fe14c76e03d..d5d26844653 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/util/fst/FSTTester.java +++ b/lucene/test-framework/src/java/org/apache/lucene/util/fst/FSTTester.java @@ -29,12 +29,10 @@ import java.nio.charset.Charset; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; -import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Random; -import java.util.Set; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IndexInput; @@ -54,23 +52,16 @@ public class FSTTester { final int inputMode; final Outputs outputs; final Directory dir; - final boolean doReverseLookup; long nodeCount; long arcCount; public FSTTester( - Random random, - Directory dir, - int inputMode, - List> pairs, - Outputs outputs, - boolean doReverseLookup) { + Random random, Directory dir, int inputMode, List> pairs, Outputs outputs) { this.random = random; this.dir = dir; this.inputMode = inputMode; this.pairs = pairs; this.outputs = outputs; - this.doReverseLookup = doReverseLookup; } static String inputToString(int inputMode, IntsRef term) { @@ -181,11 +172,7 @@ public class FSTTester { @Override public int compareTo(InputOutput other) { - if (other instanceof InputOutput) { - return input.compareTo((other).input); - } else { - throw new IllegalArgumentException(); - } + return input.compareTo(other.input); } } @@ -208,9 +195,8 @@ public class FSTTester { // of the term prefix that matches private T run(FST fst, IntsRef term, int[] prefixLength) throws IOException { assert prefixLength == null || prefixLength.length == 1; - final FST.Arc arc = fst.getFirstArc(new FST.Arc()); - final T NO_OUTPUT = fst.outputs.getNoOutput(); - T output = NO_OUTPUT; + final FST.Arc arc = fst.getFirstArc(new FST.Arc<>()); + T output = fst.outputs.getNoOutput(); final FST.BytesReader fstReader = fst.getBytesReader(); for (int i = 0; i <= term.length; i++) { @@ -243,12 +229,11 @@ public class FSTTester { } private T randomAcceptedWord(FST fst, IntsRefBuilder in) throws IOException { - FST.Arc arc = fst.getFirstArc(new FST.Arc()); + FST.Arc arc = fst.getFirstArc(new FST.Arc<>()); final List> arcs = new ArrayList<>(); in.clear(); - final T NO_OUTPUT = fst.outputs.getNoOutput(); - T output = NO_OUTPUT; + T output = fst.outputs.getNoOutput(); final FST.BytesReader fstReader = fst.getBytesReader(); while (true) { @@ -311,14 +296,12 @@ public class FSTTester { if (random.nextBoolean() && fst != null) { IOContext context = LuceneTestCase.newIOContext(random); - IndexOutput out = dir.createOutput("fst.bin", context); - fst.save(out, out); - out.close(); - IndexInput in = dir.openInput("fst.bin", context); - try { - fst = new FST(in, in, outputs); + try (IndexOutput out = dir.createOutput("fst.bin", context)) { + fst.save(out, out); + } + try (IndexInput in = dir.openInput("fst.bin", context)) { + fst = new FST<>(in, in, outputs); } finally { - in.close(); dir.deleteFile("fst.bin"); } } @@ -361,30 +344,8 @@ public class FSTTester { } // FST is complete - @SuppressWarnings("deprecation") private void verifyUnPruned(int inputMode, FST fst) throws IOException { - final FST fstLong; - final Set validOutputs; - long minLong = Long.MAX_VALUE; - long maxLong = Long.MIN_VALUE; - - if (doReverseLookup) { - @SuppressWarnings("unchecked") - FST fstLong0 = (FST) fst; - fstLong = fstLong0; - validOutputs = new HashSet<>(); - for (InputOutput pair : pairs) { - Long output = (Long) pair.output; - maxLong = Math.max(maxLong, output); - minLong = Math.min(minLong, output); - validOutputs.add(output); - } - } else { - fstLong = null; - validOutputs = null; - } - if (pairs.size() == 0) { assertNull(fst); return; @@ -447,20 +408,6 @@ public class FSTTester { termsMap.put(pair.input, pair.output); } - if (doReverseLookup && maxLong > minLong) { - // Do random lookups so we test null (output doesn't - // exist) case: - assertNull(Util.getByOutput(fstLong, minLong - 7)); - assertNull(Util.getByOutput(fstLong, maxLong + 7)); - - final int num = LuceneTestCase.atLeast(random, 100); - for (int iter = 0; iter < num; iter++) { - Long v = TestUtil.nextLong(random, minLong, maxLong); - IntsRef input = Util.getByOutput(fstLong, v); - assertTrue(validOutputs.contains(v) || input == null); - } - } - // find random matching word and make sure it's valid if (LuceneTestCase.VERBOSE) { System.out.println("TEST: verify random accepted terms"); @@ -473,14 +420,6 @@ public class FSTTester { "accepted word " + inputToString(inputMode, scratch.get()) + " is not valid", termsMap.containsKey(scratch.get())); assertTrue(outputsEqual(termsMap.get(scratch.get()), output)); - - if (doReverseLookup) { - // System.out.println("lookup output=" + output + " outs=" + fst.outputs); - IntsRef input = Util.getByOutput(fstLong, (Long) output); - assertNotNull(input); - // System.out.println(" got " + Util.toBytesRef(input, new BytesRef()).utf8ToString()); - assertEquals(scratch.get(), input); - } } // test IntsRefFSTEnum.seek: @@ -497,7 +436,7 @@ public class FSTTester { // seek to term that doesn't exist: while (true) { final IntsRef term = toIntsRef(getRandomString(random), inputMode); - int pos = Collections.binarySearch(pairs, new InputOutput(term, null)); + int pos = Collections.binarySearch(pairs, new InputOutput<>(term, null)); if (pos < 0) { pos = -(pos + 1); // ok doesn't exist @@ -617,7 +556,7 @@ public class FSTTester { for (; attempt < 10; attempt++) { IntsRef term = toIntsRef(getRandomString(random), inputMode); if (!termsMap.containsKey(term) && term.compareTo(pairs.get(upto).input) > 0) { - int pos = Collections.binarySearch(pairs, new InputOutput(term, null)); + int pos = Collections.binarySearch(pairs, new InputOutput<>(term, null)); assert pos < 0; upto = -(pos + 1); @@ -806,10 +745,8 @@ public class FSTTester { cmo2 != null && ((prune2 > 1 && cmo2.count >= prune2) || (prune2 == 1 && (cmo2.count >= 2 || prefix.length <= 1))); - } else if (cmo.count >= prune2) { - keep = true; } else { - keep = false; + keep = cmo.count >= prune2; } }