diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 269eb39d6eb..050606c9360 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -31,6 +31,9 @@ API Changes * LUCENE-10603: Remove SortedSetDocValues#NO_MORE_ORDS definition. (Greg Miller) +* GITHUB#11813: Remove Operations.isFinite: the recursive implementation could be problematic + for large automatons (WildcardQuery, PrefixQuery, RegExpQuery, etc). (taroplus, Robert Muir) + New Features --------------------- diff --git a/lucene/core/src/java/org/apache/lucene/analysis/AutomatonToTokenStream.java b/lucene/core/src/java/org/apache/lucene/analysis/AutomatonToTokenStream.java index ef1bbd20bea..bd2d915ae78 100644 --- a/lucene/core/src/java/org/apache/lucene/analysis/AutomatonToTokenStream.java +++ b/lucene/core/src/java/org/apache/lucene/analysis/AutomatonToTokenStream.java @@ -28,7 +28,6 @@ import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute; import org.apache.lucene.util.automaton.Automaton; -import org.apache.lucene.util.automaton.Operations; import org.apache.lucene.util.automaton.Transition; /** Converts an Automaton into a TokenStream. */ @@ -42,14 +41,10 @@ public class AutomatonToTokenStream { * position nodes for the TokenStream. The resulting TokenStream releases edges from the automaton * as tokens in order from the position nodes. This requires the automaton be a finite DAG. * - * @param automaton automaton to convert. Must be a finite DAG. + * @param automaton automaton to convert. Must be a finite DAG to avoid infinite loops! * @return TokenStream representation of automaton. */ public static TokenStream toTokenStream(Automaton automaton) { - if (Operations.isFinite(automaton) == false) { - throw new IllegalArgumentException("Automaton must be finite"); - } - List> positionNodes = new ArrayList<>(); Transition[][] transitions = automaton.getSortedTransitions(); diff --git a/lucene/core/src/java/org/apache/lucene/search/AutomatonQuery.java b/lucene/core/src/java/org/apache/lucene/search/AutomatonQuery.java index 0428f6c4abf..a1f8a5d4c15 100644 --- a/lucene/core/src/java/org/apache/lucene/search/AutomatonQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/AutomatonQuery.java @@ -95,8 +95,7 @@ public class AutomatonQuery extends MultiTermQuery implements Accountable { this.term = term; this.automaton = automaton; this.automatonIsBinary = isBinary; - // TODO: we could take isFinite too, to save a bit of CPU in CompiledAutomaton ctor?: - this.compiled = new CompiledAutomaton(automaton, null, true, isBinary); + this.compiled = new CompiledAutomaton(automaton, false, true, isBinary); this.ramBytesUsed = BASE_RAM_BYTES + term.ramBytesUsed() + automaton.ramBytesUsed() + compiled.ramBytesUsed(); diff --git a/lucene/core/src/java/org/apache/lucene/util/automaton/CompiledAutomaton.java b/lucene/core/src/java/org/apache/lucene/util/automaton/CompiledAutomaton.java index c8422719667..46f074a33d6 100644 --- a/lucene/core/src/java/org/apache/lucene/util/automaton/CompiledAutomaton.java +++ b/lucene/core/src/java/org/apache/lucene/util/automaton/CompiledAutomaton.java @@ -96,20 +96,17 @@ public class CompiledAutomaton implements Accountable { public final BytesRef commonSuffixRef; /** - * Indicates if the automaton accepts a finite set of strings. Null if this was not computed. Only - * valid for {@link AUTOMATON_TYPE#NORMAL}. + * Indicates if the automaton accepts a finite set of strings. Only valid for {@link + * AUTOMATON_TYPE#NORMAL}. */ - public final Boolean finite; + public final boolean finite; /** Which state, if any, accepts all suffixes, else -1. */ public final int sinkState; - /** - * Create this, passing simplify=true and finite=null, so that we try to simplify the automaton - * and determine if it is finite. - */ + /** Create this, passing simplify=true, so that we try to simplify the automaton. */ public CompiledAutomaton(Automaton automaton) { - this(automaton, null, true); + this(automaton, false, true); } /** Returns sink state, if present, else -1. */ @@ -139,21 +136,21 @@ public class CompiledAutomaton implements Accountable { } /** - * Create this. If finite is null, we use {@link Operations#isFinite} to determine whether it is - * finite. If simplify is true, we run possibly expensive operations to determine if the automaton - * is one the cases in {@link CompiledAutomaton.AUTOMATON_TYPE}. + * Create this. If simplify is true, we run possibly expensive operations to determine if the + * automaton is one the cases in {@link CompiledAutomaton.AUTOMATON_TYPE}. Set finite to true if + * the automaton is finite, otherwise set to false if infinite or you don't know. */ - public CompiledAutomaton(Automaton automaton, Boolean finite, boolean simplify) { + public CompiledAutomaton(Automaton automaton, boolean finite, boolean simplify) { this(automaton, finite, simplify, false); } /** - * Create this. If finite is null, we use {@link Operations#isFinite} to determine whether it is - * finite. If simplify is true, we run possibly expensive operations to determine if the automaton - * is one the cases in {@link CompiledAutomaton.AUTOMATON_TYPE}. + * Create this. If simplify is true, we run possibly expensive operations to determine if the + * automaton is one the cases in {@link CompiledAutomaton.AUTOMATON_TYPE}. Set finite to true if + * the automaton is finite, otherwise set to false if infinite or you don't know. */ public CompiledAutomaton( - Automaton automaton, Boolean finite, boolean simplify, boolean isBinary) { + Automaton automaton, boolean finite, boolean simplify, boolean isBinary) { if (automaton.getNumStates() == 0) { automaton = new Automaton(); @@ -174,7 +171,7 @@ public class CompiledAutomaton implements Accountable { commonSuffixRef = null; runAutomaton = null; this.automaton = null; - this.finite = null; + this.finite = true; sinkState = -1; nfaRunAutomaton = null; return; @@ -196,7 +193,7 @@ public class CompiledAutomaton implements Accountable { commonSuffixRef = null; runAutomaton = null; this.automaton = null; - this.finite = null; + this.finite = false; sinkState = -1; nfaRunAutomaton = null; return; @@ -210,7 +207,7 @@ public class CompiledAutomaton implements Accountable { commonSuffixRef = null; runAutomaton = null; this.automaton = null; - this.finite = null; + this.finite = true; if (isBinary) { term = StringHelper.intsRefToBytesRef(singleton); @@ -228,11 +225,7 @@ public class CompiledAutomaton implements Accountable { type = AUTOMATON_TYPE.NORMAL; term = null; - if (finite == null) { - this.finite = Operations.isFinite(automaton); - } else { - this.finite = finite; - } + this.finite = finite; Automaton binary; if (isBinary) { diff --git a/lucene/core/src/java/org/apache/lucene/util/automaton/Operations.java b/lucene/core/src/java/org/apache/lucene/util/automaton/Operations.java index 0a729ba1d65..26874420315 100644 --- a/lucene/core/src/java/org/apache/lucene/util/automaton/Operations.java +++ b/lucene/core/src/java/org/apache/lucene/util/automaton/Operations.java @@ -1037,44 +1037,6 @@ public final class Operations { return result; } - /** - * Returns true if the language of this automaton is finite. The automaton must not have any dead - * states. - */ - public static boolean isFinite(Automaton a) { - if (a.getNumStates() == 0) { - return true; - } - return isFinite( - new Transition(), a, 0, new BitSet(a.getNumStates()), new BitSet(a.getNumStates()), 0); - } - - /** - * Checks whether there is a loop containing state. (This is sufficient since there are never - * transitions to dead states.) - */ - // TODO: not great that this is recursive... in theory a - // large automata could exceed java's stack so the maximum level of recursion is bounded to 1000 - private static boolean isFinite( - Transition scratch, Automaton a, int state, BitSet path, BitSet visited, int level) { - if (level > MAX_RECURSION_LEVEL) { - throw new IllegalArgumentException("input automaton is too large: " + level); - } - path.set(state); - int numTransitions = a.initTransition(state, scratch); - for (int t = 0; t < numTransitions; t++) { - a.getTransition(state, t, scratch); - if (path.get(scratch.dest) - || (!visited.get(scratch.dest) - && !isFinite(scratch, a, scratch.dest, path, visited, level + 1))) { - return false; - } - } - path.clear(state); - visited.set(state); - return true; - } - /** * Returns the longest string that is a prefix of all accepted strings and visits each state at * most once. The automaton must not have dead states. If this automaton has already been diff --git a/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum2.java b/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum2.java index 4cac33011f3..50db97ec3f9 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum2.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum2.java @@ -167,8 +167,7 @@ public class TestTermsEnum2 extends LuceneTestCase { String reg = AutomatonTestUtil.randomRegexp(random()); Automaton automaton = new RegExp(reg, RegExp.NONE).toAutomaton(); automaton = Operations.determinize(automaton, Operations.DEFAULT_DETERMINIZE_WORK_LIMIT); - CompiledAutomaton ca = - new CompiledAutomaton(automaton, Operations.isFinite(automaton), false); + CompiledAutomaton ca = new CompiledAutomaton(automaton, false, false); TermsEnum te = MultiTerms.getTerms(reader, "field").intersect(ca, null); Automaton expected = Operations.determinize( diff --git a/lucene/core/src/test/org/apache/lucene/search/TestWildcard.java b/lucene/core/src/test/org/apache/lucene/search/TestWildcard.java index 4522ab8ca51..a92f53682bf 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestWildcard.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestWildcard.java @@ -389,4 +389,25 @@ public class TestWildcard extends LuceneTestCase { reader.close(); dir.close(); } + + /** Tests large Wildcard queries */ + public void testLarge() throws IOException { + // big string from a user + String big = + "{group-bm-http-server-02083.node.dm.reg,group-bm-http-server-02082.node.dm.reg,group-bm-http-server-02081.node.dm.reg,group-bm-http-server-02080.node.dm.reg,group-bm-http-server-02079.node.dm.reg,group-bm-http-server-02078.node.dm.reg,group-bm-http-server-02077.node.dm.reg,group-bm-http-server-02076.node.dm.reg,group-bm-http-server-02073.node.dm.reg,group-bm-http-server-02070.node.dm.reg,group-bm-http-server-02067.node.dm.reg,group-bm-http-server-02064.node.dm.reg,group-bm-http-server-02029.node.dm.reg,group-bm-http-server-02028.node.dm.reg,group-bm-http-server-02027.node.dm.reg,group-bm-http-server-02026.node.dm.reg,group-bm-http-server-02025.node.dm.reg,group-bm-http-server-02023.node.dm.reg,group-bm-http-server-02022.node.dm.reg,group-bm-http-server-02021.node.dm.reg,group-bm-http-server-02020.node.dm.reg,group-bm-http-server-02019.node.dm.reg,group-bm-http-server-02018.node.dm.reg,group-bm-http-server-02016.node.dm.reg,group-bm-http-server-02015.node.dm.reg,group-bm-http-server-02014.node.dm.reg,group-bm-http-server-02009.node.dm.reg,group-bm-http-server-02007.node.dm.reg,group-bm-http-server-02004.node.dm.reg,group-bm-http-server-02003.node.dm.reg,group-bm-http-server-02002.node.dm.reg,group-bm-http-server-01311.node.dm.reg,group-bm-http-server-01309.node.dm.reg,group-bm-http-server-01307.node.dm.reg}"; + Directory dir = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random(), dir); + Document doc = new Document(); + doc.add(newStringField("body", big, Field.Store.YES)); + writer.addDocument(doc); + writer.close(); + + IndexReader reader = DirectoryReader.open(dir); + IndexSearcher searcher = newSearcher(reader); + Query query = new WildcardQuery(new Term("body", big + "*")); + assertMatches(searcher, query, 1); + + reader.close(); + dir.close(); + } } diff --git a/lucene/core/src/test/org/apache/lucene/util/automaton/TestAutomaton.java b/lucene/core/src/test/org/apache/lucene/util/automaton/TestAutomaton.java index 093882a3001..6abe0aaff08 100644 --- a/lucene/core/src/test/org/apache/lucene/util/automaton/TestAutomaton.java +++ b/lucene/core/src/test/org/apache/lucene/util/automaton/TestAutomaton.java @@ -224,7 +224,7 @@ public class TestAutomaton extends LuceneTestCase { assertTrue(Operations.run(a, "mn")); assertTrue(Operations.run(a, "mone")); assertFalse(Operations.run(a, "m")); - assertFalse(Operations.isFinite(a)); + assertFalse(AutomatonTestUtil.isFinite(a)); } public void testUnion1() throws Exception { @@ -1237,7 +1237,7 @@ public class TestAutomaton extends LuceneTestCase { private void assertSame(Collection terms, Automaton a) { try { - assertTrue(Operations.isFinite(a)); + assertTrue(AutomatonTestUtil.isFinite(a)); assertFalse(Operations.isTotal(a)); Automaton detA = Operations.determinize(a, DEFAULT_DETERMINIZE_WORK_LIMIT); @@ -1354,7 +1354,7 @@ public class TestAutomaton extends LuceneTestCase { byte[] zeros = new byte[3]; Automaton a = makeBinaryInterval(newBytesRef(zeros, 0, 1), true, newBytesRef(zeros, 0, 2), true); - assertTrue(Operations.isFinite(a)); + assertTrue(AutomatonTestUtil.isFinite(a)); assertFalse(accepts(a, newBytesRef())); assertTrue(accepts(a, newBytesRef(zeros, 0, 1))); assertTrue(accepts(a, newBytesRef(zeros, 0, 2))); @@ -1362,7 +1362,7 @@ public class TestAutomaton extends LuceneTestCase { // '' (incl) - 00 (incl) a = makeBinaryInterval(newBytesRef(), true, newBytesRef(zeros, 0, 2), true); - assertTrue(Operations.isFinite(a)); + assertTrue(AutomatonTestUtil.isFinite(a)); assertTrue(accepts(a, newBytesRef())); assertTrue(accepts(a, newBytesRef(zeros, 0, 1))); assertTrue(accepts(a, newBytesRef(zeros, 0, 2))); @@ -1370,7 +1370,7 @@ public class TestAutomaton extends LuceneTestCase { // '' (excl) - 00 (incl) a = makeBinaryInterval(newBytesRef(), false, newBytesRef(zeros, 0, 2), true); - assertTrue(Operations.isFinite(a)); + assertTrue(AutomatonTestUtil.isFinite(a)); assertFalse(accepts(a, newBytesRef())); assertTrue(accepts(a, newBytesRef(zeros, 0, 1))); assertTrue(accepts(a, newBytesRef(zeros, 0, 2))); @@ -1378,7 +1378,7 @@ public class TestAutomaton extends LuceneTestCase { // 0 (excl) - 00 (incl) a = makeBinaryInterval(newBytesRef(zeros, 0, 1), false, newBytesRef(zeros, 0, 2), true); - assertTrue(Operations.isFinite(a)); + assertTrue(AutomatonTestUtil.isFinite(a)); assertFalse(accepts(a, newBytesRef())); assertFalse(accepts(a, newBytesRef(zeros, 0, 1))); assertTrue(accepts(a, newBytesRef(zeros, 0, 2))); @@ -1386,7 +1386,7 @@ public class TestAutomaton extends LuceneTestCase { // 0 (excl) - 00 (excl) a = makeBinaryInterval(newBytesRef(zeros, 0, 1), false, newBytesRef(zeros, 0, 2), false); - assertTrue(Operations.isFinite(a)); + assertTrue(AutomatonTestUtil.isFinite(a)); assertFalse(accepts(a, newBytesRef())); assertFalse(accepts(a, newBytesRef(zeros, 0, 1))); assertFalse(accepts(a, newBytesRef(zeros, 0, 2))); @@ -1420,7 +1420,7 @@ public class TestAutomaton extends LuceneTestCase { makeBinaryInterval( minTerm, minInclusive, maxTerm, maxInclusive); - assertTrue(Operations.isFinite(a)); + assertTrue(AutomatonTestUtil.isFinite(a)); int expectedCount = maxTerm.length - minTerm.length + 1; if (minInclusive == false) { expectedCount--; @@ -1529,7 +1529,7 @@ public class TestAutomaton extends LuceneTestCase { public void testMakeBinaryIntervalEqual() throws Exception { Automaton a = Automata.makeBinaryInterval(newBytesRef("bar"), true, newBytesRef("bar"), true); assertTrue(Operations.run(a, intsRef("bar"))); - assertTrue(Operations.isFinite(a)); + assertTrue(AutomatonTestUtil.isFinite(a)); assertEquals(1, TestOperations.getFiniteStrings(a).size()); } diff --git a/lucene/core/src/test/org/apache/lucene/util/automaton/TestCompiledAutomaton.java b/lucene/core/src/test/org/apache/lucene/util/automaton/TestCompiledAutomaton.java index d24bed8c793..432c281b184 100644 --- a/lucene/core/src/test/org/apache/lucene/util/automaton/TestCompiledAutomaton.java +++ b/lucene/core/src/test/org/apache/lucene/util/automaton/TestCompiledAutomaton.java @@ -130,7 +130,7 @@ public class TestCompiledAutomaton extends LuceneTestCase { a.addTransition(state, state, 0, 0xff); a.finishState(); - CompiledAutomaton ca = new CompiledAutomaton(a, null, true, true); + CompiledAutomaton ca = new CompiledAutomaton(a, false, true, true); assertEquals(CompiledAutomaton.AUTOMATON_TYPE.ALL, ca.type); } @@ -142,7 +142,7 @@ public class TestCompiledAutomaton extends LuceneTestCase { a.addTransition(state, state, 0, Character.MAX_CODE_POINT); a.finishState(); - CompiledAutomaton ca = new CompiledAutomaton(a, null, true, false); + CompiledAutomaton ca = new CompiledAutomaton(a, false, true, false); assertEquals(CompiledAutomaton.AUTOMATON_TYPE.ALL, ca.type); } @@ -150,14 +150,14 @@ public class TestCompiledAutomaton extends LuceneTestCase { public void testBinarySingleton() throws Exception { // This is just ascii so we can pretend it's binary: Automaton a = Automata.makeString("foobar"); - CompiledAutomaton ca = new CompiledAutomaton(a, null, true, true); + CompiledAutomaton ca = new CompiledAutomaton(a, true, true, true); assertEquals(CompiledAutomaton.AUTOMATON_TYPE.SINGLE, ca.type); } // LUCENE-6367 public void testUnicodeSingleton() throws Exception { Automaton a = Automata.makeString(TestUtil.randomRealisticUnicodeString(random())); - CompiledAutomaton ca = new CompiledAutomaton(a, null, true, false); + CompiledAutomaton ca = new CompiledAutomaton(a, true, true, false); assertEquals(CompiledAutomaton.AUTOMATON_TYPE.SINGLE, ca.type); } } diff --git a/lucene/core/src/test/org/apache/lucene/util/automaton/TestDeterminizeLexicon.java b/lucene/core/src/test/org/apache/lucene/util/automaton/TestDeterminizeLexicon.java index 6aef1562522..ce654cb3a7f 100644 --- a/lucene/core/src/test/org/apache/lucene/util/automaton/TestDeterminizeLexicon.java +++ b/lucene/core/src/test/org/apache/lucene/util/automaton/TestDeterminizeLexicon.java @@ -22,6 +22,7 @@ import java.util.Collections; import java.util.List; import org.apache.lucene.tests.util.LuceneTestCase; import org.apache.lucene.tests.util.TestUtil; +import org.apache.lucene.tests.util.automaton.AutomatonTestUtil; /** * Not thorough, but tries to test determinism correctness somewhat randomly, by determinizing a @@ -49,7 +50,7 @@ public class TestDeterminizeLexicon extends LuceneTestCase { Collections.shuffle(automata, random()); Automaton lex = Operations.union(automata); lex = Operations.determinize(lex, 1000000); - assertTrue(Operations.isFinite(lex)); + assertTrue(AutomatonTestUtil.isFinite(lex)); for (String s : terms) { assertTrue(Operations.run(lex, s)); } diff --git a/lucene/core/src/test/org/apache/lucene/util/automaton/TestLevenshteinAutomata.java b/lucene/core/src/test/org/apache/lucene/util/automaton/TestLevenshteinAutomata.java index c112fd3fbc1..32fc2005cdd 100644 --- a/lucene/core/src/test/org/apache/lucene/util/automaton/TestLevenshteinAutomata.java +++ b/lucene/core/src/test/org/apache/lucene/util/automaton/TestLevenshteinAutomata.java @@ -21,6 +21,7 @@ import static org.apache.lucene.util.automaton.Operations.DEFAULT_DETERMINIZE_WO import java.util.ArrayList; import java.util.List; import org.apache.lucene.tests.util.LuceneTestCase; +import org.apache.lucene.tests.util.automaton.AutomatonTestUtil; public class TestLevenshteinAutomata extends LuceneTestCase { @@ -73,8 +74,8 @@ public class TestLevenshteinAutomata extends LuceneTestCase { assertNotNull(tautomata[n]); assertTrue(automata[n].isDeterministic()); assertTrue(tautomata[n].isDeterministic()); - assertTrue(Operations.isFinite(automata[n])); - assertTrue(Operations.isFinite(tautomata[n])); + assertTrue(AutomatonTestUtil.isFinite(automata[n])); + assertTrue(AutomatonTestUtil.isFinite(tautomata[n])); assertFalse(Operations.hasDeadStatesFromInitial(automata[n])); assertFalse(Operations.hasDeadStatesFromInitial(tautomata[n])); // check that the dfa for n-1 accepts a subset of the dfa for n diff --git a/lucene/core/src/test/org/apache/lucene/util/automaton/TestLimitedFiniteStringsIterator.java b/lucene/core/src/test/org/apache/lucene/util/automaton/TestLimitedFiniteStringsIterator.java index f99e41d5b09..3d69bcde4ec 100644 --- a/lucene/core/src/test/org/apache/lucene/util/automaton/TestLimitedFiniteStringsIterator.java +++ b/lucene/core/src/test/org/apache/lucene/util/automaton/TestLimitedFiniteStringsIterator.java @@ -40,11 +40,11 @@ public class TestLimitedFiniteStringsIterator extends LuceneTestCase { getFiniteStrings(new LimitedFiniteStringsIterator(a, TestUtil.nextInt(random(), 1, 1000))); // NOTE: cannot do this, because the method is not // guaranteed to detect cycles when you have a limit - // assertTrue(Operations.isFinite(a)); + // assertTrue(AutomatonTestUtil.isFinite(a)); } catch ( @SuppressWarnings("unused") IllegalArgumentException iae) { - assertFalse(Operations.isFinite(a)); + assertFalse(AutomatonTestUtil.isFinite(a)); } } } diff --git a/lucene/core/src/test/org/apache/lucene/util/automaton/TestOperations.java b/lucene/core/src/test/org/apache/lucene/util/automaton/TestOperations.java index 55131c78126..495d8f528af 100644 --- a/lucene/core/src/test/org/apache/lucene/util/automaton/TestOperations.java +++ b/lucene/core/src/test/org/apache/lucene/util/automaton/TestOperations.java @@ -115,14 +115,6 @@ public class TestOperations extends LuceneTestCase { } } } - /** tests against the original brics implementation. */ - public void testIsFinite() { - int num = atLeast(200); - for (int i = 0; i < num; i++) { - Automaton a = AutomatonTestUtil.randomAutomaton(random()); - assertEquals(AutomatonTestUtil.isFiniteSlow(a), Operations.isFinite(a)); - } - } public void testIsFiniteEatsStack() { char[] chars = new char[50000]; @@ -133,7 +125,7 @@ public class TestOperations extends LuceneTestCase { Automaton a = Operations.union(Automata.makeString(bigString1), Automata.makeString(bigString2)); IllegalArgumentException exc = - expectThrows(IllegalArgumentException.class, () -> Operations.isFinite(a)); + expectThrows(IllegalArgumentException.class, () -> AutomatonTestUtil.isFinite(a)); assertTrue(exc.getMessage().contains("input automaton is too large")); } diff --git a/lucene/queries/src/java/org/apache/lucene/queries/intervals/Intervals.java b/lucene/queries/src/java/org/apache/lucene/queries/intervals/Intervals.java index ae494b90a95..aa546a469c2 100644 --- a/lucene/queries/src/java/org/apache/lucene/queries/intervals/Intervals.java +++ b/lucene/queries/src/java/org/apache/lucene/queries/intervals/Intervals.java @@ -169,7 +169,8 @@ public final class Intervals { * @throws IllegalStateException if the prefix expands to more than {@code maxExpansions} terms */ public static IntervalsSource prefix(BytesRef prefix, int maxExpansions) { - CompiledAutomaton ca = new CompiledAutomaton(PrefixQuery.toAutomaton(prefix), null, true, true); + CompiledAutomaton ca = + new CompiledAutomaton(PrefixQuery.toAutomaton(prefix), false, true, true); return new MultiTermIntervalsSource(ca, maxExpansions, prefix.utf8ToString() + "*"); } diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java index f20dded1c1c..389069e0a9b 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java @@ -877,9 +877,6 @@ public class AnalyzingSuggester extends Lookup { automaton = replaceSep(automaton); automaton = convertAutomaton(automaton); - // TODO: LUCENE-5660 re-enable this once we disallow massive suggestion strings - // assert SpecialOperations.isFinite(automaton); - // Get all paths from the automaton (there can be // more than one path, eg if the analyzer created a // graph using SynFilter or WDF): diff --git a/lucene/test-framework/src/java/org/apache/lucene/tests/index/RandomPostingsTester.java b/lucene/test-framework/src/java/org/apache/lucene/tests/index/RandomPostingsTester.java index 76948f68733..5eb48d31cb5 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/tests/index/RandomPostingsTester.java +++ b/lucene/test-framework/src/java/org/apache/lucene/tests/index/RandomPostingsTester.java @@ -1605,7 +1605,7 @@ public class RandomPostingsTester { while (true) { Automaton a = AutomatonTestUtil.randomAutomaton(random); a = Operations.determinize(a, Operations.DEFAULT_DETERMINIZE_WORK_LIMIT); - CompiledAutomaton ca = new CompiledAutomaton(a, null, true, false); + CompiledAutomaton ca = new CompiledAutomaton(a, false, true, false); if (ca.type != CompiledAutomaton.AUTOMATON_TYPE.NORMAL) { // Keep retrying until we get an A that will really "use" the PF's intersect code: continue; diff --git a/lucene/test-framework/src/java/org/apache/lucene/tests/util/automaton/AutomatonTestUtil.java b/lucene/test-framework/src/java/org/apache/lucene/tests/util/automaton/AutomatonTestUtil.java index a4cd40a3bd1..0a64252387d 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/tests/util/automaton/AutomatonTestUtil.java +++ b/lucene/test-framework/src/java/org/apache/lucene/tests/util/automaton/AutomatonTestUtil.java @@ -17,6 +17,7 @@ package org.apache.lucene.tests.util.automaton; import java.util.ArrayList; +import java.util.BitSet; import java.util.HashMap; import java.util.HashSet; import java.util.LinkedList; @@ -463,35 +464,40 @@ public class AutomatonTestUtil { } /** - * Returns true if the language of this automaton is finite. - * - *

WARNING: this method is slow, it will blow up if the automaton is large. this is only used - * to test the correctness of our faster implementation. + * Returns true if the language of this automaton is finite. The automaton must not have any dead + * states. */ - public static boolean isFiniteSlow(Automaton a) { + public static boolean isFinite(Automaton a) { if (a.getNumStates() == 0) { return true; } - return isFiniteSlow(a, 0, new HashSet()); + return isFinite( + new Transition(), a, 0, new BitSet(a.getNumStates()), new BitSet(a.getNumStates()), 0); } /** - * Checks whether there is a loop containing s. (This is sufficient since there are never + * Checks whether there is a loop containing state. (This is sufficient since there are never * transitions to dead states.) */ // TODO: not great that this is recursive... in theory a - // large automata could exceed java's stack - private static boolean isFiniteSlow(Automaton a, int s, HashSet path) { - path.add(s); - Transition t = new Transition(); - int count = a.initTransition(s, t); - for (int i = 0; i < count; i++) { - a.getNextTransition(t); - if (path.contains(t.dest) || !isFiniteSlow(a, t.dest, path)) { + // large automata could exceed java's stack so the maximum level of recursion is bounded to 1000 + private static boolean isFinite( + Transition scratch, Automaton a, int state, BitSet path, BitSet visited, int level) { + if (level > Operations.MAX_RECURSION_LEVEL) { + throw new IllegalArgumentException("input automaton is too large: " + level); + } + path.set(state); + int numTransitions = a.initTransition(state, scratch); + for (int t = 0; t < numTransitions; t++) { + a.getTransition(state, t, scratch); + if (path.get(scratch.dest) + || (!visited.get(scratch.dest) + && !isFinite(scratch, a, scratch.dest, path, visited, level + 1))) { return false; } } - path.remove(s); + path.clear(state); + visited.set(state); return true; }