mirror of https://github.com/apache/lucene.git
Remove Operations.isFinite (#11813)
This method is recursive: to avoid eating too much stack we apply a small limit. This means it can't really be used on any largish automata without hitting exception. But the benefit of knowing finite vs infinite in AutomatonTermsEnum is minor: let's not auto-compute this. FuzzyQuery still gets the finite optimization because its finite by definition. PrefixQuery is always infinite. Wildcard/Regex just assume infinite which is safe to do. Remove the auto-computation and the "trillean" Boolean parameter. If you dont know that your automaton is finite, pass false to CompiledAutomaton, it is safe. Move this method to AutomatonTestUtil so we can still use it in test asserts. Closes #11809
This commit is contained in:
parent
54fba99cb1
commit
15f3743f02
|
@ -31,6 +31,9 @@ API Changes
|
|||
|
||||
* LUCENE-10603: Remove SortedSetDocValues#NO_MORE_ORDS definition. (Greg Miller)
|
||||
|
||||
* GITHUB#11813: Remove Operations.isFinite: the recursive implementation could be problematic
|
||||
for large automatons (WildcardQuery, PrefixQuery, RegExpQuery, etc). (taroplus, Robert Muir)
|
||||
|
||||
New Features
|
||||
---------------------
|
||||
|
||||
|
|
|
@ -28,7 +28,6 @@ import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
|||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
|
||||
import org.apache.lucene.util.automaton.Automaton;
|
||||
import org.apache.lucene.util.automaton.Operations;
|
||||
import org.apache.lucene.util.automaton.Transition;
|
||||
|
||||
/** Converts an Automaton into a TokenStream. */
|
||||
|
@ -42,14 +41,10 @@ public class AutomatonToTokenStream {
|
|||
* position nodes for the TokenStream. The resulting TokenStream releases edges from the automaton
|
||||
* as tokens in order from the position nodes. This requires the automaton be a finite DAG.
|
||||
*
|
||||
* @param automaton automaton to convert. Must be a finite DAG.
|
||||
* @param automaton automaton to convert. Must be a finite DAG to avoid infinite loops!
|
||||
* @return TokenStream representation of automaton.
|
||||
*/
|
||||
public static TokenStream toTokenStream(Automaton automaton) {
|
||||
if (Operations.isFinite(automaton) == false) {
|
||||
throw new IllegalArgumentException("Automaton must be finite");
|
||||
}
|
||||
|
||||
List<List<Integer>> positionNodes = new ArrayList<>();
|
||||
|
||||
Transition[][] transitions = automaton.getSortedTransitions();
|
||||
|
|
|
@ -95,8 +95,7 @@ public class AutomatonQuery extends MultiTermQuery implements Accountable {
|
|||
this.term = term;
|
||||
this.automaton = automaton;
|
||||
this.automatonIsBinary = isBinary;
|
||||
// TODO: we could take isFinite too, to save a bit of CPU in CompiledAutomaton ctor?:
|
||||
this.compiled = new CompiledAutomaton(automaton, null, true, isBinary);
|
||||
this.compiled = new CompiledAutomaton(automaton, false, true, isBinary);
|
||||
|
||||
this.ramBytesUsed =
|
||||
BASE_RAM_BYTES + term.ramBytesUsed() + automaton.ramBytesUsed() + compiled.ramBytesUsed();
|
||||
|
|
|
@ -96,20 +96,17 @@ public class CompiledAutomaton implements Accountable {
|
|||
public final BytesRef commonSuffixRef;
|
||||
|
||||
/**
|
||||
* Indicates if the automaton accepts a finite set of strings. Null if this was not computed. Only
|
||||
* valid for {@link AUTOMATON_TYPE#NORMAL}.
|
||||
* Indicates if the automaton accepts a finite set of strings. Only valid for {@link
|
||||
* AUTOMATON_TYPE#NORMAL}.
|
||||
*/
|
||||
public final Boolean finite;
|
||||
public final boolean finite;
|
||||
|
||||
/** Which state, if any, accepts all suffixes, else -1. */
|
||||
public final int sinkState;
|
||||
|
||||
/**
|
||||
* Create this, passing simplify=true and finite=null, so that we try to simplify the automaton
|
||||
* and determine if it is finite.
|
||||
*/
|
||||
/** Create this, passing simplify=true, so that we try to simplify the automaton. */
|
||||
public CompiledAutomaton(Automaton automaton) {
|
||||
this(automaton, null, true);
|
||||
this(automaton, false, true);
|
||||
}
|
||||
|
||||
/** Returns sink state, if present, else -1. */
|
||||
|
@ -139,21 +136,21 @@ public class CompiledAutomaton implements Accountable {
|
|||
}
|
||||
|
||||
/**
|
||||
* Create this. If finite is null, we use {@link Operations#isFinite} to determine whether it is
|
||||
* finite. If simplify is true, we run possibly expensive operations to determine if the automaton
|
||||
* is one the cases in {@link CompiledAutomaton.AUTOMATON_TYPE}.
|
||||
* Create this. If simplify is true, we run possibly expensive operations to determine if the
|
||||
* automaton is one the cases in {@link CompiledAutomaton.AUTOMATON_TYPE}. Set finite to true if
|
||||
* the automaton is finite, otherwise set to false if infinite or you don't know.
|
||||
*/
|
||||
public CompiledAutomaton(Automaton automaton, Boolean finite, boolean simplify) {
|
||||
public CompiledAutomaton(Automaton automaton, boolean finite, boolean simplify) {
|
||||
this(automaton, finite, simplify, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create this. If finite is null, we use {@link Operations#isFinite} to determine whether it is
|
||||
* finite. If simplify is true, we run possibly expensive operations to determine if the automaton
|
||||
* is one the cases in {@link CompiledAutomaton.AUTOMATON_TYPE}.
|
||||
* Create this. If simplify is true, we run possibly expensive operations to determine if the
|
||||
* automaton is one the cases in {@link CompiledAutomaton.AUTOMATON_TYPE}. Set finite to true if
|
||||
* the automaton is finite, otherwise set to false if infinite or you don't know.
|
||||
*/
|
||||
public CompiledAutomaton(
|
||||
Automaton automaton, Boolean finite, boolean simplify, boolean isBinary) {
|
||||
Automaton automaton, boolean finite, boolean simplify, boolean isBinary) {
|
||||
|
||||
if (automaton.getNumStates() == 0) {
|
||||
automaton = new Automaton();
|
||||
|
@ -174,7 +171,7 @@ public class CompiledAutomaton implements Accountable {
|
|||
commonSuffixRef = null;
|
||||
runAutomaton = null;
|
||||
this.automaton = null;
|
||||
this.finite = null;
|
||||
this.finite = true;
|
||||
sinkState = -1;
|
||||
nfaRunAutomaton = null;
|
||||
return;
|
||||
|
@ -196,7 +193,7 @@ public class CompiledAutomaton implements Accountable {
|
|||
commonSuffixRef = null;
|
||||
runAutomaton = null;
|
||||
this.automaton = null;
|
||||
this.finite = null;
|
||||
this.finite = false;
|
||||
sinkState = -1;
|
||||
nfaRunAutomaton = null;
|
||||
return;
|
||||
|
@ -210,7 +207,7 @@ public class CompiledAutomaton implements Accountable {
|
|||
commonSuffixRef = null;
|
||||
runAutomaton = null;
|
||||
this.automaton = null;
|
||||
this.finite = null;
|
||||
this.finite = true;
|
||||
|
||||
if (isBinary) {
|
||||
term = StringHelper.intsRefToBytesRef(singleton);
|
||||
|
@ -228,11 +225,7 @@ public class CompiledAutomaton implements Accountable {
|
|||
type = AUTOMATON_TYPE.NORMAL;
|
||||
term = null;
|
||||
|
||||
if (finite == null) {
|
||||
this.finite = Operations.isFinite(automaton);
|
||||
} else {
|
||||
this.finite = finite;
|
||||
}
|
||||
|
||||
Automaton binary;
|
||||
if (isBinary) {
|
||||
|
|
|
@ -1037,44 +1037,6 @@ public final class Operations {
|
|||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if the language of this automaton is finite. The automaton must not have any dead
|
||||
* states.
|
||||
*/
|
||||
public static boolean isFinite(Automaton a) {
|
||||
if (a.getNumStates() == 0) {
|
||||
return true;
|
||||
}
|
||||
return isFinite(
|
||||
new Transition(), a, 0, new BitSet(a.getNumStates()), new BitSet(a.getNumStates()), 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks whether there is a loop containing state. (This is sufficient since there are never
|
||||
* transitions to dead states.)
|
||||
*/
|
||||
// TODO: not great that this is recursive... in theory a
|
||||
// large automata could exceed java's stack so the maximum level of recursion is bounded to 1000
|
||||
private static boolean isFinite(
|
||||
Transition scratch, Automaton a, int state, BitSet path, BitSet visited, int level) {
|
||||
if (level > MAX_RECURSION_LEVEL) {
|
||||
throw new IllegalArgumentException("input automaton is too large: " + level);
|
||||
}
|
||||
path.set(state);
|
||||
int numTransitions = a.initTransition(state, scratch);
|
||||
for (int t = 0; t < numTransitions; t++) {
|
||||
a.getTransition(state, t, scratch);
|
||||
if (path.get(scratch.dest)
|
||||
|| (!visited.get(scratch.dest)
|
||||
&& !isFinite(scratch, a, scratch.dest, path, visited, level + 1))) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
path.clear(state);
|
||||
visited.set(state);
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the longest string that is a prefix of all accepted strings and visits each state at
|
||||
* most once. The automaton must not have dead states. If this automaton has already been
|
||||
|
|
|
@ -167,8 +167,7 @@ public class TestTermsEnum2 extends LuceneTestCase {
|
|||
String reg = AutomatonTestUtil.randomRegexp(random());
|
||||
Automaton automaton = new RegExp(reg, RegExp.NONE).toAutomaton();
|
||||
automaton = Operations.determinize(automaton, Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
|
||||
CompiledAutomaton ca =
|
||||
new CompiledAutomaton(automaton, Operations.isFinite(automaton), false);
|
||||
CompiledAutomaton ca = new CompiledAutomaton(automaton, false, false);
|
||||
TermsEnum te = MultiTerms.getTerms(reader, "field").intersect(ca, null);
|
||||
Automaton expected =
|
||||
Operations.determinize(
|
||||
|
|
|
@ -389,4 +389,25 @@ public class TestWildcard extends LuceneTestCase {
|
|||
reader.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
/** Tests large Wildcard queries */
|
||||
public void testLarge() throws IOException {
|
||||
// big string from a user
|
||||
String big =
|
||||
"{group-bm-http-server-02083.node.dm.reg,group-bm-http-server-02082.node.dm.reg,group-bm-http-server-02081.node.dm.reg,group-bm-http-server-02080.node.dm.reg,group-bm-http-server-02079.node.dm.reg,group-bm-http-server-02078.node.dm.reg,group-bm-http-server-02077.node.dm.reg,group-bm-http-server-02076.node.dm.reg,group-bm-http-server-02073.node.dm.reg,group-bm-http-server-02070.node.dm.reg,group-bm-http-server-02067.node.dm.reg,group-bm-http-server-02064.node.dm.reg,group-bm-http-server-02029.node.dm.reg,group-bm-http-server-02028.node.dm.reg,group-bm-http-server-02027.node.dm.reg,group-bm-http-server-02026.node.dm.reg,group-bm-http-server-02025.node.dm.reg,group-bm-http-server-02023.node.dm.reg,group-bm-http-server-02022.node.dm.reg,group-bm-http-server-02021.node.dm.reg,group-bm-http-server-02020.node.dm.reg,group-bm-http-server-02019.node.dm.reg,group-bm-http-server-02018.node.dm.reg,group-bm-http-server-02016.node.dm.reg,group-bm-http-server-02015.node.dm.reg,group-bm-http-server-02014.node.dm.reg,group-bm-http-server-02009.node.dm.reg,group-bm-http-server-02007.node.dm.reg,group-bm-http-server-02004.node.dm.reg,group-bm-http-server-02003.node.dm.reg,group-bm-http-server-02002.node.dm.reg,group-bm-http-server-01311.node.dm.reg,group-bm-http-server-01309.node.dm.reg,group-bm-http-server-01307.node.dm.reg}";
|
||||
Directory dir = newDirectory();
|
||||
RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
|
||||
Document doc = new Document();
|
||||
doc.add(newStringField("body", big, Field.Store.YES));
|
||||
writer.addDocument(doc);
|
||||
writer.close();
|
||||
|
||||
IndexReader reader = DirectoryReader.open(dir);
|
||||
IndexSearcher searcher = newSearcher(reader);
|
||||
Query query = new WildcardQuery(new Term("body", big + "*"));
|
||||
assertMatches(searcher, query, 1);
|
||||
|
||||
reader.close();
|
||||
dir.close();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -224,7 +224,7 @@ public class TestAutomaton extends LuceneTestCase {
|
|||
assertTrue(Operations.run(a, "mn"));
|
||||
assertTrue(Operations.run(a, "mone"));
|
||||
assertFalse(Operations.run(a, "m"));
|
||||
assertFalse(Operations.isFinite(a));
|
||||
assertFalse(AutomatonTestUtil.isFinite(a));
|
||||
}
|
||||
|
||||
public void testUnion1() throws Exception {
|
||||
|
@ -1237,7 +1237,7 @@ public class TestAutomaton extends LuceneTestCase {
|
|||
private void assertSame(Collection<BytesRef> terms, Automaton a) {
|
||||
|
||||
try {
|
||||
assertTrue(Operations.isFinite(a));
|
||||
assertTrue(AutomatonTestUtil.isFinite(a));
|
||||
assertFalse(Operations.isTotal(a));
|
||||
|
||||
Automaton detA = Operations.determinize(a, DEFAULT_DETERMINIZE_WORK_LIMIT);
|
||||
|
@ -1354,7 +1354,7 @@ public class TestAutomaton extends LuceneTestCase {
|
|||
byte[] zeros = new byte[3];
|
||||
Automaton a =
|
||||
makeBinaryInterval(newBytesRef(zeros, 0, 1), true, newBytesRef(zeros, 0, 2), true);
|
||||
assertTrue(Operations.isFinite(a));
|
||||
assertTrue(AutomatonTestUtil.isFinite(a));
|
||||
assertFalse(accepts(a, newBytesRef()));
|
||||
assertTrue(accepts(a, newBytesRef(zeros, 0, 1)));
|
||||
assertTrue(accepts(a, newBytesRef(zeros, 0, 2)));
|
||||
|
@ -1362,7 +1362,7 @@ public class TestAutomaton extends LuceneTestCase {
|
|||
|
||||
// '' (incl) - 00 (incl)
|
||||
a = makeBinaryInterval(newBytesRef(), true, newBytesRef(zeros, 0, 2), true);
|
||||
assertTrue(Operations.isFinite(a));
|
||||
assertTrue(AutomatonTestUtil.isFinite(a));
|
||||
assertTrue(accepts(a, newBytesRef()));
|
||||
assertTrue(accepts(a, newBytesRef(zeros, 0, 1)));
|
||||
assertTrue(accepts(a, newBytesRef(zeros, 0, 2)));
|
||||
|
@ -1370,7 +1370,7 @@ public class TestAutomaton extends LuceneTestCase {
|
|||
|
||||
// '' (excl) - 00 (incl)
|
||||
a = makeBinaryInterval(newBytesRef(), false, newBytesRef(zeros, 0, 2), true);
|
||||
assertTrue(Operations.isFinite(a));
|
||||
assertTrue(AutomatonTestUtil.isFinite(a));
|
||||
assertFalse(accepts(a, newBytesRef()));
|
||||
assertTrue(accepts(a, newBytesRef(zeros, 0, 1)));
|
||||
assertTrue(accepts(a, newBytesRef(zeros, 0, 2)));
|
||||
|
@ -1378,7 +1378,7 @@ public class TestAutomaton extends LuceneTestCase {
|
|||
|
||||
// 0 (excl) - 00 (incl)
|
||||
a = makeBinaryInterval(newBytesRef(zeros, 0, 1), false, newBytesRef(zeros, 0, 2), true);
|
||||
assertTrue(Operations.isFinite(a));
|
||||
assertTrue(AutomatonTestUtil.isFinite(a));
|
||||
assertFalse(accepts(a, newBytesRef()));
|
||||
assertFalse(accepts(a, newBytesRef(zeros, 0, 1)));
|
||||
assertTrue(accepts(a, newBytesRef(zeros, 0, 2)));
|
||||
|
@ -1386,7 +1386,7 @@ public class TestAutomaton extends LuceneTestCase {
|
|||
|
||||
// 0 (excl) - 00 (excl)
|
||||
a = makeBinaryInterval(newBytesRef(zeros, 0, 1), false, newBytesRef(zeros, 0, 2), false);
|
||||
assertTrue(Operations.isFinite(a));
|
||||
assertTrue(AutomatonTestUtil.isFinite(a));
|
||||
assertFalse(accepts(a, newBytesRef()));
|
||||
assertFalse(accepts(a, newBytesRef(zeros, 0, 1)));
|
||||
assertFalse(accepts(a, newBytesRef(zeros, 0, 2)));
|
||||
|
@ -1420,7 +1420,7 @@ public class TestAutomaton extends LuceneTestCase {
|
|||
makeBinaryInterval(
|
||||
minTerm, minInclusive,
|
||||
maxTerm, maxInclusive);
|
||||
assertTrue(Operations.isFinite(a));
|
||||
assertTrue(AutomatonTestUtil.isFinite(a));
|
||||
int expectedCount = maxTerm.length - minTerm.length + 1;
|
||||
if (minInclusive == false) {
|
||||
expectedCount--;
|
||||
|
@ -1529,7 +1529,7 @@ public class TestAutomaton extends LuceneTestCase {
|
|||
public void testMakeBinaryIntervalEqual() throws Exception {
|
||||
Automaton a = Automata.makeBinaryInterval(newBytesRef("bar"), true, newBytesRef("bar"), true);
|
||||
assertTrue(Operations.run(a, intsRef("bar")));
|
||||
assertTrue(Operations.isFinite(a));
|
||||
assertTrue(AutomatonTestUtil.isFinite(a));
|
||||
assertEquals(1, TestOperations.getFiniteStrings(a).size());
|
||||
}
|
||||
|
||||
|
|
|
@ -130,7 +130,7 @@ public class TestCompiledAutomaton extends LuceneTestCase {
|
|||
a.addTransition(state, state, 0, 0xff);
|
||||
a.finishState();
|
||||
|
||||
CompiledAutomaton ca = new CompiledAutomaton(a, null, true, true);
|
||||
CompiledAutomaton ca = new CompiledAutomaton(a, false, true, true);
|
||||
assertEquals(CompiledAutomaton.AUTOMATON_TYPE.ALL, ca.type);
|
||||
}
|
||||
|
||||
|
@ -142,7 +142,7 @@ public class TestCompiledAutomaton extends LuceneTestCase {
|
|||
a.addTransition(state, state, 0, Character.MAX_CODE_POINT);
|
||||
a.finishState();
|
||||
|
||||
CompiledAutomaton ca = new CompiledAutomaton(a, null, true, false);
|
||||
CompiledAutomaton ca = new CompiledAutomaton(a, false, true, false);
|
||||
assertEquals(CompiledAutomaton.AUTOMATON_TYPE.ALL, ca.type);
|
||||
}
|
||||
|
||||
|
@ -150,14 +150,14 @@ public class TestCompiledAutomaton extends LuceneTestCase {
|
|||
public void testBinarySingleton() throws Exception {
|
||||
// This is just ascii so we can pretend it's binary:
|
||||
Automaton a = Automata.makeString("foobar");
|
||||
CompiledAutomaton ca = new CompiledAutomaton(a, null, true, true);
|
||||
CompiledAutomaton ca = new CompiledAutomaton(a, true, true, true);
|
||||
assertEquals(CompiledAutomaton.AUTOMATON_TYPE.SINGLE, ca.type);
|
||||
}
|
||||
|
||||
// LUCENE-6367
|
||||
public void testUnicodeSingleton() throws Exception {
|
||||
Automaton a = Automata.makeString(TestUtil.randomRealisticUnicodeString(random()));
|
||||
CompiledAutomaton ca = new CompiledAutomaton(a, null, true, false);
|
||||
CompiledAutomaton ca = new CompiledAutomaton(a, true, true, false);
|
||||
assertEquals(CompiledAutomaton.AUTOMATON_TYPE.SINGLE, ca.type);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -22,6 +22,7 @@ import java.util.Collections;
|
|||
import java.util.List;
|
||||
import org.apache.lucene.tests.util.LuceneTestCase;
|
||||
import org.apache.lucene.tests.util.TestUtil;
|
||||
import org.apache.lucene.tests.util.automaton.AutomatonTestUtil;
|
||||
|
||||
/**
|
||||
* Not thorough, but tries to test determinism correctness somewhat randomly, by determinizing a
|
||||
|
@ -49,7 +50,7 @@ public class TestDeterminizeLexicon extends LuceneTestCase {
|
|||
Collections.shuffle(automata, random());
|
||||
Automaton lex = Operations.union(automata);
|
||||
lex = Operations.determinize(lex, 1000000);
|
||||
assertTrue(Operations.isFinite(lex));
|
||||
assertTrue(AutomatonTestUtil.isFinite(lex));
|
||||
for (String s : terms) {
|
||||
assertTrue(Operations.run(lex, s));
|
||||
}
|
||||
|
|
|
@ -21,6 +21,7 @@ import static org.apache.lucene.util.automaton.Operations.DEFAULT_DETERMINIZE_WO
|
|||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import org.apache.lucene.tests.util.LuceneTestCase;
|
||||
import org.apache.lucene.tests.util.automaton.AutomatonTestUtil;
|
||||
|
||||
public class TestLevenshteinAutomata extends LuceneTestCase {
|
||||
|
||||
|
@ -73,8 +74,8 @@ public class TestLevenshteinAutomata extends LuceneTestCase {
|
|||
assertNotNull(tautomata[n]);
|
||||
assertTrue(automata[n].isDeterministic());
|
||||
assertTrue(tautomata[n].isDeterministic());
|
||||
assertTrue(Operations.isFinite(automata[n]));
|
||||
assertTrue(Operations.isFinite(tautomata[n]));
|
||||
assertTrue(AutomatonTestUtil.isFinite(automata[n]));
|
||||
assertTrue(AutomatonTestUtil.isFinite(tautomata[n]));
|
||||
assertFalse(Operations.hasDeadStatesFromInitial(automata[n]));
|
||||
assertFalse(Operations.hasDeadStatesFromInitial(tautomata[n]));
|
||||
// check that the dfa for n-1 accepts a subset of the dfa for n
|
||||
|
|
|
@ -40,11 +40,11 @@ public class TestLimitedFiniteStringsIterator extends LuceneTestCase {
|
|||
getFiniteStrings(new LimitedFiniteStringsIterator(a, TestUtil.nextInt(random(), 1, 1000)));
|
||||
// NOTE: cannot do this, because the method is not
|
||||
// guaranteed to detect cycles when you have a limit
|
||||
// assertTrue(Operations.isFinite(a));
|
||||
// assertTrue(AutomatonTestUtil.isFinite(a));
|
||||
} catch (
|
||||
@SuppressWarnings("unused")
|
||||
IllegalArgumentException iae) {
|
||||
assertFalse(Operations.isFinite(a));
|
||||
assertFalse(AutomatonTestUtil.isFinite(a));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -115,14 +115,6 @@ public class TestOperations extends LuceneTestCase {
|
|||
}
|
||||
}
|
||||
}
|
||||
/** tests against the original brics implementation. */
|
||||
public void testIsFinite() {
|
||||
int num = atLeast(200);
|
||||
for (int i = 0; i < num; i++) {
|
||||
Automaton a = AutomatonTestUtil.randomAutomaton(random());
|
||||
assertEquals(AutomatonTestUtil.isFiniteSlow(a), Operations.isFinite(a));
|
||||
}
|
||||
}
|
||||
|
||||
public void testIsFiniteEatsStack() {
|
||||
char[] chars = new char[50000];
|
||||
|
@ -133,7 +125,7 @@ public class TestOperations extends LuceneTestCase {
|
|||
Automaton a =
|
||||
Operations.union(Automata.makeString(bigString1), Automata.makeString(bigString2));
|
||||
IllegalArgumentException exc =
|
||||
expectThrows(IllegalArgumentException.class, () -> Operations.isFinite(a));
|
||||
expectThrows(IllegalArgumentException.class, () -> AutomatonTestUtil.isFinite(a));
|
||||
assertTrue(exc.getMessage().contains("input automaton is too large"));
|
||||
}
|
||||
|
||||
|
|
|
@ -169,7 +169,8 @@ public final class Intervals {
|
|||
* @throws IllegalStateException if the prefix expands to more than {@code maxExpansions} terms
|
||||
*/
|
||||
public static IntervalsSource prefix(BytesRef prefix, int maxExpansions) {
|
||||
CompiledAutomaton ca = new CompiledAutomaton(PrefixQuery.toAutomaton(prefix), null, true, true);
|
||||
CompiledAutomaton ca =
|
||||
new CompiledAutomaton(PrefixQuery.toAutomaton(prefix), false, true, true);
|
||||
return new MultiTermIntervalsSource(ca, maxExpansions, prefix.utf8ToString() + "*");
|
||||
}
|
||||
|
||||
|
|
|
@ -877,9 +877,6 @@ public class AnalyzingSuggester extends Lookup {
|
|||
automaton = replaceSep(automaton);
|
||||
automaton = convertAutomaton(automaton);
|
||||
|
||||
// TODO: LUCENE-5660 re-enable this once we disallow massive suggestion strings
|
||||
// assert SpecialOperations.isFinite(automaton);
|
||||
|
||||
// Get all paths from the automaton (there can be
|
||||
// more than one path, eg if the analyzer created a
|
||||
// graph using SynFilter or WDF):
|
||||
|
|
|
@ -1605,7 +1605,7 @@ public class RandomPostingsTester {
|
|||
while (true) {
|
||||
Automaton a = AutomatonTestUtil.randomAutomaton(random);
|
||||
a = Operations.determinize(a, Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
|
||||
CompiledAutomaton ca = new CompiledAutomaton(a, null, true, false);
|
||||
CompiledAutomaton ca = new CompiledAutomaton(a, false, true, false);
|
||||
if (ca.type != CompiledAutomaton.AUTOMATON_TYPE.NORMAL) {
|
||||
// Keep retrying until we get an A that will really "use" the PF's intersect code:
|
||||
continue;
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
package org.apache.lucene.tests.util.automaton;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.BitSet;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.LinkedList;
|
||||
|
@ -463,35 +464,40 @@ public class AutomatonTestUtil {
|
|||
}
|
||||
|
||||
/**
|
||||
* Returns true if the language of this automaton is finite.
|
||||
*
|
||||
* <p>WARNING: this method is slow, it will blow up if the automaton is large. this is only used
|
||||
* to test the correctness of our faster implementation.
|
||||
* Returns true if the language of this automaton is finite. The automaton must not have any dead
|
||||
* states.
|
||||
*/
|
||||
public static boolean isFiniteSlow(Automaton a) {
|
||||
public static boolean isFinite(Automaton a) {
|
||||
if (a.getNumStates() == 0) {
|
||||
return true;
|
||||
}
|
||||
return isFiniteSlow(a, 0, new HashSet<Integer>());
|
||||
return isFinite(
|
||||
new Transition(), a, 0, new BitSet(a.getNumStates()), new BitSet(a.getNumStates()), 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks whether there is a loop containing s. (This is sufficient since there are never
|
||||
* Checks whether there is a loop containing state. (This is sufficient since there are never
|
||||
* transitions to dead states.)
|
||||
*/
|
||||
// TODO: not great that this is recursive... in theory a
|
||||
// large automata could exceed java's stack
|
||||
private static boolean isFiniteSlow(Automaton a, int s, HashSet<Integer> path) {
|
||||
path.add(s);
|
||||
Transition t = new Transition();
|
||||
int count = a.initTransition(s, t);
|
||||
for (int i = 0; i < count; i++) {
|
||||
a.getNextTransition(t);
|
||||
if (path.contains(t.dest) || !isFiniteSlow(a, t.dest, path)) {
|
||||
// large automata could exceed java's stack so the maximum level of recursion is bounded to 1000
|
||||
private static boolean isFinite(
|
||||
Transition scratch, Automaton a, int state, BitSet path, BitSet visited, int level) {
|
||||
if (level > Operations.MAX_RECURSION_LEVEL) {
|
||||
throw new IllegalArgumentException("input automaton is too large: " + level);
|
||||
}
|
||||
path.set(state);
|
||||
int numTransitions = a.initTransition(state, scratch);
|
||||
for (int t = 0; t < numTransitions; t++) {
|
||||
a.getTransition(state, t, scratch);
|
||||
if (path.get(scratch.dest)
|
||||
|| (!visited.get(scratch.dest)
|
||||
&& !isFinite(scratch, a, scratch.dest, path, visited, level + 1))) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
path.remove(s);
|
||||
path.clear(state);
|
||||
visited.set(state);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue