mirror of https://github.com/apache/lucene.git
Remove Operations.isFinite (#11813)
This method is recursive: to avoid eating too much stack we apply a small limit. This means it can't really be used on any largish automata without hitting an exception. But the benefit of knowing finite vs infinite in AutomatonTermsEnum is minor: let's not auto-compute this. FuzzyQuery still gets the finite optimization because it's finite by definition. PrefixQuery is always infinite. Wildcard/Regex just assume infinite which is safe to do. Remove the auto-computation and the "trillean" Boolean parameter. If you don't know that your automaton is finite, pass false to CompiledAutomaton, it is safe. Move this method to AutomatonTestUtil so we can still use it in test asserts. Closes #11809
This commit is contained in:
parent
54fba99cb1
commit
15f3743f02
|
@ -31,6 +31,9 @@ API Changes
|
||||||
|
|
||||||
* LUCENE-10603: Remove SortedSetDocValues#NO_MORE_ORDS definition. (Greg Miller)
|
* LUCENE-10603: Remove SortedSetDocValues#NO_MORE_ORDS definition. (Greg Miller)
|
||||||
|
|
||||||
|
* GITHUB#11813: Remove Operations.isFinite: the recursive implementation could be problematic
|
||||||
|
for large automatons (WildcardQuery, PrefixQuery, RegExpQuery, etc). (taroplus, Robert Muir)
|
||||||
|
|
||||||
New Features
|
New Features
|
||||||
---------------------
|
---------------------
|
||||||
|
|
||||||
|
|
|
@ -28,7 +28,6 @@ import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
|
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
|
||||||
import org.apache.lucene.util.automaton.Automaton;
|
import org.apache.lucene.util.automaton.Automaton;
|
||||||
import org.apache.lucene.util.automaton.Operations;
|
|
||||||
import org.apache.lucene.util.automaton.Transition;
|
import org.apache.lucene.util.automaton.Transition;
|
||||||
|
|
||||||
/** Converts an Automaton into a TokenStream. */
|
/** Converts an Automaton into a TokenStream. */
|
||||||
|
@ -42,14 +41,10 @@ public class AutomatonToTokenStream {
|
||||||
* position nodes for the TokenStream. The resulting TokenStream releases edges from the automaton
|
* position nodes for the TokenStream. The resulting TokenStream releases edges from the automaton
|
||||||
* as tokens in order from the position nodes. This requires the automaton be a finite DAG.
|
* as tokens in order from the position nodes. This requires the automaton be a finite DAG.
|
||||||
*
|
*
|
||||||
* @param automaton automaton to convert. Must be a finite DAG.
|
* @param automaton automaton to convert. Must be a finite DAG to avoid infinite loops!
|
||||||
* @return TokenStream representation of automaton.
|
* @return TokenStream representation of automaton.
|
||||||
*/
|
*/
|
||||||
public static TokenStream toTokenStream(Automaton automaton) {
|
public static TokenStream toTokenStream(Automaton automaton) {
|
||||||
if (Operations.isFinite(automaton) == false) {
|
|
||||||
throw new IllegalArgumentException("Automaton must be finite");
|
|
||||||
}
|
|
||||||
|
|
||||||
List<List<Integer>> positionNodes = new ArrayList<>();
|
List<List<Integer>> positionNodes = new ArrayList<>();
|
||||||
|
|
||||||
Transition[][] transitions = automaton.getSortedTransitions();
|
Transition[][] transitions = automaton.getSortedTransitions();
|
||||||
|
|
|
@ -95,8 +95,7 @@ public class AutomatonQuery extends MultiTermQuery implements Accountable {
|
||||||
this.term = term;
|
this.term = term;
|
||||||
this.automaton = automaton;
|
this.automaton = automaton;
|
||||||
this.automatonIsBinary = isBinary;
|
this.automatonIsBinary = isBinary;
|
||||||
// TODO: we could take isFinite too, to save a bit of CPU in CompiledAutomaton ctor?:
|
this.compiled = new CompiledAutomaton(automaton, false, true, isBinary);
|
||||||
this.compiled = new CompiledAutomaton(automaton, null, true, isBinary);
|
|
||||||
|
|
||||||
this.ramBytesUsed =
|
this.ramBytesUsed =
|
||||||
BASE_RAM_BYTES + term.ramBytesUsed() + automaton.ramBytesUsed() + compiled.ramBytesUsed();
|
BASE_RAM_BYTES + term.ramBytesUsed() + automaton.ramBytesUsed() + compiled.ramBytesUsed();
|
||||||
|
|
|
@ -96,20 +96,17 @@ public class CompiledAutomaton implements Accountable {
|
||||||
public final BytesRef commonSuffixRef;
|
public final BytesRef commonSuffixRef;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Indicates if the automaton accepts a finite set of strings. Null if this was not computed. Only
|
* Indicates if the automaton accepts a finite set of strings. Only valid for {@link
|
||||||
* valid for {@link AUTOMATON_TYPE#NORMAL}.
|
* AUTOMATON_TYPE#NORMAL}.
|
||||||
*/
|
*/
|
||||||
public final Boolean finite;
|
public final boolean finite;
|
||||||
|
|
||||||
/** Which state, if any, accepts all suffixes, else -1. */
|
/** Which state, if any, accepts all suffixes, else -1. */
|
||||||
public final int sinkState;
|
public final int sinkState;
|
||||||
|
|
||||||
/**
|
/** Create this, passing simplify=true, so that we try to simplify the automaton. */
|
||||||
* Create this, passing simplify=true and finite=null, so that we try to simplify the automaton
|
|
||||||
* and determine if it is finite.
|
|
||||||
*/
|
|
||||||
public CompiledAutomaton(Automaton automaton) {
|
public CompiledAutomaton(Automaton automaton) {
|
||||||
this(automaton, null, true);
|
this(automaton, false, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Returns sink state, if present, else -1. */
|
/** Returns sink state, if present, else -1. */
|
||||||
|
@ -139,21 +136,21 @@ public class CompiledAutomaton implements Accountable {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create this. If finite is null, we use {@link Operations#isFinite} to determine whether it is
|
* Create this. If simplify is true, we run possibly expensive operations to determine if the
|
||||||
* finite. If simplify is true, we run possibly expensive operations to determine if the automaton
|
* automaton is one the cases in {@link CompiledAutomaton.AUTOMATON_TYPE}. Set finite to true if
|
||||||
* is one the cases in {@link CompiledAutomaton.AUTOMATON_TYPE}.
|
* the automaton is finite, otherwise set to false if infinite or you don't know.
|
||||||
*/
|
*/
|
||||||
public CompiledAutomaton(Automaton automaton, Boolean finite, boolean simplify) {
|
public CompiledAutomaton(Automaton automaton, boolean finite, boolean simplify) {
|
||||||
this(automaton, finite, simplify, false);
|
this(automaton, finite, simplify, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create this. If finite is null, we use {@link Operations#isFinite} to determine whether it is
|
* Create this. If simplify is true, we run possibly expensive operations to determine if the
|
||||||
* finite. If simplify is true, we run possibly expensive operations to determine if the automaton
|
* automaton is one the cases in {@link CompiledAutomaton.AUTOMATON_TYPE}. Set finite to true if
|
||||||
* is one the cases in {@link CompiledAutomaton.AUTOMATON_TYPE}.
|
* the automaton is finite, otherwise set to false if infinite or you don't know.
|
||||||
*/
|
*/
|
||||||
public CompiledAutomaton(
|
public CompiledAutomaton(
|
||||||
Automaton automaton, Boolean finite, boolean simplify, boolean isBinary) {
|
Automaton automaton, boolean finite, boolean simplify, boolean isBinary) {
|
||||||
|
|
||||||
if (automaton.getNumStates() == 0) {
|
if (automaton.getNumStates() == 0) {
|
||||||
automaton = new Automaton();
|
automaton = new Automaton();
|
||||||
|
@ -174,7 +171,7 @@ public class CompiledAutomaton implements Accountable {
|
||||||
commonSuffixRef = null;
|
commonSuffixRef = null;
|
||||||
runAutomaton = null;
|
runAutomaton = null;
|
||||||
this.automaton = null;
|
this.automaton = null;
|
||||||
this.finite = null;
|
this.finite = true;
|
||||||
sinkState = -1;
|
sinkState = -1;
|
||||||
nfaRunAutomaton = null;
|
nfaRunAutomaton = null;
|
||||||
return;
|
return;
|
||||||
|
@ -196,7 +193,7 @@ public class CompiledAutomaton implements Accountable {
|
||||||
commonSuffixRef = null;
|
commonSuffixRef = null;
|
||||||
runAutomaton = null;
|
runAutomaton = null;
|
||||||
this.automaton = null;
|
this.automaton = null;
|
||||||
this.finite = null;
|
this.finite = false;
|
||||||
sinkState = -1;
|
sinkState = -1;
|
||||||
nfaRunAutomaton = null;
|
nfaRunAutomaton = null;
|
||||||
return;
|
return;
|
||||||
|
@ -210,7 +207,7 @@ public class CompiledAutomaton implements Accountable {
|
||||||
commonSuffixRef = null;
|
commonSuffixRef = null;
|
||||||
runAutomaton = null;
|
runAutomaton = null;
|
||||||
this.automaton = null;
|
this.automaton = null;
|
||||||
this.finite = null;
|
this.finite = true;
|
||||||
|
|
||||||
if (isBinary) {
|
if (isBinary) {
|
||||||
term = StringHelper.intsRefToBytesRef(singleton);
|
term = StringHelper.intsRefToBytesRef(singleton);
|
||||||
|
@ -228,11 +225,7 @@ public class CompiledAutomaton implements Accountable {
|
||||||
type = AUTOMATON_TYPE.NORMAL;
|
type = AUTOMATON_TYPE.NORMAL;
|
||||||
term = null;
|
term = null;
|
||||||
|
|
||||||
if (finite == null) {
|
|
||||||
this.finite = Operations.isFinite(automaton);
|
|
||||||
} else {
|
|
||||||
this.finite = finite;
|
this.finite = finite;
|
||||||
}
|
|
||||||
|
|
||||||
Automaton binary;
|
Automaton binary;
|
||||||
if (isBinary) {
|
if (isBinary) {
|
||||||
|
|
|
@ -1037,44 +1037,6 @@ public final class Operations {
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns true if the language of this automaton is finite. The automaton must not have any dead
|
|
||||||
* states.
|
|
||||||
*/
|
|
||||||
public static boolean isFinite(Automaton a) {
|
|
||||||
if (a.getNumStates() == 0) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
return isFinite(
|
|
||||||
new Transition(), a, 0, new BitSet(a.getNumStates()), new BitSet(a.getNumStates()), 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Checks whether there is a loop containing state. (This is sufficient since there are never
|
|
||||||
* transitions to dead states.)
|
|
||||||
*/
|
|
||||||
// TODO: not great that this is recursive... in theory a
|
|
||||||
// large automata could exceed java's stack so the maximum level of recursion is bounded to 1000
|
|
||||||
private static boolean isFinite(
|
|
||||||
Transition scratch, Automaton a, int state, BitSet path, BitSet visited, int level) {
|
|
||||||
if (level > MAX_RECURSION_LEVEL) {
|
|
||||||
throw new IllegalArgumentException("input automaton is too large: " + level);
|
|
||||||
}
|
|
||||||
path.set(state);
|
|
||||||
int numTransitions = a.initTransition(state, scratch);
|
|
||||||
for (int t = 0; t < numTransitions; t++) {
|
|
||||||
a.getTransition(state, t, scratch);
|
|
||||||
if (path.get(scratch.dest)
|
|
||||||
|| (!visited.get(scratch.dest)
|
|
||||||
&& !isFinite(scratch, a, scratch.dest, path, visited, level + 1))) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
path.clear(state);
|
|
||||||
visited.set(state);
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the longest string that is a prefix of all accepted strings and visits each state at
|
* Returns the longest string that is a prefix of all accepted strings and visits each state at
|
||||||
* most once. The automaton must not have dead states. If this automaton has already been
|
* most once. The automaton must not have dead states. If this automaton has already been
|
||||||
|
|
|
@ -167,8 +167,7 @@ public class TestTermsEnum2 extends LuceneTestCase {
|
||||||
String reg = AutomatonTestUtil.randomRegexp(random());
|
String reg = AutomatonTestUtil.randomRegexp(random());
|
||||||
Automaton automaton = new RegExp(reg, RegExp.NONE).toAutomaton();
|
Automaton automaton = new RegExp(reg, RegExp.NONE).toAutomaton();
|
||||||
automaton = Operations.determinize(automaton, Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
|
automaton = Operations.determinize(automaton, Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
|
||||||
CompiledAutomaton ca =
|
CompiledAutomaton ca = new CompiledAutomaton(automaton, false, false);
|
||||||
new CompiledAutomaton(automaton, Operations.isFinite(automaton), false);
|
|
||||||
TermsEnum te = MultiTerms.getTerms(reader, "field").intersect(ca, null);
|
TermsEnum te = MultiTerms.getTerms(reader, "field").intersect(ca, null);
|
||||||
Automaton expected =
|
Automaton expected =
|
||||||
Operations.determinize(
|
Operations.determinize(
|
||||||
|
|
|
@ -389,4 +389,25 @@ public class TestWildcard extends LuceneTestCase {
|
||||||
reader.close();
|
reader.close();
|
||||||
dir.close();
|
dir.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Tests large Wildcard queries */
|
||||||
|
public void testLarge() throws IOException {
|
||||||
|
// big string from a user
|
||||||
|
String big =
|
||||||
|
"{group-bm-http-server-02083.node.dm.reg,group-bm-http-server-02082.node.dm.reg,group-bm-http-server-02081.node.dm.reg,group-bm-http-server-02080.node.dm.reg,group-bm-http-server-02079.node.dm.reg,group-bm-http-server-02078.node.dm.reg,group-bm-http-server-02077.node.dm.reg,group-bm-http-server-02076.node.dm.reg,group-bm-http-server-02073.node.dm.reg,group-bm-http-server-02070.node.dm.reg,group-bm-http-server-02067.node.dm.reg,group-bm-http-server-02064.node.dm.reg,group-bm-http-server-02029.node.dm.reg,group-bm-http-server-02028.node.dm.reg,group-bm-http-server-02027.node.dm.reg,group-bm-http-server-02026.node.dm.reg,group-bm-http-server-02025.node.dm.reg,group-bm-http-server-02023.node.dm.reg,group-bm-http-server-02022.node.dm.reg,group-bm-http-server-02021.node.dm.reg,group-bm-http-server-02020.node.dm.reg,group-bm-http-server-02019.node.dm.reg,group-bm-http-server-02018.node.dm.reg,group-bm-http-server-02016.node.dm.reg,group-bm-http-server-02015.node.dm.reg,group-bm-http-server-02014.node.dm.reg,group-bm-http-server-02009.node.dm.reg,group-bm-http-server-02007.node.dm.reg,group-bm-http-server-02004.node.dm.reg,group-bm-http-server-02003.node.dm.reg,group-bm-http-server-02002.node.dm.reg,group-bm-http-server-01311.node.dm.reg,group-bm-http-server-01309.node.dm.reg,group-bm-http-server-01307.node.dm.reg}";
|
||||||
|
Directory dir = newDirectory();
|
||||||
|
RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
|
||||||
|
Document doc = new Document();
|
||||||
|
doc.add(newStringField("body", big, Field.Store.YES));
|
||||||
|
writer.addDocument(doc);
|
||||||
|
writer.close();
|
||||||
|
|
||||||
|
IndexReader reader = DirectoryReader.open(dir);
|
||||||
|
IndexSearcher searcher = newSearcher(reader);
|
||||||
|
Query query = new WildcardQuery(new Term("body", big + "*"));
|
||||||
|
assertMatches(searcher, query, 1);
|
||||||
|
|
||||||
|
reader.close();
|
||||||
|
dir.close();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -224,7 +224,7 @@ public class TestAutomaton extends LuceneTestCase {
|
||||||
assertTrue(Operations.run(a, "mn"));
|
assertTrue(Operations.run(a, "mn"));
|
||||||
assertTrue(Operations.run(a, "mone"));
|
assertTrue(Operations.run(a, "mone"));
|
||||||
assertFalse(Operations.run(a, "m"));
|
assertFalse(Operations.run(a, "m"));
|
||||||
assertFalse(Operations.isFinite(a));
|
assertFalse(AutomatonTestUtil.isFinite(a));
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testUnion1() throws Exception {
|
public void testUnion1() throws Exception {
|
||||||
|
@ -1237,7 +1237,7 @@ public class TestAutomaton extends LuceneTestCase {
|
||||||
private void assertSame(Collection<BytesRef> terms, Automaton a) {
|
private void assertSame(Collection<BytesRef> terms, Automaton a) {
|
||||||
|
|
||||||
try {
|
try {
|
||||||
assertTrue(Operations.isFinite(a));
|
assertTrue(AutomatonTestUtil.isFinite(a));
|
||||||
assertFalse(Operations.isTotal(a));
|
assertFalse(Operations.isTotal(a));
|
||||||
|
|
||||||
Automaton detA = Operations.determinize(a, DEFAULT_DETERMINIZE_WORK_LIMIT);
|
Automaton detA = Operations.determinize(a, DEFAULT_DETERMINIZE_WORK_LIMIT);
|
||||||
|
@ -1354,7 +1354,7 @@ public class TestAutomaton extends LuceneTestCase {
|
||||||
byte[] zeros = new byte[3];
|
byte[] zeros = new byte[3];
|
||||||
Automaton a =
|
Automaton a =
|
||||||
makeBinaryInterval(newBytesRef(zeros, 0, 1), true, newBytesRef(zeros, 0, 2), true);
|
makeBinaryInterval(newBytesRef(zeros, 0, 1), true, newBytesRef(zeros, 0, 2), true);
|
||||||
assertTrue(Operations.isFinite(a));
|
assertTrue(AutomatonTestUtil.isFinite(a));
|
||||||
assertFalse(accepts(a, newBytesRef()));
|
assertFalse(accepts(a, newBytesRef()));
|
||||||
assertTrue(accepts(a, newBytesRef(zeros, 0, 1)));
|
assertTrue(accepts(a, newBytesRef(zeros, 0, 1)));
|
||||||
assertTrue(accepts(a, newBytesRef(zeros, 0, 2)));
|
assertTrue(accepts(a, newBytesRef(zeros, 0, 2)));
|
||||||
|
@ -1362,7 +1362,7 @@ public class TestAutomaton extends LuceneTestCase {
|
||||||
|
|
||||||
// '' (incl) - 00 (incl)
|
// '' (incl) - 00 (incl)
|
||||||
a = makeBinaryInterval(newBytesRef(), true, newBytesRef(zeros, 0, 2), true);
|
a = makeBinaryInterval(newBytesRef(), true, newBytesRef(zeros, 0, 2), true);
|
||||||
assertTrue(Operations.isFinite(a));
|
assertTrue(AutomatonTestUtil.isFinite(a));
|
||||||
assertTrue(accepts(a, newBytesRef()));
|
assertTrue(accepts(a, newBytesRef()));
|
||||||
assertTrue(accepts(a, newBytesRef(zeros, 0, 1)));
|
assertTrue(accepts(a, newBytesRef(zeros, 0, 1)));
|
||||||
assertTrue(accepts(a, newBytesRef(zeros, 0, 2)));
|
assertTrue(accepts(a, newBytesRef(zeros, 0, 2)));
|
||||||
|
@ -1370,7 +1370,7 @@ public class TestAutomaton extends LuceneTestCase {
|
||||||
|
|
||||||
// '' (excl) - 00 (incl)
|
// '' (excl) - 00 (incl)
|
||||||
a = makeBinaryInterval(newBytesRef(), false, newBytesRef(zeros, 0, 2), true);
|
a = makeBinaryInterval(newBytesRef(), false, newBytesRef(zeros, 0, 2), true);
|
||||||
assertTrue(Operations.isFinite(a));
|
assertTrue(AutomatonTestUtil.isFinite(a));
|
||||||
assertFalse(accepts(a, newBytesRef()));
|
assertFalse(accepts(a, newBytesRef()));
|
||||||
assertTrue(accepts(a, newBytesRef(zeros, 0, 1)));
|
assertTrue(accepts(a, newBytesRef(zeros, 0, 1)));
|
||||||
assertTrue(accepts(a, newBytesRef(zeros, 0, 2)));
|
assertTrue(accepts(a, newBytesRef(zeros, 0, 2)));
|
||||||
|
@ -1378,7 +1378,7 @@ public class TestAutomaton extends LuceneTestCase {
|
||||||
|
|
||||||
// 0 (excl) - 00 (incl)
|
// 0 (excl) - 00 (incl)
|
||||||
a = makeBinaryInterval(newBytesRef(zeros, 0, 1), false, newBytesRef(zeros, 0, 2), true);
|
a = makeBinaryInterval(newBytesRef(zeros, 0, 1), false, newBytesRef(zeros, 0, 2), true);
|
||||||
assertTrue(Operations.isFinite(a));
|
assertTrue(AutomatonTestUtil.isFinite(a));
|
||||||
assertFalse(accepts(a, newBytesRef()));
|
assertFalse(accepts(a, newBytesRef()));
|
||||||
assertFalse(accepts(a, newBytesRef(zeros, 0, 1)));
|
assertFalse(accepts(a, newBytesRef(zeros, 0, 1)));
|
||||||
assertTrue(accepts(a, newBytesRef(zeros, 0, 2)));
|
assertTrue(accepts(a, newBytesRef(zeros, 0, 2)));
|
||||||
|
@ -1386,7 +1386,7 @@ public class TestAutomaton extends LuceneTestCase {
|
||||||
|
|
||||||
// 0 (excl) - 00 (excl)
|
// 0 (excl) - 00 (excl)
|
||||||
a = makeBinaryInterval(newBytesRef(zeros, 0, 1), false, newBytesRef(zeros, 0, 2), false);
|
a = makeBinaryInterval(newBytesRef(zeros, 0, 1), false, newBytesRef(zeros, 0, 2), false);
|
||||||
assertTrue(Operations.isFinite(a));
|
assertTrue(AutomatonTestUtil.isFinite(a));
|
||||||
assertFalse(accepts(a, newBytesRef()));
|
assertFalse(accepts(a, newBytesRef()));
|
||||||
assertFalse(accepts(a, newBytesRef(zeros, 0, 1)));
|
assertFalse(accepts(a, newBytesRef(zeros, 0, 1)));
|
||||||
assertFalse(accepts(a, newBytesRef(zeros, 0, 2)));
|
assertFalse(accepts(a, newBytesRef(zeros, 0, 2)));
|
||||||
|
@ -1420,7 +1420,7 @@ public class TestAutomaton extends LuceneTestCase {
|
||||||
makeBinaryInterval(
|
makeBinaryInterval(
|
||||||
minTerm, minInclusive,
|
minTerm, minInclusive,
|
||||||
maxTerm, maxInclusive);
|
maxTerm, maxInclusive);
|
||||||
assertTrue(Operations.isFinite(a));
|
assertTrue(AutomatonTestUtil.isFinite(a));
|
||||||
int expectedCount = maxTerm.length - minTerm.length + 1;
|
int expectedCount = maxTerm.length - minTerm.length + 1;
|
||||||
if (minInclusive == false) {
|
if (minInclusive == false) {
|
||||||
expectedCount--;
|
expectedCount--;
|
||||||
|
@ -1529,7 +1529,7 @@ public class TestAutomaton extends LuceneTestCase {
|
||||||
public void testMakeBinaryIntervalEqual() throws Exception {
|
public void testMakeBinaryIntervalEqual() throws Exception {
|
||||||
Automaton a = Automata.makeBinaryInterval(newBytesRef("bar"), true, newBytesRef("bar"), true);
|
Automaton a = Automata.makeBinaryInterval(newBytesRef("bar"), true, newBytesRef("bar"), true);
|
||||||
assertTrue(Operations.run(a, intsRef("bar")));
|
assertTrue(Operations.run(a, intsRef("bar")));
|
||||||
assertTrue(Operations.isFinite(a));
|
assertTrue(AutomatonTestUtil.isFinite(a));
|
||||||
assertEquals(1, TestOperations.getFiniteStrings(a).size());
|
assertEquals(1, TestOperations.getFiniteStrings(a).size());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -130,7 +130,7 @@ public class TestCompiledAutomaton extends LuceneTestCase {
|
||||||
a.addTransition(state, state, 0, 0xff);
|
a.addTransition(state, state, 0, 0xff);
|
||||||
a.finishState();
|
a.finishState();
|
||||||
|
|
||||||
CompiledAutomaton ca = new CompiledAutomaton(a, null, true, true);
|
CompiledAutomaton ca = new CompiledAutomaton(a, false, true, true);
|
||||||
assertEquals(CompiledAutomaton.AUTOMATON_TYPE.ALL, ca.type);
|
assertEquals(CompiledAutomaton.AUTOMATON_TYPE.ALL, ca.type);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -142,7 +142,7 @@ public class TestCompiledAutomaton extends LuceneTestCase {
|
||||||
a.addTransition(state, state, 0, Character.MAX_CODE_POINT);
|
a.addTransition(state, state, 0, Character.MAX_CODE_POINT);
|
||||||
a.finishState();
|
a.finishState();
|
||||||
|
|
||||||
CompiledAutomaton ca = new CompiledAutomaton(a, null, true, false);
|
CompiledAutomaton ca = new CompiledAutomaton(a, false, true, false);
|
||||||
assertEquals(CompiledAutomaton.AUTOMATON_TYPE.ALL, ca.type);
|
assertEquals(CompiledAutomaton.AUTOMATON_TYPE.ALL, ca.type);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -150,14 +150,14 @@ public class TestCompiledAutomaton extends LuceneTestCase {
|
||||||
public void testBinarySingleton() throws Exception {
|
public void testBinarySingleton() throws Exception {
|
||||||
// This is just ascii so we can pretend it's binary:
|
// This is just ascii so we can pretend it's binary:
|
||||||
Automaton a = Automata.makeString("foobar");
|
Automaton a = Automata.makeString("foobar");
|
||||||
CompiledAutomaton ca = new CompiledAutomaton(a, null, true, true);
|
CompiledAutomaton ca = new CompiledAutomaton(a, true, true, true);
|
||||||
assertEquals(CompiledAutomaton.AUTOMATON_TYPE.SINGLE, ca.type);
|
assertEquals(CompiledAutomaton.AUTOMATON_TYPE.SINGLE, ca.type);
|
||||||
}
|
}
|
||||||
|
|
||||||
// LUCENE-6367
|
// LUCENE-6367
|
||||||
public void testUnicodeSingleton() throws Exception {
|
public void testUnicodeSingleton() throws Exception {
|
||||||
Automaton a = Automata.makeString(TestUtil.randomRealisticUnicodeString(random()));
|
Automaton a = Automata.makeString(TestUtil.randomRealisticUnicodeString(random()));
|
||||||
CompiledAutomaton ca = new CompiledAutomaton(a, null, true, false);
|
CompiledAutomaton ca = new CompiledAutomaton(a, true, true, false);
|
||||||
assertEquals(CompiledAutomaton.AUTOMATON_TYPE.SINGLE, ca.type);
|
assertEquals(CompiledAutomaton.AUTOMATON_TYPE.SINGLE, ca.type);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -22,6 +22,7 @@ import java.util.Collections;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import org.apache.lucene.tests.util.LuceneTestCase;
|
import org.apache.lucene.tests.util.LuceneTestCase;
|
||||||
import org.apache.lucene.tests.util.TestUtil;
|
import org.apache.lucene.tests.util.TestUtil;
|
||||||
|
import org.apache.lucene.tests.util.automaton.AutomatonTestUtil;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Not thorough, but tries to test determinism correctness somewhat randomly, by determinizing a
|
* Not thorough, but tries to test determinism correctness somewhat randomly, by determinizing a
|
||||||
|
@ -49,7 +50,7 @@ public class TestDeterminizeLexicon extends LuceneTestCase {
|
||||||
Collections.shuffle(automata, random());
|
Collections.shuffle(automata, random());
|
||||||
Automaton lex = Operations.union(automata);
|
Automaton lex = Operations.union(automata);
|
||||||
lex = Operations.determinize(lex, 1000000);
|
lex = Operations.determinize(lex, 1000000);
|
||||||
assertTrue(Operations.isFinite(lex));
|
assertTrue(AutomatonTestUtil.isFinite(lex));
|
||||||
for (String s : terms) {
|
for (String s : terms) {
|
||||||
assertTrue(Operations.run(lex, s));
|
assertTrue(Operations.run(lex, s));
|
||||||
}
|
}
|
||||||
|
|
|
@ -21,6 +21,7 @@ import static org.apache.lucene.util.automaton.Operations.DEFAULT_DETERMINIZE_WO
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import org.apache.lucene.tests.util.LuceneTestCase;
|
import org.apache.lucene.tests.util.LuceneTestCase;
|
||||||
|
import org.apache.lucene.tests.util.automaton.AutomatonTestUtil;
|
||||||
|
|
||||||
public class TestLevenshteinAutomata extends LuceneTestCase {
|
public class TestLevenshteinAutomata extends LuceneTestCase {
|
||||||
|
|
||||||
|
@ -73,8 +74,8 @@ public class TestLevenshteinAutomata extends LuceneTestCase {
|
||||||
assertNotNull(tautomata[n]);
|
assertNotNull(tautomata[n]);
|
||||||
assertTrue(automata[n].isDeterministic());
|
assertTrue(automata[n].isDeterministic());
|
||||||
assertTrue(tautomata[n].isDeterministic());
|
assertTrue(tautomata[n].isDeterministic());
|
||||||
assertTrue(Operations.isFinite(automata[n]));
|
assertTrue(AutomatonTestUtil.isFinite(automata[n]));
|
||||||
assertTrue(Operations.isFinite(tautomata[n]));
|
assertTrue(AutomatonTestUtil.isFinite(tautomata[n]));
|
||||||
assertFalse(Operations.hasDeadStatesFromInitial(automata[n]));
|
assertFalse(Operations.hasDeadStatesFromInitial(automata[n]));
|
||||||
assertFalse(Operations.hasDeadStatesFromInitial(tautomata[n]));
|
assertFalse(Operations.hasDeadStatesFromInitial(tautomata[n]));
|
||||||
// check that the dfa for n-1 accepts a subset of the dfa for n
|
// check that the dfa for n-1 accepts a subset of the dfa for n
|
||||||
|
|
|
@ -40,11 +40,11 @@ public class TestLimitedFiniteStringsIterator extends LuceneTestCase {
|
||||||
getFiniteStrings(new LimitedFiniteStringsIterator(a, TestUtil.nextInt(random(), 1, 1000)));
|
getFiniteStrings(new LimitedFiniteStringsIterator(a, TestUtil.nextInt(random(), 1, 1000)));
|
||||||
// NOTE: cannot do this, because the method is not
|
// NOTE: cannot do this, because the method is not
|
||||||
// guaranteed to detect cycles when you have a limit
|
// guaranteed to detect cycles when you have a limit
|
||||||
// assertTrue(Operations.isFinite(a));
|
// assertTrue(AutomatonTestUtil.isFinite(a));
|
||||||
} catch (
|
} catch (
|
||||||
@SuppressWarnings("unused")
|
@SuppressWarnings("unused")
|
||||||
IllegalArgumentException iae) {
|
IllegalArgumentException iae) {
|
||||||
assertFalse(Operations.isFinite(a));
|
assertFalse(AutomatonTestUtil.isFinite(a));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -115,14 +115,6 @@ public class TestOperations extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
/** tests against the original brics implementation. */
|
|
||||||
public void testIsFinite() {
|
|
||||||
int num = atLeast(200);
|
|
||||||
for (int i = 0; i < num; i++) {
|
|
||||||
Automaton a = AutomatonTestUtil.randomAutomaton(random());
|
|
||||||
assertEquals(AutomatonTestUtil.isFiniteSlow(a), Operations.isFinite(a));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public void testIsFiniteEatsStack() {
|
public void testIsFiniteEatsStack() {
|
||||||
char[] chars = new char[50000];
|
char[] chars = new char[50000];
|
||||||
|
@ -133,7 +125,7 @@ public class TestOperations extends LuceneTestCase {
|
||||||
Automaton a =
|
Automaton a =
|
||||||
Operations.union(Automata.makeString(bigString1), Automata.makeString(bigString2));
|
Operations.union(Automata.makeString(bigString1), Automata.makeString(bigString2));
|
||||||
IllegalArgumentException exc =
|
IllegalArgumentException exc =
|
||||||
expectThrows(IllegalArgumentException.class, () -> Operations.isFinite(a));
|
expectThrows(IllegalArgumentException.class, () -> AutomatonTestUtil.isFinite(a));
|
||||||
assertTrue(exc.getMessage().contains("input automaton is too large"));
|
assertTrue(exc.getMessage().contains("input automaton is too large"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -169,7 +169,8 @@ public final class Intervals {
|
||||||
* @throws IllegalStateException if the prefix expands to more than {@code maxExpansions} terms
|
* @throws IllegalStateException if the prefix expands to more than {@code maxExpansions} terms
|
||||||
*/
|
*/
|
||||||
public static IntervalsSource prefix(BytesRef prefix, int maxExpansions) {
|
public static IntervalsSource prefix(BytesRef prefix, int maxExpansions) {
|
||||||
CompiledAutomaton ca = new CompiledAutomaton(PrefixQuery.toAutomaton(prefix), null, true, true);
|
CompiledAutomaton ca =
|
||||||
|
new CompiledAutomaton(PrefixQuery.toAutomaton(prefix), false, true, true);
|
||||||
return new MultiTermIntervalsSource(ca, maxExpansions, prefix.utf8ToString() + "*");
|
return new MultiTermIntervalsSource(ca, maxExpansions, prefix.utf8ToString() + "*");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -877,9 +877,6 @@ public class AnalyzingSuggester extends Lookup {
|
||||||
automaton = replaceSep(automaton);
|
automaton = replaceSep(automaton);
|
||||||
automaton = convertAutomaton(automaton);
|
automaton = convertAutomaton(automaton);
|
||||||
|
|
||||||
// TODO: LUCENE-5660 re-enable this once we disallow massive suggestion strings
|
|
||||||
// assert SpecialOperations.isFinite(automaton);
|
|
||||||
|
|
||||||
// Get all paths from the automaton (there can be
|
// Get all paths from the automaton (there can be
|
||||||
// more than one path, eg if the analyzer created a
|
// more than one path, eg if the analyzer created a
|
||||||
// graph using SynFilter or WDF):
|
// graph using SynFilter or WDF):
|
||||||
|
|
|
@ -1605,7 +1605,7 @@ public class RandomPostingsTester {
|
||||||
while (true) {
|
while (true) {
|
||||||
Automaton a = AutomatonTestUtil.randomAutomaton(random);
|
Automaton a = AutomatonTestUtil.randomAutomaton(random);
|
||||||
a = Operations.determinize(a, Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
|
a = Operations.determinize(a, Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
|
||||||
CompiledAutomaton ca = new CompiledAutomaton(a, null, true, false);
|
CompiledAutomaton ca = new CompiledAutomaton(a, false, true, false);
|
||||||
if (ca.type != CompiledAutomaton.AUTOMATON_TYPE.NORMAL) {
|
if (ca.type != CompiledAutomaton.AUTOMATON_TYPE.NORMAL) {
|
||||||
// Keep retrying until we get an A that will really "use" the PF's intersect code:
|
// Keep retrying until we get an A that will really "use" the PF's intersect code:
|
||||||
continue;
|
continue;
|
||||||
|
|
|
@ -17,6 +17,7 @@
|
||||||
package org.apache.lucene.tests.util.automaton;
|
package org.apache.lucene.tests.util.automaton;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.BitSet;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
import java.util.LinkedList;
|
import java.util.LinkedList;
|
||||||
|
@ -463,35 +464,40 @@ public class AutomatonTestUtil {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns true if the language of this automaton is finite.
|
* Returns true if the language of this automaton is finite. The automaton must not have any dead
|
||||||
*
|
* states.
|
||||||
* <p>WARNING: this method is slow, it will blow up if the automaton is large. this is only used
|
|
||||||
* to test the correctness of our faster implementation.
|
|
||||||
*/
|
*/
|
||||||
public static boolean isFiniteSlow(Automaton a) {
|
public static boolean isFinite(Automaton a) {
|
||||||
if (a.getNumStates() == 0) {
|
if (a.getNumStates() == 0) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
return isFiniteSlow(a, 0, new HashSet<Integer>());
|
return isFinite(
|
||||||
|
new Transition(), a, 0, new BitSet(a.getNumStates()), new BitSet(a.getNumStates()), 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Checks whether there is a loop containing s. (This is sufficient since there are never
|
* Checks whether there is a loop containing state. (This is sufficient since there are never
|
||||||
* transitions to dead states.)
|
* transitions to dead states.)
|
||||||
*/
|
*/
|
||||||
// TODO: not great that this is recursive... in theory a
|
// TODO: not great that this is recursive... in theory a
|
||||||
// large automata could exceed java's stack
|
// large automata could exceed java's stack so the maximum level of recursion is bounded to 1000
|
||||||
private static boolean isFiniteSlow(Automaton a, int s, HashSet<Integer> path) {
|
private static boolean isFinite(
|
||||||
path.add(s);
|
Transition scratch, Automaton a, int state, BitSet path, BitSet visited, int level) {
|
||||||
Transition t = new Transition();
|
if (level > Operations.MAX_RECURSION_LEVEL) {
|
||||||
int count = a.initTransition(s, t);
|
throw new IllegalArgumentException("input automaton is too large: " + level);
|
||||||
for (int i = 0; i < count; i++) {
|
}
|
||||||
a.getNextTransition(t);
|
path.set(state);
|
||||||
if (path.contains(t.dest) || !isFiniteSlow(a, t.dest, path)) {
|
int numTransitions = a.initTransition(state, scratch);
|
||||||
|
for (int t = 0; t < numTransitions; t++) {
|
||||||
|
a.getTransition(state, t, scratch);
|
||||||
|
if (path.get(scratch.dest)
|
||||||
|
|| (!visited.get(scratch.dest)
|
||||||
|
&& !isFinite(scratch, a, scratch.dest, path, visited, level + 1))) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
path.remove(s);
|
path.clear(state);
|
||||||
|
visited.set(state);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue