mirror of https://github.com/apache/lucene.git
LUCENE-7914: Add a maximum recursion level in automaton recursive functions (Operations.isFinite and Operations.topsortState) to prevent large automaton to overflow the stack.
This commit is contained in:
parent
d620326b88
commit
7dde798473
|
@ -14,7 +14,6 @@ Changes in Runtime Behavior
|
|||
|
||||
|
||||
======================= Lucene 7.1.0 =======================
|
||||
(No Changes)
|
||||
|
||||
Optimizations
|
||||
|
||||
|
@ -22,6 +21,12 @@ Optimizations
|
|||
SortedSetDocValuesFacetCounts and others) builds its map (Robert
|
||||
Muir, Adrien Grand, Mike McCandless)
|
||||
|
||||
Bug Fixes
|
||||
|
||||
* LUCENE-7914: Add a maximum recursion level in automaton recursive
|
||||
functions (Operations.isFinite and Operations.topsortState) to prevent
|
||||
large automaton to overflow the stack (Robert Muir, Adrien Grand, Jim Ferenczi)
|
||||
|
||||
======================= Lucene 7.0.0 =======================
|
||||
|
||||
New Features
|
||||
|
|
|
@ -58,6 +58,11 @@ final public class Operations {
|
|||
*/
|
||||
public static final int DEFAULT_MAX_DETERMINIZED_STATES = 10000;
|
||||
|
||||
/**
|
||||
* Maximum level of recursion allowed in recursive operations.
|
||||
*/
|
||||
public static final int MAX_RECURSION_LEVEL = 1000;
|
||||
|
||||
private Operations() {}
|
||||
|
||||
/**
|
||||
|
@ -1018,7 +1023,7 @@ final public class Operations {
|
|||
if (a.getNumStates() == 0) {
|
||||
return true;
|
||||
}
|
||||
return isFinite(new Transition(), a, 0, new BitSet(a.getNumStates()), new BitSet(a.getNumStates()));
|
||||
return isFinite(new Transition(), a, 0, new BitSet(a.getNumStates()), new BitSet(a.getNumStates()), 0);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -1026,13 +1031,16 @@ final public class Operations {
|
|||
* there are never transitions to dead states.)
|
||||
*/
|
||||
// TODO: not great that this is recursive... in theory a
|
||||
// large automata could exceed java's stack
|
||||
private static boolean isFinite(Transition scratch, Automaton a, int state, BitSet path, BitSet visited) {
|
||||
// large automata could exceed java's stack so the maximum level of recursion is bounded to 1000
|
||||
private static boolean isFinite(Transition scratch, Automaton a, int state, BitSet path, BitSet visited, int level) {
|
||||
if (level > MAX_RECURSION_LEVEL) {
|
||||
throw new IllegalArgumentException("input automaton is too large: " + level);
|
||||
}
|
||||
path.set(state);
|
||||
int numTransitions = a.initTransition(state, scratch);
|
||||
for(int t=0;t<numTransitions;t++) {
|
||||
a.getTransition(state, t, scratch);
|
||||
if (path.get(scratch.dest) || (!visited.get(scratch.dest) && !isFinite(scratch, a, scratch.dest, path, visited))) {
|
||||
if (path.get(scratch.dest) || (!visited.get(scratch.dest) && !isFinite(scratch, a, scratch.dest, path, visited, level+1))) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
@ -1264,7 +1272,7 @@ final public class Operations {
|
|||
int numStates = a.getNumStates();
|
||||
int[] states = new int[numStates];
|
||||
final BitSet visited = new BitSet(numStates);
|
||||
int upto = topoSortStatesRecurse(a, visited, states, 0, 0);
|
||||
int upto = topoSortStatesRecurse(a, visited, states, 0, 0, 0);
|
||||
|
||||
if (upto < states.length) {
|
||||
// There were dead states
|
||||
|
@ -1283,14 +1291,19 @@ final public class Operations {
|
|||
return states;
|
||||
}
|
||||
|
||||
private static int topoSortStatesRecurse(Automaton a, BitSet visited, int[] states, int upto, int state) {
|
||||
// TODO: not great that this is recursive... in theory a
|
||||
// large automata could exceed java's stack so the maximum level of recursion is bounded to 1000
|
||||
private static int topoSortStatesRecurse(Automaton a, BitSet visited, int[] states, int upto, int state, int level) {
|
||||
if (level > MAX_RECURSION_LEVEL) {
|
||||
throw new IllegalArgumentException("input automaton is too large: " + level);
|
||||
}
|
||||
Transition t = new Transition();
|
||||
int count = a.initTransition(state, t);
|
||||
for (int i=0;i<count;i++) {
|
||||
a.getNextTransition(t);
|
||||
if (!visited.get(t.dest)) {
|
||||
visited.set(t.dest);
|
||||
upto = topoSortStatesRecurse(a, visited, states, upto, t.dest);
|
||||
upto = topoSortStatesRecurse(a, visited, states, upto, t.dest, level+1);
|
||||
}
|
||||
}
|
||||
states[upto] = state;
|
||||
|
|
|
@ -542,19 +542,21 @@ public class RegExp {
|
|||
a = MinimizationOperations.minimize(a, maxDeterminizedStates);
|
||||
break;
|
||||
case REGEXP_REPEAT_MIN:
|
||||
a = Operations.repeat(
|
||||
exp1.toAutomatonInternal(automata, automaton_provider,
|
||||
maxDeterminizedStates),
|
||||
min);
|
||||
a = exp1.toAutomatonInternal(automata, automaton_provider, maxDeterminizedStates);
|
||||
int minNumStates = (a.getNumStates() - 1) * min;
|
||||
if (minNumStates > maxDeterminizedStates) {
|
||||
throw new TooComplexToDeterminizeException(a, minNumStates);
|
||||
}
|
||||
a = Operations.repeat(a, min);
|
||||
a = MinimizationOperations.minimize(a, maxDeterminizedStates);
|
||||
break;
|
||||
case REGEXP_REPEAT_MINMAX:
|
||||
a = Operations.repeat(
|
||||
exp1.toAutomatonInternal(automata, automaton_provider,
|
||||
maxDeterminizedStates),
|
||||
min,
|
||||
max);
|
||||
a = MinimizationOperations.minimize(a, maxDeterminizedStates);
|
||||
a = exp1.toAutomatonInternal(automata, automaton_provider, maxDeterminizedStates);
|
||||
int minMaxNumStates = (a.getNumStates() - 1) * max;
|
||||
if (minMaxNumStates > maxDeterminizedStates) {
|
||||
throw new TooComplexToDeterminizeException(a, minMaxNumStates);
|
||||
}
|
||||
a = Operations.repeat(a, min, max);
|
||||
break;
|
||||
case REGEXP_COMPLEMENT:
|
||||
a = Operations.complement(
|
||||
|
|
|
@ -52,8 +52,7 @@ public class TestOperations extends LuceneTestCase {
|
|||
for (BytesRef bref : strings) {
|
||||
eachIndividual[i++] = Automata.makeString(bref.utf8ToString());
|
||||
}
|
||||
return Operations.determinize(Operations.union(Arrays.asList(eachIndividual)),
|
||||
DEFAULT_MAX_DETERMINIZED_STATES);
|
||||
return Operations.determinize(Operations.union(Arrays.asList(eachIndividual)), DEFAULT_MAX_DETERMINIZED_STATES);
|
||||
}
|
||||
|
||||
/** Test concatenation with empty language returns empty */
|
||||
|
@ -61,6 +60,7 @@ public class TestOperations extends LuceneTestCase {
|
|||
Automaton a = Automata.makeString("a");
|
||||
Automaton concat = Operations.concatenate(a, Automata.makeEmpty());
|
||||
assertTrue(Operations.isEmpty(concat));
|
||||
|
||||
}
|
||||
|
||||
/** Test optimization to concatenate() with empty String to an NFA */
|
||||
|
@ -124,6 +124,28 @@ public class TestOperations extends LuceneTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
public void testIsFiniteEatsStack() {
|
||||
char[] chars = new char[50000];
|
||||
TestUtil.randomFixedLengthUnicodeString(random(), chars, 0, chars.length);
|
||||
String bigString1 = new String(chars);
|
||||
TestUtil.randomFixedLengthUnicodeString(random(), chars, 0, chars.length);
|
||||
String bigString2 = new String(chars);
|
||||
Automaton a = Operations.union(Automata.makeString(bigString1), Automata.makeString(bigString2));
|
||||
IllegalArgumentException exc = expectThrows(IllegalArgumentException.class, () -> Operations.isFinite(a));
|
||||
assertTrue(exc.getMessage().contains("input automaton is too large"));
|
||||
}
|
||||
|
||||
public void testTopoSortEatsStack() {
|
||||
char[] chars = new char[50000];
|
||||
TestUtil.randomFixedLengthUnicodeString(random(), chars, 0, chars.length);
|
||||
String bigString1 = new String(chars);
|
||||
TestUtil.randomFixedLengthUnicodeString(random(), chars, 0, chars.length);
|
||||
String bigString2 = new String(chars);
|
||||
Automaton a = Operations.union(Automata.makeString(bigString1), Automata.makeString(bigString2));
|
||||
IllegalArgumentException exc = expectThrows(IllegalArgumentException.class, () -> Operations.topoSortStates(a));
|
||||
assertTrue(exc.getMessage().contains("input automaton is too large"));
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the set of all accepted strings.
|
||||
*
|
||||
|
|
|
@ -19,13 +19,6 @@ package org.apache.lucene.util.automaton;
|
|||
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.ObjectInput;
|
||||
import java.io.ObjectInputStream;
|
||||
import java.io.ObjectOutput;
|
||||
import java.io.ObjectOutputStream;
|
||||
|
||||
public class TestRegExp extends LuceneTestCase {
|
||||
|
||||
/**
|
||||
|
@ -54,6 +47,14 @@ public class TestRegExp extends LuceneTestCase {
|
|||
assertTrue(expected.getMessage().contains(source));
|
||||
}
|
||||
|
||||
public void testSerializeTooManyStatesToRepeat() throws Exception {
|
||||
String source = "a{50001}";
|
||||
TooComplexToDeterminizeException expected = expectThrows(TooComplexToDeterminizeException.class, () -> {
|
||||
new RegExp(source).toAutomaton(50000);
|
||||
});
|
||||
assertTrue(expected.getMessage().contains(source));
|
||||
}
|
||||
|
||||
// LUCENE-6713
|
||||
public void testSerializeTooManyStatesToDeterminizeExc() throws Exception {
|
||||
// LUCENE-6046
|
||||
|
@ -62,16 +63,6 @@ public class TestRegExp extends LuceneTestCase {
|
|||
new RegExp(source).toAutomaton();
|
||||
});
|
||||
assertTrue(expected.getMessage().contains(source));
|
||||
|
||||
ByteArrayOutputStream bos = new ByteArrayOutputStream();
|
||||
ObjectOutput out = new ObjectOutputStream(bos);
|
||||
out.writeObject(expected);
|
||||
byte[] bytes = bos.toByteArray();
|
||||
|
||||
ByteArrayInputStream bis = new ByteArrayInputStream(bytes);
|
||||
ObjectInput in = new ObjectInputStream(bis);
|
||||
TooComplexToDeterminizeException e2 = (TooComplexToDeterminizeException) in.readObject();
|
||||
assertNotNull(e2.getMessage());
|
||||
}
|
||||
|
||||
// LUCENE-6046
|
||||
|
|
|
@ -1252,10 +1252,9 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
|
|||
suggester.build(new InputArrayIterator(new Input[] {
|
||||
new Input(bigString, 7)}));
|
||||
fail("did not hit expected exception");
|
||||
} catch (StackOverflowError soe) {
|
||||
// OK
|
||||
} catch (IllegalArgumentException iae) {
|
||||
// expected
|
||||
assertTrue(iae.getMessage().contains("input automaton is too large"));
|
||||
}
|
||||
IOUtils.close(a, tempDir);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue