mirror of https://github.com/apache/lucene.git
LUCENE-5628: change getFiniteStrings to iterative impl
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1592362 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
6e9cbf3986
commit
8c4e826818
|
@ -152,6 +152,12 @@ Bug fixes
|
||||||
* LUCENE-5639: Fix PositionLengthAttribute implementation in Token class.
|
* LUCENE-5639: Fix PositionLengthAttribute implementation in Token class.
|
||||||
(Uwe Schindler, Robert Muir)
|
(Uwe Schindler, Robert Muir)
|
||||||
|
|
||||||
|
* LUCENE-5628: Change getFiniteStrings to iterative not recursive
|
||||||
|
implementation, so that building suggesters on a long suggestion
|
||||||
|
doesn't risk overflowing the stack; previously it consumed one Java
|
||||||
|
stack frame per character in the expanded suggestion (Robert Muir,
|
||||||
|
Simon Willnauer, Mike McCandless).
|
||||||
|
|
||||||
Test Framework
|
Test Framework
|
||||||
|
|
||||||
* LUCENE-5622: Fail tests if they print over the given limit of bytes to
|
* LUCENE-5622: Fail tests if they print over the given limit of bytes to
|
||||||
|
|
|
@ -30,12 +30,16 @@
|
||||||
package org.apache.lucene.util.automaton;
|
package org.apache.lucene.util.automaton;
|
||||||
|
|
||||||
import java.util.BitSet;
|
import java.util.BitSet;
|
||||||
|
import java.util.Collections;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
|
import java.util.IdentityHashMap;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
|
import org.apache.lucene.util.ArrayUtil;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.util.IntsRef;
|
import org.apache.lucene.util.IntsRef;
|
||||||
|
import org.apache.lucene.util.RamUsageEstimator;
|
||||||
import org.apache.lucene.util.fst.Util;
|
import org.apache.lucene.util.fst.Util;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -212,57 +216,159 @@ final public class SpecialOperations {
|
||||||
return accept;
|
return accept;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static class PathNode {
|
||||||
|
|
||||||
|
/** Which state the path node ends on, whose
|
||||||
|
* transitions we are enumerating. */
|
||||||
|
public State state;
|
||||||
|
|
||||||
|
/** Which state the current transition leads to. */
|
||||||
|
public State to;
|
||||||
|
|
||||||
|
/** Which transition we are on. */
|
||||||
|
public int transition;
|
||||||
|
|
||||||
|
/** Which label we are on, in the min-max range of the
|
||||||
|
* current Transition */
|
||||||
|
public int label;
|
||||||
|
|
||||||
|
public void resetState(State state) {
|
||||||
|
assert state.numTransitions() != 0;
|
||||||
|
this.state = state;
|
||||||
|
transition = 0;
|
||||||
|
Transition t = state.transitionsArray[transition];
|
||||||
|
label = t.min;
|
||||||
|
to = t.to;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Returns next label of current transition, or
|
||||||
|
* advances to next transition and returns its first
|
||||||
|
* label, if current one is exhausted. If there are
|
||||||
|
* no more transitions, returns -1. */
|
||||||
|
public int nextLabel() {
|
||||||
|
if (label > state.transitionsArray[transition].max) {
|
||||||
|
// We've exhaused the current transition's labels;
|
||||||
|
// move to next transitions:
|
||||||
|
transition++;
|
||||||
|
if (transition >= state.numTransitions()) {
|
||||||
|
// We're done iterating transitions leaving this state
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
Transition t = state.transitionsArray[transition];
|
||||||
|
label = t.min;
|
||||||
|
to = t.to;
|
||||||
|
}
|
||||||
|
return label++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static PathNode getNode(PathNode[] nodes, int index) {
|
||||||
|
assert index < nodes.length;
|
||||||
|
if (nodes[index] == null) {
|
||||||
|
nodes[index] = new PathNode();
|
||||||
|
}
|
||||||
|
return nodes[index];
|
||||||
|
}
|
||||||
|
|
||||||
// TODO: this is a dangerous method ... Automaton could be
|
// TODO: this is a dangerous method ... Automaton could be
|
||||||
// huge ... and it's better in general for caller to
|
// huge ... and it's better in general for caller to
|
||||||
// enumerate & process in a single walk:
|
// enumerate & process in a single walk:
|
||||||
|
|
||||||
/**
|
/** Returns the set of accepted strings, up to at most
|
||||||
* Returns the set of accepted strings, assuming that at most
|
* <code>limit</code> strings. If more than <code>limit</code>
|
||||||
* <code>limit</code> strings are accepted. If more than <code>limit</code>
|
* strings are accepted, the first limit strings found are returned. If <code>limit</code> == -1, then
|
||||||
* strings are accepted, the first limit strings found are returned. If <code>limit</code><0, then
|
* the limit is infinite. If the {@link Automaton} has
|
||||||
* the limit is infinite.
|
* cycles then this method might throw {@code
|
||||||
*/
|
* IllegalArgumentException} but that is not guaranteed
|
||||||
|
* when the limit is set. */
|
||||||
public static Set<IntsRef> getFiniteStrings(Automaton a, int limit) {
|
public static Set<IntsRef> getFiniteStrings(Automaton a, int limit) {
|
||||||
HashSet<IntsRef> strings = new HashSet<>();
|
Set<IntsRef> results = new HashSet<>();
|
||||||
if (a.isSingleton()) {
|
|
||||||
if (limit > 0) {
|
if (limit == -1 || limit > 0) {
|
||||||
strings.add(Util.toUTF32(a.singleton, new IntsRef()));
|
// OK
|
||||||
}
|
} else {
|
||||||
} else if (!getFiniteStrings(a.initial, new HashSet<State>(), strings, new IntsRef(), limit)) {
|
throw new IllegalArgumentException("limit must be -1 (which means no limit), or > 0; got: " + limit);
|
||||||
return strings;
|
|
||||||
}
|
}
|
||||||
return strings;
|
|
||||||
}
|
if (a.isSingleton()) {
|
||||||
|
// Easy case: automaton accepts only 1 string
|
||||||
/**
|
results.add(Util.toUTF32(a.singleton, new IntsRef()));
|
||||||
* Returns the strings that can be produced from the given state, or
|
} else {
|
||||||
* false if more than <code>limit</code> strings are found.
|
|
||||||
* <code>limit</code><0 means "infinite".
|
if (a.initial.accept) {
|
||||||
*/
|
// Special case the empty string, as usual:
|
||||||
private static boolean getFiniteStrings(State s, HashSet<State> pathstates,
|
results.add(new IntsRef());
|
||||||
HashSet<IntsRef> strings, IntsRef path, int limit) {
|
|
||||||
pathstates.add(s);
|
|
||||||
for (Transition t : s.getTransitions()) {
|
|
||||||
if (pathstates.contains(t.to)) {
|
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
for (int n = t.min; n <= t.max; n++) {
|
|
||||||
path.grow(path.length+1);
|
if (a.initial.numTransitions() > 0 && (limit == -1 || results.size() < limit)) {
|
||||||
path.ints[path.length] = n;
|
|
||||||
path.length++;
|
// TODO: we could use state numbers here and just
|
||||||
if (t.to.accept) {
|
// alloc array, but asking for states array can be
|
||||||
strings.add(IntsRef.deepCopyOf(path));
|
// costly (it's lazily computed):
|
||||||
if (limit >= 0 && strings.size() > limit) {
|
|
||||||
return false;
|
// Tracks which states are in the current path, for
|
||||||
|
// cycle detection:
|
||||||
|
Set<State> pathStates = Collections.newSetFromMap(new IdentityHashMap<State,Boolean>());
|
||||||
|
|
||||||
|
// Stack to hold our current state in the
|
||||||
|
// recursion/iteration:
|
||||||
|
PathNode[] nodes = new PathNode[4];
|
||||||
|
|
||||||
|
pathStates.add(a.initial);
|
||||||
|
PathNode root = getNode(nodes, 0);
|
||||||
|
root.resetState(a.initial);
|
||||||
|
|
||||||
|
IntsRef string = new IntsRef(1);
|
||||||
|
string.length = 1;
|
||||||
|
|
||||||
|
while (string.length > 0) {
|
||||||
|
|
||||||
|
PathNode node = nodes[string.length-1];
|
||||||
|
|
||||||
|
// Get next label leaving the current node:
|
||||||
|
int label = node.nextLabel();
|
||||||
|
|
||||||
|
if (label != -1) {
|
||||||
|
string.ints[string.length-1] = label;
|
||||||
|
|
||||||
|
if (node.to.accept) {
|
||||||
|
// This transition leads to an accept state,
|
||||||
|
// so we save the current string:
|
||||||
|
results.add(IntsRef.deepCopyOf(string));
|
||||||
|
if (results.size() == limit) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (node.to.numTransitions() != 0) {
|
||||||
|
// Now recurse: the destination of this transition has
|
||||||
|
// outgoing transitions:
|
||||||
|
if (pathStates.contains(node.to)) {
|
||||||
|
throw new IllegalArgumentException("automaton has cycles");
|
||||||
|
}
|
||||||
|
pathStates.add(node.to);
|
||||||
|
|
||||||
|
// Push node onto stack:
|
||||||
|
if (nodes.length == string.length) {
|
||||||
|
PathNode[] newNodes = new PathNode[ArrayUtil.oversize(nodes.length+1, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
|
||||||
|
System.arraycopy(nodes, 0, newNodes, 0, nodes.length);
|
||||||
|
nodes = newNodes;
|
||||||
|
}
|
||||||
|
getNode(nodes, string.length).resetState(node.to);
|
||||||
|
string.length++;
|
||||||
|
string.grow(string.length);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// No more transitions leaving this state,
|
||||||
|
// pop/return back to previous state:
|
||||||
|
assert pathStates.contains(node.state);
|
||||||
|
pathStates.remove(node.state);
|
||||||
|
string.length--;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (!getFiniteStrings(t.to, pathstates, strings, path, limit)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
path.length--;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
pathstates.remove(s);
|
|
||||||
return true;
|
return results;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -277,9 +277,4 @@ public class State implements Comparable<State> {
|
||||||
public int compareTo(State s) {
|
public int compareTo(State s) {
|
||||||
return s.id - id;
|
return s.id - id;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public int hashCode() {
|
|
||||||
return id;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,12 +1,5 @@
|
||||||
package org.apache.lucene.util.automaton;
|
package org.apache.lucene.util.automaton;
|
||||||
|
|
||||||
import java.util.Set;
|
|
||||||
|
|
||||||
import org.apache.lucene.util.BytesRef;
|
|
||||||
import org.apache.lucene.util.IntsRef;
|
|
||||||
import org.apache.lucene.util.LuceneTestCase;
|
|
||||||
import org.apache.lucene.util.fst.Util;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
@ -24,6 +17,18 @@ import org.apache.lucene.util.fst.Util;
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
import org.apache.lucene.util.IntsRef;
|
||||||
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
|
import org.apache.lucene.util.TestUtil;
|
||||||
|
import org.apache.lucene.util.fst.Util;
|
||||||
|
|
||||||
public class TestSpecialOperations extends LuceneTestCase {
|
public class TestSpecialOperations extends LuceneTestCase {
|
||||||
/**
|
/**
|
||||||
* tests against the original brics implementation.
|
* tests against the original brics implementation.
|
||||||
|
@ -36,14 +41,24 @@ public class TestSpecialOperations extends LuceneTestCase {
|
||||||
assertEquals(AutomatonTestUtil.isFiniteSlow(a), SpecialOperations.isFinite(b));
|
assertEquals(AutomatonTestUtil.isFiniteSlow(a), SpecialOperations.isFinite(b));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Pass false for testRecursive if the expected strings
|
||||||
|
* may be too long */
|
||||||
|
private Set<IntsRef> getFiniteStrings(Automaton a, int limit, boolean testRecursive) {
|
||||||
|
Set<IntsRef> result = SpecialOperations.getFiniteStrings(a, limit);
|
||||||
|
if (testRecursive) {
|
||||||
|
assertEquals(AutomatonTestUtil.getFiniteStringsRecursive(a, limit), result);
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Basic test for getFiniteStrings
|
* Basic test for getFiniteStrings
|
||||||
*/
|
*/
|
||||||
public void testFiniteStrings() {
|
public void testFiniteStringsBasic() {
|
||||||
Automaton a = BasicOperations.union(BasicAutomata.makeString("dog"), BasicAutomata.makeString("duck"));
|
Automaton a = BasicOperations.union(BasicAutomata.makeString("dog"), BasicAutomata.makeString("duck"));
|
||||||
MinimizationOperations.minimize(a);
|
MinimizationOperations.minimize(a);
|
||||||
Set<IntsRef> strings = SpecialOperations.getFiniteStrings(a, -1);
|
Set<IntsRef> strings = getFiniteStrings(a, -1, true);
|
||||||
assertEquals(2, strings.size());
|
assertEquals(2, strings.size());
|
||||||
IntsRef dog = new IntsRef();
|
IntsRef dog = new IntsRef();
|
||||||
Util.toIntsRef(new BytesRef("dog"), dog);
|
Util.toIntsRef(new BytesRef("dog"), dog);
|
||||||
|
@ -52,4 +67,156 @@ public class TestSpecialOperations extends LuceneTestCase {
|
||||||
Util.toIntsRef(new BytesRef("duck"), duck);
|
Util.toIntsRef(new BytesRef("duck"), duck);
|
||||||
assertTrue(strings.contains(duck));
|
assertTrue(strings.contains(duck));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testFiniteStringsEatsStack() {
|
||||||
|
char[] chars = new char[50000];
|
||||||
|
TestUtil.randomFixedLengthUnicodeString(random(), chars, 0, chars.length);
|
||||||
|
String bigString1 = new String(chars);
|
||||||
|
TestUtil.randomFixedLengthUnicodeString(random(), chars, 0, chars.length);
|
||||||
|
String bigString2 = new String(chars);
|
||||||
|
Automaton a = BasicOperations.union(BasicAutomata.makeString(bigString1), BasicAutomata.makeString(bigString2));
|
||||||
|
Set<IntsRef> strings = getFiniteStrings(a, -1, false);
|
||||||
|
assertEquals(2, strings.size());
|
||||||
|
IntsRef scratch = new IntsRef();
|
||||||
|
Util.toUTF32(bigString1.toCharArray(), 0, bigString1.length(), scratch);
|
||||||
|
assertTrue(strings.contains(scratch));
|
||||||
|
Util.toUTF32(bigString2.toCharArray(), 0, bigString2.length(), scratch);
|
||||||
|
assertTrue(strings.contains(scratch));
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testRandomFiniteStrings1() {
|
||||||
|
|
||||||
|
int numStrings = atLeast(500);
|
||||||
|
if (VERBOSE) {
|
||||||
|
System.out.println("TEST: numStrings=" + numStrings);
|
||||||
|
}
|
||||||
|
|
||||||
|
Set<IntsRef> strings = new HashSet<IntsRef>();
|
||||||
|
List<Automaton> automata = new ArrayList<Automaton>();
|
||||||
|
for(int i=0;i<numStrings;i++) {
|
||||||
|
String s = TestUtil.randomSimpleString(random(), 1, 200);
|
||||||
|
automata.add(BasicAutomata.makeString(s));
|
||||||
|
IntsRef scratch = new IntsRef();
|
||||||
|
Util.toUTF32(s.toCharArray(), 0, s.length(), scratch);
|
||||||
|
strings.add(scratch);
|
||||||
|
if (VERBOSE) {
|
||||||
|
System.out.println(" add string=" + s);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: we could sometimes use
|
||||||
|
// DaciukMihovAutomatonBuilder here
|
||||||
|
|
||||||
|
// TODO: what other random things can we do here...
|
||||||
|
Automaton a = BasicOperations.union(automata);
|
||||||
|
if (random().nextBoolean()) {
|
||||||
|
Automaton.minimize(a);
|
||||||
|
if (VERBOSE) {
|
||||||
|
System.out.println("TEST: a.minimize numStates=" + a.getNumberOfStates());
|
||||||
|
}
|
||||||
|
} else if (random().nextBoolean()) {
|
||||||
|
if (VERBOSE) {
|
||||||
|
System.out.println("TEST: a.determinize");
|
||||||
|
}
|
||||||
|
a.determinize();
|
||||||
|
} else if (random().nextBoolean()) {
|
||||||
|
if (VERBOSE) {
|
||||||
|
System.out.println("TEST: a.reduce");
|
||||||
|
}
|
||||||
|
a.reduce();
|
||||||
|
} else if (random().nextBoolean()) {
|
||||||
|
if (VERBOSE) {
|
||||||
|
System.out.println("TEST: a.getNumberedStates");
|
||||||
|
}
|
||||||
|
a.getNumberedStates();
|
||||||
|
}
|
||||||
|
|
||||||
|
Set<IntsRef> actual = getFiniteStrings(a, -1, true);
|
||||||
|
if (strings.equals(actual) == false) {
|
||||||
|
System.out.println("strings.size()=" + strings.size() + " actual.size=" + actual.size());
|
||||||
|
List<IntsRef> x = new ArrayList<>(strings);
|
||||||
|
Collections.sort(x);
|
||||||
|
List<IntsRef> y = new ArrayList<>(actual);
|
||||||
|
Collections.sort(y);
|
||||||
|
int end = Math.min(x.size(), y.size());
|
||||||
|
for(int i=0;i<end;i++) {
|
||||||
|
System.out.println(" i=" + i + " string=" + toString(x.get(i)) + " actual=" + toString(y.get(i)));
|
||||||
|
}
|
||||||
|
fail("wrong strings found");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ascii only!
|
||||||
|
private static String toString(IntsRef ints) {
|
||||||
|
BytesRef br = new BytesRef(ints.length);
|
||||||
|
for(int i=0;i<ints.length;i++) {
|
||||||
|
br.bytes[i] = (byte) ints.ints[i];
|
||||||
|
}
|
||||||
|
br.length = ints.length;
|
||||||
|
return br.utf8ToString();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testWithCycle() throws Exception {
|
||||||
|
try {
|
||||||
|
SpecialOperations.getFiniteStrings(new RegExp("abc.*", RegExp.NONE).toAutomaton(), -1);
|
||||||
|
fail("did not hit exception");
|
||||||
|
} catch (IllegalArgumentException iae) {
|
||||||
|
// expected
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testRandomFiniteStrings2() {
|
||||||
|
// Just makes sure we can run on any random finite
|
||||||
|
// automaton:
|
||||||
|
int iters = atLeast(100);
|
||||||
|
for(int i=0;i<iters;i++) {
|
||||||
|
Automaton a = AutomatonTestUtil.randomAutomaton(random());
|
||||||
|
try {
|
||||||
|
// Must pass a limit because the random automaton
|
||||||
|
// can accept MANY strings:
|
||||||
|
SpecialOperations.getFiniteStrings(a, TestUtil.nextInt(random(), 1, 1000));
|
||||||
|
// NOTE: cannot do this, because the method is not
|
||||||
|
// guaranteed to detect cycles when you have a limit
|
||||||
|
//assertTrue(SpecialOperations.isFinite(a));
|
||||||
|
} catch (IllegalArgumentException iae) {
|
||||||
|
assertFalse(SpecialOperations.isFinite(a));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testInvalidLimit() {
|
||||||
|
Automaton a = AutomatonTestUtil.randomAutomaton(random());
|
||||||
|
try {
|
||||||
|
SpecialOperations.getFiniteStrings(a, -7);
|
||||||
|
fail("did not hit exception");
|
||||||
|
} catch (IllegalArgumentException iae) {
|
||||||
|
// expected
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testInvalidLimit2() {
|
||||||
|
Automaton a = AutomatonTestUtil.randomAutomaton(random());
|
||||||
|
try {
|
||||||
|
SpecialOperations.getFiniteStrings(a, 0);
|
||||||
|
fail("did not hit exception");
|
||||||
|
} catch (IllegalArgumentException iae) {
|
||||||
|
// expected
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testSingletonNoLimit() {
|
||||||
|
Set<IntsRef> result = SpecialOperations.getFiniteStrings(BasicAutomata.makeString("foobar"), -1);
|
||||||
|
assertEquals(1, result.size());
|
||||||
|
IntsRef scratch = new IntsRef();
|
||||||
|
Util.toUTF32("foobar".toCharArray(), 0, 6, scratch);
|
||||||
|
assertTrue(result.contains(scratch));
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testSingletonLimit1() {
|
||||||
|
Set<IntsRef> result = SpecialOperations.getFiniteStrings(BasicAutomata.makeString("foobar"), 1);
|
||||||
|
assertEquals(1, result.size());
|
||||||
|
IntsRef scratch = new IntsRef();
|
||||||
|
Util.toUTF32("foobar".toCharArray(), 0, 6, scratch);
|
||||||
|
assertTrue(result.contains(scratch));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -28,8 +28,10 @@ import java.util.Random;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
import org.apache.lucene.util.ArrayUtil;
|
import org.apache.lucene.util.ArrayUtil;
|
||||||
|
import org.apache.lucene.util.IntsRef;
|
||||||
import org.apache.lucene.util.TestUtil;
|
import org.apache.lucene.util.TestUtil;
|
||||||
import org.apache.lucene.util.UnicodeUtil;
|
import org.apache.lucene.util.UnicodeUtil;
|
||||||
|
import org.apache.lucene.util.fst.Util;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Utilities for testing automata.
|
* Utilities for testing automata.
|
||||||
|
@ -387,6 +389,62 @@ public class AutomatonTestUtil {
|
||||||
a.removeDeadTransitions();
|
a.removeDeadTransitions();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Simple, original implementation of getFiniteStrings.
|
||||||
|
*
|
||||||
|
* <p>Returns the set of accepted strings, assuming that at most
|
||||||
|
* <code>limit</code> strings are accepted. If more than <code>limit</code>
|
||||||
|
* strings are accepted, the first limit strings found are returned. If <code>limit</code><0, then
|
||||||
|
* the limit is infinite.
|
||||||
|
*
|
||||||
|
* <p>This implementation is recursive: it uses one stack
|
||||||
|
* frame for each digit in the returned strings (ie, max
|
||||||
|
* is the max length returned string).
|
||||||
|
*/
|
||||||
|
public static Set<IntsRef> getFiniteStringsRecursive(Automaton a, int limit) {
|
||||||
|
HashSet<IntsRef> strings = new HashSet<>();
|
||||||
|
if (a.isSingleton()) {
|
||||||
|
if (limit > 0) {
|
||||||
|
strings.add(Util.toUTF32(a.singleton, new IntsRef()));
|
||||||
|
}
|
||||||
|
} else if (!getFiniteStrings(a.initial, new HashSet<State>(), strings, new IntsRef(), limit)) {
|
||||||
|
return strings;
|
||||||
|
}
|
||||||
|
return strings;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the strings that can be produced from the given state, or
|
||||||
|
* false if more than <code>limit</code> strings are found.
|
||||||
|
* <code>limit</code><0 means "infinite".
|
||||||
|
*/
|
||||||
|
private static boolean getFiniteStrings(State s, HashSet<State> pathstates,
|
||||||
|
HashSet<IntsRef> strings, IntsRef path, int limit) {
|
||||||
|
pathstates.add(s);
|
||||||
|
for (Transition t : s.getTransitions()) {
|
||||||
|
if (pathstates.contains(t.to)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
for (int n = t.min; n <= t.max; n++) {
|
||||||
|
path.grow(path.length+1);
|
||||||
|
path.ints[path.length] = n;
|
||||||
|
path.length++;
|
||||||
|
if (t.to.accept) {
|
||||||
|
strings.add(IntsRef.deepCopyOf(path));
|
||||||
|
if (limit >= 0 && strings.size() > limit) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!getFiniteStrings(t.to, pathstates, strings, path, limit)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
path.length--;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
pathstates.remove(s);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns true if the language of this automaton is finite.
|
* Returns true if the language of this automaton is finite.
|
||||||
* <p>
|
* <p>
|
||||||
|
|
Loading…
Reference in New Issue