mirror of https://github.com/apache/lucene.git
move random accepted string test utility code to src/test
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@956637 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
c6bd141f11
commit
eb444204d6
|
@ -38,12 +38,10 @@ import java.util.Arrays;
|
|||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.IdentityHashMap;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.Random;
|
||||
|
||||
/**
|
||||
* Basic automata operations.
|
||||
|
@ -848,143 +846,4 @@ final public class BasicOperations {
|
|||
return accept;
|
||||
}
|
||||
}
|
||||
|
||||
// picks a random int code point that this transition
|
||||
// accepts, avoiding the surrogates range since they are
|
||||
// "defined" in UTF32. Don't call this on a transition
|
||||
// that only accepts UTF16 surrogate values!!
|
||||
private static int getRandomCodePoint(final Random r, final Transition t) {
|
||||
return t.min+r.nextInt(t.max-t.min+1);
|
||||
}
|
||||
|
||||
public static class RandomAcceptedStrings {
|
||||
|
||||
private final Map<Transition,Boolean> leadsToAccept;
|
||||
private final Automaton a;
|
||||
|
||||
private static class ArrivingTransition {
|
||||
final State from;
|
||||
final Transition t;
|
||||
public ArrivingTransition(State from, Transition t) {
|
||||
this.from = from;
|
||||
this.t = t;
|
||||
}
|
||||
}
|
||||
|
||||
public RandomAcceptedStrings(Automaton a) {
|
||||
this.a = a;
|
||||
if (a.isSingleton()) {
|
||||
leadsToAccept = null;
|
||||
return;
|
||||
}
|
||||
|
||||
// must use IdentityHashmap because two Transitions w/
|
||||
// different start nodes can be considered the same
|
||||
leadsToAccept = new IdentityHashMap<Transition,Boolean>();
|
||||
final Map<State,List<ArrivingTransition>> allArriving = new HashMap<State,List<ArrivingTransition>>();
|
||||
|
||||
final LinkedList<State> q = new LinkedList<State>();
|
||||
final Set<State> seen = new HashSet<State>();
|
||||
|
||||
// reverse map the transitions, so we can quickly look
|
||||
// up all arriving transitions to a given state
|
||||
for(State s: a.getNumberedStates()) {
|
||||
for(int i=0;i<s.numTransitions;i++) {
|
||||
final Transition t = s.transitionsArray[i];
|
||||
List<ArrivingTransition> tl = allArriving.get(t.to);
|
||||
if (tl == null) {
|
||||
tl = new ArrayList<ArrivingTransition>();
|
||||
allArriving.put(t.to, tl);
|
||||
}
|
||||
tl.add(new ArrivingTransition(s, t));
|
||||
}
|
||||
if (s.accept) {
|
||||
q.add(s);
|
||||
seen.add(s);
|
||||
}
|
||||
}
|
||||
|
||||
// Breadth-first search, from accept states,
|
||||
// backwards:
|
||||
while(!q.isEmpty()) {
|
||||
final State s = q.removeFirst();
|
||||
List<ArrivingTransition> arriving = allArriving.get(s);
|
||||
if (arriving != null) {
|
||||
for(ArrivingTransition at : arriving) {
|
||||
final State from = at.from;
|
||||
if (!seen.contains(from)) {
|
||||
q.add(from);
|
||||
seen.add(from);
|
||||
leadsToAccept.put(at.t, Boolean.TRUE);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public int[] getRandomAcceptedString(Random r) {
|
||||
|
||||
final List<Integer> soFar = new ArrayList<Integer>();
|
||||
if (a.isSingleton()) {
|
||||
// accepts only one
|
||||
final String s = a.singleton;
|
||||
|
||||
int charUpto = 0;
|
||||
while(charUpto < s.length()) {
|
||||
final int cp = s.codePointAt(charUpto);
|
||||
charUpto += Character.charCount(cp);
|
||||
soFar.add(cp);
|
||||
}
|
||||
} else {
|
||||
|
||||
State s = a.initial;
|
||||
|
||||
while(true) {
|
||||
|
||||
if (s.accept) {
|
||||
if (s.numTransitions == 0) {
|
||||
// stop now
|
||||
break;
|
||||
} else {
|
||||
if (r.nextBoolean()) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (s.numTransitions == 0) {
|
||||
throw new RuntimeException("this automaton has dead states");
|
||||
}
|
||||
|
||||
boolean cheat = r.nextBoolean();
|
||||
|
||||
final Transition t;
|
||||
if (cheat) {
|
||||
// pick a transition that we know is the fastest
|
||||
// path to an accept state
|
||||
List<Transition> toAccept = new ArrayList<Transition>();
|
||||
for(int i=0;i<s.numTransitions;i++) {
|
||||
final Transition t0 = s.transitionsArray[i];
|
||||
if (leadsToAccept.containsKey(t0)) {
|
||||
toAccept.add(t0);
|
||||
}
|
||||
}
|
||||
if (toAccept.size() == 0) {
|
||||
// this is OK -- it means we jumped into a cycle
|
||||
t = s.transitionsArray[r.nextInt(s.numTransitions)];
|
||||
} else {
|
||||
t = toAccept.get(r.nextInt(toAccept.size()));
|
||||
}
|
||||
} else {
|
||||
t = s.transitionsArray[r.nextInt(s.numTransitions)];
|
||||
}
|
||||
|
||||
soFar.add(getRandomCodePoint(r, t));
|
||||
s = t.to;
|
||||
}
|
||||
}
|
||||
|
||||
return ArrayUtil.toIntArray(soFar);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -17,8 +17,17 @@ package org.apache.lucene.util.automaton;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.IdentityHashMap;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Random;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.UnicodeUtil;
|
||||
import org.apache.lucene.util._TestUtil;
|
||||
|
||||
|
@ -65,4 +74,143 @@ public class AutomatonTestUtil {
|
|||
}
|
||||
return new String(buffer, 0, end);
|
||||
}
|
||||
|
||||
// picks a random int code point that this transition
|
||||
// accepts, avoiding the surrogates range since they are
|
||||
// "defined" in UTF32. Don't call this on a transition
|
||||
// that only accepts UTF16 surrogate values!!
|
||||
private static int getRandomCodePoint(final Random r, final Transition t) {
|
||||
return t.min+r.nextInt(t.max-t.min+1);
|
||||
}
|
||||
|
||||
public static class RandomAcceptedStrings {
|
||||
|
||||
private final Map<Transition,Boolean> leadsToAccept;
|
||||
private final Automaton a;
|
||||
|
||||
private static class ArrivingTransition {
|
||||
final State from;
|
||||
final Transition t;
|
||||
public ArrivingTransition(State from, Transition t) {
|
||||
this.from = from;
|
||||
this.t = t;
|
||||
}
|
||||
}
|
||||
|
||||
public RandomAcceptedStrings(Automaton a) {
|
||||
this.a = a;
|
||||
if (a.isSingleton()) {
|
||||
leadsToAccept = null;
|
||||
return;
|
||||
}
|
||||
|
||||
// must use IdentityHashmap because two Transitions w/
|
||||
// different start nodes can be considered the same
|
||||
leadsToAccept = new IdentityHashMap<Transition,Boolean>();
|
||||
final Map<State,List<ArrivingTransition>> allArriving = new HashMap<State,List<ArrivingTransition>>();
|
||||
|
||||
final LinkedList<State> q = new LinkedList<State>();
|
||||
final Set<State> seen = new HashSet<State>();
|
||||
|
||||
// reverse map the transitions, so we can quickly look
|
||||
// up all arriving transitions to a given state
|
||||
for(State s: a.getNumberedStates()) {
|
||||
for(int i=0;i<s.numTransitions;i++) {
|
||||
final Transition t = s.transitionsArray[i];
|
||||
List<ArrivingTransition> tl = allArriving.get(t.to);
|
||||
if (tl == null) {
|
||||
tl = new ArrayList<ArrivingTransition>();
|
||||
allArriving.put(t.to, tl);
|
||||
}
|
||||
tl.add(new ArrivingTransition(s, t));
|
||||
}
|
||||
if (s.accept) {
|
||||
q.add(s);
|
||||
seen.add(s);
|
||||
}
|
||||
}
|
||||
|
||||
// Breadth-first search, from accept states,
|
||||
// backwards:
|
||||
while(!q.isEmpty()) {
|
||||
final State s = q.removeFirst();
|
||||
List<ArrivingTransition> arriving = allArriving.get(s);
|
||||
if (arriving != null) {
|
||||
for(ArrivingTransition at : arriving) {
|
||||
final State from = at.from;
|
||||
if (!seen.contains(from)) {
|
||||
q.add(from);
|
||||
seen.add(from);
|
||||
leadsToAccept.put(at.t, Boolean.TRUE);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public int[] getRandomAcceptedString(Random r) {
|
||||
|
||||
final List<Integer> soFar = new ArrayList<Integer>();
|
||||
if (a.isSingleton()) {
|
||||
// accepts only one
|
||||
final String s = a.singleton;
|
||||
|
||||
int charUpto = 0;
|
||||
while(charUpto < s.length()) {
|
||||
final int cp = s.codePointAt(charUpto);
|
||||
charUpto += Character.charCount(cp);
|
||||
soFar.add(cp);
|
||||
}
|
||||
} else {
|
||||
|
||||
State s = a.initial;
|
||||
|
||||
while(true) {
|
||||
|
||||
if (s.accept) {
|
||||
if (s.numTransitions == 0) {
|
||||
// stop now
|
||||
break;
|
||||
} else {
|
||||
if (r.nextBoolean()) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (s.numTransitions == 0) {
|
||||
throw new RuntimeException("this automaton has dead states");
|
||||
}
|
||||
|
||||
boolean cheat = r.nextBoolean();
|
||||
|
||||
final Transition t;
|
||||
if (cheat) {
|
||||
// pick a transition that we know is the fastest
|
||||
// path to an accept state
|
||||
List<Transition> toAccept = new ArrayList<Transition>();
|
||||
for(int i=0;i<s.numTransitions;i++) {
|
||||
final Transition t0 = s.transitionsArray[i];
|
||||
if (leadsToAccept.containsKey(t0)) {
|
||||
toAccept.add(t0);
|
||||
}
|
||||
}
|
||||
if (toAccept.size() == 0) {
|
||||
// this is OK -- it means we jumped into a cycle
|
||||
t = s.transitionsArray[r.nextInt(s.numTransitions)];
|
||||
} else {
|
||||
t = toAccept.get(r.nextInt(toAccept.size()));
|
||||
}
|
||||
} else {
|
||||
t = s.transitionsArray[r.nextInt(s.numTransitions)];
|
||||
}
|
||||
|
||||
soFar.add(getRandomCodePoint(r, t));
|
||||
s = t.to;
|
||||
}
|
||||
}
|
||||
|
||||
return ArrayUtil.toIntArray(soFar);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -95,7 +95,7 @@ public class TestBasicOperations extends LuceneTestCase {
|
|||
final Automaton a = re.toAutomaton();
|
||||
assertFalse(BasicOperations.isEmpty(a));
|
||||
|
||||
final BasicOperations.RandomAcceptedStrings rx = new BasicOperations.RandomAcceptedStrings(a);
|
||||
final AutomatonTestUtil.RandomAcceptedStrings rx = new AutomatonTestUtil.RandomAcceptedStrings(a);
|
||||
for(int j=0;j<ITER2;j++) {
|
||||
int[] acc = null;
|
||||
try {
|
||||
|
|
|
@ -173,7 +173,7 @@ public class TestUTF32ToUTF8 extends LuceneTestCase {
|
|||
private void assertAutomaton(Automaton automaton) throws Exception {
|
||||
CharacterRunAutomaton cra = new CharacterRunAutomaton(automaton);
|
||||
ByteRunAutomaton bra = new ByteRunAutomaton(automaton);
|
||||
final BasicOperations.RandomAcceptedStrings ras = new BasicOperations.RandomAcceptedStrings(automaton);
|
||||
final AutomatonTestUtil.RandomAcceptedStrings ras = new AutomatonTestUtil.RandomAcceptedStrings(automaton);
|
||||
|
||||
for (int i = 0; i < 1000*_TestUtil.getRandomMultiplier(); i++) {
|
||||
final String string;
|
||||
|
|
Loading…
Reference in New Issue