LUCENE-2716: increase test coverage for minimizeSchindler() and determinizeMcCandless()

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1026182 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2010-10-22 00:01:16 +00:00
parent d8208f5646
commit b627037726
4 changed files with 156 additions and 26 deletions

View File

@ -178,7 +178,7 @@ final public class SpecialOperations {
* Reverses the language of the given (non-singleton) automaton while returning
* the set of new initial states.
*/
private static Set<State> reverse(Automaton a) {
static Set<State> reverse(Automaton a) {
a.expandSingleton();
// reverse all edges
HashMap<State, HashSet<Transition>> m = new HashMap<State, HashSet<Transition>>();

View File

@ -250,4 +250,128 @@ public class AutomatonTestUtil {
return ArrayUtil.toIntArray(soFar);
}
}
/** return a random NFA/DFA for testing */
public static Automaton randomAutomaton(Random random) {
// get two random Automata from regexps
Automaton a1 = AutomatonTestUtil.randomRegexp(random).toAutomaton();
if (random.nextBoolean())
a1 = BasicOperations.complement(a1);
Automaton a2 = AutomatonTestUtil.randomRegexp(random).toAutomaton();
if (random.nextBoolean())
a2 = BasicOperations.complement(a2);
// combine them in random ways
switch(random.nextInt(4)) {
case 0: return BasicOperations.concatenate(a1, a2);
case 1: return BasicOperations.union(a1, a2);
case 2: return BasicOperations.intersection(a1, a2);
default: return BasicOperations.minus(a1, a2);
}
}
/**
* below are original, unoptimized implementations of DFA operations for testing.
* These are from brics automaton, full license (BSD) below:
*/
/*
* dk.brics.automaton
*
* Copyright (c) 2001-2009 Anders Moeller
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/**
* Simple, original brics implementation of Brzozowski minimize()
*/
public static void minimizeSimple(Automaton a) {
if (a.isSingleton())
return;
determinizeSimple(a, SpecialOperations.reverse(a));
determinizeSimple(a, SpecialOperations.reverse(a));
}
/**
* Simple, original brics implementation of determinize()
*/
public static void determinizeSimple(Automaton a) {
if (a.deterministic || a.isSingleton())
return;
Set<State> initialset = new HashSet<State>();
initialset.add(a.initial);
determinizeSimple(a, initialset);
}
/**
* Simple, original brics implementation of determinize()
* Determinizes the given automaton using the given set of initial states.
*/
public static void determinizeSimple(Automaton a, Set<State> initialset) {
int[] points = a.getStartPoints();
// subset construction
Map<Set<State>, Set<State>> sets = new HashMap<Set<State>, Set<State>>();
LinkedList<Set<State>> worklist = new LinkedList<Set<State>>();
Map<Set<State>, State> newstate = new HashMap<Set<State>, State>();
sets.put(initialset, initialset);
worklist.add(initialset);
a.initial = new State();
newstate.put(initialset, a.initial);
while (worklist.size() > 0) {
Set<State> s = worklist.removeFirst();
State r = newstate.get(s);
for (State q : s)
if (q.accept) {
r.accept = true;
break;
}
for (int n = 0; n < points.length; n++) {
Set<State> p = new HashSet<State>();
for (State q : s)
for (Transition t : q.getTransitions())
if (t.min <= points[n] && points[n] <= t.max)
p.add(t.to);
if (!sets.containsKey(p)) {
sets.put(p, p);
worklist.add(p);
newstate.put(p, new State());
}
State q = newstate.get(p);
int min = points[n];
int max;
if (n + 1 < points.length)
max = points[n + 1] - 1;
else
max = Character.MAX_CODE_POINT;
r.addTransition(new Transition(min, max, q));
}
}
a.deterministic = true;
a.clearNumberedStates();
a.removeDeadTransitions();
}
}

View File

@ -20,16 +20,11 @@ package org.apache.lucene.util.automaton;
import org.apache.lucene.util.LuceneTestCase;
/**
* Not thorough, but tries to test determinism correctness
* Not completely thorough, but tries to test determinism correctness
* somewhat randomly.
*/
public class TestDeterminism extends LuceneTestCase {
@Override
public void setUp() throws Exception {
super.setUp();
}
/** test a bunch of random regular expressions */
public void testRegexps() throws Exception {
int num = 500 * RANDOM_MULTIPLIER;
@ -37,6 +32,20 @@ public class TestDeterminism extends LuceneTestCase {
assertAutomaton(AutomatonTestUtil.randomRegexp(random).toAutomaton());
}
/** test against a simple, unoptimized det */
public void testAgainstSimple() throws Exception {
int num = 2000 * RANDOM_MULTIPLIER;
for (int i = 0; i < num; i++) {
Automaton a = AutomatonTestUtil.randomAutomaton(random);
Automaton b = a.clone();
AutomatonTestUtil.determinizeSimple(a);
b.deterministic = false; // force det
b.determinize();
// TODO: more verifications possible?
assertTrue(BasicOperations.sameLanguage(a, b));
}
}
private static void assertAutomaton(Automaton a) {
Automaton clone = a.clone();
// complement(complement(a)) = a

View File

@ -21,35 +21,32 @@ import org.apache.lucene.util.LuceneTestCase;
/**
* This test builds some randomish NFA/DFA and minimizes them.
* the minimal and non-minimal are compared to ensure they are the same.
*/
public class TestMinimize extends LuceneTestCase {
/** the minimal and non-minimal are compared to ensure they are the same. */
public void test() {
int num = 10000 * RANDOM_MULTIPLIER;
int num = 2000 * RANDOM_MULTIPLIER;
for (int i = 0; i < num; i++) {
Automaton a = randomishAutomaton();
Automaton a = AutomatonTestUtil.randomAutomaton(random);
Automaton b = a.clone();
MinimizationOperations.minimize(b);
assertTrue(BasicOperations.sameLanguage(a, b));
}
}
private Automaton randomishAutomaton() {
// get two random Automata from regexps
Automaton a1 = AutomatonTestUtil.randomRegexp(random).toAutomaton();
if (random.nextBoolean())
a1 = BasicOperations.complement(a1);
Automaton a2 = AutomatonTestUtil.randomRegexp(random).toAutomaton();
if (random.nextBoolean())
a2 = BasicOperations.complement(a2);
// combine them in random ways
switch(random.nextInt(4)) {
case 0: return BasicOperations.concatenate(a1, a2);
case 1: return BasicOperations.union(a1, a2);
case 2: return BasicOperations.intersection(a1, a2);
default: return BasicOperations.minus(a1, a2);
/** compare minimized against minimized with a slower, simple impl.
* we check not only that they are the same, but that #states/#transitions
* are the same. */
public void testAgainstBrzozowski() {
int num = 2000 * RANDOM_MULTIPLIER;
for (int i = 0; i < num; i++) {
Automaton a = AutomatonTestUtil.randomAutomaton(random);
AutomatonTestUtil.minimizeSimple(a);
Automaton b = a.clone();
MinimizationOperations.minimize(b);
assertTrue(BasicOperations.sameLanguage(a, b));
assertEquals(a.getNumberOfStates(), b.getNumberOfStates());
assertEquals(a.getNumberOfTransitions(), b.getNumberOfTransitions());
}
}
}