mirror of https://github.com/apache/lucene.git
LUCENE-2716: increase test coverage for minimizeSchindler() and determinizeMcCandless()
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1026182 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
d8208f5646
commit
b627037726
|
@ -178,7 +178,7 @@ final public class SpecialOperations {
|
|||
* Reverses the language of the given (non-singleton) automaton while returning
|
||||
* the set of new initial states.
|
||||
*/
|
||||
private static Set<State> reverse(Automaton a) {
|
||||
static Set<State> reverse(Automaton a) {
|
||||
a.expandSingleton();
|
||||
// reverse all edges
|
||||
HashMap<State, HashSet<Transition>> m = new HashMap<State, HashSet<Transition>>();
|
||||
|
|
|
@ -250,4 +250,128 @@ public class AutomatonTestUtil {
|
|||
return ArrayUtil.toIntArray(soFar);
|
||||
}
|
||||
}
|
||||
|
||||
/** return a random NFA/DFA for testing */
|
||||
public static Automaton randomAutomaton(Random random) {
|
||||
// get two random Automata from regexps
|
||||
Automaton a1 = AutomatonTestUtil.randomRegexp(random).toAutomaton();
|
||||
if (random.nextBoolean())
|
||||
a1 = BasicOperations.complement(a1);
|
||||
|
||||
Automaton a2 = AutomatonTestUtil.randomRegexp(random).toAutomaton();
|
||||
if (random.nextBoolean())
|
||||
a2 = BasicOperations.complement(a2);
|
||||
|
||||
// combine them in random ways
|
||||
switch(random.nextInt(4)) {
|
||||
case 0: return BasicOperations.concatenate(a1, a2);
|
||||
case 1: return BasicOperations.union(a1, a2);
|
||||
case 2: return BasicOperations.intersection(a1, a2);
|
||||
default: return BasicOperations.minus(a1, a2);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* below are original, unoptimized implementations of DFA operations for testing.
|
||||
* These are from brics automaton, full license (BSD) below:
|
||||
*/
|
||||
|
||||
/*
|
||||
* dk.brics.automaton
|
||||
*
|
||||
* Copyright (c) 2001-2009 Anders Moeller
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. The name of the author may not be used to endorse or promote products
|
||||
* derived from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Simple, original brics implementation of Brzozowski minimize()
|
||||
*/
|
||||
public static void minimizeSimple(Automaton a) {
|
||||
if (a.isSingleton())
|
||||
return;
|
||||
determinizeSimple(a, SpecialOperations.reverse(a));
|
||||
determinizeSimple(a, SpecialOperations.reverse(a));
|
||||
}
|
||||
|
||||
/**
|
||||
* Simple, original brics implementation of determinize()
|
||||
*/
|
||||
public static void determinizeSimple(Automaton a) {
|
||||
if (a.deterministic || a.isSingleton())
|
||||
return;
|
||||
Set<State> initialset = new HashSet<State>();
|
||||
initialset.add(a.initial);
|
||||
determinizeSimple(a, initialset);
|
||||
}
|
||||
|
||||
/**
|
||||
* Simple, original brics implementation of determinize()
|
||||
* Determinizes the given automaton using the given set of initial states.
|
||||
*/
|
||||
public static void determinizeSimple(Automaton a, Set<State> initialset) {
|
||||
int[] points = a.getStartPoints();
|
||||
// subset construction
|
||||
Map<Set<State>, Set<State>> sets = new HashMap<Set<State>, Set<State>>();
|
||||
LinkedList<Set<State>> worklist = new LinkedList<Set<State>>();
|
||||
Map<Set<State>, State> newstate = new HashMap<Set<State>, State>();
|
||||
sets.put(initialset, initialset);
|
||||
worklist.add(initialset);
|
||||
a.initial = new State();
|
||||
newstate.put(initialset, a.initial);
|
||||
while (worklist.size() > 0) {
|
||||
Set<State> s = worklist.removeFirst();
|
||||
State r = newstate.get(s);
|
||||
for (State q : s)
|
||||
if (q.accept) {
|
||||
r.accept = true;
|
||||
break;
|
||||
}
|
||||
for (int n = 0; n < points.length; n++) {
|
||||
Set<State> p = new HashSet<State>();
|
||||
for (State q : s)
|
||||
for (Transition t : q.getTransitions())
|
||||
if (t.min <= points[n] && points[n] <= t.max)
|
||||
p.add(t.to);
|
||||
if (!sets.containsKey(p)) {
|
||||
sets.put(p, p);
|
||||
worklist.add(p);
|
||||
newstate.put(p, new State());
|
||||
}
|
||||
State q = newstate.get(p);
|
||||
int min = points[n];
|
||||
int max;
|
||||
if (n + 1 < points.length)
|
||||
max = points[n + 1] - 1;
|
||||
else
|
||||
max = Character.MAX_CODE_POINT;
|
||||
r.addTransition(new Transition(min, max, q));
|
||||
}
|
||||
}
|
||||
a.deterministic = true;
|
||||
a.clearNumberedStates();
|
||||
a.removeDeadTransitions();
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -20,16 +20,11 @@ package org.apache.lucene.util.automaton;
|
|||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
||||
/**
|
||||
* Not thorough, but tries to test determinism correctness
|
||||
* Not completely thorough, but tries to test determinism correctness
|
||||
* somewhat randomly.
|
||||
*/
|
||||
public class TestDeterminism extends LuceneTestCase {
|
||||
|
||||
@Override
|
||||
public void setUp() throws Exception {
|
||||
super.setUp();
|
||||
}
|
||||
|
||||
/** test a bunch of random regular expressions */
|
||||
public void testRegexps() throws Exception {
|
||||
int num = 500 * RANDOM_MULTIPLIER;
|
||||
|
@ -37,6 +32,20 @@ public class TestDeterminism extends LuceneTestCase {
|
|||
assertAutomaton(AutomatonTestUtil.randomRegexp(random).toAutomaton());
|
||||
}
|
||||
|
||||
/** test against a simple, unoptimized det */
|
||||
public void testAgainstSimple() throws Exception {
|
||||
int num = 2000 * RANDOM_MULTIPLIER;
|
||||
for (int i = 0; i < num; i++) {
|
||||
Automaton a = AutomatonTestUtil.randomAutomaton(random);
|
||||
Automaton b = a.clone();
|
||||
AutomatonTestUtil.determinizeSimple(a);
|
||||
b.deterministic = false; // force det
|
||||
b.determinize();
|
||||
// TODO: more verifications possible?
|
||||
assertTrue(BasicOperations.sameLanguage(a, b));
|
||||
}
|
||||
}
|
||||
|
||||
private static void assertAutomaton(Automaton a) {
|
||||
Automaton clone = a.clone();
|
||||
// complement(complement(a)) = a
|
||||
|
|
|
@ -21,35 +21,32 @@ import org.apache.lucene.util.LuceneTestCase;
|
|||
|
||||
/**
|
||||
* This test builds some randomish NFA/DFA and minimizes them.
|
||||
* the minimal and non-minimal are compared to ensure they are the same.
|
||||
*/
|
||||
public class TestMinimize extends LuceneTestCase {
|
||||
/** the minimal and non-minimal are compared to ensure they are the same. */
|
||||
public void test() {
|
||||
int num = 10000 * RANDOM_MULTIPLIER;
|
||||
int num = 2000 * RANDOM_MULTIPLIER;
|
||||
for (int i = 0; i < num; i++) {
|
||||
Automaton a = randomishAutomaton();
|
||||
Automaton a = AutomatonTestUtil.randomAutomaton(random);
|
||||
Automaton b = a.clone();
|
||||
MinimizationOperations.minimize(b);
|
||||
assertTrue(BasicOperations.sameLanguage(a, b));
|
||||
}
|
||||
}
|
||||
|
||||
private Automaton randomishAutomaton() {
|
||||
// get two random Automata from regexps
|
||||
Automaton a1 = AutomatonTestUtil.randomRegexp(random).toAutomaton();
|
||||
if (random.nextBoolean())
|
||||
a1 = BasicOperations.complement(a1);
|
||||
|
||||
Automaton a2 = AutomatonTestUtil.randomRegexp(random).toAutomaton();
|
||||
if (random.nextBoolean())
|
||||
a2 = BasicOperations.complement(a2);
|
||||
|
||||
// combine them in random ways
|
||||
switch(random.nextInt(4)) {
|
||||
case 0: return BasicOperations.concatenate(a1, a2);
|
||||
case 1: return BasicOperations.union(a1, a2);
|
||||
case 2: return BasicOperations.intersection(a1, a2);
|
||||
default: return BasicOperations.minus(a1, a2);
|
||||
/** compare minimized against minimized with a slower, simple impl.
|
||||
* we check not only that they are the same, but that #states/#transitions
|
||||
* are the same. */
|
||||
public void testAgainstBrzozowski() {
|
||||
int num = 2000 * RANDOM_MULTIPLIER;
|
||||
for (int i = 0; i < num; i++) {
|
||||
Automaton a = AutomatonTestUtil.randomAutomaton(random);
|
||||
AutomatonTestUtil.minimizeSimple(a);
|
||||
Automaton b = a.clone();
|
||||
MinimizationOperations.minimize(b);
|
||||
assertTrue(BasicOperations.sameLanguage(a, b));
|
||||
assertEquals(a.getNumberOfStates(), b.getNumberOfStates());
|
||||
assertEquals(a.getNumberOfTransitions(), b.getNumberOfTransitions());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue