Make Automata#optional create simpler automata. (#13793)

In the common case when the input automaton has no transition to state 0, the
optional automaton can be created by marking state 0 as accepted.
This commit is contained in:
Adrien Grand 2024-09-16 15:19:28 +02:00 committed by GitHub
parent e7a6382089
commit f4ebed2404
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 97 additions and 0 deletions

View File

@ -159,6 +159,37 @@ public final class Operations {
* <p>Complexity: linear in number of states.
*/
public static Automaton optional(Automaton a) {
if (a.isAccept(0)) {
// If the initial state is accepted, then the empty string is already accepted.
return a;
}
boolean hasTransitionsToInitialState = false;
Transition t = new Transition();
outer:
for (int state = 0; state < a.getNumStates(); ++state) {
int count = a.initTransition(state, t);
for (int i = 0; i < count; ++i) {
a.getNextTransition(t);
if (t.dest == 0) {
hasTransitionsToInitialState = true;
break outer;
}
}
}
if (hasTransitionsToInitialState == false) {
// If the automaton has no transition to the initial state, we can simply mark the initial
// state as accepted.
Automaton result = new Automaton();
result.copy(a);
if (result.getNumStates() == 0) {
result.createState();
}
result.setAccept(0, true);
return result;
}
Automaton result = new Automaton();
result.createState();
result.setAccept(0, true);

View File

@ -412,4 +412,70 @@ public class TestOperations extends LuceneTestCase {
return builder.finish();
}
public void testOptional() {
Automaton a = Automata.makeChar('a');
Automaton optionalA = new Automaton();
optionalA.createState();
optionalA.setAccept(0, true);
optionalA.finishState();
optionalA.createState();
optionalA.setAccept(1, true);
optionalA.addTransition(0, 1, 'a');
optionalA.finishState();
assertTrue(AutomatonTestUtil.sameLanguage(Operations.optional(a), optionalA));
assertSame(optionalA, Operations.optional(optionalA));
// Now test an automaton that has a transition to state 0. a(ba)*
a = new Automaton();
a.createState();
a.createState();
a.setAccept(1, true);
a.addTransition(0, 1, 'a');
a.finishState();
a.addTransition(1, 0, 'b');
a.finishState();
optionalA = new Automaton();
optionalA.createState();
optionalA.setAccept(0, true);
optionalA.createState();
optionalA.createState();
optionalA.setAccept(2, true);
optionalA.addTransition(0, 2, 'a');
optionalA.finishState();
optionalA.addTransition(1, 2, 'a');
optionalA.finishState();
optionalA.addTransition(2, 1, 'b');
optionalA.finishState();
assertTrue(AutomatonTestUtil.sameLanguage(Operations.optional(a), optionalA));
assertSame(optionalA, Operations.optional(optionalA));
}
public void testDuelOptional() {
final int iters = atLeast(1_000);
for (int iter = 0; iter < iters; ++iter) {
Automaton a = AutomatonTestUtil.randomAutomaton(random());
Automaton repeat1 = Operations.determinize(Operations.optional(a), Integer.MAX_VALUE);
Automaton repeat2 = Operations.determinize(naiveOptional(a), Integer.MAX_VALUE);
assertTrue(AutomatonTestUtil.sameLanguage(repeat1, repeat2));
}
}
// This is the original implementation of Operations#optional, before we improved it to generate
// simpler automata in some common cases.
private static Automaton naiveOptional(Automaton a) {
Automaton result = new Automaton();
result.createState();
result.setAccept(0, true);
if (a.getNumStates() > 0) {
result.copy(a);
result.addEpsilon(0, 1);
}
result.finishState();
return result;
}
}