From f4ebed2404eea001a76f6ad7dda03a135b0d7cbc Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Mon, 16 Sep 2024 15:19:28 +0200 Subject: [PATCH] Make Automata#optional create simpler automata. (#13793) In the common case when the input automaton has no transition to state 0, the optional automaton can be created by marking state 0 as accepted. --- .../lucene/util/automaton/Operations.java | 31 +++++++++ .../lucene/util/automaton/TestOperations.java | 66 +++++++++++++++++++ 2 files changed, 97 insertions(+) diff --git a/lucene/core/src/java/org/apache/lucene/util/automaton/Operations.java b/lucene/core/src/java/org/apache/lucene/util/automaton/Operations.java index 3cb02c77a82..7c2b164aa10 100644 --- a/lucene/core/src/java/org/apache/lucene/util/automaton/Operations.java +++ b/lucene/core/src/java/org/apache/lucene/util/automaton/Operations.java @@ -159,6 +159,37 @@ public final class Operations { *

Complexity: linear in number of states. */ public static Automaton optional(Automaton a) { + if (a.isAccept(0)) { + // If the initial state is accepted, then the empty string is already accepted. + return a; + } + + boolean hasTransitionsToInitialState = false; + Transition t = new Transition(); + outer: + for (int state = 0; state < a.getNumStates(); ++state) { + int count = a.initTransition(state, t); + for (int i = 0; i < count; ++i) { + a.getNextTransition(t); + if (t.dest == 0) { + hasTransitionsToInitialState = true; + break outer; + } + } + } + + if (hasTransitionsToInitialState == false) { + // If the automaton has no transition to the initial state, we can simply mark the initial + // state as accepted. + Automaton result = new Automaton(); + result.copy(a); + if (result.getNumStates() == 0) { + result.createState(); + } + result.setAccept(0, true); + return result; + } + Automaton result = new Automaton(); result.createState(); result.setAccept(0, true); diff --git a/lucene/core/src/test/org/apache/lucene/util/automaton/TestOperations.java b/lucene/core/src/test/org/apache/lucene/util/automaton/TestOperations.java index c158dd9f3bb..3de263030d6 100644 --- a/lucene/core/src/test/org/apache/lucene/util/automaton/TestOperations.java +++ b/lucene/core/src/test/org/apache/lucene/util/automaton/TestOperations.java @@ -412,4 +412,70 @@ public class TestOperations extends LuceneTestCase { return builder.finish(); } + + public void testOptional() { + Automaton a = Automata.makeChar('a'); + + Automaton optionalA = new Automaton(); + optionalA.createState(); + optionalA.setAccept(0, true); + optionalA.finishState(); + optionalA.createState(); + optionalA.setAccept(1, true); + optionalA.addTransition(0, 1, 'a'); + optionalA.finishState(); + + assertTrue(AutomatonTestUtil.sameLanguage(Operations.optional(a), optionalA)); + assertSame(optionalA, Operations.optional(optionalA)); + + // Now test an automaton that has a transition to state 0. a(ba)* + a = new Automaton(); + a.createState(); + a.createState(); + a.setAccept(1, true); + a.addTransition(0, 1, 'a'); + a.finishState(); + a.addTransition(1, 0, 'b'); + a.finishState(); + + optionalA = new Automaton(); + optionalA.createState(); + optionalA.setAccept(0, true); + optionalA.createState(); + optionalA.createState(); + optionalA.setAccept(2, true); + optionalA.addTransition(0, 2, 'a'); + optionalA.finishState(); + optionalA.addTransition(1, 2, 'a'); + optionalA.finishState(); + optionalA.addTransition(2, 1, 'b'); + optionalA.finishState(); + + assertTrue(AutomatonTestUtil.sameLanguage(Operations.optional(a), optionalA)); + assertSame(optionalA, Operations.optional(optionalA)); + } + + public void testDuelOptional() { + final int iters = atLeast(1_000); + for (int iter = 0; iter < iters; ++iter) { + Automaton a = AutomatonTestUtil.randomAutomaton(random()); + Automaton repeat1 = Operations.determinize(Operations.optional(a), Integer.MAX_VALUE); + Automaton repeat2 = Operations.determinize(naiveOptional(a), Integer.MAX_VALUE); + assertTrue(AutomatonTestUtil.sameLanguage(repeat1, repeat2)); + } + } + + // This is the original implementation of Operations#optional, before we improved it to generate + // simpler automata in some common cases. + private static Automaton naiveOptional(Automaton a) { + Automaton result = new Automaton(); + result.createState(); + result.setAccept(0, true); + if (a.getNumStates() > 0) { + result.copy(a); + result.addEpsilon(0, 1); + } + result.finishState(); + return result; + } }