LUCENE-6046: fix test failure, add maxDeterminizedStates to AutomatonQuery and WildcardQuery too

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1636758 13f79535-47bb-0310-9956-ffa450edef68
2014-11-04 22:16:02 +00:00 · 2014-11-04 22:16:02 +00:00 · 9014bb342d
parent 9c1da956a4
commit 9014bb342d
5 changed files with 48 additions and 6 deletions
--- a/lucene/core/src/java/org/apache/lucene/search/AutomatonQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/AutomatonQuery.java
@ -26,6 +26,7 @@ import org.apache.lucene.util.AttributeSource;
 import org.apache.lucene.util.ToStringUtils;
 import org.apache.lucene.util.automaton.Automaton;
 import org.apache.lucene.util.automaton.CompiledAutomaton;
+import org.apache.lucene.util.automaton.Operations;

 /**
 * A {@link Query} that will match terms against a finite-state machine.
@ -61,10 +62,26 @@ public class AutomatonQuery extends MultiTermQuery {
   *        match.
   */
  public AutomatonQuery(final Term term, Automaton automaton) {
+    this(term, automaton, Operations.DEFAULT_MAX_DETERMINIZED_STATES);
+  }
+
+  /**
+   * Create a new AutomatonQuery from an {@link Automaton}.
+   * 
+   * @param term Term containing field and possibly some pattern structure. The
+   *        term text is ignored.
+   * @param automaton Automaton to run, terms that are accepted are considered a
+   *        match.
+   * @param maxDeterminizedStates maximum number of states in the resulting
+   *   automata.  If the automata would need more than this many states
+   *   TooComplextToDeterminizeException is thrown.  Higher number require more
+   *   space but can process more complex automata.
+   */
+  public AutomatonQuery(final Term term, Automaton automaton, int maxDeterminizedStates) {
    super(term.field());
    this.term = term;
    this.automaton = automaton;
-    this.compiled = new CompiledAutomaton(automaton);
+    this.compiled = new CompiledAutomaton(automaton, null, true, maxDeterminizedStates);
  }

  @Override
--- a/lucene/core/src/java/org/apache/lucene/search/RegexpQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/RegexpQuery.java
@ -105,8 +105,9 @@ public class RegexpQuery extends AutomatonQuery {
   */
  public RegexpQuery(Term term, int flags, AutomatonProvider provider,
      int maxDeterminizedStates) {
-    super(term, new RegExp(term.text(), flags).toAutomaton(
-      provider, maxDeterminizedStates));
+    super(term,
+          new RegExp(term.text(), flags).toAutomaton(
+                       provider, maxDeterminizedStates), maxDeterminizedStates);
  }
  
  /** Prints a user-readable version of this query. */
--- a/lucene/core/src/java/org/apache/lucene/search/WildcardQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/WildcardQuery.java
@ -23,8 +23,8 @@ import java.util.List;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.util.ToStringUtils;
 import org.apache.lucene.util.automaton.Automata;
-import org.apache.lucene.util.automaton.Operations;
 import org.apache.lucene.util.automaton.Automaton;
+import org.apache.lucene.util.automaton.Operations;

 /** Implements the wildcard search query. Supported wildcards are <code>*</code>, which
 * matches any character sequence (including the empty one), and <code>?</code>,
@ -57,6 +57,17 @@ public class WildcardQuery extends AutomatonQuery {
    super(term, toAutomaton(term));
  }
  
+  /**
+   * Constructs a query for terms matching <code>term</code>.
+   * @param maxDeterminizedStates maximum number of states in the resulting
+   *   automata.  If the automata would need more than this many states
+   *   TooComplextToDeterminizeException is thrown.  Higher number require more
+   *   space but can process more complex automata.
+   */
+  public WildcardQuery(Term term, int maxDeterminizedStates) {
+    super(term, toAutomaton(term), maxDeterminizedStates);
+  }
+
  /**
   * Convert Lucene wildcard syntax into an automaton.
   * @lucene.internal
--- a/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum2.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum2.java
@ -100,9 +100,9 @@ public class TestTermsEnum2 extends LuceneTestCase {
      Automaton alternate = Automata.makeStringUnion(matchedTerms);
      //System.out.println("match " + matchedTerms.size() + " " + alternate.getNumberOfStates() + " states, sigma=" + alternate.getStartPoints().length);
      //AutomatonTestUtil.minimizeSimple(alternate);
-      //System.out.println("minmize done");
+      //System.out.println("minimize done");
      AutomatonQuery a1 = new AutomatonQuery(new Term("field", ""), automaton);
-      AutomatonQuery a2 = new AutomatonQuery(new Term("field", ""), alternate);
+      AutomatonQuery a2 = new AutomatonQuery(new Term("field", ""), alternate, Integer.MAX_VALUE);

      ScoreDoc[] origHits = searcher.search(a1, 25).scoreDocs;
      ScoreDoc[] newHits = searcher.search(a2, 25).scoreDocs;
--- a/lucene/core/src/test/org/apache/lucene/search/TestAutomatonQuery.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestAutomatonQuery.java
@ -18,6 +18,9 @@ package org.apache.lucene.search;
 */

 import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
 import java.util.concurrent.CountDownLatch;

 import org.apache.lucene.document.Document;
@ -30,6 +33,7 @@ import org.apache.lucene.index.Term;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.Rethrow;
 import org.apache.lucene.util.TestUtil;
@ -237,4 +241,13 @@ public class TestAutomatonQuery extends LuceneTestCase {
      thread.join();
    }
  }
+
+  public void testHugeAutomaton() {
+    List<BytesRef> terms = new ArrayList<>();
+    while (terms.size() < 10000) {
+      terms.add(new BytesRef(TestUtil.randomUnicodeString(random())));
+    }
+    Collections.sort(terms);
+    new AutomatonQuery(new Term("foo", "bar"), Automata.makeStringUnion(terms), Integer.MAX_VALUE);
+  }
 }