LUCENE-6046: fix test failure, add maxDeterminizedStates to AutomatonQuery and WildcardQuery too

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1636758 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2014-11-04 22:16:02 +00:00
parent 9c1da956a4
commit 9014bb342d
5 changed files with 48 additions and 6 deletions

View File

@ -26,6 +26,7 @@ import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.ToStringUtils;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.CompiledAutomaton;
import org.apache.lucene.util.automaton.Operations;
/**
* A {@link Query} that will match terms against a finite-state machine.
@ -61,10 +62,26 @@ public class AutomatonQuery extends MultiTermQuery {
* match.
*/
public AutomatonQuery(final Term term, Automaton automaton) {
this(term, automaton, Operations.DEFAULT_MAX_DETERMINIZED_STATES);
}
/**
* Create a new AutomatonQuery from an {@link Automaton}.
*
* @param term Term containing field and possibly some pattern structure. The
* term text is ignored.
* @param automaton Automaton to run, terms that are accepted are considered a
* match.
* @param maxDeterminizedStates maximum number of states in the resulting
* automata. If the automata would need more than this many states
* TooComplextToDeterminizeException is thrown. Higher number require more
* space but can process more complex automata.
*/
public AutomatonQuery(final Term term, Automaton automaton, int maxDeterminizedStates) {
super(term.field());
this.term = term;
this.automaton = automaton;
this.compiled = new CompiledAutomaton(automaton);
this.compiled = new CompiledAutomaton(automaton, null, true, maxDeterminizedStates);
}
@Override

View File

@ -105,8 +105,9 @@ public class RegexpQuery extends AutomatonQuery {
*/
public RegexpQuery(Term term, int flags, AutomatonProvider provider,
int maxDeterminizedStates) {
super(term, new RegExp(term.text(), flags).toAutomaton(
provider, maxDeterminizedStates));
super(term,
new RegExp(term.text(), flags).toAutomaton(
provider, maxDeterminizedStates), maxDeterminizedStates);
}
/** Prints a user-readable version of this query. */

View File

@ -23,8 +23,8 @@ import java.util.List;
import org.apache.lucene.index.Term;
import org.apache.lucene.util.ToStringUtils;
import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.Operations;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.Operations;
/** Implements the wildcard search query. Supported wildcards are <code>*</code>, which
* matches any character sequence (including the empty one), and <code>?</code>,
@ -57,6 +57,17 @@ public class WildcardQuery extends AutomatonQuery {
super(term, toAutomaton(term));
}
/**
* Constructs a query for terms matching <code>term</code>.
* @param maxDeterminizedStates maximum number of states in the resulting
* automata. If the automata would need more than this many states
* TooComplextToDeterminizeException is thrown. Higher number require more
* space but can process more complex automata.
*/
public WildcardQuery(Term term, int maxDeterminizedStates) {
super(term, toAutomaton(term), maxDeterminizedStates);
}
/**
* Convert Lucene wildcard syntax into an automaton.
* @lucene.internal

View File

@ -100,9 +100,9 @@ public class TestTermsEnum2 extends LuceneTestCase {
Automaton alternate = Automata.makeStringUnion(matchedTerms);
//System.out.println("match " + matchedTerms.size() + " " + alternate.getNumberOfStates() + " states, sigma=" + alternate.getStartPoints().length);
//AutomatonTestUtil.minimizeSimple(alternate);
//System.out.println("minmize done");
//System.out.println("minimize done");
AutomatonQuery a1 = new AutomatonQuery(new Term("field", ""), automaton);
AutomatonQuery a2 = new AutomatonQuery(new Term("field", ""), alternate);
AutomatonQuery a2 = new AutomatonQuery(new Term("field", ""), alternate, Integer.MAX_VALUE);
ScoreDoc[] origHits = searcher.search(a1, 25).scoreDocs;
ScoreDoc[] newHits = searcher.search(a2, 25).scoreDocs;

View File

@ -18,6 +18,9 @@ package org.apache.lucene.search;
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.CountDownLatch;
import org.apache.lucene.document.Document;
@ -30,6 +33,7 @@ import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.Rethrow;
import org.apache.lucene.util.TestUtil;
@ -237,4 +241,13 @@ public class TestAutomatonQuery extends LuceneTestCase {
thread.join();
}
}
public void testHugeAutomaton() {
List<BytesRef> terms = new ArrayList<>();
while (terms.size() < 10000) {
terms.add(new BytesRef(TestUtil.randomUnicodeString(random())));
}
Collections.sort(terms);
new AutomatonQuery(new Term("foo", "bar"), Automata.makeStringUnion(terms), Integer.MAX_VALUE);
}
}