mirror of https://github.com/apache/lucene.git
LUCENE-6046: fix test failure, add maxDeterminizedStates to AutomatonQuery and WildcardQuery too
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1636758 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
9c1da956a4
commit
9014bb342d
|
@ -26,6 +26,7 @@ import org.apache.lucene.util.AttributeSource;
|
|||
import org.apache.lucene.util.ToStringUtils;
|
||||
import org.apache.lucene.util.automaton.Automaton;
|
||||
import org.apache.lucene.util.automaton.CompiledAutomaton;
|
||||
import org.apache.lucene.util.automaton.Operations;
|
||||
|
||||
/**
|
||||
* A {@link Query} that will match terms against a finite-state machine.
|
||||
|
@ -61,10 +62,26 @@ public class AutomatonQuery extends MultiTermQuery {
|
|||
* match.
|
||||
*/
|
||||
public AutomatonQuery(final Term term, Automaton automaton) {
|
||||
this(term, automaton, Operations.DEFAULT_MAX_DETERMINIZED_STATES);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new AutomatonQuery from an {@link Automaton}.
|
||||
*
|
||||
* @param term Term containing field and possibly some pattern structure. The
|
||||
* term text is ignored.
|
||||
* @param automaton Automaton to run, terms that are accepted are considered a
|
||||
* match.
|
||||
* @param maxDeterminizedStates maximum number of states in the resulting
|
||||
* automata. If the automata would need more than this many states
|
||||
* TooComplextToDeterminizeException is thrown. Higher number require more
|
||||
* space but can process more complex automata.
|
||||
*/
|
||||
public AutomatonQuery(final Term term, Automaton automaton, int maxDeterminizedStates) {
|
||||
super(term.field());
|
||||
this.term = term;
|
||||
this.automaton = automaton;
|
||||
this.compiled = new CompiledAutomaton(automaton);
|
||||
this.compiled = new CompiledAutomaton(automaton, null, true, maxDeterminizedStates);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -105,8 +105,9 @@ public class RegexpQuery extends AutomatonQuery {
|
|||
*/
|
||||
public RegexpQuery(Term term, int flags, AutomatonProvider provider,
|
||||
int maxDeterminizedStates) {
|
||||
super(term, new RegExp(term.text(), flags).toAutomaton(
|
||||
provider, maxDeterminizedStates));
|
||||
super(term,
|
||||
new RegExp(term.text(), flags).toAutomaton(
|
||||
provider, maxDeterminizedStates), maxDeterminizedStates);
|
||||
}
|
||||
|
||||
/** Prints a user-readable version of this query. */
|
||||
|
|
|
@ -23,8 +23,8 @@ import java.util.List;
|
|||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.util.ToStringUtils;
|
||||
import org.apache.lucene.util.automaton.Automata;
|
||||
import org.apache.lucene.util.automaton.Operations;
|
||||
import org.apache.lucene.util.automaton.Automaton;
|
||||
import org.apache.lucene.util.automaton.Operations;
|
||||
|
||||
/** Implements the wildcard search query. Supported wildcards are <code>*</code>, which
|
||||
* matches any character sequence (including the empty one), and <code>?</code>,
|
||||
|
@ -57,6 +57,17 @@ public class WildcardQuery extends AutomatonQuery {
|
|||
super(term, toAutomaton(term));
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs a query for terms matching <code>term</code>.
|
||||
* @param maxDeterminizedStates maximum number of states in the resulting
|
||||
* automata. If the automata would need more than this many states
|
||||
* TooComplextToDeterminizeException is thrown. Higher number require more
|
||||
* space but can process more complex automata.
|
||||
*/
|
||||
public WildcardQuery(Term term, int maxDeterminizedStates) {
|
||||
super(term, toAutomaton(term), maxDeterminizedStates);
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert Lucene wildcard syntax into an automaton.
|
||||
* @lucene.internal
|
||||
|
|
|
@ -100,9 +100,9 @@ public class TestTermsEnum2 extends LuceneTestCase {
|
|||
Automaton alternate = Automata.makeStringUnion(matchedTerms);
|
||||
//System.out.println("match " + matchedTerms.size() + " " + alternate.getNumberOfStates() + " states, sigma=" + alternate.getStartPoints().length);
|
||||
//AutomatonTestUtil.minimizeSimple(alternate);
|
||||
//System.out.println("minmize done");
|
||||
//System.out.println("minimize done");
|
||||
AutomatonQuery a1 = new AutomatonQuery(new Term("field", ""), automaton);
|
||||
AutomatonQuery a2 = new AutomatonQuery(new Term("field", ""), alternate);
|
||||
AutomatonQuery a2 = new AutomatonQuery(new Term("field", ""), alternate, Integer.MAX_VALUE);
|
||||
|
||||
ScoreDoc[] origHits = searcher.search(a1, 25).scoreDocs;
|
||||
ScoreDoc[] newHits = searcher.search(a2, 25).scoreDocs;
|
||||
|
|
|
@ -18,6 +18,9 @@ package org.apache.lucene.search;
|
|||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.CountDownLatch;
|
||||
|
||||
import org.apache.lucene.document.Document;
|
||||
|
@ -30,6 +33,7 @@ import org.apache.lucene.index.Term;
|
|||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.Rethrow;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
|
@ -237,4 +241,13 @@ public class TestAutomatonQuery extends LuceneTestCase {
|
|||
thread.join();
|
||||
}
|
||||
}
|
||||
|
||||
public void testHugeAutomaton() {
|
||||
List<BytesRef> terms = new ArrayList<>();
|
||||
while (terms.size() < 10000) {
|
||||
terms.add(new BytesRef(TestUtil.randomUnicodeString(random())));
|
||||
}
|
||||
Collections.sort(terms);
|
||||
new AutomatonQuery(new Term("foo", "bar"), Automata.makeStringUnion(terms), Integer.MAX_VALUE);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue