LUCENE-8287: Ensure that empty regex completion queries always return no results

This commit is contained in:
Jim Ferenczi 2018-05-17 10:29:49 +02:00
parent 0c3628920a
commit 0159e4b974
5 changed files with 114 additions and 2 deletions

View File

@ -219,6 +219,9 @@ Bug Fixes
that rewrites to a MatchNoDocsQuery instead of throwing an exception.
(Bjarke Mortensen, Andy Tran via David Smiley)
* LUCENE-8287: Ensure that empty regex completion queries always return no results.
(Julie Tibshirani via Jim Ferenczi)
Other
* LUCENE-8301: Update randomizedtesting to 2.6.0. (Dawid Weiss)

View File

@ -167,11 +167,19 @@ public class ContextQuery extends CompletionQuery {
@Override
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
final CompletionWeight innerWeight = ((CompletionWeight) innerQuery.createWeight(searcher, scoreMode, boost));
final Automaton innerAutomaton = innerWeight.getAutomaton();
// If the inner automaton matches nothing, then we return an empty weight to avoid
// traversing all contexts during scoring.
if (innerAutomaton.getNumStates() == 0) {
return new CompletionWeight(this, innerAutomaton);
}
// if separators are preserved the fst contains a SEP_LABEL
// behind each gap. To have a matching automaton, we need to
// include the SEP_LABEL in the query as well
Automaton optionalSepLabel = Operations.optional(Automata.makeChar(CompletionAnalyzer.SEP_LABEL));
Automaton prefixAutomaton = Operations.concatenate(optionalSepLabel, innerWeight.getAutomaton());
Automaton prefixAutomaton = Operations.concatenate(optionalSepLabel, innerAutomaton);
Automaton contextsAutomaton = Operations.concatenate(toContextAutomaton(contexts, matchAllContexts), prefixAutomaton);
contextsAutomaton = Operations.determinize(contextsAutomaton, Operations.DEFAULT_MAX_DETERMINIZED_STATES);

View File

@ -23,6 +23,8 @@ import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Weight;
import org.apache.lucene.search.suggest.BitsProducer;
import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.Operations;
import org.apache.lucene.util.automaton.RegExp;
@ -90,7 +92,12 @@ public class RegexCompletionQuery extends CompletionQuery {
@Override
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
  // Read the term text once; it decides which automaton backs the weight.
  final String regex = getTerm().text();
  // If an empty regex is provided, we use an automaton that matches nothing. This ensures
  // consistency with PrefixCompletionQuery, which returns no results for an empty term.
  // Any other regex is parsed with this query's flags and converted to an automaton,
  // bounded by maxDeterminizedStates.
  final Automaton automaton = regex.isEmpty()
      ? Automata.makeEmpty()
      : new RegExp(regex, flags).toAutomaton(maxDeterminizedStates);
  return new CompletionWeight(this, automaton);
}
/**

View File

@ -41,6 +41,7 @@ import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.LuceneTestCase;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
import static org.apache.lucene.search.suggest.document.TestSuggestField.Entry;
@ -142,6 +143,29 @@ public class TestPrefixCompletionQuery extends LuceneTestCase {
iw.close();
}
/**
 * Verifies that a {@link PrefixCompletionQuery} built from an empty term returns no
 * suggestions, even though the index contains one suggestion.
 */
@Test
public void testEmptyPrefixQuery() throws Exception {
  Analyzer analyzer = new MockAnalyzer(random());
  // try-with-resources closes the reader and then the writer even if the assertion fails,
  // so a failing test does not leave them open.
  try (RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwcWithSuggestField(analyzer, "suggest_field"))) {
    Document document = new Document();
    document.add(new SuggestField("suggest_field", "suggestion1", 1));
    iw.addDocument(document);

    if (rarely()) {
      iw.commit();
    }

    try (DirectoryReader reader = iw.getReader()) {
      SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);
      PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", ""));

      TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5, false);
      assertEquals(0, suggest.scoreDocs.length);
    }
  }
}
public void testMostlyFilteredOutDocuments() throws Exception {
Analyzer analyzer = new MockAnalyzer(random());
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwcWithSuggestField(analyzer, "suggest_field"));
@ -337,4 +361,27 @@ public class TestPrefixCompletionQuery extends LuceneTestCase {
reader.close();
iw.close();
}
/**
 * Verifies that a {@link ContextQuery} wrapping a {@link PrefixCompletionQuery} with an
 * empty term returns no suggestions, even when the added context matches the indexed
 * suggestion's context.
 */
public void testEmptyPrefixContextQuery() throws Exception {
  Analyzer analyzer = new MockAnalyzer(random());
  // try-with-resources closes the reader and then the writer even if the assertion fails,
  // so a failing test does not leave them open.
  try (RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwcWithSuggestField(analyzer, "suggest_field"))) {
    Document document = new Document();
    document.add(new ContextSuggestField("suggest_field", "suggestion", 1, "type"));
    iw.addDocument(document);

    if (rarely()) {
      iw.commit();
    }

    try (DirectoryReader reader = iw.getReader()) {
      SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);
      ContextQuery query = new ContextQuery(new PrefixCompletionQuery(analyzer, new Term("suggest_field", "")));
      query.addContext("type", 1);

      TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5, false);
      assertEquals(0, suggest.scoreDocs.length);
    }
  }
}
}

View File

@ -75,6 +75,29 @@ public class TestRegexCompletionQuery extends LuceneTestCase {
iw.close();
}
/**
 * Verifies that a {@link RegexCompletionQuery} built from an empty term returns no
 * suggestions, even though the index contains one suggestion.
 */
@Test
public void testEmptyRegexQuery() throws Exception {
  Analyzer analyzer = new MockAnalyzer(random());
  // try-with-resources closes the reader and then the writer even if the assertion fails,
  // so a failing test does not leave them open.
  try (RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwcWithSuggestField(analyzer, "suggest_field"))) {
    Document document = new Document();
    document.add(new SuggestField("suggest_field", "suggestion1", 1));
    iw.addDocument(document);

    if (rarely()) {
      iw.commit();
    }

    try (DirectoryReader reader = iw.getReader()) {
      SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);
      RegexCompletionQuery query = new RegexCompletionQuery(new Term("suggest_field", ""));

      TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5, false);
      assertEquals(0, suggest.scoreDocs.length);
    }
  }
}
@Test
public void testSimpleRegexContextQuery() throws Exception {
Analyzer analyzer = new MockAnalyzer(random());
@ -147,4 +170,28 @@ public class TestRegexCompletionQuery extends LuceneTestCase {
reader.close();
iw.close();
}
/**
 * Verifies that a {@link ContextQuery} wrapping a {@link RegexCompletionQuery} with an
 * empty term returns no suggestions, even when the added context matches the indexed
 * suggestion's context.
 */
@Test
public void testEmptyRegexContextQuery() throws Exception {
  Analyzer analyzer = new MockAnalyzer(random());
  // try-with-resources closes the reader and then the writer even if the assertion fails,
  // so a failing test does not leave them open.
  try (RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwcWithSuggestField(analyzer, "suggest_field"))) {
    Document document = new Document();
    document.add(new ContextSuggestField("suggest_field", "suggestion", 1, "type"));
    iw.addDocument(document);

    if (rarely()) {
      iw.commit();
    }

    try (DirectoryReader reader = iw.getReader()) {
      SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);
      ContextQuery query = new ContextQuery(new RegexCompletionQuery(new Term("suggest_field", "")));
      query.addContext("type", 1);

      TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5, false);
      assertEquals(0, suggest.scoreDocs.length);
    }
  }
}
}