mirror of https://github.com/apache/lucene.git
MultiTermQuery return null for ScoreSupplier (#13454)
MultiTermQuery return null for ScoreSupplier if there are no terms in an index that match query terms. With the introduction of PR #12156 we saw degradation in performance of bool queries where one of the mandatory clauses is a TermInSetQuery with query terms not present in the field. Before for such cases TermsInSetQuery returned null for ScoreSupplier which would shortcut the whole bool query. This PR adds ability for MultiTermQuery to return null for ScoreSupplier if a field doesn't contain any query terms. Relates to PR #12156
This commit is contained in:
parent
39a7eabb6d
commit
512ff4ac92
|
@ -259,6 +259,9 @@ Optimizations
|
|||
|
||||
* GITHUB#13322: Implement Weight#count for vector values in the FieldExistsQuery. (Pan Guixin)
|
||||
|
||||
* GITHUB#13454: MultiTermQuery returns null ScoreSupplier in cases where
|
||||
no query terms are present in the index segment (Mayya Sharipova)
|
||||
|
||||
Bug Fixes
|
||||
---------------------
|
||||
(No changes)
|
||||
|
|
|
@ -28,6 +28,7 @@ import org.apache.lucene.index.Terms;
|
|||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.util.Accountable;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.IOSupplier;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
|
||||
/**
|
||||
|
@ -153,7 +154,7 @@ abstract class AbstractMultiTermQueryConstantScoreWrapper<Q extends MultiTermQue
|
|||
List<TermAndState> collectedTerms)
|
||||
throws IOException;
|
||||
|
||||
private WeightOrDocIdSetIterator rewrite(LeafReaderContext context, Terms terms)
|
||||
private IOSupplier<WeightOrDocIdSetIterator> rewrite(LeafReaderContext context, Terms terms)
|
||||
throws IOException {
|
||||
assert terms != null;
|
||||
|
||||
|
@ -162,21 +163,29 @@ abstract class AbstractMultiTermQueryConstantScoreWrapper<Q extends MultiTermQue
|
|||
assert termsEnum != null;
|
||||
|
||||
final List<TermAndState> collectedTerms = new ArrayList<>();
|
||||
if (collectTerms(fieldDocCount, termsEnum, collectedTerms)) {
|
||||
// build a boolean query
|
||||
BooleanQuery.Builder bq = new BooleanQuery.Builder();
|
||||
for (TermAndState t : collectedTerms) {
|
||||
final TermStates termStates = new TermStates(searcher.getTopReaderContext());
|
||||
termStates.register(t.state, context.ord, t.docFreq, t.totalTermFreq);
|
||||
bq.add(new TermQuery(new Term(q.field, t.term), termStates), BooleanClause.Occur.SHOULD);
|
||||
}
|
||||
Query q = new ConstantScoreQuery(bq.build());
|
||||
final Weight weight = searcher.rewrite(q).createWeight(searcher, scoreMode, score());
|
||||
return new WeightOrDocIdSetIterator(weight);
|
||||
boolean collectResult = collectTerms(fieldDocCount, termsEnum, collectedTerms);
|
||||
if (collectResult && collectedTerms.isEmpty()) {
|
||||
return null;
|
||||
}
|
||||
|
||||
// Too many terms to rewrite as a simple bq. Invoke rewriteInner logic to handle rewriting:
|
||||
return rewriteInner(context, fieldDocCount, terms, termsEnum, collectedTerms);
|
||||
return () -> {
|
||||
if (collectResult) {
|
||||
// build a boolean query
|
||||
BooleanQuery.Builder bq = new BooleanQuery.Builder();
|
||||
for (TermAndState t : collectedTerms) {
|
||||
final TermStates termStates = new TermStates(searcher.getTopReaderContext());
|
||||
termStates.register(t.state, context.ord, t.docFreq, t.totalTermFreq);
|
||||
bq.add(
|
||||
new TermQuery(new Term(q.field, t.term), termStates), BooleanClause.Occur.SHOULD);
|
||||
}
|
||||
Query q = new ConstantScoreQuery(bq.build());
|
||||
final Weight weight = searcher.rewrite(q).createWeight(searcher, scoreMode, score());
|
||||
return new WeightOrDocIdSetIterator(weight);
|
||||
} else {
|
||||
// Too many terms to rewrite as a simple bq.
|
||||
// Invoke rewriteInner logic to handle rewriting:
|
||||
return rewriteInner(context, fieldDocCount, terms, termsEnum, collectedTerms);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
private boolean collectTerms(int fieldDocCount, TermsEnum termsEnum, List<TermAndState> terms)
|
||||
|
@ -232,10 +241,13 @@ abstract class AbstractMultiTermQueryConstantScoreWrapper<Q extends MultiTermQue
|
|||
}
|
||||
|
||||
final long cost = estimateCost(terms, q.getTermsCount());
|
||||
IOSupplier<WeightOrDocIdSetIterator> weightOrIteratorSupplier = rewrite(context, terms);
|
||||
if (weightOrIteratorSupplier == null) return null;
|
||||
|
||||
return new ScorerSupplier() {
|
||||
@Override
|
||||
public Scorer get(long leadCost) throws IOException {
|
||||
WeightOrDocIdSetIterator weightOrIterator = rewrite(context, terms);
|
||||
WeightOrDocIdSetIterator weightOrIterator = weightOrIteratorSupplier.get();
|
||||
final Scorer scorer;
|
||||
if (weightOrIterator == null) {
|
||||
scorer = null;
|
||||
|
@ -255,7 +267,7 @@ abstract class AbstractMultiTermQueryConstantScoreWrapper<Q extends MultiTermQue
|
|||
|
||||
@Override
|
||||
public BulkScorer bulkScorer() throws IOException {
|
||||
WeightOrDocIdSetIterator weightOrIterator = rewrite(context, terms);
|
||||
WeightOrDocIdSetIterator weightOrIterator = weightOrIteratorSupplier.get();
|
||||
final BulkScorer bulkScorer;
|
||||
if (weightOrIterator == null) {
|
||||
bulkScorer = null;
|
||||
|
|
|
@ -28,13 +28,18 @@ import java.util.concurrent.atomic.AtomicInteger;
|
|||
import java.util.function.Supplier;
|
||||
import java.util.stream.Collectors;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.Field.Store;
|
||||
import org.apache.lucene.document.KeywordField;
|
||||
import org.apache.lucene.document.StringField;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.FilterDirectoryReader;
|
||||
import org.apache.lucene.index.FilterLeafReader;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
|
@ -157,6 +162,69 @@ public class TestTermInSetQuery extends LuceneTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
public void testReturnsNullScoreSupplier() throws Exception {
|
||||
try (Directory directory = newDirectory()) {
|
||||
try (IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig())) {
|
||||
for (char ch = 'a'; ch <= 'z'; ch++) {
|
||||
Document doc = new Document();
|
||||
doc.add(new KeywordField("id", Character.toString(ch), Field.Store.YES));
|
||||
doc.add(new KeywordField("content", Character.toString(ch), Field.Store.YES));
|
||||
writer.addDocument(doc);
|
||||
}
|
||||
}
|
||||
try (DirectoryReader reader = DirectoryReader.open(directory)) {
|
||||
List<BytesRef> terms = new ArrayList<>();
|
||||
for (char ch = 'a'; ch <= 'z'; ch++) {
|
||||
terms.add(newBytesRef(Character.toString(ch)));
|
||||
}
|
||||
Query query2 = new TermInSetQuery("content", terms);
|
||||
|
||||
{
|
||||
// query1 doesn't match any documents
|
||||
Query query1 = new TermInSetQuery("id", List.of(newBytesRef("aaa"), newBytesRef("bbb")));
|
||||
BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder();
|
||||
queryBuilder.add(query1, Occur.FILTER);
|
||||
queryBuilder.add(query2, Occur.FILTER);
|
||||
Query boolQuery = queryBuilder.build();
|
||||
|
||||
IndexSearcher searcher = new IndexSearcher(reader);
|
||||
final LeafReaderContext ctx = reader.leaves().get(0);
|
||||
|
||||
Weight weight1 = searcher.createWeight(searcher.rewrite(query1), ScoreMode.COMPLETE, 1);
|
||||
ScorerSupplier scorerSupplier1 = weight1.scorerSupplier(ctx);
|
||||
// as query1 doesn't match any documents, its scorerSupplier must be null
|
||||
assertNull(scorerSupplier1);
|
||||
Weight weight = searcher.createWeight(searcher.rewrite(boolQuery), ScoreMode.COMPLETE, 1);
|
||||
// scorerSupplier of a bool query where query1 is mandatory must be null
|
||||
ScorerSupplier scorerSupplier = weight.scorerSupplier(ctx);
|
||||
assertNull(scorerSupplier);
|
||||
}
|
||||
{
|
||||
// query1 matches some documents
|
||||
Query query1 =
|
||||
new TermInSetQuery(
|
||||
"id", List.of(newBytesRef("aaa"), newBytesRef("bbb"), newBytesRef("b")));
|
||||
BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder();
|
||||
queryBuilder.add(query1, Occur.FILTER);
|
||||
queryBuilder.add(query2, Occur.FILTER);
|
||||
Query boolQuery = queryBuilder.build();
|
||||
|
||||
IndexSearcher searcher = new IndexSearcher(reader);
|
||||
final LeafReaderContext ctx = reader.leaves().get(0);
|
||||
|
||||
Weight weight1 = searcher.createWeight(searcher.rewrite(query1), ScoreMode.COMPLETE, 1);
|
||||
ScorerSupplier scorerSupplier1 = weight1.scorerSupplier(ctx);
|
||||
// as query1 matches some documents, its scorerSupplier must not be null
|
||||
assertNotNull(scorerSupplier1);
|
||||
Weight weight = searcher.createWeight(searcher.rewrite(boolQuery), ScoreMode.COMPLETE, 1);
|
||||
// scorerSupplier of a bool query where query1 is mandatory must not be null
|
||||
ScorerSupplier scorerSupplier = weight.scorerSupplier(ctx);
|
||||
assertNotNull(scorerSupplier);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void assertSameMatches(IndexSearcher searcher, Query q1, Query q2, boolean scores)
|
||||
throws IOException {
|
||||
final int maxDoc = searcher.getIndexReader().maxDoc();
|
||||
|
|
Loading…
Reference in New Issue