LUCENE-10367: Optimize CoveringQuery for the case when the minimum number of matching clauses is a constant.

This commit is contained in:
Adrien Grand 2022-02-08 17:14:42 +01:00
parent bcb70fd742
commit ce93d45532
4 changed files with 119 additions and 1 deletions

View File

@ -199,6 +199,9 @@ Optimizations
* LUCENE-10315: Use SIMD instructions to decode BKD doc IDs. (Guo Feng, Adrien Grand, Ignacio Vera) * LUCENE-10315: Use SIMD instructions to decode BKD doc IDs. (Guo Feng, Adrien Grand, Ignacio Vera)
* LUCENE-10367: Optimize CoveringQuery for the case when the minimum number of
matching clauses is a constant. (LuYunCheng via Adrien Grand)
Changes in runtime behavior Changes in runtime behavior
--------------------- ---------------------

View File

@ -156,7 +156,12 @@ public abstract class LongValuesSource implements SegmentCacheable {
return new ConstantLongValuesSource(value); return new ConstantLongValuesSource(value);
} }
private static class ConstantLongValuesSource extends LongValuesSource { /**
* A {@link LongValuesSource} that always returns the same constant value.
*
* @lucene.internal
*/
public static class ConstantLongValuesSource extends LongValuesSource {
private final long value; private final long value;
@ -211,6 +216,11 @@ public abstract class LongValuesSource implements SegmentCacheable {
public LongValuesSource rewrite(IndexSearcher searcher) throws IOException { public LongValuesSource rewrite(IndexSearcher searcher) throws IOException {
return this; return this;
} }
/**
 * Returns the constant value held by this source.
 *
 * @return the fixed {@code long} this source was created with
 */
public long getValue() {
  return this.value;
}
} }
private static class FieldValuesSource extends LongValuesSource { private static class FieldValuesSource extends LongValuesSource {

View File

@ -25,10 +25,12 @@ import java.util.stream.Collectors;
import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Explanation; import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.LongValues; import org.apache.lucene.search.LongValues;
import org.apache.lucene.search.LongValuesSource; import org.apache.lucene.search.LongValuesSource;
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.Matches; import org.apache.lucene.search.Matches;
import org.apache.lucene.search.MatchesUtils; import org.apache.lucene.search.MatchesUtils;
import org.apache.lucene.search.Multiset; import org.apache.lucene.search.Multiset;
@ -124,6 +126,21 @@ public final class CoveringQuery extends Query implements Accountable {
@Override @Override
public Query rewrite(IndexReader reader) throws IOException { public Query rewrite(IndexReader reader) throws IOException {
if (minimumNumberMatch instanceof LongValuesSource.ConstantLongValuesSource) {
final long constantMin =
((LongValuesSource.ConstantLongValuesSource) minimumNumberMatch).getValue();
if (constantMin > queries.size()) {
return new MatchNoDocsQuery(
"More clauses are required to match than the number of clauses");
}
BooleanQuery.Builder builder =
new BooleanQuery.Builder().setMinimumNumberShouldMatch((int) Math.max(constantMin, 1));
for (Query query : queries) {
Query r = query.rewrite(reader);
builder.add(r, BooleanClause.Occur.SHOULD);
}
return builder.build();
}
Multiset<Query> rewritten = new Multiset<>(); Multiset<Query> rewritten = new Multiset<>();
boolean actuallyRewritten = false; boolean actuallyRewritten = false;
for (Query query : queries) { for (Query query : queries) {

View File

@ -36,7 +36,9 @@ import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.LongValuesSource; import org.apache.lucene.search.LongValuesSource;
import org.apache.lucene.search.PhraseQuery; import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory; import org.apache.lucene.store.Directory;
import org.apache.lucene.tests.search.QueryUtils; import org.apache.lucene.tests.search.QueryUtils;
import org.apache.lucene.tests.util.LuceneTestCase; import org.apache.lucene.tests.util.LuceneTestCase;
@ -147,6 +149,7 @@ public class TestCoveringQuery extends LuceneTestCase {
} }
Query q1 = builder.build(); Query q1 = builder.build();
Query q2 = new CoveringQuery(queries, LongValuesSource.constant(i)); Query q2 = new CoveringQuery(queries, LongValuesSource.constant(i));
assertSameMatches(searcher, q1, q2, true);
assertEquals(searcher.count(q1), searcher.count(q2)); assertEquals(searcher.count(q1), searcher.count(q2));
} }
@ -161,4 +164,89 @@ public class TestCoveringQuery extends LuceneTestCase {
r.close(); r.close();
dir.close(); dir.close();
} }
/**
 * Randomized comparison of {@link CoveringQuery} against an equivalent {@link BooleanQuery}.
 *
 * <p>Indexes at least 50 documents that each carry a random subset of the terms A-D in "field"
 * plus a "min_match" doc value of 1. It then repeatedly builds a random list of term clauses
 * (A-E; E never occurs in the index) and checks that a covering query with a constant minimum
 * of 1, 2 or 3 produces the same hits, scores and counts as a boolean query with the same
 * minimum-should-match setting.
 */
public void testRandomWand() throws IOException {
  final Directory dir = newDirectory();
  final IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());

  final int numDocs = atLeast(50);
  int indexed = 0;
  while (indexed < numDocs) {
    final Document doc = new Document();
    // A and B are each present in roughly half of the documents.
    if (random().nextBoolean()) {
      doc.add(new StringField("field", "A", Store.NO));
    }
    if (random().nextBoolean()) {
      doc.add(new StringField("field", "B", Store.NO));
    }
    // C is rare, D is very common.
    if (random().nextDouble() > 0.9) {
      doc.add(new StringField("field", "C", Store.NO));
    }
    if (random().nextDouble() > 0.1) {
      doc.add(new StringField("field", "D", Store.NO));
    }
    doc.add(new NumericDocValuesField("min_match", 1));
    w.addDocument(doc);
    indexed++;
  }

  final IndexReader r = DirectoryReader.open(w);
  final IndexSearcher searcher = new IndexSearcher(r);
  w.close();

  final String[] candidateTerms = new String[] {"A", "B", "C", "D", "E"};
  final int iters = atLeast(10);
  for (int round = 0; round < iters; round++) {
    // Each candidate term becomes a clause with probability one half, in a fixed order.
    final List<Query> queries = new ArrayList<>();
    for (String text : candidateTerms) {
      if (random().nextBoolean()) {
        queries.add(new TermQuery(new Term("field", text)));
      }
    }

    final Query q = new CoveringQuery(queries, LongValuesSource.fromLongField("min_match"));
    QueryUtils.check(random(), q, searcher);

    for (int minMatch = 1; minMatch <= 3; minMatch++) {
      final BooleanQuery.Builder expectedBuilder = new BooleanQuery.Builder();
      expectedBuilder.setMinimumNumberShouldMatch(minMatch);
      for (Query clause : queries) {
        expectedBuilder.add(clause, Occur.SHOULD);
      }
      final Query q1 = expectedBuilder.build();
      final Query q2 = new CoveringQuery(queries, LongValuesSource.constant(minMatch));
      assertSameMatches(searcher, q1, q2, true);
      final int expectedCount = searcher.count(q1);
      final int actualCount = searcher.count(q2);
      assertEquals(expectedCount, actualCount);
    }

    // Also exercise the doc-values driven query as a required clause next to a filter on A.
    final BooleanQuery.Builder filteredBuilder = new BooleanQuery.Builder();
    filteredBuilder.add(q, Occur.MUST);
    filteredBuilder.add(new TermQuery(new Term("field", "A")), Occur.MUST);
    final Query filtered = filteredBuilder.build();
    QueryUtils.check(random(), filtered, searcher);
  }

  r.close();
  dir.close();
}
/**
 * Asserts that two queries produce the same hits on the given searcher.
 *
 * <p>Both queries are run for up to {@code maxDoc} hits. The total hit counts, the number of
 * returned hits and the doc ID at every rank must agree; when {@code scores} is true the
 * per-hit scores must agree as well (within 10e-7).
 *
 * @param searcher searcher to run both queries against
 * @param q1 query producing the expected hits
 * @param q2 query producing the actual hits
 * @param scores if true, sort by relevance and compare scores; otherwise sort by index order
 * @throws IOException if searching fails
 */
private void assertSameMatches(IndexSearcher searcher, Query q1, Query q2, boolean scores)
    throws IOException {
  final int maxDoc = searcher.getIndexReader().maxDoc();
  final Sort sort = scores ? Sort.RELEVANCE : Sort.INDEXORDER;
  // A sorted search only fills in ScoreDoc.score when doc scores are explicitly requested;
  // without that both sides carry NaN and the score comparison below would always pass.
  final TopDocs td1 = searcher.search(q1, maxDoc, sort, scores);
  final TopDocs td2 = searcher.search(q2, maxDoc, sort, scores);
  assertEquals(td1.totalHits.value, td2.totalHits.value);
  // Fail with a clear assertion rather than an index-out-of-bounds if the hit lists differ.
  assertEquals(td1.scoreDocs.length, td2.scoreDocs.length);
  for (int i = 0; i < td1.scoreDocs.length; ++i) {
    assertEquals(td1.scoreDocs[i].doc, td2.scoreDocs[i].doc);
    if (scores) {
      assertEquals(td1.scoreDocs[i].score, td2.scoreDocs[i].score, 10e-7);
    }
  }
}
} }