mirror of https://github.com/apache/lucene.git
LUCENE-10367: Optimize CoveringQuery for the case when the minimum number of matching clauses is a constant.
This commit is contained in:
parent
bcb70fd742
commit
ce93d45532
|
@ -199,6 +199,9 @@ Optimizations
|
|||
|
||||
* LUCENE-10315: Use SIMD instructions to decode BKD doc IDs. (Guo Feng, Adrien Grand, Ignacio Vera)
|
||||
|
||||
* LUCENE-10367: Optimize CoveringQuery for the case when the minimum number of
|
||||
matching clauses is a constant. (LuYunCheng via Adrien Grand)
|
||||
|
||||
Changes in runtime behavior
|
||||
---------------------
|
||||
|
||||
|
|
|
@ -156,7 +156,12 @@ public abstract class LongValuesSource implements SegmentCacheable {
|
|||
return new ConstantLongValuesSource(value);
|
||||
}
|
||||
|
||||
private static class ConstantLongValuesSource extends LongValuesSource {
|
||||
/**
|
||||
* A {@link LongValuesSource} that always returns the same constant value
|
||||
*
|
||||
* @lucene.internal
|
||||
*/
|
||||
public static class ConstantLongValuesSource extends LongValuesSource {
|
||||
|
||||
private final long value;
|
||||
|
||||
|
@ -211,6 +216,11 @@ public abstract class LongValuesSource implements SegmentCacheable {
|
|||
public LongValuesSource rewrite(IndexSearcher searcher) throws IOException {
|
||||
return this;
|
||||
}
|
||||
|
||||
/** Get the constant value. */
|
||||
public long getValue() {
|
||||
return value;
|
||||
}
|
||||
}
|
||||
|
||||
private static class FieldValuesSource extends LongValuesSource {
|
||||
|
|
|
@ -25,10 +25,12 @@ import java.util.stream.Collectors;
|
|||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.search.BooleanClause;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.Explanation;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.LongValues;
|
||||
import org.apache.lucene.search.LongValuesSource;
|
||||
import org.apache.lucene.search.MatchNoDocsQuery;
|
||||
import org.apache.lucene.search.Matches;
|
||||
import org.apache.lucene.search.MatchesUtils;
|
||||
import org.apache.lucene.search.Multiset;
|
||||
|
@ -124,6 +126,21 @@ public final class CoveringQuery extends Query implements Accountable {
|
|||
|
||||
@Override
|
||||
public Query rewrite(IndexReader reader) throws IOException {
|
||||
if (minimumNumberMatch instanceof LongValuesSource.ConstantLongValuesSource) {
|
||||
final long constantMin =
|
||||
((LongValuesSource.ConstantLongValuesSource) minimumNumberMatch).getValue();
|
||||
if (constantMin > queries.size()) {
|
||||
return new MatchNoDocsQuery(
|
||||
"More clauses are required to match than the number of clauses");
|
||||
}
|
||||
BooleanQuery.Builder builder =
|
||||
new BooleanQuery.Builder().setMinimumNumberShouldMatch((int) Math.max(constantMin, 1));
|
||||
for (Query query : queries) {
|
||||
Query r = query.rewrite(reader);
|
||||
builder.add(r, BooleanClause.Occur.SHOULD);
|
||||
}
|
||||
return builder.build();
|
||||
}
|
||||
Multiset<Query> rewritten = new Multiset<>();
|
||||
boolean actuallyRewritten = false;
|
||||
for (Query query : queries) {
|
||||
|
|
|
@ -36,7 +36,9 @@ import org.apache.lucene.search.IndexSearcher;
|
|||
import org.apache.lucene.search.LongValuesSource;
|
||||
import org.apache.lucene.search.PhraseQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.Sort;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.tests.search.QueryUtils;
|
||||
import org.apache.lucene.tests.util.LuceneTestCase;
|
||||
|
@ -147,6 +149,7 @@ public class TestCoveringQuery extends LuceneTestCase {
|
|||
}
|
||||
Query q1 = builder.build();
|
||||
Query q2 = new CoveringQuery(queries, LongValuesSource.constant(i));
|
||||
assertSameMatches(searcher, q1, q2, true);
|
||||
assertEquals(searcher.count(q1), searcher.count(q2));
|
||||
}
|
||||
|
||||
|
@ -161,4 +164,89 @@ public class TestCoveringQuery extends LuceneTestCase {
|
|||
r.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testRandomWand() throws IOException {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
|
||||
int numDocs = atLeast(50);
|
||||
for (int i = 0; i < numDocs; ++i) {
|
||||
Document doc = new Document();
|
||||
if (random().nextBoolean()) {
|
||||
doc.add(new StringField("field", "A", Store.NO));
|
||||
}
|
||||
if (random().nextBoolean()) {
|
||||
doc.add(new StringField("field", "B", Store.NO));
|
||||
}
|
||||
if (random().nextDouble() > 0.9) {
|
||||
doc.add(new StringField("field", "C", Store.NO));
|
||||
}
|
||||
if (random().nextDouble() > 0.1) {
|
||||
doc.add(new StringField("field", "D", Store.NO));
|
||||
}
|
||||
doc.add(new NumericDocValuesField("min_match", 1));
|
||||
w.addDocument(doc);
|
||||
}
|
||||
|
||||
IndexReader r = DirectoryReader.open(w);
|
||||
IndexSearcher searcher = new IndexSearcher(r);
|
||||
w.close();
|
||||
|
||||
int iters = atLeast(10);
|
||||
for (int iter = 0; iter < iters; ++iter) {
|
||||
List<Query> queries = new ArrayList<>();
|
||||
if (random().nextBoolean()) {
|
||||
queries.add(new TermQuery(new Term("field", "A")));
|
||||
}
|
||||
if (random().nextBoolean()) {
|
||||
queries.add(new TermQuery(new Term("field", "B")));
|
||||
}
|
||||
if (random().nextBoolean()) {
|
||||
queries.add(new TermQuery(new Term("field", "C")));
|
||||
}
|
||||
if (random().nextBoolean()) {
|
||||
queries.add(new TermQuery(new Term("field", "D")));
|
||||
}
|
||||
if (random().nextBoolean()) {
|
||||
queries.add(new TermQuery(new Term("field", "E")));
|
||||
}
|
||||
|
||||
Query q = new CoveringQuery(queries, LongValuesSource.fromLongField("min_match"));
|
||||
QueryUtils.check(random(), q, searcher);
|
||||
|
||||
for (int i = 1; i < 4; ++i) {
|
||||
BooleanQuery.Builder builder = new BooleanQuery.Builder().setMinimumNumberShouldMatch(i);
|
||||
for (Query query : queries) {
|
||||
builder.add(query, Occur.SHOULD);
|
||||
}
|
||||
Query q1 = builder.build();
|
||||
Query q2 = new CoveringQuery(queries, LongValuesSource.constant(i));
|
||||
assertSameMatches(searcher, q1, q2, true);
|
||||
assertEquals(searcher.count(q1), searcher.count(q2));
|
||||
}
|
||||
|
||||
Query filtered =
|
||||
new BooleanQuery.Builder()
|
||||
.add(q, Occur.MUST)
|
||||
.add(new TermQuery(new Term("field", "A")), Occur.MUST)
|
||||
.build();
|
||||
QueryUtils.check(random(), filtered, searcher);
|
||||
}
|
||||
|
||||
r.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
private void assertSameMatches(IndexSearcher searcher, Query q1, Query q2, boolean scores)
|
||||
throws IOException {
|
||||
final int maxDoc = searcher.getIndexReader().maxDoc();
|
||||
final TopDocs td1 = searcher.search(q1, maxDoc, scores ? Sort.RELEVANCE : Sort.INDEXORDER);
|
||||
final TopDocs td2 = searcher.search(q2, maxDoc, scores ? Sort.RELEVANCE : Sort.INDEXORDER);
|
||||
assertEquals(td1.totalHits.value, td2.totalHits.value);
|
||||
for (int i = 0; i < td1.scoreDocs.length; ++i) {
|
||||
assertEquals(td1.scoreDocs[i].doc, td2.scoreDocs[i].doc);
|
||||
if (scores) {
|
||||
assertEquals(td1.scoreDocs[i].score, td2.scoreDocs[i].score, 10e-7);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue