mirror of https://github.com/apache/lucene.git
LUCENE-10367: Optimize CoveringQuery for the case when the minimum number of matching clauses is a constant.
This commit is contained in:
parent
bcb70fd742
commit
ce93d45532
|
@ -199,6 +199,9 @@ Optimizations
|
||||||
|
|
||||||
* LUCENE-10315: Use SIMD instructions to decode BKD doc IDs. (Guo Feng, Adrien Grand, Ignacio Vera)
|
* LUCENE-10315: Use SIMD instructions to decode BKD doc IDs. (Guo Feng, Adrien Grand, Ignacio Vera)
|
||||||
|
|
||||||
|
* LUCENE-10367: Optimize CoveringQuery for the case when the minimum number of
|
||||||
|
matching clauses is a constant. (LuYunCheng via Adrien Grand)
|
||||||
|
|
||||||
Changes in runtime behavior
|
Changes in runtime behavior
|
||||||
---------------------
|
---------------------
|
||||||
|
|
||||||
|
|
|
@ -156,7 +156,12 @@ public abstract class LongValuesSource implements SegmentCacheable {
|
||||||
return new ConstantLongValuesSource(value);
|
return new ConstantLongValuesSource(value);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static class ConstantLongValuesSource extends LongValuesSource {
|
/**
|
||||||
|
* A ConstantLongValuesSource that always returns a constant value
|
||||||
|
*
|
||||||
|
* @lucene.internal
|
||||||
|
*/
|
||||||
|
public static class ConstantLongValuesSource extends LongValuesSource {
|
||||||
|
|
||||||
private final long value;
|
private final long value;
|
||||||
|
|
||||||
|
@ -211,6 +216,11 @@ public abstract class LongValuesSource implements SegmentCacheable {
|
||||||
public LongValuesSource rewrite(IndexSearcher searcher) throws IOException {
|
public LongValuesSource rewrite(IndexSearcher searcher) throws IOException {
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Get the constant value. */
|
||||||
|
public long getValue() {
|
||||||
|
return value;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static class FieldValuesSource extends LongValuesSource {
|
private static class FieldValuesSource extends LongValuesSource {
|
||||||
|
|
|
@ -25,10 +25,12 @@ import java.util.stream.Collectors;
|
||||||
import org.apache.lucene.index.IndexReader;
|
import org.apache.lucene.index.IndexReader;
|
||||||
import org.apache.lucene.index.LeafReaderContext;
|
import org.apache.lucene.index.LeafReaderContext;
|
||||||
import org.apache.lucene.search.BooleanClause;
|
import org.apache.lucene.search.BooleanClause;
|
||||||
|
import org.apache.lucene.search.BooleanQuery;
|
||||||
import org.apache.lucene.search.Explanation;
|
import org.apache.lucene.search.Explanation;
|
||||||
import org.apache.lucene.search.IndexSearcher;
|
import org.apache.lucene.search.IndexSearcher;
|
||||||
import org.apache.lucene.search.LongValues;
|
import org.apache.lucene.search.LongValues;
|
||||||
import org.apache.lucene.search.LongValuesSource;
|
import org.apache.lucene.search.LongValuesSource;
|
||||||
|
import org.apache.lucene.search.MatchNoDocsQuery;
|
||||||
import org.apache.lucene.search.Matches;
|
import org.apache.lucene.search.Matches;
|
||||||
import org.apache.lucene.search.MatchesUtils;
|
import org.apache.lucene.search.MatchesUtils;
|
||||||
import org.apache.lucene.search.Multiset;
|
import org.apache.lucene.search.Multiset;
|
||||||
|
@ -124,6 +126,21 @@ public final class CoveringQuery extends Query implements Accountable {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Query rewrite(IndexReader reader) throws IOException {
|
public Query rewrite(IndexReader reader) throws IOException {
|
||||||
|
if (minimumNumberMatch instanceof LongValuesSource.ConstantLongValuesSource) {
|
||||||
|
final long constantMin =
|
||||||
|
((LongValuesSource.ConstantLongValuesSource) minimumNumberMatch).getValue();
|
||||||
|
if (constantMin > queries.size()) {
|
||||||
|
return new MatchNoDocsQuery(
|
||||||
|
"More clauses are required to match than the number of clauses");
|
||||||
|
}
|
||||||
|
BooleanQuery.Builder builder =
|
||||||
|
new BooleanQuery.Builder().setMinimumNumberShouldMatch((int) Math.max(constantMin, 1));
|
||||||
|
for (Query query : queries) {
|
||||||
|
Query r = query.rewrite(reader);
|
||||||
|
builder.add(r, BooleanClause.Occur.SHOULD);
|
||||||
|
}
|
||||||
|
return builder.build();
|
||||||
|
}
|
||||||
Multiset<Query> rewritten = new Multiset<>();
|
Multiset<Query> rewritten = new Multiset<>();
|
||||||
boolean actuallyRewritten = false;
|
boolean actuallyRewritten = false;
|
||||||
for (Query query : queries) {
|
for (Query query : queries) {
|
||||||
|
|
|
@ -36,7 +36,9 @@ import org.apache.lucene.search.IndexSearcher;
|
||||||
import org.apache.lucene.search.LongValuesSource;
|
import org.apache.lucene.search.LongValuesSource;
|
||||||
import org.apache.lucene.search.PhraseQuery;
|
import org.apache.lucene.search.PhraseQuery;
|
||||||
import org.apache.lucene.search.Query;
|
import org.apache.lucene.search.Query;
|
||||||
|
import org.apache.lucene.search.Sort;
|
||||||
import org.apache.lucene.search.TermQuery;
|
import org.apache.lucene.search.TermQuery;
|
||||||
|
import org.apache.lucene.search.TopDocs;
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
import org.apache.lucene.tests.search.QueryUtils;
|
import org.apache.lucene.tests.search.QueryUtils;
|
||||||
import org.apache.lucene.tests.util.LuceneTestCase;
|
import org.apache.lucene.tests.util.LuceneTestCase;
|
||||||
|
@ -147,6 +149,7 @@ public class TestCoveringQuery extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
Query q1 = builder.build();
|
Query q1 = builder.build();
|
||||||
Query q2 = new CoveringQuery(queries, LongValuesSource.constant(i));
|
Query q2 = new CoveringQuery(queries, LongValuesSource.constant(i));
|
||||||
|
assertSameMatches(searcher, q1, q2, true);
|
||||||
assertEquals(searcher.count(q1), searcher.count(q2));
|
assertEquals(searcher.count(q1), searcher.count(q2));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -161,4 +164,89 @@ public class TestCoveringQuery extends LuceneTestCase {
|
||||||
r.close();
|
r.close();
|
||||||
dir.close();
|
dir.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testRandomWand() throws IOException {
|
||||||
|
Directory dir = newDirectory();
|
||||||
|
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
|
||||||
|
int numDocs = atLeast(50);
|
||||||
|
for (int i = 0; i < numDocs; ++i) {
|
||||||
|
Document doc = new Document();
|
||||||
|
if (random().nextBoolean()) {
|
||||||
|
doc.add(new StringField("field", "A", Store.NO));
|
||||||
|
}
|
||||||
|
if (random().nextBoolean()) {
|
||||||
|
doc.add(new StringField("field", "B", Store.NO));
|
||||||
|
}
|
||||||
|
if (random().nextDouble() > 0.9) {
|
||||||
|
doc.add(new StringField("field", "C", Store.NO));
|
||||||
|
}
|
||||||
|
if (random().nextDouble() > 0.1) {
|
||||||
|
doc.add(new StringField("field", "D", Store.NO));
|
||||||
|
}
|
||||||
|
doc.add(new NumericDocValuesField("min_match", 1));
|
||||||
|
w.addDocument(doc);
|
||||||
|
}
|
||||||
|
|
||||||
|
IndexReader r = DirectoryReader.open(w);
|
||||||
|
IndexSearcher searcher = new IndexSearcher(r);
|
||||||
|
w.close();
|
||||||
|
|
||||||
|
int iters = atLeast(10);
|
||||||
|
for (int iter = 0; iter < iters; ++iter) {
|
||||||
|
List<Query> queries = new ArrayList<>();
|
||||||
|
if (random().nextBoolean()) {
|
||||||
|
queries.add(new TermQuery(new Term("field", "A")));
|
||||||
|
}
|
||||||
|
if (random().nextBoolean()) {
|
||||||
|
queries.add(new TermQuery(new Term("field", "B")));
|
||||||
|
}
|
||||||
|
if (random().nextBoolean()) {
|
||||||
|
queries.add(new TermQuery(new Term("field", "C")));
|
||||||
|
}
|
||||||
|
if (random().nextBoolean()) {
|
||||||
|
queries.add(new TermQuery(new Term("field", "D")));
|
||||||
|
}
|
||||||
|
if (random().nextBoolean()) {
|
||||||
|
queries.add(new TermQuery(new Term("field", "E")));
|
||||||
|
}
|
||||||
|
|
||||||
|
Query q = new CoveringQuery(queries, LongValuesSource.fromLongField("min_match"));
|
||||||
|
QueryUtils.check(random(), q, searcher);
|
||||||
|
|
||||||
|
for (int i = 1; i < 4; ++i) {
|
||||||
|
BooleanQuery.Builder builder = new BooleanQuery.Builder().setMinimumNumberShouldMatch(i);
|
||||||
|
for (Query query : queries) {
|
||||||
|
builder.add(query, Occur.SHOULD);
|
||||||
|
}
|
||||||
|
Query q1 = builder.build();
|
||||||
|
Query q2 = new CoveringQuery(queries, LongValuesSource.constant(i));
|
||||||
|
assertSameMatches(searcher, q1, q2, true);
|
||||||
|
assertEquals(searcher.count(q1), searcher.count(q2));
|
||||||
|
}
|
||||||
|
|
||||||
|
Query filtered =
|
||||||
|
new BooleanQuery.Builder()
|
||||||
|
.add(q, Occur.MUST)
|
||||||
|
.add(new TermQuery(new Term("field", "A")), Occur.MUST)
|
||||||
|
.build();
|
||||||
|
QueryUtils.check(random(), filtered, searcher);
|
||||||
|
}
|
||||||
|
|
||||||
|
r.close();
|
||||||
|
dir.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
private void assertSameMatches(IndexSearcher searcher, Query q1, Query q2, boolean scores)
|
||||||
|
throws IOException {
|
||||||
|
final int maxDoc = searcher.getIndexReader().maxDoc();
|
||||||
|
final TopDocs td1 = searcher.search(q1, maxDoc, scores ? Sort.RELEVANCE : Sort.INDEXORDER);
|
||||||
|
final TopDocs td2 = searcher.search(q2, maxDoc, scores ? Sort.RELEVANCE : Sort.INDEXORDER);
|
||||||
|
assertEquals(td1.totalHits.value, td2.totalHits.value);
|
||||||
|
for (int i = 0; i < td1.scoreDocs.length; ++i) {
|
||||||
|
assertEquals(td1.scoreDocs[i].doc, td2.scoreDocs[i].doc);
|
||||||
|
if (scores) {
|
||||||
|
assertEquals(td1.scoreDocs[i].score, td2.scoreDocs[i].score, 10e-7);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue