LUCENE-10385: Implement Weight#count on IndexSortSortedNumericDocValuesRangeQuery (#635)

IndexSortSortedNumericDocValuesRangeQuery can implement its count method and coompute count through a binary search, the same binary search that is used to execute the query itself, whenever all the required conditions are met.
This commit is contained in:
Luca Cavanna 2022-02-03 17:19:05 +01:00 committed by GitHub
parent ee7a8d6918
commit bade484998
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 146 additions and 69 deletions

View File

@ -128,6 +128,9 @@ New Features
based on TotalHitCountCollector that allows users to parallelize counting the
number of hits. (Luca Cavanna, Adrien Grand)
* LUCENE-10385: Implement Weight#count on IndexSortSortedNumericDocValuesRangeQuery
to speed up computing the number of hits when possible. (Luca Cavanna, Adrien Grand)
Improvements
---------------------

View File

@ -156,20 +156,9 @@ public class IndexSortSortedNumericDocValuesRangeQuery extends Query {
return new ConstantScoreWeight(this, boost) {
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
SortedNumericDocValues sortedNumericValues =
DocValues.getSortedNumeric(context.reader(), field);
NumericDocValues numericValues = DocValues.unwrapSingleton(sortedNumericValues);
if (numericValues != null) {
Sort indexSort = context.reader().getMetaData().getSort();
if (indexSort != null
&& indexSort.getSort().length > 0
&& indexSort.getSort()[0].getField().equals(field)) {
SortField sortField = indexSort.getSort()[0];
DocIdSetIterator disi = getDocIdSetIterator(sortField, context, numericValues);
return new ConstantScoreScorer(this, score(), scoreMode, disi);
}
DocIdSetIterator disi = getDocIdSetIteratorOrNull(context);
if (disi != null) {
return new ConstantScoreScorer(this, score(), scoreMode, disi);
}
return fallbackWeight.scorer(context);
}
@ -180,9 +169,36 @@ public class IndexSortSortedNumericDocValuesRangeQuery extends Query {
// if the fallback query is cacheable.
return fallbackWeight.isCacheable(ctx);
}
@Override
public int count(LeafReaderContext context) throws IOException {
BoundedDocSetIdIterator disi = getDocIdSetIteratorOrNull(context);
if (disi != null) {
return disi.lastDoc - disi.firstDoc;
}
return fallbackWeight.count(context);
}
};
}
private BoundedDocSetIdIterator getDocIdSetIteratorOrNull(LeafReaderContext context)
throws IOException {
SortedNumericDocValues sortedNumericValues =
DocValues.getSortedNumeric(context.reader(), field);
NumericDocValues numericValues = DocValues.unwrapSingleton(sortedNumericValues);
if (numericValues != null) {
Sort indexSort = context.reader().getMetaData().getSort();
if (indexSort != null
&& indexSort.getSort().length > 0
&& indexSort.getSort()[0].getField().equals(field)) {
SortField sortField = indexSort.getSort()[0];
return getDocIdSetIterator(sortField, context, numericValues);
}
}
return null;
}
/**
* Computes the document IDs that lie within the range [lowerValue, upperValue] by performing
* binary search on the field's doc values.
@ -195,7 +211,7 @@ public class IndexSortSortedNumericDocValuesRangeQuery extends Query {
* {@link DocIdSetIterator} makes sure to wrap the original docvalues to skip over documents with
* no value.
*/
private DocIdSetIterator getDocIdSetIterator(
private BoundedDocSetIdIterator getDocIdSetIterator(
SortField sortField, LeafReaderContext context, DocIdSetIterator delegate)
throws IOException {
long lower = sortField.getReverse() ? upperValue : lowerValue;

View File

@ -38,6 +38,7 @@ import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.SortedNumericSortField;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TotalHitCountCollectorManager;
import org.apache.lucene.search.Weight;
import org.apache.lucene.store.Directory;
import org.apache.lucene.tests.analysis.MockAnalyzer;
@ -95,7 +96,7 @@ public class TestIndexSortSortedNumericDocValuesRangeQuery extends LuceneTestCas
}
}
private void assertSameHits(IndexSearcher searcher, Query q1, Query q2, boolean scores)
private static void assertSameHits(IndexSearcher searcher, Query q1, Query q2, boolean scores)
throws IOException {
final int maxDoc = searcher.getIndexReader().maxDoc();
final TopDocs td1 = searcher.search(q1, maxDoc, scores ? Sort.RELEVANCE : Sort.INDEXORDER);
@ -167,43 +168,50 @@ public class TestIndexSortSortedNumericDocValuesRangeQuery extends LuceneTestCas
IndexSearcher searcher = newSearcher(reader);
// Test ranges consisting of one value.
assertEquals(1, searcher.count(createQuery("field", -80, -80)));
assertEquals(1, searcher.count(createQuery("field", -5, -5)));
assertEquals(2, searcher.count(createQuery("field", 0, 0)));
assertEquals(1, searcher.count(createQuery("field", 30, 30)));
assertEquals(1, searcher.count(createQuery("field", 35, 35)));
assertNumberOfHits(searcher, createQuery("field", -80, -80), 1);
assertNumberOfHits(searcher, createQuery("field", -5, -5), 1);
assertNumberOfHits(searcher, createQuery("field", 0, 0), 2);
assertNumberOfHits(searcher, createQuery("field", 30, 30), 1);
assertNumberOfHits(searcher, createQuery("field", 35, 35), 1);
assertEquals(0, searcher.count(createQuery("field", -90, -90)));
assertEquals(0, searcher.count(createQuery("field", 5, 5)));
assertEquals(0, searcher.count(createQuery("field", 40, 40)));
assertNumberOfHits(searcher, createQuery("field", -90, -90), 0);
assertNumberOfHits(searcher, createQuery("field", 5, 5), 0);
assertNumberOfHits(searcher, createQuery("field", 40, 40), 0);
// Test the lower end of the document value range.
assertEquals(2, searcher.count(createQuery("field", -90, -4)));
assertEquals(2, searcher.count(createQuery("field", -80, -4)));
assertEquals(1, searcher.count(createQuery("field", -70, -4)));
assertEquals(2, searcher.count(createQuery("field", -80, -5)));
assertNumberOfHits(searcher, createQuery("field", -90, -4), 2);
assertNumberOfHits(searcher, createQuery("field", -80, -4), 2);
assertNumberOfHits(searcher, createQuery("field", -70, -4), 1);
assertNumberOfHits(searcher, createQuery("field", -80, -5), 2);
// Test the upper end of the document value range.
assertEquals(1, searcher.count(createQuery("field", 25, 34)));
assertEquals(2, searcher.count(createQuery("field", 25, 35)));
assertEquals(2, searcher.count(createQuery("field", 25, 36)));
assertEquals(2, searcher.count(createQuery("field", 30, 35)));
assertNumberOfHits(searcher, createQuery("field", 25, 34), 1);
assertNumberOfHits(searcher, createQuery("field", 25, 35), 2);
assertNumberOfHits(searcher, createQuery("field", 25, 36), 2);
assertNumberOfHits(searcher, createQuery("field", 30, 35), 2);
// Test multiple occurrences of the same value.
assertEquals(2, searcher.count(createQuery("field", -4, 4)));
assertEquals(2, searcher.count(createQuery("field", -4, 0)));
assertEquals(2, searcher.count(createQuery("field", 0, 4)));
assertEquals(3, searcher.count(createQuery("field", 0, 30)));
assertNumberOfHits(searcher, createQuery("field", -4, 4), 2);
assertNumberOfHits(searcher, createQuery("field", -4, 0), 2);
assertNumberOfHits(searcher, createQuery("field", 0, 4), 2);
assertNumberOfHits(searcher, createQuery("field", 0, 30), 3);
// Test ranges that span all documents.
assertEquals(6, searcher.count(createQuery("field", -80, 35)));
assertEquals(6, searcher.count(createQuery("field", -90, 40)));
assertNumberOfHits(searcher, createQuery("field", -80, 35), 6);
assertNumberOfHits(searcher, createQuery("field", -90, 40), 6);
writer.close();
reader.close();
dir.close();
}
private static void assertNumberOfHits(IndexSearcher searcher, Query query, int numberOfHits)
throws IOException {
assertEquals(
numberOfHits, searcher.search(query, new TotalHitCountCollectorManager()).intValue());
assertEquals(numberOfHits, searcher.count(query));
}
public void testIndexSortDocValuesWithOddLength() throws Exception {
testIndexSortDocValuesWithOddLength(false);
testIndexSortDocValuesWithOddLength(true);
@ -229,38 +237,38 @@ public class TestIndexSortSortedNumericDocValuesRangeQuery extends LuceneTestCas
IndexSearcher searcher = newSearcher(reader);
// Test ranges consisting of one value.
assertEquals(1, searcher.count(createQuery("field", -80, -80)));
assertEquals(1, searcher.count(createQuery("field", -5, -5)));
assertEquals(2, searcher.count(createQuery("field", 0, 0)));
assertEquals(1, searcher.count(createQuery("field", 5, 5)));
assertEquals(1, searcher.count(createQuery("field", 30, 30)));
assertEquals(1, searcher.count(createQuery("field", 35, 35)));
assertNumberOfHits(searcher, createQuery("field", -80, -80), 1);
assertNumberOfHits(searcher, createQuery("field", -5, -5), 1);
assertNumberOfHits(searcher, createQuery("field", 0, 0), 2);
assertNumberOfHits(searcher, createQuery("field", 5, 5), 1);
assertNumberOfHits(searcher, createQuery("field", 30, 30), 1);
assertNumberOfHits(searcher, createQuery("field", 35, 35), 1);
assertEquals(0, searcher.count(createQuery("field", -90, -90)));
assertEquals(0, searcher.count(createQuery("field", 6, 6)));
assertEquals(0, searcher.count(createQuery("field", 40, 40)));
assertNumberOfHits(searcher, createQuery("field", -90, -90), 0);
assertNumberOfHits(searcher, createQuery("field", 6, 6), 0);
assertNumberOfHits(searcher, createQuery("field", 40, 40), 0);
// Test the lower end of the document value range.
assertEquals(2, searcher.count(createQuery("field", -90, -4)));
assertEquals(2, searcher.count(createQuery("field", -80, -4)));
assertEquals(1, searcher.count(createQuery("field", -70, -4)));
assertEquals(2, searcher.count(createQuery("field", -80, -5)));
assertNumberOfHits(searcher, createQuery("field", -90, -4), 2);
assertNumberOfHits(searcher, createQuery("field", -80, -4), 2);
assertNumberOfHits(searcher, createQuery("field", -70, -4), 1);
assertNumberOfHits(searcher, createQuery("field", -80, -5), 2);
// Test the upper end of the document value range.
assertEquals(1, searcher.count(createQuery("field", 25, 34)));
assertEquals(2, searcher.count(createQuery("field", 25, 35)));
assertEquals(2, searcher.count(createQuery("field", 25, 36)));
assertEquals(2, searcher.count(createQuery("field", 30, 35)));
assertNumberOfHits(searcher, createQuery("field", 25, 34), 1);
assertNumberOfHits(searcher, createQuery("field", 25, 35), 2);
assertNumberOfHits(searcher, createQuery("field", 25, 36), 2);
assertNumberOfHits(searcher, createQuery("field", 30, 35), 2);
// Test multiple occurrences of the same value.
assertEquals(2, searcher.count(createQuery("field", -4, 4)));
assertEquals(2, searcher.count(createQuery("field", -4, 0)));
assertEquals(2, searcher.count(createQuery("field", 0, 4)));
assertEquals(4, searcher.count(createQuery("field", 0, 30)));
assertNumberOfHits(searcher, createQuery("field", -4, 4), 2);
assertNumberOfHits(searcher, createQuery("field", -4, 0), 2);
assertNumberOfHits(searcher, createQuery("field", 0, 4), 2);
assertNumberOfHits(searcher, createQuery("field", 0, 30), 4);
// Test ranges that span all documents.
assertEquals(7, searcher.count(createQuery("field", -80, 35)));
assertEquals(7, searcher.count(createQuery("field", -90, 40)));
assertNumberOfHits(searcher, createQuery("field", -80, 35), 7);
assertNumberOfHits(searcher, createQuery("field", -90, 40), 7);
writer.close();
reader.close();
@ -285,10 +293,10 @@ public class TestIndexSortSortedNumericDocValuesRangeQuery extends LuceneTestCas
DirectoryReader reader = writer.getReader();
IndexSearcher searcher = newSearcher(reader);
assertEquals(1, searcher.count(createQuery("field", 42, 43)));
assertEquals(1, searcher.count(createQuery("field", 42, 42)));
assertEquals(0, searcher.count(createQuery("field", 41, 41)));
assertEquals(0, searcher.count(createQuery("field", 43, 43)));
assertNumberOfHits(searcher, createQuery("field", 42, 43), 1);
assertNumberOfHits(searcher, createQuery("field", 42, 42), 1);
assertNumberOfHits(searcher, createQuery("field", 41, 41), 0);
assertNumberOfHits(searcher, createQuery("field", 43, 43), 0);
writer.close();
reader.close();
@ -316,11 +324,11 @@ public class TestIndexSortSortedNumericDocValuesRangeQuery extends LuceneTestCas
DirectoryReader reader = writer.getReader();
IndexSearcher searcher = newSearcher(reader);
assertEquals(2, searcher.count(createQuery("field", -70, 0)));
assertEquals(2, searcher.count(createQuery("field", -2, 35)));
assertNumberOfHits(searcher, createQuery("field", -70, 0), 2);
assertNumberOfHits(searcher, createQuery("field", -2, 35), 2);
assertEquals(4, searcher.count(createQuery("field", -80, 35)));
assertEquals(4, searcher.count(createQuery("field", Long.MIN_VALUE, Long.MAX_VALUE)));
assertNumberOfHits(searcher, createQuery("field", -80, 35), 4);
assertNumberOfHits(searcher, createQuery("field", Long.MIN_VALUE, Long.MAX_VALUE), 4);
writer.close();
reader.close();
@ -450,6 +458,56 @@ public class TestIndexSortSortedNumericDocValuesRangeQuery extends LuceneTestCas
reader.close();
}
public void testCount() throws IOException {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
Sort indexSort = new Sort(new SortedNumericSortField("field", SortField.Type.LONG));
iwc.setIndexSort(indexSort);
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
Document doc = new Document();
doc.add(new SortedNumericDocValuesField("field", 10));
writer.addDocument(doc);
IndexReader reader = writer.getReader();
IndexSearcher searcher = newSearcher(reader);
Query fallbackQuery = LongPoint.newRangeQuery("field", 1, 42);
Query query = new IndexSortSortedNumericDocValuesRangeQuery("field", 1, 42, fallbackQuery);
Weight weight = query.createWeight(searcher, ScoreMode.COMPLETE, 1.0f);
for (LeafReaderContext context : searcher.getLeafContexts()) {
assertEquals(1, weight.count(context));
}
writer.close();
reader.close();
dir.close();
}
public void testFallbackCount() throws IOException {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
Sort indexSort = new Sort(new SortedNumericSortField("field", SortField.Type.LONG));
iwc.setIndexSort(indexSort);
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
Document doc = new Document();
doc.add(new SortedNumericDocValuesField("field", 10));
writer.addDocument(doc);
IndexReader reader = writer.getReader();
IndexSearcher searcher = newSearcher(reader);
// we use an unrealistic query that exposes its own Weight#count
Query fallbackQuery = new MatchNoDocsQuery();
// the index is not sorted on this field, the fallback query is used
Query query = new IndexSortSortedNumericDocValuesRangeQuery("another", 1, 42, fallbackQuery);
Weight weight = query.createWeight(searcher, ScoreMode.COMPLETE, 1.0f);
for (LeafReaderContext context : searcher.getLeafContexts()) {
assertEquals(0, weight.count(context));
}
writer.close();
reader.close();
dir.close();
}
private Document createDocument(String field, long value) {
Document doc = new Document();
doc.add(new SortedNumericDocValuesField(field, value));