mirror of
https://github.com/apache/lucene.git
synced 2025-02-09 03:25:15 +00:00
LUCENE-10458: BoundedDocSetIdIterator may supply an incorrect count in Weight#count(LeafReaderContext) when missingValue is enabled (#736)
This commit is contained in:
parent
691760be93
commit
226880dd33
@ -11,6 +11,10 @@ API Changes
|
||||
|
||||
New Features
|
||||
---------------------
|
||||
|
||||
* LUCENE-10385: Implement Weight#count on IndexSortSortedNumericDocValuesRangeQuery
|
||||
to speed up computing the number of hits when possible. (Lu Xugang, Luca Cavanna, Adrien Grand)
|
||||
|
||||
* LUCENE-10422: Monitor Improvements: `Monitor` can use a custom `Directory`
|
||||
implementation. `Monitor` can be created with a readonly `QueryIndex` in order to
|
||||
have readonly `Monitor` instances. (Niko Usai)
|
||||
@ -21,6 +25,7 @@ Improvements
|
||||
|
||||
Optimizations
|
||||
---------------------
|
||||
|
||||
* LUCENE-10452: Hunspell: call checkCanceled less frequently to reduce the overhead (Peter Gromov)
|
||||
|
||||
* LUCENE-10451: Hunspell: don't perform potentially expensive spellchecking after timeout (Peter Gromov)
|
||||
@ -150,9 +155,6 @@ New Features
|
||||
based on TotalHitCountCollector that allows users to parallelize counting the
|
||||
number of hits. (Luca Cavanna, Adrien Grand)
|
||||
|
||||
* LUCENE-10385: Implement Weight#count on IndexSortSortedNumericDocValuesRangeQuery
|
||||
to speed up computing the number of hits when possible. (Luca Cavanna, Adrien Grand)
|
||||
|
||||
* LUCENE-10403: Add ArrayUtil#grow(T[]). (Greg Miller)
|
||||
|
||||
* LUCENE-10414: Add fn:fuzzyTerm interval function to flexible query parser (Dawid Weiss,
|
||||
|
@ -20,8 +20,10 @@ import java.io.IOException;
|
||||
import java.util.Objects;
|
||||
import org.apache.lucene.index.DocValues;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.NumericDocValues;
|
||||
import org.apache.lucene.index.PointValues;
|
||||
import org.apache.lucene.index.SortedNumericDocValues;
|
||||
import org.apache.lucene.search.ConstantScoreScorer;
|
||||
import org.apache.lucene.search.ConstantScoreWeight;
|
||||
@ -198,16 +200,18 @@ public class IndexSortSortedNumericDocValuesRangeQuery extends Query {
|
||||
|
||||
@Override
|
||||
public int count(LeafReaderContext context) throws IOException {
|
||||
BoundedDocSetIdIterator disi = getDocIdSetIteratorOrNull(context);
|
||||
if (disi != null) {
|
||||
return disi.lastDoc - disi.firstDoc;
|
||||
if (context.reader().hasDeletions() == false) {
|
||||
BoundedDocIdSetIterator disi = getDocIdSetIteratorOrNull(context);
|
||||
if (disi != null && disi.delegate == null) {
|
||||
return disi.lastDoc - disi.firstDoc;
|
||||
}
|
||||
}
|
||||
return fallbackWeight.count(context);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
private BoundedDocSetIdIterator getDocIdSetIteratorOrNull(LeafReaderContext context)
|
||||
private BoundedDocIdSetIterator getDocIdSetIteratorOrNull(LeafReaderContext context)
|
||||
throws IOException {
|
||||
SortedNumericDocValues sortedNumericValues =
|
||||
DocValues.getSortedNumeric(context.reader(), field);
|
||||
@ -237,7 +241,7 @@ public class IndexSortSortedNumericDocValuesRangeQuery extends Query {
|
||||
* {@link DocIdSetIterator} makes sure to wrap the original docvalues to skip over documents with
|
||||
* no value.
|
||||
*/
|
||||
private BoundedDocSetIdIterator getDocIdSetIterator(
|
||||
private BoundedDocIdSetIterator getDocIdSetIterator(
|
||||
SortField sortField, LeafReaderContext context, DocIdSetIterator delegate)
|
||||
throws IOException {
|
||||
long lower = sortField.getReverse() ? upperValue : lowerValue;
|
||||
@ -278,7 +282,19 @@ public class IndexSortSortedNumericDocValuesRangeQuery extends Query {
|
||||
}
|
||||
|
||||
int lastDocIdExclusive = high + 1;
|
||||
return new BoundedDocSetIdIterator(firstDocIdInclusive, lastDocIdExclusive, delegate);
|
||||
Object missingValue = sortField.getMissingValue();
|
||||
BoundedDocIdSetIterator disi;
|
||||
LeafReader reader = context.reader();
|
||||
PointValues pointValues = reader.getPointValues(field);
|
||||
final long missingLongValue = missingValue == null ? 0L : (long) missingValue;
|
||||
// all documents have docValues or missing value falls outside the range
|
||||
if ((pointValues != null && pointValues.getDocCount() == reader.maxDoc())
|
||||
|| (missingLongValue < lowerValue || missingLongValue > upperValue)) {
|
||||
disi = new BoundedDocIdSetIterator(firstDocIdInclusive, lastDocIdExclusive, null);
|
||||
} else {
|
||||
disi = new BoundedDocIdSetIterator(firstDocIdInclusive, lastDocIdExclusive, delegate);
|
||||
}
|
||||
return disi;
|
||||
}
|
||||
|
||||
/** Compares the given document's value with a stored reference value. */
|
||||
@ -306,14 +322,14 @@ public class IndexSortSortedNumericDocValuesRangeQuery extends Query {
|
||||
* A doc ID set iterator that wraps a delegate iterator and only returns doc IDs in the range
|
||||
* [firstDocInclusive, lastDoc).
|
||||
*/
|
||||
private static class BoundedDocSetIdIterator extends DocIdSetIterator {
|
||||
private static class BoundedDocIdSetIterator extends DocIdSetIterator {
|
||||
private final int firstDoc;
|
||||
private final int lastDoc;
|
||||
private final DocIdSetIterator delegate;
|
||||
|
||||
private int docID = -1;
|
||||
|
||||
BoundedDocSetIdIterator(int firstDoc, int lastDoc, DocIdSetIterator delegate) {
|
||||
BoundedDocIdSetIterator(int firstDoc, int lastDoc, DocIdSetIterator delegate) {
|
||||
this.firstDoc = firstDoc;
|
||||
this.lastDoc = lastDoc;
|
||||
this.delegate = delegate;
|
||||
@ -335,7 +351,12 @@ public class IndexSortSortedNumericDocValuesRangeQuery extends Query {
|
||||
target = firstDoc;
|
||||
}
|
||||
|
||||
int result = delegate.advance(target);
|
||||
int result;
|
||||
if (delegate != null) {
|
||||
result = delegate.advance(target);
|
||||
} else {
|
||||
result = target;
|
||||
}
|
||||
if (result < lastDoc) {
|
||||
docID = result;
|
||||
} else {
|
||||
|
@ -20,9 +20,11 @@ import static org.hamcrest.CoreMatchers.instanceOf;
|
||||
|
||||
import java.io.IOException;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.LongPoint;
|
||||
import org.apache.lucene.document.SortedNumericDocValuesField;
|
||||
import org.apache.lucene.document.SortedSetDocValuesField;
|
||||
import org.apache.lucene.document.StringField;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
@ -59,7 +61,14 @@ public class TestIndexSortSortedNumericDocValuesRangeQuery extends LuceneTestCas
|
||||
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
|
||||
boolean reverse = random().nextBoolean();
|
||||
SortField sortField = new SortedNumericSortField("dv", SortField.Type.LONG, reverse);
|
||||
sortField.setMissingValue(random().nextLong());
|
||||
boolean enableMissingValue = random().nextBoolean();
|
||||
if (enableMissingValue) {
|
||||
long missingValue =
|
||||
random().nextBoolean()
|
||||
? TestUtil.nextLong(random(), -100, 10000)
|
||||
: (random().nextBoolean() ? Long.MIN_VALUE : Long.MAX_VALUE);
|
||||
sortField.setMissingValue(missingValue);
|
||||
}
|
||||
iwc.setIndexSort(new Sort(sortField));
|
||||
|
||||
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
|
||||
@ -459,30 +468,6 @@ public class TestIndexSortSortedNumericDocValuesRangeQuery extends LuceneTestCas
|
||||
reader.close();
|
||||
}
|
||||
|
||||
public void testCount() throws IOException {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
|
||||
Sort indexSort = new Sort(new SortedNumericSortField("field", SortField.Type.LONG));
|
||||
iwc.setIndexSort(indexSort);
|
||||
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
|
||||
Document doc = new Document();
|
||||
doc.add(new SortedNumericDocValuesField("field", 10));
|
||||
writer.addDocument(doc);
|
||||
IndexReader reader = writer.getReader();
|
||||
IndexSearcher searcher = newSearcher(reader);
|
||||
|
||||
Query fallbackQuery = LongPoint.newRangeQuery("field", 1, 42);
|
||||
Query query = new IndexSortSortedNumericDocValuesRangeQuery("field", 1, 42, fallbackQuery);
|
||||
Weight weight = query.createWeight(searcher, ScoreMode.COMPLETE, 1.0f);
|
||||
for (LeafReaderContext context : searcher.getLeafContexts()) {
|
||||
assertEquals(1, weight.count(context));
|
||||
}
|
||||
|
||||
writer.close();
|
||||
reader.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testFallbackCount() throws IOException {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
|
||||
@ -509,6 +494,119 @@ public class TestIndexSortSortedNumericDocValuesRangeQuery extends LuceneTestCas
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testCompareCount() throws IOException {
|
||||
final int iters = atLeast(10);
|
||||
for (int iter = 0; iter < iters; ++iter) {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
|
||||
SortField sortField = new SortedNumericSortField("field", SortField.Type.LONG);
|
||||
boolean enableMissingValue = random().nextBoolean();
|
||||
if (enableMissingValue) {
|
||||
long missingValue =
|
||||
random().nextBoolean()
|
||||
? TestUtil.nextLong(random(), -100, 10000)
|
||||
: (random().nextBoolean() ? Long.MIN_VALUE : Long.MAX_VALUE);
|
||||
sortField.setMissingValue(missingValue);
|
||||
}
|
||||
iwc.setIndexSort(new Sort(sortField));
|
||||
|
||||
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
|
||||
|
||||
final int numDocs = atLeast(100);
|
||||
for (int i = 0; i < numDocs; ++i) {
|
||||
Document doc = new Document();
|
||||
final int numValues = TestUtil.nextInt(random(), 0, 1);
|
||||
for (int j = 0; j < numValues; ++j) {
|
||||
final long value = TestUtil.nextLong(random(), -100, 10000);
|
||||
doc = createSNDVAndPointDocument("field", value);
|
||||
}
|
||||
writer.addDocument(doc);
|
||||
}
|
||||
|
||||
if (random().nextBoolean()) {
|
||||
writer.deleteDocuments(LongPoint.newRangeQuery("field", 0L, 10L));
|
||||
}
|
||||
|
||||
final IndexReader reader = writer.getReader();
|
||||
final IndexSearcher searcher = newSearcher(reader);
|
||||
writer.close();
|
||||
|
||||
for (int i = 0; i < 100; ++i) {
|
||||
final long min =
|
||||
random().nextBoolean() ? Long.MIN_VALUE : TestUtil.nextLong(random(), -100, 10000);
|
||||
final long max =
|
||||
random().nextBoolean() ? Long.MAX_VALUE : TestUtil.nextLong(random(), -100, 10000);
|
||||
final Query q1 = LongPoint.newRangeQuery("field", min, max);
|
||||
|
||||
final Query fallbackQuery = LongPoint.newRangeQuery("field", min, max);
|
||||
final Query q2 =
|
||||
new IndexSortSortedNumericDocValuesRangeQuery("field", min, max, fallbackQuery);
|
||||
final Weight weight1 = q1.createWeight(searcher, ScoreMode.COMPLETE, 1.0f);
|
||||
final Weight weight2 = q2.createWeight(searcher, ScoreMode.COMPLETE, 1.0f);
|
||||
assertSameCount(weight1, weight2, searcher);
|
||||
}
|
||||
|
||||
reader.close();
|
||||
dir.close();
|
||||
}
|
||||
}
|
||||
|
||||
private void assertSameCount(Weight weight1, Weight weight2, IndexSearcher searcher)
|
||||
throws IOException {
|
||||
for (LeafReaderContext context : searcher.getLeafContexts()) {
|
||||
assertEquals(weight1.count(context), weight2.count(context));
|
||||
}
|
||||
}
|
||||
|
||||
public void testCountBoundary() throws IOException {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
|
||||
SortField sortField = new SortedNumericSortField("field", SortField.Type.LONG);
|
||||
boolean useLower = random().nextBoolean();
|
||||
long lowerValue = 1;
|
||||
long upperValue = 100;
|
||||
sortField.setMissingValue(useLower ? lowerValue : upperValue);
|
||||
Sort indexSort = new Sort(sortField);
|
||||
iwc.setIndexSort(indexSort);
|
||||
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
|
||||
|
||||
writer.addDocument(
|
||||
createSNDVAndPointDocument("field", TestUtil.nextLong(random(), lowerValue, upperValue)));
|
||||
writer.addDocument(
|
||||
createSNDVAndPointDocument("field", TestUtil.nextLong(random(), lowerValue, upperValue)));
|
||||
// missingValue
|
||||
writer.addDocument(createMissingValueDocument());
|
||||
|
||||
IndexReader reader = writer.getReader();
|
||||
IndexSearcher searcher = newSearcher(reader);
|
||||
|
||||
Query fallbackQuery = LongPoint.newRangeQuery("field", lowerValue, upperValue);
|
||||
Query query =
|
||||
new IndexSortSortedNumericDocValuesRangeQuery(
|
||||
"field", lowerValue, upperValue, fallbackQuery);
|
||||
Weight weight = query.createWeight(searcher, ScoreMode.COMPLETE, 1.0f);
|
||||
for (LeafReaderContext context : searcher.getLeafContexts()) {
|
||||
assertEquals(2, weight.count(context));
|
||||
}
|
||||
|
||||
writer.close();
|
||||
reader.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
private Document createMissingValueDocument() {
|
||||
Document doc = new Document();
|
||||
doc.add(new StringField("foo", "fox", Field.Store.YES));
|
||||
return doc;
|
||||
}
|
||||
|
||||
private Document createSNDVAndPointDocument(String field, long value) {
|
||||
Document doc = new Document();
|
||||
doc.add(new SortedNumericDocValuesField(field, value));
|
||||
doc.add(new LongPoint(field, value));
|
||||
return doc;
|
||||
}
|
||||
|
||||
private Document createDocument(String field, long value) {
|
||||
Document doc = new Document();
|
||||
doc.add(new SortedNumericDocValuesField(field, value));
|
||||
|
Loading…
x
Reference in New Issue
Block a user