mirror of https://github.com/apache/lucene.git
LUCENE-10458: BoundedDocSetIdIterator may supply a wrong count in Weight#count(LeafReaderContext) when missingValue is enabled (#736)
This commit is contained in:
parent
1c6f631678
commit
5450d72258
|
@ -56,6 +56,10 @@ API Changes
|
||||||
|
|
||||||
New Features
|
New Features
|
||||||
---------------------
|
---------------------
|
||||||
|
|
||||||
|
* LUCENE-10385: Implement Weight#count on IndexSortSortedNumericDocValuesRangeQuery
|
||||||
|
to speed up computing the number of hits when possible. (Lu Xugang, Luca Cavanna, Adrien Grand)
|
||||||
|
|
||||||
* LUCENE-10422: Monitor Improvements: `Monitor` can use a custom `Directory`
|
* LUCENE-10422: Monitor Improvements: `Monitor` can use a custom `Directory`
|
||||||
implementation. `Monitor` can be created with a readonly `QueryIndex` in order to
|
implementation. `Monitor` can be created with a readonly `QueryIndex` in order to
|
||||||
have readonly `Monitor` instances. (Niko Usai)
|
have readonly `Monitor` instances. (Niko Usai)
|
||||||
|
@ -66,6 +70,7 @@ Improvements
|
||||||
|
|
||||||
Optimizations
|
Optimizations
|
||||||
---------------------
|
---------------------
|
||||||
|
|
||||||
* LUCENE-10452: Hunspell: call checkCanceled less frequently to reduce the overhead (Peter Gromov)
|
* LUCENE-10452: Hunspell: call checkCanceled less frequently to reduce the overhead (Peter Gromov)
|
||||||
|
|
||||||
* LUCENE-10451: Hunspell: don't perform potentially expensive spellchecking after timeout (Peter Gromov)
|
* LUCENE-10451: Hunspell: don't perform potentially expensive spellchecking after timeout (Peter Gromov)
|
||||||
|
@ -195,9 +200,6 @@ New Features
|
||||||
based on TotalHitCountCollector that allows users to parallelize counting the
|
based on TotalHitCountCollector that allows users to parallelize counting the
|
||||||
number of hits. (Luca Cavanna, Adrien Grand)
|
number of hits. (Luca Cavanna, Adrien Grand)
|
||||||
|
|
||||||
* LUCENE-10385: Implement Weight#count on IndexSortSortedNumericDocValuesRangeQuery
|
|
||||||
to speed up computing the number of hits when possible. (Luca Cavanna, Adrien Grand)
|
|
||||||
|
|
||||||
* LUCENE-10403: Add ArrayUtil#grow(T[]). (Greg Miller)
|
* LUCENE-10403: Add ArrayUtil#grow(T[]). (Greg Miller)
|
||||||
|
|
||||||
* LUCENE-10414: Add fn:fuzzyTerm interval function to flexible query parser (Dawid Weiss,
|
* LUCENE-10414: Add fn:fuzzyTerm interval function to flexible query parser (Dawid Weiss,
|
||||||
|
|
|
@ -20,8 +20,10 @@ import java.io.IOException;
|
||||||
import java.util.Objects;
|
import java.util.Objects;
|
||||||
import org.apache.lucene.index.DocValues;
|
import org.apache.lucene.index.DocValues;
|
||||||
import org.apache.lucene.index.IndexReader;
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
import org.apache.lucene.index.LeafReader;
|
||||||
import org.apache.lucene.index.LeafReaderContext;
|
import org.apache.lucene.index.LeafReaderContext;
|
||||||
import org.apache.lucene.index.NumericDocValues;
|
import org.apache.lucene.index.NumericDocValues;
|
||||||
|
import org.apache.lucene.index.PointValues;
|
||||||
import org.apache.lucene.index.SortedNumericDocValues;
|
import org.apache.lucene.index.SortedNumericDocValues;
|
||||||
import org.apache.lucene.search.ConstantScoreScorer;
|
import org.apache.lucene.search.ConstantScoreScorer;
|
||||||
import org.apache.lucene.search.ConstantScoreWeight;
|
import org.apache.lucene.search.ConstantScoreWeight;
|
||||||
|
@ -198,16 +200,18 @@ public class IndexSortSortedNumericDocValuesRangeQuery extends Query {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int count(LeafReaderContext context) throws IOException {
|
public int count(LeafReaderContext context) throws IOException {
|
||||||
BoundedDocSetIdIterator disi = getDocIdSetIteratorOrNull(context);
|
if (context.reader().hasDeletions() == false) {
|
||||||
if (disi != null) {
|
BoundedDocIdSetIterator disi = getDocIdSetIteratorOrNull(context);
|
||||||
return disi.lastDoc - disi.firstDoc;
|
if (disi != null && disi.delegate == null) {
|
||||||
|
return disi.lastDoc - disi.firstDoc;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return fallbackWeight.count(context);
|
return fallbackWeight.count(context);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
private BoundedDocSetIdIterator getDocIdSetIteratorOrNull(LeafReaderContext context)
|
private BoundedDocIdSetIterator getDocIdSetIteratorOrNull(LeafReaderContext context)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
SortedNumericDocValues sortedNumericValues =
|
SortedNumericDocValues sortedNumericValues =
|
||||||
DocValues.getSortedNumeric(context.reader(), field);
|
DocValues.getSortedNumeric(context.reader(), field);
|
||||||
|
@ -237,7 +241,7 @@ public class IndexSortSortedNumericDocValuesRangeQuery extends Query {
|
||||||
* {@link DocIdSetIterator} makes sure to wrap the original docvalues to skip over documents with
|
* {@link DocIdSetIterator} makes sure to wrap the original docvalues to skip over documents with
|
||||||
* no value.
|
* no value.
|
||||||
*/
|
*/
|
||||||
private BoundedDocSetIdIterator getDocIdSetIterator(
|
private BoundedDocIdSetIterator getDocIdSetIterator(
|
||||||
SortField sortField, LeafReaderContext context, DocIdSetIterator delegate)
|
SortField sortField, LeafReaderContext context, DocIdSetIterator delegate)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
long lower = sortField.getReverse() ? upperValue : lowerValue;
|
long lower = sortField.getReverse() ? upperValue : lowerValue;
|
||||||
|
@ -278,7 +282,19 @@ public class IndexSortSortedNumericDocValuesRangeQuery extends Query {
|
||||||
}
|
}
|
||||||
|
|
||||||
int lastDocIdExclusive = high + 1;
|
int lastDocIdExclusive = high + 1;
|
||||||
return new BoundedDocSetIdIterator(firstDocIdInclusive, lastDocIdExclusive, delegate);
|
Object missingValue = sortField.getMissingValue();
|
||||||
|
BoundedDocIdSetIterator disi;
|
||||||
|
LeafReader reader = context.reader();
|
||||||
|
PointValues pointValues = reader.getPointValues(field);
|
||||||
|
final long missingLongValue = missingValue == null ? 0L : (long) missingValue;
|
||||||
|
// all documents have docValues or missing value falls outside the range
|
||||||
|
if ((pointValues != null && pointValues.getDocCount() == reader.maxDoc())
|
||||||
|
|| (missingLongValue < lowerValue || missingLongValue > upperValue)) {
|
||||||
|
disi = new BoundedDocIdSetIterator(firstDocIdInclusive, lastDocIdExclusive, null);
|
||||||
|
} else {
|
||||||
|
disi = new BoundedDocIdSetIterator(firstDocIdInclusive, lastDocIdExclusive, delegate);
|
||||||
|
}
|
||||||
|
return disi;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Compares the given document's value with a stored reference value. */
|
/** Compares the given document's value with a stored reference value. */
|
||||||
|
@ -306,14 +322,14 @@ public class IndexSortSortedNumericDocValuesRangeQuery extends Query {
|
||||||
* A doc ID set iterator that wraps a delegate iterator and only returns doc IDs in the range
|
* A doc ID set iterator that wraps a delegate iterator and only returns doc IDs in the range
|
||||||
* [firstDocInclusive, lastDoc).
|
* [firstDocInclusive, lastDoc).
|
||||||
*/
|
*/
|
||||||
private static class BoundedDocSetIdIterator extends DocIdSetIterator {
|
private static class BoundedDocIdSetIterator extends DocIdSetIterator {
|
||||||
private final int firstDoc;
|
private final int firstDoc;
|
||||||
private final int lastDoc;
|
private final int lastDoc;
|
||||||
private final DocIdSetIterator delegate;
|
private final DocIdSetIterator delegate;
|
||||||
|
|
||||||
private int docID = -1;
|
private int docID = -1;
|
||||||
|
|
||||||
BoundedDocSetIdIterator(int firstDoc, int lastDoc, DocIdSetIterator delegate) {
|
BoundedDocIdSetIterator(int firstDoc, int lastDoc, DocIdSetIterator delegate) {
|
||||||
this.firstDoc = firstDoc;
|
this.firstDoc = firstDoc;
|
||||||
this.lastDoc = lastDoc;
|
this.lastDoc = lastDoc;
|
||||||
this.delegate = delegate;
|
this.delegate = delegate;
|
||||||
|
@ -335,7 +351,12 @@ public class IndexSortSortedNumericDocValuesRangeQuery extends Query {
|
||||||
target = firstDoc;
|
target = firstDoc;
|
||||||
}
|
}
|
||||||
|
|
||||||
int result = delegate.advance(target);
|
int result;
|
||||||
|
if (delegate != null) {
|
||||||
|
result = delegate.advance(target);
|
||||||
|
} else {
|
||||||
|
result = target;
|
||||||
|
}
|
||||||
if (result < lastDoc) {
|
if (result < lastDoc) {
|
||||||
docID = result;
|
docID = result;
|
||||||
} else {
|
} else {
|
||||||
|
|
|
@ -20,9 +20,11 @@ import static org.hamcrest.CoreMatchers.instanceOf;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import org.apache.lucene.document.Document;
|
import org.apache.lucene.document.Document;
|
||||||
|
import org.apache.lucene.document.Field;
|
||||||
import org.apache.lucene.document.LongPoint;
|
import org.apache.lucene.document.LongPoint;
|
||||||
import org.apache.lucene.document.SortedNumericDocValuesField;
|
import org.apache.lucene.document.SortedNumericDocValuesField;
|
||||||
import org.apache.lucene.document.SortedSetDocValuesField;
|
import org.apache.lucene.document.SortedSetDocValuesField;
|
||||||
|
import org.apache.lucene.document.StringField;
|
||||||
import org.apache.lucene.index.DirectoryReader;
|
import org.apache.lucene.index.DirectoryReader;
|
||||||
import org.apache.lucene.index.IndexReader;
|
import org.apache.lucene.index.IndexReader;
|
||||||
import org.apache.lucene.index.IndexWriterConfig;
|
import org.apache.lucene.index.IndexWriterConfig;
|
||||||
|
@ -59,7 +61,14 @@ public class TestIndexSortSortedNumericDocValuesRangeQuery extends LuceneTestCas
|
||||||
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
|
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
|
||||||
boolean reverse = random().nextBoolean();
|
boolean reverse = random().nextBoolean();
|
||||||
SortField sortField = new SortedNumericSortField("dv", SortField.Type.LONG, reverse);
|
SortField sortField = new SortedNumericSortField("dv", SortField.Type.LONG, reverse);
|
||||||
sortField.setMissingValue(random().nextLong());
|
boolean enableMissingValue = random().nextBoolean();
|
||||||
|
if (enableMissingValue) {
|
||||||
|
long missingValue =
|
||||||
|
random().nextBoolean()
|
||||||
|
? TestUtil.nextLong(random(), -100, 10000)
|
||||||
|
: (random().nextBoolean() ? Long.MIN_VALUE : Long.MAX_VALUE);
|
||||||
|
sortField.setMissingValue(missingValue);
|
||||||
|
}
|
||||||
iwc.setIndexSort(new Sort(sortField));
|
iwc.setIndexSort(new Sort(sortField));
|
||||||
|
|
||||||
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
|
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
|
||||||
|
@ -459,30 +468,6 @@ public class TestIndexSortSortedNumericDocValuesRangeQuery extends LuceneTestCas
|
||||||
reader.close();
|
reader.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testCount() throws IOException {
|
|
||||||
Directory dir = newDirectory();
|
|
||||||
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
|
|
||||||
Sort indexSort = new Sort(new SortedNumericSortField("field", SortField.Type.LONG));
|
|
||||||
iwc.setIndexSort(indexSort);
|
|
||||||
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
|
|
||||||
Document doc = new Document();
|
|
||||||
doc.add(new SortedNumericDocValuesField("field", 10));
|
|
||||||
writer.addDocument(doc);
|
|
||||||
IndexReader reader = writer.getReader();
|
|
||||||
IndexSearcher searcher = newSearcher(reader);
|
|
||||||
|
|
||||||
Query fallbackQuery = LongPoint.newRangeQuery("field", 1, 42);
|
|
||||||
Query query = new IndexSortSortedNumericDocValuesRangeQuery("field", 1, 42, fallbackQuery);
|
|
||||||
Weight weight = query.createWeight(searcher, ScoreMode.COMPLETE, 1.0f);
|
|
||||||
for (LeafReaderContext context : searcher.getLeafContexts()) {
|
|
||||||
assertEquals(1, weight.count(context));
|
|
||||||
}
|
|
||||||
|
|
||||||
writer.close();
|
|
||||||
reader.close();
|
|
||||||
dir.close();
|
|
||||||
}
|
|
||||||
|
|
||||||
public void testFallbackCount() throws IOException {
|
public void testFallbackCount() throws IOException {
|
||||||
Directory dir = newDirectory();
|
Directory dir = newDirectory();
|
||||||
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
|
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
|
||||||
|
@ -509,6 +494,119 @@ public class TestIndexSortSortedNumericDocValuesRangeQuery extends LuceneTestCas
|
||||||
dir.close();
|
dir.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testCompareCount() throws IOException {
|
||||||
|
final int iters = atLeast(10);
|
||||||
|
for (int iter = 0; iter < iters; ++iter) {
|
||||||
|
Directory dir = newDirectory();
|
||||||
|
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
|
||||||
|
SortField sortField = new SortedNumericSortField("field", SortField.Type.LONG);
|
||||||
|
boolean enableMissingValue = random().nextBoolean();
|
||||||
|
if (enableMissingValue) {
|
||||||
|
long missingValue =
|
||||||
|
random().nextBoolean()
|
||||||
|
? TestUtil.nextLong(random(), -100, 10000)
|
||||||
|
: (random().nextBoolean() ? Long.MIN_VALUE : Long.MAX_VALUE);
|
||||||
|
sortField.setMissingValue(missingValue);
|
||||||
|
}
|
||||||
|
iwc.setIndexSort(new Sort(sortField));
|
||||||
|
|
||||||
|
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
|
||||||
|
|
||||||
|
final int numDocs = atLeast(100);
|
||||||
|
for (int i = 0; i < numDocs; ++i) {
|
||||||
|
Document doc = new Document();
|
||||||
|
final int numValues = TestUtil.nextInt(random(), 0, 1);
|
||||||
|
for (int j = 0; j < numValues; ++j) {
|
||||||
|
final long value = TestUtil.nextLong(random(), -100, 10000);
|
||||||
|
doc = createSNDVAndPointDocument("field", value);
|
||||||
|
}
|
||||||
|
writer.addDocument(doc);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (random().nextBoolean()) {
|
||||||
|
writer.deleteDocuments(LongPoint.newRangeQuery("field", 0L, 10L));
|
||||||
|
}
|
||||||
|
|
||||||
|
final IndexReader reader = writer.getReader();
|
||||||
|
final IndexSearcher searcher = newSearcher(reader);
|
||||||
|
writer.close();
|
||||||
|
|
||||||
|
for (int i = 0; i < 100; ++i) {
|
||||||
|
final long min =
|
||||||
|
random().nextBoolean() ? Long.MIN_VALUE : TestUtil.nextLong(random(), -100, 10000);
|
||||||
|
final long max =
|
||||||
|
random().nextBoolean() ? Long.MAX_VALUE : TestUtil.nextLong(random(), -100, 10000);
|
||||||
|
final Query q1 = LongPoint.newRangeQuery("field", min, max);
|
||||||
|
|
||||||
|
final Query fallbackQuery = LongPoint.newRangeQuery("field", min, max);
|
||||||
|
final Query q2 =
|
||||||
|
new IndexSortSortedNumericDocValuesRangeQuery("field", min, max, fallbackQuery);
|
||||||
|
final Weight weight1 = q1.createWeight(searcher, ScoreMode.COMPLETE, 1.0f);
|
||||||
|
final Weight weight2 = q2.createWeight(searcher, ScoreMode.COMPLETE, 1.0f);
|
||||||
|
assertSameCount(weight1, weight2, searcher);
|
||||||
|
}
|
||||||
|
|
||||||
|
reader.close();
|
||||||
|
dir.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void assertSameCount(Weight weight1, Weight weight2, IndexSearcher searcher)
|
||||||
|
throws IOException {
|
||||||
|
for (LeafReaderContext context : searcher.getLeafContexts()) {
|
||||||
|
assertEquals(weight1.count(context), weight2.count(context));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testCountBoundary() throws IOException {
|
||||||
|
Directory dir = newDirectory();
|
||||||
|
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
|
||||||
|
SortField sortField = new SortedNumericSortField("field", SortField.Type.LONG);
|
||||||
|
boolean useLower = random().nextBoolean();
|
||||||
|
long lowerValue = 1;
|
||||||
|
long upperValue = 100;
|
||||||
|
sortField.setMissingValue(useLower ? lowerValue : upperValue);
|
||||||
|
Sort indexSort = new Sort(sortField);
|
||||||
|
iwc.setIndexSort(indexSort);
|
||||||
|
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
|
||||||
|
|
||||||
|
writer.addDocument(
|
||||||
|
createSNDVAndPointDocument("field", random().nextLong(lowerValue, upperValue)));
|
||||||
|
writer.addDocument(
|
||||||
|
createSNDVAndPointDocument("field", random().nextLong(lowerValue, upperValue)));
|
||||||
|
// missingValue
|
||||||
|
writer.addDocument(createMissingValueDocument());
|
||||||
|
|
||||||
|
IndexReader reader = writer.getReader();
|
||||||
|
IndexSearcher searcher = newSearcher(reader);
|
||||||
|
|
||||||
|
Query fallbackQuery = LongPoint.newRangeQuery("field", lowerValue, upperValue);
|
||||||
|
Query query =
|
||||||
|
new IndexSortSortedNumericDocValuesRangeQuery(
|
||||||
|
"field", lowerValue, upperValue, fallbackQuery);
|
||||||
|
Weight weight = query.createWeight(searcher, ScoreMode.COMPLETE, 1.0f);
|
||||||
|
for (LeafReaderContext context : searcher.getLeafContexts()) {
|
||||||
|
assertEquals(2, weight.count(context));
|
||||||
|
}
|
||||||
|
|
||||||
|
writer.close();
|
||||||
|
reader.close();
|
||||||
|
dir.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
private Document createMissingValueDocument() {
|
||||||
|
Document doc = new Document();
|
||||||
|
doc.add(new StringField("foo", "fox", Field.Store.YES));
|
||||||
|
return doc;
|
||||||
|
}
|
||||||
|
|
||||||
|
private Document createSNDVAndPointDocument(String field, long value) {
|
||||||
|
Document doc = new Document();
|
||||||
|
doc.add(new SortedNumericDocValuesField(field, value));
|
||||||
|
doc.add(new LongPoint(field, value));
|
||||||
|
return doc;
|
||||||
|
}
|
||||||
|
|
||||||
private Document createDocument(String field, long value) {
|
private Document createDocument(String field, long value) {
|
||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
doc.add(new SortedNumericDocValuesField(field, value));
|
doc.add(new SortedNumericDocValuesField(field, value));
|
||||||
|
|
Loading…
Reference in New Issue