mirror of https://github.com/apache/lucene.git
LUCENE-7066: Optimize PointRangeQuery for the case that all documents have a value and all points from the segment match.
This commit is contained in:
parent
e2ebbdf638
commit
9413b42d7b
|
@ -135,6 +135,9 @@ Optimizations
|
|||
* LUCENE-7050: TermsQuery is now cached more aggressively by the default
|
||||
query caching policy. (Adrien Grand)
|
||||
|
||||
* LUCENE-7066: PointRangeQuery got optimized for the case that all documents
|
||||
have a value and all points from the segment match. (Adrien Grand)
|
||||
|
||||
Changes in Runtime Behavior
|
||||
|
||||
* LUCENE-6789: IndexSearcher's default Similarity is changed to BM25Similarity.
|
||||
|
|
|
@ -125,6 +125,68 @@ public abstract class PointRangeQuery extends Query {
|
|||
|
||||
return new ConstantScoreWeight(this) {
|
||||
|
||||
private DocIdSet buildMatchingDocIdSet(LeafReader reader, PointValues values,
|
||||
byte[] packedLower, byte[] packedUpper) throws IOException {
|
||||
DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc());
|
||||
|
||||
values.intersect(field,
|
||||
new IntersectVisitor() {
|
||||
|
||||
@Override
|
||||
public void grow(int count) {
|
||||
result.grow(count);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void visit(int docID) {
|
||||
result.add(docID);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void visit(int docID, byte[] packedValue) {
|
||||
for(int dim=0;dim<numDims;dim++) {
|
||||
int offset = dim*bytesPerDim;
|
||||
if (StringHelper.compare(bytesPerDim, packedValue, offset, packedLower, offset) < 0) {
|
||||
// Doc's value is too low, in this dimension
|
||||
return;
|
||||
}
|
||||
if (StringHelper.compare(bytesPerDim, packedValue, offset, packedUpper, offset) > 0) {
|
||||
// Doc's value is too high, in this dimension
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Doc is in-bounds
|
||||
result.add(docID);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
|
||||
|
||||
boolean crosses = false;
|
||||
|
||||
for(int dim=0;dim<numDims;dim++) {
|
||||
int offset = dim*bytesPerDim;
|
||||
|
||||
if (StringHelper.compare(bytesPerDim, minPackedValue, offset, packedUpper, offset) > 0 ||
|
||||
StringHelper.compare(bytesPerDim, maxPackedValue, offset, packedLower, offset) < 0) {
|
||||
return Relation.CELL_OUTSIDE_QUERY;
|
||||
}
|
||||
|
||||
crosses |= StringHelper.compare(bytesPerDim, minPackedValue, offset, packedLower, offset) < 0 ||
|
||||
StringHelper.compare(bytesPerDim, maxPackedValue, offset, packedUpper, offset) > 0;
|
||||
}
|
||||
|
||||
if (crosses) {
|
||||
return Relation.CELL_CROSSES_QUERY;
|
||||
} else {
|
||||
return Relation.CELL_INSIDE_QUERY;
|
||||
}
|
||||
}
|
||||
});
|
||||
return result.build();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Scorer scorer(LeafReaderContext context) throws IOException {
|
||||
LeafReader reader = context.reader();
|
||||
|
@ -155,67 +217,32 @@ public abstract class PointRangeQuery extends Query {
|
|||
System.arraycopy(upperPoint[dim], 0, packedUpper, dim*bytesPerDim, bytesPerDim);
|
||||
}
|
||||
|
||||
// Now packedLowerIncl and packedUpperIncl are inclusive, and non-empty space:
|
||||
boolean allDocsMatch;
|
||||
if (values.getDocCount(field) == reader.maxDoc()) {
|
||||
final byte[] fieldPackedLower = values.getMinPackedValue(field);
|
||||
final byte[] fieldPackedUpper = values.getMaxPackedValue(field);
|
||||
allDocsMatch = true;
|
||||
for (int i = 0; i < numDims; ++i) {
|
||||
int offset = i * bytesPerDim;
|
||||
if (StringHelper.compare(bytesPerDim, packedLower, offset, fieldPackedLower, offset) > 0
|
||||
|| StringHelper.compare(bytesPerDim, packedUpper, offset, fieldPackedUpper, offset) < 0) {
|
||||
allDocsMatch = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
allDocsMatch = false;
|
||||
}
|
||||
|
||||
DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc());
|
||||
DocIdSetIterator iterator;
|
||||
if (allDocsMatch) {
|
||||
// all docs have a value and all points are within bounds, so everything matches
|
||||
iterator = DocIdSetIterator.all(reader.maxDoc());
|
||||
} else {
|
||||
iterator = buildMatchingDocIdSet(reader, values, packedLower, packedUpper).iterator();
|
||||
}
|
||||
|
||||
values.intersect(field,
|
||||
new IntersectVisitor() {
|
||||
|
||||
@Override
|
||||
public void grow(int count) {
|
||||
result.grow(count);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void visit(int docID) {
|
||||
result.add(docID);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void visit(int docID, byte[] packedValue) {
|
||||
for(int dim=0;dim<numDims;dim++) {
|
||||
int offset = dim*bytesPerDim;
|
||||
if (StringHelper.compare(bytesPerDim, packedValue, offset, packedLower, offset) < 0) {
|
||||
// Doc's value is too low, in this dimension
|
||||
return;
|
||||
}
|
||||
if (StringHelper.compare(bytesPerDim, packedValue, offset, packedUpper, offset) > 0) {
|
||||
// Doc's value is too high, in this dimension
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Doc is in-bounds
|
||||
result.add(docID);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
|
||||
|
||||
boolean crosses = false;
|
||||
|
||||
for(int dim=0;dim<numDims;dim++) {
|
||||
int offset = dim*bytesPerDim;
|
||||
|
||||
if (StringHelper.compare(bytesPerDim, minPackedValue, offset, packedUpper, offset) > 0 ||
|
||||
StringHelper.compare(bytesPerDim, maxPackedValue, offset, packedLower, offset) < 0) {
|
||||
return Relation.CELL_OUTSIDE_QUERY;
|
||||
}
|
||||
|
||||
crosses |= StringHelper.compare(bytesPerDim, minPackedValue, offset, packedLower, offset) < 0 ||
|
||||
StringHelper.compare(bytesPerDim, maxPackedValue, offset, packedUpper, offset) > 0;
|
||||
}
|
||||
|
||||
if (crosses) {
|
||||
return Relation.CELL_CROSSES_QUERY;
|
||||
} else {
|
||||
return Relation.CELL_INSIDE_QUERY;
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
return new ConstantScoreScorer(this, score(), result.build().iterator());
|
||||
return new ConstantScoreScorer(this, score(), iterator);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
|
|
@ -53,13 +53,11 @@ import org.apache.lucene.index.IndexWriter;
|
|||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.MultiDocValues;
|
||||
import org.apache.lucene.index.MultiReader;
|
||||
import org.apache.lucene.index.NumericDocValues;
|
||||
import org.apache.lucene.index.PointValues;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.index.SegmentReadState;
|
||||
import org.apache.lucene.index.SegmentWriteState;
|
||||
import org.apache.lucene.index.SlowCompositeReaderWrapper;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
@ -1847,4 +1845,45 @@ public class TestPointQueries extends LuceneTestCase {
|
|||
// binary
|
||||
assertEquals("bytes:{[12] [2a]}", BinaryPoint.newSetQuery("bytes", new byte[] {42}, new byte[] {18}).toString());
|
||||
}
|
||||
|
||||
public void testRangeOptimizesIfAllPointsMatch() throws IOException {
|
||||
final int numDims = TestUtil.nextInt(random(), 1, 3);
|
||||
Directory dir = newDirectory();
|
||||
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
|
||||
Document doc = new Document();
|
||||
int[] value = new int[numDims];
|
||||
for (int i = 0; i < numDims; ++i) {
|
||||
value[i] = TestUtil.nextInt(random(), 1, 10);
|
||||
}
|
||||
doc.add(new IntPoint("point", value));
|
||||
w.addDocument(doc);
|
||||
IndexReader reader = w.getReader();
|
||||
IndexSearcher searcher = new IndexSearcher(reader);
|
||||
searcher.setQueryCache(null);
|
||||
int[] lowerBound = new int[numDims];
|
||||
int[] upperBound = new int[numDims];
|
||||
for (int i = 0; i < numDims; ++i) {
|
||||
lowerBound[i] = value[i] - random().nextInt(1);
|
||||
upperBound[i] = value[i] + random().nextInt(1);
|
||||
}
|
||||
Query query = IntPoint.newRangeQuery("point", lowerBound, upperBound);
|
||||
Weight weight = searcher.createNormalizedWeight(query, false);
|
||||
Scorer scorer = weight.scorer(searcher.getIndexReader().leaves().get(0));
|
||||
assertEquals(DocIdSetIterator.all(1).getClass(), scorer.iterator().getClass());
|
||||
|
||||
// When not all documents in the query have a value, the optimization is not applicable
|
||||
reader.close();
|
||||
w.addDocument(new Document());
|
||||
w.forceMerge(1);
|
||||
reader = w.getReader();
|
||||
searcher = new IndexSearcher(reader);
|
||||
searcher.setQueryCache(null);
|
||||
weight = searcher.createNormalizedWeight(query, false);
|
||||
scorer = weight.scorer(searcher.getIndexReader().leaves().get(0));
|
||||
assertFalse(DocIdSetIterator.all(1).getClass().equals(scorer.iterator().getClass()));
|
||||
|
||||
reader.close();
|
||||
w.close();
|
||||
dir.close();
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue