mirror of https://github.com/apache/lucene.git
LUCENE-7066: Optimize PointRangeQuery for the case that all documents have a value and all points from the segment match.
This commit is contained in:
parent
e2ebbdf638
commit
9413b42d7b
|
@ -135,6 +135,9 @@ Optimizations
|
||||||
* LUCENE-7050: TermsQuery is now cached more aggressively by the default
|
* LUCENE-7050: TermsQuery is now cached more aggressively by the default
|
||||||
query caching policy. (Adrien Grand)
|
query caching policy. (Adrien Grand)
|
||||||
|
|
||||||
|
* LUCENE-7066: PointRangeQuery got optimized for the case that all documents
|
||||||
|
have a value and all points from the segment match. (Adrien Grand)
|
||||||
|
|
||||||
Changes in Runtime Behavior
|
Changes in Runtime Behavior
|
||||||
|
|
||||||
* LUCENE-6789: IndexSearcher's default Similarity is changed to BM25Similarity.
|
* LUCENE-6789: IndexSearcher's default Similarity is changed to BM25Similarity.
|
||||||
|
|
|
@ -125,6 +125,68 @@ public abstract class PointRangeQuery extends Query {
|
||||||
|
|
||||||
return new ConstantScoreWeight(this) {
|
return new ConstantScoreWeight(this) {
|
||||||
|
|
||||||
|
private DocIdSet buildMatchingDocIdSet(LeafReader reader, PointValues values,
|
||||||
|
byte[] packedLower, byte[] packedUpper) throws IOException {
|
||||||
|
DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc());
|
||||||
|
|
||||||
|
values.intersect(field,
|
||||||
|
new IntersectVisitor() {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void grow(int count) {
|
||||||
|
result.grow(count);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void visit(int docID) {
|
||||||
|
result.add(docID);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void visit(int docID, byte[] packedValue) {
|
||||||
|
for(int dim=0;dim<numDims;dim++) {
|
||||||
|
int offset = dim*bytesPerDim;
|
||||||
|
if (StringHelper.compare(bytesPerDim, packedValue, offset, packedLower, offset) < 0) {
|
||||||
|
// Doc's value is too low, in this dimension
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (StringHelper.compare(bytesPerDim, packedValue, offset, packedUpper, offset) > 0) {
|
||||||
|
// Doc's value is too high, in this dimension
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Doc is in-bounds
|
||||||
|
result.add(docID);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
|
||||||
|
|
||||||
|
boolean crosses = false;
|
||||||
|
|
||||||
|
for(int dim=0;dim<numDims;dim++) {
|
||||||
|
int offset = dim*bytesPerDim;
|
||||||
|
|
||||||
|
if (StringHelper.compare(bytesPerDim, minPackedValue, offset, packedUpper, offset) > 0 ||
|
||||||
|
StringHelper.compare(bytesPerDim, maxPackedValue, offset, packedLower, offset) < 0) {
|
||||||
|
return Relation.CELL_OUTSIDE_QUERY;
|
||||||
|
}
|
||||||
|
|
||||||
|
crosses |= StringHelper.compare(bytesPerDim, minPackedValue, offset, packedLower, offset) < 0 ||
|
||||||
|
StringHelper.compare(bytesPerDim, maxPackedValue, offset, packedUpper, offset) > 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (crosses) {
|
||||||
|
return Relation.CELL_CROSSES_QUERY;
|
||||||
|
} else {
|
||||||
|
return Relation.CELL_INSIDE_QUERY;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
return result.build();
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Scorer scorer(LeafReaderContext context) throws IOException {
|
public Scorer scorer(LeafReaderContext context) throws IOException {
|
||||||
LeafReader reader = context.reader();
|
LeafReader reader = context.reader();
|
||||||
|
@ -155,67 +217,32 @@ public abstract class PointRangeQuery extends Query {
|
||||||
System.arraycopy(upperPoint[dim], 0, packedUpper, dim*bytesPerDim, bytesPerDim);
|
System.arraycopy(upperPoint[dim], 0, packedUpper, dim*bytesPerDim, bytesPerDim);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Now packedLowerIncl and packedUpperIncl are inclusive, and non-empty space:
|
boolean allDocsMatch;
|
||||||
|
if (values.getDocCount(field) == reader.maxDoc()) {
|
||||||
|
final byte[] fieldPackedLower = values.getMinPackedValue(field);
|
||||||
|
final byte[] fieldPackedUpper = values.getMaxPackedValue(field);
|
||||||
|
allDocsMatch = true;
|
||||||
|
for (int i = 0; i < numDims; ++i) {
|
||||||
|
int offset = i * bytesPerDim;
|
||||||
|
if (StringHelper.compare(bytesPerDim, packedLower, offset, fieldPackedLower, offset) > 0
|
||||||
|
|| StringHelper.compare(bytesPerDim, packedUpper, offset, fieldPackedUpper, offset) < 0) {
|
||||||
|
allDocsMatch = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
allDocsMatch = false;
|
||||||
|
}
|
||||||
|
|
||||||
DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc());
|
DocIdSetIterator iterator;
|
||||||
|
if (allDocsMatch) {
|
||||||
|
// all docs have a value and all points are within bounds, so everything matches
|
||||||
|
iterator = DocIdSetIterator.all(reader.maxDoc());
|
||||||
|
} else {
|
||||||
|
iterator = buildMatchingDocIdSet(reader, values, packedLower, packedUpper).iterator();
|
||||||
|
}
|
||||||
|
|
||||||
values.intersect(field,
|
return new ConstantScoreScorer(this, score(), iterator);
|
||||||
new IntersectVisitor() {
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void grow(int count) {
|
|
||||||
result.grow(count);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void visit(int docID) {
|
|
||||||
result.add(docID);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void visit(int docID, byte[] packedValue) {
|
|
||||||
for(int dim=0;dim<numDims;dim++) {
|
|
||||||
int offset = dim*bytesPerDim;
|
|
||||||
if (StringHelper.compare(bytesPerDim, packedValue, offset, packedLower, offset) < 0) {
|
|
||||||
// Doc's value is too low, in this dimension
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
if (StringHelper.compare(bytesPerDim, packedValue, offset, packedUpper, offset) > 0) {
|
|
||||||
// Doc's value is too high, in this dimension
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Doc is in-bounds
|
|
||||||
result.add(docID);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
|
|
||||||
|
|
||||||
boolean crosses = false;
|
|
||||||
|
|
||||||
for(int dim=0;dim<numDims;dim++) {
|
|
||||||
int offset = dim*bytesPerDim;
|
|
||||||
|
|
||||||
if (StringHelper.compare(bytesPerDim, minPackedValue, offset, packedUpper, offset) > 0 ||
|
|
||||||
StringHelper.compare(bytesPerDim, maxPackedValue, offset, packedLower, offset) < 0) {
|
|
||||||
return Relation.CELL_OUTSIDE_QUERY;
|
|
||||||
}
|
|
||||||
|
|
||||||
crosses |= StringHelper.compare(bytesPerDim, minPackedValue, offset, packedLower, offset) < 0 ||
|
|
||||||
StringHelper.compare(bytesPerDim, maxPackedValue, offset, packedUpper, offset) > 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (crosses) {
|
|
||||||
return Relation.CELL_CROSSES_QUERY;
|
|
||||||
} else {
|
|
||||||
return Relation.CELL_INSIDE_QUERY;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
return new ConstantScoreScorer(this, score(), result.build().iterator());
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
|
@ -53,13 +53,11 @@ import org.apache.lucene.index.IndexWriter;
|
||||||
import org.apache.lucene.index.IndexWriterConfig;
|
import org.apache.lucene.index.IndexWriterConfig;
|
||||||
import org.apache.lucene.index.LeafReaderContext;
|
import org.apache.lucene.index.LeafReaderContext;
|
||||||
import org.apache.lucene.index.MultiDocValues;
|
import org.apache.lucene.index.MultiDocValues;
|
||||||
import org.apache.lucene.index.MultiReader;
|
|
||||||
import org.apache.lucene.index.NumericDocValues;
|
import org.apache.lucene.index.NumericDocValues;
|
||||||
import org.apache.lucene.index.PointValues;
|
import org.apache.lucene.index.PointValues;
|
||||||
import org.apache.lucene.index.RandomIndexWriter;
|
import org.apache.lucene.index.RandomIndexWriter;
|
||||||
import org.apache.lucene.index.SegmentReadState;
|
import org.apache.lucene.index.SegmentReadState;
|
||||||
import org.apache.lucene.index.SegmentWriteState;
|
import org.apache.lucene.index.SegmentWriteState;
|
||||||
import org.apache.lucene.index.SlowCompositeReaderWrapper;
|
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
@ -1847,4 +1845,45 @@ public class TestPointQueries extends LuceneTestCase {
|
||||||
// binary
|
// binary
|
||||||
assertEquals("bytes:{[12] [2a]}", BinaryPoint.newSetQuery("bytes", new byte[] {42}, new byte[] {18}).toString());
|
assertEquals("bytes:{[12] [2a]}", BinaryPoint.newSetQuery("bytes", new byte[] {42}, new byte[] {18}).toString());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testRangeOptimizesIfAllPointsMatch() throws IOException {
|
||||||
|
final int numDims = TestUtil.nextInt(random(), 1, 3);
|
||||||
|
Directory dir = newDirectory();
|
||||||
|
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
|
||||||
|
Document doc = new Document();
|
||||||
|
int[] value = new int[numDims];
|
||||||
|
for (int i = 0; i < numDims; ++i) {
|
||||||
|
value[i] = TestUtil.nextInt(random(), 1, 10);
|
||||||
|
}
|
||||||
|
doc.add(new IntPoint("point", value));
|
||||||
|
w.addDocument(doc);
|
||||||
|
IndexReader reader = w.getReader();
|
||||||
|
IndexSearcher searcher = new IndexSearcher(reader);
|
||||||
|
searcher.setQueryCache(null);
|
||||||
|
int[] lowerBound = new int[numDims];
|
||||||
|
int[] upperBound = new int[numDims];
|
||||||
|
for (int i = 0; i < numDims; ++i) {
|
||||||
|
lowerBound[i] = value[i] - random().nextInt(1);
|
||||||
|
upperBound[i] = value[i] + random().nextInt(1);
|
||||||
|
}
|
||||||
|
Query query = IntPoint.newRangeQuery("point", lowerBound, upperBound);
|
||||||
|
Weight weight = searcher.createNormalizedWeight(query, false);
|
||||||
|
Scorer scorer = weight.scorer(searcher.getIndexReader().leaves().get(0));
|
||||||
|
assertEquals(DocIdSetIterator.all(1).getClass(), scorer.iterator().getClass());
|
||||||
|
|
||||||
|
// When not all documents in the query have a value, the optimization is not applicable
|
||||||
|
reader.close();
|
||||||
|
w.addDocument(new Document());
|
||||||
|
w.forceMerge(1);
|
||||||
|
reader = w.getReader();
|
||||||
|
searcher = new IndexSearcher(reader);
|
||||||
|
searcher.setQueryCache(null);
|
||||||
|
weight = searcher.createNormalizedWeight(query, false);
|
||||||
|
scorer = weight.scorer(searcher.getIndexReader().leaves().get(0));
|
||||||
|
assertFalse(DocIdSetIterator.all(1).getClass().equals(scorer.iterator().getClass()));
|
||||||
|
|
||||||
|
reader.close();
|
||||||
|
w.close();
|
||||||
|
dir.close();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue