LUCENE-7066: Optimize PointRangeQuery for the case that all documents have a value and all points from the segment match.

This commit is contained in:
Adrien Grand 2016-03-07 09:55:39 +01:00
parent 74e7e1509a
commit e695b0ae07
3 changed files with 130 additions and 61 deletions

View File

@@ -142,6 +142,9 @@ Optimizations
* LUCENE-7050: TermsQuery is now cached more aggressively by the default
query caching policy. (Adrien Grand)
* LUCENE-7066: PointRangeQuery got optimized for the case that all documents
have a value and all points from the segment match. (Adrien Grand)
Changes in Runtime Behavior
* LUCENE-6789: IndexSearcher's default Similarity is changed to BM25Similarity.

View File

@@ -125,6 +125,68 @@ public abstract class PointRangeQuery extends Query {
return new ConstantScoreWeight(this) {
/**
 * Walks the BKD tree of {@code values} and collects every document whose
 * indexed point lies within {@code [packedLower, packedUpper]} (inclusive)
 * on every dimension, returning the matches as a {@link DocIdSet}.
 *
 * @param reader      segment reader, used only to size the doc-id builder
 * @param values      per-segment point values to intersect
 * @param packedLower inclusive packed lower bound ({@code numDims * bytesPerDim} bytes)
 * @param packedUpper inclusive packed upper bound ({@code numDims * bytesPerDim} bytes)
 */
private DocIdSet buildMatchingDocIdSet(LeafReader reader, PointValues values,
    byte[] packedLower, byte[] packedUpper) throws IOException {
  final DocIdSetBuilder matching = new DocIdSetBuilder(reader.maxDoc());
  final IntersectVisitor visitor = new IntersectVisitor() {
    @Override
    public void grow(int count) {
      matching.grow(count);
    }

    @Override
    public void visit(int docID) {
      // Entire cell was reported as inside the query: accept without
      // re-checking the value.
      matching.add(docID);
    }

    @Override
    public void visit(int docID, byte[] packedValue) {
      // Accept the doc only if its value is in-bounds on every dimension.
      for (int dim = 0; dim < numDims; dim++) {
        final int offset = dim * bytesPerDim;
        if (StringHelper.compare(bytesPerDim, packedValue, offset, packedLower, offset) < 0
            || StringHelper.compare(bytesPerDim, packedValue, offset, packedUpper, offset) > 0) {
          // Value is below the lower bound or above the upper bound in
          // this dimension: reject the doc.
          return;
        }
      }
      matching.add(docID);
    }

    @Override
    public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
      boolean crosses = false;
      for (int dim = 0; dim < numDims; dim++) {
        final int offset = dim * bytesPerDim;
        if (StringHelper.compare(bytesPerDim, minPackedValue, offset, packedUpper, offset) > 0
            || StringHelper.compare(bytesPerDim, maxPackedValue, offset, packedLower, offset) < 0) {
          // Disjoint on at least one dimension: the whole cell misses.
          return Relation.CELL_OUTSIDE_QUERY;
        }
        crosses |= StringHelper.compare(bytesPerDim, minPackedValue, offset, packedLower, offset) < 0
            || StringHelper.compare(bytesPerDim, maxPackedValue, offset, packedUpper, offset) > 0;
      }
      return crosses ? Relation.CELL_CROSSES_QUERY : Relation.CELL_INSIDE_QUERY;
    }
  };
  values.intersect(field, visitor);
  return matching.build();
}
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
LeafReader reader = context.reader();
@@ -155,67 +217,32 @@ public abstract class PointRangeQuery extends Query {
System.arraycopy(upperPoint[dim], 0, packedUpper, dim*bytesPerDim, bytesPerDim);
}
// Now packedLowerIncl and packedUpperIncl are inclusive, and non-empty space:
boolean allDocsMatch;
if (values.getDocCount(field) == reader.maxDoc()) {
final byte[] fieldPackedLower = values.getMinPackedValue(field);
final byte[] fieldPackedUpper = values.getMaxPackedValue(field);
allDocsMatch = true;
for (int i = 0; i < numDims; ++i) {
int offset = i * bytesPerDim;
if (StringHelper.compare(bytesPerDim, packedLower, offset, fieldPackedLower, offset) > 0
|| StringHelper.compare(bytesPerDim, packedUpper, offset, fieldPackedUpper, offset) < 0) {
allDocsMatch = false;
break;
}
}
} else {
allDocsMatch = false;
}
DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc());
DocIdSetIterator iterator;
if (allDocsMatch) {
// all docs have a value and all points are within bounds, so everything matches
iterator = DocIdSetIterator.all(reader.maxDoc());
} else {
iterator = buildMatchingDocIdSet(reader, values, packedLower, packedUpper).iterator();
}
values.intersect(field,
new IntersectVisitor() {
@Override
public void grow(int count) {
result.grow(count);
}
@Override
public void visit(int docID) {
result.add(docID);
}
@Override
public void visit(int docID, byte[] packedValue) {
for(int dim=0;dim<numDims;dim++) {
int offset = dim*bytesPerDim;
if (StringHelper.compare(bytesPerDim, packedValue, offset, packedLower, offset) < 0) {
// Doc's value is too low, in this dimension
return;
}
if (StringHelper.compare(bytesPerDim, packedValue, offset, packedUpper, offset) > 0) {
// Doc's value is too high, in this dimension
return;
}
}
// Doc is in-bounds
result.add(docID);
}
@Override
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
boolean crosses = false;
for(int dim=0;dim<numDims;dim++) {
int offset = dim*bytesPerDim;
if (StringHelper.compare(bytesPerDim, minPackedValue, offset, packedUpper, offset) > 0 ||
StringHelper.compare(bytesPerDim, maxPackedValue, offset, packedLower, offset) < 0) {
return Relation.CELL_OUTSIDE_QUERY;
}
crosses |= StringHelper.compare(bytesPerDim, minPackedValue, offset, packedLower, offset) < 0 ||
StringHelper.compare(bytesPerDim, maxPackedValue, offset, packedUpper, offset) > 0;
}
if (crosses) {
return Relation.CELL_CROSSES_QUERY;
} else {
return Relation.CELL_INSIDE_QUERY;
}
}
});
return new ConstantScoreScorer(this, score(), result.build().iterator());
return new ConstantScoreScorer(this, score(), iterator);
}
};
}

View File

@@ -53,13 +53,11 @@ import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.MultiReader;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.PointValues;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.SlowCompositeReaderWrapper;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
@@ -1847,4 +1845,45 @@ public class TestPointQueries extends LuceneTestCase {
// binary
assertEquals("bytes:{[12] [2a]}", BinaryPoint.newSetQuery("bytes", new byte[] {42}, new byte[] {18}).toString());
}
/**
 * Verifies LUCENE-7066: when every document in the segment has a point value
 * and all values fall inside the query range, the scorer is backed by
 * {@link DocIdSetIterator#all}, i.e. the per-value BKD intersection is skipped.
 * Also checks the optimization is disabled once a doc without a value exists.
 */
public void testRangeOptimizesIfAllPointsMatch() throws IOException {
  final int numDims = TestUtil.nextInt(random(), 1, 3);
  Directory dir = newDirectory();
  RandomIndexWriter w = new RandomIndexWriter(random(), dir);
  Document doc = new Document();
  int[] value = new int[numDims];
  for (int i = 0; i < numDims; ++i) {
    value[i] = TestUtil.nextInt(random(), 1, 10);
  }
  doc.add(new IntPoint("point", value));
  w.addDocument(doc);
  IndexReader reader = w.getReader();
  IndexSearcher searcher = new IndexSearcher(reader);
  searcher.setQueryCache(null);
  int[] lowerBound = new int[numDims];
  int[] upperBound = new int[numDims];
  for (int i = 0; i < numDims; ++i) {
    // Pick random bounds that still enclose the indexed value on every
    // dimension, so the single doc always matches. The previous code used
    // random().nextInt(1), which always returns 0 and so never exercised
    // bounds other than the value itself.
    lowerBound[i] = value[i] - random().nextInt(value[i]);
    upperBound[i] = value[i] + random().nextInt(10);
  }
  Query query = IntPoint.newRangeQuery("point", lowerBound, upperBound);
  Weight weight = searcher.createNormalizedWeight(query, false);
  Scorer scorer = weight.scorer(searcher.getIndexReader().leaves().get(0));
  // All docs have a value and all values match: expect the "match all" iterator.
  assertEquals(DocIdSetIterator.all(1).getClass(), scorer.iterator().getClass());

  // When not all documents in the segment have a value, the optimization
  // is not applicable (getDocCount(field) != maxDoc after the merge).
  reader.close();
  w.addDocument(new Document());
  w.forceMerge(1);
  reader = w.getReader();
  searcher = new IndexSearcher(reader);
  searcher.setQueryCache(null);
  weight = searcher.createNormalizedWeight(query, false);
  scorer = weight.scorer(searcher.getIndexReader().leaves().get(0));
  assertFalse(DocIdSetIterator.all(1).getClass().equals(scorer.iterator().getClass()));
  reader.close();
  w.close();
  dir.close();
}
}