From 9413b42d7be4dafd7ccafaacc5563871c6ebb0dd Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Mon, 7 Mar 2016 09:55:39 +0100 Subject: [PATCH] LUCENE-7066: Optimize PointRangeQuery for the case that all documents have a value and all points from the segment match. --- lucene/CHANGES.txt | 3 + .../apache/lucene/search/PointRangeQuery.java | 145 +++++++++++------- .../lucene/search/TestPointQueries.java | 43 +++++- 3 files changed, 130 insertions(+), 61 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index b6d0bb3d973..c23a3fe08a2 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -135,6 +135,9 @@ Optimizations * LUCENE-7050: TermsQuery is now cached more aggressively by the default query caching policy. (Adrien Grand) +* LUCENE-7066: PointRangeQuery got optimized for the case that all documents + have a value and all points from the segment match. (Adrien Grand) + Changes in Runtime Behavior * LUCENE-6789: IndexSearcher's default Similarity is changed to BM25Similarity. diff --git a/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java b/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java index 85c486e7dde..777c1338813 100644 --- a/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java @@ -125,6 +125,68 @@ public abstract class PointRangeQuery extends Query { return new ConstantScoreWeight(this) { + private DocIdSet buildMatchingDocIdSet(LeafReader reader, PointValues values, + byte[] packedLower, byte[] packedUpper) throws IOException { + DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc()); + + values.intersect(field, + new IntersectVisitor() { + + @Override + public void grow(int count) { + result.grow(count); + } + + @Override + public void visit(int docID) { + result.add(docID); + } + + @Override + public void visit(int docID, byte[] packedValue) { + for(int dim=0;dim 0) { + // Doc's value is too high, in this dimension + return; + } + } + + // Doc is in-bounds + result.add(docID); + } + + @Override + public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) { + + boolean crosses = false; + + for(int dim=0;dim 0 || + StringHelper.compare(bytesPerDim, maxPackedValue, offset, packedLower, offset) < 0) { + return Relation.CELL_OUTSIDE_QUERY; + } + + crosses |= StringHelper.compare(bytesPerDim, minPackedValue, offset, packedLower, offset) < 0 || + StringHelper.compare(bytesPerDim, maxPackedValue, offset, packedUpper, offset) > 0; + } + + if (crosses) { + return Relation.CELL_CROSSES_QUERY; + } else { + return Relation.CELL_INSIDE_QUERY; + } + } + }); + return result.build(); + } + @Override public Scorer scorer(LeafReaderContext context) throws IOException { LeafReader reader = context.reader(); @@ -155,67 +217,32 @@ public abstract class PointRangeQuery extends Query { System.arraycopy(upperPoint[dim], 0, packedUpper, dim*bytesPerDim, bytesPerDim); } - // Now packedLowerIncl and packedUpperIncl are inclusive, and non-empty space: + boolean allDocsMatch; + if (values.getDocCount(field) == reader.maxDoc()) { + final byte[] fieldPackedLower = values.getMinPackedValue(field); + final byte[] fieldPackedUpper = values.getMaxPackedValue(field); + allDocsMatch = true; + for (int i = 0; i < numDims; ++i) { + int offset = i * bytesPerDim; + if (StringHelper.compare(bytesPerDim, packedLower, offset, fieldPackedLower, offset) > 0 + || StringHelper.compare(bytesPerDim, packedUpper, offset, fieldPackedUpper, offset) < 0) { + allDocsMatch = false; + break; + } + } + } else { + allDocsMatch = false; + } - DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc()); + DocIdSetIterator iterator; + if (allDocsMatch) { + // all docs have a value and all points are within bounds, so everything matches + iterator = DocIdSetIterator.all(reader.maxDoc()); + } else { + iterator = buildMatchingDocIdSet(reader, values, packedLower, packedUpper).iterator(); + } - values.intersect(field, - new IntersectVisitor() { - - @Override - public void grow(int count) { - result.grow(count); - } - - @Override - public void visit(int docID) { - result.add(docID); - } - - @Override - public void visit(int docID, byte[] packedValue) { - for(int dim=0;dim 0) { - // Doc's value is too high, in this dimension - return; - } - } - - // Doc is in-bounds - result.add(docID); - } - - @Override - public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) { - - boolean crosses = false; - - for(int dim=0;dim 0 || - StringHelper.compare(bytesPerDim, maxPackedValue, offset, packedLower, offset) < 0) { - return Relation.CELL_OUTSIDE_QUERY; - } - - crosses |= StringHelper.compare(bytesPerDim, minPackedValue, offset, packedLower, offset) < 0 || - StringHelper.compare(bytesPerDim, maxPackedValue, offset, packedUpper, offset) > 0; - } - - if (crosses) { - return Relation.CELL_CROSSES_QUERY; - } else { - return Relation.CELL_INSIDE_QUERY; - } - } - }); - - return new ConstantScoreScorer(this, score(), result.build().iterator()); + return new ConstantScoreScorer(this, score(), iterator); } }; } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestPointQueries.java b/lucene/core/src/test/org/apache/lucene/search/TestPointQueries.java index cd0d7196585..5a3483bc30f 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestPointQueries.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestPointQueries.java @@ -53,13 +53,11 @@ import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.MultiDocValues; -import org.apache.lucene.index.MultiReader; import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.index.PointValues; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SegmentWriteState; -import org.apache.lucene.index.SlowCompositeReaderWrapper; import org.apache.lucene.index.Term; import org.apache.lucene.store.Directory; import org.apache.lucene.util.BytesRef; @@ -1847,4 +1845,45 @@ public class TestPointQueries extends LuceneTestCase { // binary assertEquals("bytes:{[12] [2a]}", BinaryPoint.newSetQuery("bytes", new byte[] {42}, new byte[] {18}).toString()); } + + public void testRangeOptimizesIfAllPointsMatch() throws IOException { + final int numDims = TestUtil.nextInt(random(), 1, 3); + Directory dir = newDirectory(); + RandomIndexWriter w = new RandomIndexWriter(random(), dir); + Document doc = new Document(); + int[] value = new int[numDims]; + for (int i = 0; i < numDims; ++i) { + value[i] = TestUtil.nextInt(random(), 1, 10); + } + doc.add(new IntPoint("point", value)); + w.addDocument(doc); + IndexReader reader = w.getReader(); + IndexSearcher searcher = new IndexSearcher(reader); + searcher.setQueryCache(null); + int[] lowerBound = new int[numDims]; + int[] upperBound = new int[numDims]; + for (int i = 0; i < numDims; ++i) { + lowerBound[i] = value[i] - random().nextInt(1); + upperBound[i] = value[i] + random().nextInt(1); + } + Query query = IntPoint.newRangeQuery("point", lowerBound, upperBound); + Weight weight = searcher.createNormalizedWeight(query, false); + Scorer scorer = weight.scorer(searcher.getIndexReader().leaves().get(0)); + assertEquals(DocIdSetIterator.all(1).getClass(), scorer.iterator().getClass()); + + // When not all documents in the query have a value, the optimization is not applicable + reader.close(); + w.addDocument(new Document()); + w.forceMerge(1); + reader = w.getReader(); + searcher = new IndexSearcher(reader); + searcher.setQueryCache(null); + weight = searcher.createNormalizedWeight(query, false); + scorer = weight.scorer(searcher.getIndexReader().leaves().get(0)); + assertFalse(DocIdSetIterator.all(1).getClass().equals(scorer.iterator().getClass())); + + reader.close(); + w.close(); + dir.close(); + } }