LUCENE-7655: Speed up geo-distance queries that match most documents.

Closes #226
This commit is contained in:
Adrien Grand 2017-08-07 13:17:53 +02:00
parent 90199e7485
commit 5fb800f018
3 changed files with 205 additions and 67 deletions

View File

@ -21,6 +21,9 @@ Optimizations
SortedSetDocValuesFacetCounts and others) builds its map (Robert
Muir, Adrien Grand, Mike McCandless)
* LUCENE-7655: Speed up geo-distance queries in case of dense single-valued
fields when most documents match. (Maciej Zasada via Adrien Grand)
Bug Fixes
* LUCENE-7914: Add a maximum recursion level in automaton recursive

View File

@ -29,12 +29,15 @@ import org.apache.lucene.index.PointValues.IntersectVisitor;
import org.apache.lucene.index.PointValues.Relation;
import org.apache.lucene.search.ConstantScoreScorer;
import org.apache.lucene.search.ConstantScoreWeight;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.ScorerSupplier;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.BitSetIterator;
import org.apache.lucene.util.DocIdSetBuilder;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.util.StringHelper;
@ -131,8 +134,49 @@ final class LatLonPointDistanceQuery extends Query {
// matching docids
DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc(), values, field);
final IntersectVisitor visitor =
new IntersectVisitor() {
final IntersectVisitor visitor = getIntersectVisitor(result);
final Weight weight = this;
return new ScorerSupplier() {
long cost = -1;
@Override
public Scorer get(boolean randomAccess) throws IOException {
if (values.getDocCount() == reader.maxDoc()
&& values.getDocCount() == values.size()
&& cost() > reader.maxDoc() / 2) {
// If all docs have exactly one value and the cost is greater
// than half the leaf size then maybe we can make things faster
// by computing the set of documents that do NOT match the range
final FixedBitSet result = new FixedBitSet(reader.maxDoc());
result.set(0, reader.maxDoc());
int[] cost = new int[]{reader.maxDoc()};
values.intersect(getInverseIntersectVisitor(result, cost));
final DocIdSetIterator iterator = new BitSetIterator(result, cost[0]);
return new ConstantScoreScorer(weight, score(), iterator);
}
values.intersect(visitor);
return new ConstantScoreScorer(weight, score(), result.build().iterator());
}
@Override
public long cost() {
if (cost == -1) {
cost = values.estimatePointCount(visitor);
}
assert cost >= 0;
return cost;
}
};
}
/**
* Create a visitor that collects documents matching the range.
*/
private IntersectVisitor getIntersectVisitor(DocIdSetBuilder result) {
return new IntersectVisitor() {
DocIdSetBuilder.BulkAdder adder;
@ -197,28 +241,84 @@ final class LatLonPointDistanceQuery extends Query {
return GeoUtils.relate(latMin, latMax, lonMin, lonMax, latitude, longitude, sortKey, axisLat);
}
};
final Weight weight = this;
return new ScorerSupplier() {
}
long cost = -1;
/**
* Create a visitor that clears documents that do NOT match the range.
*/
private IntersectVisitor getInverseIntersectVisitor(FixedBitSet result, int[] cost) {
return new IntersectVisitor() {
@Override
public Scorer get(boolean randomAccess) throws IOException {
values.intersect(visitor);
return new ConstantScoreScorer(weight, score(), result.build().iterator());
public void visit(int docID) {
result.clear(docID);
cost[0]--;
}
@Override
public long cost() {
if (cost == -1) {
cost = values.estimatePointCount(visitor);
public void visit(int docID, byte[] packedValue) {
// bounding box check
if (StringHelper.compare(Integer.BYTES, packedValue, 0, maxLat, 0) > 0 ||
StringHelper.compare(Integer.BYTES, packedValue, 0, minLat, 0) < 0) {
// latitude out of bounding box range
result.clear(docID);
cost[0]--;
return;
}
assert cost >= 0;
return cost;
if ((StringHelper.compare(Integer.BYTES, packedValue, Integer.BYTES, maxLon, 0) > 0 ||
StringHelper.compare(Integer.BYTES, packedValue, Integer.BYTES, minLon, 0) < 0)
&& StringHelper.compare(Integer.BYTES, packedValue, Integer.BYTES, minLon2, 0) < 0) {
// longitude out of bounding box range
result.clear(docID);
cost[0]--;
return;
}
int docLatitude = NumericUtils.sortableBytesToInt(packedValue, 0);
int docLongitude = NumericUtils.sortableBytesToInt(packedValue, Integer.BYTES);
if (!distancePredicate.test(docLatitude, docLongitude)) {
result.clear(docID);
cost[0]--;
}
}
@Override
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
if (StringHelper.compare(Integer.BYTES, minPackedValue, 0, maxLat, 0) > 0 ||
StringHelper.compare(Integer.BYTES, maxPackedValue, 0, minLat, 0) < 0) {
// latitude out of bounding box range
return Relation.CELL_INSIDE_QUERY;
}
if ((StringHelper.compare(Integer.BYTES, minPackedValue, Integer.BYTES, maxLon, 0) > 0 ||
StringHelper.compare(Integer.BYTES, maxPackedValue, Integer.BYTES, minLon, 0) < 0)
&& StringHelper.compare(Integer.BYTES, maxPackedValue, Integer.BYTES, minLon2, 0) < 0) {
// latitude out of bounding box range
return Relation.CELL_INSIDE_QUERY;
}
double latMin = decodeLatitude(minPackedValue, 0);
double lonMin = decodeLongitude(minPackedValue, Integer.BYTES);
double latMax = decodeLatitude(maxPackedValue, 0);
double lonMax = decodeLongitude(maxPackedValue, Integer.BYTES);
Relation relation = GeoUtils.relate(latMin, latMax, lonMin, lonMax, latitude, longitude, sortKey, axisLat);
switch (relation) {
case CELL_INSIDE_QUERY:
// all points match, skip this subtree
return Relation.CELL_OUTSIDE_QUERY;
case CELL_OUTSIDE_QUERY:
// none of the points match, clear all documents
return Relation.CELL_INSIDE_QUERY;
default:
return relation;
}
}
};
}
};
}

View File

@ -16,11 +16,18 @@
*/
package org.apache.lucene.search;
import java.io.IOException;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.LatLonPoint;
import org.apache.lucene.geo.BaseGeoPointTestCase;
import org.apache.lucene.geo.Polygon;
import org.apache.lucene.geo.GeoEncodingUtils;
import org.apache.lucene.geo.Polygon;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.bkd.BKDWriter;
public class TestLatLonPointQueries extends BaseGeoPointTestCase {
@ -53,4 +60,32 @@ public class TestLatLonPointQueries extends BaseGeoPointTestCase {
protected double quantizeLon(double lonRaw) {
return GeoEncodingUtils.decodeLongitude(GeoEncodingUtils.encodeLongitude(lonRaw));
}
public void testDistanceQueryWithInvertedIntersection() throws IOException {
final int numMatchingDocs = atLeast(10 * BKDWriter.DEFAULT_MAX_POINTS_IN_LEAF_NODE);
try (Directory dir = newDirectory()) {
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
for (int i = 0; i < numMatchingDocs; ++i) {
Document doc = new Document();
addPointToDoc("field", doc, 18.313694, -65.227444);
w.addDocument(doc);
}
// Add a handful of docs that don't match
for (int i = 0; i < 11; ++i) {
Document doc = new Document();
addPointToDoc("field", doc, 10, -65.227444);
w.addDocument(doc);
}
w.forceMerge(1);
}
try (IndexReader r = DirectoryReader.open(dir)) {
IndexSearcher searcher = newSearcher(r);
assertEquals(numMatchingDocs, searcher.count(newDistanceQuery("field", 18, -65, 50_000)));
}
}
}
}