mirror of https://github.com/apache/lucene.git
LUCENE-6519: make BKDPointInPolygonQueries much faster: avoid the per-hit polygon check when a leaf cell is fully contained by the polygon
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1684719 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
f346fc0f21
commit
bbadb6fc42
|
@ -66,6 +66,10 @@ New Features
|
|||
wrote the oldest segment in the index, for faster checking of "too
|
||||
old" indices (Ryan Ernst, Robert Muir, Mike McCandless)
|
||||
|
||||
* LUCENE-6519: BKDPointInPolygonQuery is much faster by avoiding
|
||||
the per-hit polygon check when a leaf cell is fully contained by the
|
||||
polygon. (Nick Knize, Mike McCandless)
|
||||
|
||||
API Changes
|
||||
|
||||
* LUCENE-6508: Simplify Lock api, there is now just
|
||||
|
|
|
@ -17,10 +17,6 @@ package org.apache.lucene.bkdtree;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.SortedNumericDocValues;
|
||||
|
@ -33,8 +29,13 @@ import org.apache.lucene.search.Query;
|
|||
import org.apache.lucene.search.Scorer;
|
||||
import org.apache.lucene.search.Weight;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.GeoUtils;
|
||||
import org.apache.lucene.util.ToStringUtils;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.Set;
|
||||
|
||||
/** Finds all previously indexed points that fall within the specified polygon.
|
||||
*
|
||||
* <p>The field must be indexed with {@link BKDTreeDocValuesFormat}, and {@link BKDPointField} added per document.
|
||||
|
@ -74,6 +75,8 @@ public class BKDPointInPolygonQuery extends Query {
|
|||
this.polyLats = polyLats;
|
||||
this.polyLons = polyLons;
|
||||
|
||||
// TODO: we could also compute the maximal innner bounding box, to make relations faster to compute?
|
||||
|
||||
double minLon = Double.POSITIVE_INFINITY;
|
||||
double minLat = Double.POSITIVE_INFINITY;
|
||||
double maxLon = Double.NEGATIVE_INFINITY;
|
||||
|
@ -161,7 +164,22 @@ public class BKDPointInPolygonQuery extends Query {
|
|||
new BKDTreeReader.LatLonFilter() {
|
||||
@Override
|
||||
public boolean accept(double lat, double lon) {
|
||||
return pointInPolygon(lat, lon);
|
||||
return GeoUtils.pointInPolygon(polyLons, polyLats, lat, lon);
|
||||
}
|
||||
|
||||
@Override
|
||||
public BKDTreeReader.Relation compare(double cellLatMin, double cellLatMax, double cellLonMin, double cellLonMax) {
|
||||
if (GeoUtils.rectWithinPoly(cellLonMin, cellLatMin, cellLonMax, cellLatMax,
|
||||
polyLons, polyLats,
|
||||
minLon, minLat, maxLon, maxLat)) {
|
||||
return BKDTreeReader.Relation.INSIDE;
|
||||
} else if (GeoUtils.rectCrossesPoly(cellLonMin, cellLatMin, cellLonMax, cellLatMax,
|
||||
polyLons, polyLats,
|
||||
minLon, minLat, maxLon, maxLat)) {
|
||||
return BKDTreeReader.Relation.CROSSES;
|
||||
} else {
|
||||
return BKDTreeReader.Relation.OUTSIDE;
|
||||
}
|
||||
}
|
||||
}, treeDV.delegate);
|
||||
|
||||
|
@ -203,36 +221,6 @@ public class BKDPointInPolygonQuery extends Query {
|
|||
};
|
||||
}
|
||||
|
||||
// TODO: share w/ GeoUtils:
|
||||
|
||||
/**
|
||||
* simple even-odd point in polygon computation
|
||||
* 1. Determine if point is contained in the longitudinal range
|
||||
* 2. Determine whether point crosses the edge by computing the latitudinal delta
|
||||
* between the end-point of a parallel vector (originating at the point) and the
|
||||
* y-component of the edge sink
|
||||
*
|
||||
* NOTE: Requires polygon point (x,y) order either clockwise or counter-clockwise
|
||||
*/
|
||||
boolean pointInPolygon(double lat, double lon) {
|
||||
/**
|
||||
* Note: This is using a euclidean coordinate system which could result in
|
||||
* upwards of 110KM error at the equator.
|
||||
* TODO convert coordinates to cylindrical projection (e.g. mercator)
|
||||
*/
|
||||
|
||||
// TODO: this quantizes a bit differently ... boundary cases will fail here:
|
||||
boolean inPoly = false;
|
||||
for (int i = 1; i < polyLons.length; i++) {
|
||||
if (polyLons[i] <= lon && polyLons[i-1] > lon || polyLons[i-1] <= lon && polyLons[i] > lon) {
|
||||
if (polyLats[i] + (lon - polyLons[i]) / (polyLons[i-1] - polyLons[i]) * (polyLats[i-1] - polyLats[i]) <= lat) {
|
||||
inPoly = !inPoly;
|
||||
}
|
||||
}
|
||||
}
|
||||
return inPoly;
|
||||
}
|
||||
|
||||
@Override
|
||||
@SuppressWarnings({"unchecked","rawtypes"})
|
||||
public boolean equals(Object o) {
|
||||
|
|
|
@ -40,8 +40,11 @@ final class BKDTreeReader implements Accountable {
|
|||
final int maxDoc;
|
||||
final IndexInput in;
|
||||
|
||||
enum Relation {INSIDE, CROSSES, OUTSIDE};
|
||||
|
||||
interface LatLonFilter {
|
||||
boolean accept(double lat, double lon);
|
||||
Relation compare(double latMin, double latMax, double lonMin, double lonMax);
|
||||
}
|
||||
|
||||
public BKDTreeReader(IndexInput in, int maxDoc) throws IOException {
|
||||
|
@ -137,7 +140,19 @@ final class BKDTreeReader implements Accountable {
|
|||
|
||||
/** Fast path: this is called when the query rect fully encompasses all cells under this node. */
|
||||
private int addAll(Bits acceptDocs, QueryState state, int nodeID) throws IOException {
|
||||
|
||||
//long latRange = (long) cellLatMaxEnc - (long) cellLatMinEnc;
|
||||
//long lonRange = (long) cellLonMaxEnc - (long) cellLonMinEnc;
|
||||
|
||||
if (nodeID >= leafNodeOffset) {
|
||||
|
||||
/*
|
||||
System.out.println("A: " + BKDTreeWriter.decodeLat(cellLatMinEnc)
|
||||
+ " " + BKDTreeWriter.decodeLat(cellLatMaxEnc)
|
||||
+ " " + BKDTreeWriter.decodeLon(cellLonMinEnc)
|
||||
+ " " + BKDTreeWriter.decodeLon(cellLonMaxEnc));
|
||||
*/
|
||||
|
||||
// Leaf node
|
||||
long fp = leafBlockFPs[nodeID-leafNodeOffset];
|
||||
//System.out.println(" leaf nodeID=" + nodeID + " vs leafNodeOffset=" + leafNodeOffset + " fp=" + fp);
|
||||
|
@ -152,40 +167,7 @@ final class BKDTreeReader implements Accountable {
|
|||
//System.out.println(" seek to leafFP=" + fp);
|
||||
// How many points are stored in this leaf cell:
|
||||
int count = state.in.readVInt();
|
||||
if (state.latLonFilter != null) {
|
||||
// Handle this differently since we must also look up lat/lon:
|
||||
|
||||
int hitCount = 0;
|
||||
for(int i=0;i<count;i++) {
|
||||
|
||||
int docID = state.in.readInt();
|
||||
|
||||
if (acceptDocs == null || acceptDocs.get(docID)) {
|
||||
|
||||
state.sndv.setDocument(docID);
|
||||
|
||||
// How many values this doc has:
|
||||
int docValueCount = state.sndv.count();
|
||||
for(int j=0;j<docValueCount;j++) {
|
||||
long enc = state.sndv.valueAt(j);
|
||||
int latEnc = (int) ((enc>>32) & 0xffffffffL);
|
||||
int lonEnc = (int) (enc & 0xffffffffL);
|
||||
|
||||
// TODO: maybe we can fix LatLonFilter to operate on encoded forms?
|
||||
if (state.latLonFilter.accept(BKDTreeWriter.decodeLat(latEnc), BKDTreeWriter.decodeLon(lonEnc))) {
|
||||
state.bits.set(docID);
|
||||
hitCount++;
|
||||
|
||||
// Stop processing values for this doc since it's now accepted:
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return hitCount;
|
||||
|
||||
} else if (acceptDocs != null) {
|
||||
if (acceptDocs != null) {
|
||||
for(int i=0;i<count;i++) {
|
||||
int docID = state.in.readInt();
|
||||
if (acceptDocs.get(docID)) {
|
||||
|
@ -213,8 +195,7 @@ final class BKDTreeReader implements Accountable {
|
|||
//System.out.println(" splitValue=" + splitValue);
|
||||
|
||||
//System.out.println(" addAll: inner");
|
||||
int count = 0;
|
||||
count += addAll(acceptDocs, state, 2*nodeID);
|
||||
int count = addAll(acceptDocs, state, 2*nodeID);
|
||||
count += addAll(acceptDocs, state, 2*nodeID+1);
|
||||
//System.out.println(" addAll: return count=" + count);
|
||||
return count;
|
||||
|
@ -227,17 +208,29 @@ final class BKDTreeReader implements Accountable {
|
|||
throws IOException {
|
||||
|
||||
// 2.06 sec -> 1.52 sec for 225 OSM London queries:
|
||||
if (state.latMinEnc <= cellLatMinEnc && state.latMaxEnc >= cellLatMaxEnc && state.lonMinEnc <= cellLonMinEnc && state.lonMaxEnc >= cellLonMaxEnc) {
|
||||
if (state.latLonFilter != null) {
|
||||
if (cellLatMinEnc > state.latMinEnc ||
|
||||
cellLatMaxEnc < state.latMaxEnc ||
|
||||
cellLonMinEnc > state.lonMinEnc ||
|
||||
cellLonMaxEnc < state.lonMaxEnc) {
|
||||
Relation r = state.latLonFilter.compare(BKDTreeWriter.decodeLat(cellLatMinEnc),
|
||||
BKDTreeWriter.decodeLat(cellLatMaxEnc),
|
||||
BKDTreeWriter.decodeLon(cellLonMinEnc),
|
||||
BKDTreeWriter.decodeLon(cellLonMaxEnc));
|
||||
//System.out.println("BKD.intersect cellLat=" + BKDTreeWriter.decodeLat(cellLatMinEnc) + " TO " + BKDTreeWriter.decodeLat(cellLatMaxEnc) + ", cellLon=" + BKDTreeWriter.decodeLon(cellLonMinEnc) + " TO " + BKDTreeWriter.decodeLon(cellLonMaxEnc) + " compare=" + r);
|
||||
if (r == Relation.OUTSIDE) {
|
||||
// This cell is fully outside of the query shape: stop recursing
|
||||
return 0;
|
||||
} else if (r == Relation.INSIDE) {
|
||||
// This cell is fully inside of the query shape: recursively add all points in this cell without filtering
|
||||
return addAll(acceptDocs, state, nodeID);
|
||||
} else {
|
||||
// The cell crosses the shape boundary, so we fall through and do full filtering
|
||||
}
|
||||
}
|
||||
} else if (state.latMinEnc <= cellLatMinEnc && state.latMaxEnc >= cellLatMaxEnc && state.lonMinEnc <= cellLonMinEnc && state.lonMaxEnc >= cellLonMaxEnc) {
|
||||
// Optimize the case when the query fully contains this cell: we can
|
||||
// recursively add all points without checking if they match the query:
|
||||
|
||||
/*
|
||||
System.out.println("A: " + BKDTreeWriter.decodeLat(cellLatMinEnc)
|
||||
+ " " + BKDTreeWriter.decodeLat(cellLatMaxEnc)
|
||||
+ " " + BKDTreeWriter.decodeLon(cellLonMinEnc)
|
||||
+ " " + BKDTreeWriter.decodeLon(cellLonMaxEnc));
|
||||
*/
|
||||
|
||||
return addAll(acceptDocs, state, nodeID);
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue