LUCENE-6519: make BKDPointInPolygonQueries much faster: avoid the per-hit polygon check when a leaf cell is fully contained by the polygon

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1684719 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2015-06-10 17:23:06 +00:00
parent f346fc0f21
commit bbadb6fc42
3 changed files with 65 additions and 80 deletions

View File

@ -66,6 +66,10 @@ New Features
wrote the oldest segment in the index, for faster checking of "too
old" indices (Ryan Ernst, Robert Muir, Mike McCandless)
* LUCENE-6519: BKDPointInPolygonQuery is much faster by avoiding
the per-hit polygon check when a leaf cell is fully contained by the
polygon. (Nick Knize, Mike McCandless)
API Changes
* LUCENE-6508: Simplify Lock api, there is now just

View File

@ -17,10 +17,6 @@ package org.apache.lucene.bkdtree;
* limitations under the License.
*/
import java.io.IOException;
import java.util.Arrays;
import java.util.Set;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedNumericDocValues;
@ -33,8 +29,13 @@ import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.GeoUtils;
import org.apache.lucene.util.ToStringUtils;
import java.io.IOException;
import java.util.Arrays;
import java.util.Set;
/** Finds all previously indexed points that fall within the specified polygon.
*
* <p>The field must be indexed with {@link BKDTreeDocValuesFormat}, and {@link BKDPointField} added per document.
@ -74,6 +75,8 @@ public class BKDPointInPolygonQuery extends Query {
this.polyLats = polyLats;
this.polyLons = polyLons;
// TODO: we could also compute the maximal innner bounding box, to make relations faster to compute?
double minLon = Double.POSITIVE_INFINITY;
double minLat = Double.POSITIVE_INFINITY;
double maxLon = Double.NEGATIVE_INFINITY;
@ -161,7 +164,22 @@ public class BKDPointInPolygonQuery extends Query {
new BKDTreeReader.LatLonFilter() {
@Override
public boolean accept(double lat, double lon) {
return pointInPolygon(lat, lon);
return GeoUtils.pointInPolygon(polyLons, polyLats, lat, lon);
}
@Override
public BKDTreeReader.Relation compare(double cellLatMin, double cellLatMax, double cellLonMin, double cellLonMax) {
if (GeoUtils.rectWithinPoly(cellLonMin, cellLatMin, cellLonMax, cellLatMax,
polyLons, polyLats,
minLon, minLat, maxLon, maxLat)) {
return BKDTreeReader.Relation.INSIDE;
} else if (GeoUtils.rectCrossesPoly(cellLonMin, cellLatMin, cellLonMax, cellLatMax,
polyLons, polyLats,
minLon, minLat, maxLon, maxLat)) {
return BKDTreeReader.Relation.CROSSES;
} else {
return BKDTreeReader.Relation.OUTSIDE;
}
}
}, treeDV.delegate);
@ -203,36 +221,6 @@ public class BKDPointInPolygonQuery extends Query {
};
}
// TODO: share w/ GeoUtils:
/**
* simple even-odd point in polygon computation
* 1. Determine if point is contained in the longitudinal range
* 2. Determine whether point crosses the edge by computing the latitudinal delta
* between the end-point of a parallel vector (originating at the point) and the
* y-component of the edge sink
*
* NOTE: Requires polygon point (x,y) order either clockwise or counter-clockwise
*/
boolean pointInPolygon(double lat, double lon) {
/**
* Note: This is using a euclidean coordinate system which could result in
* upwards of 110KM error at the equator.
* TODO convert coordinates to cylindrical projection (e.g. mercator)
*/
// TODO: this quantizes a bit differently ... boundary cases will fail here:
boolean inPoly = false;
for (int i = 1; i < polyLons.length; i++) {
if (polyLons[i] <= lon && polyLons[i-1] > lon || polyLons[i-1] <= lon && polyLons[i] > lon) {
if (polyLats[i] + (lon - polyLons[i]) / (polyLons[i-1] - polyLons[i]) * (polyLats[i-1] - polyLats[i]) <= lat) {
inPoly = !inPoly;
}
}
}
return inPoly;
}
@Override
@SuppressWarnings({"unchecked","rawtypes"})
public boolean equals(Object o) {

View File

@ -40,8 +40,11 @@ final class BKDTreeReader implements Accountable {
final int maxDoc;
final IndexInput in;
enum Relation {INSIDE, CROSSES, OUTSIDE};
interface LatLonFilter {
boolean accept(double lat, double lon);
Relation compare(double latMin, double latMax, double lonMin, double lonMax);
}
public BKDTreeReader(IndexInput in, int maxDoc) throws IOException {
@ -137,7 +140,19 @@ final class BKDTreeReader implements Accountable {
/** Fast path: this is called when the query rect fully encompasses all cells under this node. */
private int addAll(Bits acceptDocs, QueryState state, int nodeID) throws IOException {
//long latRange = (long) cellLatMaxEnc - (long) cellLatMinEnc;
//long lonRange = (long) cellLonMaxEnc - (long) cellLonMinEnc;
if (nodeID >= leafNodeOffset) {
/*
System.out.println("A: " + BKDTreeWriter.decodeLat(cellLatMinEnc)
+ " " + BKDTreeWriter.decodeLat(cellLatMaxEnc)
+ " " + BKDTreeWriter.decodeLon(cellLonMinEnc)
+ " " + BKDTreeWriter.decodeLon(cellLonMaxEnc));
*/
// Leaf node
long fp = leafBlockFPs[nodeID-leafNodeOffset];
//System.out.println(" leaf nodeID=" + nodeID + " vs leafNodeOffset=" + leafNodeOffset + " fp=" + fp);
@ -152,40 +167,7 @@ final class BKDTreeReader implements Accountable {
//System.out.println(" seek to leafFP=" + fp);
// How many points are stored in this leaf cell:
int count = state.in.readVInt();
if (state.latLonFilter != null) {
// Handle this differently since we must also look up lat/lon:
int hitCount = 0;
for(int i=0;i<count;i++) {
int docID = state.in.readInt();
if (acceptDocs == null || acceptDocs.get(docID)) {
state.sndv.setDocument(docID);
// How many values this doc has:
int docValueCount = state.sndv.count();
for(int j=0;j<docValueCount;j++) {
long enc = state.sndv.valueAt(j);
int latEnc = (int) ((enc>>32) & 0xffffffffL);
int lonEnc = (int) (enc & 0xffffffffL);
// TODO: maybe we can fix LatLonFilter to operate on encoded forms?
if (state.latLonFilter.accept(BKDTreeWriter.decodeLat(latEnc), BKDTreeWriter.decodeLon(lonEnc))) {
state.bits.set(docID);
hitCount++;
// Stop processing values for this doc since it's now accepted:
break;
}
}
}
}
return hitCount;
} else if (acceptDocs != null) {
if (acceptDocs != null) {
for(int i=0;i<count;i++) {
int docID = state.in.readInt();
if (acceptDocs.get(docID)) {
@ -213,8 +195,7 @@ final class BKDTreeReader implements Accountable {
//System.out.println(" splitValue=" + splitValue);
//System.out.println(" addAll: inner");
int count = 0;
count += addAll(acceptDocs, state, 2*nodeID);
int count = addAll(acceptDocs, state, 2*nodeID);
count += addAll(acceptDocs, state, 2*nodeID+1);
//System.out.println(" addAll: return count=" + count);
return count;
@ -227,17 +208,29 @@ final class BKDTreeReader implements Accountable {
throws IOException {
// 2.06 sec -> 1.52 sec for 225 OSM London queries:
if (state.latMinEnc <= cellLatMinEnc && state.latMaxEnc >= cellLatMaxEnc && state.lonMinEnc <= cellLonMinEnc && state.lonMaxEnc >= cellLonMaxEnc) {
if (state.latLonFilter != null) {
if (cellLatMinEnc > state.latMinEnc ||
cellLatMaxEnc < state.latMaxEnc ||
cellLonMinEnc > state.lonMinEnc ||
cellLonMaxEnc < state.lonMaxEnc) {
Relation r = state.latLonFilter.compare(BKDTreeWriter.decodeLat(cellLatMinEnc),
BKDTreeWriter.decodeLat(cellLatMaxEnc),
BKDTreeWriter.decodeLon(cellLonMinEnc),
BKDTreeWriter.decodeLon(cellLonMaxEnc));
//System.out.println("BKD.intersect cellLat=" + BKDTreeWriter.decodeLat(cellLatMinEnc) + " TO " + BKDTreeWriter.decodeLat(cellLatMaxEnc) + ", cellLon=" + BKDTreeWriter.decodeLon(cellLonMinEnc) + " TO " + BKDTreeWriter.decodeLon(cellLonMaxEnc) + " compare=" + r);
if (r == Relation.OUTSIDE) {
// This cell is fully outside of the query shape: stop recursing
return 0;
} else if (r == Relation.INSIDE) {
// This cell is fully inside of the query shape: recursively add all points in this cell without filtering
return addAll(acceptDocs, state, nodeID);
} else {
// The cell crosses the shape boundary, so we fall through and do full filtering
}
}
} else if (state.latMinEnc <= cellLatMinEnc && state.latMaxEnc >= cellLatMaxEnc && state.lonMinEnc <= cellLonMinEnc && state.lonMaxEnc >= cellLonMaxEnc) {
// Optimize the case when the query fully contains this cell: we can
// recursively add all points without checking if they match the query:
/*
System.out.println("A: " + BKDTreeWriter.decodeLat(cellLatMinEnc)
+ " " + BKDTreeWriter.decodeLat(cellLatMaxEnc)
+ " " + BKDTreeWriter.decodeLon(cellLonMinEnc)
+ " " + BKDTreeWriter.decodeLon(cellLonMaxEnc));
*/
return addAll(acceptDocs, state, nodeID);
}