diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index d2ae4027459..8d82755d22a 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -66,6 +66,10 @@ New Features wrote the oldest segment in the index, for faster checking of "too old" indices (Ryan Ernst, Robert Muir, Mike McCandless) +* LUCENE-6519: BKDPointInPolygonQuery is much faster by avoiding + the per-hit polygon check when a leaf cell is fully contained by the + polygon. (Nick Knize, Mike McCandless) + API Changes * LUCENE-6508: Simplify Lock api, there is now just diff --git a/lucene/sandbox/src/java/org/apache/lucene/bkdtree/BKDPointInPolygonQuery.java b/lucene/sandbox/src/java/org/apache/lucene/bkdtree/BKDPointInPolygonQuery.java index 36e415b2526..f7443ebbfcd 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/bkdtree/BKDPointInPolygonQuery.java +++ b/lucene/sandbox/src/java/org/apache/lucene/bkdtree/BKDPointInPolygonQuery.java @@ -17,10 +17,6 @@ package org.apache.lucene.bkdtree; * limitations under the License. */ -import java.io.IOException; -import java.util.Arrays; -import java.util.Set; - import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.SortedNumericDocValues; @@ -33,8 +29,13 @@ import org.apache.lucene.search.Query; import org.apache.lucene.search.Scorer; import org.apache.lucene.search.Weight; import org.apache.lucene.util.Bits; +import org.apache.lucene.util.GeoUtils; import org.apache.lucene.util.ToStringUtils; +import java.io.IOException; +import java.util.Arrays; +import java.util.Set; + /** Finds all previously indexed points that fall within the specified polygon. * *
The field must be indexed with {@link BKDTreeDocValuesFormat}, and {@link BKDPointField} added per document.
@@ -74,6 +75,8 @@ public class BKDPointInPolygonQuery extends Query {
this.polyLats = polyLats;
this.polyLons = polyLons;
+ // TODO: we could also compute the maximal inner bounding box, to make relations faster to compute?
+
double minLon = Double.POSITIVE_INFINITY;
double minLat = Double.POSITIVE_INFINITY;
double maxLon = Double.NEGATIVE_INFINITY;
@@ -161,7 +164,22 @@ public class BKDPointInPolygonQuery extends Query {
new BKDTreeReader.LatLonFilter() {
@Override
public boolean accept(double lat, double lon) {
- return pointInPolygon(lat, lon);
+ return GeoUtils.pointInPolygon(polyLons, polyLats, lat, lon);
+ }
+
+ @Override
+ public BKDTreeReader.Relation compare(double cellLatMin, double cellLatMax, double cellLonMin, double cellLonMax) {
+ if (GeoUtils.rectWithinPoly(cellLonMin, cellLatMin, cellLonMax, cellLatMax,
+ polyLons, polyLats,
+ minLon, minLat, maxLon, maxLat)) {
+ return BKDTreeReader.Relation.INSIDE;
+ } else if (GeoUtils.rectCrossesPoly(cellLonMin, cellLatMin, cellLonMax, cellLatMax,
+ polyLons, polyLats,
+ minLon, minLat, maxLon, maxLat)) {
+ return BKDTreeReader.Relation.CROSSES;
+ } else {
+ return BKDTreeReader.Relation.OUTSIDE;
+ }
}
}, treeDV.delegate);
@@ -203,36 +221,6 @@ public class BKDPointInPolygonQuery extends Query {
};
}
- // TODO: share w/ GeoUtils:
-
- /**
- * simple even-odd point in polygon computation
- * 1. Determine if point is contained in the longitudinal range
- * 2. Determine whether point crosses the edge by computing the latitudinal delta
- * between the end-point of a parallel vector (originating at the point) and the
- * y-component of the edge sink
- *
- * NOTE: Requires polygon point (x,y) order either clockwise or counter-clockwise
- */
- boolean pointInPolygon(double lat, double lon) {
- /**
- * Note: This is using a euclidean coordinate system which could result in
- * upwards of 110KM error at the equator.
- * TODO convert coordinates to cylindrical projection (e.g. mercator)
- */
-
- // TODO: this quantizes a bit differently ... boundary cases will fail here:
- boolean inPoly = false;
- for (int i = 1; i < polyLons.length; i++) {
- if (polyLons[i] <= lon && polyLons[i-1] > lon || polyLons[i-1] <= lon && polyLons[i] > lon) {
- if (polyLats[i] + (lon - polyLons[i]) / (polyLons[i-1] - polyLons[i]) * (polyLats[i-1] - polyLats[i]) <= lat) {
- inPoly = !inPoly;
- }
- }
- }
- return inPoly;
- }
-
@Override
@SuppressWarnings({"unchecked","rawtypes"})
public boolean equals(Object o) {
diff --git a/lucene/sandbox/src/java/org/apache/lucene/bkdtree/BKDTreeReader.java b/lucene/sandbox/src/java/org/apache/lucene/bkdtree/BKDTreeReader.java
index 59f94721cb5..5c65761e963 100644
--- a/lucene/sandbox/src/java/org/apache/lucene/bkdtree/BKDTreeReader.java
+++ b/lucene/sandbox/src/java/org/apache/lucene/bkdtree/BKDTreeReader.java
@@ -40,8 +40,11 @@ final class BKDTreeReader implements Accountable {
final int maxDoc;
final IndexInput in;
+ enum Relation {INSIDE, CROSSES, OUTSIDE};
+
interface LatLonFilter {
boolean accept(double lat, double lon);
+ Relation compare(double latMin, double latMax, double lonMin, double lonMax);
}
public BKDTreeReader(IndexInput in, int maxDoc) throws IOException {
@@ -137,7 +140,19 @@ final class BKDTreeReader implements Accountable {
/** Fast path: this is called when the query rect fully encompasses all cells under this node. */
private int addAll(Bits acceptDocs, QueryState state, int nodeID) throws IOException {
+
+ //long latRange = (long) cellLatMaxEnc - (long) cellLatMinEnc;
+ //long lonRange = (long) cellLonMaxEnc - (long) cellLonMinEnc;
+
if (nodeID >= leafNodeOffset) {
+
+ /*
+ System.out.println("A: " + BKDTreeWriter.decodeLat(cellLatMinEnc)
+ + " " + BKDTreeWriter.decodeLat(cellLatMaxEnc)
+ + " " + BKDTreeWriter.decodeLon(cellLonMinEnc)
+ + " " + BKDTreeWriter.decodeLon(cellLonMaxEnc));
+ */
+
// Leaf node
long fp = leafBlockFPs[nodeID-leafNodeOffset];
//System.out.println(" leaf nodeID=" + nodeID + " vs leafNodeOffset=" + leafNodeOffset + " fp=" + fp);
@@ -152,40 +167,7 @@ final class BKDTreeReader implements Accountable {
//System.out.println(" seek to leafFP=" + fp);
// How many points are stored in this leaf cell:
int count = state.in.readVInt();
- if (state.latLonFilter != null) {
- // Handle this differently since we must also look up lat/lon:
-
- int hitCount = 0;
- for(int i=0;i