diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index d2ae4027459..8d82755d22a 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -66,6 +66,10 @@ New Features wrote the oldest segment in the index, for faster checking of "too old" indices (Ryan Ernst, Robert Muir, Mike McCandless) +* LUCENE-6519: BKDPointInPolygonQuery is much faster by avoiding + the per-hit polygon check when a leaf cell is fully contained by the + polygon. (Nick Knize, Mike McCandless) + API Changes * LUCENE-6508: Simplify Lock api, there is now just diff --git a/lucene/sandbox/src/java/org/apache/lucene/bkdtree/BKDPointInPolygonQuery.java b/lucene/sandbox/src/java/org/apache/lucene/bkdtree/BKDPointInPolygonQuery.java index 36e415b2526..f7443ebbfcd 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/bkdtree/BKDPointInPolygonQuery.java +++ b/lucene/sandbox/src/java/org/apache/lucene/bkdtree/BKDPointInPolygonQuery.java @@ -17,10 +17,6 @@ package org.apache.lucene.bkdtree; * limitations under the License. */ -import java.io.IOException; -import java.util.Arrays; -import java.util.Set; - import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.SortedNumericDocValues; @@ -33,8 +29,13 @@ import org.apache.lucene.search.Query; import org.apache.lucene.search.Scorer; import org.apache.lucene.search.Weight; import org.apache.lucene.util.Bits; +import org.apache.lucene.util.GeoUtils; import org.apache.lucene.util.ToStringUtils; +import java.io.IOException; +import java.util.Arrays; +import java.util.Set; + /** Finds all previously indexed points that fall within the specified polygon. * *

The field must be indexed with {@link BKDTreeDocValuesFormat}, and {@link BKDPointField} added per document. @@ -74,6 +75,8 @@ public class BKDPointInPolygonQuery extends Query { this.polyLats = polyLats; this.polyLons = polyLons; + // TODO: we could also compute the maximal innner bounding box, to make relations faster to compute? + double minLon = Double.POSITIVE_INFINITY; double minLat = Double.POSITIVE_INFINITY; double maxLon = Double.NEGATIVE_INFINITY; @@ -161,7 +164,22 @@ public class BKDPointInPolygonQuery extends Query { new BKDTreeReader.LatLonFilter() { @Override public boolean accept(double lat, double lon) { - return pointInPolygon(lat, lon); + return GeoUtils.pointInPolygon(polyLons, polyLats, lat, lon); + } + + @Override + public BKDTreeReader.Relation compare(double cellLatMin, double cellLatMax, double cellLonMin, double cellLonMax) { + if (GeoUtils.rectWithinPoly(cellLonMin, cellLatMin, cellLonMax, cellLatMax, + polyLons, polyLats, + minLon, minLat, maxLon, maxLat)) { + return BKDTreeReader.Relation.INSIDE; + } else if (GeoUtils.rectCrossesPoly(cellLonMin, cellLatMin, cellLonMax, cellLatMax, + polyLons, polyLats, + minLon, minLat, maxLon, maxLat)) { + return BKDTreeReader.Relation.CROSSES; + } else { + return BKDTreeReader.Relation.OUTSIDE; + } } }, treeDV.delegate); @@ -203,36 +221,6 @@ public class BKDPointInPolygonQuery extends Query { }; } - // TODO: share w/ GeoUtils: - - /** - * simple even-odd point in polygon computation - * 1. Determine if point is contained in the longitudinal range - * 2. Determine whether point crosses the edge by computing the latitudinal delta - * between the end-point of a parallel vector (originating at the point) and the - * y-component of the edge sink - * - * NOTE: Requires polygon point (x,y) order either clockwise or counter-clockwise - */ - boolean pointInPolygon(double lat, double lon) { - /** - * Note: This is using a euclidean coordinate system which could result in - * upwards of 110KM error at the equator. - * TODO convert coordinates to cylindrical projection (e.g. mercator) - */ - - // TODO: this quantizes a bit differently ... boundary cases will fail here: - boolean inPoly = false; - for (int i = 1; i < polyLons.length; i++) { - if (polyLons[i] <= lon && polyLons[i-1] > lon || polyLons[i-1] <= lon && polyLons[i] > lon) { - if (polyLats[i] + (lon - polyLons[i]) / (polyLons[i-1] - polyLons[i]) * (polyLats[i-1] - polyLats[i]) <= lat) { - inPoly = !inPoly; - } - } - } - return inPoly; - } - @Override @SuppressWarnings({"unchecked","rawtypes"}) public boolean equals(Object o) { diff --git a/lucene/sandbox/src/java/org/apache/lucene/bkdtree/BKDTreeReader.java b/lucene/sandbox/src/java/org/apache/lucene/bkdtree/BKDTreeReader.java index 59f94721cb5..5c65761e963 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/bkdtree/BKDTreeReader.java +++ b/lucene/sandbox/src/java/org/apache/lucene/bkdtree/BKDTreeReader.java @@ -40,8 +40,11 @@ final class BKDTreeReader implements Accountable { final int maxDoc; final IndexInput in; + enum Relation {INSIDE, CROSSES, OUTSIDE}; + interface LatLonFilter { boolean accept(double lat, double lon); + Relation compare(double latMin, double latMax, double lonMin, double lonMax); } public BKDTreeReader(IndexInput in, int maxDoc) throws IOException { @@ -137,7 +140,19 @@ final class BKDTreeReader implements Accountable { /** Fast path: this is called when the query rect fully encompasses all cells under this node. */ private int addAll(Bits acceptDocs, QueryState state, int nodeID) throws IOException { + + //long latRange = (long) cellLatMaxEnc - (long) cellLatMinEnc; + //long lonRange = (long) cellLonMaxEnc - (long) cellLonMinEnc; + if (nodeID >= leafNodeOffset) { + + /* + System.out.println("A: " + BKDTreeWriter.decodeLat(cellLatMinEnc) + + " " + BKDTreeWriter.decodeLat(cellLatMaxEnc) + + " " + BKDTreeWriter.decodeLon(cellLonMinEnc) + + " " + BKDTreeWriter.decodeLon(cellLonMaxEnc)); + */ + // Leaf node long fp = leafBlockFPs[nodeID-leafNodeOffset]; //System.out.println(" leaf nodeID=" + nodeID + " vs leafNodeOffset=" + leafNodeOffset + " fp=" + fp); @@ -152,40 +167,7 @@ final class BKDTreeReader implements Accountable { //System.out.println(" seek to leafFP=" + fp); // How many points are stored in this leaf cell: int count = state.in.readVInt(); - if (state.latLonFilter != null) { - // Handle this differently since we must also look up lat/lon: - - int hitCount = 0; - for(int i=0;i>32) & 0xffffffffL); - int lonEnc = (int) (enc & 0xffffffffL); - - // TODO: maybe we can fix LatLonFilter to operate on encoded forms? - if (state.latLonFilter.accept(BKDTreeWriter.decodeLat(latEnc), BKDTreeWriter.decodeLon(lonEnc))) { - state.bits.set(docID); - hitCount++; - - // Stop processing values for this doc since it's now accepted: - break; - } - } - } - } - - return hitCount; - - } else if (acceptDocs != null) { + if (acceptDocs != null) { for(int i=0;i