diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index e7dcc3eb4e0..9dd75ef4ea8 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -56,6 +56,9 @@ Optimizations multiple polygons and holes, with memory usage independent of polygon complexity. (Karl Wright, Mike McCandless, Robert Muir) +* LUCENE-7159: Speed up LatLonPoint polygon performance for complex + polygons. (Robert Muir) + Bug Fixes * LUCENE-7127: Fix corner case bugs in GeoPointDistanceQuery. (Robert Muir) diff --git a/lucene/core/src/java/org/apache/lucene/geo/Polygon.java b/lucene/core/src/java/org/apache/lucene/geo/Polygon.java index a5da229ac88..145b939e4e2 100644 --- a/lucene/core/src/java/org/apache/lucene/geo/Polygon.java +++ b/lucene/core/src/java/org/apache/lucene/geo/Polygon.java @@ -18,6 +18,8 @@ package org.apache.lucene.geo; import java.util.Arrays; +import org.apache.lucene.index.PointValues.Relation; + /** * Represents a closed polygon on the earth's surface. *
@@ -150,90 +152,127 @@ public final class Polygon {
return false;
}
}
-
- /**
- * Computes whether a rectangle is within a polygon (shared boundaries not allowed)
- */
- public boolean contains(double minLat, double maxLat, double minLon, double maxLon) {
- // check if rectangle crosses poly (to handle concave/pacman polys), then check that all 4 corners
- // are contained
- boolean contains = crosses(minLat, maxLat, minLon, maxLon) == false &&
- contains(minLat, minLon) &&
- contains(minLat, maxLon) &&
- contains(maxLat, maxLon) &&
- contains(maxLat, minLon);
-
- if (contains) {
- // if we intersect with any hole, game over
- for (Polygon hole : holes) {
- if (hole.crosses(minLat, maxLat, minLon, maxLon) || hole.contains(minLat, maxLat, minLon, maxLon)) {
- return false;
- }
- }
- return true;
- } else {
- return false;
- }
- }
-
- /**
- * Convenience method for accurately computing whether a rectangle crosses a poly.
- */
- public boolean crosses(double minLat, double maxLat, final double minLon, final double maxLon) {
+
+ /** Returns relation to the provided rectangle */
+ public Relation relate(double minLat, double maxLat, double minLon, double maxLon) {
// if the bounding boxes are disjoint then the shape does not cross
if (maxLon < this.minLon || minLon > this.maxLon || maxLat < this.minLat || minLat > this.maxLat) {
- return false;
+ return Relation.CELL_OUTSIDE_QUERY;
}
// if the rectangle fully encloses us, we cross.
if (minLat <= this.minLat && maxLat >= this.maxLat && minLon <= this.minLon && maxLon >= this.maxLon) {
- return true;
+ return Relation.CELL_CROSSES_QUERY;
}
- // if we cross any hole, we cross
+ // check any holes
for (Polygon hole : holes) {
- if (hole.crosses(minLat, maxLat, minLon, maxLon)) {
- return true;
+ Relation holeRelation = hole.relate(minLat, maxLat, minLon, maxLon);
+ if (holeRelation == Relation.CELL_CROSSES_QUERY) {
+ return Relation.CELL_CROSSES_QUERY;
+ } else if (holeRelation == Relation.CELL_INSIDE_QUERY) {
+ return Relation.CELL_OUTSIDE_QUERY;
}
}
+ // check each corner: if < 4 are present, its cheaper than crossesSlowly
+ int numCorners = numberOfCorners(minLat, maxLat, minLon, maxLon);
+ if (numCorners == 4) {
+ if (crossesSlowly(minLat, maxLat, minLon, maxLon)) {
+ return Relation.CELL_CROSSES_QUERY;
+ }
+ return Relation.CELL_INSIDE_QUERY;
+ } else if (numCorners > 0) {
+ return Relation.CELL_CROSSES_QUERY;
+ }
+
+ // we cross
+ if (crossesSlowly(minLat, maxLat, minLon, maxLon)) {
+ return Relation.CELL_CROSSES_QUERY;
+ }
+
+ return Relation.CELL_OUTSIDE_QUERY;
+ }
+
+ // returns 0, 4, or something in between
+ private int numberOfCorners(double minLat, double maxLat, double minLon, double maxLon) {
+ int containsCount = 0;
+ if (contains(minLat, minLon)) {
+ containsCount++;
+ }
+ if (contains(minLat, maxLon)) {
+ containsCount++;
+ }
+ if (containsCount == 1) {
+ return containsCount;
+ }
+ if (contains(maxLat, maxLon)) {
+ containsCount++;
+ }
+ if (containsCount == 2) {
+ return containsCount;
+ }
+ if (contains(maxLat, minLon)) {
+ containsCount++;
+ }
+ return containsCount;
+ }
+ private boolean crossesSlowly(double minLat, double maxLat, final double minLon, final double maxLon) {
/*
* Accurately compute (within restrictions of cartesian decimal degrees) whether a rectangle crosses a polygon
*/
- final double[][] bbox = new double[][] { {minLon, minLat}, {maxLon, minLat}, {maxLon, maxLat}, {minLon, maxLat}, {minLon, minLat} };
- final int polyLength = polyLons.length-1;
- double d, s, t, a1, b1, c1, a2, b2, c2;
- double x00, y00, x01, y01, x10, y10, x11, y11;
+ final double[] boxLats = new double[] { minLat, minLat, maxLat, maxLat, minLat };
+ final double[] boxLons = new double[] { minLon, maxLon, maxLon, minLon, minLon };
// computes the intersection point between each bbox edge and the polygon edge
- for (short b=0; b<4; ++b) {
- a1 = bbox[b+1][1]-bbox[b][1];
- b1 = bbox[b][0]-bbox[b+1][0];
- c1 = a1*bbox[b+1][0] + b1*bbox[b+1][1];
- for (int p=0; p
+ * When this file is removed then we have made good progress! In general we don't call
+ * the point-in-polygon algorithm that much, because of how BKD divides up the data. But
+ * today the method is very slow (general to all polygons, linear with the number of vertices).
+ * At the same time polygon-rectangle relation operations are also slow in the same way, this
+ * just really ensures they are the bottleneck by removing most of the point-in-polygon calls.
+ *
+ * See the "grid" algorithm description here: http://erich.realtimerendering.com/ptinpoly/
+ * A few differences:
+ *
+ *
+ */
+// TODO: just make a more proper tree (maybe in-ram BKD)? then we can answer most
+// relational operations as rectangle <-> rectangle relations in integer space in log(n) time..
+final class LatLonGrid {
+ // must be a power of two!
+ static final int GRID_SIZE = 1<<5;
+ final int minLat;
+ final int maxLat;
+ final int minLon;
+ final int maxLon;
+ // TODO: something more efficient than parallel bitsets? maybe one bitset?
+ final FixedBitSet haveAnswer = new FixedBitSet(GRID_SIZE * GRID_SIZE);
+ final FixedBitSet answer = new FixedBitSet(GRID_SIZE * GRID_SIZE);
+
+ final long latPerCell;
+ final long lonPerCell;
+
+ final Polygon[] polygons;
+
+ LatLonGrid(int minLat, int maxLat, int minLon, int maxLon, Polygon... polygons) {
+ this.minLat = minLat;
+ this.maxLat = maxLat;
+ this.minLon = minLon;
+ this.maxLon = maxLon;
+ this.polygons = polygons;
+ if (minLon > maxLon) {
+ // maybe make 2 grids if you want this?
+ throw new IllegalArgumentException("Grid cannot cross the dateline");
+ }
+ if (minLat > maxLat) {
+ throw new IllegalArgumentException("bogus grid");
+ }
+ long latitudeRange = maxLat - (long) minLat;
+ long longitudeRange = maxLon - (long) minLon;
+ // we spill over the edge of the bounding box in each direction a bit,
+ // but it prevents edge case bugs.
+ latPerCell = latitudeRange / (GRID_SIZE - 1);
+ lonPerCell = longitudeRange / (GRID_SIZE - 1);
+ fill(polygons, 0, GRID_SIZE, 0, GRID_SIZE);
+ }
+
+ /** fills a 2D range of grid cells [minLatIndex .. maxLatIndex) X [minLonIndex .. maxLonIndex) */
+ void fill(Polygon[] polygons, int minLatIndex, int maxLatIndex, int minLonIndex, int maxLonIndex) {
+ // grid cells at the edge of the bounding box are typically smaller than normal, because we spill over.
+ long cellMinLat = minLat + (minLatIndex * latPerCell);
+ long cellMaxLat = Math.min(maxLat, minLat + (maxLatIndex * latPerCell) - 1);
+ long cellMinLon = minLon + (minLonIndex * lonPerCell);
+ long cellMaxLon = Math.min(maxLon, minLon + (maxLonIndex * lonPerCell) - 1);
+
+ assert cellMinLat <= maxLat && cellMinLon <= maxLon;
+ assert cellMaxLat >= cellMinLat;
+ assert cellMaxLon >= cellMinLon;
+
+ Relation relation = Polygon.relate(polygons, LatLonPoint.decodeLatitude((int) cellMinLat),
+ LatLonPoint.decodeLatitude((int) cellMaxLat),
+ LatLonPoint.decodeLongitude((int) cellMinLon),
+ LatLonPoint.decodeLongitude((int) cellMaxLon));
+ if (relation != Relation.CELL_CROSSES_QUERY) {
+ // we know the answer for this region, fill the cell range
+ for (int i = minLatIndex; i < maxLatIndex; i++) {
+ for (int j = minLonIndex; j < maxLonIndex; j++) {
+ int index = i * GRID_SIZE + j;
+ assert haveAnswer.get(index) == false;
+ haveAnswer.set(index);
+ if (relation == Relation.CELL_INSIDE_QUERY) {
+ answer.set(index);
+ }
+ }
+ }
+ } else if (minLatIndex == maxLatIndex - 1) {
+ // nothing more to do: this is a single grid cell (leaf node) and
+ // is an edge case for the polygon.
+ } else {
+ // grid range crosses our polygon, keep recursing.
+ int midLatIndex = (minLatIndex + maxLatIndex) >>> 1;
+ int midLonIndex = (minLonIndex + maxLonIndex) >>> 1;
+ fill(polygons, minLatIndex, midLatIndex, minLonIndex, midLonIndex);
+ fill(polygons, minLatIndex, midLatIndex, midLonIndex, maxLonIndex);
+ fill(polygons, midLatIndex, maxLatIndex, minLonIndex, midLonIndex);
+ fill(polygons, midLatIndex, maxLatIndex, midLonIndex, maxLonIndex);
+ }
+ }
+
+ /** Returns true if inside one of our polygons, false otherwise */
+ boolean contains(int latitude, int longitude) {
+ // first see if the grid knows the answer
+ int index = index(latitude, longitude);
+ if (index == -1) {
+ return false; // outside of bounding box range
+ } else if (haveAnswer.get(index)) {
+ return answer.get(index);
+ }
+
+ // the grid is unsure (boundary): do a real test.
+ double docLatitude = LatLonPoint.decodeLatitude(latitude);
+ double docLongitude = LatLonPoint.decodeLongitude(longitude);
+ return Polygon.contains(polygons, docLatitude, docLongitude);
+ }
+
+ /** Returns grid index of lat/lon, or -1 if the value is outside of the bounding box. */
+ private int index(int latitude, int longitude) {
+ if (latitude < minLat || latitude > maxLat || longitude < minLon || longitude > maxLon) {
+ return -1; // outside of bounding box range
+ }
+
+ long latRel = latitude - (long) minLat;
+ long lonRel = longitude - (long) minLon;
+
+ int latIndex = (int) (latRel / latPerCell);
+ int lonIndex = (int) (lonRel / lonPerCell);
+ return latIndex * GRID_SIZE + lonIndex;
+ }
+}
diff --git a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointInPolygonQuery.java b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointInPolygonQuery.java
index f27386c2b0a..670db3de67a 100644
--- a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointInPolygonQuery.java
+++ b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointInPolygonQuery.java
@@ -100,6 +100,11 @@ final class LatLonPointInPolygonQuery extends Query {
}
final float matchCost = cumulativeCost;
+ final LatLonGrid grid = new LatLonGrid(LatLonPoint.encodeLatitude(box.minLat),
+ LatLonPoint.encodeLatitude(box.maxLat),
+ LatLonPoint.encodeLongitude(box.minLon),
+ LatLonPoint.encodeLongitude(box.maxLon), polygons);
+
return new ConstantScoreWeight(this) {
@Override
@@ -127,7 +132,7 @@ final class LatLonPointInPolygonQuery extends Query {
} else {
preApproved = new FixedBitSet(reader.maxDoc());
}
- values.intersect(field,
+ values.intersect(field,
new IntersectVisitor() {
@Override
public void visit(int docID) {
@@ -164,13 +169,7 @@ final class LatLonPointInPolygonQuery extends Query {
double cellMaxLat = LatLonPoint.decodeLatitude(maxPackedValue, 0);
double cellMaxLon = LatLonPoint.decodeLongitude(maxPackedValue, Integer.BYTES);
- if (Polygon.contains(polygons, cellMinLat, cellMaxLat, cellMinLon, cellMaxLon)) {
- return Relation.CELL_INSIDE_QUERY;
- } else if (Polygon.crosses(polygons, cellMinLat, cellMaxLat, cellMinLon, cellMaxLon)) {
- return Relation.CELL_CROSSES_QUERY;
- } else {
- return Relation.CELL_OUTSIDE_QUERY;
- }
+ return Polygon.relate(polygons, cellMinLat, cellMaxLat, cellMinLon, cellMaxLon);
}
});
@@ -194,9 +193,9 @@ final class LatLonPointInPolygonQuery extends Query {
int count = docValues.count();
for (int i = 0; i < count; i++) {
long encoded = docValues.valueAt(i);
- double docLatitude = LatLonPoint.decodeLatitude((int)(encoded >> 32));
- double docLongitude = LatLonPoint.decodeLongitude((int)(encoded & 0xFFFFFFFF));
- if (Polygon.contains(polygons, docLatitude, docLongitude)) {
+ int latitudeBits = (int)(encoded >> 32);
+ int longitudeBits = (int)(encoded & 0xFFFFFFFF);
+ if (grid.contains(latitudeBits, longitudeBits)) {
return true;
}
}
diff --git a/lucene/sandbox/src/test/org/apache/lucene/document/TestLatLonGrid.java b/lucene/sandbox/src/test/org/apache/lucene/document/TestLatLonGrid.java
new file mode 100644
index 00000000000..2de5a433d6f
--- /dev/null
+++ b/lucene/sandbox/src/test/org/apache/lucene/document/TestLatLonGrid.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.document;
+
+import org.apache.lucene.geo.GeoTestUtil;
+import org.apache.lucene.geo.Polygon;
+import org.apache.lucene.geo.Rectangle;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.TestUtil;
+
+/** tests against LatLonGrid (avoiding indexing/queries) */
+public class TestLatLonGrid extends LuceneTestCase {
+
+ /** If the grid returns true, then any point in that cell should return true as well */
+ public void testRandom() throws Exception {
+ for (int i = 0; i < 100; i++) {
+ Polygon polygon = GeoTestUtil.nextPolygon();
+ Rectangle box = Rectangle.fromPolygon(new Polygon[] { polygon });
+ int minLat = LatLonPoint.encodeLatitude(box.minLat);
+ int maxLat = LatLonPoint.encodeLatitude(box.maxLat);
+ int minLon = LatLonPoint.encodeLongitude(box.minLon);
+ int maxLon = LatLonPoint.encodeLongitude(box.maxLon);
+ LatLonGrid grid = new LatLonGrid(minLat, maxLat, minLon, maxLon, polygon);
+ // we are in integer space... but exhaustive testing is slow!
+ for (int j = 0; j < 10000; j++) {
+ int lat = TestUtil.nextInt(random(), minLat, maxLat);
+ int lon = TestUtil.nextInt(random(), minLon, maxLon);
+
+ boolean expected = polygon.contains(LatLonPoint.decodeLatitude(lat),
+ LatLonPoint.decodeLongitude(lon));
+ boolean actual = grid.contains(lat, lon);
+ assertEquals(expected, actual);
+ }
+ }
+ }
+}
diff --git a/lucene/spatial/src/java/org/apache/lucene/spatial/geopoint/search/GeoPointInPolygonQueryImpl.java b/lucene/spatial/src/java/org/apache/lucene/spatial/geopoint/search/GeoPointInPolygonQueryImpl.java
index 1bb43c70ba4..14b7cc7fa2b 100644
--- a/lucene/spatial/src/java/org/apache/lucene/spatial/geopoint/search/GeoPointInPolygonQueryImpl.java
+++ b/lucene/spatial/src/java/org/apache/lucene/spatial/geopoint/search/GeoPointInPolygonQueryImpl.java
@@ -21,6 +21,7 @@ import java.util.Objects;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.spatial.geopoint.document.GeoPointField.TermEncoding;
import org.apache.lucene.geo.Polygon;
+import org.apache.lucene.index.PointValues.Relation;
/** Package private implementation for the public facing GeoPointInPolygonQuery delegate class.
*
@@ -58,18 +59,17 @@ final class GeoPointInPolygonQueryImpl extends GeoPointInBBoxQueryImpl {
@Override
protected boolean cellCrosses(final double minLat, final double maxLat, final double minLon, final double maxLon) {
- return Polygon.crosses(polygons, minLat, maxLat, minLon, maxLon);
+ return Polygon.relate(polygons, minLat, maxLat, minLon, maxLon) == Relation.CELL_CROSSES_QUERY;
}
@Override
protected boolean cellWithin(final double minLat, final double maxLat, final double minLon, final double maxLon) {
- return Polygon.contains(polygons, minLat, maxLat, minLon, maxLon);
+ return Polygon.relate(polygons, minLat, maxLat, minLon, maxLon) == Relation.CELL_INSIDE_QUERY;
}
@Override
protected boolean cellIntersectsShape(final double minLat, final double maxLat, final double minLon, final double maxLon) {
- return cellContains(minLat, maxLat, minLon, maxLon) || cellWithin(minLat, maxLat, minLon, maxLon)
- || cellCrosses(minLat, maxLat, minLon, maxLon);
+ return Polygon.relate(polygons, minLat, maxLat, minLon, maxLon) != Relation.CELL_OUTSIDE_QUERY;
}
/**