From 74240be0f5a7f9de373bc53d01d5a43cd6c5bb05 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Mon, 30 Jan 2017 09:44:48 +0100 Subject: [PATCH] LUCENE-7661: Speed up for LatLonPointInPolygonQuery by pre-computing the relation of the polygon with a grid. --- lucene/CHANGES.txt | 3 + .../apache/lucene/geo/GeoEncodingUtils.java | 147 ++++++++++++++---- .../LatLonDocValuesDistanceQuery.java | 2 +- .../document/LatLonPointDistanceQuery.java | 2 +- .../document/LatLonPointInPolygonQuery.java | 6 +- 5 files changed, 129 insertions(+), 31 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index e59744a6b9a..0be102cadf7 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -128,6 +128,9 @@ Optimizations * LUCENE-7656: Speed up for LatLonPointDistanceQuery by computing distances even less often. (Adrien Grand) +* LUCENE-7661: Speed up for LatLonPointInPolygonQuery by pre-computing the + relation of the polygon with a grid. (Adrien Grand) + Build * LUCENE-7651: Fix Javadocs build for Java 8u121 by injecting "Google Code diff --git a/lucene/core/src/java/org/apache/lucene/geo/GeoEncodingUtils.java b/lucene/core/src/java/org/apache/lucene/geo/GeoEncodingUtils.java index cc75fbf0df6..663cb2e2d84 100644 --- a/lucene/core/src/java/org/apache/lucene/geo/GeoEncodingUtils.java +++ b/lucene/core/src/java/org/apache/lucene/geo/GeoEncodingUtils.java @@ -27,6 +27,8 @@ import static org.apache.lucene.geo.GeoUtils.MIN_LAT_INCL; import static org.apache.lucene.geo.GeoUtils.checkLatitude; import static org.apache.lucene.geo.GeoUtils.checkLongitude; +import java.util.function.Function; + /** * reusable geopoint encoding methods * @@ -158,11 +160,48 @@ public final class GeoEncodingUtils { * @lucene.internal */ public static DistancePredicate createDistancePredicate(double lat, double lon, double radiusMeters) { final Rectangle boundingBox = Rectangle.fromPointDistance(lat, lon, radiusMeters); + final double axisLat = Rectangle.axisLat(lat, radiusMeters); + final double distanceSortKey = GeoUtils.distanceQuerySortKey(radiusMeters); + final Function boxToRelation = box -> GeoUtils.relate( + box.minLat, box.maxLat, box.minLon, box.maxLon, lat, lon, distanceSortKey, axisLat); + final Grid subBoxes = createSubBoxes(boundingBox, boxToRelation); + + return new DistancePredicate( + subBoxes.latShift, subBoxes.lonShift, + subBoxes.latBase, subBoxes.lonBase, + subBoxes.maxLatDelta, subBoxes.maxLonDelta, + subBoxes.relations, + lat, lon, distanceSortKey); + } + + /** Create a predicate that checks whether points are within a polygon. + * It works the same way as {@link #createDistancePredicate}. + * @lucene.internal */ + public static PolygonPredicate createPolygonPredicate(Polygon[] polygons, Polygon2D tree) { + final Rectangle boundingBox = Rectangle.fromPolygon(polygons); + final Function boxToRelation = box -> tree.relate( + box.minLat, box.maxLat, box.minLon, box.maxLon); + final Grid subBoxes = createSubBoxes(boundingBox, boxToRelation); + + return new PolygonPredicate( + subBoxes.latShift, subBoxes.lonShift, + subBoxes.latBase, subBoxes.lonBase, + subBoxes.maxLatDelta, subBoxes.maxLonDelta, + subBoxes.relations, + tree); + } + + private static Grid createSubBoxes(Rectangle boundingBox, Function boxToRelation) { final int minLat = encodeLatitudeCeil(boundingBox.minLat); final int maxLat = encodeLatitude(boundingBox.maxLat); final int minLon = encodeLongitudeCeil(boundingBox.minLon); final int maxLon = encodeLongitude(boundingBox.maxLon); + if (maxLat < minLat || (boundingBox.crossesDateline() == false && maxLon < minLon)) { + // the box cannot match any quantized point + return new Grid(1, 1, 0, 0, 0, 0, new byte[0]); + } + final int latShift, lonShift; final int latBase, lonBase; final int maxLatDelta, maxLonDelta; @@ -186,8 +225,6 @@ public final class GeoEncodingUtils { assert maxLonDelta > 0; } - final double axisLat = Rectangle.axisLat(lat, radiusMeters); - final double distanceSortKey = GeoUtils.distanceQuerySortKey(radiusMeters); final byte[] relations = new byte[maxLatDelta * maxLonDelta]; for (int i = 0; i < maxLatDelta; ++i) { for (int j = 0; j < maxLonDelta; ++j) { @@ -196,46 +233,67 @@ public final class GeoEncodingUtils { final int boxMaxLat = boxMinLat + (1 << latShift) - 1; final int boxMaxLon = boxMinLon + (1 << lonShift) - 1; - relations[i * maxLonDelta + j] = (byte) GeoUtils.relate( + relations[i * maxLonDelta + j] = (byte) boxToRelation.apply(new Rectangle( decodeLatitude(boxMinLat), decodeLatitude(boxMaxLat), - decodeLongitude(boxMinLon), decodeLongitude(boxMaxLon), - lat, lon, distanceSortKey, axisLat).ordinal(); + decodeLongitude(boxMinLon), decodeLongitude(boxMaxLon))).ordinal(); } } - return new DistancePredicate( + return new Grid( latShift, lonShift, latBase, lonBase, maxLatDelta, maxLonDelta, - relations, - lat, lon, distanceSortKey); + relations); } /** Compute the minimum shift value so that * {@code (b>>>shift)-(a>>>shift)} is less that {@code ARITY}. */ private static int computeShift(long a, long b) { - assert a < b; + assert a <= b; // We enforce a shift of at least 1 so that when we work with unsigned ints // by doing (lat - MIN_VALUE), the result of the shift (lat - MIN_VALUE) >>> shift // can be used for comparisons without particular care: the sign bit has // been cleared so comparisons work the same for signed and unsigned ints for (int shift = 1; ; ++shift) { final long delta = (b >>> shift) - (a >>> shift); - if (delta >= 0 && delta < DistancePredicate.ARITY) { + if (delta >= 0 && delta < Grid.ARITY) { return shift; } } } + private static class Grid { + static final int ARITY = 64; + + final int latShift, lonShift; + final int latBase, lonBase; + final int maxLatDelta, maxLonDelta; + final byte[] relations; + + private Grid( + int latShift, int lonShift, + int latBase, int lonBase, + int maxLatDelta, int maxLonDelta, + byte[] relations) { + if (latShift < 1 || latShift > 31) { + throw new IllegalArgumentException(); + } + if (lonShift < 1 || lonShift > 31) { + throw new IllegalArgumentException(); + } + this.latShift = latShift; + this.lonShift = lonShift; + this.latBase = latBase; + this.lonBase = lonBase; + this.maxLatDelta = maxLatDelta; + this.maxLonDelta = maxLonDelta; + this.relations = relations; + } + } + /** A predicate that checks whether a given point is within a distance of another point. */ - public static class DistancePredicate { + public static class DistancePredicate extends Grid { - private static final int ARITY = 64; - - private final int latShift, lonShift; - private final int latBase, lonBase; - private final int maxLatDelta, maxLonDelta; - private final byte[] relations; private final double lat, lon; private final double distanceKey; @@ -245,13 +303,7 @@ public final class GeoEncodingUtils { int maxLatDelta, int maxLonDelta, byte[] relations, double lat, double lon, double distanceKey) { - this.latShift = latShift; - this.lonShift = lonShift; - this.latBase = latBase; - this.lonBase = lonBase; - this.maxLatDelta = maxLatDelta; - this.maxLonDelta = maxLonDelta; - this.relations = relations; + super(latShift, lonShift, latBase, lonBase, maxLatDelta, maxLonDelta, relations); this.lat = lat; this.lon = lon; this.distanceKey = distanceKey; @@ -259,16 +311,17 @@ public final class GeoEncodingUtils { /** Check whether the given point is within a distance of another point. * NOTE: this operates directly on the encoded representation of points. */ - public boolean apply(int lat, int lon) { + public boolean test(int lat, int lon) { final int lat2 = ((lat - Integer.MIN_VALUE) >>> latShift); if (lat2 < latBase || lat2 >= latBase + maxLatDelta) { return false; } int lon2 = ((lon - Integer.MIN_VALUE) >>> lonShift); if (lon2 < lonBase) { // wrap - lon2 += 1L << (32 - lonShift); - assert lon2 >= lonBase; + lon2 += 1 << (32 - lonShift); } + assert Integer.toUnsignedLong(lon2) >= lonBase; + assert lon2 - lonBase >= 0; if (lon2 - lonBase >= maxLonDelta) { return false; } @@ -284,4 +337,44 @@ public final class GeoEncodingUtils { } } + /** A predicate that checks whether a given point is within a polygon. */ + public static class PolygonPredicate extends Grid { + + private final Polygon2D tree; + + private PolygonPredicate( + int latShift, int lonShift, + int latBase, int lonBase, + int maxLatDelta, int maxLonDelta, + byte[] relations, + Polygon2D tree) { + super(latShift, lonShift, latBase, lonBase, maxLatDelta, maxLonDelta, relations); + this.tree = tree; + } + + /** Check whether the given point is within the considered polygon. + * NOTE: this operates directly on the encoded representation of points. */ + public boolean test(int lat, int lon) { + final int lat2 = ((lat - Integer.MIN_VALUE) >>> latShift); + if (lat2 < latBase || lat2 >= latBase + maxLatDelta) { + return false; + } + int lon2 = ((lon - Integer.MIN_VALUE) >>> lonShift); + if (lon2 < lonBase) { // wrap + lon2 += 1 << (32 - lonShift); + } + assert Integer.toUnsignedLong(lon2) >= lonBase; + assert lon2 - lonBase >= 0; + if (lon2 - lonBase >= maxLonDelta) { + return false; + } + + final int relation = relations[(lat2 - latBase) * maxLonDelta + (lon2 - lonBase)]; + if (relation == Relation.CELL_CROSSES_QUERY.ordinal()) { + return tree.contains(decodeLatitude(lat), decodeLongitude(lon)); + } else { + return relation == Relation.CELL_INSIDE_QUERY.ordinal(); + } + } + } } diff --git a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonDocValuesDistanceQuery.java b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonDocValuesDistanceQuery.java index ab05804763f..e38d9feceeb 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonDocValuesDistanceQuery.java +++ b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonDocValuesDistanceQuery.java @@ -111,7 +111,7 @@ final class LatLonDocValuesDistanceQuery extends Query { final long value = values.nextValue(); final int lat = (int) (value >>> 32); final int lon = (int) (value & 0xFFFFFFFF); - if (distancePredicate.apply(lat, lon)) { + if (distancePredicate.test(lat, lon)) { return true; } } diff --git a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java index 3d0b82e98d2..71ddf3d1fdb 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java +++ b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java @@ -164,7 +164,7 @@ final class LatLonPointDistanceQuery extends Query { int docLatitude = NumericUtils.sortableBytesToInt(packedValue, 0); int docLongitude = NumericUtils.sortableBytesToInt(packedValue, Integer.BYTES); - if (distancePredicate.apply(docLatitude, docLongitude)) { + if (distancePredicate.test(docLatitude, docLongitude)) { adder.add(docID); } } diff --git a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointInPolygonQuery.java b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointInPolygonQuery.java index ec7c682cd73..c272b4d4c48 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointInPolygonQuery.java +++ b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointInPolygonQuery.java @@ -35,6 +35,7 @@ import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.util.DocIdSetBuilder; import org.apache.lucene.util.NumericUtils; import org.apache.lucene.util.StringHelper; +import org.apache.lucene.geo.GeoEncodingUtils; import org.apache.lucene.geo.Polygon; import org.apache.lucene.geo.Polygon2D; @@ -92,6 +93,7 @@ final class LatLonPointInPolygonQuery extends Query { NumericUtils.intToSortableBytes(encodeLongitude(box.maxLon), maxLon, 0); final Polygon2D tree = Polygon2D.create(polygons); + final GeoEncodingUtils.PolygonPredicate polygonPredicate = GeoEncodingUtils.createPolygonPredicate(polygons, tree); return new ConstantScoreWeight(this, boost) { @@ -130,8 +132,8 @@ final class LatLonPointInPolygonQuery extends Query { @Override public void visit(int docID, byte[] packedValue) { - if (tree.contains(decodeLatitude(packedValue, 0), - decodeLongitude(packedValue, Integer.BYTES))) { + if (polygonPredicate.test(NumericUtils.sortableBytesToInt(packedValue, 0), + NumericUtils.sortableBytesToInt(packedValue, Integer.BYTES))) { adder.add(docID); } }