From 2335a458d8340bbb8e4fe8133a74763bd2d37fd1 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Wed, 13 Apr 2016 19:02:11 -0400 Subject: [PATCH] LUCENE-7215: don't invoke full haversin for LatLonPoint.newDistanceQuery --- lucene/CHANGES.txt | 3 +- .../apache/lucene/util/TestSloppyMath.java | 4 ++ .../document/LatLonPointDistanceQuery.java | 70 ++++++++++++------- 3 files changed, 52 insertions(+), 25 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index ac68f780ab6..5cd5d046295 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -39,7 +39,8 @@ Optimizations * LUCENE-7071: Reduce bytes copying in OfflineSorter, giving ~10% speedup on merging 2D LatLonPoint values (Mike McCandless) -* LUCENE-7105: Optimize LatLonPoint's newDistanceQuery. (Robert Muir) +* LUCENE-7105, LUCENE-7215: Optimize LatLonPoint's newDistanceQuery. + (Robert Muir) * LUCENE-7109: LatLonPoint's newPolygonQuery supports two-phase iteration. (Robert Muir) diff --git a/lucene/core/src/test/org/apache/lucene/util/TestSloppyMath.java b/lucene/core/src/test/org/apache/lucene/util/TestSloppyMath.java index ea5597183c8..6a2eb86b2b6 100644 --- a/lucene/core/src/test/org/apache/lucene/util/TestSloppyMath.java +++ b/lucene/core/src/test/org/apache/lucene/util/TestSloppyMath.java @@ -150,6 +150,10 @@ public class TestSloppyMath extends LuceneTestCase { } } + public void testHaversinFromSortKey() { + assertEquals(0.0, haversinMeters(0), 0.0D); + } + public void testAgainstSlowVersion() { for (int i = 0; i < 100_000; i++) { double lat1 = GeoTestUtil.nextLatitude(); diff --git a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java index 604886b7502..9bd78fecbbc 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java +++ b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java @@ -97,15 +97,8 @@ final class LatLonPointDistanceQuery extends Query { NumericUtils.intToSortableBytes(Integer.MAX_VALUE, minLon2, 0); } - // compute a maximum partial haversin: unless our box is crazy, we can use this bound - // to reject edge cases faster in visit() - final double maxPartialDistance; - if (box.maxLon - longitude < 90 && longitude - box.minLon < 90) { - maxPartialDistance = Math.max(SloppyMath.haversinSortKey(latitude, longitude, latitude, box.maxLon), - SloppyMath.haversinSortKey(latitude, longitude, box.maxLat, longitude)); - } else { - maxPartialDistance = Double.POSITIVE_INFINITY; - } + // compute exact sort key: avoid any asin() computations + final double sortKey = sortKey(radiusMeters); final double axisLat = Rectangle.axisLat(latitude, radiusMeters); @@ -160,13 +153,9 @@ final class LatLonPointDistanceQuery extends Query { double docLatitude = decodeLatitude(packedValue, 0); double docLongitude = decodeLongitude(packedValue, Integer.BYTES); - // first check the partial distance, if its more than that, it can't be <= radiusMeters - double h1 = SloppyMath.haversinSortKey(latitude, longitude, docLatitude, docLongitude); - if (h1 <= maxPartialDistance) { - // fully confirm with part 2: - if (SloppyMath.haversinMeters(h1) <= radiusMeters) { - result.add(docID); - } + // its a match only if its sortKey <= our sortKey + if (SloppyMath.haversinSortKey(latitude, longitude, docLatitude, docLongitude) <= sortKey) { + result.add(docID); } } @@ -197,20 +186,20 @@ final class LatLonPointDistanceQuery extends Query { if ((longitude < lonMin || longitude > lonMax) && (axisLat+ Rectangle.AXISLAT_ERROR < latMin || axisLat- Rectangle.AXISLAT_ERROR > latMax)) { // circle not fully inside / crossing axis - if (SloppyMath.haversinMeters(latitude, longitude, latMin, lonMin) > radiusMeters && - SloppyMath.haversinMeters(latitude, longitude, latMin, lonMax) > radiusMeters && - SloppyMath.haversinMeters(latitude, longitude, latMax, lonMin) > radiusMeters && - SloppyMath.haversinMeters(latitude, longitude, latMax, lonMax) > radiusMeters) { + if (SloppyMath.haversinSortKey(latitude, longitude, latMin, lonMin) > sortKey && + SloppyMath.haversinSortKey(latitude, longitude, latMin, lonMax) > sortKey && + SloppyMath.haversinSortKey(latitude, longitude, latMax, lonMin) > sortKey && + SloppyMath.haversinSortKey(latitude, longitude, latMax, lonMax) > sortKey) { // no points inside return Relation.CELL_OUTSIDE_QUERY; } } if (lonMax - longitude < 90 && longitude - lonMin < 90 && - SloppyMath.haversinMeters(latitude, longitude, latMin, lonMin) <= radiusMeters && - SloppyMath.haversinMeters(latitude, longitude, latMin, lonMax) <= radiusMeters && - SloppyMath.haversinMeters(latitude, longitude, latMax, lonMin) <= radiusMeters && - SloppyMath.haversinMeters(latitude, longitude, latMax, lonMax) <= radiusMeters) { + SloppyMath.haversinSortKey(latitude, longitude, latMin, lonMin) <= sortKey && + SloppyMath.haversinSortKey(latitude, longitude, latMin, lonMax) <= sortKey && + SloppyMath.haversinSortKey(latitude, longitude, latMax, lonMin) <= sortKey && + SloppyMath.haversinSortKey(latitude, longitude, latMax, lonMax) <= sortKey) { // we are fully enclosed, collect everything within this subtree return Relation.CELL_INSIDE_QUERY; } else { @@ -230,6 +219,39 @@ final class LatLonPointDistanceQuery extends Query { }; } + /** + * binary search to find the exact sortKey needed to match the specified radius + * any sort key <= this is a query match. + */ + static double sortKey(double radius) { + // effectively infinite + if (radius >= SloppyMath.haversinMeters(Double.MAX_VALUE)) { + return SloppyMath.haversinMeters(Double.MAX_VALUE); + } + + // this is a search through non-negative long space only + long lo = 0; + long hi = Double.doubleToRawLongBits(Double.MAX_VALUE); + while (lo <= hi) { + long mid = (lo + hi) >>> 1; + double sortKey = Double.longBitsToDouble(mid); + double midRadius = SloppyMath.haversinMeters(sortKey); + if (midRadius == radius) { + return sortKey; + } else if (midRadius > radius) { + hi = mid - 1; + } else { + lo = mid + 1; + } + } + + // not found: this is because a user can supply an arbitrary radius, one that we will never + // calculate exactly via our haversin method. + double ceil = Double.longBitsToDouble(lo); + assert SloppyMath.haversinMeters(ceil) > radius; + return ceil; + } + public String getField() { return field; }