From a4722845e3080dac06c59ca9837a602cf85f3e17 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Fri, 11 Mar 2016 21:20:07 -0500 Subject: [PATCH] LUCENE-7099: speed up embarrasingly slow sandbox/ distance sort --- .../LatLonPointDistanceComparator.java | 97 ++++++++++++++++++- 1 file changed, 95 insertions(+), 2 deletions(-) diff --git a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceComparator.java b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceComparator.java index a5d85344d84..e64f4b0eeac 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceComparator.java +++ b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceComparator.java @@ -27,8 +27,16 @@ import org.apache.lucene.search.FieldComparator; import org.apache.lucene.search.LeafFieldComparator; import org.apache.lucene.search.Scorer; import org.apache.lucene.spatial.util.GeoDistanceUtils; +import org.apache.lucene.spatial.util.GeoRect; +import org.apache.lucene.spatial.util.GeoUtils; -/** Compares docs by distance from an origin */ +/** + * Compares documents by distance from an origin point + *

+ * When the least competitive item on the priority queue changes (setBottom), we recompute + * a bounding box representing competitive distance to the top-N. Then in compareBottom, we can + * quickly reject hits based on bounding box alone without computing distance for every element. + */ class LatLonPointDistanceComparator extends FieldComparator implements LeafFieldComparator { final String field; final double latitude; @@ -40,6 +48,24 @@ class LatLonPointDistanceComparator extends FieldComparator implements L double topValue; SortedNumericDocValues currentDocs; + // current bounding box(es) for the bottom distance on the PQ. + // these are pre-encoded with LatLonPoint's encoding and + // used to exclude uncompetitive hits faster. + int minLon; + int maxLon; + int minLat; + int maxLat; + + // crossesDateLine is true, then we have a second box to check + boolean crossesDateLine; + int minLon2; + int maxLon2; + int minLat2; + int maxLat2; + + // the number of times setBottom has been called (adversary protection) + int setBottomCounter = 0; + public LatLonPointDistanceComparator(String field, double latitude, double longitude, int numHits, double missingValue) { this.field = field; this.latitude = latitude; @@ -59,6 +85,52 @@ class LatLonPointDistanceComparator extends FieldComparator implements L @Override public void setBottom(int slot) { bottom = values[slot]; + // make bounding box(es) to exclude non-competitive hits, but start + // sampling if we get called way too much: don't make gobs of bounding + // boxes if comparator hits a worst case order (e.g. backwards distance order) + if (setBottomCounter < 1024 || (setBottomCounter & 0x3F) == 0x3F) { + GeoRect box = GeoUtils.circleToBBox(longitude, latitude, bottom); + // pre-encode our box to our integer encoding, so we don't have to decode + // to double values for uncompetitive hits. This has some cost! + int minLatEncoded = LatLonPoint.encodeLatitude(box.minLat); + int maxLatEncoded = LatLonPoint.encodeLatitude(box.maxLat); + int minLonEncoded = LatLonPoint.encodeLongitude(box.minLon); + int maxLonEncoded = LatLonPoint.encodeLongitude(box.maxLon); + // be sure to not introduce quantization error in our optimization, just + // round up our encoded box safely in all directions. + if (minLatEncoded != Integer.MIN_VALUE) { + minLatEncoded--; + } + if (minLonEncoded != Integer.MIN_VALUE) { + minLonEncoded--; + } + if (maxLatEncoded != Integer.MAX_VALUE) { + maxLatEncoded++; + } + if (maxLonEncoded != Integer.MAX_VALUE) { + maxLonEncoded++; + } + crossesDateLine = box.crossesDateline(); + // crosses dateline: split + if (crossesDateLine) { + // box1 + minLon = Integer.MIN_VALUE; + maxLon = maxLonEncoded; + minLat = minLatEncoded; + maxLat = maxLatEncoded; + // box2 + minLon2 = minLonEncoded; + maxLon2 = Integer.MAX_VALUE; + minLat2 = minLatEncoded; + maxLat2 = maxLatEncoded; + } else { + minLon = minLonEncoded; + maxLon = maxLonEncoded; + minLat = minLatEncoded; + maxLat = maxLatEncoded; + } + } + setBottomCounter++; } @Override @@ -68,7 +140,28 @@ class LatLonPointDistanceComparator extends FieldComparator implements L @Override public int compareBottom(int doc) throws IOException { - return Double.compare(bottom, distance(doc)); + currentDocs.setDocument(doc); + + int numValues = currentDocs.count(); + if (numValues == 0) { + return Double.compare(bottom, missingValue); + } + + double minValue = Double.POSITIVE_INFINITY; + for (int i = 0; i < numValues; i++) { + long encoded = currentDocs.valueAt(i); + int latitudeBits = (int)(encoded >> 32); + int longitudeBits = (int)(encoded & 0xFFFFFFFF); + boolean outsideBox = ((latitudeBits < minLat || longitudeBits < minLon || latitudeBits > maxLat || longitudeBits > maxLon) && + (crossesDateLine == false || latitudeBits < minLat2 || longitudeBits < minLon2 || latitudeBits > maxLat2 || longitudeBits > maxLon2)); + // only compute actual distance if its inside "competitive bounding box" + if (outsideBox == false) { + double docLatitude = LatLonPoint.decodeLatitude(latitudeBits); + double docLongitude = LatLonPoint.decodeLongitude(longitudeBits); + minValue = Math.min(minValue, GeoDistanceUtils.haversin(latitude, longitude, docLatitude, docLongitude)); + } + } + return Double.compare(bottom, minValue); } @Override