From 02bb6c01550771fb0d7a75d4b283c897024c923b Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Mon, 14 Mar 2016 19:07:30 -0400 Subject: [PATCH] LUCENE-7104: remove "sort missing first" from LatLonPoint.newDistanceSort and simplify/speedup code --- .../apache/lucene/document/LatLonPoint.java | 3 +- .../LatLonPointDistanceComparator.java | 114 +++++++----------- .../document/LatLonPointDistanceQuery.java | 8 +- .../lucene/document/LatLonPointSortField.java | 14 +-- .../document/TestLatLonPointDistanceSort.java | 57 +++------ 5 files changed, 68 insertions(+), 128 deletions(-) diff --git a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPoint.java b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPoint.java index 9677baa3a9e..f5541bd9dcf 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPoint.java +++ b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPoint.java @@ -333,8 +333,7 @@ public class LatLonPoint extends Field { * the hits contains a Double instance with the distance in meters. *

* If a document is missing the field, then by default it is treated as having {@link Double#POSITIVE_INFINITY} distance - * (missing values sort last). You can change this to sort missing values first by calling - * {@link SortField#setMissingValue(Object) setMissingValue(Double.NEGATIVE_INFINITY)} on the returned SortField. + * (missing values sort last). *

* If a document contains multiple values for the field, the closest distance to the location is used. *

diff --git a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceComparator.java b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceComparator.java index ef4c3f39d28..2102d81d496 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceComparator.java +++ b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceComparator.java @@ -42,7 +42,6 @@ class LatLonPointDistanceComparator extends FieldComparator implements L final String field; final double latitude; final double longitude; - final double missingValue; final double[] values; double bottom; @@ -52,27 +51,22 @@ class LatLonPointDistanceComparator extends FieldComparator implements L // current bounding box(es) for the bottom distance on the PQ. // these are pre-encoded with LatLonPoint's encoding and // used to exclude uncompetitive hits faster. - int minLon; - int maxLon; - int minLat; - int maxLat; + int minLon = Integer.MIN_VALUE; + int maxLon = Integer.MAX_VALUE; + int minLat = Integer.MIN_VALUE; + int maxLat = Integer.MAX_VALUE; - // crossesDateLine is true, then we have a second box to check - boolean crossesDateLine; - int minLon2; - int maxLon2; - int minLat2; - int maxLat2; + // second set of longitude ranges to check (for cross-dateline case) + int minLon2 = Integer.MAX_VALUE; // the number of times setBottom has been called (adversary protection) int setBottomCounter = 0; - public LatLonPointDistanceComparator(String field, double latitude, double longitude, int numHits, double missingValue) { + public LatLonPointDistanceComparator(String field, double latitude, double longitude, int numHits) { this.field = field; this.latitude = latitude; this.longitude = longitude; this.values = new double[numHits]; - this.missingValue = missingValue; } @Override @@ -90,53 +84,22 @@ class LatLonPointDistanceComparator extends FieldComparator implements L // sampling if we get called way too much: don't make gobs of bounding // boxes if comparator hits a worst case order (e.g. backwards distance order) if (setBottomCounter < 1024 || (setBottomCounter & 0x3F) == 0x3F) { - // don't pass infinite values to circleToBBox: just make a complete box. - if (bottom == missingValue) { - minLat = minLon = Integer.MIN_VALUE; - maxLat = maxLon = Integer.MAX_VALUE; - crossesDateLine = false; + GeoRect box = GeoUtils.circleToBBox(longitude, latitude, haversin2(bottom)); + // pre-encode our box to our integer encoding, so we don't have to decode + // to double values for uncompetitive hits. This has some cost! + minLat = LatLonPoint.encodeLatitude(box.minLat); + maxLat = LatLonPoint.encodeLatitude(box.maxLat); + if (box.crossesDateline()) { + // box1 + minLon = Integer.MIN_VALUE; + maxLon = LatLonPoint.encodeLongitude(box.maxLon); + // box2 + minLon2 = LatLonPoint.encodeLongitude(box.minLon); } else { - assert Double.isFinite(bottom); - GeoRect box = GeoUtils.circleToBBox(longitude, latitude, haversin2(bottom)); - // pre-encode our box to our integer encoding, so we don't have to decode - // to double values for uncompetitive hits. This has some cost! - int minLatEncoded = LatLonPoint.encodeLatitude(box.minLat); - int maxLatEncoded = LatLonPoint.encodeLatitude(box.maxLat); - int minLonEncoded = LatLonPoint.encodeLongitude(box.minLon); - int maxLonEncoded = LatLonPoint.encodeLongitude(box.maxLon); - // be sure to not introduce quantization error in our optimization, just - // round up our encoded box safely in all directions. - if (minLatEncoded != Integer.MIN_VALUE) { - minLatEncoded--; - } - if (minLonEncoded != Integer.MIN_VALUE) { - minLonEncoded--; - } - if (maxLatEncoded != Integer.MAX_VALUE) { - maxLatEncoded++; - } - if (maxLonEncoded != Integer.MAX_VALUE) { - maxLonEncoded++; - } - crossesDateLine = box.crossesDateline(); - // crosses dateline: split - if (crossesDateLine) { - // box1 - minLon = Integer.MIN_VALUE; - maxLon = maxLonEncoded; - minLat = minLatEncoded; - maxLat = maxLatEncoded; - // box2 - minLon2 = minLonEncoded; - maxLon2 = Integer.MAX_VALUE; - minLat2 = minLatEncoded; - maxLat2 = maxLatEncoded; - } else { - minLon = minLonEncoded; - maxLon = maxLonEncoded; - minLat = minLatEncoded; - maxLat = maxLatEncoded; - } + minLon = LatLonPoint.encodeLongitude(box.minLon); + maxLon = LatLonPoint.encodeLongitude(box.maxLon); + // disable box2 + minLon2 = Integer.MAX_VALUE; } } setBottomCounter++; @@ -153,24 +116,33 @@ class LatLonPointDistanceComparator extends FieldComparator implements L int numValues = currentDocs.count(); if (numValues == 0) { - return Double.compare(bottom, missingValue); + return Double.compare(bottom, Double.POSITIVE_INFINITY); } - double minValue = Double.POSITIVE_INFINITY; + int cmp = -1; for (int i = 0; i < numValues; i++) { long encoded = currentDocs.valueAt(i); + + // test bounding box int latitudeBits = (int)(encoded >> 32); + if (latitudeBits < minLat || latitudeBits > maxLat) { + continue; + } int longitudeBits = (int)(encoded & 0xFFFFFFFF); - boolean outsideBox = ((latitudeBits < minLat || longitudeBits < minLon || latitudeBits > maxLat || longitudeBits > maxLon) && - (crossesDateLine == false || latitudeBits < minLat2 || longitudeBits < minLon2 || latitudeBits > maxLat2 || longitudeBits > maxLon2)); + if ((longitudeBits < minLon || longitudeBits > maxLon) && (longitudeBits < minLon2)) { + continue; + } + // only compute actual distance if its inside "competitive bounding box" - if (outsideBox == false) { - double docLatitude = LatLonPoint.decodeLatitude(latitudeBits); - double docLongitude = LatLonPoint.decodeLongitude(longitudeBits); - minValue = Math.min(minValue, haversin1(latitude, longitude, docLatitude, docLongitude)); + double docLatitude = LatLonPoint.decodeLatitude(latitudeBits); + double docLongitude = LatLonPoint.decodeLongitude(longitudeBits); + cmp = Math.max(cmp, Double.compare(bottom, haversin1(latitude, longitude, docLatitude, docLongitude))); + // once we compete in the PQ, no need to continue. + if (cmp > 0) { + return cmp; } } - return Double.compare(bottom, minValue); + return cmp; } @Override @@ -204,12 +176,8 @@ class LatLonPointDistanceComparator extends FieldComparator implements L double sortKey(int doc) { currentDocs.setDocument(doc); - int numValues = currentDocs.count(); - if (numValues == 0) { - return missingValue; - } - double minValue = Double.POSITIVE_INFINITY; + int numValues = currentDocs.count(); for (int i = 0; i < numValues; i++) { long encoded = currentDocs.valueAt(i); double docLatitude = LatLonPoint.decodeLatitude((int)(encoded >> 32)); diff --git a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java index 9d2398653ef..3f86f1e5a5d 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java +++ b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java @@ -142,9 +142,11 @@ final class LatLonPointDistanceQuery extends Query { double latMax = LatLonPoint.decodeLatitude(maxPackedValue, 0); double lonMax = LatLonPoint.decodeLongitude(maxPackedValue, Integer.BYTES); - if ((latMax < box1.minLat || lonMax < box1.minLon || latMin > box1.maxLat || lonMin > box1.maxLon) && - (box2 == null || latMax < box2.minLat || lonMax < box2.minLon || latMin > box2.maxLat || lonMin > box2.maxLon)) { - // we are fully outside of bounding box(es), don't proceed any further. + if (latMax < box1.minLat || latMin > box1.maxLat) { + // latitude out of bounding box range + return Relation.CELL_OUTSIDE_QUERY; + } else if ((lonMax < box1.minLon || lonMin > box1.maxLon) && (box2 == null || lonMax < box2.minLon)) { + // longitude out of bounding box range return Relation.CELL_OUTSIDE_QUERY; } else if (lonMax - longitude < 90 && longitude - lonMin < 90 && GeoDistanceUtils.haversin(latitude, longitude, latMin, lonMin) <= radiusMeters && diff --git a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointSortField.java b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointSortField.java index da90b867465..f88304367e5 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointSortField.java +++ b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointSortField.java @@ -47,7 +47,7 @@ final class LatLonPointSortField extends SortField { @Override public FieldComparator getComparator(int numHits, int sortPos) throws IOException { - return new LatLonPointDistanceComparator(getField(), latitude, longitude, numHits, getMissingValue()); + return new LatLonPointDistanceComparator(getField(), latitude, longitude, numHits); } @Override @@ -57,16 +57,10 @@ final class LatLonPointSortField extends SortField { @Override public void setMissingValue(Object missingValue) { - if (missingValue == null) { - throw new IllegalArgumentException("Missing value cannot be null"); + if (Double.valueOf(Double.POSITIVE_INFINITY).equals(missingValue) == false) { + throw new IllegalArgumentException("Missing value can only be Double.POSITIVE_INFINITY (missing values last), but got " + missingValue); } - if (missingValue.getClass() != Double.class) - throw new IllegalArgumentException("Missing value can only be of type java.lang.Double, but got " + missingValue.getClass()); - Double value = (Double) missingValue; - if (!Double.isInfinite(value)) { - throw new IllegalArgumentException("Missing value can only be Double.NEGATIVE_INFINITY (missing values first) or Double.POSITIVE_INFINITY (missing values last), but got " + value); - } - this.missingValue = value; + this.missingValue = missingValue; } @Override diff --git a/lucene/sandbox/src/test/org/apache/lucene/document/TestLatLonPointDistanceSort.java b/lucene/sandbox/src/test/org/apache/lucene/document/TestLatLonPointDistanceSort.java index 7df956f94bf..437631310ad 100644 --- a/lucene/sandbox/src/test/org/apache/lucene/document/TestLatLonPointDistanceSort.java +++ b/lucene/sandbox/src/test/org/apache/lucene/document/TestLatLonPointDistanceSort.java @@ -28,6 +28,8 @@ import org.apache.lucene.search.Sort; import org.apache.lucene.search.SortField; import org.apache.lucene.search.TopDocs; import org.apache.lucene.spatial.util.GeoDistanceUtils; +import org.apache.lucene.spatial.util.GeoRect; +import org.apache.lucene.spatial.util.GeoUtils; import org.apache.lucene.store.Directory; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.TestUtil; @@ -110,45 +112,6 @@ public class TestLatLonPointDistanceSort extends LuceneTestCase { dir.close(); } - /** Add two points (one doc missing) and sort by distance */ - public void testMissingFirst() throws Exception { - Directory dir = newDirectory(); - RandomIndexWriter iw = new RandomIndexWriter(random(), dir); - - // missing - Document doc = new Document(); - iw.addDocument(doc); - - doc = new Document(); - doc.add(new LatLonPoint("location", 40.718266, -74.007819)); - iw.addDocument(doc); - - doc = new Document(); - doc.add(new LatLonPoint("location", 40.7051157, -74.0088305)); - iw.addDocument(doc); - - IndexReader reader = iw.getReader(); - IndexSearcher searcher = newSearcher(reader); - iw.close(); - - SortField sortField = LatLonPoint.newDistanceSort("location", 40.7143528, -74.0059731); - sortField.setMissingValue(Double.NEGATIVE_INFINITY); - Sort sort = new Sort(sortField); - TopDocs td = searcher.search(new MatchAllDocsQuery(), 3, sort); - - FieldDoc d = (FieldDoc) td.scoreDocs[0]; - assertEquals(Double.NEGATIVE_INFINITY, (Double)d.fields[0], 0.0D); - - d = (FieldDoc) td.scoreDocs[1]; - assertEquals(462.61748421408186D, (Double)d.fields[0], 0.0D); - - d = (FieldDoc) td.scoreDocs[2]; - assertEquals(1056.1630445911035D, (Double)d.fields[0], 0.0D); - - reader.close(); - dir.close(); - } - /** Run a few iterations with just 10 docs, hopefully easy to debug */ public void testRandom() throws Exception { for (int iters = 0; iters < 100; iters++) { @@ -239,7 +202,7 @@ public class TestLatLonPointDistanceSort extends LuceneTestCase { for (int i = 0; i < numQueries; i++) { double lat = -90 + 180.0 * random().nextDouble(); double lon = -180 + 360.0 * random().nextDouble(); - double missingValue = random().nextBoolean() ? Double.POSITIVE_INFINITY : Double.NEGATIVE_INFINITY; + double missingValue = Double.POSITIVE_INFINITY; Result expected[] = new Result[reader.maxDoc()]; @@ -309,4 +272,18 @@ public class TestLatLonPointDistanceSort extends LuceneTestCase { assertEquals(expected, actual); } } + + /** Test infinite radius covers whole earth */ + public void testInfiniteRect() { + for (int i = 0; i < 100000; i++) { + double centerLat = -90 + 180.0 * random().nextDouble(); + double centerLon = -180 + 360.0 * random().nextDouble(); + GeoRect rect = GeoUtils.circleToBBox(centerLat, centerLon, Double.POSITIVE_INFINITY); + assertEquals(-180.0, rect.minLon, 0.0D); + assertEquals(180.0, rect.maxLon, 0.0D); + assertEquals(-90.0, rect.minLat, 0.0D); + assertEquals(90.0, rect.maxLat, 0.0D); + assertFalse(rect.crossesDateline()); + } + } }