diff --git a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPoint.java b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPoint.java index ebf850c9d5a..9677baa3a9e 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPoint.java +++ b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPoint.java @@ -333,8 +333,8 @@ public class LatLonPoint extends Field { * the hits contains a Double instance with the distance in meters. *

* If a document is missing the field, then by default it is treated as having {@link Double#POSITIVE_INFINITY} distance - * (missing last). You can change this by calling {@link SortField#setMissingValue(Object)} on the returned SortField - * to a different Double value. + * (missing values sort last). You can change this to sort missing values first by calling + * {@link SortField#setMissingValue(Object) setMissingValue(Double.NEGATIVE_INFINITY)} on the returned SortField. *

* If a document contains multiple values for the field, the closest distance to the location is used. *

diff --git a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceComparator.java b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceComparator.java index e64f4b0eeac..86c9134fd7a 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceComparator.java +++ b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceComparator.java @@ -89,45 +89,53 @@ class LatLonPointDistanceComparator extends FieldComparator implements L // sampling if we get called way too much: don't make gobs of bounding // boxes if comparator hits a worst case order (e.g. backwards distance order) if (setBottomCounter < 1024 || (setBottomCounter & 0x3F) == 0x3F) { - GeoRect box = GeoUtils.circleToBBox(longitude, latitude, bottom); - // pre-encode our box to our integer encoding, so we don't have to decode - // to double values for uncompetitive hits. This has some cost! - int minLatEncoded = LatLonPoint.encodeLatitude(box.minLat); - int maxLatEncoded = LatLonPoint.encodeLatitude(box.maxLat); - int minLonEncoded = LatLonPoint.encodeLongitude(box.minLon); - int maxLonEncoded = LatLonPoint.encodeLongitude(box.maxLon); - // be sure to not introduce quantization error in our optimization, just - // round up our encoded box safely in all directions. - if (minLatEncoded != Integer.MIN_VALUE) { - minLatEncoded--; - } - if (minLonEncoded != Integer.MIN_VALUE) { - minLonEncoded--; - } - if (maxLatEncoded != Integer.MAX_VALUE) { - maxLatEncoded++; - } - if (maxLonEncoded != Integer.MAX_VALUE) { - maxLonEncoded++; - } - crossesDateLine = box.crossesDateline(); - // crosses dateline: split - if (crossesDateLine) { - // box1 - minLon = Integer.MIN_VALUE; - maxLon = maxLonEncoded; - minLat = minLatEncoded; - maxLat = maxLatEncoded; - // box2 - minLon2 = minLonEncoded; - maxLon2 = Integer.MAX_VALUE; - minLat2 = minLatEncoded; - maxLat2 = maxLatEncoded; + // don't pass infinite values to circleToBBox: just make a complete box. + if (bottom == missingValue) { + minLat = minLon = Integer.MIN_VALUE; + maxLat = maxLon = Integer.MAX_VALUE; + crossesDateLine = false; } else { - minLon = minLonEncoded; - maxLon = maxLonEncoded; - minLat = minLatEncoded; - maxLat = maxLatEncoded; + assert Double.isFinite(bottom); + GeoRect box = GeoUtils.circleToBBox(longitude, latitude, bottom); + // pre-encode our box to our integer encoding, so we don't have to decode + // to double values for uncompetitive hits. This has some cost! + int minLatEncoded = LatLonPoint.encodeLatitude(box.minLat); + int maxLatEncoded = LatLonPoint.encodeLatitude(box.maxLat); + int minLonEncoded = LatLonPoint.encodeLongitude(box.minLon); + int maxLonEncoded = LatLonPoint.encodeLongitude(box.maxLon); + // be sure to not introduce quantization error in our optimization, just + // round up our encoded box safely in all directions. + if (minLatEncoded != Integer.MIN_VALUE) { + minLatEncoded--; + } + if (minLonEncoded != Integer.MIN_VALUE) { + minLonEncoded--; + } + if (maxLatEncoded != Integer.MAX_VALUE) { + maxLatEncoded++; + } + if (maxLonEncoded != Integer.MAX_VALUE) { + maxLonEncoded++; + } + crossesDateLine = box.crossesDateline(); + // crosses dateline: split + if (crossesDateLine) { + // box1 + minLon = Integer.MIN_VALUE; + maxLon = maxLonEncoded; + minLat = minLatEncoded; + maxLat = maxLatEncoded; + // box2 + minLon2 = minLonEncoded; + maxLon2 = Integer.MAX_VALUE; + minLat2 = minLatEncoded; + maxLat2 = maxLatEncoded; + } else { + minLon = minLonEncoded; + maxLon = maxLonEncoded; + minLat = minLatEncoded; + maxLat = maxLatEncoded; + } } } setBottomCounter++; diff --git a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointSortField.java b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointSortField.java index 9d3928ea8c3..da90b867465 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointSortField.java +++ b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointSortField.java @@ -62,7 +62,11 @@ final class LatLonPointSortField extends SortField { } if (missingValue.getClass() != Double.class) throw new IllegalArgumentException("Missing value can only be of type java.lang.Double, but got " + missingValue.getClass()); - this.missingValue = missingValue; + Double value = (Double) missingValue; + if (!Double.isInfinite(value)) { + throw new IllegalArgumentException("Missing value can only be Double.NEGATIVE_INFINITY (missing values first) or Double.POSITIVE_INFINITY (missing values last), but got " + value); + } + this.missingValue = value; } @Override diff --git a/lucene/sandbox/src/test/org/apache/lucene/document/TestLatLonPointDistanceSort.java b/lucene/sandbox/src/test/org/apache/lucene/document/TestLatLonPointDistanceSort.java index ea36ea641b1..a776b3fc155 100644 --- a/lucene/sandbox/src/test/org/apache/lucene/document/TestLatLonPointDistanceSort.java +++ b/lucene/sandbox/src/test/org/apache/lucene/document/TestLatLonPointDistanceSort.java @@ -73,6 +73,82 @@ public class TestLatLonPointDistanceSort extends LuceneTestCase { dir.close(); } + /** Add two points (one doc missing) and sort by distance */ + public void testMissingLast() throws Exception { + Directory dir = newDirectory(); + RandomIndexWriter iw = new RandomIndexWriter(random(), dir); + + // missing + Document doc = new Document(); + iw.addDocument(doc); + + doc = new Document(); + doc.add(new LatLonPoint("location", 40.718266, -74.007819)); + iw.addDocument(doc); + + doc = new Document(); + doc.add(new LatLonPoint("location", 40.7051157, -74.0088305)); + iw.addDocument(doc); + + IndexReader reader = iw.getReader(); + IndexSearcher searcher = new IndexSearcher(reader); + iw.close(); + + Sort sort = new Sort(LatLonPoint.newDistanceSort("location", 40.7143528, -74.0059731)); + TopDocs td = searcher.search(new MatchAllDocsQuery(), 3, sort); + + FieldDoc d = (FieldDoc) td.scoreDocs[0]; + assertEquals(462.61748421408186D, (Double)d.fields[0], 0.0D); + + d = (FieldDoc) td.scoreDocs[1]; + assertEquals(1056.1630445911035D, (Double)d.fields[0], 0.0D); + + d = (FieldDoc) td.scoreDocs[2]; + assertEquals(Double.POSITIVE_INFINITY, (Double)d.fields[0], 0.0D); + + reader.close(); + dir.close(); + } + + /** Add two points (one doc missing) and sort by distance */ + public void testMissingFirst() throws Exception { + Directory dir = newDirectory(); + RandomIndexWriter iw = new RandomIndexWriter(random(), dir); + + // missing + Document doc = new Document(); + iw.addDocument(doc); + + doc = new Document(); + doc.add(new LatLonPoint("location", 40.718266, -74.007819)); + iw.addDocument(doc); + + doc = new Document(); + doc.add(new LatLonPoint("location", 40.7051157, -74.0088305)); + iw.addDocument(doc); + + IndexReader reader = iw.getReader(); + IndexSearcher searcher = new IndexSearcher(reader); + iw.close(); + + SortField sortField = LatLonPoint.newDistanceSort("location", 40.7143528, -74.0059731); + sortField.setMissingValue(Double.NEGATIVE_INFINITY); + Sort sort = new Sort(sortField); + TopDocs td = searcher.search(new MatchAllDocsQuery(), 3, sort); + + FieldDoc d = (FieldDoc) td.scoreDocs[0]; + assertEquals(Double.NEGATIVE_INFINITY, (Double)d.fields[0], 0.0D); + + d = (FieldDoc) td.scoreDocs[1]; + assertEquals(462.61748421408186D, (Double)d.fields[0], 0.0D); + + d = (FieldDoc) td.scoreDocs[2]; + assertEquals(1056.1630445911035D, (Double)d.fields[0], 0.0D); + + reader.close(); + dir.close(); + } + /** Run a few iterations with just 10 docs, hopefully easy to debug */ public void testRandom() throws Exception { for (int iters = 0; iters < 100; iters++) { @@ -141,17 +217,20 @@ public class TestLatLonPointDistanceSort extends LuceneTestCase { RandomIndexWriter writer = new RandomIndexWriter(random(), dir); for (int i = 0; i < numDocs; i++) { - double latRaw = -90 + 180.0 * random().nextDouble(); - double lonRaw = -180 + 360.0 * random().nextDouble(); - // pre-normalize up front, so we can just use quantized value for testing and do simple exact comparisons - double lat = LatLonPoint.decodeLatitude(LatLonPoint.encodeLatitude(latRaw)); - double lon = LatLonPoint.decodeLongitude(LatLonPoint.encodeLongitude(lonRaw)); Document doc = new Document(); doc.add(new StoredField("id", i)); doc.add(new NumericDocValuesField("id", i)); - doc.add(new LatLonPoint("field", lat, lon)); - doc.add(new StoredField("lat", lat)); - doc.add(new StoredField("lon", lon)); + if (random().nextInt(10) > 7) { + double latRaw = -90 + 180.0 * random().nextDouble(); + double lonRaw = -180 + 360.0 * random().nextDouble(); + // pre-normalize up front, so we can just use quantized value for testing and do simple exact comparisons + double lat = LatLonPoint.decodeLatitude(LatLonPoint.encodeLatitude(latRaw)); + double lon = LatLonPoint.decodeLongitude(LatLonPoint.encodeLongitude(lonRaw)); + + doc.add(new LatLonPoint("field", lat, lon)); + doc.add(new StoredField("lat", lat)); + doc.add(new StoredField("lon", lon)); + } // otherwise "missing" writer.addDocument(doc); } IndexReader reader = writer.getReader(); @@ -160,14 +239,20 @@ public class TestLatLonPointDistanceSort extends LuceneTestCase { for (int i = 0; i < numQueries; i++) { double lat = -90 + 180.0 * random().nextDouble(); double lon = -180 + 360.0 * random().nextDouble(); + double missingValue = random().nextBoolean() ? Double.POSITIVE_INFINITY : Double.NEGATIVE_INFINITY; Result expected[] = new Result[reader.maxDoc()]; for (int doc = 0; doc < reader.maxDoc(); doc++) { Document targetDoc = reader.document(doc); - double docLatitude = targetDoc.getField("lat").numericValue().doubleValue(); - double docLongitude = targetDoc.getField("lon").numericValue().doubleValue(); - double distance = GeoDistanceUtils.haversin(lat, lon, docLatitude, docLongitude); + final double distance; + if (targetDoc.getField("lat") == null) { + distance = missingValue; // missing + } else { + double docLatitude = targetDoc.getField("lat").numericValue().doubleValue(); + double docLongitude = targetDoc.getField("lon").numericValue().doubleValue(); + distance = GeoDistanceUtils.haversin(lat, lon, docLatitude, docLongitude); + } int id = targetDoc.getField("id").numericValue().intValue(); expected[doc] = new Result(id, distance); } @@ -177,7 +262,9 @@ public class TestLatLonPointDistanceSort extends LuceneTestCase { // randomize the topN a bit int topN = TestUtil.nextInt(random(), 1, reader.maxDoc()); // sort by distance, then ID - Sort sort = new Sort(LatLonPoint.newDistanceSort("field", lat, lon), + SortField distanceSort = LatLonPoint.newDistanceSort("field", lat, lon); + distanceSort.setMissingValue(missingValue); + Sort sort = new Sort(distanceSort, new SortField("id", SortField.Type.INT)); TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), topN, sort);