mirror of
https://github.com/apache/lucene.git
synced 2025-02-09 03:25:15 +00:00
LUCENE-7102: LatLonPoint.newDistanceSort fails with "sort missing first"
This commit is contained in:
parent
ae866b0149
commit
a6fe1c0b15
@ -333,8 +333,8 @@ public class LatLonPoint extends Field {
|
||||
* the hits contains a Double instance with the distance in meters.
|
||||
* <p>
|
||||
* If a document is missing the field, then by default it is treated as having {@link Double#POSITIVE_INFINITY} distance
|
||||
* (missing last). You can change this by calling {@link SortField#setMissingValue(Object)} on the returned SortField
|
||||
* to a different Double value.
|
||||
* (missing values sort last). You can change this to sort missing values first by calling
|
||||
* {@link SortField#setMissingValue(Object) setMissingValue(Double.NEGATIVE_INFINITY)} on the returned SortField.
|
||||
* <p>
|
||||
* If a document contains multiple values for the field, the <i>closest</i> distance to the location is used.
|
||||
* <p>
|
||||
|
@ -89,45 +89,53 @@ class LatLonPointDistanceComparator extends FieldComparator<Double> implements L
|
||||
// sampling if we get called way too much: don't make gobs of bounding
|
||||
// boxes if comparator hits a worst case order (e.g. backwards distance order)
|
||||
if (setBottomCounter < 1024 || (setBottomCounter & 0x3F) == 0x3F) {
|
||||
GeoRect box = GeoUtils.circleToBBox(longitude, latitude, bottom);
|
||||
// pre-encode our box to our integer encoding, so we don't have to decode
|
||||
// to double values for uncompetitive hits. This has some cost!
|
||||
int minLatEncoded = LatLonPoint.encodeLatitude(box.minLat);
|
||||
int maxLatEncoded = LatLonPoint.encodeLatitude(box.maxLat);
|
||||
int minLonEncoded = LatLonPoint.encodeLongitude(box.minLon);
|
||||
int maxLonEncoded = LatLonPoint.encodeLongitude(box.maxLon);
|
||||
// be sure to not introduce quantization error in our optimization, just
|
||||
// round up our encoded box safely in all directions.
|
||||
if (minLatEncoded != Integer.MIN_VALUE) {
|
||||
minLatEncoded--;
|
||||
}
|
||||
if (minLonEncoded != Integer.MIN_VALUE) {
|
||||
minLonEncoded--;
|
||||
}
|
||||
if (maxLatEncoded != Integer.MAX_VALUE) {
|
||||
maxLatEncoded++;
|
||||
}
|
||||
if (maxLonEncoded != Integer.MAX_VALUE) {
|
||||
maxLonEncoded++;
|
||||
}
|
||||
crossesDateLine = box.crossesDateline();
|
||||
// crosses dateline: split
|
||||
if (crossesDateLine) {
|
||||
// box1
|
||||
minLon = Integer.MIN_VALUE;
|
||||
maxLon = maxLonEncoded;
|
||||
minLat = minLatEncoded;
|
||||
maxLat = maxLatEncoded;
|
||||
// box2
|
||||
minLon2 = minLonEncoded;
|
||||
maxLon2 = Integer.MAX_VALUE;
|
||||
minLat2 = minLatEncoded;
|
||||
maxLat2 = maxLatEncoded;
|
||||
// don't pass infinite values to circleToBBox: just make a complete box.
|
||||
if (bottom == missingValue) {
|
||||
minLat = minLon = Integer.MIN_VALUE;
|
||||
maxLat = maxLon = Integer.MAX_VALUE;
|
||||
crossesDateLine = false;
|
||||
} else {
|
||||
minLon = minLonEncoded;
|
||||
maxLon = maxLonEncoded;
|
||||
minLat = minLatEncoded;
|
||||
maxLat = maxLatEncoded;
|
||||
assert Double.isFinite(bottom);
|
||||
GeoRect box = GeoUtils.circleToBBox(longitude, latitude, bottom);
|
||||
// pre-encode our box to our integer encoding, so we don't have to decode
|
||||
// to double values for uncompetitive hits. This has some cost!
|
||||
int minLatEncoded = LatLonPoint.encodeLatitude(box.minLat);
|
||||
int maxLatEncoded = LatLonPoint.encodeLatitude(box.maxLat);
|
||||
int minLonEncoded = LatLonPoint.encodeLongitude(box.minLon);
|
||||
int maxLonEncoded = LatLonPoint.encodeLongitude(box.maxLon);
|
||||
// be sure to not introduce quantization error in our optimization, just
|
||||
// round up our encoded box safely in all directions.
|
||||
if (minLatEncoded != Integer.MIN_VALUE) {
|
||||
minLatEncoded--;
|
||||
}
|
||||
if (minLonEncoded != Integer.MIN_VALUE) {
|
||||
minLonEncoded--;
|
||||
}
|
||||
if (maxLatEncoded != Integer.MAX_VALUE) {
|
||||
maxLatEncoded++;
|
||||
}
|
||||
if (maxLonEncoded != Integer.MAX_VALUE) {
|
||||
maxLonEncoded++;
|
||||
}
|
||||
crossesDateLine = box.crossesDateline();
|
||||
// crosses dateline: split
|
||||
if (crossesDateLine) {
|
||||
// box1
|
||||
minLon = Integer.MIN_VALUE;
|
||||
maxLon = maxLonEncoded;
|
||||
minLat = minLatEncoded;
|
||||
maxLat = maxLatEncoded;
|
||||
// box2
|
||||
minLon2 = minLonEncoded;
|
||||
maxLon2 = Integer.MAX_VALUE;
|
||||
minLat2 = minLatEncoded;
|
||||
maxLat2 = maxLatEncoded;
|
||||
} else {
|
||||
minLon = minLonEncoded;
|
||||
maxLon = maxLonEncoded;
|
||||
minLat = minLatEncoded;
|
||||
maxLat = maxLatEncoded;
|
||||
}
|
||||
}
|
||||
}
|
||||
setBottomCounter++;
|
||||
|
@ -62,7 +62,11 @@ final class LatLonPointSortField extends SortField {
|
||||
}
|
||||
if (missingValue.getClass() != Double.class)
|
||||
throw new IllegalArgumentException("Missing value can only be of type java.lang.Double, but got " + missingValue.getClass());
|
||||
this.missingValue = missingValue;
|
||||
Double value = (Double) missingValue;
|
||||
if (!Double.isInfinite(value)) {
|
||||
throw new IllegalArgumentException("Missing value can only be Double.NEGATIVE_INFINITY (missing values first) or Double.POSITIVE_INFINITY (missing values last), but got " + value);
|
||||
}
|
||||
this.missingValue = value;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -73,6 +73,82 @@ public class TestLatLonPointDistanceSort extends LuceneTestCase {
|
||||
dir.close();
|
||||
}
|
||||
|
||||
/** Add two points (one doc missing) and sort by distance */
|
||||
public void testMissingLast() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
|
||||
|
||||
// missing
|
||||
Document doc = new Document();
|
||||
iw.addDocument(doc);
|
||||
|
||||
doc = new Document();
|
||||
doc.add(new LatLonPoint("location", 40.718266, -74.007819));
|
||||
iw.addDocument(doc);
|
||||
|
||||
doc = new Document();
|
||||
doc.add(new LatLonPoint("location", 40.7051157, -74.0088305));
|
||||
iw.addDocument(doc);
|
||||
|
||||
IndexReader reader = iw.getReader();
|
||||
IndexSearcher searcher = new IndexSearcher(reader);
|
||||
iw.close();
|
||||
|
||||
Sort sort = new Sort(LatLonPoint.newDistanceSort("location", 40.7143528, -74.0059731));
|
||||
TopDocs td = searcher.search(new MatchAllDocsQuery(), 3, sort);
|
||||
|
||||
FieldDoc d = (FieldDoc) td.scoreDocs[0];
|
||||
assertEquals(462.61748421408186D, (Double)d.fields[0], 0.0D);
|
||||
|
||||
d = (FieldDoc) td.scoreDocs[1];
|
||||
assertEquals(1056.1630445911035D, (Double)d.fields[0], 0.0D);
|
||||
|
||||
d = (FieldDoc) td.scoreDocs[2];
|
||||
assertEquals(Double.POSITIVE_INFINITY, (Double)d.fields[0], 0.0D);
|
||||
|
||||
reader.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
/** Add two points (one doc missing) and sort by distance */
|
||||
public void testMissingFirst() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
|
||||
|
||||
// missing
|
||||
Document doc = new Document();
|
||||
iw.addDocument(doc);
|
||||
|
||||
doc = new Document();
|
||||
doc.add(new LatLonPoint("location", 40.718266, -74.007819));
|
||||
iw.addDocument(doc);
|
||||
|
||||
doc = new Document();
|
||||
doc.add(new LatLonPoint("location", 40.7051157, -74.0088305));
|
||||
iw.addDocument(doc);
|
||||
|
||||
IndexReader reader = iw.getReader();
|
||||
IndexSearcher searcher = new IndexSearcher(reader);
|
||||
iw.close();
|
||||
|
||||
SortField sortField = LatLonPoint.newDistanceSort("location", 40.7143528, -74.0059731);
|
||||
sortField.setMissingValue(Double.NEGATIVE_INFINITY);
|
||||
Sort sort = new Sort(sortField);
|
||||
TopDocs td = searcher.search(new MatchAllDocsQuery(), 3, sort);
|
||||
|
||||
FieldDoc d = (FieldDoc) td.scoreDocs[0];
|
||||
assertEquals(Double.NEGATIVE_INFINITY, (Double)d.fields[0], 0.0D);
|
||||
|
||||
d = (FieldDoc) td.scoreDocs[1];
|
||||
assertEquals(462.61748421408186D, (Double)d.fields[0], 0.0D);
|
||||
|
||||
d = (FieldDoc) td.scoreDocs[2];
|
||||
assertEquals(1056.1630445911035D, (Double)d.fields[0], 0.0D);
|
||||
|
||||
reader.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
/** Run a few iterations with just 10 docs, hopefully easy to debug */
|
||||
public void testRandom() throws Exception {
|
||||
for (int iters = 0; iters < 100; iters++) {
|
||||
@ -141,17 +217,20 @@ public class TestLatLonPointDistanceSort extends LuceneTestCase {
|
||||
RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
|
||||
|
||||
for (int i = 0; i < numDocs; i++) {
|
||||
double latRaw = -90 + 180.0 * random().nextDouble();
|
||||
double lonRaw = -180 + 360.0 * random().nextDouble();
|
||||
// pre-normalize up front, so we can just use quantized value for testing and do simple exact comparisons
|
||||
double lat = LatLonPoint.decodeLatitude(LatLonPoint.encodeLatitude(latRaw));
|
||||
double lon = LatLonPoint.decodeLongitude(LatLonPoint.encodeLongitude(lonRaw));
|
||||
Document doc = new Document();
|
||||
doc.add(new StoredField("id", i));
|
||||
doc.add(new NumericDocValuesField("id", i));
|
||||
doc.add(new LatLonPoint("field", lat, lon));
|
||||
doc.add(new StoredField("lat", lat));
|
||||
doc.add(new StoredField("lon", lon));
|
||||
if (random().nextInt(10) > 7) {
|
||||
double latRaw = -90 + 180.0 * random().nextDouble();
|
||||
double lonRaw = -180 + 360.0 * random().nextDouble();
|
||||
// pre-normalize up front, so we can just use quantized value for testing and do simple exact comparisons
|
||||
double lat = LatLonPoint.decodeLatitude(LatLonPoint.encodeLatitude(latRaw));
|
||||
double lon = LatLonPoint.decodeLongitude(LatLonPoint.encodeLongitude(lonRaw));
|
||||
|
||||
doc.add(new LatLonPoint("field", lat, lon));
|
||||
doc.add(new StoredField("lat", lat));
|
||||
doc.add(new StoredField("lon", lon));
|
||||
} // otherwise "missing"
|
||||
writer.addDocument(doc);
|
||||
}
|
||||
IndexReader reader = writer.getReader();
|
||||
@ -160,14 +239,20 @@ public class TestLatLonPointDistanceSort extends LuceneTestCase {
|
||||
for (int i = 0; i < numQueries; i++) {
|
||||
double lat = -90 + 180.0 * random().nextDouble();
|
||||
double lon = -180 + 360.0 * random().nextDouble();
|
||||
double missingValue = random().nextBoolean() ? Double.POSITIVE_INFINITY : Double.NEGATIVE_INFINITY;
|
||||
|
||||
Result expected[] = new Result[reader.maxDoc()];
|
||||
|
||||
for (int doc = 0; doc < reader.maxDoc(); doc++) {
|
||||
Document targetDoc = reader.document(doc);
|
||||
double docLatitude = targetDoc.getField("lat").numericValue().doubleValue();
|
||||
double docLongitude = targetDoc.getField("lon").numericValue().doubleValue();
|
||||
double distance = GeoDistanceUtils.haversin(lat, lon, docLatitude, docLongitude);
|
||||
final double distance;
|
||||
if (targetDoc.getField("lat") == null) {
|
||||
distance = missingValue; // missing
|
||||
} else {
|
||||
double docLatitude = targetDoc.getField("lat").numericValue().doubleValue();
|
||||
double docLongitude = targetDoc.getField("lon").numericValue().doubleValue();
|
||||
distance = GeoDistanceUtils.haversin(lat, lon, docLatitude, docLongitude);
|
||||
}
|
||||
int id = targetDoc.getField("id").numericValue().intValue();
|
||||
expected[doc] = new Result(id, distance);
|
||||
}
|
||||
@ -177,7 +262,9 @@ public class TestLatLonPointDistanceSort extends LuceneTestCase {
|
||||
// randomize the topN a bit
|
||||
int topN = TestUtil.nextInt(random(), 1, reader.maxDoc());
|
||||
// sort by distance, then ID
|
||||
Sort sort = new Sort(LatLonPoint.newDistanceSort("field", lat, lon),
|
||||
SortField distanceSort = LatLonPoint.newDistanceSort("field", lat, lon);
|
||||
distanceSort.setMissingValue(missingValue);
|
||||
Sort sort = new Sort(distanceSort,
|
||||
new SortField("id", SortField.Type.INT));
|
||||
|
||||
TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), topN, sort);
|
||||
|
Loading…
x
Reference in New Issue
Block a user