mirror of https://github.com/apache/lucene.git
LUCENE-7104: remove "sort missing first" from LatLonPoint.newDistanceSort and simplify/speedup code
This commit is contained in:
parent
1660b5630a
commit
02bb6c0155
|
@ -333,8 +333,7 @@ public class LatLonPoint extends Field {
|
|||
* the hits contains a Double instance with the distance in meters.
|
||||
* <p>
|
||||
* If a document is missing the field, then by default it is treated as having {@link Double#POSITIVE_INFINITY} distance
|
||||
* (missing values sort last). You can change this to sort missing values first by calling
|
||||
* {@link SortField#setMissingValue(Object) setMissingValue(Double.NEGATIVE_INFINITY)} on the returned SortField.
|
||||
* (missing values sort last).
|
||||
* <p>
|
||||
* If a document contains multiple values for the field, the <i>closest</i> distance to the location is used.
|
||||
* <p>
|
||||
|
|
|
@ -42,7 +42,6 @@ class LatLonPointDistanceComparator extends FieldComparator<Double> implements L
|
|||
final String field;
|
||||
final double latitude;
|
||||
final double longitude;
|
||||
final double missingValue;
|
||||
|
||||
final double[] values;
|
||||
double bottom;
|
||||
|
@ -52,27 +51,22 @@ class LatLonPointDistanceComparator extends FieldComparator<Double> implements L
|
|||
// current bounding box(es) for the bottom distance on the PQ.
|
||||
// these are pre-encoded with LatLonPoint's encoding and
|
||||
// used to exclude uncompetitive hits faster.
|
||||
int minLon;
|
||||
int maxLon;
|
||||
int minLat;
|
||||
int maxLat;
|
||||
int minLon = Integer.MIN_VALUE;
|
||||
int maxLon = Integer.MAX_VALUE;
|
||||
int minLat = Integer.MIN_VALUE;
|
||||
int maxLat = Integer.MAX_VALUE;
|
||||
|
||||
// crossesDateLine is true, then we have a second box to check
|
||||
boolean crossesDateLine;
|
||||
int minLon2;
|
||||
int maxLon2;
|
||||
int minLat2;
|
||||
int maxLat2;
|
||||
// second set of longitude ranges to check (for cross-dateline case)
|
||||
int minLon2 = Integer.MAX_VALUE;
|
||||
|
||||
// the number of times setBottom has been called (adversary protection)
|
||||
int setBottomCounter = 0;
|
||||
|
||||
public LatLonPointDistanceComparator(String field, double latitude, double longitude, int numHits, double missingValue) {
|
||||
public LatLonPointDistanceComparator(String field, double latitude, double longitude, int numHits) {
|
||||
this.field = field;
|
||||
this.latitude = latitude;
|
||||
this.longitude = longitude;
|
||||
this.values = new double[numHits];
|
||||
this.missingValue = missingValue;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -90,53 +84,22 @@ class LatLonPointDistanceComparator extends FieldComparator<Double> implements L
|
|||
// sampling if we get called way too much: don't make gobs of bounding
|
||||
// boxes if comparator hits a worst case order (e.g. backwards distance order)
|
||||
if (setBottomCounter < 1024 || (setBottomCounter & 0x3F) == 0x3F) {
|
||||
// don't pass infinite values to circleToBBox: just make a complete box.
|
||||
if (bottom == missingValue) {
|
||||
minLat = minLon = Integer.MIN_VALUE;
|
||||
maxLat = maxLon = Integer.MAX_VALUE;
|
||||
crossesDateLine = false;
|
||||
GeoRect box = GeoUtils.circleToBBox(longitude, latitude, haversin2(bottom));
|
||||
// pre-encode our box to our integer encoding, so we don't have to decode
|
||||
// to double values for uncompetitive hits. This has some cost!
|
||||
minLat = LatLonPoint.encodeLatitude(box.minLat);
|
||||
maxLat = LatLonPoint.encodeLatitude(box.maxLat);
|
||||
if (box.crossesDateline()) {
|
||||
// box1
|
||||
minLon = Integer.MIN_VALUE;
|
||||
maxLon = LatLonPoint.encodeLongitude(box.maxLon);
|
||||
// box2
|
||||
minLon2 = LatLonPoint.encodeLongitude(box.minLon);
|
||||
} else {
|
||||
assert Double.isFinite(bottom);
|
||||
GeoRect box = GeoUtils.circleToBBox(longitude, latitude, haversin2(bottom));
|
||||
// pre-encode our box to our integer encoding, so we don't have to decode
|
||||
// to double values for uncompetitive hits. This has some cost!
|
||||
int minLatEncoded = LatLonPoint.encodeLatitude(box.minLat);
|
||||
int maxLatEncoded = LatLonPoint.encodeLatitude(box.maxLat);
|
||||
int minLonEncoded = LatLonPoint.encodeLongitude(box.minLon);
|
||||
int maxLonEncoded = LatLonPoint.encodeLongitude(box.maxLon);
|
||||
// be sure to not introduce quantization error in our optimization, just
|
||||
// round up our encoded box safely in all directions.
|
||||
if (minLatEncoded != Integer.MIN_VALUE) {
|
||||
minLatEncoded--;
|
||||
}
|
||||
if (minLonEncoded != Integer.MIN_VALUE) {
|
||||
minLonEncoded--;
|
||||
}
|
||||
if (maxLatEncoded != Integer.MAX_VALUE) {
|
||||
maxLatEncoded++;
|
||||
}
|
||||
if (maxLonEncoded != Integer.MAX_VALUE) {
|
||||
maxLonEncoded++;
|
||||
}
|
||||
crossesDateLine = box.crossesDateline();
|
||||
// crosses dateline: split
|
||||
if (crossesDateLine) {
|
||||
// box1
|
||||
minLon = Integer.MIN_VALUE;
|
||||
maxLon = maxLonEncoded;
|
||||
minLat = minLatEncoded;
|
||||
maxLat = maxLatEncoded;
|
||||
// box2
|
||||
minLon2 = minLonEncoded;
|
||||
maxLon2 = Integer.MAX_VALUE;
|
||||
minLat2 = minLatEncoded;
|
||||
maxLat2 = maxLatEncoded;
|
||||
} else {
|
||||
minLon = minLonEncoded;
|
||||
maxLon = maxLonEncoded;
|
||||
minLat = minLatEncoded;
|
||||
maxLat = maxLatEncoded;
|
||||
}
|
||||
minLon = LatLonPoint.encodeLongitude(box.minLon);
|
||||
maxLon = LatLonPoint.encodeLongitude(box.maxLon);
|
||||
// disable box2
|
||||
minLon2 = Integer.MAX_VALUE;
|
||||
}
|
||||
}
|
||||
setBottomCounter++;
|
||||
|
@ -153,24 +116,33 @@ class LatLonPointDistanceComparator extends FieldComparator<Double> implements L
|
|||
|
||||
int numValues = currentDocs.count();
|
||||
if (numValues == 0) {
|
||||
return Double.compare(bottom, missingValue);
|
||||
return Double.compare(bottom, Double.POSITIVE_INFINITY);
|
||||
}
|
||||
|
||||
double minValue = Double.POSITIVE_INFINITY;
|
||||
int cmp = -1;
|
||||
for (int i = 0; i < numValues; i++) {
|
||||
long encoded = currentDocs.valueAt(i);
|
||||
|
||||
// test bounding box
|
||||
int latitudeBits = (int)(encoded >> 32);
|
||||
if (latitudeBits < minLat || latitudeBits > maxLat) {
|
||||
continue;
|
||||
}
|
||||
int longitudeBits = (int)(encoded & 0xFFFFFFFF);
|
||||
boolean outsideBox = ((latitudeBits < minLat || longitudeBits < minLon || latitudeBits > maxLat || longitudeBits > maxLon) &&
|
||||
(crossesDateLine == false || latitudeBits < minLat2 || longitudeBits < minLon2 || latitudeBits > maxLat2 || longitudeBits > maxLon2));
|
||||
if ((longitudeBits < minLon || longitudeBits > maxLon) && (longitudeBits < minLon2)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// only compute actual distance if its inside "competitive bounding box"
|
||||
if (outsideBox == false) {
|
||||
double docLatitude = LatLonPoint.decodeLatitude(latitudeBits);
|
||||
double docLongitude = LatLonPoint.decodeLongitude(longitudeBits);
|
||||
minValue = Math.min(minValue, haversin1(latitude, longitude, docLatitude, docLongitude));
|
||||
double docLatitude = LatLonPoint.decodeLatitude(latitudeBits);
|
||||
double docLongitude = LatLonPoint.decodeLongitude(longitudeBits);
|
||||
cmp = Math.max(cmp, Double.compare(bottom, haversin1(latitude, longitude, docLatitude, docLongitude)));
|
||||
// once we compete in the PQ, no need to continue.
|
||||
if (cmp > 0) {
|
||||
return cmp;
|
||||
}
|
||||
}
|
||||
return Double.compare(bottom, minValue);
|
||||
return cmp;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -204,12 +176,8 @@ class LatLonPointDistanceComparator extends FieldComparator<Double> implements L
|
|||
double sortKey(int doc) {
|
||||
currentDocs.setDocument(doc);
|
||||
|
||||
int numValues = currentDocs.count();
|
||||
if (numValues == 0) {
|
||||
return missingValue;
|
||||
}
|
||||
|
||||
double minValue = Double.POSITIVE_INFINITY;
|
||||
int numValues = currentDocs.count();
|
||||
for (int i = 0; i < numValues; i++) {
|
||||
long encoded = currentDocs.valueAt(i);
|
||||
double docLatitude = LatLonPoint.decodeLatitude((int)(encoded >> 32));
|
||||
|
|
|
@ -142,9 +142,11 @@ final class LatLonPointDistanceQuery extends Query {
|
|||
double latMax = LatLonPoint.decodeLatitude(maxPackedValue, 0);
|
||||
double lonMax = LatLonPoint.decodeLongitude(maxPackedValue, Integer.BYTES);
|
||||
|
||||
if ((latMax < box1.minLat || lonMax < box1.minLon || latMin > box1.maxLat || lonMin > box1.maxLon) &&
|
||||
(box2 == null || latMax < box2.minLat || lonMax < box2.minLon || latMin > box2.maxLat || lonMin > box2.maxLon)) {
|
||||
// we are fully outside of bounding box(es), don't proceed any further.
|
||||
if (latMax < box1.minLat || latMin > box1.maxLat) {
|
||||
// latitude out of bounding box range
|
||||
return Relation.CELL_OUTSIDE_QUERY;
|
||||
} else if ((lonMax < box1.minLon || lonMin > box1.maxLon) && (box2 == null || lonMax < box2.minLon)) {
|
||||
// longitude out of bounding box range
|
||||
return Relation.CELL_OUTSIDE_QUERY;
|
||||
} else if (lonMax - longitude < 90 && longitude - lonMin < 90 &&
|
||||
GeoDistanceUtils.haversin(latitude, longitude, latMin, lonMin) <= radiusMeters &&
|
||||
|
|
|
@ -47,7 +47,7 @@ final class LatLonPointSortField extends SortField {
|
|||
|
||||
@Override
|
||||
public FieldComparator<?> getComparator(int numHits, int sortPos) throws IOException {
|
||||
return new LatLonPointDistanceComparator(getField(), latitude, longitude, numHits, getMissingValue());
|
||||
return new LatLonPointDistanceComparator(getField(), latitude, longitude, numHits);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -57,16 +57,10 @@ final class LatLonPointSortField extends SortField {
|
|||
|
||||
@Override
|
||||
public void setMissingValue(Object missingValue) {
|
||||
if (missingValue == null) {
|
||||
throw new IllegalArgumentException("Missing value cannot be null");
|
||||
if (Double.valueOf(Double.POSITIVE_INFINITY).equals(missingValue) == false) {
|
||||
throw new IllegalArgumentException("Missing value can only be Double.POSITIVE_INFINITY (missing values last), but got " + missingValue);
|
||||
}
|
||||
if (missingValue.getClass() != Double.class)
|
||||
throw new IllegalArgumentException("Missing value can only be of type java.lang.Double, but got " + missingValue.getClass());
|
||||
Double value = (Double) missingValue;
|
||||
if (!Double.isInfinite(value)) {
|
||||
throw new IllegalArgumentException("Missing value can only be Double.NEGATIVE_INFINITY (missing values first) or Double.POSITIVE_INFINITY (missing values last), but got " + value);
|
||||
}
|
||||
this.missingValue = value;
|
||||
this.missingValue = missingValue;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -28,6 +28,8 @@ import org.apache.lucene.search.Sort;
|
|||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
import org.apache.lucene.spatial.util.GeoDistanceUtils;
|
||||
import org.apache.lucene.spatial.util.GeoRect;
|
||||
import org.apache.lucene.spatial.util.GeoUtils;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
|
@ -110,45 +112,6 @@ public class TestLatLonPointDistanceSort extends LuceneTestCase {
|
|||
dir.close();
|
||||
}
|
||||
|
||||
/** Add two points (one doc missing) and sort by distance */
|
||||
public void testMissingFirst() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
|
||||
|
||||
// missing
|
||||
Document doc = new Document();
|
||||
iw.addDocument(doc);
|
||||
|
||||
doc = new Document();
|
||||
doc.add(new LatLonPoint("location", 40.718266, -74.007819));
|
||||
iw.addDocument(doc);
|
||||
|
||||
doc = new Document();
|
||||
doc.add(new LatLonPoint("location", 40.7051157, -74.0088305));
|
||||
iw.addDocument(doc);
|
||||
|
||||
IndexReader reader = iw.getReader();
|
||||
IndexSearcher searcher = newSearcher(reader);
|
||||
iw.close();
|
||||
|
||||
SortField sortField = LatLonPoint.newDistanceSort("location", 40.7143528, -74.0059731);
|
||||
sortField.setMissingValue(Double.NEGATIVE_INFINITY);
|
||||
Sort sort = new Sort(sortField);
|
||||
TopDocs td = searcher.search(new MatchAllDocsQuery(), 3, sort);
|
||||
|
||||
FieldDoc d = (FieldDoc) td.scoreDocs[0];
|
||||
assertEquals(Double.NEGATIVE_INFINITY, (Double)d.fields[0], 0.0D);
|
||||
|
||||
d = (FieldDoc) td.scoreDocs[1];
|
||||
assertEquals(462.61748421408186D, (Double)d.fields[0], 0.0D);
|
||||
|
||||
d = (FieldDoc) td.scoreDocs[2];
|
||||
assertEquals(1056.1630445911035D, (Double)d.fields[0], 0.0D);
|
||||
|
||||
reader.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
/** Run a few iterations with just 10 docs, hopefully easy to debug */
|
||||
public void testRandom() throws Exception {
|
||||
for (int iters = 0; iters < 100; iters++) {
|
||||
|
@ -239,7 +202,7 @@ public class TestLatLonPointDistanceSort extends LuceneTestCase {
|
|||
for (int i = 0; i < numQueries; i++) {
|
||||
double lat = -90 + 180.0 * random().nextDouble();
|
||||
double lon = -180 + 360.0 * random().nextDouble();
|
||||
double missingValue = random().nextBoolean() ? Double.POSITIVE_INFINITY : Double.NEGATIVE_INFINITY;
|
||||
double missingValue = Double.POSITIVE_INFINITY;
|
||||
|
||||
Result expected[] = new Result[reader.maxDoc()];
|
||||
|
||||
|
@ -309,4 +272,18 @@ public class TestLatLonPointDistanceSort extends LuceneTestCase {
|
|||
assertEquals(expected, actual);
|
||||
}
|
||||
}
|
||||
|
||||
/** Test infinite radius covers whole earth */
|
||||
public void testInfiniteRect() {
|
||||
for (int i = 0; i < 100000; i++) {
|
||||
double centerLat = -90 + 180.0 * random().nextDouble();
|
||||
double centerLon = -180 + 360.0 * random().nextDouble();
|
||||
GeoRect rect = GeoUtils.circleToBBox(centerLat, centerLon, Double.POSITIVE_INFINITY);
|
||||
assertEquals(-180.0, rect.minLon, 0.0D);
|
||||
assertEquals(180.0, rect.maxLon, 0.0D);
|
||||
assertEquals(-90.0, rect.minLat, 0.0D);
|
||||
assertEquals(90.0, rect.maxLat, 0.0D);
|
||||
assertFalse(rect.crossesDateline());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue