LUCENE-7104: remove "sort missing first" from LatLonPoint.newDistanceSort and simplify/speedup code

This commit is contained in:
Robert Muir 2016-03-14 19:07:30 -04:00
parent 1660b5630a
commit 02bb6c0155
5 changed files with 68 additions and 128 deletions

View File

@ -333,8 +333,7 @@ public class LatLonPoint extends Field {
* the hits contains a Double instance with the distance in meters.
* <p>
* If a document is missing the field, then by default it is treated as having {@link Double#POSITIVE_INFINITY} distance
* (missing values sort last). You can change this to sort missing values first by calling
* {@link SortField#setMissingValue(Object) setMissingValue(Double.NEGATIVE_INFINITY)} on the returned SortField.
* (missing values sort last).
* <p>
* If a document contains multiple values for the field, the <i>closest</i> distance to the location is used.
* <p>

View File

@ -42,7 +42,6 @@ class LatLonPointDistanceComparator extends FieldComparator<Double> implements L
final String field;
final double latitude;
final double longitude;
final double missingValue;
final double[] values;
double bottom;
@ -52,27 +51,22 @@ class LatLonPointDistanceComparator extends FieldComparator<Double> implements L
// current bounding box(es) for the bottom distance on the PQ.
// these are pre-encoded with LatLonPoint's encoding and
// used to exclude uncompetitive hits faster.
int minLon;
int maxLon;
int minLat;
int maxLat;
int minLon = Integer.MIN_VALUE;
int maxLon = Integer.MAX_VALUE;
int minLat = Integer.MIN_VALUE;
int maxLat = Integer.MAX_VALUE;
// crossesDateLine is true, then we have a second box to check
boolean crossesDateLine;
int minLon2;
int maxLon2;
int minLat2;
int maxLat2;
// second set of longitude ranges to check (for cross-dateline case)
int minLon2 = Integer.MAX_VALUE;
// the number of times setBottom has been called (adversary protection)
int setBottomCounter = 0;
public LatLonPointDistanceComparator(String field, double latitude, double longitude, int numHits, double missingValue) {
public LatLonPointDistanceComparator(String field, double latitude, double longitude, int numHits) {
this.field = field;
this.latitude = latitude;
this.longitude = longitude;
this.values = new double[numHits];
this.missingValue = missingValue;
}
@Override
@ -90,53 +84,22 @@ class LatLonPointDistanceComparator extends FieldComparator<Double> implements L
// sampling if we get called way too much: don't make gobs of bounding
// boxes if comparator hits a worst case order (e.g. backwards distance order)
if (setBottomCounter < 1024 || (setBottomCounter & 0x3F) == 0x3F) {
// don't pass infinite values to circleToBBox: just make a complete box.
if (bottom == missingValue) {
minLat = minLon = Integer.MIN_VALUE;
maxLat = maxLon = Integer.MAX_VALUE;
crossesDateLine = false;
} else {
assert Double.isFinite(bottom);
GeoRect box = GeoUtils.circleToBBox(longitude, latitude, haversin2(bottom));
// pre-encode our box to our integer encoding, so we don't have to decode
// to double values for uncompetitive hits. This has some cost!
int minLatEncoded = LatLonPoint.encodeLatitude(box.minLat);
int maxLatEncoded = LatLonPoint.encodeLatitude(box.maxLat);
int minLonEncoded = LatLonPoint.encodeLongitude(box.minLon);
int maxLonEncoded = LatLonPoint.encodeLongitude(box.maxLon);
// be sure to not introduce quantization error in our optimization, just
// round up our encoded box safely in all directions.
if (minLatEncoded != Integer.MIN_VALUE) {
minLatEncoded--;
}
if (minLonEncoded != Integer.MIN_VALUE) {
minLonEncoded--;
}
if (maxLatEncoded != Integer.MAX_VALUE) {
maxLatEncoded++;
}
if (maxLonEncoded != Integer.MAX_VALUE) {
maxLonEncoded++;
}
crossesDateLine = box.crossesDateline();
// crosses dateline: split
if (crossesDateLine) {
minLat = LatLonPoint.encodeLatitude(box.minLat);
maxLat = LatLonPoint.encodeLatitude(box.maxLat);
if (box.crossesDateline()) {
// box1
minLon = Integer.MIN_VALUE;
maxLon = maxLonEncoded;
minLat = minLatEncoded;
maxLat = maxLatEncoded;
maxLon = LatLonPoint.encodeLongitude(box.maxLon);
// box2
minLon2 = minLonEncoded;
maxLon2 = Integer.MAX_VALUE;
minLat2 = minLatEncoded;
maxLat2 = maxLatEncoded;
minLon2 = LatLonPoint.encodeLongitude(box.minLon);
} else {
minLon = minLonEncoded;
maxLon = maxLonEncoded;
minLat = minLatEncoded;
maxLat = maxLatEncoded;
}
minLon = LatLonPoint.encodeLongitude(box.minLon);
maxLon = LatLonPoint.encodeLongitude(box.maxLon);
// disable box2
minLon2 = Integer.MAX_VALUE;
}
}
setBottomCounter++;
@ -153,24 +116,33 @@ class LatLonPointDistanceComparator extends FieldComparator<Double> implements L
int numValues = currentDocs.count();
if (numValues == 0) {
return Double.compare(bottom, missingValue);
return Double.compare(bottom, Double.POSITIVE_INFINITY);
}
double minValue = Double.POSITIVE_INFINITY;
int cmp = -1;
for (int i = 0; i < numValues; i++) {
long encoded = currentDocs.valueAt(i);
// test bounding box
int latitudeBits = (int)(encoded >> 32);
if (latitudeBits < minLat || latitudeBits > maxLat) {
continue;
}
int longitudeBits = (int)(encoded & 0xFFFFFFFF);
boolean outsideBox = ((latitudeBits < minLat || longitudeBits < minLon || latitudeBits > maxLat || longitudeBits > maxLon) &&
(crossesDateLine == false || latitudeBits < minLat2 || longitudeBits < minLon2 || latitudeBits > maxLat2 || longitudeBits > maxLon2));
if ((longitudeBits < minLon || longitudeBits > maxLon) && (longitudeBits < minLon2)) {
continue;
}
// only compute actual distance if its inside "competitive bounding box"
if (outsideBox == false) {
double docLatitude = LatLonPoint.decodeLatitude(latitudeBits);
double docLongitude = LatLonPoint.decodeLongitude(longitudeBits);
minValue = Math.min(minValue, haversin1(latitude, longitude, docLatitude, docLongitude));
cmp = Math.max(cmp, Double.compare(bottom, haversin1(latitude, longitude, docLatitude, docLongitude)));
// once we compete in the PQ, no need to continue.
if (cmp > 0) {
return cmp;
}
}
return Double.compare(bottom, minValue);
return cmp;
}
@Override
@ -204,12 +176,8 @@ class LatLonPointDistanceComparator extends FieldComparator<Double> implements L
double sortKey(int doc) {
currentDocs.setDocument(doc);
int numValues = currentDocs.count();
if (numValues == 0) {
return missingValue;
}
double minValue = Double.POSITIVE_INFINITY;
int numValues = currentDocs.count();
for (int i = 0; i < numValues; i++) {
long encoded = currentDocs.valueAt(i);
double docLatitude = LatLonPoint.decodeLatitude((int)(encoded >> 32));

View File

@ -142,9 +142,11 @@ final class LatLonPointDistanceQuery extends Query {
double latMax = LatLonPoint.decodeLatitude(maxPackedValue, 0);
double lonMax = LatLonPoint.decodeLongitude(maxPackedValue, Integer.BYTES);
if ((latMax < box1.minLat || lonMax < box1.minLon || latMin > box1.maxLat || lonMin > box1.maxLon) &&
(box2 == null || latMax < box2.minLat || lonMax < box2.minLon || latMin > box2.maxLat || lonMin > box2.maxLon)) {
// we are fully outside of bounding box(es), don't proceed any further.
if (latMax < box1.minLat || latMin > box1.maxLat) {
// latitude out of bounding box range
return Relation.CELL_OUTSIDE_QUERY;
} else if ((lonMax < box1.minLon || lonMin > box1.maxLon) && (box2 == null || lonMax < box2.minLon)) {
// longitude out of bounding box range
return Relation.CELL_OUTSIDE_QUERY;
} else if (lonMax - longitude < 90 && longitude - lonMin < 90 &&
GeoDistanceUtils.haversin(latitude, longitude, latMin, lonMin) <= radiusMeters &&

View File

@ -47,7 +47,7 @@ final class LatLonPointSortField extends SortField {
@Override
public FieldComparator<?> getComparator(int numHits, int sortPos) throws IOException {
return new LatLonPointDistanceComparator(getField(), latitude, longitude, numHits, getMissingValue());
return new LatLonPointDistanceComparator(getField(), latitude, longitude, numHits);
}
@Override
@ -57,16 +57,10 @@ final class LatLonPointSortField extends SortField {
@Override
public void setMissingValue(Object missingValue) {
if (missingValue == null) {
throw new IllegalArgumentException("Missing value cannot be null");
if (Double.valueOf(Double.POSITIVE_INFINITY).equals(missingValue) == false) {
throw new IllegalArgumentException("Missing value can only be Double.POSITIVE_INFINITY (missing values last), but got " + missingValue);
}
if (missingValue.getClass() != Double.class)
throw new IllegalArgumentException("Missing value can only be of type java.lang.Double, but got " + missingValue.getClass());
Double value = (Double) missingValue;
if (!Double.isInfinite(value)) {
throw new IllegalArgumentException("Missing value can only be Double.NEGATIVE_INFINITY (missing values first) or Double.POSITIVE_INFINITY (missing values last), but got " + value);
}
this.missingValue = value;
this.missingValue = missingValue;
}
@Override

View File

@ -28,6 +28,8 @@ import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.spatial.util.GeoDistanceUtils;
import org.apache.lucene.spatial.util.GeoRect;
import org.apache.lucene.spatial.util.GeoUtils;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
@ -110,45 +112,6 @@ public class TestLatLonPointDistanceSort extends LuceneTestCase {
dir.close();
}
/** Add two points (one doc missing) and sort by distance */
public void testMissingFirst() throws Exception {
Directory dir = newDirectory();
RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
// missing
Document doc = new Document();
iw.addDocument(doc);
doc = new Document();
doc.add(new LatLonPoint("location", 40.718266, -74.007819));
iw.addDocument(doc);
doc = new Document();
doc.add(new LatLonPoint("location", 40.7051157, -74.0088305));
iw.addDocument(doc);
IndexReader reader = iw.getReader();
IndexSearcher searcher = newSearcher(reader);
iw.close();
SortField sortField = LatLonPoint.newDistanceSort("location", 40.7143528, -74.0059731);
sortField.setMissingValue(Double.NEGATIVE_INFINITY);
Sort sort = new Sort(sortField);
TopDocs td = searcher.search(new MatchAllDocsQuery(), 3, sort);
FieldDoc d = (FieldDoc) td.scoreDocs[0];
assertEquals(Double.NEGATIVE_INFINITY, (Double)d.fields[0], 0.0D);
d = (FieldDoc) td.scoreDocs[1];
assertEquals(462.61748421408186D, (Double)d.fields[0], 0.0D);
d = (FieldDoc) td.scoreDocs[2];
assertEquals(1056.1630445911035D, (Double)d.fields[0], 0.0D);
reader.close();
dir.close();
}
/** Run a few iterations with just 10 docs, hopefully easy to debug */
public void testRandom() throws Exception {
for (int iters = 0; iters < 100; iters++) {
@ -239,7 +202,7 @@ public class TestLatLonPointDistanceSort extends LuceneTestCase {
for (int i = 0; i < numQueries; i++) {
double lat = -90 + 180.0 * random().nextDouble();
double lon = -180 + 360.0 * random().nextDouble();
double missingValue = random().nextBoolean() ? Double.POSITIVE_INFINITY : Double.NEGATIVE_INFINITY;
double missingValue = Double.POSITIVE_INFINITY;
Result expected[] = new Result[reader.maxDoc()];
@ -309,4 +272,18 @@ public class TestLatLonPointDistanceSort extends LuceneTestCase {
assertEquals(expected, actual);
}
}
/** Test infinite radius covers whole earth */
public void testInfiniteRect() {
for (int i = 0; i < 100000; i++) {
double centerLat = -90 + 180.0 * random().nextDouble();
double centerLon = -180 + 360.0 * random().nextDouble();
GeoRect rect = GeoUtils.circleToBBox(centerLat, centerLon, Double.POSITIVE_INFINITY);
assertEquals(-180.0, rect.minLon, 0.0D);
assertEquals(180.0, rect.maxLon, 0.0D);
assertEquals(-90.0, rect.minLat, 0.0D);
assertEquals(90.0, rect.maxLat, 0.0D);
assertFalse(rect.crossesDateline());
}
}
}