mirror of https://github.com/apache/lucene.git
LUCENE-7104: remove "sort missing first" from LatLonPoint.newDistanceSort, and simplify and speed up the code
This commit is contained in:
parent
1660b5630a
commit
02bb6c0155
|
@ -333,8 +333,7 @@ public class LatLonPoint extends Field {
|
||||||
* the hits contains a Double instance with the distance in meters.
|
* the hits contains a Double instance with the distance in meters.
|
||||||
* <p>
|
* <p>
|
||||||
* If a document is missing the field, then by default it is treated as having {@link Double#POSITIVE_INFINITY} distance
|
* If a document is missing the field, then by default it is treated as having {@link Double#POSITIVE_INFINITY} distance
|
||||||
* (missing values sort last). You can change this to sort missing values first by calling
|
* (missing values sort last).
|
||||||
* {@link SortField#setMissingValue(Object) setMissingValue(Double.NEGATIVE_INFINITY)} on the returned SortField.
|
|
||||||
* <p>
|
* <p>
|
||||||
* If a document contains multiple values for the field, the <i>closest</i> distance to the location is used.
|
* If a document contains multiple values for the field, the <i>closest</i> distance to the location is used.
|
||||||
* <p>
|
* <p>
|
||||||
|
|
|
@ -42,7 +42,6 @@ class LatLonPointDistanceComparator extends FieldComparator<Double> implements L
|
||||||
final String field;
|
final String field;
|
||||||
final double latitude;
|
final double latitude;
|
||||||
final double longitude;
|
final double longitude;
|
||||||
final double missingValue;
|
|
||||||
|
|
||||||
final double[] values;
|
final double[] values;
|
||||||
double bottom;
|
double bottom;
|
||||||
|
@ -52,27 +51,22 @@ class LatLonPointDistanceComparator extends FieldComparator<Double> implements L
|
||||||
// current bounding box(es) for the bottom distance on the PQ.
|
// current bounding box(es) for the bottom distance on the PQ.
|
||||||
// these are pre-encoded with LatLonPoint's encoding and
|
// these are pre-encoded with LatLonPoint's encoding and
|
||||||
// used to exclude uncompetitive hits faster.
|
// used to exclude uncompetitive hits faster.
|
||||||
int minLon;
|
int minLon = Integer.MIN_VALUE;
|
||||||
int maxLon;
|
int maxLon = Integer.MAX_VALUE;
|
||||||
int minLat;
|
int minLat = Integer.MIN_VALUE;
|
||||||
int maxLat;
|
int maxLat = Integer.MAX_VALUE;
|
||||||
|
|
||||||
// crossesDateLine is true, then we have a second box to check
|
// second set of longitude ranges to check (for cross-dateline case)
|
||||||
boolean crossesDateLine;
|
int minLon2 = Integer.MAX_VALUE;
|
||||||
int minLon2;
|
|
||||||
int maxLon2;
|
|
||||||
int minLat2;
|
|
||||||
int maxLat2;
|
|
||||||
|
|
||||||
// the number of times setBottom has been called (adversary protection)
|
// the number of times setBottom has been called (adversary protection)
|
||||||
int setBottomCounter = 0;
|
int setBottomCounter = 0;
|
||||||
|
|
||||||
public LatLonPointDistanceComparator(String field, double latitude, double longitude, int numHits, double missingValue) {
|
public LatLonPointDistanceComparator(String field, double latitude, double longitude, int numHits) {
|
||||||
this.field = field;
|
this.field = field;
|
||||||
this.latitude = latitude;
|
this.latitude = latitude;
|
||||||
this.longitude = longitude;
|
this.longitude = longitude;
|
||||||
this.values = new double[numHits];
|
this.values = new double[numHits];
|
||||||
this.missingValue = missingValue;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -90,53 +84,22 @@ class LatLonPointDistanceComparator extends FieldComparator<Double> implements L
|
||||||
// sampling if we get called way too much: don't make gobs of bounding
|
// sampling if we get called way too much: don't make gobs of bounding
|
||||||
// boxes if comparator hits a worst case order (e.g. backwards distance order)
|
// boxes if comparator hits a worst case order (e.g. backwards distance order)
|
||||||
if (setBottomCounter < 1024 || (setBottomCounter & 0x3F) == 0x3F) {
|
if (setBottomCounter < 1024 || (setBottomCounter & 0x3F) == 0x3F) {
|
||||||
// don't pass infinite values to circleToBBox: just make a complete box.
|
GeoRect box = GeoUtils.circleToBBox(longitude, latitude, haversin2(bottom));
|
||||||
if (bottom == missingValue) {
|
// pre-encode our box to our integer encoding, so we don't have to decode
|
||||||
minLat = minLon = Integer.MIN_VALUE;
|
// to double values for uncompetitive hits. This has some cost!
|
||||||
maxLat = maxLon = Integer.MAX_VALUE;
|
minLat = LatLonPoint.encodeLatitude(box.minLat);
|
||||||
crossesDateLine = false;
|
maxLat = LatLonPoint.encodeLatitude(box.maxLat);
|
||||||
|
if (box.crossesDateline()) {
|
||||||
|
// box1
|
||||||
|
minLon = Integer.MIN_VALUE;
|
||||||
|
maxLon = LatLonPoint.encodeLongitude(box.maxLon);
|
||||||
|
// box2
|
||||||
|
minLon2 = LatLonPoint.encodeLongitude(box.minLon);
|
||||||
} else {
|
} else {
|
||||||
assert Double.isFinite(bottom);
|
minLon = LatLonPoint.encodeLongitude(box.minLon);
|
||||||
GeoRect box = GeoUtils.circleToBBox(longitude, latitude, haversin2(bottom));
|
maxLon = LatLonPoint.encodeLongitude(box.maxLon);
|
||||||
// pre-encode our box to our integer encoding, so we don't have to decode
|
// disable box2
|
||||||
// to double values for uncompetitive hits. This has some cost!
|
minLon2 = Integer.MAX_VALUE;
|
||||||
int minLatEncoded = LatLonPoint.encodeLatitude(box.minLat);
|
|
||||||
int maxLatEncoded = LatLonPoint.encodeLatitude(box.maxLat);
|
|
||||||
int minLonEncoded = LatLonPoint.encodeLongitude(box.minLon);
|
|
||||||
int maxLonEncoded = LatLonPoint.encodeLongitude(box.maxLon);
|
|
||||||
// be sure to not introduce quantization error in our optimization, just
|
|
||||||
// round up our encoded box safely in all directions.
|
|
||||||
if (minLatEncoded != Integer.MIN_VALUE) {
|
|
||||||
minLatEncoded--;
|
|
||||||
}
|
|
||||||
if (minLonEncoded != Integer.MIN_VALUE) {
|
|
||||||
minLonEncoded--;
|
|
||||||
}
|
|
||||||
if (maxLatEncoded != Integer.MAX_VALUE) {
|
|
||||||
maxLatEncoded++;
|
|
||||||
}
|
|
||||||
if (maxLonEncoded != Integer.MAX_VALUE) {
|
|
||||||
maxLonEncoded++;
|
|
||||||
}
|
|
||||||
crossesDateLine = box.crossesDateline();
|
|
||||||
// crosses dateline: split
|
|
||||||
if (crossesDateLine) {
|
|
||||||
// box1
|
|
||||||
minLon = Integer.MIN_VALUE;
|
|
||||||
maxLon = maxLonEncoded;
|
|
||||||
minLat = minLatEncoded;
|
|
||||||
maxLat = maxLatEncoded;
|
|
||||||
// box2
|
|
||||||
minLon2 = minLonEncoded;
|
|
||||||
maxLon2 = Integer.MAX_VALUE;
|
|
||||||
minLat2 = minLatEncoded;
|
|
||||||
maxLat2 = maxLatEncoded;
|
|
||||||
} else {
|
|
||||||
minLon = minLonEncoded;
|
|
||||||
maxLon = maxLonEncoded;
|
|
||||||
minLat = minLatEncoded;
|
|
||||||
maxLat = maxLatEncoded;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
setBottomCounter++;
|
setBottomCounter++;
|
||||||
|
@ -153,24 +116,33 @@ class LatLonPointDistanceComparator extends FieldComparator<Double> implements L
|
||||||
|
|
||||||
int numValues = currentDocs.count();
|
int numValues = currentDocs.count();
|
||||||
if (numValues == 0) {
|
if (numValues == 0) {
|
||||||
return Double.compare(bottom, missingValue);
|
return Double.compare(bottom, Double.POSITIVE_INFINITY);
|
||||||
}
|
}
|
||||||
|
|
||||||
double minValue = Double.POSITIVE_INFINITY;
|
int cmp = -1;
|
||||||
for (int i = 0; i < numValues; i++) {
|
for (int i = 0; i < numValues; i++) {
|
||||||
long encoded = currentDocs.valueAt(i);
|
long encoded = currentDocs.valueAt(i);
|
||||||
|
|
||||||
|
// test bounding box
|
||||||
int latitudeBits = (int)(encoded >> 32);
|
int latitudeBits = (int)(encoded >> 32);
|
||||||
|
if (latitudeBits < minLat || latitudeBits > maxLat) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
int longitudeBits = (int)(encoded & 0xFFFFFFFF);
|
int longitudeBits = (int)(encoded & 0xFFFFFFFF);
|
||||||
boolean outsideBox = ((latitudeBits < minLat || longitudeBits < minLon || latitudeBits > maxLat || longitudeBits > maxLon) &&
|
if ((longitudeBits < minLon || longitudeBits > maxLon) && (longitudeBits < minLon2)) {
|
||||||
(crossesDateLine == false || latitudeBits < minLat2 || longitudeBits < minLon2 || latitudeBits > maxLat2 || longitudeBits > maxLon2));
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
// only compute actual distance if it's inside "competitive bounding box"
|
// only compute actual distance if it's inside "competitive bounding box"
|
||||||
if (outsideBox == false) {
|
double docLatitude = LatLonPoint.decodeLatitude(latitudeBits);
|
||||||
double docLatitude = LatLonPoint.decodeLatitude(latitudeBits);
|
double docLongitude = LatLonPoint.decodeLongitude(longitudeBits);
|
||||||
double docLongitude = LatLonPoint.decodeLongitude(longitudeBits);
|
cmp = Math.max(cmp, Double.compare(bottom, haversin1(latitude, longitude, docLatitude, docLongitude)));
|
||||||
minValue = Math.min(minValue, haversin1(latitude, longitude, docLatitude, docLongitude));
|
// once we compete in the PQ, no need to continue.
|
||||||
|
if (cmp > 0) {
|
||||||
|
return cmp;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return Double.compare(bottom, minValue);
|
return cmp;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -204,12 +176,8 @@ class LatLonPointDistanceComparator extends FieldComparator<Double> implements L
|
||||||
double sortKey(int doc) {
|
double sortKey(int doc) {
|
||||||
currentDocs.setDocument(doc);
|
currentDocs.setDocument(doc);
|
||||||
|
|
||||||
int numValues = currentDocs.count();
|
|
||||||
if (numValues == 0) {
|
|
||||||
return missingValue;
|
|
||||||
}
|
|
||||||
|
|
||||||
double minValue = Double.POSITIVE_INFINITY;
|
double minValue = Double.POSITIVE_INFINITY;
|
||||||
|
int numValues = currentDocs.count();
|
||||||
for (int i = 0; i < numValues; i++) {
|
for (int i = 0; i < numValues; i++) {
|
||||||
long encoded = currentDocs.valueAt(i);
|
long encoded = currentDocs.valueAt(i);
|
||||||
double docLatitude = LatLonPoint.decodeLatitude((int)(encoded >> 32));
|
double docLatitude = LatLonPoint.decodeLatitude((int)(encoded >> 32));
|
||||||
|
|
|
@ -142,9 +142,11 @@ final class LatLonPointDistanceQuery extends Query {
|
||||||
double latMax = LatLonPoint.decodeLatitude(maxPackedValue, 0);
|
double latMax = LatLonPoint.decodeLatitude(maxPackedValue, 0);
|
||||||
double lonMax = LatLonPoint.decodeLongitude(maxPackedValue, Integer.BYTES);
|
double lonMax = LatLonPoint.decodeLongitude(maxPackedValue, Integer.BYTES);
|
||||||
|
|
||||||
if ((latMax < box1.minLat || lonMax < box1.minLon || latMin > box1.maxLat || lonMin > box1.maxLon) &&
|
if (latMax < box1.minLat || latMin > box1.maxLat) {
|
||||||
(box2 == null || latMax < box2.minLat || lonMax < box2.minLon || latMin > box2.maxLat || lonMin > box2.maxLon)) {
|
// latitude out of bounding box range
|
||||||
// we are fully outside of bounding box(es), don't proceed any further.
|
return Relation.CELL_OUTSIDE_QUERY;
|
||||||
|
} else if ((lonMax < box1.minLon || lonMin > box1.maxLon) && (box2 == null || lonMax < box2.minLon)) {
|
||||||
|
// longitude out of bounding box range
|
||||||
return Relation.CELL_OUTSIDE_QUERY;
|
return Relation.CELL_OUTSIDE_QUERY;
|
||||||
} else if (lonMax - longitude < 90 && longitude - lonMin < 90 &&
|
} else if (lonMax - longitude < 90 && longitude - lonMin < 90 &&
|
||||||
GeoDistanceUtils.haversin(latitude, longitude, latMin, lonMin) <= radiusMeters &&
|
GeoDistanceUtils.haversin(latitude, longitude, latMin, lonMin) <= radiusMeters &&
|
||||||
|
|
|
@ -47,7 +47,7 @@ final class LatLonPointSortField extends SortField {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public FieldComparator<?> getComparator(int numHits, int sortPos) throws IOException {
|
public FieldComparator<?> getComparator(int numHits, int sortPos) throws IOException {
|
||||||
return new LatLonPointDistanceComparator(getField(), latitude, longitude, numHits, getMissingValue());
|
return new LatLonPointDistanceComparator(getField(), latitude, longitude, numHits);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -57,16 +57,10 @@ final class LatLonPointSortField extends SortField {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void setMissingValue(Object missingValue) {
|
public void setMissingValue(Object missingValue) {
|
||||||
if (missingValue == null) {
|
if (Double.valueOf(Double.POSITIVE_INFINITY).equals(missingValue) == false) {
|
||||||
throw new IllegalArgumentException("Missing value cannot be null");
|
throw new IllegalArgumentException("Missing value can only be Double.POSITIVE_INFINITY (missing values last), but got " + missingValue);
|
||||||
}
|
}
|
||||||
if (missingValue.getClass() != Double.class)
|
this.missingValue = missingValue;
|
||||||
throw new IllegalArgumentException("Missing value can only be of type java.lang.Double, but got " + missingValue.getClass());
|
|
||||||
Double value = (Double) missingValue;
|
|
||||||
if (!Double.isInfinite(value)) {
|
|
||||||
throw new IllegalArgumentException("Missing value can only be Double.NEGATIVE_INFINITY (missing values first) or Double.POSITIVE_INFINITY (missing values last), but got " + value);
|
|
||||||
}
|
|
||||||
this.missingValue = value;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -28,6 +28,8 @@ import org.apache.lucene.search.Sort;
|
||||||
import org.apache.lucene.search.SortField;
|
import org.apache.lucene.search.SortField;
|
||||||
import org.apache.lucene.search.TopDocs;
|
import org.apache.lucene.search.TopDocs;
|
||||||
import org.apache.lucene.spatial.util.GeoDistanceUtils;
|
import org.apache.lucene.spatial.util.GeoDistanceUtils;
|
||||||
|
import org.apache.lucene.spatial.util.GeoRect;
|
||||||
|
import org.apache.lucene.spatial.util.GeoUtils;
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
import org.apache.lucene.util.LuceneTestCase;
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
import org.apache.lucene.util.TestUtil;
|
import org.apache.lucene.util.TestUtil;
|
||||||
|
@ -110,45 +112,6 @@ public class TestLatLonPointDistanceSort extends LuceneTestCase {
|
||||||
dir.close();
|
dir.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Add two points (one doc missing) and sort by distance */
|
|
||||||
public void testMissingFirst() throws Exception {
|
|
||||||
Directory dir = newDirectory();
|
|
||||||
RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
|
|
||||||
|
|
||||||
// missing
|
|
||||||
Document doc = new Document();
|
|
||||||
iw.addDocument(doc);
|
|
||||||
|
|
||||||
doc = new Document();
|
|
||||||
doc.add(new LatLonPoint("location", 40.718266, -74.007819));
|
|
||||||
iw.addDocument(doc);
|
|
||||||
|
|
||||||
doc = new Document();
|
|
||||||
doc.add(new LatLonPoint("location", 40.7051157, -74.0088305));
|
|
||||||
iw.addDocument(doc);
|
|
||||||
|
|
||||||
IndexReader reader = iw.getReader();
|
|
||||||
IndexSearcher searcher = newSearcher(reader);
|
|
||||||
iw.close();
|
|
||||||
|
|
||||||
SortField sortField = LatLonPoint.newDistanceSort("location", 40.7143528, -74.0059731);
|
|
||||||
sortField.setMissingValue(Double.NEGATIVE_INFINITY);
|
|
||||||
Sort sort = new Sort(sortField);
|
|
||||||
TopDocs td = searcher.search(new MatchAllDocsQuery(), 3, sort);
|
|
||||||
|
|
||||||
FieldDoc d = (FieldDoc) td.scoreDocs[0];
|
|
||||||
assertEquals(Double.NEGATIVE_INFINITY, (Double)d.fields[0], 0.0D);
|
|
||||||
|
|
||||||
d = (FieldDoc) td.scoreDocs[1];
|
|
||||||
assertEquals(462.61748421408186D, (Double)d.fields[0], 0.0D);
|
|
||||||
|
|
||||||
d = (FieldDoc) td.scoreDocs[2];
|
|
||||||
assertEquals(1056.1630445911035D, (Double)d.fields[0], 0.0D);
|
|
||||||
|
|
||||||
reader.close();
|
|
||||||
dir.close();
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Run a few iterations with just 10 docs, hopefully easy to debug */
|
/** Run a few iterations with just 10 docs, hopefully easy to debug */
|
||||||
public void testRandom() throws Exception {
|
public void testRandom() throws Exception {
|
||||||
for (int iters = 0; iters < 100; iters++) {
|
for (int iters = 0; iters < 100; iters++) {
|
||||||
|
@ -239,7 +202,7 @@ public class TestLatLonPointDistanceSort extends LuceneTestCase {
|
||||||
for (int i = 0; i < numQueries; i++) {
|
for (int i = 0; i < numQueries; i++) {
|
||||||
double lat = -90 + 180.0 * random().nextDouble();
|
double lat = -90 + 180.0 * random().nextDouble();
|
||||||
double lon = -180 + 360.0 * random().nextDouble();
|
double lon = -180 + 360.0 * random().nextDouble();
|
||||||
double missingValue = random().nextBoolean() ? Double.POSITIVE_INFINITY : Double.NEGATIVE_INFINITY;
|
double missingValue = Double.POSITIVE_INFINITY;
|
||||||
|
|
||||||
Result expected[] = new Result[reader.maxDoc()];
|
Result expected[] = new Result[reader.maxDoc()];
|
||||||
|
|
||||||
|
@ -309,4 +272,18 @@ public class TestLatLonPointDistanceSort extends LuceneTestCase {
|
||||||
assertEquals(expected, actual);
|
assertEquals(expected, actual);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Test infinite radius covers whole earth */
|
||||||
|
public void testInfiniteRect() {
|
||||||
|
for (int i = 0; i < 100000; i++) {
|
||||||
|
double centerLat = -90 + 180.0 * random().nextDouble();
|
||||||
|
double centerLon = -180 + 360.0 * random().nextDouble();
|
||||||
|
GeoRect rect = GeoUtils.circleToBBox(centerLat, centerLon, Double.POSITIVE_INFINITY);
|
||||||
|
assertEquals(-180.0, rect.minLon, 0.0D);
|
||||||
|
assertEquals(180.0, rect.maxLon, 0.0D);
|
||||||
|
assertEquals(-90.0, rect.minLat, 0.0D);
|
||||||
|
assertEquals(90.0, rect.maxLat, 0.0D);
|
||||||
|
assertFalse(rect.crossesDateline());
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue