mirror of https://github.com/apache/lucene.git
LUCENE-7099: speed up embarrasingly slow sandbox/ distance sort
This commit is contained in:
parent
b3ee746a8b
commit
e169050dc3
|
@ -27,8 +27,16 @@ import org.apache.lucene.search.FieldComparator;
|
|||
import org.apache.lucene.search.LeafFieldComparator;
|
||||
import org.apache.lucene.search.Scorer;
|
||||
import org.apache.lucene.spatial.util.GeoDistanceUtils;
|
||||
import org.apache.lucene.spatial.util.GeoRect;
|
||||
import org.apache.lucene.spatial.util.GeoUtils;
|
||||
|
||||
/** Compares docs by distance from an origin */
|
||||
/**
|
||||
* Compares documents by distance from an origin point
|
||||
* <p>
|
||||
* When the least competitive item on the priority queue changes (setBottom), we recompute
|
||||
* a bounding box representing competitive distance to the top-N. Then in compareBottom, we can
|
||||
* quickly reject hits based on bounding box alone without computing distance for every element.
|
||||
*/
|
||||
class LatLonPointDistanceComparator extends FieldComparator<Double> implements LeafFieldComparator {
|
||||
final String field;
|
||||
final double latitude;
|
||||
|
@ -40,6 +48,24 @@ class LatLonPointDistanceComparator extends FieldComparator<Double> implements L
|
|||
double topValue;
|
||||
SortedNumericDocValues currentDocs;
|
||||
|
||||
// current bounding box(es) for the bottom distance on the PQ.
|
||||
// these are pre-encoded with LatLonPoint's encoding and
|
||||
// used to exclude uncompetitive hits faster.
|
||||
int minLon;
|
||||
int maxLon;
|
||||
int minLat;
|
||||
int maxLat;
|
||||
|
||||
// crossesDateLine is true, then we have a second box to check
|
||||
boolean crossesDateLine;
|
||||
int minLon2;
|
||||
int maxLon2;
|
||||
int minLat2;
|
||||
int maxLat2;
|
||||
|
||||
// the number of times setBottom has been called (adversary protection)
|
||||
int setBottomCounter = 0;
|
||||
|
||||
public LatLonPointDistanceComparator(String field, double latitude, double longitude, int numHits, double missingValue) {
|
||||
this.field = field;
|
||||
this.latitude = latitude;
|
||||
|
@ -59,6 +85,52 @@ class LatLonPointDistanceComparator extends FieldComparator<Double> implements L
|
|||
@Override
|
||||
public void setBottom(int slot) {
|
||||
bottom = values[slot];
|
||||
// make bounding box(es) to exclude non-competitive hits, but start
|
||||
// sampling if we get called way too much: don't make gobs of bounding
|
||||
// boxes if comparator hits a worst case order (e.g. backwards distance order)
|
||||
if (setBottomCounter < 1024 || (setBottomCounter & 0x3F) == 0x3F) {
|
||||
GeoRect box = GeoUtils.circleToBBox(longitude, latitude, bottom);
|
||||
// pre-encode our box to our integer encoding, so we don't have to decode
|
||||
// to double values for uncompetitive hits. This has some cost!
|
||||
int minLatEncoded = LatLonPoint.encodeLatitude(box.minLat);
|
||||
int maxLatEncoded = LatLonPoint.encodeLatitude(box.maxLat);
|
||||
int minLonEncoded = LatLonPoint.encodeLongitude(box.minLon);
|
||||
int maxLonEncoded = LatLonPoint.encodeLongitude(box.maxLon);
|
||||
// be sure to not introduce quantization error in our optimization, just
|
||||
// round up our encoded box safely in all directions.
|
||||
if (minLatEncoded != Integer.MIN_VALUE) {
|
||||
minLatEncoded--;
|
||||
}
|
||||
if (minLonEncoded != Integer.MIN_VALUE) {
|
||||
minLonEncoded--;
|
||||
}
|
||||
if (maxLatEncoded != Integer.MAX_VALUE) {
|
||||
maxLatEncoded++;
|
||||
}
|
||||
if (maxLonEncoded != Integer.MAX_VALUE) {
|
||||
maxLonEncoded++;
|
||||
}
|
||||
crossesDateLine = box.crossesDateline();
|
||||
// crosses dateline: split
|
||||
if (crossesDateLine) {
|
||||
// box1
|
||||
minLon = Integer.MIN_VALUE;
|
||||
maxLon = maxLonEncoded;
|
||||
minLat = minLatEncoded;
|
||||
maxLat = maxLatEncoded;
|
||||
// box2
|
||||
minLon2 = minLonEncoded;
|
||||
maxLon2 = Integer.MAX_VALUE;
|
||||
minLat2 = minLatEncoded;
|
||||
maxLat2 = maxLatEncoded;
|
||||
} else {
|
||||
minLon = minLonEncoded;
|
||||
maxLon = maxLonEncoded;
|
||||
minLat = minLatEncoded;
|
||||
maxLat = maxLatEncoded;
|
||||
}
|
||||
}
|
||||
setBottomCounter++;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -68,7 +140,28 @@ class LatLonPointDistanceComparator extends FieldComparator<Double> implements L
|
|||
|
||||
@Override
|
||||
public int compareBottom(int doc) throws IOException {
|
||||
return Double.compare(bottom, distance(doc));
|
||||
currentDocs.setDocument(doc);
|
||||
|
||||
int numValues = currentDocs.count();
|
||||
if (numValues == 0) {
|
||||
return Double.compare(bottom, missingValue);
|
||||
}
|
||||
|
||||
double minValue = Double.POSITIVE_INFINITY;
|
||||
for (int i = 0; i < numValues; i++) {
|
||||
long encoded = currentDocs.valueAt(i);
|
||||
int latitudeBits = (int)(encoded >> 32);
|
||||
int longitudeBits = (int)(encoded & 0xFFFFFFFF);
|
||||
boolean outsideBox = ((latitudeBits < minLat || longitudeBits < minLon || latitudeBits > maxLat || longitudeBits > maxLon) &&
|
||||
(crossesDateLine == false || latitudeBits < minLat2 || longitudeBits < minLon2 || latitudeBits > maxLat2 || longitudeBits > maxLon2));
|
||||
// only compute actual distance if its inside "competitive bounding box"
|
||||
if (outsideBox == false) {
|
||||
double docLatitude = LatLonPoint.decodeLatitude(latitudeBits);
|
||||
double docLongitude = LatLonPoint.decodeLongitude(longitudeBits);
|
||||
minValue = Math.min(minValue, GeoDistanceUtils.haversin(latitude, longitude, docLatitude, docLongitude));
|
||||
}
|
||||
}
|
||||
return Double.compare(bottom, minValue);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
Loading…
Reference in New Issue