LUCENE-7099: speed up embarrasingly slow sandbox/ distance sort

This commit is contained in:
Robert Muir 2016-03-11 21:20:07 -05:00
parent 51d02687a6
commit a4722845e3
1 changed files with 95 additions and 2 deletions

View File

@ -27,8 +27,16 @@ import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.LeafFieldComparator;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.spatial.util.GeoDistanceUtils;
import org.apache.lucene.spatial.util.GeoRect;
import org.apache.lucene.spatial.util.GeoUtils;
/** Compares docs by distance from an origin */
/**
* Compares documents by distance from an origin point
* <p>
* When the least competitive item on the priority queue changes (setBottom), we recompute
* a bounding box representing competitive distance to the top-N. Then in compareBottom, we can
* quickly reject hits based on bounding box alone without computing distance for every element.
*/
class LatLonPointDistanceComparator extends FieldComparator<Double> implements LeafFieldComparator {
final String field;
final double latitude;
@ -40,6 +48,24 @@ class LatLonPointDistanceComparator extends FieldComparator<Double> implements L
double topValue;
SortedNumericDocValues currentDocs;
// current bounding box(es) for the bottom distance on the PQ.
// these are pre-encoded with LatLonPoint's encoding and
// used to exclude uncompetitive hits faster.
int minLon;
int maxLon;
int minLat;
int maxLat;
// crossesDateLine is true, then we have a second box to check
boolean crossesDateLine;
int minLon2;
int maxLon2;
int minLat2;
int maxLat2;
// the number of times setBottom has been called (adversary protection)
int setBottomCounter = 0;
public LatLonPointDistanceComparator(String field, double latitude, double longitude, int numHits, double missingValue) {
this.field = field;
this.latitude = latitude;
@ -59,6 +85,52 @@ class LatLonPointDistanceComparator extends FieldComparator<Double> implements L
@Override
public void setBottom(int slot) {
bottom = values[slot];
// make bounding box(es) to exclude non-competitive hits, but start
// sampling if we get called way too much: don't make gobs of bounding
// boxes if comparator hits a worst case order (e.g. backwards distance order)
if (setBottomCounter < 1024 || (setBottomCounter & 0x3F) == 0x3F) {
GeoRect box = GeoUtils.circleToBBox(longitude, latitude, bottom);
// pre-encode our box to our integer encoding, so we don't have to decode
// to double values for uncompetitive hits. This has some cost!
int minLatEncoded = LatLonPoint.encodeLatitude(box.minLat);
int maxLatEncoded = LatLonPoint.encodeLatitude(box.maxLat);
int minLonEncoded = LatLonPoint.encodeLongitude(box.minLon);
int maxLonEncoded = LatLonPoint.encodeLongitude(box.maxLon);
// be sure to not introduce quantization error in our optimization, just
// round up our encoded box safely in all directions.
if (minLatEncoded != Integer.MIN_VALUE) {
minLatEncoded--;
}
if (minLonEncoded != Integer.MIN_VALUE) {
minLonEncoded--;
}
if (maxLatEncoded != Integer.MAX_VALUE) {
maxLatEncoded++;
}
if (maxLonEncoded != Integer.MAX_VALUE) {
maxLonEncoded++;
}
crossesDateLine = box.crossesDateline();
// crosses dateline: split
if (crossesDateLine) {
// box1
minLon = Integer.MIN_VALUE;
maxLon = maxLonEncoded;
minLat = minLatEncoded;
maxLat = maxLatEncoded;
// box2
minLon2 = minLonEncoded;
maxLon2 = Integer.MAX_VALUE;
minLat2 = minLatEncoded;
maxLat2 = maxLatEncoded;
} else {
minLon = minLonEncoded;
maxLon = maxLonEncoded;
minLat = minLatEncoded;
maxLat = maxLatEncoded;
}
}
setBottomCounter++;
}
@Override
@ -68,7 +140,28 @@ class LatLonPointDistanceComparator extends FieldComparator<Double> implements L
@Override
public int compareBottom(int doc) throws IOException {
return Double.compare(bottom, distance(doc));
currentDocs.setDocument(doc);
int numValues = currentDocs.count();
if (numValues == 0) {
return Double.compare(bottom, missingValue);
}
double minValue = Double.POSITIVE_INFINITY;
for (int i = 0; i < numValues; i++) {
long encoded = currentDocs.valueAt(i);
int latitudeBits = (int)(encoded >> 32);
int longitudeBits = (int)(encoded & 0xFFFFFFFF);
boolean outsideBox = ((latitudeBits < minLat || longitudeBits < minLon || latitudeBits > maxLat || longitudeBits > maxLon) &&
(crossesDateLine == false || latitudeBits < minLat2 || longitudeBits < minLon2 || latitudeBits > maxLat2 || longitudeBits > maxLon2));
// only compute actual distance if its inside "competitive bounding box"
if (outsideBox == false) {
double docLatitude = LatLonPoint.decodeLatitude(latitudeBits);
double docLongitude = LatLonPoint.decodeLongitude(longitudeBits);
minValue = Math.min(minValue, GeoDistanceUtils.haversin(latitude, longitude, docLatitude, docLongitude));
}
}
return Double.compare(bottom, minValue);
}
@Override