mirror of https://github.com/apache/lucene.git
LUCENE-7099: speed up embarrassingly slow sandbox/ distance sort
This commit is contained in:
parent
51d02687a6
commit
a4722845e3
|
@ -27,8 +27,16 @@ import org.apache.lucene.search.FieldComparator;
|
||||||
import org.apache.lucene.search.LeafFieldComparator;
|
import org.apache.lucene.search.LeafFieldComparator;
|
||||||
import org.apache.lucene.search.Scorer;
|
import org.apache.lucene.search.Scorer;
|
||||||
import org.apache.lucene.spatial.util.GeoDistanceUtils;
|
import org.apache.lucene.spatial.util.GeoDistanceUtils;
|
||||||
|
import org.apache.lucene.spatial.util.GeoRect;
|
||||||
|
import org.apache.lucene.spatial.util.GeoUtils;
|
||||||
|
|
||||||
/** Compares docs by distance from an origin */
|
/**
|
||||||
|
* Compares documents by distance from an origin point
|
||||||
|
* <p>
|
||||||
|
* When the least competitive item on the priority queue changes (setBottom), we recompute
|
||||||
|
* a bounding box representing competitive distance to the top-N. Then in compareBottom, we can
|
||||||
|
* quickly reject hits based on bounding box alone without computing distance for every element.
|
||||||
|
*/
|
||||||
class LatLonPointDistanceComparator extends FieldComparator<Double> implements LeafFieldComparator {
|
class LatLonPointDistanceComparator extends FieldComparator<Double> implements LeafFieldComparator {
|
||||||
final String field;
|
final String field;
|
||||||
final double latitude;
|
final double latitude;
|
||||||
|
@ -40,6 +48,24 @@ class LatLonPointDistanceComparator extends FieldComparator<Double> implements L
|
||||||
double topValue;
|
double topValue;
|
||||||
SortedNumericDocValues currentDocs;
|
SortedNumericDocValues currentDocs;
|
||||||
|
|
||||||
|
// current bounding box(es) for the bottom distance on the PQ.
|
||||||
|
// these are pre-encoded with LatLonPoint's encoding and
|
||||||
|
// used to exclude uncompetitive hits faster.
|
||||||
|
int minLon;
|
||||||
|
int maxLon;
|
||||||
|
int minLat;
|
||||||
|
int maxLat;
|
||||||
|
|
||||||
|
// if crossesDateLine is true, then we have a second box to check
|
||||||
|
boolean crossesDateLine;
|
||||||
|
int minLon2;
|
||||||
|
int maxLon2;
|
||||||
|
int minLat2;
|
||||||
|
int maxLat2;
|
||||||
|
|
||||||
|
// the number of times setBottom has been called (adversary protection)
|
||||||
|
int setBottomCounter = 0;
|
||||||
|
|
||||||
public LatLonPointDistanceComparator(String field, double latitude, double longitude, int numHits, double missingValue) {
|
public LatLonPointDistanceComparator(String field, double latitude, double longitude, int numHits, double missingValue) {
|
||||||
this.field = field;
|
this.field = field;
|
||||||
this.latitude = latitude;
|
this.latitude = latitude;
|
||||||
|
@ -59,6 +85,52 @@ class LatLonPointDistanceComparator extends FieldComparator<Double> implements L
|
||||||
@Override
|
@Override
|
||||||
public void setBottom(int slot) {
|
public void setBottom(int slot) {
|
||||||
bottom = values[slot];
|
bottom = values[slot];
|
||||||
|
// make bounding box(es) to exclude non-competitive hits, but start
|
||||||
|
// sampling if we get called way too much: don't make gobs of bounding
|
||||||
|
// boxes if comparator hits a worst case order (e.g. backwards distance order)
|
||||||
|
if (setBottomCounter < 1024 || (setBottomCounter & 0x3F) == 0x3F) {
|
||||||
|
GeoRect box = GeoUtils.circleToBBox(longitude, latitude, bottom);
|
||||||
|
// pre-encode our box to our integer encoding, so we don't have to decode
|
||||||
|
// to double values for uncompetitive hits. This has some cost!
|
||||||
|
int minLatEncoded = LatLonPoint.encodeLatitude(box.minLat);
|
||||||
|
int maxLatEncoded = LatLonPoint.encodeLatitude(box.maxLat);
|
||||||
|
int minLonEncoded = LatLonPoint.encodeLongitude(box.minLon);
|
||||||
|
int maxLonEncoded = LatLonPoint.encodeLongitude(box.maxLon);
|
||||||
|
// be sure to not introduce quantization error in our optimization, just
|
||||||
|
// round up our encoded box safely in all directions.
|
||||||
|
if (minLatEncoded != Integer.MIN_VALUE) {
|
||||||
|
minLatEncoded--;
|
||||||
|
}
|
||||||
|
if (minLonEncoded != Integer.MIN_VALUE) {
|
||||||
|
minLonEncoded--;
|
||||||
|
}
|
||||||
|
if (maxLatEncoded != Integer.MAX_VALUE) {
|
||||||
|
maxLatEncoded++;
|
||||||
|
}
|
||||||
|
if (maxLonEncoded != Integer.MAX_VALUE) {
|
||||||
|
maxLonEncoded++;
|
||||||
|
}
|
||||||
|
crossesDateLine = box.crossesDateline();
|
||||||
|
// crosses dateline: split
|
||||||
|
if (crossesDateLine) {
|
||||||
|
// box1
|
||||||
|
minLon = Integer.MIN_VALUE;
|
||||||
|
maxLon = maxLonEncoded;
|
||||||
|
minLat = minLatEncoded;
|
||||||
|
maxLat = maxLatEncoded;
|
||||||
|
// box2
|
||||||
|
minLon2 = minLonEncoded;
|
||||||
|
maxLon2 = Integer.MAX_VALUE;
|
||||||
|
minLat2 = minLatEncoded;
|
||||||
|
maxLat2 = maxLatEncoded;
|
||||||
|
} else {
|
||||||
|
minLon = minLonEncoded;
|
||||||
|
maxLon = maxLonEncoded;
|
||||||
|
minLat = minLatEncoded;
|
||||||
|
maxLat = maxLatEncoded;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
setBottomCounter++;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -68,7 +140,28 @@ class LatLonPointDistanceComparator extends FieldComparator<Double> implements L
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int compareBottom(int doc) throws IOException {
|
public int compareBottom(int doc) throws IOException {
|
||||||
return Double.compare(bottom, distance(doc));
|
currentDocs.setDocument(doc);
|
||||||
|
|
||||||
|
int numValues = currentDocs.count();
|
||||||
|
if (numValues == 0) {
|
||||||
|
return Double.compare(bottom, missingValue);
|
||||||
|
}
|
||||||
|
|
||||||
|
double minValue = Double.POSITIVE_INFINITY;
|
||||||
|
for (int i = 0; i < numValues; i++) {
|
||||||
|
long encoded = currentDocs.valueAt(i);
|
||||||
|
int latitudeBits = (int)(encoded >> 32);
|
||||||
|
int longitudeBits = (int)(encoded & 0xFFFFFFFF);
|
||||||
|
boolean outsideBox = ((latitudeBits < minLat || longitudeBits < minLon || latitudeBits > maxLat || longitudeBits > maxLon) &&
|
||||||
|
(crossesDateLine == false || latitudeBits < minLat2 || longitudeBits < minLon2 || latitudeBits > maxLat2 || longitudeBits > maxLon2));
|
||||||
|
// only compute actual distance if it's inside the "competitive bounding box"
|
||||||
|
if (outsideBox == false) {
|
||||||
|
double docLatitude = LatLonPoint.decodeLatitude(latitudeBits);
|
||||||
|
double docLongitude = LatLonPoint.decodeLongitude(longitudeBits);
|
||||||
|
minValue = Math.min(minValue, GeoDistanceUtils.haversin(latitude, longitude, docLatitude, docLongitude));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return Double.compare(bottom, minValue);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
Loading…
Reference in New Issue