mirror of https://github.com/apache/lucene.git
LUCENE-6704: GeoPointDistanceQuery was visiting too many term ranges
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1693690 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
54915fac9a
commit
00bedc01c2
|
@ -296,6 +296,9 @@ Bug fixes
|
|||
more than one value in a multi-valued field. (Chris Earle, Nik Everett
|
||||
via Mike McCandless)
|
||||
|
||||
* LUCENE-6704: GeoPointDistanceQuery was visiting too many term ranges,
|
||||
consuming too much heap for a large radius (Nick Knize via Mike McCandless)
|
||||
|
||||
Changes in Runtime Behavior
|
||||
|
||||
* LUCENE-6501: The subreader structure in ParallelCompositeReader
|
||||
|
|
|
@ -18,7 +18,7 @@ package org.apache.lucene.search;
|
|||
*/
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.util.GeoDistanceUtils;
|
||||
import org.apache.lucene.util.GeoProjectionUtils;
|
||||
import org.apache.lucene.util.GeoUtils;
|
||||
import org.apache.lucene.util.ToStringUtils;
|
||||
|
||||
|
@ -88,11 +88,13 @@ public final class GeoPointDistanceQuery extends GeoPointInBBoxQuery {
|
|||
}
|
||||
|
||||
static GeoBoundingBox computeBBox(final double centerLon, final double centerLat, final double radius) {
|
||||
final double lonDistDeg = GeoDistanceUtils.distanceToDegreesLon(centerLat, radius);
|
||||
final double latDistDeg = GeoDistanceUtils.distanceToDegreesLat(centerLat, radius);
|
||||
double[] t = GeoProjectionUtils.pointFromLonLatBearing(centerLon, centerLat, 0, radius, null);
|
||||
double[] r = GeoProjectionUtils.pointFromLonLatBearing(centerLon, centerLat, 90, radius, null);
|
||||
double[] b = GeoProjectionUtils.pointFromLonLatBearing(centerLon, centerLat, 180, radius, null);
|
||||
double[] l = GeoProjectionUtils.pointFromLonLatBearing(centerLon, centerLat, 270, radius, null);
|
||||
|
||||
return new GeoBoundingBox(GeoUtils.normalizeLon(centerLon - lonDistDeg), GeoUtils.normalizeLon(centerLon + lonDistDeg),
|
||||
GeoUtils.normalizeLat(centerLat - latDistDeg), GeoUtils.normalizeLat(centerLat + latDistDeg));
|
||||
return new GeoBoundingBox(GeoUtils.normalizeLon(l[0]), GeoUtils.normalizeLon(r[0]), GeoUtils.normalizeLat(b[1]),
|
||||
GeoUtils.normalizeLat(t[1]));
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -65,34 +65,31 @@ final class GeoPointDistanceQueryImpl extends GeoPointInBBoxQueryImpl {
|
|||
return GeoUtils.rectWithinCircle(minLon, minLat, maxLon, maxLat, query.centerLon, query.centerLat, query.radius);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean cellIntersectsShape(final double minLon, final double minLat, final double maxLon, final double maxLat) {
|
||||
return (cellCrosses(minLon, minLat, maxLon, maxLat) || cellContains(minLon, minLat, maxLon, maxLat)
|
||||
|| cellWithin(minLon, minLat, maxLon, maxLat));
|
||||
}
|
||||
|
||||
/**
|
||||
* The two-phase query approach. The parent
|
||||
* {@link org.apache.lucene.search.GeoPointTermsEnum#accept} method is called to match
|
||||
* encoded terms that fall within the bounding box of the polygon. Those documents that pass the initial
|
||||
* bounding box filter are then compared to the provided polygon using the
|
||||
* {@link org.apache.lucene.util.GeoUtils#pointInPolygon} method.
|
||||
* bounding box filter are then compared to the provided distance using the
|
||||
* {@link org.apache.lucene.util.SloppyMath#haversin} method.
|
||||
*
|
||||
* @param term term for candidate document
|
||||
* @return match status
|
||||
*/
|
||||
@Override
|
||||
protected AcceptStatus accept(BytesRef term) {
|
||||
// first filter by bounding box
|
||||
AcceptStatus status = super.accept(term);
|
||||
assert status != AcceptStatus.YES_AND_SEEK;
|
||||
|
||||
if (status != AcceptStatus.YES) {
|
||||
return status;
|
||||
}
|
||||
|
||||
protected AcceptStatus postFilterBoundary(BytesRef term) {
|
||||
final long val = NumericUtils.prefixCodedToLong(term);
|
||||
final double lon = GeoUtils.mortonUnhashLon(val);
|
||||
final double lat = GeoUtils.mortonUnhashLat(val);
|
||||
// post-filter by distance
|
||||
if (!(SloppyMath.haversin(query.centerLat, query.centerLon, lat, lon)*1000.0 <= query.radius)) {
|
||||
if (SloppyMath.haversin(query.centerLat, query.centerLon, lat, lon) * 1000.0 > query.radius) {
|
||||
return AcceptStatus.NO;
|
||||
}
|
||||
|
||||
return AcceptStatus.YES;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -169,7 +169,7 @@ public final class GeoPointInPolygonQuery extends GeoPointInBBoxQueryImpl {
|
|||
}
|
||||
|
||||
@Override
|
||||
protected boolean cellIntersects(final double minLon, final double minLat, final double maxLon, final double maxLat) {
|
||||
protected boolean cellIntersectsMBR(final double minLon, final double minLat, final double maxLon, final double maxLat) {
|
||||
return GeoUtils.rectIntersects(minLon, minLat, maxLon, maxLat, GeoPointInPolygonQuery.this.minLon,
|
||||
GeoPointInPolygonQuery.this.minLat, GeoPointInPolygonQuery.this.maxLon, GeoPointInPolygonQuery.this.maxLat);
|
||||
}
|
||||
|
@ -185,15 +185,7 @@ public final class GeoPointInPolygonQuery extends GeoPointInBBoxQueryImpl {
|
|||
* @return match status
|
||||
*/
|
||||
@Override
|
||||
protected AcceptStatus accept(BytesRef term) {
|
||||
// first filter by bounding box
|
||||
AcceptStatus status = super.accept(term);
|
||||
assert status != AcceptStatus.YES_AND_SEEK;
|
||||
|
||||
if (status != AcceptStatus.YES) {
|
||||
return status;
|
||||
}
|
||||
|
||||
protected AcceptStatus postFilterBoundary(BytesRef term) {
|
||||
final long val = NumericUtils.prefixCodedToLong(term);
|
||||
final double lon = GeoUtils.mortonUnhashLon(val);
|
||||
final double lat = GeoUtils.mortonUnhashLat(val);
|
||||
|
|
|
@ -20,10 +20,12 @@ package org.apache.lucene.search;
|
|||
import org.apache.lucene.util.GeoUtils;
|
||||
|
||||
/**
|
||||
* Abstract base class used by {@code GeoPointInBBoxQueryImpl}
|
||||
* TermQuery for GeoPointField for overriding {@link org.apache.lucene.search.MultiTermQuery} methods specific to
|
||||
* Geospatial operations
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
|
||||
// TODO: remove this? Just absorb into its base class
|
||||
abstract class GeoPointTermQuery extends MultiTermQuery {
|
||||
// simple bounding box optimization - no objects used to avoid dependencies
|
||||
|
|
|
@ -32,6 +32,8 @@ import org.apache.lucene.util.NumericUtils;
|
|||
/**
|
||||
* computes all ranges along a space-filling curve that represents
|
||||
* the given bounding box and enumerates all terms contained within those ranges
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
class GeoPointTermsEnum extends FilteredTermsEnum {
|
||||
protected final double minLon;
|
||||
|
@ -39,7 +41,7 @@ class GeoPointTermsEnum extends FilteredTermsEnum {
|
|||
protected final double maxLon;
|
||||
protected final double maxLat;
|
||||
|
||||
private Range currentRange;
|
||||
protected Range currentRange;
|
||||
private BytesRef currentLowerBound, currentUpperBound;
|
||||
|
||||
private final List<Range> rangeBounds = new LinkedList<>();
|
||||
|
@ -89,23 +91,46 @@ class GeoPointTermsEnum extends FilteredTermsEnum {
|
|||
final short level = (short)(62-res>>>1);
|
||||
|
||||
// if cell is within and a factor of the precision step, or it crosses the edge of the shape add the range
|
||||
final boolean within = res% GeoPointField.PRECISION_STEP == 0 && cellWithin(minLon, minLat, maxLon, maxLat);
|
||||
if (within || (level == DETAIL_LEVEL && cellCrosses(minLon, minLat, maxLon, maxLat))) {
|
||||
final boolean within = res % GeoPointField.PRECISION_STEP == 0 && cellWithin(minLon, minLat, maxLon, maxLat);
|
||||
if (within || (level == DETAIL_LEVEL && cellIntersectsShape(minLon, minLat, maxLon, maxLat))) {
|
||||
rangeBounds.add(new Range(start, end, res, level, !within));
|
||||
} else if (level <= DETAIL_LEVEL && cellIntersects(minLon, minLat, maxLon, maxLat)) {
|
||||
computeRange(start, (short)(res - 1));
|
||||
} else if (level < DETAIL_LEVEL && cellIntersectsMBR(minLon, minLat, maxLon, maxLat)) {
|
||||
computeRange(start, (short) (res - 1));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Determine whether the quad-cell crosses the shape
|
||||
*/
|
||||
protected boolean cellCrosses(final double minLon, final double minLat, final double maxLon, final double maxLat) {
|
||||
return GeoUtils.rectCrosses(minLon, minLat, maxLon, maxLat, this.minLon, this.minLat, this.maxLon, this.maxLat);
|
||||
}
|
||||
|
||||
/**
|
||||
* Determine whether quad-cell is within the shape
|
||||
*/
|
||||
protected boolean cellWithin(final double minLon, final double minLat, final double maxLon, final double maxLat) {
|
||||
return GeoUtils.rectWithin(minLon, minLat, maxLon, maxLat, this.minLon, this.minLat, this.maxLon, this.maxLat);
|
||||
}
|
||||
|
||||
protected boolean cellIntersects(final double minLon, final double minLat, final double maxLon, final double maxLat) {
|
||||
/**
|
||||
* Return whether quad-cell contains the bounding box of this shape
|
||||
*/
|
||||
protected boolean cellContains(final double minLon, final double minLat, final double maxLon, final double maxLat) {
|
||||
return GeoUtils.rectWithin(this.minLon, this.minLat, this.maxLon, this.maxLat, minLon, minLat, maxLon, maxLat);
|
||||
}
|
||||
|
||||
/**
|
||||
* Default shape is a rectangle, so this returns the same as {@code cellIntersectsMBR}
|
||||
*/
|
||||
protected boolean cellIntersectsShape(final double minLon, final double minLat, final double maxLon, final double maxLat) {
|
||||
return cellIntersectsMBR(minLon, minLat, maxLon, maxLat);
|
||||
}
|
||||
|
||||
/**
|
||||
* Primary driver for cells intersecting shape boundaries
|
||||
*/
|
||||
protected boolean cellIntersectsMBR(final double minLon, final double minLat, final double maxLon, final double maxLat) {
|
||||
return GeoUtils.rectIntersects(minLon, minLat, maxLon, maxLat, this.minLon, this.minLat, this.maxLon, this.maxLat);
|
||||
}
|
||||
|
||||
|
@ -166,14 +191,19 @@ class GeoPointTermsEnum extends FilteredTermsEnum {
|
|||
nextRange();
|
||||
}
|
||||
|
||||
// final-filter boundary ranges by bounding box
|
||||
if (currentRange.boundary) {
|
||||
final long val = NumericUtils.prefixCodedToLong(term);
|
||||
final double lon = GeoUtils.mortonUnhashLon(val);
|
||||
final double lat = GeoUtils.mortonUnhashLat(val);
|
||||
if (!GeoUtils.bboxContains(lon, lat, minLon, minLat, maxLon, maxLat)) {
|
||||
return AcceptStatus.NO;
|
||||
}
|
||||
return postFilterBoundary(term);
|
||||
}
|
||||
|
||||
return AcceptStatus.YES;
|
||||
}
|
||||
|
||||
protected AcceptStatus postFilterBoundary(BytesRef term) {
|
||||
final long val = NumericUtils.prefixCodedToLong(term);
|
||||
final double lon = GeoUtils.mortonUnhashLon(val);
|
||||
final double lat = GeoUtils.mortonUnhashLat(val);
|
||||
if (!GeoUtils.bboxContains(lon, lat, minLon, minLat, maxLon, maxLat)) {
|
||||
return AcceptStatus.NO;
|
||||
}
|
||||
return AcceptStatus.YES;
|
||||
}
|
||||
|
|
|
@ -290,17 +290,25 @@ public final class GeoUtils {
|
|||
!pointInPolygon(shapeX, shapeY, rMaxY, rMaxX) || !pointInPolygon(shapeX, shapeY, rMaxY, rMinX));
|
||||
}
|
||||
|
||||
private static boolean rectAnyCornersInCirlce( final double rMinX, final double rMinY, final double rMaxX, final double rMaxY,
|
||||
final double centerLon, final double centerLat, final double radius) {
|
||||
private static boolean rectAnyCornersOutsideCircle(final double rMinX, final double rMinY, final double rMaxX, final double rMaxY,
|
||||
final double centerLon, final double centerLat, final double radius) {
|
||||
return (SloppyMath.haversin(centerLat, centerLon, rMinY, rMinX)*1000.0 > radius
|
||||
|| SloppyMath.haversin(centerLat, centerLon, rMaxY, rMinX)*1000.0 > radius
|
||||
|| SloppyMath.haversin(centerLat, centerLon, rMaxY, rMaxX)*1000.0 > radius
|
||||
|| SloppyMath.haversin(centerLat, centerLon, rMinY, rMaxX)*1000.0 > radius);
|
||||
}
|
||||
|
||||
private static boolean rectAnyCornersInCircle(final double rMinX, final double rMinY, final double rMaxX, final double rMaxY,
|
||||
final double centerLon, final double centerLat, final double radius) {
|
||||
return (SloppyMath.haversin(centerLat, centerLon, rMinY, rMinX)*1000.0 <= radius
|
||||
|| SloppyMath.haversin(centerLat, centerLon, rMaxY, rMinX)*1000.0 <= radius
|
||||
|| SloppyMath.haversin(centerLat, centerLon, rMaxY, rMaxX)*1000.0 <= radius
|
||||
|| SloppyMath.haversin(centerLat, centerLon, rMinY, rMaxX)*1000.0 <= radius);
|
||||
}
|
||||
|
||||
public static boolean rectWithinCircle(final double rMinX, final double rMinY, final double rMaxX, final double rMaxY,
|
||||
final double centerLon, final double centerLat, final double radius) {
|
||||
return !(rectAnyCornersInCirlce(rMinX, rMinY, rMaxX, rMaxY, centerLon, centerLat, radius));
|
||||
return !(rectAnyCornersOutsideCircle(rMinX, rMinY, rMaxX, rMaxY, centerLon, centerLat, radius));
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -308,14 +316,22 @@ public final class GeoUtils {
|
|||
*/
|
||||
public static boolean rectCrossesCircle(final double rMinX, final double rMinY, final double rMaxX, final double rMaxY,
|
||||
final double centerLon, final double centerLat, final double radius) {
|
||||
|
||||
return rectAnyCornersInCirlce(rMinX, rMinY, rMaxX, rMaxY, centerLon, centerLat, radius)
|
||||
return rectAnyCornersInCircle(rMinX, rMinY, rMaxX, rMaxY, centerLon, centerLat, radius)
|
||||
|| lineCrossesSphere(rMinX, rMinY, 0, rMaxX, rMinY, 0, centerLon, centerLat, 0, radius)
|
||||
|| lineCrossesSphere(rMaxX, rMinY, 0, rMaxX, rMaxY, 0, centerLon, centerLat, 0, radius)
|
||||
|| lineCrossesSphere(rMaxX, rMaxY, 0, rMinX, rMaxY, 0, centerLon, centerLat, 0, radius)
|
||||
|| lineCrossesSphere(rMinX, rMaxY, 0, rMinX, rMinY, 0, centerLon, centerLat, 0, radius);
|
||||
}
|
||||
|
||||
public static boolean circleWithinRect(double rMinX, final double rMinY, final double rMaxX, final double rMaxY,
|
||||
final double centerLon, final double centerLat, final double radius) {
|
||||
return !(centerLon < rMinX || centerLon > rMaxX || centerLat > rMaxY || centerLat < rMinY
|
||||
|| SloppyMath.haversin(rMinY, centerLon, centerLat, centerLon) < radius
|
||||
|| SloppyMath.haversin(rMaxY, centerLon, centerLat, centerLon) < radius
|
||||
|| SloppyMath.haversin(centerLat, rMinX, centerLat, centerLon) < radius
|
||||
|| SloppyMath.haversin(centerLat, rMaxX, centerLat, centerLon) < radius);
|
||||
}
|
||||
|
||||
/**
|
||||
* Computes whether or a 3dimensional line segment intersects or crosses a sphere
|
||||
*
|
||||
|
|
|
@ -41,6 +41,7 @@ import org.apache.lucene.index.RandomIndexWriter;
|
|||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
import org.apache.lucene.util.GeoDistanceUtils;
|
||||
import org.apache.lucene.util.GeoUtils;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
@ -63,7 +64,9 @@ public class TestGeoPointQuery extends LuceneTestCase {
|
|||
private static final String FIELD_NAME = "geoField";
|
||||
|
||||
// error threshold for point-distance queries (in meters)
|
||||
private static final int DISTANCE_ERR = 700;
|
||||
// @todo haversine is sloppy, would be good to have a better heuristic for
|
||||
// determining the possible haversine error
|
||||
private static final int DISTANCE_ERR = 1000;
|
||||
|
||||
// Global bounding box we will "cover" in the random test; we have to make this "smallish" else the queries take very long:
|
||||
private static double originLat;
|
||||
|
@ -118,6 +121,7 @@ public class TestGeoPointQuery extends LuceneTestCase {
|
|||
new GeoPointField(FIELD_NAME, -14.796283808944777, -62.455081198245665, storedPoint),
|
||||
new GeoPointField(FIELD_NAME, -178.8538113027811, 32.94823588839368, storedPoint),
|
||||
new GeoPointField(FIELD_NAME, 178.8538113027811, 32.94823588839368, storedPoint),
|
||||
new GeoPointField(FIELD_NAME, -73.998776, 40.720611, storedPoint),
|
||||
new GeoPointField(FIELD_NAME, -179.5, -44.5, storedPoint)};
|
||||
|
||||
for (GeoPointField p : pts) {
|
||||
|
@ -202,7 +206,13 @@ public class TestGeoPointQuery extends LuceneTestCase {
|
|||
@Test
|
||||
public void testWholeMap() throws Exception {
|
||||
TopDocs td = bboxQuery(-179.9, -89.9, 179.9, 89.9, 20);
|
||||
assertEquals("testWholeMap failed", 14, td.totalHits);
|
||||
assertEquals("testWholeMap failed", 15, td.totalHits);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void smallTest() throws Exception {
|
||||
TopDocs td = geoDistanceQuery(-73.998776, 40.720611, 1, 20);
|
||||
assertEquals("smallTest failed", 1, td.totalHits);
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -221,6 +231,15 @@ public class TestGeoPointQuery extends LuceneTestCase {
|
|||
assertEquals("GeoDistanceQuery failed", 1, td.totalHits);
|
||||
}
|
||||
|
||||
/**
|
||||
* LUCENE-6704
|
||||
*/
|
||||
@Nightly
|
||||
public void testGeoDistanceQueryHuge() throws Exception {
|
||||
TopDocs td = geoDistanceQuery(-96.4538113027811, 32.94823588839368, 1000000, 20);
|
||||
assertEquals("GeoDistanceQuery failed", 6, td.totalHits);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGeoDistanceQueryCrossDateline() throws Exception {
|
||||
TopDocs td = geoDistanceQuery(-179.9538113027811, 32.94823588839368, 120000, 20);
|
||||
|
@ -425,8 +444,8 @@ public class TestGeoPointQuery extends LuceneTestCase {
|
|||
double centerLon = bbox.minLon + ((bbox.maxLon - bbox.minLon)/2.0);
|
||||
|
||||
// radius (in meters) as a function of the random generated bbox
|
||||
// TODO: change 100 back to 1000
|
||||
final double radius = SloppyMath.haversin(centerLat, centerLon, bbox.minLat, centerLon)*100;
|
||||
final double radius = GeoDistanceUtils.vincentyDistance(centerLon, centerLat, centerLon, bbox.minLat);
|
||||
//final double radius = SloppyMath.haversin(centerLat, centerLon, bbox.minLat, centerLon)*1000;
|
||||
if (VERBOSE) {
|
||||
System.out.println("\t radius = " + radius);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue