LUCENE-6704: GeoPointDistanceQuery was visiting too many term ranges

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1693690 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2015-08-01 08:58:46 +00:00
parent 54915fac9a
commit 00bedc01c2
8 changed files with 112 additions and 51 deletions

View File

@ -296,6 +296,9 @@ Bug fixes
more than one value in a multi-valued field. (Chris Earle, Nik Everett
via Mike McCandless)
* LUCENE-6704: GeoPointDistanceQuery was visiting too many term ranges,
consuming too much heap for a large radius (Nick Knize via Mike McCandless)
Changes in Runtime Behavior
* LUCENE-6501: The subreader structure in ParallelCompositeReader

View File

@ -18,7 +18,7 @@ package org.apache.lucene.search;
*/
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.util.GeoDistanceUtils;
import org.apache.lucene.util.GeoProjectionUtils;
import org.apache.lucene.util.GeoUtils;
import org.apache.lucene.util.ToStringUtils;
@ -88,11 +88,13 @@ public final class GeoPointDistanceQuery extends GeoPointInBBoxQuery {
}
static GeoBoundingBox computeBBox(final double centerLon, final double centerLat, final double radius) {
final double lonDistDeg = GeoDistanceUtils.distanceToDegreesLon(centerLat, radius);
final double latDistDeg = GeoDistanceUtils.distanceToDegreesLat(centerLat, radius);
double[] t = GeoProjectionUtils.pointFromLonLatBearing(centerLon, centerLat, 0, radius, null);
double[] r = GeoProjectionUtils.pointFromLonLatBearing(centerLon, centerLat, 90, radius, null);
double[] b = GeoProjectionUtils.pointFromLonLatBearing(centerLon, centerLat, 180, radius, null);
double[] l = GeoProjectionUtils.pointFromLonLatBearing(centerLon, centerLat, 270, radius, null);
return new GeoBoundingBox(GeoUtils.normalizeLon(centerLon - lonDistDeg), GeoUtils.normalizeLon(centerLon + lonDistDeg),
GeoUtils.normalizeLat(centerLat - latDistDeg), GeoUtils.normalizeLat(centerLat + latDistDeg));
return new GeoBoundingBox(GeoUtils.normalizeLon(l[0]), GeoUtils.normalizeLon(r[0]), GeoUtils.normalizeLat(b[1]),
GeoUtils.normalizeLat(t[1]));
}
@Override

View File

@ -65,34 +65,31 @@ final class GeoPointDistanceQueryImpl extends GeoPointInBBoxQueryImpl {
return GeoUtils.rectWithinCircle(minLon, minLat, maxLon, maxLat, query.centerLon, query.centerLat, query.radius);
}
@Override
protected boolean cellIntersectsShape(final double minLon, final double minLat, final double maxLon, final double maxLat) {
return (cellCrosses(minLon, minLat, maxLon, maxLat) || cellContains(minLon, minLat, maxLon, maxLat)
|| cellWithin(minLon, minLat, maxLon, maxLat));
}
/**
* The two-phase query approach. The parent
* {@link org.apache.lucene.search.GeoPointTermsEnum#accept} method is called to match
* encoded terms that fall within the bounding box of the polygon. Those documents that pass the initial
* bounding box filter are then compared to the provided polygon using the
* {@link org.apache.lucene.util.GeoUtils#pointInPolygon} method.
* bounding box filter are then compared to the provided distance using the
* {@link org.apache.lucene.util.SloppyMath#haversin} method.
*
* @param term term for candidate document
* @return match status
*/
@Override
protected AcceptStatus accept(BytesRef term) {
// first filter by bounding box
AcceptStatus status = super.accept(term);
assert status != AcceptStatus.YES_AND_SEEK;
if (status != AcceptStatus.YES) {
return status;
}
protected AcceptStatus postFilterBoundary(BytesRef term) {
final long val = NumericUtils.prefixCodedToLong(term);
final double lon = GeoUtils.mortonUnhashLon(val);
final double lat = GeoUtils.mortonUnhashLat(val);
// post-filter by distance
if (!(SloppyMath.haversin(query.centerLat, query.centerLon, lat, lon)*1000.0 <= query.radius)) {
if (SloppyMath.haversin(query.centerLat, query.centerLon, lat, lon) * 1000.0 > query.radius) {
return AcceptStatus.NO;
}
return AcceptStatus.YES;
}
}

View File

@ -169,7 +169,7 @@ public final class GeoPointInPolygonQuery extends GeoPointInBBoxQueryImpl {
}
@Override
protected boolean cellIntersects(final double minLon, final double minLat, final double maxLon, final double maxLat) {
protected boolean cellIntersectsMBR(final double minLon, final double minLat, final double maxLon, final double maxLat) {
return GeoUtils.rectIntersects(minLon, minLat, maxLon, maxLat, GeoPointInPolygonQuery.this.minLon,
GeoPointInPolygonQuery.this.minLat, GeoPointInPolygonQuery.this.maxLon, GeoPointInPolygonQuery.this.maxLat);
}
@ -185,15 +185,7 @@ public final class GeoPointInPolygonQuery extends GeoPointInBBoxQueryImpl {
* @return match status
*/
@Override
protected AcceptStatus accept(BytesRef term) {
// first filter by bounding box
AcceptStatus status = super.accept(term);
assert status != AcceptStatus.YES_AND_SEEK;
if (status != AcceptStatus.YES) {
return status;
}
protected AcceptStatus postFilterBoundary(BytesRef term) {
final long val = NumericUtils.prefixCodedToLong(term);
final double lon = GeoUtils.mortonUnhashLon(val);
final double lat = GeoUtils.mortonUnhashLat(val);

View File

@ -20,10 +20,12 @@ package org.apache.lucene.search;
import org.apache.lucene.util.GeoUtils;
/**
* Abstract base class used by {@code GeoPointInBBoxQueryImpl}
* TermQuery for GeoPointField for overriding {@link org.apache.lucene.search.MultiTermQuery} methods specific to
* Geospatial operations
*
* @lucene.experimental
*/
// TODO: remove this? Just absorb into its base class
abstract class GeoPointTermQuery extends MultiTermQuery {
// simple bounding box optimization - no objects used to avoid dependencies

View File

@ -32,6 +32,8 @@ import org.apache.lucene.util.NumericUtils;
/**
* computes all ranges along a space-filling curve that represents
* the given bounding box and enumerates all terms contained within those ranges
*
* @lucene.experimental
*/
class GeoPointTermsEnum extends FilteredTermsEnum {
protected final double minLon;
@ -39,7 +41,7 @@ class GeoPointTermsEnum extends FilteredTermsEnum {
protected final double maxLon;
protected final double maxLat;
private Range currentRange;
protected Range currentRange;
private BytesRef currentLowerBound, currentUpperBound;
private final List<Range> rangeBounds = new LinkedList<>();
@ -89,23 +91,46 @@ class GeoPointTermsEnum extends FilteredTermsEnum {
final short level = (short)(62-res>>>1);
// if cell is within and a factor of the precision step, or it crosses the edge of the shape add the range
final boolean within = res% GeoPointField.PRECISION_STEP == 0 && cellWithin(minLon, minLat, maxLon, maxLat);
if (within || (level == DETAIL_LEVEL && cellCrosses(minLon, minLat, maxLon, maxLat))) {
final boolean within = res % GeoPointField.PRECISION_STEP == 0 && cellWithin(minLon, minLat, maxLon, maxLat);
if (within || (level == DETAIL_LEVEL && cellIntersectsShape(minLon, minLat, maxLon, maxLat))) {
rangeBounds.add(new Range(start, end, res, level, !within));
} else if (level <= DETAIL_LEVEL && cellIntersects(minLon, minLat, maxLon, maxLat)) {
computeRange(start, (short)(res - 1));
} else if (level < DETAIL_LEVEL && cellIntersectsMBR(minLon, minLat, maxLon, maxLat)) {
computeRange(start, (short) (res - 1));
}
}
/**
* Determine whether the quad-cell crosses the shape
*/
protected boolean cellCrosses(final double minLon, final double minLat, final double maxLon, final double maxLat) {
return GeoUtils.rectCrosses(minLon, minLat, maxLon, maxLat, this.minLon, this.minLat, this.maxLon, this.maxLat);
}
/**
* Determine whether quad-cell is within the shape
*/
protected boolean cellWithin(final double minLon, final double minLat, final double maxLon, final double maxLat) {
return GeoUtils.rectWithin(minLon, minLat, maxLon, maxLat, this.minLon, this.minLat, this.maxLon, this.maxLat);
}
protected boolean cellIntersects(final double minLon, final double minLat, final double maxLon, final double maxLat) {
/**
* Return whether quad-cell contains the bounding box of this shape
*/
protected boolean cellContains(final double minLon, final double minLat, final double maxLon, final double maxLat) {
return GeoUtils.rectWithin(this.minLon, this.minLat, this.maxLon, this.maxLat, minLon, minLat, maxLon, maxLat);
}
/**
* Default shape is a rectangle, so this returns the same as {@code cellIntersectsMBR}
*/
protected boolean cellIntersectsShape(final double minLon, final double minLat, final double maxLon, final double maxLat) {
return cellIntersectsMBR(minLon, minLat, maxLon, maxLat);
}
/**
* Primary driver for cells intersecting shape boundaries
*/
protected boolean cellIntersectsMBR(final double minLon, final double minLat, final double maxLon, final double maxLat) {
return GeoUtils.rectIntersects(minLon, minLat, maxLon, maxLat, this.minLon, this.minLat, this.maxLon, this.maxLat);
}
@ -166,14 +191,19 @@ class GeoPointTermsEnum extends FilteredTermsEnum {
nextRange();
}
// final-filter boundary ranges by bounding box
if (currentRange.boundary) {
final long val = NumericUtils.prefixCodedToLong(term);
final double lon = GeoUtils.mortonUnhashLon(val);
final double lat = GeoUtils.mortonUnhashLat(val);
if (!GeoUtils.bboxContains(lon, lat, minLon, minLat, maxLon, maxLat)) {
return AcceptStatus.NO;
}
return postFilterBoundary(term);
}
return AcceptStatus.YES;
}
protected AcceptStatus postFilterBoundary(BytesRef term) {
final long val = NumericUtils.prefixCodedToLong(term);
final double lon = GeoUtils.mortonUnhashLon(val);
final double lat = GeoUtils.mortonUnhashLat(val);
if (!GeoUtils.bboxContains(lon, lat, minLon, minLat, maxLon, maxLat)) {
return AcceptStatus.NO;
}
return AcceptStatus.YES;
}

View File

@ -290,17 +290,25 @@ public final class GeoUtils {
!pointInPolygon(shapeX, shapeY, rMaxY, rMaxX) || !pointInPolygon(shapeX, shapeY, rMaxY, rMinX));
}
private static boolean rectAnyCornersInCirlce( final double rMinX, final double rMinY, final double rMaxX, final double rMaxY,
final double centerLon, final double centerLat, final double radius) {
private static boolean rectAnyCornersOutsideCircle(final double rMinX, final double rMinY, final double rMaxX, final double rMaxY,
final double centerLon, final double centerLat, final double radius) {
return (SloppyMath.haversin(centerLat, centerLon, rMinY, rMinX)*1000.0 > radius
|| SloppyMath.haversin(centerLat, centerLon, rMaxY, rMinX)*1000.0 > radius
|| SloppyMath.haversin(centerLat, centerLon, rMaxY, rMaxX)*1000.0 > radius
|| SloppyMath.haversin(centerLat, centerLon, rMinY, rMaxX)*1000.0 > radius);
}
private static boolean rectAnyCornersInCircle(final double rMinX, final double rMinY, final double rMaxX, final double rMaxY,
final double centerLon, final double centerLat, final double radius) {
return (SloppyMath.haversin(centerLat, centerLon, rMinY, rMinX)*1000.0 <= radius
|| SloppyMath.haversin(centerLat, centerLon, rMaxY, rMinX)*1000.0 <= radius
|| SloppyMath.haversin(centerLat, centerLon, rMaxY, rMaxX)*1000.0 <= radius
|| SloppyMath.haversin(centerLat, centerLon, rMinY, rMaxX)*1000.0 <= radius);
}
public static boolean rectWithinCircle(final double rMinX, final double rMinY, final double rMaxX, final double rMaxY,
final double centerLon, final double centerLat, final double radius) {
return !(rectAnyCornersInCirlce(rMinX, rMinY, rMaxX, rMaxY, centerLon, centerLat, radius));
return !(rectAnyCornersOutsideCircle(rMinX, rMinY, rMaxX, rMaxY, centerLon, centerLat, radius));
}
/**
@ -308,14 +316,22 @@ public final class GeoUtils {
*/
public static boolean rectCrossesCircle(final double rMinX, final double rMinY, final double rMaxX, final double rMaxY,
final double centerLon, final double centerLat, final double radius) {
return rectAnyCornersInCirlce(rMinX, rMinY, rMaxX, rMaxY, centerLon, centerLat, radius)
return rectAnyCornersInCircle(rMinX, rMinY, rMaxX, rMaxY, centerLon, centerLat, radius)
|| lineCrossesSphere(rMinX, rMinY, 0, rMaxX, rMinY, 0, centerLon, centerLat, 0, radius)
|| lineCrossesSphere(rMaxX, rMinY, 0, rMaxX, rMaxY, 0, centerLon, centerLat, 0, radius)
|| lineCrossesSphere(rMaxX, rMaxY, 0, rMinX, rMaxY, 0, centerLon, centerLat, 0, radius)
|| lineCrossesSphere(rMinX, rMaxY, 0, rMinX, rMinY, 0, centerLon, centerLat, 0, radius);
}
public static boolean circleWithinRect(double rMinX, final double rMinY, final double rMaxX, final double rMaxY,
final double centerLon, final double centerLat, final double radius) {
return !(centerLon < rMinX || centerLon > rMaxX || centerLat > rMaxY || centerLat < rMinY
|| SloppyMath.haversin(rMinY, centerLon, centerLat, centerLon) < radius
|| SloppyMath.haversin(rMaxY, centerLon, centerLat, centerLon) < radius
|| SloppyMath.haversin(centerLat, rMinX, centerLat, centerLon) < radius
|| SloppyMath.haversin(centerLat, rMaxX, centerLat, centerLon) < radius);
}
/**
* Computes whether or a 3dimensional line segment intersects or crosses a sphere
*

View File

@ -41,6 +41,7 @@ import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.GeoDistanceUtils;
import org.apache.lucene.util.GeoUtils;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
@ -63,7 +64,9 @@ public class TestGeoPointQuery extends LuceneTestCase {
private static final String FIELD_NAME = "geoField";
// error threshold for point-distance queries (in meters)
private static final int DISTANCE_ERR = 700;
// @todo haversine is sloppy, would be good to have a better heuristic for
// determining the possible haversine error
private static final int DISTANCE_ERR = 1000;
// Global bounding box we will "cover" in the random test; we have to make this "smallish" else the queries take very long:
private static double originLat;
@ -118,6 +121,7 @@ public class TestGeoPointQuery extends LuceneTestCase {
new GeoPointField(FIELD_NAME, -14.796283808944777, -62.455081198245665, storedPoint),
new GeoPointField(FIELD_NAME, -178.8538113027811, 32.94823588839368, storedPoint),
new GeoPointField(FIELD_NAME, 178.8538113027811, 32.94823588839368, storedPoint),
new GeoPointField(FIELD_NAME, -73.998776, 40.720611, storedPoint),
new GeoPointField(FIELD_NAME, -179.5, -44.5, storedPoint)};
for (GeoPointField p : pts) {
@ -202,7 +206,13 @@ public class TestGeoPointQuery extends LuceneTestCase {
@Test
public void testWholeMap() throws Exception {
TopDocs td = bboxQuery(-179.9, -89.9, 179.9, 89.9, 20);
assertEquals("testWholeMap failed", 14, td.totalHits);
assertEquals("testWholeMap failed", 15, td.totalHits);
}
@Test
public void smallTest() throws Exception {
TopDocs td = geoDistanceQuery(-73.998776, 40.720611, 1, 20);
assertEquals("smallTest failed", 1, td.totalHits);
}
@Test
@ -221,6 +231,15 @@ public class TestGeoPointQuery extends LuceneTestCase {
assertEquals("GeoDistanceQuery failed", 1, td.totalHits);
}
/**
* LUCENE-6704
*/
@Nightly
public void testGeoDistanceQueryHuge() throws Exception {
TopDocs td = geoDistanceQuery(-96.4538113027811, 32.94823588839368, 1000000, 20);
assertEquals("GeoDistanceQuery failed", 6, td.totalHits);
}
@Test
public void testGeoDistanceQueryCrossDateline() throws Exception {
TopDocs td = geoDistanceQuery(-179.9538113027811, 32.94823588839368, 120000, 20);
@ -425,8 +444,8 @@ public class TestGeoPointQuery extends LuceneTestCase {
double centerLon = bbox.minLon + ((bbox.maxLon - bbox.minLon)/2.0);
// radius (in meters) as a function of the random generated bbox
// TODO: change 100 back to 1000
final double radius = SloppyMath.haversin(centerLat, centerLon, bbox.minLat, centerLon)*100;
final double radius = GeoDistanceUtils.vincentyDistance(centerLon, centerLat, centerLon, bbox.minLat);
//final double radius = SloppyMath.haversin(centerLat, centerLon, bbox.minLat, centerLon)*1000;
if (VERBOSE) {
System.out.println("\t radius = " + radius);
}