LUCENE-8555: Add dateline crossing support to LatLonShapeBoundingBoxQuery

This commit is contained in:
Nicholas Knize 2018-11-02 12:15:22 -05:00
parent 0cbefe8b25
commit 31d7dfe6b1
8 changed files with 159 additions and 38 deletions

View File

@ -244,6 +244,8 @@ New Features
* LUCENE-8554: Add new LatLonShapeLineQuery that queries indexed LatLonShape fields
by arbitrary lines. (Nick Knize)
* LUCENE-8555: Add dateline crossing support to LatLonShapeBoundingBoxQuery. (Nick Knize)
Improvements:
* LUCENE-8521: Change LatLonShape encoding to 7 dimensions instead of 6; where the

View File

@ -43,6 +43,10 @@ public final class GeoEncodingUtils {
private static final double LON_SCALE = (0x1L<<BITS)/360.0D;
private static final double LON_DECODE = 1/LON_SCALE;
public static final int MIN_LON_ENCODED = encodeLongitude(MIN_LON_INCL);
public static final int MAX_LON_ENCODED = encodeLongitude(MAX_LON_INCL);
// No instance:
private GeoEncodingUtils() {
}

View File

@ -117,8 +117,6 @@ public class LatLonShape {
}
/** create a query to find all polygons that intersect a defined bounding box
* note: does not currently support dateline crossing boxes
* todo split dateline crossing boxes into two queries like {@link LatLonPoint#newBoxQuery}
**/
public static Query newBoxQuery(String field, QueryRelation queryRelation, double minLatitude, double maxLatitude, double minLongitude, double maxLongitude) {
return new LatLonShapeBoundingBoxQuery(field, queryRelation, minLatitude, maxLatitude, minLongitude, maxLongitude);

View File

@ -25,6 +25,8 @@ import org.apache.lucene.util.FutureArrays;
import org.apache.lucene.util.NumericUtils;
import static org.apache.lucene.document.LatLonShape.BYTES;
import static org.apache.lucene.geo.GeoEncodingUtils.MAX_LON_ENCODED;
import static org.apache.lucene.geo.GeoEncodingUtils.MIN_LON_ENCODED;
import static org.apache.lucene.geo.GeoEncodingUtils.decodeLatitude;
import static org.apache.lucene.geo.GeoEncodingUtils.decodeLongitude;
import static org.apache.lucene.geo.GeoEncodingUtils.encodeLatitude;
@ -43,6 +45,7 @@ import static org.apache.lucene.geo.GeoUtils.orient;
**/
final class LatLonShapeBoundingBoxQuery extends LatLonShapeQuery {
final byte[] bbox;
final byte[] west;
final int minX;
final int maxX;
final int minY;
@ -50,23 +53,59 @@ final class LatLonShapeBoundingBoxQuery extends LatLonShapeQuery {
public LatLonShapeBoundingBoxQuery(String field, LatLonShape.QueryRelation queryRelation, double minLat, double maxLat, double minLon, double maxLon) {
super(field, queryRelation);
if (minLon > maxLon) {
throw new IllegalArgumentException("dateline crossing bounding box queries are not supported for [" + field + "]");
}
this.bbox = new byte[4 * LatLonShape.BYTES];
this.minX = encodeLongitudeCeil(minLon);
this.maxX = encodeLongitude(maxLon);
int minXenc = encodeLongitudeCeil(minLon);
int maxXenc = encodeLongitude(maxLon);
this.minY = encodeLatitudeCeil(minLat);
this.maxY = encodeLatitude(maxLat);
LatLonShape.encodeTriangleBoxVal(this.minY, bbox, 0);
LatLonShape.encodeTriangleBoxVal(this.minX, bbox, BYTES);
LatLonShape.encodeTriangleBoxVal(this.maxY, bbox, 2 * BYTES);
LatLonShape.encodeTriangleBoxVal(this.maxX, bbox, 3 * BYTES);
if (minLon > maxLon == true) {
// crossing dateline is split into east/west boxes
this.west = new byte[4 * LatLonShape.BYTES];
this.minX = minXenc;
this.maxX = maxXenc;
encode(MIN_LON_ENCODED, this.maxX, this.minY, this.maxY, this.west);
encode(this.minX, MAX_LON_ENCODED, this.minY, this.maxY, this.bbox);
} else {
// encodeLongitudeCeil may cause minX to be > maxX iff
// the delta between the longitudes < the encoding resolution
if (minXenc > maxXenc) {
minXenc = maxXenc;
}
this.west = null;
this.minX = minXenc;
this.maxX = maxXenc;
encode(this.minX, this.maxX, this.minY, this.maxY, bbox);
}
}
/**
 * Encodes a bounding box into the provided byte array.
 * <p>
 * The four values are written with {@link LatLonShape#encodeTriangleBoxVal} in the order
 * {@code minY, minX, maxY, maxX}, each occupying {@code BYTES} bytes, so the destination
 * needs room for {@code 4 * BYTES} bytes.
 *
 * @param minX encoded minimum longitude, written at offset {@code BYTES}
 * @param maxX encoded maximum longitude, written at offset {@code 3 * BYTES}
 * @param minY encoded minimum latitude, written at offset {@code 0}
 * @param maxY encoded maximum latitude, written at offset {@code 2 * BYTES}
 * @param b    destination array; must not be {@code null}
 * @throws IllegalArgumentException if {@code b} is {@code null}
 */
private static void encode(final int minX, final int maxX, final int minY, final int maxY, byte[] b) {
  // A locally allocated replacement array would never be visible to the caller, so the
  // encoded box would be silently lost; reject a null destination instead.
  if (b == null) {
    throw new IllegalArgumentException("destination byte array must not be null");
  }
  LatLonShape.encodeTriangleBoxVal(minY, b, 0);
  LatLonShape.encodeTriangleBoxVal(minX, b, BYTES);
  LatLonShape.encodeTriangleBoxVal(maxY, b, 2 * BYTES);
  LatLonShape.encodeTriangleBoxVal(maxX, b, 3 * BYTES);
}
/**
 * Relates the bounding box of a range of triangles to this query.
 * <p>
 * The range is first compared against {@code bbox}. Only when that comparison reports
 * {@code CELL_OUTSIDE_QUERY} and the query crosses the dateline is the range compared
 * against the {@code west} box as well.
 */
@Override
protected Relation relateRangeBBoxToQuery(int minXOffset, int minYOffset, byte[] minTriangle,
                                          int maxXOffset, int maxYOffset, byte[] maxTriangle) {
  final Relation east = compareBBoxToRangeBBox(this.bbox, minXOffset, minYOffset, minTriangle, maxXOffset, maxYOffset, maxTriangle);
  // anything other than "outside", or a query with a single box, is already decided
  if (east != Relation.CELL_OUTSIDE_QUERY || this.crossesDateline() == false) {
    return east;
  }
  // outside the first box of a dateline crossing query: the west box has the final say
  return compareBBoxToRangeBBox(this.west, minXOffset, minYOffset, minTriangle, maxXOffset, maxYOffset, maxTriangle);
}
/** static utility method to compare a bbox with a range of triangles (just the bbox of the triangle collection) */
protected static Relation compareBBoxToRangeBBox(final byte[] bbox,
int minXOffset, int minYOffset, byte[] minTriangle,
int maxXOffset, int maxYOffset, byte[] maxTriangle) {
// check bounding box (DISJOINT)
if (FutureArrays.compareUnsigned(minTriangle, minXOffset, minXOffset + BYTES, bbox, 3 * BYTES, 4 * BYTES) > 0 ||
FutureArrays.compareUnsigned(maxTriangle, maxXOffset, maxXOffset + BYTES, bbox, BYTES, 2 * BYTES) < 0 ||
@ -87,6 +126,7 @@ final class LatLonShapeBoundingBoxQuery extends LatLonShapeQuery {
/** returns true if the query matches the encoded triangle */
@Override
protected boolean queryMatches(byte[] t) {
// decode indexed triangle
long a = NumericUtils.sortableBytesToLong(t, 4 * LatLonShape.BYTES);
long b = NumericUtils.sortableBytesToLong(t, 5 * LatLonShape.BYTES);
long c = NumericUtils.sortableBytesToLong(t, 6 * LatLonShape.BYTES);
@ -99,11 +139,19 @@ final class LatLonShapeBoundingBoxQuery extends LatLonShapeQuery {
int cY = (int)(c & 0x00000000FFFFFFFFL);
if (queryRelation == LatLonShape.QueryRelation.WITHIN) {
return bboxContainsTriangle(aX, aY, bX, bY, cX, cY, minX, maxX, minY, maxY);
return queryContainsTriangle(aX, aY, bX, bY, cX, cY);
}
return queryMatches(aX, aY, bX, bY, cX, cY);
}
/**
 * instance method to check if the query box contains the triangle (ax, ay) (bx, by) (cx, cy);
 * a dateline crossing query is checked as two boxes and the triangle must be contained by one of them
 */
private boolean queryContainsTriangle(int ax, int ay, int bx, int by, int cx, int cy) {
  if (this.crossesDateline() == false) {
    // single box
    return bboxContainsTriangle(ax, ay, bx, by, cx, cy, minX, maxX, minY, maxY);
  }
  // box from the minimum encoded longitude up to maxX
  final boolean inWestBox = bboxContainsTriangle(ax, ay, bx, by, cx, cy, MIN_LON_ENCODED, this.maxX, this.minY, this.maxY);
  // box from minX up to the maximum encoded longitude (only evaluated when the first box fails)
  return inWestBox
      || bboxContainsTriangle(ax, ay, bx, by, cx, cy, this.minX, MAX_LON_ENCODED, this.minY, this.maxY);
}
/** static utility method to check if a bounding box contains a point */
private static boolean bboxContainsPoint(int x, int y, int minX, int maxX, int minY, int maxY) {
return (x < minX || x > maxX || y < minY || y > maxY) == false;
@ -119,6 +167,10 @@ final class LatLonShapeBoundingBoxQuery extends LatLonShapeQuery {
/**
 * instance method to check if the query box contains the point (x, y);
 * a dateline crossing query is checked as two boxes and the point must fall in one of them
 */
private boolean queryContainsPoint(int x, int y) {
  if (this.crossesDateline() == false) {
    // single box
    return bboxContainsPoint(x, y, this.minX, this.maxX, this.minY, this.maxY);
  }
  // box from the minimum encoded longitude up to maxX
  final boolean inWestBox = bboxContainsPoint(x, y, MIN_LON_ENCODED, this.maxX, this.minY, this.maxY);
  // box from minX up to the maximum encoded longitude (only evaluated when the first box fails)
  return inWestBox
      || bboxContainsPoint(x, y, this.minX, MAX_LON_ENCODED, this.minY, this.maxY);
}
@ -135,7 +187,12 @@ final class LatLonShapeBoundingBoxQuery extends LatLonShapeQuery {
int tMaxY = StrictMath.max(StrictMath.max(aY, bY), cY);
// 2. check bounding boxes are disjoint
if (tMaxX < minX || tMinX > maxX || tMinY > maxY || tMaxY < minY) {
if (this.crossesDateline() == true) {
if (boxesAreDisjoint(tMinX, tMaxX, tMinY, tMaxY, MIN_LON_ENCODED, this.maxX, this.minY, this.maxY)
&& boxesAreDisjoint(tMinX, tMaxX, tMinY, tMaxY, this.minX, MAX_LON_ENCODED, this.minY, this.maxY)) {
return false;
}
} else if (tMaxX < minX || tMinX > maxX || tMinY > maxY || tMaxY < minY) {
return false;
}
@ -210,6 +267,10 @@ final class LatLonShapeBoundingBoxQuery extends LatLonShapeQuery {
/**
 * returns true if the edge (defined by (ax, ay) (bx, by)) intersects the query;
 * a dateline crossing query is checked as two boxes and the edge must intersect one of them
 */
private boolean edgeIntersectsQuery(int ax, int ay, int bx, int by) {
  if (this.crossesDateline() == false) {
    // single box
    return edgeIntersectsBox(ax, ay, bx, by, this.minX, this.maxX, this.minY, this.maxY);
  }
  // box from the minimum encoded longitude up to maxX
  final boolean hitsWestBox = edgeIntersectsBox(ax, ay, bx, by, MIN_LON_ENCODED, this.maxX, this.minY, this.maxY);
  // box from minX up to the maximum encoded longitude (only evaluated when the first box fails)
  return hitsWestBox
      || edgeIntersectsBox(ax, ay, bx, by, this.minX, MAX_LON_ENCODED, this.minY, this.maxY);
}
@ -230,6 +291,10 @@ final class LatLonShapeBoundingBoxQuery extends LatLonShapeQuery {
return (aMaxX < bMinX || aMinX > bMaxX || aMaxY < bMinY || aMinY > bMaxY);
}
/**
 * returns true if this query is treated as crossing the dateline, i.e. its encoded
 * minimum longitude ({@code minX}) is greater than its encoded maximum longitude ({@code maxX})
 */
public boolean crossesDateline() {
  return maxX < minX;
}
@Override
public boolean equals(Object o) {
return sameClassAs(o) && equalsTo(getClass().cast(o));
@ -237,13 +302,16 @@ final class LatLonShapeBoundingBoxQuery extends LatLonShapeQuery {
@Override
protected boolean equalsTo(Object o) {
return super.equalsTo(o) && Arrays.equals(bbox, ((LatLonShapeBoundingBoxQuery)o).bbox);
return super.equalsTo(o)
&& Arrays.equals(bbox, ((LatLonShapeBoundingBoxQuery)o).bbox)
&& Arrays.equals(west, ((LatLonShapeBoundingBoxQuery)o).west);
}
/** combines the superclass hash with the contents of both encoded boxes ({@code bbox} and {@code west}) */
@Override
public int hashCode() {
  final int withBBox = 31 * super.hashCode() + Arrays.hashCode(bbox);
  // Arrays.hashCode accepts a null array, which is the case for west when the query has a single box
  return 31 * withBBox + Arrays.hashCode(west);
}
@ -265,6 +333,9 @@ final class LatLonShapeBoundingBoxQuery extends LatLonShapeQuery {
sb.append(decodeLongitude(minX));
sb.append(" TO ");
sb.append(decodeLongitude(maxX));
if (maxX < minX) {
sb.append(" [crosses dateline!]");
}
sb.append(")");
return sb.toString();
}

View File

@ -284,16 +284,7 @@ public abstract class BaseLatLonShapeTestCase extends LuceneTestCase {
}
// BBox
Rectangle rect;
// quantizing the bbox may end up w/ bounding boxes crossing dateline...
// todo add support for bounding boxes crossing dateline
while (true) {
rect = GeoTestUtil.nextBoxNotCrossingDateline();
if (decodeLongitude(encodeLongitudeCeil(rect.minLon)) <= decodeLongitude(encodeLongitude(rect.maxLon)) &&
decodeLatitude(encodeLatitudeCeil(rect.minLat)) <= decodeLatitude(encodeLatitude(rect.maxLat))) {
break;
}
}
Rectangle rect = GeoTestUtil.nextBox();
QueryRelation queryRelation = RandomPicks.randomFrom(random(), QueryRelation.values());
Query query = newRectQuery(FIELD_NAME, queryRelation, rect.minLat, rect.maxLat, rect.minLon, rect.maxLon);
@ -328,6 +319,8 @@ public abstract class BaseLatLonShapeTestCase extends LuceneTestCase {
assertEquals(docID, docIDToID.nextDoc());
int id = (int) docIDToID.longValue();
boolean expected;
double qMinLon = quantizeLonCeil(rect.minLon);
double qMaxLon = quantizeLon(rect.maxLon);
if (liveDocs != null && liveDocs.get(docID) == false) {
// document is deleted
expected = false;
@ -335,8 +328,12 @@ public abstract class BaseLatLonShapeTestCase extends LuceneTestCase {
expected = false;
} else {
// check quantized poly against quantized query
expected = getValidator(queryRelation).testBBoxQuery(quantizeLatCeil(rect.minLat), quantizeLat(rect.maxLat),
quantizeLonCeil(rect.minLon), quantizeLon(rect.maxLon), shapes[id]);
if (qMinLon > qMaxLon && rect.crossesDateline() == false) {
// if the quantization creates a false dateline crossing (because of encodeCeil):
// then do not use encodeCeil
qMinLon = quantizeLon(rect.minLon);
}
expected = getValidator(queryRelation).testBBoxQuery(quantizeLatCeil(rect.minLat), quantizeLat(rect.maxLat), qMinLon, qMaxLon, shapes[id]);
}
if (hits.get(docID) != expected) {
@ -351,8 +348,7 @@ public abstract class BaseLatLonShapeTestCase extends LuceneTestCase {
b.append(" query=" + query + " docID=" + docID + "\n");
b.append(" shape=" + shapes[id] + "\n");
b.append(" deleted?=" + (liveDocs != null && liveDocs.get(docID) == false));
b.append(" rect=Rectangle(" + quantizeLatCeil(rect.minLat) + " TO " + quantizeLat(rect.maxLat) + " lon=" + quantizeLonCeil(rect.minLon) + " TO " + quantizeLon(rect.maxLon) + ")\n");
if (true) {
b.append(" rect=Rectangle(lat=" + quantizeLatCeil(rect.minLat) + " TO " + quantizeLat(rect.maxLat) + " lon=" + qMinLon + " TO " + quantizeLon(rect.maxLon) + ")\n"); if (true) {
fail("wrong hit (first of possibly more):\n\n" + b);
} else {
System.out.println(b.toString());

View File

@ -20,11 +20,15 @@ import com.carrotsearch.randomizedtesting.generators.RandomNumbers;
import org.apache.lucene.document.LatLonShape.QueryRelation;
import org.apache.lucene.geo.EdgeTree;
import org.apache.lucene.geo.GeoTestUtil;
import org.apache.lucene.geo.GeoUtils;
import org.apache.lucene.geo.Line;
import org.apache.lucene.geo.Line2D;
import org.apache.lucene.geo.Polygon2D;
import org.apache.lucene.index.PointValues.Relation;
import static org.apache.lucene.geo.GeoUtils.MAX_LON_INCL;
import static org.apache.lucene.geo.GeoUtils.MIN_LON_INCL;
/** random bounding box and polygon query tests for random generated {@link Line} types */
public class TestLatLonLineShapeQueries extends BaseLatLonShapeTestCase {
@ -78,13 +82,32 @@ public class TestLatLonLineShapeQueries extends BaseLatLonShapeTestCase {
Line l = (Line)shape;
if (queryRelation == QueryRelation.WITHIN) {
// within: bounding box of shape should be within query box
return minLat <= quantizeLat(l.minLat) && maxLat >= quantizeLat(l.maxLat)
&& minLon <= quantizeLon(l.minLon) && maxLon >= quantizeLon(l.maxLon);
double lMinLat = quantizeLat(l.minLat);
double lMinLon = quantizeLon(l.minLon);
double lMaxLat = quantizeLat(l.maxLat);
double lMaxLon = quantizeLon(l.maxLon);
if (minLon > maxLon) {
// crosses dateline:
return minLat <= lMinLat && maxLat >= lMaxLat
&& ((GeoUtils.MIN_LON_INCL <= lMinLon && maxLon >= lMaxLon)
|| (minLon <= lMinLon && GeoUtils.MAX_LON_INCL >= lMaxLon));
}
return minLat <= lMinLat && maxLat >= lMaxLat
&& minLon <= lMinLon && maxLon >= lMaxLon;
}
// to keep it simple we convert the bbox into a polygon and use poly2d
Line2D line = Line2D.create(quantizeLine(l));
Relation r = line.relate(minLat, maxLat, minLon, maxLon);
Relation r;
if (minLon > maxLon) {
// crosses dateline:
r = line.relate(minLat, maxLat, MIN_LON_INCL, maxLon);
if (r == Relation.CELL_OUTSIDE_QUERY) {
r = line.relate(minLat, maxLat, minLon, MAX_LON_INCL);
}
} else {
r = line.relate(minLat, maxLat, minLon, maxLon);
}
if (queryRelation == QueryRelation.DISJOINT) {
return r == Relation.CELL_OUTSIDE_QUERY;

View File

@ -81,9 +81,13 @@ public class TestLatLonPointShapeQueries extends BaseLatLonShapeTestCase {
@Override
public boolean testBBoxQuery(double minLat, double maxLat, double minLon, double maxLon, Object shape) {
Point p = (Point)shape;
double lat = decodeLatitude(encodeLatitude(p.lat));
double lon = decodeLongitude(encodeLongitude(p.lon));
boolean isDisjoint = lat < minLat || lat > maxLat || lon < minLon || lon > maxLon;
double lat = quantizeLat(p.lat);
double lon = quantizeLon(p.lon);
boolean isDisjoint = lat < minLat || lat > maxLat;
isDisjoint = isDisjoint || ((minLon > maxLon)
? lon < minLon && lon > maxLon
: lon < minLon || lon > maxLon);
if (queryRelation == QueryRelation.DISJOINT) {
return isDisjoint;
}

View File

@ -26,6 +26,9 @@ import org.apache.lucene.geo.Polygon2D;
import org.apache.lucene.geo.Tessellator;
import org.apache.lucene.index.PointValues.Relation;
import static org.apache.lucene.geo.GeoUtils.MAX_LON_INCL;
import static org.apache.lucene.geo.GeoUtils.MIN_LON_INCL;
/** random bounding box and polygon query tests for random indexed {@link Polygon} types */
public class TestLatLonPolygonShapeQueries extends BaseLatLonShapeTestCase {
@ -68,12 +71,32 @@ public class TestLatLonPolygonShapeQueries extends BaseLatLonShapeTestCase {
Polygon p = (Polygon)shape;
if (queryRelation == QueryRelation.WITHIN) {
// within: bounding box of shape should be within query box
return minLat <= quantizeLat(p.minLat) && maxLat >= quantizeLat(p.maxLat)
&& minLon <= quantizeLon(p.minLon) && maxLon >= quantizeLon(p.maxLon);
double pMinLat = quantizeLat(p.minLat);
double pMinLon = quantizeLon(p.minLon);
double pMaxLat = quantizeLat(p.maxLat);
double pMaxLon = quantizeLon(p.maxLon);
if (minLon > maxLon) {
// crosses dateline:
return minLat <= pMinLat && maxLat >= pMaxLat
&& ((MIN_LON_INCL <= pMinLon && maxLon >= pMaxLon)
|| (minLon <= pMinLon && MAX_LON_INCL >= pMaxLon));
}
return minLat <= pMinLat && maxLat >= pMaxLat
&& minLon <= pMinLon && maxLon >= pMaxLon;
}
Polygon2D poly = Polygon2D.create(quantizePolygon(p));
Relation r = poly.relate(minLat, maxLat, minLon, maxLon);
Relation r;
if (minLon > maxLon) {
// crosses dateline:
r = poly.relate(minLat, maxLat, MIN_LON_INCL, maxLon);
if (r == Relation.CELL_OUTSIDE_QUERY) {
r = poly.relate(minLat, maxLat, minLon, MAX_LON_INCL);
}
} else {
r = poly.relate(minLat, maxLat, minLon, maxLon);
}
if (queryRelation == QueryRelation.DISJOINT) {
return r == Relation.CELL_OUTSIDE_QUERY;
}