LUCENE-7392: Add point based LatLonBoundingBox as new RangeField Type.

This commit is contained in:
Nicholas Knize 2017-09-19 14:20:37 -05:00
parent 4e5597716a
commit bf71650ad7
7 changed files with 548 additions and 5 deletions

View File

@ -25,6 +25,9 @@ New Features
* LUCENE-7940: Add BengaliAnalyzer. (Md. Abdulla-Al-Sun via Robert Muir)
* LUCENE-7392: Add point-based LatLonBoundingBox as a new RangeField type.
(Nick Knize)
Optimizations
* LUCENE-7905: Optimize how OrdinalMap (used by

View File

@ -156,7 +156,7 @@ public class TestIntRangeFieldQueries extends BaseRangeFieldQueryTestCase {
}
/** IntRange test class implementation - use to validate IntRange */
private class IntTestRange extends Range {
protected class IntTestRange extends Range {
int[] min;
int[] max;

View File

@ -0,0 +1,231 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.document;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.NumericUtils;
import static org.apache.lucene.geo.GeoEncodingUtils.decodeLatitude;
import static org.apache.lucene.geo.GeoEncodingUtils.decodeLongitude;
import static org.apache.lucene.geo.GeoEncodingUtils.encodeLatitude;
import static org.apache.lucene.geo.GeoEncodingUtils.encodeLongitude;
/**
 * An indexed 2-Dimension Bounding Box field for the Geospatial Lat/Lon Coordinate system
 * <p>
 * This field indexes 2-dimension Latitude, Longitude based Geospatial Bounding Boxes. The bounding boxes are defined as
 * {@code minLat, minLon, maxLat, maxLon} where the min/max lat,lon pairs are supplied using double floating point
 * precision.
 * <p>
 * Multiple values for the same field in one document is supported.
 *
 * <p>
 * This field defines the following static factory methods for common search operations over bounding boxes:
 * <ul>
 *   <li>{@link #newIntersectsQuery newIntersectsQuery()} matches bounding boxes that intersect the defined search bounding box.
 *   <li>{@link #newWithinQuery newWithinQuery()} matches bounding boxes that are within the defined search bounding box.
 *   <li>{@link #newContainsQuery newContainsQuery()} matches bounding boxes that contain the defined search bounding box.
 *   <li>{@link #newCrossesQuery newCrossesQuery()} matches bounding boxes that cross the defined search bounding box.
 * </ul>
 *
 * <p>
 * The following Field limitations and restrictions apply:
 * <ul>
 *   <li>Dateline wrapping is not supported.
 *   <li>Due to an encoding limitation Eastern and Western Hemisphere Bounding Boxes that share the dateline are not supported.
 * </ul>
 */
public class LatLonBoundingBox extends Field {
  /** uses same encoding as {@link LatLonPoint} so numBytes is the same */
  public static final int BYTES = LatLonPoint.BYTES;

  /**
   * Create a new 2D LatLonBoundingBox field representing a 2 dimensional geospatial bounding box
   *
   * @param name field name. must not be null
   * @param minLat minimum latitude value (in degrees); valid in [-90.0 : 90.0]
   * @param minLon minimum longitude value (in degrees); valid in [-180.0 : 180.0]
   * @param maxLat maximum latitude value (in degrees); valid in [minLat : 90.0]
   * @param maxLon maximum longitude value (in degrees); valid in [minLon : 180.0]
   * @throws IllegalArgumentException if {@code minLat} exceeds {@code maxLat} or {@code minLon} exceeds {@code maxLon}
   */
  public LatLonBoundingBox(String name, final double minLat, final double minLon,
                           final double maxLat, final double maxLon) {
    super(name, getType(2));
    setRangeValues(minLat, minLon, maxLat, maxLon);
  }

  /**
   * Builds the frozen field type for this field: each geo dimension contributes two point
   * dimensions (one for the minimum, one for the maximum), each {@link #BYTES} wide.
   *
   * @param geoDimensions number of geospatial dimensions (2 for lat/lon)
   * @return a new, frozen {@link FieldType}
   */
  static FieldType getType(int geoDimensions) {
    FieldType ft = new FieldType();
    ft.setDimensions(geoDimensions*2, BYTES);
    ft.freeze();
    return ft;
  }

  /**
   * Changes the values of the field
   * @param minLat minimum latitude value (in degrees); valid in [-90.0 : 90.0]
   * @param minLon minimum longitude value (in degrees); valid in [-180.0 : 180.0]
   * @param maxLat maximum latitude value (in degrees); valid in [minLat : 90.0]
   * @param maxLon maximum longitude value (in degrees); valid in [minLon : 180.0]
   * @throws IllegalArgumentException if {@code min} or {@code max} is invalid
   */
  public void setRangeValues(double minLat, double minLon, double maxLat, double maxLon) {
    checkArgs(minLat, minLon, maxLat, maxLon);
    final byte[] bytes;
    if (fieldsData == null) {
      // first assignment: allocate the backing array (minLat, minLon, maxLat, maxLon)
      bytes = new byte[4*BYTES];
      fieldsData = new BytesRef(bytes);
    } else {
      // subsequent assignments overwrite the existing backing array in place
      bytes = ((BytesRef)fieldsData).bytes;
    }
    encode(minLat, minLon, bytes, 0);
    encode(maxLat, maxLon, bytes, 2 * BYTES);
  }

  /**
   * validate the two-dimension arguments
   *
   * @throws IllegalArgumentException if {@code minLon > maxLon} or {@code minLat > maxLat}
   */
  static void checkArgs(final double minLat, final double minLon, final double maxLat, final double maxLon) {
    // dateline crossing not supported
    if (minLon > maxLon) {
      throw new IllegalArgumentException("cannot have minLon [" + minLon + "] exceed maxLon [" + maxLon + "].");
    }
    // pole crossing not supported
    if (minLat > maxLat) {
      throw new IllegalArgumentException("cannot have minLat [" + minLat + "] exceed maxLat [" + maxLat + "].");
    }
  }

  /**
   * Create a new 2d query that finds all indexed 2d LatLonBoundingBox values that intersect the defined
   * 2d bounding box
   * @param field field name. must not be null
   * @param minLat minimum latitude value (in degrees); valid in [-90.0 : 90.0]
   * @param minLon minimum longitude value (in degrees); valid in [-180.0 : 180.0]
   * @param maxLat maximum latitude value (in degrees); valid in [minLat : 90.0]
   * @param maxLon maximum longitude value (in degrees); valid in [minLon : 180.0]
   * @return query for matching intersecting 2d bounding boxes
   */
  public static Query newIntersectsQuery(String field, final double minLat, final double minLon,
                                         final double maxLat, final double maxLon) {
    return newRangeQuery(field, minLat, minLon, maxLat, maxLon, RangeFieldQuery.QueryType.INTERSECTS);
  }

  /**
   * Create a new 2d query that finds all indexed 2d LatLonBoundingBox values that are within the defined
   * 2d bounding box
   * @param field field name. must not be null
   * @param minLat minimum latitude value (in degrees); valid in [-90.0 : 90.0]
   * @param minLon minimum longitude value (in degrees); valid in [-180.0 : 180.0]
   * @param maxLat maximum latitude value (in degrees); valid in [minLat : 90.0]
   * @param maxLon maximum longitude value (in degrees); valid in [minLon : 180.0]
   * @return query for matching 2d bounding boxes that are within the defined bounding box
   */
  public static Query newWithinQuery(String field, final double minLat, final double minLon,
                                     final double maxLat, final double maxLon) {
    return newRangeQuery(field, minLat, minLon, maxLat, maxLon, RangeFieldQuery.QueryType.WITHIN);
  }

  /**
   * Create a new 2d query that finds all indexed 2d LatLonBoundingBox values that contain the defined
   * 2d bounding box
   * @param field field name. must not be null
   * @param minLat minimum latitude value (in degrees); valid in [-90.0 : 90.0]
   * @param minLon minimum longitude value (in degrees); valid in [-180.0 : 180.0]
   * @param maxLat maximum latitude value (in degrees); valid in [minLat : 90.0]
   * @param maxLon maximum longitude value (in degrees); valid in [minLon : 180.0]
   * @return query for matching 2d bounding boxes that contain the defined bounding box
   */
  public static Query newContainsQuery(String field, final double minLat, final double minLon,
                                       final double maxLat, final double maxLon) {
    return newRangeQuery(field, minLat, minLon, maxLat, maxLon, RangeFieldQuery.QueryType.CONTAINS);
  }

  /**
   * Create a new 2d query that finds all indexed 2d LatLonBoundingBox values that cross the defined
   * 2d bounding box
   * @param field field name. must not be null
   * @param minLat minimum latitude value (in degrees); valid in [-90.0 : 90.0]
   * @param minLon minimum longitude value (in degrees); valid in [-180.0 : 180.0]
   * @param maxLat maximum latitude value (in degrees); valid in [minLat : 90.0]
   * @param maxLon maximum longitude value (in degrees); valid in [minLon : 180.0]
   * @return query for matching 2d bounding boxes that cross the defined bounding box
   */
  public static Query newCrossesQuery(String field, final double minLat, final double minLon,
                                      final double maxLat, final double maxLon) {
    return newRangeQuery(field, minLat, minLon, maxLat, maxLon, RangeFieldQuery.QueryType.CROSSES);
  }

  /** helper method to create a two-dimensional geospatial bounding box query */
  private static Query newRangeQuery(String field, final double minLat, final double minLon,
                                     final double maxLat, final double maxLon, final RangeFieldQuery.QueryType queryType) {
    checkArgs(minLat, minLon, maxLat, maxLon);
    return new RangeFieldQuery(field, encode(minLat, minLon, maxLat, maxLon), 2, queryType) {
      @Override
      protected String toString(byte[] ranges, int dimension) { return LatLonBoundingBox.toString(ranges, dimension); }
    };
  }

  /**
   * encodes a two-dimensional geo bounding box into a byte array
   *
   * @return a new array of {@code 4 * BYTES} bytes laid out as {@code minLat, minLon, maxLat, maxLon}
   */
  static byte[] encode(double minLat, double minLon, double maxLat, double maxLon) {
    byte[] b = new byte[BYTES * 4];
    encode(minLat, minLon, b, 0);
    encode(maxLat, maxLon, b, BYTES*2);
    return b;
  }

  /**
   * encodes a two-dimensional geopoint (lat, lon) into a byte array
   *
   * @param lat latitude value (in degrees)
   * @param lon longitude value (in degrees)
   * @param result destination array; must not be null. Latitude is written at {@code offset},
   *               longitude at {@code offset + BYTES}
   * @param offset position in {@code result} at which the encoded latitude starts
   * @throws IllegalArgumentException if {@code result} is null
   */
  static void encode(double lat, double lon, byte[] result, int offset) {
    if (result == null) {
      // a locally allocated replacement array would never reach the caller, so the encoded
      // value would be silently lost; fail loudly instead
      throw new IllegalArgumentException("result array must not be null");
    }
    NumericUtils.intToSortableBytes(encodeLatitude(lat), result, offset);
    NumericUtils.intToSortableBytes(encodeLongitude(lon), result, offset + BYTES);
  }

  /**
   * Returns {@code SimpleClassName <name:[minLat : maxLat],[minLon : maxLon]>} using the decoded
   * (quantized) values currently stored in the field.
   */
  @Override
  public String toString() {
    StringBuilder sb = new StringBuilder();
    sb.append(getClass().getSimpleName());
    sb.append(" <");
    sb.append(name);
    sb.append(':');
    byte[] b = ((BytesRef)fieldsData).bytes;
    // append both dimensions; the formatted ranges must actually be added to the builder
    sb.append(toString(b, 0));
    sb.append(',');
    sb.append(toString(b, 1));
    sb.append('>');
    return sb.toString();
  }

  /**
   * Formats one dimension of an encoded bounding box as {@code [min : max]}.
   * <p>
   * The array layout is {@code minLat, minLon, maxLat, maxLon}, each value {@link #BYTES} wide, so
   * the minimum point starts at offset 0 and the maximum point starts at {@code ranges.length/2};
   * within each point the longitude follows the latitude by {@link #BYTES}.
   *
   * @param ranges encoded bounding box
   * @param dimension 0 for latitude, 1 for longitude
   * @return the decoded range for the requested dimension
   * @throws IllegalArgumentException if {@code dimension} is neither 0 nor 1
   */
  private static String toString(byte[] ranges, int dimension) {
    double min, max;
    int minOfs = 0;
    int maxOfs = ranges.length/2;
    switch (dimension) {
      case 0:
        min = decodeLatitude(ranges, minOfs);
        max = decodeLatitude(ranges, maxOfs);
        break;
      case 1:
        // longitude is stored immediately after the latitude of the same point
        min = decodeLongitude(ranges, minOfs + BYTES);
        max = decodeLongitude(ranges, maxOfs + BYTES);
        break;
      default:
        throw new IllegalArgumentException("invalid dimension [" + dimension + "] in toString");
    }
    return "[" + min + " : " + max + "]";
  }
}

View File

@ -77,7 +77,8 @@ import static org.apache.lucene.geo.GeoEncodingUtils.encodeLongitudeCeil;
// to the field is not actually what gets indexed. Float would be 1E-5 error vs 1E-7, but it might be
// a better tradeoff? then it would be completely transparent to the user and lucene would be "lossless".
public class LatLonPoint extends Field {
/** LatLonPoint is encoded as integer values so number of bytes is 4 */
public static final int BYTES = Integer.BYTES;
/**
* Type for an indexed LatLonPoint
* <p>

View File

@ -0,0 +1,299 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.LatLonBoundingBox;
import org.apache.lucene.geo.GeoTestUtil;
import org.apache.lucene.geo.Rectangle;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.store.Directory;
import static org.apache.lucene.geo.GeoEncodingUtils.decodeLatitude;
import static org.apache.lucene.geo.GeoEncodingUtils.decodeLongitude;
import static org.apache.lucene.geo.GeoEncodingUtils.encodeLatitude;
import static org.apache.lucene.geo.GeoEncodingUtils.encodeLongitude;
/** Random testing for the {@link LatLonBoundingBox} field type. */
public class TestLatLonBoundingBoxQueries extends BaseRangeFieldQueryTestCase {
  private static final String FIELD_NAME = "geoBoundingBoxField";

  /** Not used by this test: fields are added through {@link #addRange(Document, Range)} instead. */
  @Override
  protected LatLonBoundingBox newRangeField(Range r) {
    // addRange is called instead of this method
    throw new UnsupportedOperationException("this method should never be called");
  }

  /** Adds the given {@link GeoBBox} to the document as a {@link LatLonBoundingBox} field. */
  @Override
  protected void addRange(Document doc, Range r) {
    GeoBBox b = (GeoBBox)r;
    doc.add(new LatLonBoundingBox(FIELD_NAME, b.minLat, b.minLon, b.maxLat, b.maxLon));
  }

  /** Basic test for 2d boxes */
  public void testBasics() throws Exception {
    Directory dir = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir);

    // Shared meridian test (disjoint)
    Document document = new Document();
    document.add(new LatLonBoundingBox(FIELD_NAME, -20d, -180d, 20d, -100d));
    writer.addDocument(document);

    // intersects (crosses)
    document = new Document();
    document.add(new LatLonBoundingBox(FIELD_NAME, 0d, 14.096488952636719d, 10d, 20d));
    writer.addDocument(document);

    // intersects (contains, crosses)
    document = new Document();
    document.add(new LatLonBoundingBox(FIELD_NAME, -10.282592503353953d, -1d, 1d, 14.096488952636719d));
    writer.addDocument(document);

    // intersects (crosses)
    document = new Document();
    document.add(new LatLonBoundingBox(FIELD_NAME, -1d, -11d, 1d, 1d));
    writer.addDocument(document);

    // intersects (crosses)
    document = new Document();
    document.add(new LatLonBoundingBox(FIELD_NAME, -1d, 14.096488952636719d, 5d, 30d));
    writer.addDocument(document);

    // intersects (within)
    document = new Document();
    document.add(new LatLonBoundingBox(FIELD_NAME, -5d, 0d, -1d, 14.096488952636719d));
    writer.addDocument(document);

    // search
    IndexReader reader = writer.getReader();
    IndexSearcher searcher = newSearcher(reader);
    assertEquals(5, searcher.count(LatLonBoundingBox.newIntersectsQuery(FIELD_NAME,
        -10.282592503353953d, 0.0d, 0.0d, 14.096488952636719d)));
    assertEquals(1, searcher.count(LatLonBoundingBox.newWithinQuery(FIELD_NAME,
        -10.282592503353953d, 0.0d, 0.0d, 14.096488952636719d)));
    assertEquals(1, searcher.count(LatLonBoundingBox.newContainsQuery(FIELD_NAME,
        -10.282592503353953d, 0.0d, 0.0d, 14.096488952636719d)));
    assertEquals(4, searcher.count(LatLonBoundingBox.newCrossesQuery(FIELD_NAME,
        -10.282592503353953d, 0.0d, 0.0d, 14.096488952636719d)));

    reader.close();
    writer.close();
    dir.close();
  }

  /** Bounding boxes always have two dimensions: latitude and longitude. */
  @Override
  protected int dimension() {
    return 2;
  }

  @Override
  protected Range nextRange(int dimensions) {
    // create a random bounding box in 2 dimensions
    return new GeoBBox(dimensions);
  }

  @Override
  protected Query newIntersectsQuery(Range r) {
    GeoBBox b = (GeoBBox)r;
    return LatLonBoundingBox.newIntersectsQuery(FIELD_NAME, b.minLat, b.minLon, b.maxLat, b.maxLon);
  }

  @Override
  protected Query newContainsQuery(Range r) {
    GeoBBox b = (GeoBBox)r;
    return LatLonBoundingBox.newContainsQuery(FIELD_NAME, b.minLat, b.minLon, b.maxLat, b.maxLon);
  }

  @Override
  protected Query newWithinQuery(Range r) {
    GeoBBox b = (GeoBBox)r;
    return LatLonBoundingBox.newWithinQuery(FIELD_NAME, b.minLat, b.minLon, b.maxLat, b.maxLon);
  }

  @Override
  protected Query newCrossesQuery(Range r) {
    GeoBBox b = (GeoBBox)r;
    return LatLonBoundingBox.newCrossesQuery(FIELD_NAME, b.minLat, b.minLon, b.maxLat, b.maxLon);
  }

  /**
   * Test {@link Range} implementation backed by a random lat/lon bounding box.
   * Dimension 0 is latitude and dimension 1 is longitude.
   */
  protected static class GeoBBox extends Range {
    protected double minLat, minLon, maxLat, maxLon;
    protected int dimension;

    /**
     * Creates a random box that does not cross the dateline. Every coordinate is round-tripped
     * through the geo encoding, so the stored values are exactly representable in encoded form.
     */
    GeoBBox(int dimension) {
      this.dimension = dimension;
      final Rectangle box = GeoTestUtil.nextBoxNotCrossingDateline();
      minLat = quantizeLat(box.minLat);
      minLon = quantizeLon(box.minLon);
      maxLat = quantizeLat(box.maxLat);
      maxLon = quantizeLon(box.maxLon);
    }

    /** Round-trips a latitude through encode/decode. */
    protected static double quantizeLat(double lat) {
      return decodeLatitude(encodeLatitude(lat));
    }

    /**
     * Round-trips a longitude through encode/decode. Static, like {@link #quantizeLat(double)}, so
     * the constructor does not invoke an overridable instance method.
     */
    protected static double quantizeLon(double lon) {
      return decodeLongitude(encodeLongitude(lon));
    }

    @Override
    protected int numDimensions() {
      return dimension;
    }

    @Override
    protected Double getMin(int dim) {
      if (dim == 0) {
        return minLat;
      } else if (dim == 1) {
        return minLon;
      }
      throw new IndexOutOfBoundsException("dimension " + dim + " is greater than " + dimension);
    }

    @Override
    protected void setMin(int dim, Object val) {
      if (dim == 0) {
        setMinLat((Double)val);
      } else if (dim == 1) {
        setMinLon((Double)val);
      } else {
        throw new IndexOutOfBoundsException("dimension " + dim + " is greater than " + dimension);
      }
    }

    /** Sets the minimum latitude; if it exceeds the current maximum the two are swapped to keep min &lt;= max. */
    private void setMinLat(double d) {
      if (d > maxLat) {
        minLat = maxLat;
        maxLat = d;
      } else {
        minLat = d;
      }
    }

    /** Sets the minimum longitude; if it exceeds the current maximum the two are swapped to keep min &lt;= max. */
    private void setMinLon(double d) {
      if (d > maxLon) {
        minLon = maxLon;
        maxLon = d;
      } else {
        minLon = d;
      }
    }

    /** Sets the maximum latitude; if it is below the current minimum the two are swapped to keep min &lt;= max. */
    private void setMaxLat(double d) {
      if (d < minLat) {
        maxLat = minLat;
        minLat = d;
      } else {
        maxLat = d;
      }
    }

    /** Sets the maximum longitude; if it is below the current minimum the two are swapped to keep min &lt;= max. */
    private void setMaxLon(double d) {
      if (d < minLon) {
        maxLon = minLon;
        minLon = d;
      } else {
        maxLon = d;
      }
    }

    @Override
    protected Double getMax(int dim) {
      if (dim == 0) {
        return maxLat;
      } else if (dim == 1) {
        return maxLon;
      }
      throw new IndexOutOfBoundsException("dimension " + dim + " is greater than " + dimension);
    }

    @Override
    protected void setMax(int dim, Object val) {
      if (dim == 0) {
        setMaxLat((Double)val);
      } else if (dim == 1) {
        setMaxLon((Double)val);
      } else {
        throw new IndexOutOfBoundsException("dimension " + dim + " is greater than " + dimension);
      }
    }

    /** Two boxes are equal when their dimension count and all four coordinates match exactly. */
    @Override
    protected boolean isEqual(Range other) {
      GeoBBox o = (GeoBBox)other;
      if (this.dimension != o.dimension) return false;
      if (this.minLat != o.minLat) return false;
      if (this.minLon != o.minLon) return false;
      if (this.maxLat != o.maxLat) return false;
      if (this.maxLon != o.maxLon) return false;
      return true;
    }

    /** Boxes are disjoint when they do not overlap in latitude or do not overlap in longitude. */
    @Override
    protected boolean isDisjoint(Range other) {
      GeoBBox o = (GeoBBox)other;
      if (minLat > o.maxLat || maxLat < o.minLat) return true;
      if (minLon > o.maxLon || maxLon < o.minLon) return true;
      return false;
    }

    /** This box is within {@code other} exactly when {@code other} contains this box. */
    @Override
    protected boolean isWithin(Range other) {
      GeoBBox o = (GeoBBox)other;
      return o.contains(this);
    }

    /** This box contains {@code other} when it covers other's full latitude and longitude extent (edges inclusive). */
    @Override
    protected boolean contains(Range other) {
      GeoBBox o = (GeoBBox)other;
      if (minLat > o.minLat || maxLat < o.maxLat) return false;
      if (minLon > o.minLon || maxLon < o.maxLon) return false;
      return true;
    }

    @Override
    public String toString() {
      StringBuilder b = new StringBuilder();
      b.append("GeoBoundingBox(lat: ");
      b.append(minLat);
      b.append(" TO ");
      b.append(maxLat);
      b.append(", lon: ");
      b.append(minLon);
      b.append(" TO ");
      b.append(maxLon);
      b.append(")");
      return b.toString();
    }
  }
}

View File

@ -307,6 +307,11 @@ public class GeoTestUtil {
return nextBoxInternal(nextLatitude(), nextLatitude(), nextLongitude(), nextLongitude(), true);
}
/**
 * Returns the next pseudorandom box that does not cross the 180th meridian (the dateline).
 * <p>
 * Delegates to {@code nextBoxInternal} with two random latitudes, two random longitudes and a
 * final flag of {@code false} (presumably the "may cross the dateline" switch; confirm against
 * {@code nextBoxInternal}).
 *
 * @return a random {@link Rectangle}
 */
public static Rectangle nextBoxNotCrossingDateline() {
return nextBoxInternal(nextLatitude(), nextLatitude(), nextLongitude(), nextLongitude(), false);
}
/** Makes an n-gon, centered at the provided lat/lon, and each vertex approximately
* distanceMeters away from the center.
*

View File

@ -141,12 +141,12 @@ public abstract class BaseRangeFieldQueryTestCase extends LuceneTestCase {
if (x == m) {
int d = (int)Math.floor(m/2);
// current could be multivalue but old may not be, so use first box
if (even == 0) {
if (even == 0) { // even is min
ranges[id][0].setMin(d, ranges[oldID][0].getMin(d));
if (VERBOSE) {
System.out.println(" id=" + id + " box=" + ranges[id] + " (same min[" + d + "] as doc=" + oldID + ")");
}
} else {
} else { // odd is max
ranges[id][0].setMax(d, ranges[oldID][0].getMax(d));
if (VERBOSE) {
System.out.println(" id=" + id + " box=" + ranges[id] + " (same max[" + d + "] as doc=" + oldID + ")");
@ -184,7 +184,7 @@ public abstract class BaseRangeFieldQueryTestCase extends LuceneTestCase {
doc.add(new NumericDocValuesField("id", id));
if (ranges[id][0].isMissing == false) {
for (int n=0; n<ranges[id].length; ++n) {
doc.add(newRangeField(ranges[id][n]));
addRange(doc, ranges[id][n]);
}
}
w.addDocument(doc);
@ -293,6 +293,10 @@ public abstract class BaseRangeFieldQueryTestCase extends LuceneTestCase {
IOUtils.close(r, dir);
}
/**
 * Adds the given range to the document as an indexed field.
 * <p>
 * This default implementation adds the single field returned by {@code newRangeField(box)}.
 * Subclasses that build their field differently can override this method instead.
 *
 * @param doc document that receives the field
 * @param box range to index
 */
protected void addRange(Document doc, Range box) {
doc.add(newRangeField(box));
}
protected boolean expectedResult(Range queryRange, Range[] range, Range.QueryType queryType) {
for (int i=0; i<range.length; ++i) {
if (expectedBBoxQueryResult(queryRange, range[i], queryType) == true) {