LUCENE-6780: Improves GeoPointDistanceQuery accuracy with large radius. Improves testing rigor to GeoPointField

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1709926 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Nick Knize 2015-10-21 21:59:08 +00:00
parent 369aa32aea
commit d369057766
20 changed files with 858 additions and 1139 deletions

View File

@ -33,8 +33,10 @@ package org.apache.lucene.util;
public class SloppyMath {
/**
* Returns the distance in kilometers between two points
* specified in decimal degrees (latitude/longitude).
* Returns the Haversine distance in kilometers between two points
* specified in decimal degrees (latitude/longitude). This works correctly
* even if the dateline is between the two points.
*
* @param lat1 Latitude of the first point.
* @param lon1 Longitude of the first point.
* @param lat2 Latitude of the second point.

View File

@ -18,6 +18,7 @@ package org.apache.lucene.bkdtree;
*/
import java.io.IOException;
import java.util.Set;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReader;
@ -78,7 +79,6 @@ public class BKDPointInBBoxQuery extends Query {
// used in the first pass:
return new ConstantScoreWeight(this) {
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
LeafReader reader = context.reader();
@ -96,9 +96,7 @@ public class BKDPointInBBoxQuery extends Query {
DocIdSet result = tree.intersect(minLat, maxLat, minLon, maxLon, null, treeDV.delegate);
final DocIdSetIterator disi = result.iterator();
return new ConstantScoreScorer(this, score(), disi);
return new ConstantScoreScorer(this, score(), result.iterator());
}
};
}

View File

@ -19,12 +19,15 @@ package org.apache.lucene.bkdtree;
import java.io.IOException;
import java.util.Arrays;
import java.util.Set;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.search.ConstantScoreScorer;
import org.apache.lucene.search.ConstantScoreWeight;
import org.apache.lucene.search.ConstantScoreScorer;
import org.apache.lucene.search.ConstantScoreWeight;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
@ -147,9 +150,7 @@ public class BKDPointInPolygonQuery extends Query {
}
}, treeDV.delegate);
final DocIdSetIterator disi = result.iterator();
return new ConstantScoreScorer(this, score(), disi);
return new ConstantScoreScorer(this, score(), result.iterator());
}
};
}

View File

@ -131,13 +131,40 @@ final class BKDTreeReader implements Accountable {
return state.docs.build(hitCount);
}
private boolean accept(QueryState state, int docID) throws IOException {
//System.out.println(" check accept docID=" + docID);
state.sndv.setDocument(docID);
// How many values this doc has:
int count = state.sndv.count();
for(int j=0;j<count;j++) {
long enc = state.sndv.valueAt(j);
int latEnc = (int) ((enc>>32) & 0xffffffffL);
int lonEnc = (int) (enc & 0xffffffffL);
//System.out.println(" lat=" + BKDTreeWriter.decodeLat(latEnc) + " lon=" + BKDTreeWriter.decodeLon(lonEnc));
if (latEnc >= state.latMinEnc &&
latEnc < state.latMaxEnc &&
lonEnc >= state.lonMinEnc &&
lonEnc < state.lonMaxEnc &&
(state.latLonFilter == null ||
state.latLonFilter.accept(BKDTreeWriter.decodeLat(latEnc), BKDTreeWriter.decodeLon(lonEnc)))) {
//System.out.println(" yes");
return true;
}
}
return false;
}
/** Fast path: this is called when the query rect fully encompasses all cells under this node. */
private int addAll(QueryState state, int nodeID) throws IOException {
//System.out.println(" addAll nodeID=" + nodeID);
//long latRange = (long) cellLatMaxEnc - (long) cellLatMinEnc;
//long lonRange = (long) cellLonMaxEnc - (long) cellLonMinEnc;
if (nodeID >= leafNodeOffset) {
//System.out.println(" leaf");
/*
System.out.println("A: " + BKDTreeWriter.decodeLat(cellLatMinEnc)
@ -161,6 +188,8 @@ final class BKDTreeReader implements Accountable {
state.docs.grow(count);
for(int i=0;i<count;i++) {
int docID = state.in.readInt();
//System.out.println(" docID=" + docID);
assert accept(state, docID);
state.docs.add(docID);
}
@ -188,10 +217,13 @@ final class BKDTreeReader implements Accountable {
int cellLatMinEnc, int cellLatMaxEnc, int cellLonMinEnc, int cellLonMaxEnc)
throws IOException {
//System.out.println("\nBKD: intersect nodeID=" + nodeID + " lat=" + BKDTreeWriter.decodeLat(state.latMinEnc) + " TO " + BKDTreeWriter.decodeLat(state.latMaxEnc) +
//" lon=" + BKDTreeWriter.decodeLon(state.lonMinEnc) + " TO " + BKDTreeWriter.decodeLon(state.lonMaxEnc));
// 2.06 sec -> 1.52 sec for 225 OSM London queries:
if (state.latLonFilter != null) {
// Only call the filter when the current cell does not fully contain the bbox:
// Don't check the filter if the current cell fully contains the query bbox (just keep recursing in that case):
if (cellLatMinEnc > state.latMinEnc || cellLatMaxEnc < state.latMaxEnc ||
cellLonMinEnc > state.lonMinEnc || cellLonMaxEnc < state.lonMaxEnc) {
@ -209,6 +241,8 @@ final class BKDTreeReader implements Accountable {
} else {
// The cell crosses the shape boundary, so we fall through and do full filtering
}
} else {
//System.out.println(" straight recurse");
}
// TODO: clean this up: the bbox case should also just be a filter, and we should assert filter != null at the start
} else if (state.latMinEnc <= cellLatMinEnc && state.latMaxEnc >= cellLatMaxEnc && state.lonMinEnc <= cellLonMinEnc && state.lonMaxEnc >= cellLonMaxEnc) {
@ -230,11 +264,13 @@ final class BKDTreeReader implements Accountable {
//System.out.println("\nintersect node=" + nodeID + " vs " + leafNodeOffset);
if (nodeID >= leafNodeOffset) {
// Leaf node; scan and filter all points in this block:
//System.out.println(" intersect leaf nodeID=" + nodeID + " vs leafNodeOffset=" + leafNodeOffset + " fp=" + leafBlockFPs[nodeID-leafNodeOffset]);
int hitCount = 0;
long fp = leafBlockFPs[nodeID-leafNodeOffset];
//System.out.println(" intersect leaf fp=" + fp);
if (fp == 0) {
// Dead end node (adversary case):
//System.out.println(" dead-end leaf");
@ -256,27 +292,9 @@ final class BKDTreeReader implements Accountable {
state.docs.grow(count);
for(int i=0;i<count;i++) {
int docID = state.in.readInt();
state.sndv.setDocument(docID);
// How many values this doc has:
int docValueCount = state.sndv.count();
for(int j=0;j<docValueCount;j++) {
long enc = state.sndv.valueAt(j);
int latEnc = (int) ((enc>>32) & 0xffffffffL);
int lonEnc = (int) (enc & 0xffffffffL);
if (latEnc >= state.latMinEnc &&
latEnc < state.latMaxEnc &&
lonEnc >= state.lonMinEnc &&
lonEnc < state.lonMaxEnc &&
(state.latLonFilter == null ||
state.latLonFilter.accept(BKDTreeWriter.decodeLat(latEnc), BKDTreeWriter.decodeLon(lonEnc)))) {
state.docs.add(docID);
hitCount++;
// Stop processing values for this doc:
break;
}
if (accept(state, docID)) {
state.docs.add(docID);
hitCount++;
}
}
@ -298,7 +316,7 @@ final class BKDTreeReader implements Accountable {
if (dim == 0) {
//System.out.println(" split on lat=" + splitValue);
//System.out.println(" split on lat=" + BKDTreeWriter.decodeLat(splitValue));
// Inner node split on lat:
@ -308,6 +326,8 @@ final class BKDTreeReader implements Accountable {
count += intersect(state,
2*nodeID,
cellLatMinEnc, splitValue, cellLonMinEnc, cellLonMaxEnc);
} else {
//System.out.println(" no recurse left");
}
// Right node:
@ -316,31 +336,37 @@ final class BKDTreeReader implements Accountable {
count += intersect(state,
2*nodeID+1,
splitValue, cellLatMaxEnc, cellLonMinEnc, cellLonMaxEnc);
} else {
//System.out.println(" no recurse right");
}
} else {
// Inner node split on lon:
assert dim == 1;
// System.out.println(" split on lon=" + splitValue);
//System.out.println(" split on lon=" + BKDTreeWriter.decodeLon(splitValue));
// Left node:
if (state.lonMinEnc < splitValue) {
// System.out.println(" recurse left");
//System.out.println(" recurse left");
count += intersect(state,
2*nodeID,
cellLatMinEnc, cellLatMaxEnc, cellLonMinEnc, splitValue);
} else {
//System.out.println(" no recurse left");
}
// Right node:
if (state.lonMaxEnc >= splitValue) {
// System.out.println(" recurse right");
//System.out.println(" recurse right");
count += intersect(state,
2*nodeID+1,
cellLatMinEnc, cellLatMaxEnc, splitValue, cellLonMaxEnc);
} else {
//System.out.println(" no recurse right");
}
}
//System.out.println(" return nodeID=" + nodeID);
return count;
}
}

View File

@ -131,7 +131,6 @@ class BKDTreeWriter {
}
public void add(double lat, double lon, int docID) throws IOException {
if (validLat(lat) == false) {
throw new IllegalArgumentException("invalid lat: " + lat);
}
@ -677,6 +676,7 @@ class BKDTreeWriter {
// on those lists:
int docID = docIDs[i];
if (docID != lastDocID) {
//System.out.println(" docID=" + docID);
out.writeInt(docID);
lastDocID = docID;
}

View File

@ -39,7 +39,7 @@ final class OfflineLatLonWriter implements LatLonWriter {
out = tempDir.createTempOutput(tempFileNamePrefix, "bkd", IOContext.DEFAULT);
this.count = count;
}
@Override
public void append(int latEnc, int lonEnc, long ord, int docID) throws IOException {
out.writeInt(latEnc);

View File

@ -106,4 +106,24 @@ public final class GeoPointField extends Field {
}
fieldsData = GeoUtils.mortonHash(lon, lat);
}
public double getLon() {
return GeoUtils.mortonUnhashLon((long) fieldsData);
}
public double getLat() {
return GeoUtils.mortonUnhashLat((long) fieldsData);
}
@Override
public String toString() {
if (fieldsData == null) {
return null;
}
StringBuilder sb = new StringBuilder();
sb.append(GeoUtils.mortonUnhashLon((long) fieldsData));
sb.append(',');
sb.append(GeoUtils.mortonUnhashLat((long) fieldsData));
return sb.toString();
}
}

View File

@ -18,40 +18,47 @@ package org.apache.lucene.search;
*/
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.util.GeoProjectionUtils;
import org.apache.lucene.util.GeoRect;
import org.apache.lucene.util.GeoUtils;
import org.apache.lucene.util.SloppyMath;
/** Implements a simple point distance query on a GeoPoint field. This is based on
* {@link org.apache.lucene.search.GeoPointInBBoxQuery} and is implemented using a two phase approach. First,
* like {@code GeoPointInBBoxQueryImpl} candidate terms are queried using the numeric ranges based on
* the morton codes of the min and max lat/lon pairs that intersect the boundary of the point-radius
* circle (see {@link org.apache.lucene.util.GeoUtils#lineCrossesSphere}. Terms
* circle. Terms
* passing this initial filter are then passed to a secondary {@code postFilter} method that verifies whether the
* decoded lat/lon point fall within the specified query distance (see {@link org.apache.lucene.util.SloppyMath#haversin}.
* All morton value comparisons are subject to the same precision tolerance defined in
* {@value org.apache.lucene.util.GeoUtils#TOLERANCE} and distance comparisons are subject to the accuracy of the
* haversine formula (from R.W. Sinnott, "Virtues of the Haversine", Sky and Telescope, vol. 68, no. 2, 1984, p. 159)
*
*
* Note: This query currently uses haversine which is a sloppy distance calculation (see above reference). For large
* <p>Note: This query currently uses haversine which is a sloppy distance calculation (see above reference). For large
* queries one can expect upwards of 400m error. Vincenty shrinks this to ~40m error but pays a penalty for computing
* using the spheroid
*
* @lucene.experimental
*/
* @lucene.experimental */
public class GeoPointDistanceQuery extends GeoPointInBBoxQuery {
protected final double centerLon;
protected final double centerLat;
protected final double radius;
protected final double radiusMeters;
/** NOTE: radius is in meters. */
public GeoPointDistanceQuery(final String field, final double centerLon, final double centerLat, final double radius) {
this(field, computeBBox(centerLon, centerLat, radius), centerLon, centerLat, radius);
public GeoPointDistanceQuery(final String field, final double centerLon, final double centerLat, final double radiusMeters) {
this(field, GeoUtils.circleToBBox(centerLon, centerLat, radiusMeters), centerLon, centerLat, radiusMeters);
}
private GeoPointDistanceQuery(final String field, GeoBoundingBox bbox, final double centerLon,
final double centerLat, final double radius) {
private GeoPointDistanceQuery(final String field, GeoRect bbox, final double centerLon,
final double centerLat, final double radiusMeters) {
super(field, bbox.minLon, bbox.minLat, bbox.maxLon, bbox.maxLat);
{
// check longitudinal overlap (limits radius)
final double maxRadius = SloppyMath.haversin(centerLat, centerLon, centerLat, (180.0 + centerLon) % 360)*1000.0;
if (radiusMeters > maxRadius) {
throw new IllegalArgumentException("radiusMeters " + radiusMeters + " exceeds maxRadius [" + maxRadius
+ "] at location [" + centerLon + " " + centerLat + "]");
}
}
if (GeoUtils.isValidLon(centerLon) == false) {
throw new IllegalArgumentException("invalid centerLon " + centerLon);
@ -61,9 +68,13 @@ public class GeoPointDistanceQuery extends GeoPointInBBoxQuery {
throw new IllegalArgumentException("invalid centerLat " + centerLat);
}
if (radiusMeters <= 0.0) {
throw new IllegalArgumentException("invalid radiusMeters " + radiusMeters);
}
this.centerLon = centerLon;
this.centerLat = centerLat;
this.radius = radius;
this.radiusMeters = radiusMeters;
}
@Override
@ -71,25 +82,15 @@ public class GeoPointDistanceQuery extends GeoPointInBBoxQuery {
if (maxLon < minLon) {
BooleanQuery.Builder bqb = new BooleanQuery.Builder();
GeoPointDistanceQueryImpl left = new GeoPointDistanceQueryImpl(field, this, new GeoBoundingBox(-180.0D, maxLon,
GeoPointDistanceQueryImpl left = new GeoPointDistanceQueryImpl(field, this, new GeoRect(GeoUtils.MIN_LON_INCL, maxLon,
minLat, maxLat));
bqb.add(new BooleanClause(left, BooleanClause.Occur.SHOULD));
GeoPointDistanceQueryImpl right = new GeoPointDistanceQueryImpl(field, this, new GeoBoundingBox(minLon, 180.0D,
GeoPointDistanceQueryImpl right = new GeoPointDistanceQueryImpl(field, this, new GeoRect(minLon, GeoUtils.MAX_LON_INCL,
minLat, maxLat));
bqb.add(new BooleanClause(right, BooleanClause.Occur.SHOULD));
return bqb.build();
}
return new GeoPointDistanceQueryImpl(field, this, new GeoBoundingBox(this.minLon, this.maxLon, this.minLat, this.maxLat));
}
static GeoBoundingBox computeBBox(final double centerLon, final double centerLat, final double radius) {
double[] t = GeoProjectionUtils.pointFromLonLatBearing(centerLon, centerLat, 0, radius, null);
double[] r = GeoProjectionUtils.pointFromLonLatBearing(centerLon, centerLat, 90, radius, null);
double[] b = GeoProjectionUtils.pointFromLonLatBearing(centerLon, centerLat, 180, radius, null);
double[] l = GeoProjectionUtils.pointFromLonLatBearing(centerLon, centerLat, 270, radius, null);
return new GeoBoundingBox(GeoUtils.normalizeLon(l[0]), GeoUtils.normalizeLon(r[0]), GeoUtils.normalizeLat(b[1]),
GeoUtils.normalizeLat(t[1]));
return new GeoPointDistanceQueryImpl(field, this, new GeoRect(this.minLon, this.maxLon, this.minLat, this.maxLat));
}
@Override
@ -102,7 +103,7 @@ public class GeoPointDistanceQuery extends GeoPointInBBoxQuery {
if (Double.compare(that.centerLat, centerLat) != 0) return false;
if (Double.compare(that.centerLon, centerLon) != 0) return false;
if (Double.compare(that.radius, radius) != 0) return false;
if (Double.compare(that.radiusMeters, radiusMeters) != 0) return false;
return true;
}
@ -115,7 +116,7 @@ public class GeoPointDistanceQuery extends GeoPointInBBoxQuery {
result = 31 * result + (int) (temp ^ (temp >>> 32));
temp = Double.doubleToLongBits(centerLat);
result = 31 * result + (int) (temp ^ (temp >>> 32));
temp = Double.doubleToLongBits(radius);
temp = Double.doubleToLongBits(radiusMeters);
result = 31 * result + (int) (temp ^ (temp >>> 32));
return result;
}
@ -136,17 +137,8 @@ public class GeoPointDistanceQuery extends GeoPointInBBoxQuery {
.append(centerLat)
.append(']')
.append(" Distance: ")
.append(radius)
.append(" m")
.append(" Lower Left: [")
.append(minLon)
.append(',')
.append(minLat)
.append(']')
.append(" Upper Right: [")
.append(maxLon)
.append(',')
.append(maxLat)
.append(radiusMeters)
.append(" meters")
.append("]")
.toString();
}
@ -159,7 +151,7 @@ public class GeoPointDistanceQuery extends GeoPointInBBoxQuery {
return this.centerLat;
}
public double getRadius() {
return this.radius;
public double getRadiusMeters() {
return this.radiusMeters;
}
}

View File

@ -19,9 +19,11 @@ package org.apache.lucene.search;
import java.io.IOException;
import org.apache.lucene.document.GeoPointField;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.GeoRect;
import org.apache.lucene.util.GeoUtils;
import org.apache.lucene.util.SloppyMath;
@ -32,7 +34,7 @@ import org.apache.lucene.util.SloppyMath;
final class GeoPointDistanceQueryImpl extends GeoPointInBBoxQueryImpl {
private final GeoPointDistanceQuery query;
GeoPointDistanceQueryImpl(final String field, final GeoPointDistanceQuery q, final GeoBoundingBox bbox) {
GeoPointDistanceQueryImpl(final String field, final GeoPointDistanceQuery q, final GeoRect bbox) {
super(field, bbox.minLon, bbox.minLat, bbox.maxLon, bbox.maxLat);
query = q;
}
@ -53,14 +55,31 @@ final class GeoPointDistanceQueryImpl extends GeoPointInBBoxQueryImpl {
super(tenum, minLon, minLat, maxLon, maxLat);
}
/**
* Computes the maximum shift for the given pointDistanceQuery. This prevents unnecessary depth traversal
* given the size of the distance query.
*/
@Override
protected short computeMaxShift() {
final short shiftFactor;
if (query.radiusMeters > 1000000) {
shiftFactor = 5;
} else {
shiftFactor = 4;
}
return (short)(GeoPointField.PRECISION_STEP * shiftFactor);
}
@Override
protected boolean cellCrosses(final double minLon, final double minLat, final double maxLon, final double maxLat) {
return GeoUtils.rectCrossesCircle(minLon, minLat, maxLon, maxLat, query.centerLon, query.centerLat, query.radius);
return GeoUtils.rectCrossesCircle(minLon, minLat, maxLon, maxLat, query.centerLon, query.centerLat, query.radiusMeters);
}
@Override
protected boolean cellWithin(final double minLon, final double minLat, final double maxLon, final double maxLat) {
return GeoUtils.rectWithinCircle(minLon, minLat, maxLon, maxLat, query.centerLon, query.centerLat, query.radius);
return GeoUtils.rectWithinCircle(minLon, minLat, maxLon, maxLat, query.centerLon, query.centerLat, query.radiusMeters);
}
@Override
@ -77,7 +96,7 @@ final class GeoPointDistanceQueryImpl extends GeoPointInBBoxQueryImpl {
*/
@Override
protected boolean postFilter(final double lon, final double lat) {
return (SloppyMath.haversin(query.centerLat, query.centerLon, lat, lon) * 1000.0 <= query.radius);
return (SloppyMath.haversin(query.centerLat, query.centerLon, lat, lon) * 1000.0 <= query.radiusMeters);
}
}
@ -101,7 +120,7 @@ final class GeoPointDistanceQueryImpl extends GeoPointInBBoxQueryImpl {
return result;
}
public double getRadius() {
return query.getRadius();
public double getRadiusMeters() {
return query.getRadiusMeters();
}
}

View File

@ -17,31 +17,29 @@ package org.apache.lucene.search;
* limitations under the License.
*/
import java.util.List;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.util.GeoProjectionUtils;
/** Implements a point distance range query on a GeoPoint field. This is based on
* {@code org.apache.lucene.search.GeoPointDistanceQuery} and is implemented using a
* {@code org.apache.lucene.search.BooleanClause.MUST_NOT} clause to exclude any points that fall within
* minRadius from the provided point.
* minRadiusMeters from the provided point.
*
* @lucene.experimental
*/
public final class GeoPointDistanceRangeQuery extends GeoPointDistanceQuery {
protected final double minRadius;
protected final double minRadiusMeters;
public GeoPointDistanceRangeQuery(final String field, final double centerLon, final double centerLat,
final double minRadius, final double maxRadius) {
final double minRadiusMeters, final double maxRadius) {
super(field, centerLon, centerLat, maxRadius);
this.minRadius = minRadius;
this.minRadiusMeters = minRadiusMeters;
}
@Override
public Query rewrite(IndexReader reader) {
Query q = super.rewrite(reader);
if (minRadius == 0.0) {
if (minRadiusMeters == 0.0) {
return q;
}
@ -49,13 +47,13 @@ public final class GeoPointDistanceRangeQuery extends GeoPointDistanceQuery {
BooleanQuery.Builder bqb = new BooleanQuery.Builder();
// create a new exclusion query
GeoPointDistanceQuery exclude = new GeoPointDistanceQuery(field, centerLon, centerLat, minRadius);
GeoPointDistanceQuery exclude = new GeoPointDistanceQuery(field, centerLon, centerLat, minRadiusMeters);
// full map search
if (radius >= GeoProjectionUtils.SEMIMINOR_AXIS) {
bqb.add(new BooleanClause(new GeoPointInBBoxQuery(this.field, -180.0, -90.0, 180.0, 90.0), BooleanClause.Occur.MUST));
} else {
// if (radiusMeters >= GeoProjectionUtils.SEMIMINOR_AXIS) {
// bqb.add(new BooleanClause(new GeoPointInBBoxQuery(this.field, -180.0, -90.0, 180.0, 90.0), BooleanClause.Occur.MUST));
// } else {
bqb.add(new BooleanClause(q, BooleanClause.Occur.MUST));
}
// }
bqb.add(new BooleanClause(exclude, BooleanClause.Occur.MUST_NOT));
return bqb.build();
@ -77,10 +75,10 @@ public final class GeoPointDistanceRangeQuery extends GeoPointDistanceQuery {
.append(centerLat)
.append(']')
.append(" From Distance: ")
.append(minRadius)
.append(minRadiusMeters)
.append(" m")
.append(" To Distance: ")
.append(radius)
.append(radiusMeters)
.append(" m")
.append(" Lower Left: [")
.append(minLon)
@ -96,10 +94,10 @@ public final class GeoPointDistanceRangeQuery extends GeoPointDistanceQuery {
}
public double getMinRadiusMeters() {
return this.minRadius;
return this.minRadiusMeters;
}
public double getMaxRadiusMeters() {
return this.radius;
return this.radiusMeters;
}
}

View File

@ -19,10 +19,12 @@ package org.apache.lucene.search;
import java.io.IOException;
import org.apache.lucene.document.GeoPointField;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.GeoUtils;
import org.apache.lucene.util.SloppyMath;
import org.apache.lucene.util.ToStringUtils;
/** Package private implementation for the public facing GeoPointInBBoxQuery delegate class.
@ -59,6 +61,23 @@ class GeoPointInBBoxQueryImpl extends GeoPointTermQuery {
super(tenum, minLon, minLat, maxLon, maxLat);
}
@Override
protected short computeMaxShift() {
final short shiftFactor;
// compute diagonal radius
double midLon = (minLon + maxLon) * 0.5;
double midLat = (minLat + maxLat) * 0.5;
if (SloppyMath.haversin(minLat, minLon, midLat, midLon)*1000 > 1000000) {
shiftFactor = 5;
} else {
shiftFactor = 4;
}
return (short)(GeoPointField.PRECISION_STEP * shiftFactor);
}
/**
* Determine whether the quad-cell crosses the shape
*/

View File

@ -17,14 +17,14 @@ package org.apache.lucene.search;
* limitations under the License.
*/
import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.GeoRect;
import org.apache.lucene.util.GeoUtils;
import org.apache.lucene.util.ToStringUtils;
import java.io.IOException;
import java.util.Arrays;
/** Implements a simple point in polygon query on a GeoPoint field. This is based on
* {@code GeoPointInBBoxQueryImpl} and is implemented using a
@ -36,14 +36,14 @@ import java.util.Arrays;
* term is passed to the final point in polygon check. All value comparisons are subject
* to the same precision tolerance defined in {@value org.apache.lucene.util.GeoUtils#TOLERANCE}
*
* NOTES:
* <p>NOTES:
* 1. The polygon coordinates need to be in either clockwise or counter-clockwise order.
* 2. The polygon must not be self-crossing, otherwise the query may result in unexpected behavior
* 3. All latitude/longitude values must be in decimal degrees.
* 4. Complex computational geometry (e.g., dateline wrapping, polygon with holes) is not supported
* 5. For more advanced GeoSpatial indexing and query operations see spatial module
*
* @lucene.experimental
* @lucene.experimental
*/
public final class GeoPointInPolygonQuery extends GeoPointInBBoxQueryImpl {
// polygon position arrays - this avoids the use of any objects or
@ -60,7 +60,7 @@ public final class GeoPointInPolygonQuery extends GeoPointInBBoxQueryImpl {
}
/** Common constructor, used only internally. */
private GeoPointInPolygonQuery(final String field, GeoBoundingBox bbox, final double[] polyLons, final double[] polyLats) {
private GeoPointInPolygonQuery(final String field, GeoRect bbox, final double[] polyLons, final double[] polyLats) {
super(field, bbox.minLon, bbox.minLat, bbox.maxLon, bbox.maxLat);
if (polyLats.length != polyLons.length) {
throw new IllegalArgumentException("polyLats and polyLons must be equal length");
@ -183,7 +183,7 @@ public final class GeoPointInPolygonQuery extends GeoPointInBBoxQueryImpl {
}
}
private static GeoBoundingBox computeBBox(double[] polyLons, double[] polyLats) {
private static GeoRect computeBBox(double[] polyLons, double[] polyLats) {
if (polyLons.length != polyLats.length) {
throw new IllegalArgumentException("polyLons and polyLats must be equal length");
}
@ -206,7 +206,7 @@ public final class GeoPointInPolygonQuery extends GeoPointInBBoxQueryImpl {
maxLat = Math.max(polyLats[i], maxLat);
}
return new GeoBoundingBox(minLon, maxLon, minLat, maxLat);
return new GeoRect(minLon, maxLon, minLat, maxLat);
}
/**

View File

@ -48,9 +48,7 @@ abstract class GeoPointTermsEnum extends FilteredTermsEnum {
private final List<Range> rangeBounds = new LinkedList<>();
// detail level should be a factor of PRECISION_STEP limiting the depth of recursion (and number of ranges)
// in this case a factor of 4 brings the detail level to ~0.002/0.001 degrees lon/lat respectively (or ~222m/111m)
private static final short MAX_SHIFT = GeoPointField.PRECISION_STEP * 4;
protected static final short DETAIL_LEVEL = ((GeoUtils.BITS<<1)-MAX_SHIFT)/2;
protected final short DETAIL_LEVEL;
GeoPointTermsEnum(final TermsEnum tenum, final double minLon, final double minLat,
final double maxLon, final double maxLat) {
@ -61,6 +59,7 @@ abstract class GeoPointTermsEnum extends FilteredTermsEnum {
this.minLat = GeoUtils.mortonUnhashLat(rectMinHash);
this.maxLon = GeoUtils.mortonUnhashLon(rectMaxHash);
this.maxLat = GeoUtils.mortonUnhashLat(rectMaxHash);
DETAIL_LEVEL = (short)(((GeoUtils.BITS<<1)-computeMaxShift())/2);
computeRange(0L, (short) (((GeoUtils.BITS) << 1) - 1));
Collections.sort(rangeBounds);
@ -103,12 +102,23 @@ abstract class GeoPointTermsEnum extends FilteredTermsEnum {
// if cell is within and a factor of the precision step, or it crosses the edge of the shape add the range
final boolean within = res % GeoPointField.PRECISION_STEP == 0 && cellWithin(minLon, minLat, maxLon, maxLat);
if (within || (level == DETAIL_LEVEL && cellIntersectsShape(minLon, minLat, maxLon, maxLat))) {
rangeBounds.add(new Range(start, res, !within));
final short nextRes = (short)(res-1);
if (nextRes % GeoPointField.PRECISION_STEP == 0) {
rangeBounds.add(new Range(start, nextRes, !within));
rangeBounds.add(new Range(start|(1L<<nextRes), nextRes, !within));
} else {
rangeBounds.add(new Range(start, res, !within));
}
} else if (level < DETAIL_LEVEL && cellIntersectsMBR(minLon, minLat, maxLon, maxLat)) {
computeRange(start, (short) (res - 1));
}
}
protected short computeMaxShift() {
// in this case a factor of 4 brings the detail level to ~0.002/0.001 degrees lon/lat respectively (or ~222m/111m)
return GeoPointField.PRECISION_STEP * 4;
}
/**
* Determine whether the quad-cell crosses the shape
*/

View File

@ -109,6 +109,46 @@ public class GeoDistanceUtils {
return StrictMath.toDegrees(StrictMath.acos(1-((2d*h)/(cLat*cLat))));
}
/**
* Finds the closest point within a rectangle (defined by rMinX, rMinY, rMaxX, rMaxY) to the given (lon, lat) point
* the result is provided in closestPt. When the point is outside the rectangle, the closest point is on an edge
* or corner of the rectangle; else, the closest point is the point itself.
*/
public static void closestPointOnBBox(final double rMinX, final double rMinY, final double rMaxX, final double rMaxY,
final double lon, final double lat, double[] closestPt) {
assert closestPt != null && closestPt.length == 2;
closestPt[0] = 0;
closestPt[1] = 0;
boolean xSet = true;
boolean ySet = true;
if (lon > rMaxX) {
closestPt[0] = rMaxX;
} else if (lon < rMinX) {
closestPt[0] = rMinX;
} else {
xSet = false;
}
if (lat > rMaxY) {
closestPt[1] = rMaxY;
} else if (lat < rMinY) {
closestPt[1] = rMinY;
} else {
ySet = false;
}
if (closestPt[0] == 0 && xSet == false) {
closestPt[0] = lon;
}
if (closestPt[1] == 0 && ySet == false) {
closestPt[1] = lat;
}
}
/**
* Compute the inverse haversine to determine distance in degrees longitude for provided distance in meters
* @param lat latitude to compute delta degrees lon

View File

@ -63,6 +63,25 @@ public class GeoHashUtils {
return (l<<4)|hash.length();
}
/**
* Encode an existing geohash long to the provided precision
*/
public static long longEncode(long geohash, int level) {
final short precision = (short)(geohash & 15);
if (precision == level) {
return geohash;
} else if (precision > level) {
return ((geohash >>> (((precision - level) * 5) + 4)) << 4) | level;
}
return ((geohash >>> 4) << (((level - precision) * 5) + 4) | level);
}
public static long fromMorton(long morton, int level) {
long mFlipped = BitUtil.flipFlop(morton);
mFlipped >>>= (((GeoHashUtils.PRECISION - level) * 5) + MORTON_OFFSET);
return (mFlipped << 4) | level;
}
/**
* Encode to a geohash string from the geohash based long format
*/
@ -71,7 +90,7 @@ public class GeoHashUtils {
geoHashLong >>>= 4;
char[] chars = new char[level];
do {
chars[--level] = BASE_32[(int)(geoHashLong&31L)];
chars[--level] = BASE_32[(int) (geoHashLong&31L)];
geoHashLong>>>=5;
} while(level > 0);
@ -89,19 +108,10 @@ public class GeoHashUtils {
* Encode to a level specific geohash string from full resolution longitude, latitude
*/
public static final String stringEncode(final double lon, final double lat, final int level) {
// bit twiddle to geohash (since geohash is a swapped (lon/lat) encoding)
final long hashedVal = BitUtil.flipFlop(GeoUtils.mortonHash(lon, lat));
// convert to geohashlong
final long ghLong = fromMorton(GeoUtils.mortonHash(lon, lat), level);
return stringEncode(ghLong);
StringBuilder geoHash = new StringBuilder();
short precision = 0;
final short msf = (GeoUtils.BITS<<1)-5;
long mask = 31L<<msf;
do {
geoHash.append(BASE_32[(int)((mask & hashedVal)>>>(msf-(precision*5)))]);
// next 5 bits
mask >>>= 5;
} while (++precision < level);
return geoHash.toString();
}
/**
@ -151,7 +161,7 @@ public class GeoHashUtils {
final int level = (int)(geoHashLong&15);
final short odd = (short)(level & 1);
return BitUtil.flipFlop((geoHashLong >>> 4) << odd) << (((12 - level) * 5) + (MORTON_OFFSET - odd));
return BitUtil.flipFlop(((geoHashLong >>> 4) << odd) << (((12 - level) * 5) + (MORTON_OFFSET - odd)));
}
private static final char encode(int x, int y) {

View File

@ -31,6 +31,10 @@ public class GeoProjectionUtils {
static final double PI_OVER_2 = StrictMath.PI / 2.0D;
static final double SEMIMAJOR_AXIS2 = SEMIMAJOR_AXIS * SEMIMAJOR_AXIS;
static final double SEMIMINOR_AXIS2 = SEMIMINOR_AXIS * SEMIMINOR_AXIS;
public static final double MIN_LON_RADIANS = StrictMath.toRadians(GeoUtils.MIN_LON_INCL);
public static final double MIN_LAT_RADIANS = StrictMath.toRadians(GeoUtils.MIN_LAT_INCL);
public static final double MAX_LON_RADIANS = StrictMath.toRadians(GeoUtils.MAX_LON_INCL);
public static final double MAX_LAT_RADIANS = StrictMath.toRadians(GeoUtils.MAX_LAT_INCL);
/**
* Converts from geocentric earth-centered earth-fixed to geodesic lat/lon/alt
@ -375,8 +379,8 @@ public class GeoProjectionUtils {
final double lam = lambda - (1-c) * FLATTENING * sinAlpha *
(sigma + c * sinSigma * (cos2SigmaM + c * cosSigma * (-1 + 2* cos2SigmaM*cos2SigmaM)));
pt[0] = lon + StrictMath.toDegrees(lam);
pt[1] = StrictMath.toDegrees(lat2);
pt[0] = GeoUtils.normalizeLon(lon + StrictMath.toDegrees(lam));
pt[1] = GeoUtils.normalizeLat(StrictMath.toDegrees(lat2));
return pt;
}

View File

@ -25,11 +25,9 @@ import java.util.ArrayList;
* @lucene.experimental
*/
public final class GeoUtils {
private static final short MIN_LON = -180;
private static final short MIN_LAT = -90;
public static final short BITS = 32;
private static final double LON_SCALE = ((0x1L<<BITS)-1)/360.0D;
private static final double LAT_SCALE = ((0x1L<<BITS)-1)/180.0D;
public static final short BITS = 31;
private static final double LON_SCALE = (0x1L<<BITS)/360.0D;
private static final double LAT_SCALE = (0x1L<<BITS)/180.0D;
public static final double TOLERANCE = 1E-6;
/** Minimum longitude value. */
@ -61,24 +59,24 @@ public final class GeoUtils {
}
private static long scaleLon(final double val) {
return (long) ((val-MIN_LON) * LON_SCALE);
return (long) ((val-MIN_LON_INCL) * LON_SCALE);
}
private static long scaleLat(final double val) {
return (long) ((val-MIN_LAT) * LAT_SCALE);
return (long) ((val-MIN_LAT_INCL) * LAT_SCALE);
}
private static double unscaleLon(final long val) {
return (val / LON_SCALE) + MIN_LON;
return (val / LON_SCALE) + MIN_LON_INCL;
}
private static double unscaleLat(final long val) {
return (val / LAT_SCALE) + MIN_LAT;
return (val / LAT_SCALE) + MIN_LAT_INCL;
}
public static double compare(final double v1, final double v2) {
final double compare = v1-v2;
return Math.abs(compare) <= TOLERANCE ? 0 : compare;
final double delta = v1-v2;
return Math.abs(delta) <= TOLERANCE ? 0 : delta;
}
/**
@ -109,8 +107,13 @@ public final class GeoUtils {
return (off <= 180 ? off : 360-off) - 90;
}
public static final boolean bboxContains(final double lon, final double lat, final double minLon,
final double minLat, final double maxLon, final double maxLat) {
/**
* Determine if a bbox (defined by minLon, minLat, maxLon, maxLat) contains the provided point (defined by lon, lat)
* NOTE: this is a basic method that does not handle dateline or pole crossing. Unwrapping must be done before
* calling this method.
*/
public static boolean bboxContains(final double lon, final double lat, final double minLon,
final double minLat, final double maxLon, final double maxLat) {
return (compare(lon, minLon) >= 0 && compare(lon, maxLon) <= 0
&& compare(lat, minLat) >= 0 && compare(lat, maxLat) <= 0);
}
@ -161,7 +164,7 @@ public final class GeoUtils {
}
/**
* Computes whether a rectangle is wholly within another rectangle (shared boundaries allowed)
* Computes whether the first (a) rectangle is wholly within another (b) rectangle (shared boundaries allowed)
*/
public static boolean rectWithin(final double aMinX, final double aMinY, final double aMaxX, final double aMaxY,
final double bMinX, final double bMinY, final double bMaxX, final double bMaxY) {
@ -248,11 +251,11 @@ public final class GeoUtils {
*
* @param lon longitudinal center of circle (in degrees)
* @param lat latitudinal center of circle (in degrees)
* @param radius distance radius of circle (in meters)
* @param radiusMeters distance radius of circle (in meters)
* @return a list of lon/lat points representing the circle
*/
@SuppressWarnings({"unchecked","rawtypes"})
public static ArrayList<double[]> circleToPoly(final double lon, final double lat, final double radius) {
public static ArrayList<double[]> circleToPoly(final double lon, final double lat, final double radiusMeters) {
double angle;
// a little under-sampling (to limit the number of polygonal points): using archimedes estimation of pi
final int sides = 25;
@ -264,7 +267,7 @@ public final class GeoUtils {
final int sidesLen = sides-1;
for (int i=0; i<sidesLen; ++i) {
angle = (i*360/sides);
pt = GeoProjectionUtils.pointFromLonLatBearing(lon, lat, angle, radius, pt);
pt = GeoProjectionUtils.pointFromLonLatBearing(lon, lat, angle, radiusMeters, pt);
lons[i] = pt[0];
lats[i] = pt[1];
}
@ -291,47 +294,79 @@ public final class GeoUtils {
}
private static boolean rectAnyCornersOutsideCircle(final double rMinX, final double rMinY, final double rMaxX, final double rMaxY,
final double centerLon, final double centerLat, final double radius) {
return (SloppyMath.haversin(centerLat, centerLon, rMinY, rMinX)*1000.0 > radius
|| SloppyMath.haversin(centerLat, centerLon, rMaxY, rMinX)*1000.0 > radius
|| SloppyMath.haversin(centerLat, centerLon, rMaxY, rMaxX)*1000.0 > radius
|| SloppyMath.haversin(centerLat, centerLon, rMinY, rMaxX)*1000.0 > radius);
final double centerLon, final double centerLat, final double radiusMeters) {
return SloppyMath.haversin(centerLat, centerLon, rMinY, rMinX)*1000.0 > radiusMeters
|| SloppyMath.haversin(centerLat, centerLon, rMaxY, rMinX)*1000.0 > radiusMeters
|| SloppyMath.haversin(centerLat, centerLon, rMaxY, rMaxX)*1000.0 > radiusMeters
|| SloppyMath.haversin(centerLat, centerLon, rMinY, rMaxX)*1000.0 > radiusMeters;
}
private static boolean rectAnyCornersInCircle(final double rMinX, final double rMinY, final double rMaxX, final double rMaxY,
final double centerLon, final double centerLat, final double radius) {
return (SloppyMath.haversin(centerLat, centerLon, rMinY, rMinX)*1000.0 <= radius
|| SloppyMath.haversin(centerLat, centerLon, rMaxY, rMinX)*1000.0 <= radius
|| SloppyMath.haversin(centerLat, centerLon, rMaxY, rMaxX)*1000.0 <= radius
|| SloppyMath.haversin(centerLat, centerLon, rMinY, rMaxX)*1000.0 <= radius);
final double centerLon, final double centerLat, final double radiusMeters) {
return SloppyMath.haversin(centerLat, centerLon, rMinY, rMinX)*1000.0 <= radiusMeters
|| SloppyMath.haversin(centerLat, centerLon, rMaxY, rMinX)*1000.0 <= radiusMeters
|| SloppyMath.haversin(centerLat, centerLon, rMaxY, rMaxX)*1000.0 <= radiusMeters
|| SloppyMath.haversin(centerLat, centerLon, rMinY, rMaxX)*1000.0 <= radiusMeters;
}
public static boolean rectWithinCircle(final double rMinX, final double rMinY, final double rMaxX, final double rMaxY,
final double centerLon, final double centerLat, final double radius) {
return !(rectAnyCornersOutsideCircle(rMinX, rMinY, rMaxX, rMaxY, centerLon, centerLat, radius));
final double centerLon, final double centerLat, final double radiusMeters) {
return rectAnyCornersOutsideCircle(rMinX, rMinY, rMaxX, rMaxY, centerLon, centerLat, radiusMeters) == false;
}
/**
* Computes whether a rectangle crosses a circle
* Determine if a bbox (defined by minLon, minLat, maxLon, maxLat) contains the provided point (defined by lon, lat)
* NOTE: this is basic method that does not handle dateline or pole crossing. Unwrapping must be done before
* calling this method.
*/
public static boolean rectCrossesCircle(final double rMinX, final double rMinY, final double rMaxX, final double rMaxY,
final double centerLon, final double centerLat, final double radius) {
return rectAnyCornersInCircle(rMinX, rMinY, rMaxX, rMaxY, centerLon, centerLat, radius)
|| lineCrossesSphere(rMinX, rMinY, 0, rMaxX, rMinY, 0, centerLon, centerLat, 0, radius)
|| lineCrossesSphere(rMaxX, rMinY, 0, rMaxX, rMaxY, 0, centerLon, centerLat, 0, radius)
|| lineCrossesSphere(rMaxX, rMaxY, 0, rMinX, rMaxY, 0, centerLon, centerLat, 0, radius)
|| lineCrossesSphere(rMinX, rMaxY, 0, rMinX, rMinY, 0, centerLon, centerLat, 0, radius);
final double centerLon, final double centerLat, final double radiusMeters) {
return rectAnyCornersInCircle(rMinX, rMinY, rMaxX, rMaxY, centerLon, centerLat, radiusMeters)
|| isClosestPointOnRectWithinRange(rMinX, rMinY, rMaxX, rMaxY, centerLon, centerLat, radiusMeters);
}
public static boolean circleWithinRect(double rMinX, final double rMinY, final double rMaxX, final double rMaxY,
final double centerLon, final double centerLat, final double radius) {
return !(centerLon < rMinX || centerLon > rMaxX || centerLat > rMaxY || centerLat < rMinY
|| SloppyMath.haversin(rMinY, centerLon, centerLat, centerLon) < radius
|| SloppyMath.haversin(rMaxY, centerLon, centerLat, centerLon) < radius
|| SloppyMath.haversin(centerLat, rMinX, centerLat, centerLon) < radius
|| SloppyMath.haversin(centerLat, rMaxX, centerLat, centerLon) < radius);
private static boolean isClosestPointOnRectWithinRange(final double rMinX, final double rMinY, final double rMaxX, final double rMaxY,
final double centerLon, final double centerLat, final double radiusMeters) {
double[] closestPt = {0, 0};
GeoDistanceUtils.closestPointOnBBox(rMinX, rMinY, rMaxX, rMaxY, centerLon, centerLat, closestPt);
return SloppyMath.haversin(centerLat, centerLon, closestPt[1], closestPt[0])*1000.0 <= radiusMeters;
}
/**
* Compute Bounding Box for a circle using WGS-84 parameters
*/
public static GeoRect circleToBBox(final double centerLon, final double centerLat, final double radiusMeters) {
final double radLat = StrictMath.toRadians(centerLat);
final double radLon = StrictMath.toRadians(centerLon);
double radDistance = radiusMeters / GeoProjectionUtils.SEMIMAJOR_AXIS;
double minLat = radLat - radDistance;
double maxLat = radLat + radDistance;
double minLon;
double maxLon;
if (minLat > GeoProjectionUtils.MIN_LAT_RADIANS && maxLat < GeoProjectionUtils.MAX_LAT_RADIANS) {
double deltaLon = StrictMath.asin(StrictMath.sin(radDistance) / StrictMath.cos(radLat));
minLon = radLon - deltaLon;
if (minLon < GeoProjectionUtils.MIN_LON_RADIANS) {
minLon += 2d * StrictMath.PI;
}
maxLon = radLon + deltaLon;
if (maxLon > GeoProjectionUtils.MAX_LON_RADIANS) {
maxLon -= 2d * StrictMath.PI;
}
} else {
// a pole is within the distance
minLat = StrictMath.max(minLat, GeoProjectionUtils.MIN_LAT_RADIANS);
maxLat = StrictMath.min(maxLat, GeoProjectionUtils.MAX_LAT_RADIANS);
minLon = GeoProjectionUtils.MIN_LON_RADIANS;
maxLon = GeoProjectionUtils.MAX_LON_RADIANS;
}
return new GeoRect(StrictMath.toDegrees(minLon), StrictMath.toDegrees(maxLon),
StrictMath.toDegrees(minLat), StrictMath.toDegrees(maxLat));
}
/*
/**
* Computes whether or a 3dimensional line segment intersects or crosses a sphere
*
@ -344,12 +379,13 @@ public final class GeoUtils {
* @param centerLon longitudinal location of center search point (in degrees)
* @param centerLat latitudinal location of center search point (in degrees)
* @param centerAlt altitude of the center point (in meters)
* @param radius search sphere radius (in meters)
* @param radiusMeters search sphere radius (in meters)
* @return whether the provided line segment is a secant of the
*/
* /
// NOTE: not used for 2d at the moment. used for 3d w/ altitude (we can keep or add back)
private static boolean lineCrossesSphere(double lon1, double lat1, double alt1, double lon2,
double lat2, double alt2, double centerLon, double centerLat,
double centerAlt, double radius) {
double centerAlt, double radiusMeters) {
// convert to cartesian 3d (in meters)
double[] ecf1 = GeoProjectionUtils.llaToECF(lon1, lat1, alt1, null);
double[] ecf2 = GeoProjectionUtils.llaToECF(lon2, lat2, alt2, null);
@ -364,7 +400,7 @@ public final class GeoUtils {
final double a = dX*dX + dY*dY + dZ*dZ;
final double b = 2 * (fX*dX + fY*dY + fZ*dZ);
final double c = (fX*fX + fY*fY + fZ*fZ) - (radius*radius);
final double c = (fX*fX + fY*fY + fZ*fZ) - (radiusMeters*radiusMeters);
double discrim = (b*b)-(4*a*c);
if (discrim < 0) {
@ -382,6 +418,7 @@ public final class GeoUtils {
return true;
}
*/
public static boolean isValidLat(double lat) {
return Double.isNaN(lat) == false && lat >= MIN_LAT_INCL && lat <= MAX_LAT_INCL;

View File

@ -30,344 +30,59 @@ import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.lucene53.Lucene53Codec;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.SimpleCollector;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.MockDirectoryWrapper;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Accountables;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.BaseGeoPointTestCase;
import org.apache.lucene.util.GeoRect;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.LuceneTestCase.Nightly;
import org.apache.lucene.util.SloppyMath;
import org.apache.lucene.util.TestUtil;
import org.junit.BeforeClass;
public class TestBKDTree extends LuceneTestCase {
// TODO: can test framework assert we don't leak temp files?
private static boolean smallBBox;
public class TestBKDTree extends BaseGeoPointTestCase {
@BeforeClass
public static void beforeClass() {
smallBBox = random().nextBoolean();
@Override
protected void addPointToDoc(String field, Document doc, double lat, double lon) {
doc.add(new BKDPointField(field, lat, lon));
}
public void testAllLatEqual() throws Exception {
int numPoints = atLeast(10000);
double lat = randomLat();
double[] lats = new double[numPoints];
double[] lons = new double[numPoints];
boolean haveRealDoc = false;
for(int docID=0;docID<numPoints;docID++) {
int x = random().nextInt(20);
if (x == 17) {
// Some docs don't have a point:
lats[docID] = Double.NaN;
if (VERBOSE) {
System.out.println(" doc=" + docID + " is missing");
}
continue;
}
if (docID > 0 && x == 14 && haveRealDoc) {
int oldDocID;
while (true) {
oldDocID = random().nextInt(docID);
if (Double.isNaN(lats[oldDocID]) == false) {
break;
}
}
// Fully identical point:
lons[docID] = lons[oldDocID];
if (VERBOSE) {
System.out.println(" doc=" + docID + " lat=" + lat + " lon=" + lons[docID] + " (same lat/lon as doc=" + oldDocID + ")");
}
} else {
lons[docID] = randomLon();
haveRealDoc = true;
if (VERBOSE) {
System.out.println(" doc=" + docID + " lat=" + lat + " lon=" + lons[docID]);
}
}
lats[docID] = lat;
}
verify(lats, lons);
@Override
protected Query newBBoxQuery(String field, GeoRect rect) {
return new BKDPointInBBoxQuery(field, rect.minLat, rect.maxLat, rect.minLon, rect.maxLon);
}
public void testAllLonEqual() throws Exception {
int numPoints = atLeast(10000);
double theLon = randomLon();
double[] lats = new double[numPoints];
double[] lons = new double[numPoints];
boolean haveRealDoc = false;
//System.out.println("theLon=" + theLon);
for(int docID=0;docID<numPoints;docID++) {
int x = random().nextInt(20);
if (x == 17) {
// Some docs don't have a point:
lats[docID] = Double.NaN;
if (VERBOSE) {
System.out.println(" doc=" + docID + " is missing");
}
continue;
}
if (docID > 0 && x == 14 && haveRealDoc) {
int oldDocID;
while (true) {
oldDocID = random().nextInt(docID);
if (Double.isNaN(lats[oldDocID]) == false) {
break;
}
}
// Fully identical point:
lats[docID] = lats[oldDocID];
if (VERBOSE) {
System.out.println(" doc=" + docID + " lat=" + lats[docID] + " lon=" + theLon + " (same lat/lon as doc=" + oldDocID + ")");
}
} else {
lats[docID] = randomLat();
haveRealDoc = true;
if (VERBOSE) {
System.out.println(" doc=" + docID + " lat=" + lats[docID] + " lon=" + theLon);
}
}
lons[docID] = theLon;
}
verify(lats, lons);
@Override
protected Query newDistanceQuery(String field, double centerLat, double centerLon, double radiusMeters) {
// return new BKDDistanceQuery(field, centerLat, centerLon, radiusMeters);
return null;
}
public void testMultiValued() throws Exception {
int numPoints = atLeast(10000);
// Every doc has 2 points:
double[] lats = new double[2*numPoints];
double[] lons = new double[2*numPoints];
Directory dir = getDirectory();
IndexWriterConfig iwc = newIndexWriterConfig();
// We rely on docID order:
iwc.setMergePolicy(newLogMergePolicy());
Codec codec = TestUtil.alwaysDocValuesFormat(getDocValuesFormat());
iwc.setCodec(codec);
RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
for (int docID=0;docID<numPoints;docID++) {
Document doc = new Document();
lats[2*docID] = randomLat();
lons[2*docID] = randomLon();
doc.add(new BKDPointField("point", lats[2*docID], lons[2*docID]));
lats[2*docID+1] = randomLat();
lons[2*docID+1] = randomLon();
doc.add(new BKDPointField("point", lats[2*docID+1], lons[2*docID+1]));
w.addDocument(doc);
}
if (random().nextBoolean()) {
w.forceMerge(1);
}
IndexReader r = w.getReader();
w.close();
// We can't wrap with "exotic" readers because the BKD query must see the BKDDVFormat:
IndexSearcher s = newSearcher(r, false);
int iters = atLeast(100);
for (int iter=0;iter<iters;iter++) {
double lat0 = randomLat();
double lat1 = randomLat();
double lon0 = randomLon();
double lon1 = randomLon();
if (lat1 < lat0) {
double x = lat0;
lat0 = lat1;
lat1 = x;
}
if (lon1 < lon0) {
double x = lon0;
lon0 = lon1;
lon1 = x;
}
if (VERBOSE) {
System.out.println("\nTEST: iter=" + iter + " lat=" + lat0 + " TO " + lat1 + " lon=" + lon0 + " TO " + lon1);
}
Query query = new BKDPointInBBoxQuery("point", lat0, lat1, lon0, lon1);
final FixedBitSet hits = new FixedBitSet(r.maxDoc());
s.search(query, new SimpleCollector() {
private int docBase;
@Override
public boolean needsScores() {
return false;
}
@Override
protected void doSetNextReader(LeafReaderContext context) throws IOException {
docBase = context.docBase;
}
@Override
public void collect(int doc) {
hits.set(docBase+doc);
}
});
for(int docID=0;docID<lats.length/2;docID++) {
double latDoc1 = lats[2*docID];
double lonDoc1 = lons[2*docID];
double latDoc2 = lats[2*docID+1];
double lonDoc2 = lons[2*docID+1];
boolean expected = rectContainsPointEnc(lat0, lat1, lon0, lon1, latDoc1, lonDoc1) ||
rectContainsPointEnc(lat0, lat1, lon0, lon1, latDoc2, lonDoc2);
if (hits.get(docID) != expected) {
fail("docID=" + docID + " latDoc1=" + latDoc1 + " lonDoc1=" + lonDoc1 + " latDoc2=" + latDoc2 + " lonDoc2=" + lonDoc2 + " expected " + expected + " but got: " + hits.get(docID));
}
}
}
r.close();
dir.close();
@Override
protected Query newDistanceRangeQuery(String field, double centerLat, double centerLon, double minRadiusMeters, double radiusMeters) {
return null;
}
// A particularly tricky adversary:
public void testSamePointManyTimes() throws Exception {
int numPoints = atLeast(1000);
// Every doc has 2 points:
double theLat = randomLat();
double theLon = randomLon();
double[] lats = new double[numPoints];
Arrays.fill(lats, theLat);
double[] lons = new double[numPoints];
Arrays.fill(lons, theLon);
verify(lats, lons);
@Override
protected Query newPolygonQuery(String field, double[] lats, double[] lons) {
return new BKDPointInPolygonQuery(FIELD_NAME, lats, lons);
}
public void testRandomTiny() throws Exception {
// Make sure single-leaf-node case is OK:
doTestRandom(10);
}
public void testRandomMedium() throws Exception {
doTestRandom(10000);
}
@Nightly
public void testRandomBig() throws Exception {
doTestRandom(200000);
}
private void doTestRandom(int count) throws Exception {
int numPoints = atLeast(count);
if (VERBOSE) {
System.out.println("TEST: numPoints=" + numPoints);
}
double[] lats = new double[numPoints];
double[] lons = new double[numPoints];
boolean haveRealDoc = false;
for (int docID=0;docID<numPoints;docID++) {
int x = random().nextInt(20);
if (x == 17) {
// Some docs don't have a point:
lats[docID] = Double.NaN;
if (VERBOSE) {
System.out.println(" doc=" + docID + " is missing");
}
continue;
}
if (docID > 0 && x < 3 && haveRealDoc) {
int oldDocID;
while (true) {
oldDocID = random().nextInt(docID);
if (Double.isNaN(lats[oldDocID]) == false) {
break;
}
}
if (x == 0) {
// Identical lat to old point
lats[docID] = lats[oldDocID];
lons[docID] = randomLon();
if (VERBOSE) {
System.out.println(" doc=" + docID + " lat=" + lats[docID] + " lon=" + lons[docID] + " (same lat as doc=" + oldDocID + ")");
}
} else if (x == 1) {
// Identical lon to old point
lats[docID] = randomLat();
lons[docID] = lons[oldDocID];
if (VERBOSE) {
System.out.println(" doc=" + docID + " lat=" + lats[docID] + " lon=" + lons[docID] + " (same lon as doc=" + oldDocID + ")");
}
} else {
assert x == 2;
// Fully identical point:
lats[docID] = lats[oldDocID];
lons[docID] = lons[oldDocID];
if (VERBOSE) {
System.out.println(" doc=" + docID + " lat=" + lats[docID] + " lon=" + lons[docID] + " (same lat/lon as doc=" + oldDocID + ")");
}
}
} else {
lats[docID] = randomLat();
lons[docID] = randomLon();
haveRealDoc = true;
if (VERBOSE) {
System.out.println(" doc=" + docID + " lat=" + lats[docID] + " lon=" + lons[docID]);
}
}
}
verify(lats, lons);
}
private static final double TOLERANCE = 1e-7;
private static void verify(double[] lats, double[] lons) throws Exception {
IndexWriterConfig iwc = newIndexWriterConfig();
// Else we can get O(N^2) merging:
int mbd = iwc.getMaxBufferedDocs();
if (mbd != -1 && mbd < lats.length/100) {
iwc.setMaxBufferedDocs(lats.length/100);
}
@Override
protected void initIndexWriterConfig(final String fieldName, IndexWriterConfig iwc) {
final DocValuesFormat dvFormat = getDocValuesFormat();
Codec codec = new Lucene53Codec() {
@Override
public DocValuesFormat getDocValuesFormatForField(String field) {
if (field.equals("point")) {
if (field.equals(fieldName)) {
return dvFormat;
} else {
return super.getDocValuesFormatForField(field);
@ -375,189 +90,22 @@ public class TestBKDTree extends LuceneTestCase {
}
};
iwc.setCodec(codec);
Directory dir;
if (lats.length > 100000) {
dir = noVirusChecker(newFSDirectory(createTempDir("TestBKDTree")));
} else {
dir = getDirectory();
}
Set<Integer> deleted = new HashSet<>();
// RandomIndexWriter is too slow here:
IndexWriter w = new IndexWriter(dir, iwc);
for(int id=0;id<lats.length;id++) {
Document doc = new Document();
doc.add(newStringField("id", ""+id, Field.Store.NO));
doc.add(new NumericDocValuesField("id", id));
if (Double.isNaN(lats[id]) == false) {
doc.add(new BKDPointField("point", lats[id], lons[id]));
}
w.addDocument(doc);
if (id > 0 && random().nextInt(100) == 42) {
int idToDelete = random().nextInt(id);
w.deleteDocuments(new Term("id", ""+idToDelete));
deleted.add(idToDelete);
if (VERBOSE) {
System.out.println(" delete id=" + idToDelete);
}
}
}
if (random().nextBoolean()) {
w.forceMerge(1);
}
final IndexReader r = DirectoryReader.open(w, true);
w.close();
// We can't wrap with "exotic" readers because the BKD query must see the BKDDVFormat:
IndexSearcher s = newSearcher(r, false);
int numThreads = TestUtil.nextInt(random(), 2, 5);
List<Thread> threads = new ArrayList<>();
final int iters = atLeast(100);
final CountDownLatch startingGun = new CountDownLatch(1);
final AtomicBoolean failed = new AtomicBoolean();
for(int i=0;i<numThreads;i++) {
Thread thread = new Thread() {
@Override
public void run() {
try {
_run();
} catch (Exception e) {
failed.set(true);
throw new RuntimeException(e);
}
}
private void _run() throws Exception {
startingGun.await();
NumericDocValues docIDToID = MultiDocValues.getNumericValues(r, "id");
for (int iter=0;iter<iters && failed.get() == false;iter++) {
double lat0 = randomLat();
double lat1 = randomLat();
double lon0 = randomLon();
double lon1 = randomLon();
if (lat1 < lat0) {
double x = lat0;
lat0 = lat1;
lat1 = x;
}
boolean crossesDateLine;
if (lon1 < lon0) {
if (random().nextBoolean()) {
double x = lon0;
lon0 = lon1;
lon1 = x;
crossesDateLine = false;
} else {
crossesDateLine = true;
}
} else {
crossesDateLine = false;
}
if (VERBOSE) {
System.out.println("\nTEST: iter=" + iter + " lat=" + lat0 + " TO " + lat1 + " lon=" + lon0 + " TO " + lon1 + " crossesDateLine=" + crossesDateLine);
}
Query query;
// TODO: get poly query working with dateline crossing too (how?)!
if (crossesDateLine || random().nextBoolean()) {
query = new BKDPointInBBoxQuery("point", lat0, lat1, lon0, lon1);
} else {
double[] lats = new double[5];
double[] lons = new double[5];
lats[0] = lat0;
lons[0] = lon0;
lats[1] = lat1;
lons[1] = lon0;
lats[2] = lat1;
lons[2] = lon1;
lats[3] = lat0;
lons[3] = lon1;
lats[4] = lat0;
lons[4] = lon0;
query = new BKDPointInPolygonQuery("point", lats, lons);
}
if (VERBOSE) {
System.out.println(" using query: " + query);
}
final FixedBitSet hits = new FixedBitSet(r.maxDoc());
s.search(query, new SimpleCollector() {
private int docBase;
@Override
public boolean needsScores() {
return false;
}
@Override
protected void doSetNextReader(LeafReaderContext context) throws IOException {
docBase = context.docBase;
}
@Override
public void collect(int doc) {
hits.set(docBase+doc);
}
});
if (VERBOSE) {
System.out.println(" hitCount: " + hits.cardinality());
}
for(int docID=0;docID<r.maxDoc();docID++) {
int id = (int) docIDToID.get(docID);
boolean expected = deleted.contains(id) == false && rectContainsPointEnc(lat0, lat1, lon0, lon1, lats[id], lons[id]);
if (hits.get(docID) != expected) {
if (query instanceof BKDPointInPolygonQuery &&
(Math.abs(lat0-lats[id]) < TOLERANCE ||
Math.abs(lat1-lats[id]) < TOLERANCE ||
Math.abs(lon0-lons[id]) < TOLERANCE ||
Math.abs(lon1-lons[id]) < TOLERANCE)) {
// The poly check quantizes slightly differently, so we allow for boundary cases to disagree
} else {
// We do exact quantized comparison so the bbox query should never disagree:
fail(Thread.currentThread().getName() + ": iter=" + iter + " id=" + id + " docID=" + docID + " lat=" + lats[id] + " lon=" + lons[id] + " (bbox: lat=" + lat0 + " TO " + lat1 + " lon=" + lon0 + " TO " + lon1 + ") expected " + expected + " but got: " + hits.get(docID) + " deleted?=" + deleted.contains(id) + " query=" + query + " crossesDateLine=" + crossesDateLine);
}
}
}
}
}
};
thread.setName("T" + i);
thread.start();
threads.add(thread);
}
startingGun.countDown();
for(Thread thread : threads) {
thread.join();
}
IOUtils.close(r, dir);
}
@Override
protected Boolean rectContainsPoint(GeoRect rect, double pointLat, double pointLon) {
assert Double.isNaN(pointLat) == false;
int rectLatMinEnc = BKDTreeWriter.encodeLat(rect.minLat);
int rectLatMaxEnc = BKDTreeWriter.encodeLat(rect.maxLat);
int rectLonMinEnc = BKDTreeWriter.encodeLon(rect.minLon);
int rectLonMaxEnc = BKDTreeWriter.encodeLon(rect.maxLon);
private static boolean rectContainsPointEnc(double rectLatMin, double rectLatMax,
double rectLonMin, double rectLonMax,
double pointLat, double pointLon) {
if (Double.isNaN(pointLat)) {
return false;
}
int rectLatMinEnc = BKDTreeWriter.encodeLat(rectLatMin);
int rectLatMaxEnc = BKDTreeWriter.encodeLat(rectLatMax);
int rectLonMinEnc = BKDTreeWriter.encodeLon(rectLonMin);
int rectLonMaxEnc = BKDTreeWriter.encodeLon(rectLonMax);
int pointLatEnc = BKDTreeWriter.encodeLat(pointLat);
int pointLonEnc = BKDTreeWriter.encodeLon(pointLon);
if (rectLonMin < rectLonMax) {
if (rect.minLon < rect.maxLon) {
return pointLatEnc >= rectLatMinEnc &&
pointLatEnc < rectLatMaxEnc &&
pointLonEnc >= rectLonMinEnc &&
@ -571,30 +119,44 @@ public class TestBKDTree extends LuceneTestCase {
}
}
private static double randomLat() {
if (smallBBox) {
return 2.0 * (random().nextDouble()-0.5);
private static final double POLY_TOLERANCE = 1e-7;
@Override
protected Boolean polyRectContainsPoint(GeoRect rect, double pointLat, double pointLon) {
if (Math.abs(rect.minLat-pointLat) < POLY_TOLERANCE ||
Math.abs(rect.maxLat-pointLat) < POLY_TOLERANCE ||
Math.abs(rect.minLon-pointLon) < POLY_TOLERANCE ||
Math.abs(rect.maxLon-pointLon) < POLY_TOLERANCE) {
// The poly check quantizes slightly differently, so we allow for boundary cases to disagree
return null;
} else {
return -90 + 180.0 * random().nextDouble();
return rectContainsPoint(rect, pointLat, pointLon);
}
}
private static double randomLon() {
if (smallBBox) {
return 2.0 * (random().nextDouble()-0.5);
} else {
return -180 + 360.0 * random().nextDouble();
}
@Override
protected Boolean circleContainsPoint(double centerLat, double centerLon, double radiusMeters, double pointLat, double pointLon) {
double distanceKM = SloppyMath.haversin(centerLat, centerLon, pointLat, pointLon);
boolean result = distanceKM*1000.0 <= radiusMeters;
//System.out.println(" shouldMatch? centerLon=" + centerLon + " centerLat=" + centerLat + " pointLon=" + pointLon + " pointLat=" + pointLat + " result=" + result + " distanceMeters=" + (distanceKM * 1000));
return result;
}
@Override
protected Boolean distanceRangeContainsPoint(double centerLat, double centerLon, double minRadiusMeters, double radiusMeters, double pointLat, double pointLon) {
final double d = SloppyMath.haversin(centerLat, centerLon, pointLat, pointLon)*1000.0;
return d >= minRadiusMeters && d <= radiusMeters;
}
public void testEncodeDecode() throws Exception {
int iters = atLeast(10000);
boolean small = random().nextBoolean();
for(int iter=0;iter<iters;iter++) {
double lat = randomLat();
double lat = randomLat(small);
double latQuantized = BKDTreeWriter.decodeLat(BKDTreeWriter.encodeLat(lat));
assertEquals(lat, latQuantized, BKDTreeWriter.TOLERANCE);
double lon = randomLon();
double lon = randomLon(small);
double lonQuantized = BKDTreeWriter.decodeLon(BKDTreeWriter.encodeLon(lon));
assertEquals(lon, lonQuantized, BKDTreeWriter.TOLERANCE);
}
@ -611,18 +173,17 @@ public class TestBKDTree extends LuceneTestCase {
public void testAccountableHasDelegate() throws Exception {
Directory dir = getDirectory();
IndexWriterConfig iwc = newIndexWriterConfig();
Codec codec = TestUtil.alwaysDocValuesFormat(getDocValuesFormat());
iwc.setCodec(codec);
iwc.setCodec(TestUtil.alwaysDocValuesFormat(getDocValuesFormat()));
RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
Document doc = new Document();
doc.add(new BKDPointField("field", -18.2861, 147.7));
doc.add(new BKDPointField(FIELD_NAME, -18.2861, 147.7));
w.addDocument(doc);
IndexReader r = w.getReader();
// We can't wrap with "exotic" readers because the BKD query must see the BKDDVFormat:
IndexSearcher s = newSearcher(r, false);
// Need to run a query so the DV field is really loaded:
TopDocs hits = s.search(new BKDPointInBBoxQuery("field", -30, 0, 140, 150), 1);
TopDocs hits = s.search(new BKDPointInBBoxQuery(FIELD_NAME, -30, 0, 140, 150), 1);
assertEquals(1, hits.totalHits);
assertTrue(Accountables.toString((Accountable) r.leaves().get(0).reader()).contains("delegate"));
IOUtils.close(r, w, dir);
@ -631,6 +192,9 @@ public class TestBKDTree extends LuceneTestCase {
private static DocValuesFormat getDocValuesFormat() {
int maxPointsInLeaf = TestUtil.nextInt(random(), 16, 2048);
int maxPointsSortInHeap = TestUtil.nextInt(random(), maxPointsInLeaf, 1024*1024);
if (VERBOSE) {
System.out.println(" BKD params: maxPointsInLeaf=" + maxPointsInLeaf + " maxPointsSortInHeap=" + maxPointsSortInHeap);
}
return new BKDTreeDocValuesFormat(maxPointsInLeaf, maxPointsSortInHeap);
}

View File

@ -17,58 +17,61 @@ package org.apache.lucene.search;
* limitations under the License.
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.CountDownLatch;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.GeoPointField;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.GeoDistanceUtils;
import org.apache.lucene.util.GeoProjectionUtils;
import org.apache.lucene.util.BaseGeoPointTestCase;
import org.apache.lucene.util.GeoRect;
import org.apache.lucene.util.GeoUtils;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.SloppyMath;
import org.apache.lucene.util.TestGeoUtils;
import org.apache.lucene.util.TestUtil;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
/**
* Unit testing for basic GeoPoint query logic
*
* @lucene.experimental
*/
public class TestGeoPointQuery extends LuceneTestCase {
public class TestGeoPointQuery extends BaseGeoPointTestCase {
private static Directory directory = null;
private static IndexReader reader = null;
private static IndexSearcher searcher = null;
private static final String FIELD_NAME = "geoField";
// error threshold for point-distance queries (in percent) NOTE: Guideline from USGS
private static final double DISTANCE_PCT_ERR = 0.005;
// error threshold for point-distance queries (in meters)
// @todo haversine is sloppy, would be good to have a better heuristic for
// determining the possible haversine error
private static final int DISTANCE_ERR = 1000;
@Override
protected void addPointToDoc(String field, Document doc, double lat, double lon) {
doc.add(new GeoPointField(field, lon, lat, Field.Store.NO));
}
@Override
protected Query newBBoxQuery(String field, GeoRect rect) {
return new GeoPointInBBoxQuery(field, rect.minLon, rect.minLat, rect.maxLon, rect.maxLat);
}
@Override
protected Query newDistanceQuery(String field, double centerLat, double centerLon, double radiusMeters) {
return new GeoPointDistanceQuery(field, centerLon, centerLat, radiusMeters);
}
@Override
protected Query newDistanceRangeQuery(String field, double centerLat, double centerLon, double minRadiusMeters, double radiusMeters) {
return new GeoPointDistanceRangeQuery(field, centerLon, centerLat, minRadiusMeters, radiusMeters);
}
@Override
protected Query newPolygonQuery(String field, double[] lats, double[] lons) {
return new GeoPointInPolygonQuery(field, lons, lats);
}
@BeforeClass
public static void beforeClass() throws Exception {
@ -143,19 +146,81 @@ public class TestGeoPointQuery extends LuceneTestCase {
return searcher.search(q, limit);
}
@Override
protected Boolean rectContainsPoint(GeoRect rect, double pointLat, double pointLon) {
if (GeoUtils.compare(pointLon, rect.minLon) == 0.0 ||
GeoUtils.compare(pointLon, rect.maxLon) == 0.0 ||
GeoUtils.compare(pointLat, rect.minLat) == 0.0 ||
GeoUtils.compare(pointLat, rect.maxLat) == 0.0) {
// Point is very close to rect boundary
return null;
}
if (rect.minLon < rect.maxLon) {
return GeoUtils.bboxContains(pointLon, pointLat, rect.minLon, rect.minLat, rect.maxLon, rect.maxLat);
} else {
// Rect crosses dateline:
return GeoUtils.bboxContains(pointLon, pointLat, -180.0, rect.minLat, rect.maxLon, rect.maxLat)
|| GeoUtils.bboxContains(pointLon, pointLat, rect.minLon, rect.minLat, 180.0, rect.maxLat);
}
}
@Override
protected Boolean polyRectContainsPoint(GeoRect rect, double pointLat, double pointLon) {
return rectContainsPoint(rect, pointLat, pointLon);
}
@Override
protected Boolean circleContainsPoint(double centerLat, double centerLon, double radiusMeters, double pointLat, double pointLon) {
if (radiusQueryCanBeWrong(centerLat, centerLon, pointLon, pointLat, radiusMeters)) {
return null;
} else {
return SloppyMath.haversin(centerLat, centerLon, pointLat, pointLon)*1000.0 <= radiusMeters;
}
}
@Override
protected Boolean distanceRangeContainsPoint(double centerLat, double centerLon, double minRadiusMeters, double radiusMeters, double pointLat, double pointLon) {
if (radiusQueryCanBeWrong(centerLat, centerLon, pointLon, pointLat, minRadiusMeters)
|| radiusQueryCanBeWrong(centerLat, centerLon, pointLon, pointLat, radiusMeters)) {
return null;
} else {
final double d = SloppyMath.haversin(centerLat, centerLon, pointLat, pointLon)*1000.0;
return d >= minRadiusMeters && d <= radiusMeters;
}
}
private static boolean radiusQueryCanBeWrong(double centerLat, double centerLon, double ptLon, double ptLat,
final double radius) {
final long hashedCntr = GeoUtils.mortonHash(centerLon, centerLat);
centerLon = GeoUtils.mortonUnhashLon(hashedCntr);
centerLat = GeoUtils.mortonUnhashLat(hashedCntr);
final long hashedPt = GeoUtils.mortonHash(ptLon, ptLat);
ptLon = GeoUtils.mortonUnhashLon(hashedPt);
ptLat = GeoUtils.mortonUnhashLat(hashedPt);
double ptDistance = SloppyMath.haversin(centerLat, centerLon, ptLat, ptLon)*1000.0;
double delta = StrictMath.abs(ptDistance - radius);
// if its within the distance error then it can be wrong
return delta < (ptDistance*DISTANCE_PCT_ERR);
}
public void testRectCrossesCircle() throws Exception {
assertTrue(GeoUtils.rectCrossesCircle(-180, -90, 180, 0.0, 0.667, 0.0, 88000.0));
}
private TopDocs geoDistanceRangeQuery(double lon, double lat, double minRadius, double maxRadius, int limit)
throws Exception {
GeoPointDistanceRangeQuery q = new GeoPointDistanceRangeQuery(FIELD_NAME, lon, lat, minRadius, maxRadius);
return searcher.search(q, limit);
}
@Test
public void testBBoxQuery() throws Exception {
TopDocs td = bboxQuery(-96.7772, 32.778650, -96.77690000, 32.778950, 5);
assertEquals("GeoBoundingBoxQuery failed", 4, td.totalHits);
}
@Test
public void testPolyQuery() throws Exception {
TopDocs td = polygonQuery(new double[]{-96.7682647, -96.8280029, -96.6288757, -96.4929199,
-96.6041564, -96.7449188, -96.76826477, -96.7682647},
@ -164,7 +229,6 @@ public class TestGeoPointQuery extends LuceneTestCase {
assertEquals("GeoPolygonQuery failed", 2, td.totalHits);
}
@Test
public void testPacManPolyQuery() throws Exception {
// pacman
double[] px = {0, 10, 10, 0, -8, -10, -8, 0, 10, 10, 0};
@ -188,25 +252,21 @@ public class TestGeoPointQuery extends LuceneTestCase {
assertTrue(GeoUtils.rectWithinPoly(-5, 0, -2, 5, px, py, xMin, yMin, xMax, yMax));
}
@Test
public void testBBoxCrossDateline() throws Exception {
TopDocs td = bboxQuery(179.0, -45.0, -179.0, -44.0, 20);
assertEquals("BBoxCrossDateline query failed", 2, td.totalHits);
}
@Test
public void testWholeMap() throws Exception {
TopDocs td = bboxQuery(-179.9, -89.9, 179.9, 89.9, 20);
assertEquals("testWholeMap failed", 24, td.totalHits);
}
@Test
public void smallTest() throws Exception {
TopDocs td = geoDistanceQuery(-73.998776, 40.720611, 1, 20);
assertEquals("smallTest failed", 2, td.totalHits);
}
@Test
public void testInvalidBBox() throws Exception {
try {
bboxQuery(179.0, -92.0, 181.0, -91.0, 20);
@ -216,35 +276,38 @@ public class TestGeoPointQuery extends LuceneTestCase {
throw new Exception("GeoBoundingBox should not accept invalid lat/lon");
}
@Test
public void testGeoDistanceQuery() throws Exception {
TopDocs td = geoDistanceQuery(-96.4538113027811, 32.94823588839368, 6000, 20);
assertEquals("GeoDistanceQuery failed", 2, td.totalHits);
}
@Test
public void testMultiValuedQuery() throws Exception {
TopDocs td = bboxQuery(-96.4538113027811, 32.7559529921407, -96.7706036567688, 32.7756745755423, 20);
// 3 single valued docs + 2 multi-valued docs
assertEquals("testMultiValuedQuery failed", 5, td.totalHits);
}
public void testTooBigRadius() throws Exception {
try {
geoDistanceQuery(0.0, 85.0, 4000000, 20);
} catch (IllegalArgumentException e) {
e.getMessage().contains("exceeds maxRadius");
}
}
/**
* Explicitly large
*/
@Nightly
public void testGeoDistanceQueryHuge() throws Exception {
TopDocs td = geoDistanceQuery(-96.4538113027811, 32.94823588839368, 2000000, 20);
assertEquals("GeoDistanceQuery failed", 13, td.totalHits);
TopDocs td = geoDistanceQuery(-96.4538113027811, 32.94823588839368, 6000000, 20);
assertEquals("GeoDistanceQuery failed",18, td.totalHits);
}
@Test
public void testGeoDistanceQueryCrossDateline() throws Exception {
TopDocs td = geoDistanceQuery(-179.9538113027811, 32.94823588839368, 120000, 20);
assertEquals("GeoDistanceQuery failed", 3, td.totalHits);
}
@Test
public void testInvalidGeoDistanceQuery() throws Exception {
try {
geoDistanceQuery(181.0, 92.0, 120000, 20);
@ -254,413 +317,14 @@ public class TestGeoPointQuery extends LuceneTestCase {
throw new Exception("GeoDistanceQuery should not accept invalid lat/lon as origin");
}
@Test
public void testMaxDistanceRangeQuery() throws Exception {
TopDocs td = geoDistanceRangeQuery(0.0, 0.0, 10000, GeoProjectionUtils.SEMIMINOR_AXIS, 20);
TopDocs td = geoDistanceRangeQuery(0.0, 0.0, 10, 20000000, 20);
assertEquals("GeoDistanceRangeQuery failed", 24, td.totalHits);
}
public void testRandomTiny() throws Exception {
// Make sure single-leaf-node case is OK:
doTestRandom(10);
}
public void testRandom() throws Exception {
doTestRandom(10000);
}
@Test
public void testMortonEncoding() throws Exception {
long hash = GeoUtils.mortonHash(180, 90);
assertEquals(180.0, GeoUtils.mortonUnhashLon(hash), 0);
assertEquals(90.0, GeoUtils.mortonUnhashLat(hash), 0);
}
@Nightly
public void testRandomBig() throws Exception {
doTestRandom(200000);
}
private void doTestRandom(int count) throws Exception {
int numPoints = atLeast(count);
if (VERBOSE) {
System.out.println("TEST: numPoints=" + numPoints);
}
double[] lats = new double[numPoints];
double[] lons = new double[numPoints];
boolean haveRealDoc = false;
for (int docID=0;docID<numPoints;docID++) {
int x = random().nextInt(20);
if (x == 17) {
// Some docs don't have a point:
lats[docID] = Double.NaN;
if (VERBOSE) {
//System.out.println(" doc=" + docID + " is missing");
}
continue;
}
if (docID > 0 && x < 3 && haveRealDoc) {
int oldDocID;
while (true) {
oldDocID = random().nextInt(docID);
if (Double.isNaN(lats[oldDocID]) == false) {
break;
}
}
if (x == 0) {
// Identical lat to old point
lats[docID] = lats[oldDocID];
lons[docID] = TestGeoUtils.randomLon();
if (VERBOSE) {
//System.out.println(" doc=" + docID + " lat=" + lats[docID] + " lon=" + lons[docID] + " (same lat as doc=" + oldDocID + ")");
}
} else if (x == 1) {
// Identical lon to old point
lats[docID] = TestGeoUtils.randomLat();
lons[docID] = lons[oldDocID];
if (VERBOSE) {
//System.out.println(" doc=" + docID + " lat=" + lats[docID] + " lon=" + lons[docID] + " (same lon as doc=" + oldDocID + ")");
}
} else {
assert x == 2;
// Fully identical point:
lats[docID] = lats[oldDocID];
lons[docID] = lons[oldDocID];
if (VERBOSE) {
//System.out.println(" doc=" + docID + " lat=" + lats[docID] + " lon=" + lons[docID] + " (same lat/lon as doc=" + oldDocID + ")");
}
}
} else {
lats[docID] = TestGeoUtils.randomLat();
lons[docID] = TestGeoUtils.randomLon();
haveRealDoc = true;
if (VERBOSE) {
//System.out.println(" doc=" + docID + " lat=" + lats[docID] + " lon=" + lons[docID]);
}
}
}
verify(lats, lons);
}
private static void verify(double[] lats, double[] lons) throws Exception {
IndexWriterConfig iwc = newIndexWriterConfig();
Directory dir;
if (lats.length > 100000) {
dir = newFSDirectory(createTempDir("TestGeoPointQuery"));
iwc.setCodec(TestUtil.getDefaultCodec());
} else {
dir = newDirectory();
}
Set<Integer> deleted = new HashSet<>();
// RandomIndexWriter is too slow here:
IndexWriter w = new IndexWriter(dir, iwc);
for(int id=0;id<lats.length;id++) {
Document doc = new Document();
doc.add(newStringField("id", ""+id, Field.Store.NO));
doc.add(new NumericDocValuesField("id", id));
if (Double.isNaN(lats[id]) == false) {
if (VERBOSE) {
System.out.println(" id=" + id + " lat=" + lats[id] + " lon=" + lons[id]);
}
doc.add(new GeoPointField(FIELD_NAME, lons[id], lats[id], Field.Store.NO));
} else if (VERBOSE) {
System.out.println(" id=" + id + " skipped");
}
w.addDocument(doc);
if (id > 0 && random().nextInt(100) == 42) {
int idToDelete = random().nextInt(id);
w.deleteDocuments(new Term("id", ""+idToDelete));
deleted.add(idToDelete);
if (VERBOSE) {
System.out.println(" delete id=" + idToDelete);
}
}
}
if (random().nextBoolean()) {
w.forceMerge(1);
}
IndexReader r = DirectoryReader.open(w, true);
w.close();
IndexSearcher s = newSearcher(r);
// Make sure queries are thread safe:
int numThreads = TestUtil.nextInt(random(), 2, 5);
List<Thread> threads = new ArrayList<>();
final int iters = atLeast(10);
final CountDownLatch startingGun = new CountDownLatch(1);
for(int i=0;i<numThreads;i++) {
Thread thread = new Thread() {
@Override
public void run() {
try {
_run();
} catch (Exception e) {
throw new RuntimeException(e);
}
}
private void _run() throws Exception {
startingGun.await();
NumericDocValues docIDToID = MultiDocValues.getNumericValues(r, "id");
for (int iter=0;iter<iters;iter++) {
if (VERBOSE) {
System.out.println("\nTEST: iter=" + iter);
}
Query query;
VerifyHits verifyHits;
if (random().nextBoolean()) {
GeoBoundingBox bbox = randomBBox();
query = new GeoPointInBBoxQuery(FIELD_NAME, bbox.minLon, bbox.minLat, bbox.maxLon, bbox.maxLat);
verifyHits = new VerifyHits() {
@Override
protected Boolean shouldMatch(double pointLat, double pointLon) {
// morton encode & decode to compare apples to apples (that is, compare with same hash precision error
// present in the index)
long pointHash = GeoUtils.mortonHash(pointLon, pointLat);
pointLon = GeoUtils.mortonUnhashLon(pointHash);
pointLat = GeoUtils.mortonUnhashLat(pointHash);
if (bboxQueryCanBeWrong(bbox, pointLat, pointLon)) {
return null;
} else {
return rectContainsPointEnc(bbox, pointLat, pointLon);
}
}
};
} else if (random().nextBoolean()) {
// generate a random bounding box
GeoBoundingBox bbox = randomBBox();
double centerLat = bbox.minLat + ((bbox.maxLat - bbox.minLat)/2.0);
double centerLon = bbox.minLon + ((bbox.maxLon - bbox.minLon)/2.0);
// radius (in meters) as a function of the random generated bbox
final double radius = random().nextDouble() * (0.05 * GeoProjectionUtils.SEMIMINOR_AXIS);
// randomly test range queries
final boolean rangeQuery = random().nextBoolean();
final double radiusMax = (rangeQuery) ? radius + random().nextDouble() * (0.05 * GeoProjectionUtils.SEMIMINOR_AXIS) : 0;
if (VERBOSE) {
System.out.println("\t radius = " + radius + ((rangeQuery) ? " : " + radiusMax : ""));
}
// query using the centroid of the bounding box
if (rangeQuery) {
query = new GeoPointDistanceRangeQuery(FIELD_NAME, centerLon, centerLat, radius, radiusMax);
} else {
query = new GeoPointDistanceQuery(FIELD_NAME, centerLon, centerLat, radius);
}
verifyHits = new VerifyHits() {
@Override
protected Boolean shouldMatch(double pointLat, double pointLon) {
if (Double.isNaN(pointLat) || Double.isNaN(pointLon)) {
return null;
}
if (radiusQueryCanBeWrong(centerLat, centerLon, pointLon, pointLat, radius)
|| (rangeQuery && radiusQueryCanBeWrong(centerLat, centerLon, pointLon, pointLat, radiusMax))) {
return null;
} else {
return distanceContainsPt(centerLon, centerLat, pointLon, pointLat, radius, (rangeQuery) ? radiusMax : 0);
}
}
};
} else {
GeoBoundingBox bbox = randomBBox();
double[] pLats = new double[5];
double[] pLons = new double[5];
pLats[0] = bbox.minLat;
pLons[0] = bbox.minLon;
pLats[1] = bbox.maxLat;
pLons[1] = bbox.minLon;
pLats[2] = bbox.maxLat;
pLons[2] = bbox.maxLon;
pLats[3] = bbox.minLat;
pLons[3] = bbox.maxLon;
pLats[4] = bbox.minLat;
pLons[4] = bbox.minLon;
query = new GeoPointInPolygonQuery(FIELD_NAME, pLons, pLats);
verifyHits = new VerifyHits() {
@Override
protected Boolean shouldMatch(double pointLat, double pointLon) {
// morton encode & decode to compare apples to apples (that is, compare with same hash precision error
// present in the index)
long pointHash = GeoUtils.mortonHash(pointLon, pointLat);
pointLon = GeoUtils.mortonUnhashLon(pointHash);
pointLat = GeoUtils.mortonUnhashLat(pointHash);
if (bboxQueryCanBeWrong(bbox, pointLat, pointLon)) {
return null;
} else {
return rectContainsPointEnc(bbox, pointLat, pointLon);
}
}
};
}
verifyHits.test(s, docIDToID, deleted, query, lats, lons);
}
}
};
thread.setName("T" + i);
thread.start();
threads.add(thread);
}
startingGun.countDown();
for(Thread thread : threads) {
thread.join();
}
IOUtils.close(r, dir);
}
private static abstract class VerifyHits {
public void test(IndexSearcher s, NumericDocValues docIDToID, Set<Integer> deleted, Query query, double[] lats, double[] lons) throws Exception {
int maxDoc = s.getIndexReader().maxDoc();
final FixedBitSet hits = new FixedBitSet(maxDoc);
s.search(query, new SimpleCollector() {
private int docBase;
@Override
public boolean needsScores() {
return false;
}
@Override
protected void doSetNextReader(LeafReaderContext context) throws IOException {
docBase = context.docBase;
}
@Override
public void collect(int doc) {
hits.set(docBase+doc);
}
});
for(int docID=0;docID<maxDoc;docID++) {
int id = (int) docIDToID.get(docID);
Boolean expected;
if (deleted.contains(id)) {
expected = false;
} else {
expected = shouldMatch(lats[id], lons[id]);
}
// null means it's a borderline case which is allowed to be wrong:
if (expected != null) {
if (hits.get(docID) != expected) {
System.out.println(Thread.currentThread().getName() + ": id=" + id +
" docID=" + docID + " lat=" + lats[id] + " lon=" + lons[id] +
" deleted?=" + deleted.contains(id) + " expected=" + expected + " but got " + hits.get(docID) +
" query=" + query);
fail("wrong hit");
}
}
}
}
/** Return true if we definitely should match, false if we definitely
* should not match, and null if it's a borderline case which might
* go either way. */
protected abstract Boolean shouldMatch(double lat, double lon);
}
private static boolean distanceContainsPt(double lonA, double latA, double lonB, double latB, final double radius,
final double maxRadius) {
final long hashedPtA = GeoUtils.mortonHash(lonA, latA);
lonA = GeoUtils.mortonUnhashLon(hashedPtA);
latA = GeoUtils.mortonUnhashLat(hashedPtA);
final long hashedPtB = GeoUtils.mortonHash(lonB, latB);
lonB = GeoUtils.mortonUnhashLon(hashedPtB);
latB = GeoUtils.mortonUnhashLat(hashedPtB);
if (maxRadius == 0) {
return (SloppyMath.haversin(latA, lonA, latB, lonB)*1000.0 <= radius);
}
return SloppyMath.haversin(latA, lonA, latB, lonB)*1000.0 >= radius
&& SloppyMath.haversin(latA, lonA, latB, lonB)*1000.0 <= maxRadius;
}
private static boolean rectContainsPointEnc(GeoBoundingBox bbox, double pointLat, double pointLon) {
// We should never see a deleted doc here?
assert Double.isNaN(pointLat) == false;
return GeoUtils.bboxContains(pointLon, pointLat, bbox.minLon, bbox.minLat, bbox.maxLon, bbox.maxLat);
}
private static boolean radiusQueryCanBeWrong(double centerLat, double centerLon, double ptLon, double ptLat,
final double radius) {
final long hashedCntr = GeoUtils.mortonHash(centerLon, centerLat);
centerLon = GeoUtils.mortonUnhashLon(hashedCntr);
centerLat = GeoUtils.mortonUnhashLat(hashedCntr);
final long hashedPt = GeoUtils.mortonHash(ptLon, ptLat);
ptLon = GeoUtils.mortonUnhashLon(hashedPt);
ptLat = GeoUtils.mortonUnhashLat(hashedPt);
double ptDistance = SloppyMath.haversin(centerLat, centerLon, ptLat, ptLon)*1000.0;
double delta = StrictMath.abs(ptDistance - radius);
// if its within the distance error then it can be wrong
return delta < DISTANCE_ERR;
}
private static boolean bboxQueryCanBeWrong(GeoBoundingBox bbox, double lat, double lon) {
// we can tolerate variance at the GeoUtils.TOLERANCE decimal place
final int tLon = (int)(lon/(GeoUtils.TOLERANCE-1));
final int tLat = (int)(lat/(GeoUtils.TOLERANCE-1));
final int tMinLon = (int)(bbox.minLon/(GeoUtils.TOLERANCE-1));
final int tMinLat = (int)(bbox.minLat/(GeoUtils.TOLERANCE-1));
final int tMaxLon = (int)(bbox.maxLon/(GeoUtils.TOLERANCE-1));
final int tMaxLat = (int)(bbox.maxLat/(GeoUtils.TOLERANCE-1));
return ((tMinLon - tLon) == 0 || (tMinLat - tLat) == 0
|| (tMaxLon - tLon) == 0 || (tMaxLat - tLat) == 0);
}
private static GeoBoundingBox randomBBox() {
double lat0 = TestGeoUtils.randomLat();
double lat1 = TestGeoUtils.randomLat();
double lon0 = TestGeoUtils.randomLon();
double lon1 = TestGeoUtils.randomLon();
if (lat1 < lat0) {
double x = lat0;
lat0 = lat1;
lat1 = x;
}
if (lon1 < lon0) {
double x = lon0;
lon0 = lon1;
lon1 = x;
}
return new GeoBoundingBox(lon0, lon1, lat0, lat1);
}
}

View File

@ -17,12 +17,17 @@ package org.apache.lucene.util;
* limitations under the License.
*/
import java.io.PrintWriter;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.junit.BeforeClass;
import org.junit.Test;
import com.carrotsearch.randomizedtesting.generators.RandomInts;
/**
* Tests class for methods in GeoUtils
@ -31,6 +36,9 @@ import org.junit.Test;
*/
public class TestGeoUtils extends LuceneTestCase {
private static final double LON_SCALE = (0x1L<<GeoUtils.BITS)/360.0D;
private static final double LAT_SCALE = (0x1L<<GeoUtils.BITS)/180.0D;
// Global bounding box we will "cover" in the random test; we have to make this "smallish" else the queries take very long:
private static double originLat;
private static double originLon;
@ -54,7 +62,6 @@ public class TestGeoUtils extends LuceneTestCase {
}
}
@Test
public void testGeoHash() {
int numPoints = atLeast(100);
String randomGeoHashString;
@ -63,8 +70,8 @@ public class TestGeoUtils extends LuceneTestCase {
int randomLevel;
for (int i = 0; i < numPoints; ++i) {
// random point
double lat = randomLatFullRange();
double lon = randomLonFullRange();
double lat = randomLat(false);
double lon = randomLon(false);
// compute geohash straight from lat/lon and from morton encoded value to ensure they're the same
randomGeoHashString = GeoHashUtils.stringEncode(lon, lat, randomLevel = random().nextInt(12 - 1) + 1);
@ -90,7 +97,6 @@ public class TestGeoUtils extends LuceneTestCase {
* Pass condition: lat=42.6, lng=-5.6 should be encoded as "ezs42e44yx96",
* lat=57.64911 lng=10.40744 should be encoded as "u4pruydqqvj8"
*/
@Test
public void testEncode() {
String hash = GeoHashUtils.stringEncode(-5.6, 42.6, 12);
assertEquals("ezs42e44yx96", hash);
@ -103,7 +109,6 @@ public class TestGeoUtils extends LuceneTestCase {
* Pass condition: lat=52.3738007, lng=4.8909347 should be encoded and then
* decoded within 0.00001 of the original value
*/
@Test
public void testDecodePreciseLongitudeLatitude() {
final String geohash = GeoHashUtils.stringEncode(4.8909347, 52.3738007);
final long hash = GeoHashUtils.mortonEncode(geohash);
@ -116,7 +121,6 @@ public class TestGeoUtils extends LuceneTestCase {
* Pass condition: lat=84.6, lng=10.5 should be encoded and then decoded
* within 0.00001 of the original value
*/
@Test
public void testDecodeImpreciseLongitudeLatitude() {
final String geohash = GeoHashUtils.stringEncode(10.5, 84.6);
@ -126,7 +130,6 @@ public class TestGeoUtils extends LuceneTestCase {
assertEquals(10.5, GeoUtils.mortonUnhashLon(hash), 0.00001D);
}
@Test
public void testDecodeEncode() {
final String geoHash = "u173zq37x014";
assertEquals(geoHash, GeoHashUtils.stringEncode(4.8909347, 52.3738007));
@ -139,7 +142,6 @@ public class TestGeoUtils extends LuceneTestCase {
assertEquals(geoHash, GeoHashUtils.stringEncode(lon, lat));
}
@Test
public void testNeighbors() {
String geohash = "gcpv";
List<String> expectedNeighbors = new ArrayList<>();
@ -201,19 +203,332 @@ public class TestGeoUtils extends LuceneTestCase {
assertEquals(expectedNeighbors, neighbors);
}
public static double randomLatFullRange() {
return (180d * random().nextDouble()) - 90d;
public void testClosestPointOnBBox() {
double[] result = new double[2];
GeoDistanceUtils.closestPointOnBBox(20, 30, 40, 50, 70, 70, result);
assertEquals(40.0, result[0], 0.0);
assertEquals(50.0, result[1], 0.0);
GeoDistanceUtils.closestPointOnBBox(-20, -20, 0, 0, 70, 70, result);
assertEquals(0.0, result[0], 0.0);
assertEquals(0.0, result[1], 0.0);
}
public static double randomLonFullRange() {
return (360d * random().nextDouble()) - 180d;
private static class Cell {
static int nextCellID;
final Cell parent;
final int cellID;
final double minLon, maxLon;
final double minLat, maxLat;
final int splitCount;
public Cell(Cell parent,
double minLon, double minLat,
double maxLon, double maxLat,
int splitCount) {
assert maxLon >= minLon;
assert maxLat >= minLat;
this.parent = parent;
this.minLon = minLon;
this.minLat = minLat;
this.maxLon = maxLon;
this.maxLat = maxLat;
this.cellID = nextCellID++;
this.splitCount = splitCount;
}
/** Returns true if the quantized point lies within this cell, inclusive on all bounds. */
public boolean contains(double lon, double lat) {
return lon >= minLon && lon <= maxLon && lat >= minLat && lat <= maxLat;
}
@Override
public String toString() {
return "cell=" + cellID + (parent == null ? "" : " parentCellID=" + parent.cellID) + " lon: " + minLon + " TO " + maxLon + ", lat: " + minLat + " TO " + maxLat + ", splits: " + splitCount;
}
}
public static double randomLat() {
return GeoUtils.normalizeLat(originLat + latRange * (random().nextDouble() - 0.5));
public long scaleLon(final double val) {
return (long) ((val-GeoUtils.MIN_LON_INCL) * LON_SCALE);
}
public static double randomLon() {
return GeoUtils.normalizeLon(originLon + lonRange * (random().nextDouble() - 0.5));
public long scaleLat(final double val) {
return (long) ((val-GeoUtils.MIN_LAT_INCL) * LAT_SCALE);
}
public double unscaleLon(final long val) {
return (val / LON_SCALE) + GeoUtils.MIN_LON_INCL;
}
public double unscaleLat(final long val) {
return (val / LAT_SCALE) + GeoUtils.MIN_LAT_INCL;
}
public double randomLat(boolean small) {
double result;
if (small) {
result = GeoUtils.normalizeLat(originLat + latRange * (random().nextDouble() - 0.5));
} else {
result = -90 + 180.0 * random().nextDouble();
}
return unscaleLat(scaleLat(result));
}
public double randomLon(boolean small) {
double result;
if (small) {
result = GeoUtils.normalizeLon(originLon + lonRange * (random().nextDouble() - 0.5));
} else {
result = -180 + 360.0 * random().nextDouble();
}
return unscaleLon(scaleLon(result));
}
private void findMatches(Set<Integer> hits, PrintWriter log, Cell root,
double centerLon, double centerLat, double radiusMeters,
double[] docLons, double[] docLats) {
if (VERBOSE) {
log.println(" root cell: " + root);
}
List<Cell> queue = new ArrayList<>();
queue.add(root);
int recurseDepth = RandomInts.randomIntBetween(random(), 5, 15);
while (queue.size() > 0) {
Cell cell = queue.get(queue.size()-1);
queue.remove(queue.size()-1);
if (VERBOSE) {
log.println(" cycle: " + cell + " queue.size()=" + queue.size());
}
if (random().nextInt(10) == 7 || cell.splitCount > recurseDepth) {
if (VERBOSE) {
log.println(" leaf");
}
// Leaf cell: brute force check all docs that fall within this cell:
for(int docID=0;docID<docLons.length;docID++) {
if (cell.contains(docLons[docID], docLats[docID])) {
double distanceMeters = SloppyMath.haversin(centerLat, centerLon, docLats[docID], docLons[docID]) * 1000.0;
if (distanceMeters <= radiusMeters) {
if (VERBOSE) {
log.println(" check doc=" + docID + ": match!");
}
hits.add(docID);
} else {
if (VERBOSE) {
log.println(" check doc=" + docID + ": no match");
}
}
}
}
} else {
if (GeoUtils.rectWithinCircle(cell.minLon, cell.minLat, cell.maxLon, cell.maxLat, centerLon, centerLat, radiusMeters)) {
// Query circle fully contains this cell, just addAll:
if (VERBOSE) {
log.println(" circle fully contains cell: now addAll");
}
for(int docID=0;docID<docLons.length;docID++) {
if (cell.contains(docLons[docID], docLats[docID])) {
if (VERBOSE) {
log.println(" addAll doc=" + docID);
}
hits.add(docID);
}
}
continue;
} else if (GeoUtils.rectWithin(root.minLon, root.minLat, root.maxLon, root.maxLat,
cell.minLon, cell.minLat, cell.maxLon, cell.maxLat)) {
// Fall through below to "recurse"
if (VERBOSE) {
log.println(" cell fully contains circle: keep splitting");
}
} else if (GeoUtils.rectCrossesCircle(cell.minLon, cell.minLat, cell.maxLon, cell.maxLat,
centerLon, centerLat, radiusMeters)) {
// Fall through below to "recurse"
if (VERBOSE) {
log.println(" cell overlaps circle: keep splitting");
}
} else {
if (VERBOSE) {
log.println(" no overlap: drop this cell");
for(int docID=0;docID<docLons.length;docID++) {
if (cell.contains(docLons[docID], docLats[docID])) {
if (VERBOSE) {
log.println(" skip doc=" + docID);
}
}
}
}
continue;
}
// Randomly split:
if (random().nextBoolean()) {
// Split on lon:
double splitValue = cell.minLon + (cell.maxLon - cell.minLon) * random().nextDouble();
if (VERBOSE) {
log.println(" now split on lon=" + splitValue);
}
Cell cell1 = new Cell(cell,
cell.minLon, cell.minLat,
splitValue, cell.maxLat,
cell.splitCount+1);
Cell cell2 = new Cell(cell,
splitValue, cell.minLat,
cell.maxLon, cell.maxLat,
cell.splitCount+1);
if (VERBOSE) {
log.println(" split cell1: " + cell1);
log.println(" split cell2: " + cell2);
}
queue.add(cell1);
queue.add(cell2);
} else {
// Split on lat:
double splitValue = cell.minLat + (cell.maxLat - cell.minLat) * random().nextDouble();
if (VERBOSE) {
log.println(" now split on lat=" + splitValue);
}
Cell cell1 = new Cell(cell,
cell.minLon, cell.minLat,
cell.maxLon, splitValue,
cell.splitCount+1);
Cell cell2 = new Cell(cell,
cell.minLon, splitValue,
cell.maxLon, cell.maxLat,
cell.splitCount+1);
if (VERBOSE) {
log.println(" split cells:\n " + cell1 + "\n " + cell2);
}
queue.add(cell1);
queue.add(cell2);
}
}
}
}
/** Tests consistency of GeoUtils.rectWithinCircle, .rectCrossesCircle, .rectWithin and SloppyMath.haversine distance check */
@AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/LUCENE-6846")
public void testGeoRelations() throws Exception {
int numDocs = atLeast(1000);
// boolean useSmallRanges = random().nextBoolean();
// TODO: the GeoUtils APIs have bugs if you use large distances:
boolean useSmallRanges = true;
if (VERBOSE) {
System.out.println("TEST: " + numDocs + " docs useSmallRanges=" + useSmallRanges);
}
double[] docLons = new double[numDocs];
double[] docLats = new double[numDocs];
for(int docID=0;docID<numDocs;docID++) {
docLons[docID] = randomLon(useSmallRanges);
docLats[docID] = randomLat(useSmallRanges);
if (VERBOSE) {
System.out.println(" doc=" + docID + ": lon=" + docLons[docID] + " lat=" + docLats[docID]);
}
}
int iters = atLeast(10);
iters = atLeast(50);
for(int iter=0;iter<iters;iter++) {
Cell.nextCellID = 0;
double centerLon = randomLon(useSmallRanges);
double centerLat = randomLat(useSmallRanges);
// So the circle covers at most 50% of the earth's surface:
double radiusMeters;
// TODO: GeoUtils APIs are still buggy for large distances:
if (true || useSmallRanges) {
// Approx 3 degrees lon at the equator:
radiusMeters = random().nextDouble() * 333000;
} else {
radiusMeters = random().nextDouble() * GeoProjectionUtils.SEMIMAJOR_AXIS * Math.PI / 2.0;
}
StringWriter sw = new StringWriter();
PrintWriter log = new PrintWriter(sw, true);
if (VERBOSE) {
log.println("\nTEST: iter=" + iter + " radiusMeters=" + radiusMeters + " centerLon=" + centerLon + " centerLat=" + centerLat);
}
GeoRect bbox = GeoUtils.circleToBBox(centerLon, centerLat, radiusMeters);
Set<Integer> hits = new HashSet<>();
if (bbox.maxLon < bbox.minLon) {
// Crosses dateline
log.println(" circle crosses dateline; first right query");
findMatches(hits, log,
new Cell(null,
-180, bbox.minLat,
bbox.maxLon, bbox.maxLat,
0),
centerLon, centerLat, radiusMeters,
docLons, docLats);
log.println(" circle crosses dateline; now left query");
findMatches(hits, log,
new Cell(null,
bbox.minLon, bbox.minLat,
180, bbox.maxLat,
0),
centerLon, centerLat, radiusMeters,
docLons, docLats);
} else {
// Start with the root cell that fully contains the shape:
findMatches(hits, log,
new Cell(null,
bbox.minLon, bbox.minLat,
bbox.maxLon, bbox.maxLat,
0),
centerLon, centerLat, radiusMeters,
docLons, docLats);
}
if (VERBOSE) {
log.println(" " + hits.size() + " hits");
}
int failCount = 0;
// Done matching, now verify:
for(int docID=0;docID<numDocs;docID++) {
double distanceMeters = SloppyMath.haversin(centerLat, centerLon, docLats[docID], docLons[docID]) * 1000.0;
boolean expected = distanceMeters <= radiusMeters;
boolean actual = hits.contains(docID);
if (actual != expected) {
if (actual) {
log.println("doc=" + docID + " matched but should not");
} else {
log.println("doc=" + docID + " did not match but should");
}
log.println(" lon=" + docLons[docID] + " lat=" + docLats[docID] + " distanceMeters=" + distanceMeters + " vs radiusMeters=" + radiusMeters);
failCount++;
}
}
if (failCount != 0) {
System.out.print(sw.toString());
fail(failCount + " incorrect hits (see above)");
}
}
}
}