LUCENE-4342 fix distance precision lookup for prefix trees, and modify the default algorithm.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1379530 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
David Wayne Smiley 2012-08-31 17:53:05 +00:00
parent 487c3306d8
commit 7164584523
5 changed files with 115 additions and 30 deletions

View File

@ -67,10 +67,41 @@ public class GeohashPrefixTree extends SpatialPrefixTree {
@Override
public int getLevelForDistance(double dist) {
final int level = GeohashUtils.lookupHashLenForWidthHeight(dist, dist);
final int level = lookupHashLenForWidthHeight(dist, dist);
return Math.max(Math.min(level, maxLevels), 1);
}
/* TODO temporarily in-lined GeoHashUtils.lookupHashLenForWidthHeight() is fixed in Spatial4j 0.3 */
/**
* Return the longest geohash length that will have a width & height >= specified arguments.
*/
private static int lookupHashLenForWidthHeight(double lonErr, double latErr) {
//loop through hash length arrays from beginning till we find one.
for(int len = 1; len <= GeohashUtils.MAX_PRECISION; len++) {
double latHeight = hashLenToLatHeight[len];
double lonWidth = hashLenToLonWidth[len];
if (latHeight < latErr && lonWidth < lonErr)
return len;
}
return GeohashUtils.MAX_PRECISION;
}
/** See the table at http://en.wikipedia.org/wiki/Geohash */
private static final double[] hashLenToLatHeight, hashLenToLonWidth;
static {
hashLenToLatHeight = new double[GeohashUtils.MAX_PRECISION +1];
hashLenToLonWidth = new double[GeohashUtils.MAX_PRECISION +1];
hashLenToLatHeight[0] = 90*2;
hashLenToLonWidth[0] = 180*2;
boolean even = false;
for(int i = 1; i <= GeohashUtils.MAX_PRECISION; i++) {
hashLenToLatHeight[i] = hashLenToLatHeight[i-1]/(even?8:4);
hashLenToLonWidth[i] = hashLenToLonWidth[i-1]/(even?4:8);
even = ! even;
}
}
@Override
public Node getNode(Point p, int level) {
return new GhCell(GeohashUtils.encodeLatLon(p.getY(), p.getX(), level));//args are lat,lon (y,x)

View File

@ -122,10 +122,10 @@ public class QuadPrefixTree extends SpatialPrefixTree {
@Override
public int getLevelForDistance(double dist) {
for (int i = 1; i < maxLevels; i++) {
for (int i = 0; i < maxLevels-1; i++) {
//note: level[i] is actually a lookup for level i+1
if(dist > levelW[i] || dist > levelH[i]) {
return i;
if(dist > levelW[i] && dist > levelH[i]) {
return i+1;
}
}
return maxLevels;

View File

@ -18,7 +18,9 @@ package org.apache.lucene.spatial.prefix.tree;
*/
import com.spatial4j.core.context.SpatialContext;
import com.spatial4j.core.shape.Circle;
import com.spatial4j.core.shape.Point;
import com.spatial4j.core.shape.Rectangle;
import com.spatial4j.core.shape.Shape;
import java.nio.charset.Charset;
@ -66,30 +68,35 @@ public abstract class SpatialPrefixTree {
* See {@link org.apache.lucene.spatial.query.SpatialArgs#getDistPrecision()}.
* A grid level looked up via {@link #getLevelForDistance(double)} is returned.
*
* @param precision 0-0.5
* @return 1-maxLevels
* @param precision 0 to 0.5
* @return 1 to maxLevels
*/
public int getMaxLevelForPrecision(Shape shape, double precision) {
if (precision < 0 || precision > 0.5) {
throw new IllegalArgumentException("Precision " + precision + " must be between [0-0.5]");
throw new IllegalArgumentException("Precision " + precision + " must be between [0 to 0.5]");
}
if (precision == 0 || shape instanceof Point) {
return maxLevels;
}
double bboxArea = shape.getBoundingBox().getArea();
if (bboxArea == 0) {
return maxLevels;
}
double avgSideLenFromCenter = Math.sqrt(bboxArea) / 2;
return getLevelForDistance(avgSideLenFromCenter * precision);
Rectangle bbox = shape.getBoundingBox();
//The diagonal distance should be the same computed from any opposite corner,
// and this is the longest distance that might be occurring within the shape.
double diagonalDist = ctx.getDistCalc().distance(
ctx.makePoint(bbox.getMinX(), bbox.getMinY()), bbox.getMaxX(), bbox.getMaxY());
//convert to degrees //TODO not needed in Spatial4j 0.3
diagonalDist = ctx.getDistCalc().distanceToDegrees(diagonalDist);
return getLevelForDistance(diagonalDist * 0.5 * precision);
}
/**
* Returns the level of the smallest grid size with a side length that is greater or equal to the provided
* distance.
* Returns the level of the largest grid in which its longest side is less
* than or equal to the provided distance (in degrees). Consequently {@link
* dist} acts as an error epsilon declaring the amount of detail needed in the
* grid, such that you can get a grid with just the right amount of
* precision.
*
* @param dist >= 0
* @return level [1-maxLevels]
* @return level [1 to maxLevels]
*/
public abstract int getLevelForDistance(double dist);

View File

@ -85,12 +85,14 @@ public class SpatialArgs {
}
/**
* The fraction of the distance from the center of the query shape to its nearest edge
* that is considered acceptable error. The algorithm for computing the distance to the
* nearest edge is actually a little different. It normalizes the shape to a square
* given it's bounding box area:
* <pre>sqrt(shape.bbox.area)/2</pre>
* And the error distance is beyond the shape such that the shape is a minimum shape.
* A measure of acceptable error of the shape. It is specified as the
* fraction of the distance from the center of the query shape to its furthest
* bounding box corner. This effectively inflates the size of the shape but
* should not shrink it.
* <p/>
* The default is {@link #DEFAULT_DIST_PRECISION}
*
* @return 0 to 0.5
*/
public Double getDistPrecision() {
return distPrecision;

View File

@ -69,6 +69,45 @@ public class TestRecursivePrefixTreeStrategy extends StrategyTestCase {
}
}
@Test
public void testOneMeterPrecision() {
init(GeohashPrefixTree.getMaxLevelsPossible());
GeohashPrefixTree grid = (GeohashPrefixTree) ((RecursivePrefixTreeStrategy) strategy).getGrid();
//DWS: I know this to be true. 11 is needed for one meter
assertEquals(11, grid.getLevelForDistance(ctx.getDistCalc().distanceToDegrees(0.001)));
}
@Test
public void testPrecision() throws IOException{
init(GeohashPrefixTree.getMaxLevelsPossible());
Point iPt = ctx.makePoint(2.8028712999999925, 48.3708044);//lon, lat
addDocument(newDoc("iPt", iPt));
commit();
Point qPt = ctx.makePoint(2.4632387000000335, 48.6003516);
final double DIST = 35.75;//35.7499...
assertEquals(DIST, ctx.getDistCalc().distance(iPt, qPt), 0.001);
//distPrec will affect the query shape precision. The indexed precision
// was set to nearly zilch via init(GeohashPrefixTree.getMaxLevelsPossible());
final double distPrec = 0.025; //the suggested default, by the way
final double distMult = 1+distPrec;
assertTrue(35.74*distMult >= DIST);
checkHits(q(qPt, 35.74, distPrec), 1, null);
assertTrue(30*distMult < DIST);
checkHits(q(qPt, 30, distPrec), 0, null);
assertTrue(33*distMult < DIST);
checkHits(q(qPt, 33, distPrec), 0, null);
assertTrue(34*distMult < DIST);
checkHits(q(qPt, 34, distPrec), 0, null);
}
@Test
public void geohashRecursiveRandom() throws IOException {
init(12);
@ -105,9 +144,9 @@ public class TestRecursivePrefixTreeStrategy extends StrategyTestCase {
qcYoff + clusterCenter.getY());
double[] distRange = calcDistRange(queryCenter,clusterCenter,sideDegree);
//4.1 query a small box getting nothing
checkHits(queryCenter, distRange[0]*0.99, 0, null);
checkHits(q(queryCenter, distRange[0]*0.99), 0, null);
//4.2 Query a large box enclosing the cluster, getting everything
checkHits(queryCenter, distRange[1]*1.01, points.size(), null);
checkHits(q(queryCenter, distRange[1]*1.01), points.size(), null);
//4.3 Query a medium box getting some (calculate the correct solution and verify)
double queryDist = distRange[0] + (distRange[1]-distRange[0])/2;//average
@ -122,7 +161,7 @@ public class TestRecursivePrefixTreeStrategy extends StrategyTestCase {
ids = Arrays.copyOf(ids, ids_sz);
//assert ids_sz > 0 (can't because randomness keeps us from being able to)
checkHits(queryCenter, queryDist, ids.length, ids);
checkHits(q(queryCenter, queryDist), ids.length, ids);
}
}
@ -132,13 +171,20 @@ public class TestRecursivePrefixTreeStrategy extends StrategyTestCase {
}//randomTest()
//TODO can we use super.runTestQueries() ?
private void checkHits(Point pt, double dist, int assertNumFound, int[] assertIds) {
private SpatialArgs q(Point pt, double dist) {
return q(pt, dist, 0.0);
}
private SpatialArgs q(Point pt, double dist, double distPrec) {
Shape shape = ctx.makeCircle(pt,dist);
SpatialArgs args = new SpatialArgs(SpatialOperation.Intersects,shape);
args.setDistPrecision(0.0);
args.setDistPrecision(distPrec);
return args;
}
private void checkHits(SpatialArgs args, int assertNumFound, int[] assertIds) {
SearchResults got = executeQuery(strategy.makeQuery(args), 100);
assertEquals(""+shape,assertNumFound,got.numFound);
assertEquals("" + args, assertNumFound, got.numFound);
if (assertIds != null) {
Set<Integer> gotIds = new HashSet<Integer>();
for (SearchResult result : got.results) {
@ -150,7 +196,6 @@ public class TestRecursivePrefixTreeStrategy extends StrategyTestCase {
}
}
//
private Document newDoc(String id, Shape shape) {
Document doc = new Document();
doc.add(new StringField("id", id, Field.Store.YES));