From 71645845233ce7cee3cc828c9bb3162d9520cf30 Mon Sep 17 00:00:00 2001 From: David Wayne Smiley Date: Fri, 31 Aug 2012 17:53:05 +0000 Subject: [PATCH] LUCENE-4342 fix distance precision lookup for prefix trees, and modify the default algorithm. git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1379530 13f79535-47bb-0310-9956-ffa450edef68 --- .../prefix/tree/GeohashPrefixTree.java | 33 +++++++++- .../spatial/prefix/tree/QuadPrefixTree.java | 6 +- .../prefix/tree/SpatialPrefixTree.java | 31 ++++++---- .../lucene/spatial/query/SpatialArgs.java | 14 +++-- .../TestRecursivePrefixTreeStrategy.java | 61 ++++++++++++++++--- 5 files changed, 115 insertions(+), 30 deletions(-) diff --git a/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/GeohashPrefixTree.java b/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/GeohashPrefixTree.java index 518f59880bc..0dfe6bac92a 100644 --- a/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/GeohashPrefixTree.java +++ b/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/GeohashPrefixTree.java @@ -67,10 +67,41 @@ public class GeohashPrefixTree extends SpatialPrefixTree { @Override public int getLevelForDistance(double dist) { - final int level = GeohashUtils.lookupHashLenForWidthHeight(dist, dist); + final int level = lookupHashLenForWidthHeight(dist, dist); return Math.max(Math.min(level, maxLevels), 1); } + /* TODO temporarily in-lined GeoHashUtils.lookupHashLenForWidthHeight() is fixed in Spatial4j 0.3 */ + + /** + * Return the longest geohash length that will have a width & height >= specified arguments. + */ + private static int lookupHashLenForWidthHeight(double lonErr, double latErr) { + //loop through hash length arrays from beginning till we find one. + for(int len = 1; len <= GeohashUtils.MAX_PRECISION; len++) { + double latHeight = hashLenToLatHeight[len]; + double lonWidth = hashLenToLonWidth[len]; + if (latHeight < latErr && lonWidth < lonErr) + return len; + } + return GeohashUtils.MAX_PRECISION; + } + + /** See the table at http://en.wikipedia.org/wiki/Geohash */ + private static final double[] hashLenToLatHeight, hashLenToLonWidth; + static { + hashLenToLatHeight = new double[GeohashUtils.MAX_PRECISION +1]; + hashLenToLonWidth = new double[GeohashUtils.MAX_PRECISION +1]; + hashLenToLatHeight[0] = 90*2; + hashLenToLonWidth[0] = 180*2; + boolean even = false; + for(int i = 1; i <= GeohashUtils.MAX_PRECISION; i++) { + hashLenToLatHeight[i] = hashLenToLatHeight[i-1]/(even?8:4); + hashLenToLonWidth[i] = hashLenToLonWidth[i-1]/(even?4:8); + even = ! even; + } + } + @Override public Node getNode(Point p, int level) { return new GhCell(GeohashUtils.encodeLatLon(p.getY(), p.getX(), level));//args are lat,lon (y,x) diff --git a/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/QuadPrefixTree.java b/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/QuadPrefixTree.java index 1d74bd17ba4..dde2d5b16af 100644 --- a/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/QuadPrefixTree.java +++ b/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/QuadPrefixTree.java @@ -122,10 +122,10 @@ public class QuadPrefixTree extends SpatialPrefixTree { @Override public int getLevelForDistance(double dist) { - for (int i = 1; i < maxLevels; i++) { + for (int i = 0; i < maxLevels-1; i++) { //note: level[i] is actually a lookup for level i+1 - if(dist > levelW[i] || dist > levelH[i]) { - return i; + if(dist > levelW[i] && dist > levelH[i]) { + return i+1; } } return maxLevels; diff --git a/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/SpatialPrefixTree.java b/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/SpatialPrefixTree.java index e7b6bf70d0c..2892c515ac4 100644 --- a/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/SpatialPrefixTree.java +++ b/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/SpatialPrefixTree.java @@ -18,7 +18,9 @@ package org.apache.lucene.spatial.prefix.tree; */ import com.spatial4j.core.context.SpatialContext; +import com.spatial4j.core.shape.Circle; import com.spatial4j.core.shape.Point; +import com.spatial4j.core.shape.Rectangle; import com.spatial4j.core.shape.Shape; import java.nio.charset.Charset; @@ -66,30 +68,35 @@ public abstract class SpatialPrefixTree { * See {@link org.apache.lucene.spatial.query.SpatialArgs#getDistPrecision()}. * A grid level looked up via {@link #getLevelForDistance(double)} is returned. * - * @param precision 0-0.5 - * @return 1-maxLevels + * @param precision 0 to 0.5 + * @return 1 to maxLevels */ public int getMaxLevelForPrecision(Shape shape, double precision) { if (precision < 0 || precision > 0.5) { - throw new IllegalArgumentException("Precision " + precision + " must be between [0-0.5]"); + throw new IllegalArgumentException("Precision " + precision + " must be between [0 to 0.5]"); } if (precision == 0 || shape instanceof Point) { return maxLevels; } - double bboxArea = shape.getBoundingBox().getArea(); - if (bboxArea == 0) { - return maxLevels; - } - double avgSideLenFromCenter = Math.sqrt(bboxArea) / 2; - return getLevelForDistance(avgSideLenFromCenter * precision); + Rectangle bbox = shape.getBoundingBox(); + //The diagonal distance should be the same computed from any opposite corner, + // and this is the longest distance that might be occurring within the shape. + double diagonalDist = ctx.getDistCalc().distance( + ctx.makePoint(bbox.getMinX(), bbox.getMinY()), bbox.getMaxX(), bbox.getMaxY()); + //convert to degrees //TODO not needed in Spatial4j 0.3 + diagonalDist = ctx.getDistCalc().distanceToDegrees(diagonalDist); + return getLevelForDistance(diagonalDist * 0.5 * precision); } /** - * Returns the level of the smallest grid size with a side length that is greater or equal to the provided - * distance. + * Returns the level of the largest grid in which its longest side is less + * than or equal to the provided distance (in degrees). Consequently {@link + * dist} acts as an error epsilon declaring the amount of detail needed in the + * grid, such that you can get a grid with just the right amount of + * precision. * * @param dist >= 0 - * @return level [1-maxLevels] + * @return level [1 to maxLevels] */ public abstract int getLevelForDistance(double dist); diff --git a/lucene/spatial/src/java/org/apache/lucene/spatial/query/SpatialArgs.java b/lucene/spatial/src/java/org/apache/lucene/spatial/query/SpatialArgs.java index 7f88bb9e329..471b41f0fa9 100644 --- a/lucene/spatial/src/java/org/apache/lucene/spatial/query/SpatialArgs.java +++ b/lucene/spatial/src/java/org/apache/lucene/spatial/query/SpatialArgs.java @@ -85,12 +85,14 @@ public class SpatialArgs { } /** - * The fraction of the distance from the center of the query shape to its nearest edge - * that is considered acceptable error. The algorithm for computing the distance to the - * nearest edge is actually a little different. It normalizes the shape to a square - * given it's bounding box area: - *
sqrt(shape.bbox.area)/2
- * And the error distance is beyond the shape such that the shape is a minimum shape. + * A measure of acceptable error of the shape. It is specified as the + * fraction of the distance from the center of the query shape to its furthest + * bounding box corner. This effectively inflates the size of the shape but + * should not shrink it. + *

+ * The default is {@link #DEFAULT_DIST_PRECISION} + * + * @return 0 to 0.5 */ public Double getDistPrecision() { return distPrecision; diff --git a/lucene/spatial/src/test/org/apache/lucene/spatial/prefix/TestRecursivePrefixTreeStrategy.java b/lucene/spatial/src/test/org/apache/lucene/spatial/prefix/TestRecursivePrefixTreeStrategy.java index 4131d7f58b0..f00d13b18dc 100644 --- a/lucene/spatial/src/test/org/apache/lucene/spatial/prefix/TestRecursivePrefixTreeStrategy.java +++ b/lucene/spatial/src/test/org/apache/lucene/spatial/prefix/TestRecursivePrefixTreeStrategy.java @@ -69,6 +69,45 @@ public class TestRecursivePrefixTreeStrategy extends StrategyTestCase { } } + @Test + public void testOneMeterPrecision() { + init(GeohashPrefixTree.getMaxLevelsPossible()); + GeohashPrefixTree grid = (GeohashPrefixTree) ((RecursivePrefixTreeStrategy) strategy).getGrid(); + //DWS: I know this to be true. 11 is needed for one meter + assertEquals(11, grid.getLevelForDistance(ctx.getDistCalc().distanceToDegrees(0.001))); + } + + @Test + public void testPrecision() throws IOException{ + init(GeohashPrefixTree.getMaxLevelsPossible()); + + Point iPt = ctx.makePoint(2.8028712999999925, 48.3708044);//lon, lat + addDocument(newDoc("iPt", iPt)); + commit(); + + Point qPt = ctx.makePoint(2.4632387000000335, 48.6003516); + + final double DIST = 35.75;//35.7499... + assertEquals(DIST, ctx.getDistCalc().distance(iPt, qPt), 0.001); + + //distPrec will affect the query shape precision. The indexed precision + // was set to nearly zilch via init(GeohashPrefixTree.getMaxLevelsPossible()); + final double distPrec = 0.025; //the suggested default, by the way + final double distMult = 1+distPrec; + + assertTrue(35.74*distMult >= DIST); + checkHits(q(qPt, 35.74, distPrec), 1, null); + + assertTrue(30*distMult < DIST); + checkHits(q(qPt, 30, distPrec), 0, null); + + assertTrue(33*distMult < DIST); + checkHits(q(qPt, 33, distPrec), 0, null); + + assertTrue(34*distMult < DIST); + checkHits(q(qPt, 34, distPrec), 0, null); + } + @Test public void geohashRecursiveRandom() throws IOException { init(12); @@ -105,9 +144,9 @@ public class TestRecursivePrefixTreeStrategy extends StrategyTestCase { qcYoff + clusterCenter.getY()); double[] distRange = calcDistRange(queryCenter,clusterCenter,sideDegree); //4.1 query a small box getting nothing - checkHits(queryCenter, distRange[0]*0.99, 0, null); + checkHits(q(queryCenter, distRange[0]*0.99), 0, null); //4.2 Query a large box enclosing the cluster, getting everything - checkHits(queryCenter, distRange[1]*1.01, points.size(), null); + checkHits(q(queryCenter, distRange[1]*1.01), points.size(), null); //4.3 Query a medium box getting some (calculate the correct solution and verify) double queryDist = distRange[0] + (distRange[1]-distRange[0])/2;//average @@ -122,7 +161,7 @@ public class TestRecursivePrefixTreeStrategy extends StrategyTestCase { ids = Arrays.copyOf(ids, ids_sz); //assert ids_sz > 0 (can't because randomness keeps us from being able to) - checkHits(queryCenter, queryDist, ids.length, ids); + checkHits(q(queryCenter, queryDist), ids.length, ids); } } @@ -132,13 +171,20 @@ public class TestRecursivePrefixTreeStrategy extends StrategyTestCase { }//randomTest() - //TODO can we use super.runTestQueries() ? - private void checkHits(Point pt, double dist, int assertNumFound, int[] assertIds) { + private SpatialArgs q(Point pt, double dist) { + return q(pt, dist, 0.0); + } + + private SpatialArgs q(Point pt, double dist, double distPrec) { Shape shape = ctx.makeCircle(pt,dist); SpatialArgs args = new SpatialArgs(SpatialOperation.Intersects,shape); - args.setDistPrecision(0.0); + args.setDistPrecision(distPrec); + return args; + } + + private void checkHits(SpatialArgs args, int assertNumFound, int[] assertIds) { SearchResults got = executeQuery(strategy.makeQuery(args), 100); - assertEquals(""+shape,assertNumFound,got.numFound); + assertEquals("" + args, assertNumFound, got.numFound); if (assertIds != null) { Set gotIds = new HashSet(); for (SearchResult result : got.results) { @@ -150,7 +196,6 @@ public class TestRecursivePrefixTreeStrategy extends StrategyTestCase { } } - // private Document newDoc(String id, Shape shape) { Document doc = new Document(); doc.add(new StringField("id", id, Field.Store.YES));