LUCENE-4186 distErrPct upgrade

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1382231 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
David Wayne Smiley 2012-09-08 03:31:06 +00:00
parent 9ff1c66154
commit cf1717ed5e
9 changed files with 134 additions and 74 deletions

View File

@ -44,7 +44,7 @@ public abstract class PrefixTreeStrategy extends SpatialStrategy {
protected final SpatialPrefixTree grid;
private final Map<String, PointPrefixTreeFieldCacheProvider> provider = new ConcurrentHashMap<String, PointPrefixTreeFieldCacheProvider>();
protected int defaultFieldValuesArrayLen = 2;
protected double distErrPct = SpatialArgs.DEFAULT_DIST_PRECISION;
protected double distErrPct = SpatialArgs.DEFAULT_DISTERRPCT;// [ 0 TO 0.5 ]
public PrefixTreeStrategy(SpatialPrefixTree grid, String fieldName) {
super(grid.getSpatialContext(), fieldName);
@ -56,21 +56,34 @@ public abstract class PrefixTreeStrategy extends SpatialStrategy {
this.defaultFieldValuesArrayLen = defaultFieldValuesArrayLen;
}
/** See {@link SpatialPrefixTree#getMaxLevelForPrecision(com.spatial4j.core.shape.Shape, double)}. */
public double getDistErrPct() {
return distErrPct;
}
/**
* The default measure of shape precision affecting indexed and query shapes.
* Specific shapes at index and query time can use something different.
* @see org.apache.lucene.spatial.query.SpatialArgs#getDistErrPct()
*/
public void setDistErrPct(double distErrPct) {
this.distErrPct = distErrPct;
}
@Override
public Field[] createIndexableFields(Shape shape) {
int detailLevel = grid.getMaxLevelForPrecision(shape,distErrPct);
double distErr = SpatialArgs.calcDistanceFromErrPct(shape, distErrPct, ctx);
return createIndexableFields(shape, distErr);
}
public Field[] createIndexableFields(Shape shape, double distErr) {
int detailLevel = grid.getLevelForDistance(distErr);
List<Node> cells = grid.getNodes(shape, detailLevel, true);//true=intermediates cells
//If shape isn't a point, add a full-resolution center-point so that
// PrefixFieldCacheProvider has the center-points.
// PointPrefixTreeFieldCacheProvider has the center-points.
// TODO index each center of a multi-point? Yes/no?
if (!(shape instanceof Point)) {
Point ctr = shape.getCenter();
//TODO should be smarter; don't index 2 tokens for this in CellTokenizer. Harmless though.
//TODO should be smarter; don't index 2 tokens for this in CellTokenStream. Harmless though.
cells.add(grid.getNodes(ctr,grid.getMaxLevels(),false).get(0));
}

View File

@ -56,7 +56,7 @@ public class RecursivePrefixTreeStrategy extends PrefixTreeStrategy {
Shape shape = args.getShape();
int detailLevel = grid.getMaxLevelForPrecision(shape,args.getDistPrecision());
int detailLevel = grid.getLevelForDistance(args.resolveDistErr(ctx, distErrPct));
return new RecursivePrefixTreeFilter(
getFieldName(), grid,shape, prefixGridScanLevel, detailLevel);

View File

@ -48,7 +48,7 @@ public class TermQueryPrefixTreeStrategy extends PrefixTreeStrategy {
throw new UnsupportedSpatialOperation(op);
Shape shape = args.getShape();
int detailLevel = grid.getMaxLevelForPrecision(shape, args.getDistPrecision());
int detailLevel = grid.getLevelForDistance(args.resolveDistErr(ctx, distErrPct));
List<Node> cells = grid.getNodes(shape, detailLevel, false);
TermsFilter filter = new TermsFilter();
for (Node cell : cells) {

View File

@ -1,3 +1,5 @@
package org.apache.lucene.spatial.prefix.tree;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@ -15,13 +17,11 @@
* limitations under the License.
*/
package org.apache.lucene.spatial.prefix.tree;
import com.spatial4j.core.context.SpatialContext;
import com.spatial4j.core.io.GeohashUtils;
import com.spatial4j.core.shape.Point;
import com.spatial4j.core.shape.Rectangle;
import com.spatial4j.core.shape.Shape;
import com.spatial4j.core.io.GeohashUtils;
import java.util.ArrayList;
import java.util.Collection;
@ -67,6 +67,8 @@ public class GeohashPrefixTree extends SpatialPrefixTree {
@Override
public int getLevelForDistance(double dist) {
if (dist == 0)
return maxLevels;//short circuit
final int level = GeohashUtils.lookupHashLenForWidthHeight(dist, dist);
return Math.max(Math.min(level, maxLevels), 1);
}

View File

@ -1,3 +1,5 @@
package org.apache.lucene.spatial.prefix.tree;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@ -15,8 +17,6 @@
* limitations under the License.
*/
package org.apache.lucene.spatial.prefix.tree;
import com.spatial4j.core.context.SpatialContext;
import com.spatial4j.core.shape.Point;
import com.spatial4j.core.shape.Rectangle;
@ -121,6 +121,8 @@ public class QuadPrefixTree extends SpatialPrefixTree {
@Override
public int getLevelForDistance(double dist) {
if (dist == 0)//short circuit
return maxLevels;
for (int i = 0; i < maxLevels-1; i++) {
//note: level[i] is actually a lookup for level i+1
if(dist > levelW[i] && dist > levelH[i]) {

View File

@ -18,9 +18,7 @@ package org.apache.lucene.spatial.prefix.tree;
*/
import com.spatial4j.core.context.SpatialContext;
import com.spatial4j.core.shape.Circle;
import com.spatial4j.core.shape.Point;
import com.spatial4j.core.shape.Rectangle;
import com.spatial4j.core.shape.Shape;
import java.nio.charset.Charset;
@ -30,7 +28,7 @@ import java.util.Collections;
import java.util.List;
/**
* A Spatial Prefix Tree, or Trie, which decomposes shapes into prefixed strings at variable lengths corresponding to
* A spatial Prefix Tree, or Trie, which decomposes shapes into prefixed strings at variable lengths corresponding to
* variable precision. Each string corresponds to a spatial region.
*
* Implementations of this class should be thread-safe and immutable once initialized.
@ -64,28 +62,6 @@ public abstract class SpatialPrefixTree {
return getClass().getSimpleName() + "(maxLevels:" + maxLevels + ",ctx:" + ctx + ")";
}
/**
* See {@link org.apache.lucene.spatial.query.SpatialArgs#getDistPrecision()}.
* A grid level looked up via {@link #getLevelForDistance(double)} is returned.
*
* @param precision 0 to 0.5
* @return 1 to maxLevels
*/
public int getMaxLevelForPrecision(Shape shape, double precision) {
if (precision < 0 || precision > 0.5) {
throw new IllegalArgumentException("Precision " + precision + " must be between [0 to 0.5]");
}
if (precision == 0 || shape instanceof Point) {
return maxLevels;
}
Rectangle bbox = shape.getBoundingBox();
//The diagonal distance should be the same computed from any opposite corner,
// and this is the longest distance that might be occurring within the shape.
double diagonalDist = ctx.getDistCalc().distance(
ctx.makePoint(bbox.getMinX(), bbox.getMinY()), bbox.getMaxX(), bbox.getMaxY());
return getLevelForDistance(diagonalDist * 0.5 * precision);
}
/**
* Returns the level of the largest grid in which its longest side is less
* than or equal to the provided distance (in degrees). Consequently {@code

View File

@ -17,8 +17,9 @@ package org.apache.lucene.spatial.query;
* limitations under the License.
*/
import java.util.Locale;
import com.spatial4j.core.context.SpatialContext;
import com.spatial4j.core.shape.Point;
import com.spatial4j.core.shape.Rectangle;
import com.spatial4j.core.shape.Shape;
/**
@ -28,36 +29,72 @@ import com.spatial4j.core.shape.Shape;
*/
public class SpatialArgs {
public static final double DEFAULT_DIST_PRECISION = 0.025d;
public static final double DEFAULT_DISTERRPCT = 0.025d;
private SpatialOperation operation;
private Shape shape;
private double distPrecision = DEFAULT_DIST_PRECISION;
public SpatialArgs(SpatialOperation operation) {
this.operation = operation;
}
private Double distErrPct;
private Double distErr;
public SpatialArgs(SpatialOperation operation, Shape shape) {
if (operation == null || shape == null)
throw new NullPointerException("operation and shape are required");
this.operation = operation;
this.shape = shape;
}
/**
* Computes the distance given a shape and the {@code distErrPct}. The
* algorithm is the fraction of the distance from the center of the query
* shape to its furthest bounding box corner.
*
* @param shape Mandatory.
* @param distErrPct 0 to 0.5
* @param ctx Mandatory
* @return A distance (in degrees).
*/
public static double calcDistanceFromErrPct(Shape shape, double distErrPct, SpatialContext ctx) {
if (distErrPct < 0 || distErrPct > 0.5) {
throw new IllegalArgumentException("distErrPct " + distErrPct + " must be between [0 to 0.5]");
}
if (distErrPct == 0 || shape instanceof Point) {
return 0;
}
Rectangle bbox = shape.getBoundingBox();
//The diagonal distance should be the same computed from any opposite corner,
// and this is the longest distance that might be occurring within the shape.
double diagonalDist = ctx.getDistCalc().distance(
ctx.makePoint(bbox.getMinX(), bbox.getMinY()), bbox.getMaxX(), bbox.getMaxY());
return diagonalDist * 0.5 * distErrPct;
}
/**
* Gets the error distance that specifies how precise the query shape is. This
* looks at {@link #getDistErr()}, {@link #getDistErrPct()}, and {@code
* defaultDistErrPct}.
* @param ctx
* @param defaultDistErrPct 0 to 0.5
* @return >= 0
*/
public double resolveDistErr(SpatialContext ctx, double defaultDistErrPct) {
if (distErr != null)
return distErr;
double distErrPct = (this.distErrPct != null ? this.distErrPct : defaultDistErrPct);
return calcDistanceFromErrPct(shape, distErrPct, ctx);
}
/** Check if the arguments make sense -- throw an exception if not */
public void validate() throws IllegalArgumentException {
if (operation.isTargetNeedsArea() && !shape.hasArea()) {
throw new IllegalArgumentException(operation + " only supports geometry with area");
}
if (distErr != null && distErrPct != null)
throw new IllegalArgumentException("Only distErr or distErrPct can be specified.");
}
@Override
public String toString() {
StringBuilder str = new StringBuilder();
str.append(operation.getName()).append('(');
str.append(shape.toString());
str.append(" distPrec=").append(String.format(Locale.ROOT, "%.2f%%", distPrecision / 100d));
str.append(')');
return str.toString();
return SpatialArgsParser.writeSpatialArgs(this);
}
//------------------------------------------------
@ -84,22 +121,33 @@ public class SpatialArgs {
}
/**
* A measure of acceptable error of the shape. It is specified as the
* fraction of the distance from the center of the query shape to its furthest
* bounding box corner. This effectively inflates the size of the shape but
* should not shrink it.
* <p/>
* The default is {@link #DEFAULT_DIST_PRECISION}
* A measure of acceptable error of the shape as a fraction. This effectively
* inflates the size of the shape but should not shrink it.
*
* @return 0 to 0.5
* @see #calcDistanceFromErrPct(com.spatial4j.core.shape.Shape, double,
* com.spatial4j.core.context.SpatialContext)
*/
public Double getDistPrecision() {
return distPrecision;
public Double getDistErrPct() {
return distErrPct;
}
public void setDistPrecision(Double distPrecision) {
if (distPrecision != null)
this.distPrecision = distPrecision;
public void setDistErrPct(Double distErrPct) {
if (distErrPct != null)
this.distErrPct = distErrPct;
}
/**
* The acceptable error of the shape. This effectively inflates the
* size of the shape but should not shrink it.
*
* @return >= 0
*/
public Double getDistErr() {
return distErr;
}
public void setDistErr(Double distErr) {
this.distErr = distErr;
}
}

View File

@ -23,6 +23,7 @@ import com.spatial4j.core.io.ShapeReadWriter;
import com.spatial4j.core.shape.Shape;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.StringTokenizer;
@ -44,8 +45,22 @@ import java.util.StringTokenizer;
*/
public class SpatialArgsParser {
/** Writes a close approximation to the parsed input format. */
static String writeSpatialArgs(SpatialArgs args) {
StringBuilder str = new StringBuilder();
str.append(args.getOperation().getName());
str.append('(');
str.append(args.getShape().toString());
if (args.getDistErrPct() != null)
str.append(" distErrPct=").append(String.format(Locale.ROOT, "%.2f%%", args.getDistErrPct() * 100d));
if (args.getDistErr() != null)
str.append(" distErr=").append(args.getDistErr());
str.append(')');
return str.toString();
}
/**
* Parses a string such as "Intersects(-10,20,-8,22) distPec=0.025".
* Parses a string such as "Intersects(-10,20,-8,22) distErrPct=0.025".
*
* @param v The string to parse. Mandatory.
* @param ctx The spatial context. Mandatory.
@ -75,12 +90,14 @@ public class SpatialArgsParser {
body = v.substring(edx + 1).trim();
if (body.length() > 0) {
Map<String, String> aa = parseMap(body);
args.setDistPrecision(readDouble(aa.remove("distPrec")));
args.setDistErrPct(readDouble(aa.remove("distErrPct")));
args.setDistErr(readDouble(aa.remove("distErr")));
if (!aa.isEmpty()) {
throw new IllegalArgumentException("unused parameters: " + aa, null);
}
}
}
args.validate();
return args;
}
@ -92,6 +109,8 @@ public class SpatialArgsParser {
return v == null ? defaultValue : Boolean.parseBoolean(v);
}
/** Parses "a=b c=d f" (whitespace separated) into name-value pairs. If there
* is no '=' as in 'f' above then it's short for f=f. */
protected static Map<String, String> parseMap(String body) {
Map<String, String> map = new HashMap<String, String>();
StringTokenizer st = new StringTokenizer(body, " \n\t");

View File

@ -19,10 +19,10 @@ package org.apache.lucene.spatial.prefix;
import com.spatial4j.core.context.SpatialContext;
import com.spatial4j.core.distance.DistanceUtils;
import com.spatial4j.core.io.GeohashUtils;
import com.spatial4j.core.shape.Point;
import com.spatial4j.core.shape.Rectangle;
import com.spatial4j.core.shape.Shape;
import com.spatial4j.core.io.GeohashUtils;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StoredField;
@ -91,22 +91,22 @@ public class TestRecursivePrefixTreeStrategy extends StrategyTestCase {
final double DIST = 35.75;//35.7499...
assertEquals(DIST, ctx.getDistCalc().distance(iPt, qPt) * DEG2KM, 0.001);
//distPrec will affect the query shape precision. The indexed precision
//distErrPct will affect the query shape precision. The indexed precision
// was set to nearly zilch via init(GeohashPrefixTree.getMaxLevelsPossible());
final double distPrec = 0.025; //the suggested default, by the way
final double distMult = 1+distPrec;
final double distErrPct = 0.025; //the suggested default, by the way
final double distMult = 1+distErrPct;
assertTrue(35.74*distMult >= DIST);
checkHits(q(qPt, 35.74 * KM2DEG, distPrec), 1, null);
checkHits(q(qPt, 35.74 * KM2DEG, distErrPct), 1, null);
assertTrue(30*distMult < DIST);
checkHits(q(qPt, 30 * KM2DEG, distPrec), 0, null);
checkHits(q(qPt, 30 * KM2DEG, distErrPct), 0, null);
assertTrue(33*distMult < DIST);
checkHits(q(qPt, 33 * KM2DEG, distPrec), 0, null);
checkHits(q(qPt, 33 * KM2DEG, distErrPct), 0, null);
assertTrue(34*distMult < DIST);
checkHits(q(qPt, 34 * KM2DEG, distPrec), 0, null);
checkHits(q(qPt, 34 * KM2DEG, distErrPct), 0, null);
}
@Test @Ignore /* LUCENE-4351 ignore this test until I figure out why it failed (as reported by Jenkins) */
@ -178,10 +178,10 @@ public class TestRecursivePrefixTreeStrategy extends StrategyTestCase {
return q(pt, distDEG, 0.0);
}
private SpatialArgs q(Point pt, double distDEG, double distPrec) {
private SpatialArgs q(Point pt, double distDEG, double distErrPct) {
Shape shape = ctx.makeCircle(pt, distDEG);
SpatialArgs args = new SpatialArgs(SpatialOperation.Intersects,shape);
args.setDistPrecision(distPrec);
args.setDistErrPct(distErrPct);
return args;
}