mirror of https://github.com/apache/lucene.git
LUCENE-4186 distErrPct upgrade
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1382231 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
9ff1c66154
commit
cf1717ed5e
|
@ -44,7 +44,7 @@ public abstract class PrefixTreeStrategy extends SpatialStrategy {
|
|||
protected final SpatialPrefixTree grid;
|
||||
private final Map<String, PointPrefixTreeFieldCacheProvider> provider = new ConcurrentHashMap<String, PointPrefixTreeFieldCacheProvider>();
|
||||
protected int defaultFieldValuesArrayLen = 2;
|
||||
protected double distErrPct = SpatialArgs.DEFAULT_DIST_PRECISION;
|
||||
protected double distErrPct = SpatialArgs.DEFAULT_DISTERRPCT;// [ 0 TO 0.5 ]
|
||||
|
||||
public PrefixTreeStrategy(SpatialPrefixTree grid, String fieldName) {
|
||||
super(grid.getSpatialContext(), fieldName);
|
||||
|
@ -56,21 +56,34 @@ public abstract class PrefixTreeStrategy extends SpatialStrategy {
|
|||
this.defaultFieldValuesArrayLen = defaultFieldValuesArrayLen;
|
||||
}
|
||||
|
||||
/** See {@link SpatialPrefixTree#getMaxLevelForPrecision(com.spatial4j.core.shape.Shape, double)}. */
|
||||
public double getDistErrPct() {
|
||||
return distErrPct;
|
||||
}
|
||||
|
||||
/**
|
||||
* The default measure of shape precision affecting indexed and query shapes.
|
||||
* Specific shapes at index and query time can use something different.
|
||||
* @see org.apache.lucene.spatial.query.SpatialArgs#getDistErrPct()
|
||||
*/
|
||||
public void setDistErrPct(double distErrPct) {
|
||||
this.distErrPct = distErrPct;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Field[] createIndexableFields(Shape shape) {
|
||||
int detailLevel = grid.getMaxLevelForPrecision(shape,distErrPct);
|
||||
double distErr = SpatialArgs.calcDistanceFromErrPct(shape, distErrPct, ctx);
|
||||
return createIndexableFields(shape, distErr);
|
||||
}
|
||||
|
||||
public Field[] createIndexableFields(Shape shape, double distErr) {
|
||||
int detailLevel = grid.getLevelForDistance(distErr);
|
||||
List<Node> cells = grid.getNodes(shape, detailLevel, true);//true=intermediates cells
|
||||
//If shape isn't a point, add a full-resolution center-point so that
|
||||
// PrefixFieldCacheProvider has the center-points.
|
||||
// PointPrefixTreeFieldCacheProvider has the center-points.
|
||||
// TODO index each center of a multi-point? Yes/no?
|
||||
if (!(shape instanceof Point)) {
|
||||
Point ctr = shape.getCenter();
|
||||
//TODO should be smarter; don't index 2 tokens for this in CellTokenizer. Harmless though.
|
||||
//TODO should be smarter; don't index 2 tokens for this in CellTokenStream. Harmless though.
|
||||
cells.add(grid.getNodes(ctr,grid.getMaxLevels(),false).get(0));
|
||||
}
|
||||
|
||||
|
|
|
@ -56,7 +56,7 @@ public class RecursivePrefixTreeStrategy extends PrefixTreeStrategy {
|
|||
|
||||
Shape shape = args.getShape();
|
||||
|
||||
int detailLevel = grid.getMaxLevelForPrecision(shape,args.getDistPrecision());
|
||||
int detailLevel = grid.getLevelForDistance(args.resolveDistErr(ctx, distErrPct));
|
||||
|
||||
return new RecursivePrefixTreeFilter(
|
||||
getFieldName(), grid,shape, prefixGridScanLevel, detailLevel);
|
||||
|
|
|
@ -48,7 +48,7 @@ public class TermQueryPrefixTreeStrategy extends PrefixTreeStrategy {
|
|||
throw new UnsupportedSpatialOperation(op);
|
||||
|
||||
Shape shape = args.getShape();
|
||||
int detailLevel = grid.getMaxLevelForPrecision(shape, args.getDistPrecision());
|
||||
int detailLevel = grid.getLevelForDistance(args.resolveDistErr(ctx, distErrPct));
|
||||
List<Node> cells = grid.getNodes(shape, detailLevel, false);
|
||||
TermsFilter filter = new TermsFilter();
|
||||
for (Node cell : cells) {
|
||||
|
|
|
@ -1,3 +1,5 @@
|
|||
package org.apache.lucene.spatial.prefix.tree;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
|
@ -15,13 +17,11 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.spatial.prefix.tree;
|
||||
|
||||
import com.spatial4j.core.context.SpatialContext;
|
||||
import com.spatial4j.core.io.GeohashUtils;
|
||||
import com.spatial4j.core.shape.Point;
|
||||
import com.spatial4j.core.shape.Rectangle;
|
||||
import com.spatial4j.core.shape.Shape;
|
||||
import com.spatial4j.core.io.GeohashUtils;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
|
@ -67,6 +67,8 @@ public class GeohashPrefixTree extends SpatialPrefixTree {
|
|||
|
||||
@Override
|
||||
public int getLevelForDistance(double dist) {
|
||||
if (dist == 0)
|
||||
return maxLevels;//short circuit
|
||||
final int level = GeohashUtils.lookupHashLenForWidthHeight(dist, dist);
|
||||
return Math.max(Math.min(level, maxLevels), 1);
|
||||
}
|
||||
|
|
|
@ -1,3 +1,5 @@
|
|||
package org.apache.lucene.spatial.prefix.tree;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
|
@ -15,8 +17,6 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.spatial.prefix.tree;
|
||||
|
||||
import com.spatial4j.core.context.SpatialContext;
|
||||
import com.spatial4j.core.shape.Point;
|
||||
import com.spatial4j.core.shape.Rectangle;
|
||||
|
@ -121,6 +121,8 @@ public class QuadPrefixTree extends SpatialPrefixTree {
|
|||
|
||||
@Override
|
||||
public int getLevelForDistance(double dist) {
|
||||
if (dist == 0)//short circuit
|
||||
return maxLevels;
|
||||
for (int i = 0; i < maxLevels-1; i++) {
|
||||
//note: level[i] is actually a lookup for level i+1
|
||||
if(dist > levelW[i] && dist > levelH[i]) {
|
||||
|
|
|
@ -18,9 +18,7 @@ package org.apache.lucene.spatial.prefix.tree;
|
|||
*/
|
||||
|
||||
import com.spatial4j.core.context.SpatialContext;
|
||||
import com.spatial4j.core.shape.Circle;
|
||||
import com.spatial4j.core.shape.Point;
|
||||
import com.spatial4j.core.shape.Rectangle;
|
||||
import com.spatial4j.core.shape.Shape;
|
||||
|
||||
import java.nio.charset.Charset;
|
||||
|
@ -30,7 +28,7 @@ import java.util.Collections;
|
|||
import java.util.List;
|
||||
|
||||
/**
|
||||
* A Spatial Prefix Tree, or Trie, which decomposes shapes into prefixed strings at variable lengths corresponding to
|
||||
* A spatial Prefix Tree, or Trie, which decomposes shapes into prefixed strings at variable lengths corresponding to
|
||||
* variable precision. Each string corresponds to a spatial region.
|
||||
*
|
||||
* Implementations of this class should be thread-safe and immutable once initialized.
|
||||
|
@ -64,28 +62,6 @@ public abstract class SpatialPrefixTree {
|
|||
return getClass().getSimpleName() + "(maxLevels:" + maxLevels + ",ctx:" + ctx + ")";
|
||||
}
|
||||
|
||||
/**
|
||||
* See {@link org.apache.lucene.spatial.query.SpatialArgs#getDistPrecision()}.
|
||||
* A grid level looked up via {@link #getLevelForDistance(double)} is returned.
|
||||
*
|
||||
* @param precision 0 to 0.5
|
||||
* @return 1 to maxLevels
|
||||
*/
|
||||
public int getMaxLevelForPrecision(Shape shape, double precision) {
|
||||
if (precision < 0 || precision > 0.5) {
|
||||
throw new IllegalArgumentException("Precision " + precision + " must be between [0 to 0.5]");
|
||||
}
|
||||
if (precision == 0 || shape instanceof Point) {
|
||||
return maxLevels;
|
||||
}
|
||||
Rectangle bbox = shape.getBoundingBox();
|
||||
//The diagonal distance should be the same computed from any opposite corner,
|
||||
// and this is the longest distance that might be occurring within the shape.
|
||||
double diagonalDist = ctx.getDistCalc().distance(
|
||||
ctx.makePoint(bbox.getMinX(), bbox.getMinY()), bbox.getMaxX(), bbox.getMaxY());
|
||||
return getLevelForDistance(diagonalDist * 0.5 * precision);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the level of the largest grid in which its longest side is less
|
||||
* than or equal to the provided distance (in degrees). Consequently {@code
|
||||
|
|
|
@ -17,8 +17,9 @@ package org.apache.lucene.spatial.query;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.Locale;
|
||||
|
||||
import com.spatial4j.core.context.SpatialContext;
|
||||
import com.spatial4j.core.shape.Point;
|
||||
import com.spatial4j.core.shape.Rectangle;
|
||||
import com.spatial4j.core.shape.Shape;
|
||||
|
||||
/**
|
||||
|
@ -28,36 +29,72 @@ import com.spatial4j.core.shape.Shape;
|
|||
*/
|
||||
public class SpatialArgs {
|
||||
|
||||
public static final double DEFAULT_DIST_PRECISION = 0.025d;
|
||||
public static final double DEFAULT_DISTERRPCT = 0.025d;
|
||||
|
||||
private SpatialOperation operation;
|
||||
private Shape shape;
|
||||
private double distPrecision = DEFAULT_DIST_PRECISION;
|
||||
|
||||
public SpatialArgs(SpatialOperation operation) {
|
||||
this.operation = operation;
|
||||
}
|
||||
private Double distErrPct;
|
||||
private Double distErr;
|
||||
|
||||
public SpatialArgs(SpatialOperation operation, Shape shape) {
|
||||
if (operation == null || shape == null)
|
||||
throw new NullPointerException("operation and shape are required");
|
||||
this.operation = operation;
|
||||
this.shape = shape;
|
||||
}
|
||||
|
||||
/**
|
||||
* Computes the distance given a shape and the {@code distErrPct}. The
|
||||
* algorithm is the fraction of the distance from the center of the query
|
||||
* shape to its furthest bounding box corner.
|
||||
*
|
||||
* @param shape Mandatory.
|
||||
* @param distErrPct 0 to 0.5
|
||||
* @param ctx Mandatory
|
||||
* @return A distance (in degrees).
|
||||
*/
|
||||
public static double calcDistanceFromErrPct(Shape shape, double distErrPct, SpatialContext ctx) {
|
||||
if (distErrPct < 0 || distErrPct > 0.5) {
|
||||
throw new IllegalArgumentException("distErrPct " + distErrPct + " must be between [0 to 0.5]");
|
||||
}
|
||||
if (distErrPct == 0 || shape instanceof Point) {
|
||||
return 0;
|
||||
}
|
||||
Rectangle bbox = shape.getBoundingBox();
|
||||
//The diagonal distance should be the same computed from any opposite corner,
|
||||
// and this is the longest distance that might be occurring within the shape.
|
||||
double diagonalDist = ctx.getDistCalc().distance(
|
||||
ctx.makePoint(bbox.getMinX(), bbox.getMinY()), bbox.getMaxX(), bbox.getMaxY());
|
||||
return diagonalDist * 0.5 * distErrPct;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the error distance that specifies how precise the query shape is. This
|
||||
* looks at {@link #getDistErr()}, {@link #getDistErrPct()}, and {@code
|
||||
* defaultDistErrPct}.
|
||||
* @param ctx
|
||||
* @param defaultDistErrPct 0 to 0.5
|
||||
* @return >= 0
|
||||
*/
|
||||
public double resolveDistErr(SpatialContext ctx, double defaultDistErrPct) {
|
||||
if (distErr != null)
|
||||
return distErr;
|
||||
double distErrPct = (this.distErrPct != null ? this.distErrPct : defaultDistErrPct);
|
||||
return calcDistanceFromErrPct(shape, distErrPct, ctx);
|
||||
}
|
||||
|
||||
/** Check if the arguments make sense -- throw an exception if not */
|
||||
public void validate() throws IllegalArgumentException {
|
||||
if (operation.isTargetNeedsArea() && !shape.hasArea()) {
|
||||
throw new IllegalArgumentException(operation + " only supports geometry with area");
|
||||
}
|
||||
if (distErr != null && distErrPct != null)
|
||||
throw new IllegalArgumentException("Only distErr or distErrPct can be specified.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
StringBuilder str = new StringBuilder();
|
||||
str.append(operation.getName()).append('(');
|
||||
str.append(shape.toString());
|
||||
str.append(" distPrec=").append(String.format(Locale.ROOT, "%.2f%%", distPrecision / 100d));
|
||||
str.append(')');
|
||||
return str.toString();
|
||||
return SpatialArgsParser.writeSpatialArgs(this);
|
||||
}
|
||||
|
||||
//------------------------------------------------
|
||||
|
@ -84,22 +121,33 @@ public class SpatialArgs {
|
|||
}
|
||||
|
||||
/**
|
||||
* A measure of acceptable error of the shape. It is specified as the
|
||||
* fraction of the distance from the center of the query shape to its furthest
|
||||
* bounding box corner. This effectively inflates the size of the shape but
|
||||
* should not shrink it.
|
||||
* <p/>
|
||||
* The default is {@link #DEFAULT_DIST_PRECISION}
|
||||
* A measure of acceptable error of the shape as a fraction. This effectively
|
||||
* inflates the size of the shape but should not shrink it.
|
||||
*
|
||||
* @return 0 to 0.5
|
||||
* @see #calcDistanceFromErrPct(com.spatial4j.core.shape.Shape, double,
|
||||
* com.spatial4j.core.context.SpatialContext)
|
||||
*/
|
||||
public Double getDistPrecision() {
|
||||
return distPrecision;
|
||||
public Double getDistErrPct() {
|
||||
return distErrPct;
|
||||
}
|
||||
|
||||
public void setDistPrecision(Double distPrecision) {
|
||||
if (distPrecision != null)
|
||||
this.distPrecision = distPrecision;
|
||||
public void setDistErrPct(Double distErrPct) {
|
||||
if (distErrPct != null)
|
||||
this.distErrPct = distErrPct;
|
||||
}
|
||||
|
||||
/**
|
||||
* The acceptable error of the shape. This effectively inflates the
|
||||
* size of the shape but should not shrink it.
|
||||
*
|
||||
* @return >= 0
|
||||
*/
|
||||
public Double getDistErr() {
|
||||
return distErr;
|
||||
}
|
||||
|
||||
public void setDistErr(Double distErr) {
|
||||
this.distErr = distErr;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -23,6 +23,7 @@ import com.spatial4j.core.io.ShapeReadWriter;
|
|||
import com.spatial4j.core.shape.Shape;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.StringTokenizer;
|
||||
|
||||
|
@ -44,8 +45,22 @@ import java.util.StringTokenizer;
|
|||
*/
|
||||
public class SpatialArgsParser {
|
||||
|
||||
/** Writes a close approximation to the parsed input format. */
|
||||
static String writeSpatialArgs(SpatialArgs args) {
|
||||
StringBuilder str = new StringBuilder();
|
||||
str.append(args.getOperation().getName());
|
||||
str.append('(');
|
||||
str.append(args.getShape().toString());
|
||||
if (args.getDistErrPct() != null)
|
||||
str.append(" distErrPct=").append(String.format(Locale.ROOT, "%.2f%%", args.getDistErrPct() * 100d));
|
||||
if (args.getDistErr() != null)
|
||||
str.append(" distErr=").append(args.getDistErr());
|
||||
str.append(')');
|
||||
return str.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses a string such as "Intersects(-10,20,-8,22) distPec=0.025".
|
||||
* Parses a string such as "Intersects(-10,20,-8,22) distErrPct=0.025".
|
||||
*
|
||||
* @param v The string to parse. Mandatory.
|
||||
* @param ctx The spatial context. Mandatory.
|
||||
|
@ -75,12 +90,14 @@ public class SpatialArgsParser {
|
|||
body = v.substring(edx + 1).trim();
|
||||
if (body.length() > 0) {
|
||||
Map<String, String> aa = parseMap(body);
|
||||
args.setDistPrecision(readDouble(aa.remove("distPrec")));
|
||||
args.setDistErrPct(readDouble(aa.remove("distErrPct")));
|
||||
args.setDistErr(readDouble(aa.remove("distErr")));
|
||||
if (!aa.isEmpty()) {
|
||||
throw new IllegalArgumentException("unused parameters: " + aa, null);
|
||||
}
|
||||
}
|
||||
}
|
||||
args.validate();
|
||||
return args;
|
||||
}
|
||||
|
||||
|
@ -92,6 +109,8 @@ public class SpatialArgsParser {
|
|||
return v == null ? defaultValue : Boolean.parseBoolean(v);
|
||||
}
|
||||
|
||||
/** Parses "a=b c=d f" (whitespace separated) into name-value pairs. If there
|
||||
* is no '=' as in 'f' above then it's short for f=f. */
|
||||
protected static Map<String, String> parseMap(String body) {
|
||||
Map<String, String> map = new HashMap<String, String>();
|
||||
StringTokenizer st = new StringTokenizer(body, " \n\t");
|
||||
|
|
|
@ -19,10 +19,10 @@ package org.apache.lucene.spatial.prefix;
|
|||
|
||||
import com.spatial4j.core.context.SpatialContext;
|
||||
import com.spatial4j.core.distance.DistanceUtils;
|
||||
import com.spatial4j.core.io.GeohashUtils;
|
||||
import com.spatial4j.core.shape.Point;
|
||||
import com.spatial4j.core.shape.Rectangle;
|
||||
import com.spatial4j.core.shape.Shape;
|
||||
import com.spatial4j.core.io.GeohashUtils;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.StoredField;
|
||||
|
@ -91,22 +91,22 @@ public class TestRecursivePrefixTreeStrategy extends StrategyTestCase {
|
|||
final double DIST = 35.75;//35.7499...
|
||||
assertEquals(DIST, ctx.getDistCalc().distance(iPt, qPt) * DEG2KM, 0.001);
|
||||
|
||||
//distPrec will affect the query shape precision. The indexed precision
|
||||
//distErrPct will affect the query shape precision. The indexed precision
|
||||
// was set to nearly zilch via init(GeohashPrefixTree.getMaxLevelsPossible());
|
||||
final double distPrec = 0.025; //the suggested default, by the way
|
||||
final double distMult = 1+distPrec;
|
||||
final double distErrPct = 0.025; //the suggested default, by the way
|
||||
final double distMult = 1+distErrPct;
|
||||
|
||||
assertTrue(35.74*distMult >= DIST);
|
||||
checkHits(q(qPt, 35.74 * KM2DEG, distPrec), 1, null);
|
||||
checkHits(q(qPt, 35.74 * KM2DEG, distErrPct), 1, null);
|
||||
|
||||
assertTrue(30*distMult < DIST);
|
||||
checkHits(q(qPt, 30 * KM2DEG, distPrec), 0, null);
|
||||
checkHits(q(qPt, 30 * KM2DEG, distErrPct), 0, null);
|
||||
|
||||
assertTrue(33*distMult < DIST);
|
||||
checkHits(q(qPt, 33 * KM2DEG, distPrec), 0, null);
|
||||
checkHits(q(qPt, 33 * KM2DEG, distErrPct), 0, null);
|
||||
|
||||
assertTrue(34*distMult < DIST);
|
||||
checkHits(q(qPt, 34 * KM2DEG, distPrec), 0, null);
|
||||
checkHits(q(qPt, 34 * KM2DEG, distErrPct), 0, null);
|
||||
}
|
||||
|
||||
@Test @Ignore /* LUCENE-4351 ignore this test until I figure out why it failed (as reported by Jenkins) */
|
||||
|
@ -178,10 +178,10 @@ public class TestRecursivePrefixTreeStrategy extends StrategyTestCase {
|
|||
return q(pt, distDEG, 0.0);
|
||||
}
|
||||
|
||||
private SpatialArgs q(Point pt, double distDEG, double distPrec) {
|
||||
private SpatialArgs q(Point pt, double distDEG, double distErrPct) {
|
||||
Shape shape = ctx.makeCircle(pt, distDEG);
|
||||
SpatialArgs args = new SpatialArgs(SpatialOperation.Intersects,shape);
|
||||
args.setDistPrecision(distPrec);
|
||||
args.setDistErrPct(distErrPct);
|
||||
return args;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue