LUCENE-4186 distErrPct upgrade

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1382231 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
David Wayne Smiley 2012-09-08 03:31:06 +00:00
parent 9ff1c66154
commit cf1717ed5e
9 changed files with 134 additions and 74 deletions

View File

@ -44,7 +44,7 @@ public abstract class PrefixTreeStrategy extends SpatialStrategy {
protected final SpatialPrefixTree grid; protected final SpatialPrefixTree grid;
private final Map<String, PointPrefixTreeFieldCacheProvider> provider = new ConcurrentHashMap<String, PointPrefixTreeFieldCacheProvider>(); private final Map<String, PointPrefixTreeFieldCacheProvider> provider = new ConcurrentHashMap<String, PointPrefixTreeFieldCacheProvider>();
protected int defaultFieldValuesArrayLen = 2; protected int defaultFieldValuesArrayLen = 2;
protected double distErrPct = SpatialArgs.DEFAULT_DIST_PRECISION; protected double distErrPct = SpatialArgs.DEFAULT_DISTERRPCT;// [ 0 TO 0.5 ]
public PrefixTreeStrategy(SpatialPrefixTree grid, String fieldName) { public PrefixTreeStrategy(SpatialPrefixTree grid, String fieldName) {
super(grid.getSpatialContext(), fieldName); super(grid.getSpatialContext(), fieldName);
@ -56,21 +56,34 @@ public abstract class PrefixTreeStrategy extends SpatialStrategy {
this.defaultFieldValuesArrayLen = defaultFieldValuesArrayLen; this.defaultFieldValuesArrayLen = defaultFieldValuesArrayLen;
} }
/** See {@link SpatialPrefixTree#getMaxLevelForPrecision(com.spatial4j.core.shape.Shape, double)}. */ public double getDistErrPct() {
return distErrPct;
}
/**
* The default measure of shape precision affecting indexed and query shapes.
* Specific shapes at index and query time can use something different.
* @see org.apache.lucene.spatial.query.SpatialArgs#getDistErrPct()
*/
public void setDistErrPct(double distErrPct) { public void setDistErrPct(double distErrPct) {
this.distErrPct = distErrPct; this.distErrPct = distErrPct;
} }
@Override @Override
public Field[] createIndexableFields(Shape shape) { public Field[] createIndexableFields(Shape shape) {
int detailLevel = grid.getMaxLevelForPrecision(shape,distErrPct); double distErr = SpatialArgs.calcDistanceFromErrPct(shape, distErrPct, ctx);
return createIndexableFields(shape, distErr);
}
public Field[] createIndexableFields(Shape shape, double distErr) {
int detailLevel = grid.getLevelForDistance(distErr);
List<Node> cells = grid.getNodes(shape, detailLevel, true);//true=intermediates cells List<Node> cells = grid.getNodes(shape, detailLevel, true);//true=intermediates cells
//If shape isn't a point, add a full-resolution center-point so that //If shape isn't a point, add a full-resolution center-point so that
// PrefixFieldCacheProvider has the center-points. // PointPrefixTreeFieldCacheProvider has the center-points.
// TODO index each center of a multi-point? Yes/no? // TODO index each center of a multi-point? Yes/no?
if (!(shape instanceof Point)) { if (!(shape instanceof Point)) {
Point ctr = shape.getCenter(); Point ctr = shape.getCenter();
//TODO should be smarter; don't index 2 tokens for this in CellTokenizer. Harmless though. //TODO should be smarter; don't index 2 tokens for this in CellTokenStream. Harmless though.
cells.add(grid.getNodes(ctr,grid.getMaxLevels(),false).get(0)); cells.add(grid.getNodes(ctr,grid.getMaxLevels(),false).get(0));
} }

View File

@ -56,7 +56,7 @@ public class RecursivePrefixTreeStrategy extends PrefixTreeStrategy {
Shape shape = args.getShape(); Shape shape = args.getShape();
int detailLevel = grid.getMaxLevelForPrecision(shape,args.getDistPrecision()); int detailLevel = grid.getLevelForDistance(args.resolveDistErr(ctx, distErrPct));
return new RecursivePrefixTreeFilter( return new RecursivePrefixTreeFilter(
getFieldName(), grid,shape, prefixGridScanLevel, detailLevel); getFieldName(), grid,shape, prefixGridScanLevel, detailLevel);

View File

@ -48,7 +48,7 @@ public class TermQueryPrefixTreeStrategy extends PrefixTreeStrategy {
throw new UnsupportedSpatialOperation(op); throw new UnsupportedSpatialOperation(op);
Shape shape = args.getShape(); Shape shape = args.getShape();
int detailLevel = grid.getMaxLevelForPrecision(shape, args.getDistPrecision()); int detailLevel = grid.getLevelForDistance(args.resolveDistErr(ctx, distErrPct));
List<Node> cells = grid.getNodes(shape, detailLevel, false); List<Node> cells = grid.getNodes(shape, detailLevel, false);
TermsFilter filter = new TermsFilter(); TermsFilter filter = new TermsFilter();
for (Node cell : cells) { for (Node cell : cells) {

View File

@ -1,3 +1,5 @@
package org.apache.lucene.spatial.prefix.tree;
/* /*
* Licensed to the Apache Software Foundation (ASF) under one or more * Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with * contributor license agreements. See the NOTICE file distributed with
@ -15,13 +17,11 @@
* limitations under the License. * limitations under the License.
*/ */
package org.apache.lucene.spatial.prefix.tree;
import com.spatial4j.core.context.SpatialContext; import com.spatial4j.core.context.SpatialContext;
import com.spatial4j.core.io.GeohashUtils;
import com.spatial4j.core.shape.Point; import com.spatial4j.core.shape.Point;
import com.spatial4j.core.shape.Rectangle; import com.spatial4j.core.shape.Rectangle;
import com.spatial4j.core.shape.Shape; import com.spatial4j.core.shape.Shape;
import com.spatial4j.core.io.GeohashUtils;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collection; import java.util.Collection;
@ -67,6 +67,8 @@ public class GeohashPrefixTree extends SpatialPrefixTree {
@Override @Override
public int getLevelForDistance(double dist) { public int getLevelForDistance(double dist) {
if (dist == 0)
return maxLevels;//short circuit
final int level = GeohashUtils.lookupHashLenForWidthHeight(dist, dist); final int level = GeohashUtils.lookupHashLenForWidthHeight(dist, dist);
return Math.max(Math.min(level, maxLevels), 1); return Math.max(Math.min(level, maxLevels), 1);
} }

View File

@ -1,3 +1,5 @@
package org.apache.lucene.spatial.prefix.tree;
/* /*
* Licensed to the Apache Software Foundation (ASF) under one or more * Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with * contributor license agreements. See the NOTICE file distributed with
@ -15,8 +17,6 @@
* limitations under the License. * limitations under the License.
*/ */
package org.apache.lucene.spatial.prefix.tree;
import com.spatial4j.core.context.SpatialContext; import com.spatial4j.core.context.SpatialContext;
import com.spatial4j.core.shape.Point; import com.spatial4j.core.shape.Point;
import com.spatial4j.core.shape.Rectangle; import com.spatial4j.core.shape.Rectangle;
@ -121,6 +121,8 @@ public class QuadPrefixTree extends SpatialPrefixTree {
@Override @Override
public int getLevelForDistance(double dist) { public int getLevelForDistance(double dist) {
if (dist == 0)//short circuit
return maxLevels;
for (int i = 0; i < maxLevels-1; i++) { for (int i = 0; i < maxLevels-1; i++) {
//note: level[i] is actually a lookup for level i+1 //note: level[i] is actually a lookup for level i+1
if(dist > levelW[i] && dist > levelH[i]) { if(dist > levelW[i] && dist > levelH[i]) {

View File

@ -18,9 +18,7 @@ package org.apache.lucene.spatial.prefix.tree;
*/ */
import com.spatial4j.core.context.SpatialContext; import com.spatial4j.core.context.SpatialContext;
import com.spatial4j.core.shape.Circle;
import com.spatial4j.core.shape.Point; import com.spatial4j.core.shape.Point;
import com.spatial4j.core.shape.Rectangle;
import com.spatial4j.core.shape.Shape; import com.spatial4j.core.shape.Shape;
import java.nio.charset.Charset; import java.nio.charset.Charset;
@ -30,7 +28,7 @@ import java.util.Collections;
import java.util.List; import java.util.List;
/** /**
* A Spatial Prefix Tree, or Trie, which decomposes shapes into prefixed strings at variable lengths corresponding to * A spatial Prefix Tree, or Trie, which decomposes shapes into prefixed strings at variable lengths corresponding to
* variable precision. Each string corresponds to a spatial region. * variable precision. Each string corresponds to a spatial region.
* *
* Implementations of this class should be thread-safe and immutable once initialized. * Implementations of this class should be thread-safe and immutable once initialized.
@ -64,28 +62,6 @@ public abstract class SpatialPrefixTree {
return getClass().getSimpleName() + "(maxLevels:" + maxLevels + ",ctx:" + ctx + ")"; return getClass().getSimpleName() + "(maxLevels:" + maxLevels + ",ctx:" + ctx + ")";
} }
/**
* See {@link org.apache.lucene.spatial.query.SpatialArgs#getDistPrecision()}.
* A grid level looked up via {@link #getLevelForDistance(double)} is returned.
*
* @param precision 0 to 0.5
* @return 1 to maxLevels
*/
public int getMaxLevelForPrecision(Shape shape, double precision) {
if (precision < 0 || precision > 0.5) {
throw new IllegalArgumentException("Precision " + precision + " must be between [0 to 0.5]");
}
if (precision == 0 || shape instanceof Point) {
return maxLevels;
}
Rectangle bbox = shape.getBoundingBox();
//The diagonal distance should be the same computed from any opposite corner,
// and this is the longest distance that might be occurring within the shape.
double diagonalDist = ctx.getDistCalc().distance(
ctx.makePoint(bbox.getMinX(), bbox.getMinY()), bbox.getMaxX(), bbox.getMaxY());
return getLevelForDistance(diagonalDist * 0.5 * precision);
}
/** /**
* Returns the level of the largest grid in which its longest side is less * Returns the level of the largest grid in which its longest side is less
* than or equal to the provided distance (in degrees). Consequently {@code * than or equal to the provided distance (in degrees). Consequently {@code

View File

@ -17,8 +17,9 @@ package org.apache.lucene.spatial.query;
* limitations under the License. * limitations under the License.
*/ */
import java.util.Locale; import com.spatial4j.core.context.SpatialContext;
import com.spatial4j.core.shape.Point;
import com.spatial4j.core.shape.Rectangle;
import com.spatial4j.core.shape.Shape; import com.spatial4j.core.shape.Shape;
/** /**
@ -28,36 +29,72 @@ import com.spatial4j.core.shape.Shape;
*/ */
public class SpatialArgs { public class SpatialArgs {
public static final double DEFAULT_DIST_PRECISION = 0.025d; public static final double DEFAULT_DISTERRPCT = 0.025d;
private SpatialOperation operation; private SpatialOperation operation;
private Shape shape; private Shape shape;
private double distPrecision = DEFAULT_DIST_PRECISION; private Double distErrPct;
private Double distErr;
public SpatialArgs(SpatialOperation operation) {
this.operation = operation;
}
public SpatialArgs(SpatialOperation operation, Shape shape) { public SpatialArgs(SpatialOperation operation, Shape shape) {
if (operation == null || shape == null)
throw new NullPointerException("operation and shape are required");
this.operation = operation; this.operation = operation;
this.shape = shape; this.shape = shape;
} }
/**
* Computes the distance given a shape and the {@code distErrPct}. The
* algorithm is the fraction of the distance from the center of the query
* shape to its furthest bounding box corner.
*
* @param shape Mandatory.
* @param distErrPct 0 to 0.5
* @param ctx Mandatory
* @return A distance (in degrees).
*/
public static double calcDistanceFromErrPct(Shape shape, double distErrPct, SpatialContext ctx) {
if (distErrPct < 0 || distErrPct > 0.5) {
throw new IllegalArgumentException("distErrPct " + distErrPct + " must be between [0 to 0.5]");
}
if (distErrPct == 0 || shape instanceof Point) {
return 0;
}
Rectangle bbox = shape.getBoundingBox();
//The diagonal distance should be the same computed from any opposite corner,
// and this is the longest distance that might be occurring within the shape.
double diagonalDist = ctx.getDistCalc().distance(
ctx.makePoint(bbox.getMinX(), bbox.getMinY()), bbox.getMaxX(), bbox.getMaxY());
return diagonalDist * 0.5 * distErrPct;
}
/**
* Gets the error distance that specifies how precise the query shape is. This
* looks at {@link #getDistErr()}, {@link #getDistErrPct()}, and {@code
* defaultDistErrPct}.
* @param ctx
* @param defaultDistErrPct 0 to 0.5
* @return >= 0
*/
public double resolveDistErr(SpatialContext ctx, double defaultDistErrPct) {
if (distErr != null)
return distErr;
double distErrPct = (this.distErrPct != null ? this.distErrPct : defaultDistErrPct);
return calcDistanceFromErrPct(shape, distErrPct, ctx);
}
/** Check if the arguments make sense -- throw an exception if not */ /** Check if the arguments make sense -- throw an exception if not */
public void validate() throws IllegalArgumentException { public void validate() throws IllegalArgumentException {
if (operation.isTargetNeedsArea() && !shape.hasArea()) { if (operation.isTargetNeedsArea() && !shape.hasArea()) {
throw new IllegalArgumentException(operation + " only supports geometry with area"); throw new IllegalArgumentException(operation + " only supports geometry with area");
} }
if (distErr != null && distErrPct != null)
throw new IllegalArgumentException("Only distErr or distErrPct can be specified.");
} }
@Override @Override
public String toString() { public String toString() {
StringBuilder str = new StringBuilder(); return SpatialArgsParser.writeSpatialArgs(this);
str.append(operation.getName()).append('(');
str.append(shape.toString());
str.append(" distPrec=").append(String.format(Locale.ROOT, "%.2f%%", distPrecision / 100d));
str.append(')');
return str.toString();
} }
//------------------------------------------------ //------------------------------------------------
@ -84,22 +121,33 @@ public class SpatialArgs {
} }
/** /**
* A measure of acceptable error of the shape. It is specified as the * A measure of acceptable error of the shape as a fraction. This effectively
* fraction of the distance from the center of the query shape to its furthest * inflates the size of the shape but should not shrink it.
* bounding box corner. This effectively inflates the size of the shape but
* should not shrink it.
* <p/>
* The default is {@link #DEFAULT_DIST_PRECISION}
* *
* @return 0 to 0.5 * @return 0 to 0.5
* @see #calcDistanceFromErrPct(com.spatial4j.core.shape.Shape, double,
* com.spatial4j.core.context.SpatialContext)
*/ */
public Double getDistPrecision() { public Double getDistErrPct() {
return distPrecision; return distErrPct;
} }
public void setDistPrecision(Double distPrecision) { public void setDistErrPct(Double distErrPct) {
if (distPrecision != null) if (distErrPct != null)
this.distPrecision = distPrecision; this.distErrPct = distErrPct;
} }
/**
* The acceptable error of the shape. This effectively inflates the
* size of the shape but should not shrink it.
*
* @return >= 0
*/
public Double getDistErr() {
return distErr;
}
public void setDistErr(Double distErr) {
this.distErr = distErr;
}
} }

View File

@ -23,6 +23,7 @@ import com.spatial4j.core.io.ShapeReadWriter;
import com.spatial4j.core.shape.Shape; import com.spatial4j.core.shape.Shape;
import java.util.HashMap; import java.util.HashMap;
import java.util.Locale;
import java.util.Map; import java.util.Map;
import java.util.StringTokenizer; import java.util.StringTokenizer;
@ -44,8 +45,22 @@ import java.util.StringTokenizer;
*/ */
public class SpatialArgsParser { public class SpatialArgsParser {
/** Writes a close approximation to the parsed input format. */
static String writeSpatialArgs(SpatialArgs args) {
StringBuilder str = new StringBuilder();
str.append(args.getOperation().getName());
str.append('(');
str.append(args.getShape().toString());
if (args.getDistErrPct() != null)
str.append(" distErrPct=").append(String.format(Locale.ROOT, "%.2f%%", args.getDistErrPct() * 100d));
if (args.getDistErr() != null)
str.append(" distErr=").append(args.getDistErr());
str.append(')');
return str.toString();
}
/** /**
* Parses a string such as "Intersects(-10,20,-8,22) distPec=0.025". * Parses a string such as "Intersects(-10,20,-8,22) distErrPct=0.025".
* *
* @param v The string to parse. Mandatory. * @param v The string to parse. Mandatory.
* @param ctx The spatial context. Mandatory. * @param ctx The spatial context. Mandatory.
@ -75,12 +90,14 @@ public class SpatialArgsParser {
body = v.substring(edx + 1).trim(); body = v.substring(edx + 1).trim();
if (body.length() > 0) { if (body.length() > 0) {
Map<String, String> aa = parseMap(body); Map<String, String> aa = parseMap(body);
args.setDistPrecision(readDouble(aa.remove("distPrec"))); args.setDistErrPct(readDouble(aa.remove("distErrPct")));
args.setDistErr(readDouble(aa.remove("distErr")));
if (!aa.isEmpty()) { if (!aa.isEmpty()) {
throw new IllegalArgumentException("unused parameters: " + aa, null); throw new IllegalArgumentException("unused parameters: " + aa, null);
} }
} }
} }
args.validate();
return args; return args;
} }
@ -92,6 +109,8 @@ public class SpatialArgsParser {
return v == null ? defaultValue : Boolean.parseBoolean(v); return v == null ? defaultValue : Boolean.parseBoolean(v);
} }
/** Parses "a=b c=d f" (whitespace separated) into name-value pairs. If there
* is no '=' as in 'f' above then it's short for f=f. */
protected static Map<String, String> parseMap(String body) { protected static Map<String, String> parseMap(String body) {
Map<String, String> map = new HashMap<String, String>(); Map<String, String> map = new HashMap<String, String>();
StringTokenizer st = new StringTokenizer(body, " \n\t"); StringTokenizer st = new StringTokenizer(body, " \n\t");

View File

@ -19,10 +19,10 @@ package org.apache.lucene.spatial.prefix;
import com.spatial4j.core.context.SpatialContext; import com.spatial4j.core.context.SpatialContext;
import com.spatial4j.core.distance.DistanceUtils; import com.spatial4j.core.distance.DistanceUtils;
import com.spatial4j.core.io.GeohashUtils;
import com.spatial4j.core.shape.Point; import com.spatial4j.core.shape.Point;
import com.spatial4j.core.shape.Rectangle; import com.spatial4j.core.shape.Rectangle;
import com.spatial4j.core.shape.Shape; import com.spatial4j.core.shape.Shape;
import com.spatial4j.core.io.GeohashUtils;
import org.apache.lucene.document.Document; import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field; import org.apache.lucene.document.Field;
import org.apache.lucene.document.StoredField; import org.apache.lucene.document.StoredField;
@ -91,22 +91,22 @@ public class TestRecursivePrefixTreeStrategy extends StrategyTestCase {
final double DIST = 35.75;//35.7499... final double DIST = 35.75;//35.7499...
assertEquals(DIST, ctx.getDistCalc().distance(iPt, qPt) * DEG2KM, 0.001); assertEquals(DIST, ctx.getDistCalc().distance(iPt, qPt) * DEG2KM, 0.001);
//distPrec will affect the query shape precision. The indexed precision //distErrPct will affect the query shape precision. The indexed precision
// was set to nearly zilch via init(GeohashPrefixTree.getMaxLevelsPossible()); // was set to nearly zilch via init(GeohashPrefixTree.getMaxLevelsPossible());
final double distPrec = 0.025; //the suggested default, by the way final double distErrPct = 0.025; //the suggested default, by the way
final double distMult = 1+distPrec; final double distMult = 1+distErrPct;
assertTrue(35.74*distMult >= DIST); assertTrue(35.74*distMult >= DIST);
checkHits(q(qPt, 35.74 * KM2DEG, distPrec), 1, null); checkHits(q(qPt, 35.74 * KM2DEG, distErrPct), 1, null);
assertTrue(30*distMult < DIST); assertTrue(30*distMult < DIST);
checkHits(q(qPt, 30 * KM2DEG, distPrec), 0, null); checkHits(q(qPt, 30 * KM2DEG, distErrPct), 0, null);
assertTrue(33*distMult < DIST); assertTrue(33*distMult < DIST);
checkHits(q(qPt, 33 * KM2DEG, distPrec), 0, null); checkHits(q(qPt, 33 * KM2DEG, distErrPct), 0, null);
assertTrue(34*distMult < DIST); assertTrue(34*distMult < DIST);
checkHits(q(qPt, 34 * KM2DEG, distPrec), 0, null); checkHits(q(qPt, 34 * KM2DEG, distErrPct), 0, null);
} }
@Test @Ignore /* LUCENE-4351 ignore this test until I figure out why it failed (as reported by Jenkins) */ @Test @Ignore /* LUCENE-4351 ignore this test until I figure out why it failed (as reported by Jenkins) */
@ -178,10 +178,10 @@ public class TestRecursivePrefixTreeStrategy extends StrategyTestCase {
return q(pt, distDEG, 0.0); return q(pt, distDEG, 0.0);
} }
private SpatialArgs q(Point pt, double distDEG, double distPrec) { private SpatialArgs q(Point pt, double distDEG, double distErrPct) {
Shape shape = ctx.makeCircle(pt, distDEG); Shape shape = ctx.makeCircle(pt, distDEG);
SpatialArgs args = new SpatialArgs(SpatialOperation.Intersects,shape); SpatialArgs args = new SpatialArgs(SpatialOperation.Intersects,shape);
args.setDistPrecision(distPrec); args.setDistErrPct(distErrPct);
return args; return args;
} }