[GEO] Prioritize tree_level and precision parameters over default distance_error_pct

If a user explicitly defined the tree_level or precision parameter in a geo_shape mapping their specification was always overridden by the default_error_pct parameter (even though our docs say this parameter is a 'hint'). This lead to unexpected accuracy problems in the results of a geo_shape filter. (example provided in issue #9691)

This simple patch fixes the unexpected behavior by setting the default distance_error_pct parameter to zero when the tree_level or precision parameters are provided by the user. Under the covers the quadtree will now use the tree level defined by the user. The docs will be updated to alert the user to exercise caution with these parameters.  Specifying a precision of "1m" for an index using large complex shapes can quickly lead to OOM issues.

closes #9691
This commit is contained in:
Nicholas Knize 2015-02-20 17:42:43 -06:00
parent 0205fc7ac2
commit 453217fd7a
3 changed files with 79 additions and 43 deletions

View File

@ -46,7 +46,13 @@ via the mapping API even if you use the precision parameter.
|`distance_error_pct` |Used as a hint to the PrefixTree about how
precise it should be. Defaults to 0.025 (2.5%) with 0.5 as the maximum
supported value.
supported value. PERFORMANCE NOTE: This value will be default to 0 if a `precision` or
`tree_level` definition is explicitly defined. This guarantees spatial precision
at the level defined in the mapping. This can lead to significant memory usage
for high resolution shapes with low error (e.g., large shapes at 1m with < 0.001 error).
To improve indexing performance (at the cost of query accuracy) explicitly define
`tree_level` or `precision` along with a reasonable `distance_error_pct`, noting
that large shapes will have greater false positives.
|`orientation` |Optionally define how to interpret vertex order for
polygons / multipolygons. This parameter defines one of two coordinate

View File

@ -114,6 +114,7 @@ public class GeoShapeFieldMapper extends AbstractFieldMapper<String> {
private int treeLevels = 0;
private double precisionInMeters = -1;
private double distanceErrorPct = Defaults.DISTANCE_ERROR_PCT;
private boolean distErrPctDefined;
private Orientation orientation = Defaults.ORIENTATION;
private SpatialPrefixTree prefixTree;
@ -173,23 +174,27 @@ public class GeoShapeFieldMapper extends AbstractFieldMapper<String> {
return new GeoShapeFieldMapper(names, prefixTree, strategyName, distanceErrorPct, orientation, fieldType,
context.indexSettings(), multiFieldsBuilder.build(this, context), copyTo);
}
}
private static final int getLevels(int treeLevels, double precisionInMeters, int defaultLevels, boolean geoHash) {
if (treeLevels > 0 || precisionInMeters >= 0) {
return Math.max(treeLevels, precisionInMeters >= 0 ? (geoHash ? GeoUtils.geoHashLevelsForPrecision(precisionInMeters)
: GeoUtils.quadTreeLevelsForPrecision(precisionInMeters)) : 0);
private final int getLevels(int treeLevels, double precisionInMeters, int defaultLevels, boolean geoHash) {
if (treeLevels > 0 || precisionInMeters >= 0) {
// if the user specified a precision but not a distance error percent then zero out the distance err pct
// this is done to guarantee precision specified by the user without doing something unexpected under the covers
if (!distErrPctDefined) distanceErrorPct = 0;
return Math.max(treeLevels, precisionInMeters >= 0 ? (geoHash ? GeoUtils.geoHashLevelsForPrecision(precisionInMeters)
: GeoUtils.quadTreeLevelsForPrecision(precisionInMeters)) : 0);
}
return defaultLevels;
}
return defaultLevels;
}
public static class TypeParser implements Mapper.TypeParser {
@Override
public Mapper.Builder parse(String name, Map<String, Object> node, ParserContext parserContext) throws MapperParsingException {
Builder builder = geoShapeField(name);
// if index was created before 1.6, this conditional should be true (this forces any index created on/or after 1.6 to use 0 for
// the default distanceErrorPct parameter).
builder.distErrPctDefined = parserContext.indexVersionCreated().before(Version.V_1_6_0);
for (Iterator<Map.Entry<String, Object>> iterator = node.entrySet().iterator(); iterator.hasNext();) {
Map.Entry<String, Object> entry = iterator.next();
String fieldName = Strings.toUnderscoreCase(entry.getKey());
@ -205,6 +210,7 @@ public class GeoShapeFieldMapper extends AbstractFieldMapper<String> {
iterator.remove();
} else if (Names.DISTANCE_ERROR_PCT.equals(fieldName)) {
builder.distanceErrorPct(Double.parseDouble(fieldNode.toString()));
builder.distErrPctDefined = true;
iterator.remove();
} else if (Names.ORIENTATION.equals(fieldName)) {
builder.orientation(ShapeBuilder.orientationFromString(fieldNode.toString()));
@ -282,40 +288,38 @@ public class GeoShapeFieldMapper extends AbstractFieldMapper<String> {
return;
}
final GeoShapeFieldMapper fieldMergeWith = (GeoShapeFieldMapper) mergeWith;
if (!mergeContext.mergeFlags().simulate()) {
final PrefixTreeStrategy mergeWithStrategy = fieldMergeWith.defaultStrategy;
final PrefixTreeStrategy mergeWithStrategy = fieldMergeWith.defaultStrategy;
// prevent user from changing strategies
if (!(this.defaultStrategy.getClass().equals(mergeWithStrategy.getClass()))) {
mergeContext.addConflict("mapper [" + names.fullName() + "] has different strategy");
}
final SpatialPrefixTree grid = this.defaultStrategy.getGrid();
final SpatialPrefixTree mergeGrid = mergeWithStrategy.getGrid();
// prevent user from changing trees (changes encoding)
if (!grid.getClass().equals(mergeGrid.getClass())) {
mergeContext.addConflict("mapper [" + names.fullName() + "] has different tree");
}
// TODO we should allow this, but at the moment levels is used to build bookkeeping variables
// in lucene's SpatialPrefixTree implementations, need a patch to correct that first
if (grid.getMaxLevels() != mergeGrid.getMaxLevels()) {
mergeContext.addConflict("mapper [" + names.fullName() + "] has different tree_levels or precision");
}
// bail if there were merge conflicts
if (mergeContext.hasConflicts()) {
return;
}
// change distance error percent
this.defaultStrategy.setDistErrPct(mergeWithStrategy.getDistErrPct());
// change orientation - this is allowed because existing dateline spanning shapes
// have already been unwound and segmented
this.shapeOrientation = fieldMergeWith.shapeOrientation;
// prevent user from changing strategies
if (!(this.defaultStrategy.getClass().equals(mergeWithStrategy.getClass()))) {
mergeContext.addConflict("mapper [" + names.fullName() + "] has different strategy");
}
final SpatialPrefixTree grid = this.defaultStrategy.getGrid();
final SpatialPrefixTree mergeGrid = mergeWithStrategy.getGrid();
// prevent user from changing trees (changes encoding)
if (!grid.getClass().equals(mergeGrid.getClass())) {
mergeContext.addConflict("mapper [" + names.fullName() + "] has different tree");
}
// TODO we should allow this, but at the moment levels is used to build bookkeeping variables
// in lucene's SpatialPrefixTree implementations, need a patch to correct that first
if (grid.getMaxLevels() != mergeGrid.getMaxLevels()) {
mergeContext.addConflict("mapper [" + names.fullName() + "] has different tree_levels or precision");
}
// bail if there were merge conflicts
if (mergeContext.hasConflicts() || mergeContext.mergeFlags().simulate()) {
return;
}
// change distance error percent
this.defaultStrategy.setDistErrPct(mergeWithStrategy.getDistErrPct());
// change orientation - this is allowed because existing dateline spanning shapes
// have already been unwound and segmented
this.shapeOrientation = fieldMergeWith.shapeOrientation;
}
@Override

View File

@ -173,10 +173,36 @@ public class GeoShapeFieldMapperTests extends ElasticsearchSingleNodeTest {
assertThat(strategy.getDistErrPct(), equalTo(0.5));
assertThat(strategy.getGrid(), instanceOf(QuadPrefixTree.class));
/* 70m is more precise so it wins */
// 70m is more precise so it wins
assertThat(strategy.getGrid().getMaxLevels(), equalTo(GeoUtils.quadTreeLevelsForPrecision(70d)));
}
{
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type1")
.startObject("properties").startObject("location")
.field("type", "geo_shape")
.field("tree", "quadtree")
.field("tree_levels", "26")
.field("precision", "70m")
.endObject().endObject()
.endObject().endObject().string();
DocumentMapper defaultMapper = parser.parse(mapping);
FieldMapper fieldMapper = defaultMapper.mappers().name("location").mapper();
assertThat(fieldMapper, instanceOf(GeoShapeFieldMapper.class));
GeoShapeFieldMapper geoShapeFieldMapper = (GeoShapeFieldMapper) fieldMapper;
PrefixTreeStrategy strategy = geoShapeFieldMapper.defaultStrategy();
// distance_error_pct was not specified so we expect the mapper to take the highest precision between "precision" and
// "tree_levels" setting distErrPct to 0 to guarantee desired precision
assertThat(strategy.getDistErrPct(), equalTo(0.0));
assertThat(strategy.getGrid(), instanceOf(QuadPrefixTree.class));
// 70m is less precise so it loses
assertThat(strategy.getGrid().getMaxLevels(), equalTo(26));
}
{
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type1")
.startObject("properties").startObject("location")
@ -197,7 +223,7 @@ public class GeoShapeFieldMapperTests extends ElasticsearchSingleNodeTest {
assertThat(strategy.getDistErrPct(), equalTo(0.5));
assertThat(strategy.getGrid(), instanceOf(GeohashPrefixTree.class));
/* 70m is more precise so it wins */
// 70m is more precise so it wins
assertThat(strategy.getGrid().getMaxLevels(), equalTo(GeoUtils.geoHashLevelsForPrecision(70d)));
}