Geo: enables coerce support in WKT polygon parser (#35414)

WKT parser now automatically closes open polygons similar to GeoJSON
parser if coerce flag in mapping is set to true.

Closes to #35059
This commit is contained in:
Igor Motov 2018-11-12 09:40:04 -10:00 committed by GitHub
parent 16cbbab7b7
commit e7896bcefc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 89 additions and 33 deletions

View File

@ -19,6 +19,7 @@
package org.elasticsearch.common.geo.parsers;
import org.elasticsearch.ElasticsearchParseException;
import org.elasticsearch.common.Explicit;
import org.elasticsearch.common.geo.GeoPoint;
import org.elasticsearch.common.geo.GeoShapeType;
import org.elasticsearch.common.geo.builders.CoordinatesBuilder;
@ -77,7 +78,9 @@ public class GeoWKTParser {
final GeoShapeFieldMapper shapeMapper)
throws IOException, ElasticsearchParseException {
try (StringReader reader = new StringReader(parser.text())) {
boolean ignoreZValue = (shapeMapper != null && shapeMapper.ignoreZValue().value() == true);
Explicit<Boolean> ignoreZValue = (shapeMapper == null) ? GeoShapeFieldMapper.Defaults.IGNORE_Z_VALUE :
shapeMapper.ignoreZValue();
Explicit<Boolean> coerce = (shapeMapper == null) ? GeoShapeFieldMapper.Defaults.COERCE : shapeMapper.coerce();
// setup the tokenizer; configured to read words w/o numbers
StreamTokenizer tokenizer = new StreamTokenizer(reader);
tokenizer.resetSyntax();
@ -90,14 +93,15 @@ public class GeoWKTParser {
tokenizer.wordChars('.', '.');
tokenizer.whitespaceChars(0, ' ');
tokenizer.commentChar('#');
ShapeBuilder builder = parseGeometry(tokenizer, shapeType, ignoreZValue);
ShapeBuilder builder = parseGeometry(tokenizer, shapeType, ignoreZValue.value(), coerce.value());
checkEOF(tokenizer);
return builder;
}
}
/** parse geometry from the stream tokenizer */
private static ShapeBuilder parseGeometry(StreamTokenizer stream, GeoShapeType shapeType, final boolean ignoreZValue)
private static ShapeBuilder parseGeometry(StreamTokenizer stream, GeoShapeType shapeType, final boolean ignoreZValue,
final boolean coerce)
throws IOException, ElasticsearchParseException {
final GeoShapeType type = GeoShapeType.forName(nextWord(stream));
if (shapeType != null && shapeType != GeoShapeType.GEOMETRYCOLLECTION) {
@ -107,21 +111,21 @@ public class GeoWKTParser {
}
switch (type) {
case POINT:
return parsePoint(stream, ignoreZValue);
return parsePoint(stream, ignoreZValue, coerce);
case MULTIPOINT:
return parseMultiPoint(stream, ignoreZValue);
return parseMultiPoint(stream, ignoreZValue, coerce);
case LINESTRING:
return parseLine(stream, ignoreZValue);
return parseLine(stream, ignoreZValue, coerce);
case MULTILINESTRING:
return parseMultiLine(stream, ignoreZValue);
return parseMultiLine(stream, ignoreZValue, coerce);
case POLYGON:
return parsePolygon(stream, ignoreZValue);
return parsePolygon(stream, ignoreZValue, coerce);
case MULTIPOLYGON:
return parseMultiPolygon(stream, ignoreZValue);
return parseMultiPolygon(stream, ignoreZValue, coerce);
case ENVELOPE:
return parseBBox(stream);
case GEOMETRYCOLLECTION:
return parseGeometryCollection(stream, ignoreZValue);
return parseGeometryCollection(stream, ignoreZValue, coerce);
default:
throw new IllegalArgumentException("Unknown geometry type: " + type);
}
@ -142,7 +146,7 @@ public class GeoWKTParser {
return new EnvelopeBuilder(new Coordinate(minLon, maxLat), new Coordinate(maxLon, minLat));
}
private static PointBuilder parsePoint(StreamTokenizer stream, final boolean ignoreZValue)
private static PointBuilder parsePoint(StreamTokenizer stream, final boolean ignoreZValue, final boolean coerce)
throws IOException, ElasticsearchParseException {
if (nextEmptyOrOpen(stream).equals(EMPTY)) {
return null;
@ -155,12 +159,12 @@ public class GeoWKTParser {
return pt;
}
private static List<Coordinate> parseCoordinateList(StreamTokenizer stream, final boolean ignoreZValue)
private static List<Coordinate> parseCoordinateList(StreamTokenizer stream, final boolean ignoreZValue, final boolean coerce)
throws IOException, ElasticsearchParseException {
CoordinatesBuilder coordinates = new CoordinatesBuilder();
boolean isOpenParen = false;
if (isNumberNext(stream) || (isOpenParen = nextWord(stream).equals(LPAREN))) {
coordinates.coordinate(parseCoordinate(stream, ignoreZValue));
coordinates.coordinate(parseCoordinate(stream, ignoreZValue, coerce));
}
if (isOpenParen && nextCloser(stream).equals(RPAREN) == false) {
@ -170,7 +174,7 @@ public class GeoWKTParser {
while (nextCloserOrComma(stream).equals(COMMA)) {
isOpenParen = false;
if (isNumberNext(stream) || (isOpenParen = nextWord(stream).equals(LPAREN))) {
coordinates.coordinate(parseCoordinate(stream, ignoreZValue));
coordinates.coordinate(parseCoordinate(stream, ignoreZValue, coerce));
}
if (isOpenParen && nextCloser(stream).equals(RPAREN) == false) {
throw new ElasticsearchParseException("expected: " + RPAREN + " but found: " + tokenString(stream), stream.lineno());
@ -179,7 +183,7 @@ public class GeoWKTParser {
return coordinates.build();
}
private static Coordinate parseCoordinate(StreamTokenizer stream, final boolean ignoreZValue)
private static Coordinate parseCoordinate(StreamTokenizer stream, final boolean ignoreZValue, final boolean coerce)
throws IOException, ElasticsearchParseException {
final double lon = nextNumber(stream);
final double lat = nextNumber(stream);
@ -190,71 +194,98 @@ public class GeoWKTParser {
return z == null ? new Coordinate(lon, lat) : new Coordinate(lon, lat, z);
}
private static MultiPointBuilder parseMultiPoint(StreamTokenizer stream, final boolean ignoreZValue)
private static MultiPointBuilder parseMultiPoint(StreamTokenizer stream, final boolean ignoreZValue, final boolean coerce)
throws IOException, ElasticsearchParseException {
String token = nextEmptyOrOpen(stream);
if (token.equals(EMPTY)) {
return null;
}
return new MultiPointBuilder(parseCoordinateList(stream, ignoreZValue));
return new MultiPointBuilder(parseCoordinateList(stream, ignoreZValue, coerce));
}
private static LineStringBuilder parseLine(StreamTokenizer stream, final boolean ignoreZValue)
private static LineStringBuilder parseLine(StreamTokenizer stream, final boolean ignoreZValue, final boolean coerce)
throws IOException, ElasticsearchParseException {
String token = nextEmptyOrOpen(stream);
if (token.equals(EMPTY)) {
return null;
}
return new LineStringBuilder(parseCoordinateList(stream, ignoreZValue));
return new LineStringBuilder(parseCoordinateList(stream, ignoreZValue, coerce));
}
private static MultiLineStringBuilder parseMultiLine(StreamTokenizer stream, final boolean ignoreZValue)
// A LinearRing is closed LineString with 4 or more positions. The first and last positions
// are equivalent (they represent equivalent points).
private static LineStringBuilder parseLinearRing(StreamTokenizer stream, final boolean ignoreZValue, final boolean coerce)
throws IOException, ElasticsearchParseException {
String token = nextEmptyOrOpen(stream);
if (token.equals(EMPTY)) {
return null;
}
List<Coordinate> coordinates = parseCoordinateList(stream, ignoreZValue, coerce);
int coordinatesNeeded = coerce ? 3 : 4;
if (coordinates.size() >= coordinatesNeeded) {
if (!coordinates.get(0).equals(coordinates.get(coordinates.size() - 1))) {
if (coerce == true) {
coordinates.add(coordinates.get(0));
} else {
throw new ElasticsearchParseException("invalid LinearRing found (coordinates are not closed)");
}
}
}
if (coordinates.size() < 4) {
throw new ElasticsearchParseException("invalid number of points in LinearRing (found [{}] - must be >= 4)",
coordinates.size());
}
return new LineStringBuilder(coordinates);
}
private static MultiLineStringBuilder parseMultiLine(StreamTokenizer stream, final boolean ignoreZValue, final boolean coerce)
throws IOException, ElasticsearchParseException {
String token = nextEmptyOrOpen(stream);
if (token.equals(EMPTY)) {
return null;
}
MultiLineStringBuilder builder = new MultiLineStringBuilder();
builder.linestring(parseLine(stream, ignoreZValue));
builder.linestring(parseLine(stream, ignoreZValue, coerce));
while (nextCloserOrComma(stream).equals(COMMA)) {
builder.linestring(parseLine(stream, ignoreZValue));
builder.linestring(parseLine(stream, ignoreZValue, coerce));
}
return builder;
}
private static PolygonBuilder parsePolygon(StreamTokenizer stream, final boolean ignoreZValue)
private static PolygonBuilder parsePolygon(StreamTokenizer stream, final boolean ignoreZValue, final boolean coerce)
throws IOException, ElasticsearchParseException {
if (nextEmptyOrOpen(stream).equals(EMPTY)) {
return null;
}
PolygonBuilder builder = new PolygonBuilder(parseLine(stream, ignoreZValue), ShapeBuilder.Orientation.RIGHT);
PolygonBuilder builder = new PolygonBuilder(parseLinearRing(stream, ignoreZValue, coerce), ShapeBuilder.Orientation.RIGHT);
while (nextCloserOrComma(stream).equals(COMMA)) {
builder.hole(parseLine(stream, ignoreZValue));
builder.hole(parseLinearRing(stream, ignoreZValue, coerce));
}
return builder;
}
private static MultiPolygonBuilder parseMultiPolygon(StreamTokenizer stream, final boolean ignoreZValue)
private static MultiPolygonBuilder parseMultiPolygon(StreamTokenizer stream, final boolean ignoreZValue, final boolean coerce)
throws IOException, ElasticsearchParseException {
if (nextEmptyOrOpen(stream).equals(EMPTY)) {
return null;
}
MultiPolygonBuilder builder = new MultiPolygonBuilder().polygon(parsePolygon(stream, ignoreZValue));
MultiPolygonBuilder builder = new MultiPolygonBuilder().polygon(parsePolygon(stream, ignoreZValue, coerce));
while (nextCloserOrComma(stream).equals(COMMA)) {
builder.polygon(parsePolygon(stream, ignoreZValue));
builder.polygon(parsePolygon(stream, ignoreZValue, coerce));
}
return builder;
}
private static GeometryCollectionBuilder parseGeometryCollection(StreamTokenizer stream, final boolean ignoreZValue)
private static GeometryCollectionBuilder parseGeometryCollection(StreamTokenizer stream, final boolean ignoreZValue,
final boolean coerce)
throws IOException, ElasticsearchParseException {
if (nextEmptyOrOpen(stream).equals(EMPTY)) {
return null;
}
GeometryCollectionBuilder builder = new GeometryCollectionBuilder().shape(
parseGeometry(stream, GeoShapeType.GEOMETRYCOLLECTION, ignoreZValue));
parseGeometry(stream, GeoShapeType.GEOMETRYCOLLECTION, ignoreZValue, coerce));
while (nextCloserOrComma(stream).equals(COMMA)) {
builder.shape(parseGeometry(stream, null, ignoreZValue));
builder.shape(parseGeometry(stream, null, ignoreZValue, coerce));
}
return builder;
}

View File

@ -135,7 +135,7 @@ public class GeoShapeFieldMapper extends FieldMapper {
public Builder coerce(boolean coerce) {
this.coerce = coerce;
return builder;
return this;
}
@Override
@ -155,7 +155,7 @@ public class GeoShapeFieldMapper extends FieldMapper {
public Builder ignoreMalformed(boolean ignoreMalformed) {
this.ignoreMalformed = ignoreMalformed;
return builder;
return this;
}
protected Explicit<Boolean> ignoreMalformed(BuilderContext context) {

View File

@ -318,6 +318,31 @@ public class GeoWKTShapeParserTests extends BaseGeoParsingTestCase {
assertEquals(shapeBuilder.numDimensions(), 3);
}
public void testParseOpenPolygon() throws IOException {
String openPolygon = "POLYGON ((100 5, 100 10, 90 10, 90 5))";
XContentBuilder xContentBuilder = XContentFactory.jsonBuilder().value(openPolygon);
XContentParser parser = createParser(xContentBuilder);
parser.nextToken();
Settings indexSettings = Settings.builder()
.put(IndexMetaData.SETTING_VERSION_CREATED, Version.V_6_3_0)
.put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 0)
.put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1)
.put(IndexMetaData.SETTING_INDEX_UUID, UUIDs.randomBase64UUID()).build();
Mapper.BuilderContext mockBuilderContext = new Mapper.BuilderContext(indexSettings, new ContentPath());
final GeoShapeFieldMapper defaultMapperBuilder = new GeoShapeFieldMapper.Builder("test").coerce(false).build(mockBuilderContext);
ElasticsearchParseException exception = expectThrows(ElasticsearchParseException.class,
() -> ShapeParser.parse(parser, defaultMapperBuilder));
assertEquals("invalid LinearRing found (coordinates are not closed)", exception.getMessage());
final GeoShapeFieldMapper coercingMapperBuilder = new GeoShapeFieldMapper.Builder("test").coerce(true).build(mockBuilderContext);
ShapeBuilder<?, ?> shapeBuilder = ShapeParser.parse(parser, coercingMapperBuilder);
assertNotNull(shapeBuilder);
assertEquals("polygon ((100.0 5.0, 100.0 10.0, 90.0 10.0, 90.0 5.0, 100.0 5.0))", shapeBuilder.toWKT());
}
public void testParseSelfCrossingPolygon() throws IOException {
// test self crossing ccw poly not crossing dateline
List<Coordinate> shellCoordinates = new ArrayList<>();