GEO: More robust handling of ignore_malformed in geoshape parsing (#35603)

Adds an XContent sub parser class that can to wrap another
XContent parser at the beginning of an object and allow skiping
all children in case of the parsing failure. It also uses this
subparser to ignore the rest of the GeoJson shape if the 
parsing fails and we need to ignore the geoshape due to the 
ignore_malformed flag.

Supersedes #34498

Closes #34047
This commit is contained in:
Igor Motov 2018-11-21 11:04:01 -10:00 committed by GitHub
parent 1a5553d495
commit 39789d0a10
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 479 additions and 29 deletions

View File

@ -0,0 +1,284 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.common.xcontent;
import java.io.IOException;
import java.nio.CharBuffer;
import java.util.List;
import java.util.Map;
/**
* Wrapper for a XContentParser that makes a single object to look like a complete document.
*
* The wrapper prevents the parsing logic to consume tokens outside of the wrapped object as well
* as skipping to the end of the object in case of a parsing error. The wrapper is intended to be
* used for parsing objects that should be ignored if they are malformed.
*/
public class XContentSubParser implements XContentParser {
private final XContentParser parser;
private int level;
private boolean closed;
public XContentSubParser(XContentParser parser) {
this.parser = parser;
if (parser.currentToken() != Token.START_OBJECT) {
throw new IllegalStateException("The sub parser has to be created on the start of an object");
}
level = 1;
}
@Override
public XContentType contentType() {
return parser.contentType();
}
@Override
public Token nextToken() throws IOException {
if (level > 0) {
Token token = parser.nextToken();
if (token == Token.START_OBJECT || token == Token.START_ARRAY) {
level++;
} else if (token == Token.END_OBJECT || token == Token.END_ARRAY) {
level--;
}
return token;
} else {
return null; // we have reached the end of the wrapped object
}
}
@Override
public void skipChildren() throws IOException {
Token token = parser.currentToken();
if (token != Token.START_OBJECT && token != Token.START_ARRAY) {
// skip if not starting on an object or an array
return;
}
int backToLevel = level - 1;
while (nextToken() != null) {
if (level <= backToLevel) {
return;
}
}
}
@Override
public Token currentToken() {
return parser.currentToken();
}
@Override
public String currentName() throws IOException {
return parser.currentName();
}
@Override
public Map<String, Object> map() throws IOException {
return parser.map();
}
@Override
public Map<String, Object> mapOrdered() throws IOException {
return parser.mapOrdered();
}
@Override
public Map<String, String> mapStrings() throws IOException {
return parser.mapStrings();
}
@Override
public Map<String, String> mapStringsOrdered() throws IOException {
return parser.mapStringsOrdered();
}
@Override
public List<Object> list() throws IOException {
return parser.list();
}
@Override
public List<Object> listOrderedMap() throws IOException {
return parser.listOrderedMap();
}
@Override
public String text() throws IOException {
return parser.text();
}
@Override
public String textOrNull() throws IOException {
return parser.textOrNull();
}
@Override
public CharBuffer charBufferOrNull() throws IOException {
return parser.charBufferOrNull();
}
@Override
public CharBuffer charBuffer() throws IOException {
return parser.charBuffer();
}
@Override
public Object objectText() throws IOException {
return parser.objectText();
}
@Override
public Object objectBytes() throws IOException {
return parser.objectBytes();
}
@Override
public boolean hasTextCharacters() {
return parser.hasTextCharacters();
}
@Override
public char[] textCharacters() throws IOException {
return parser.textCharacters();
}
@Override
public int textLength() throws IOException {
return parser.textLength();
}
@Override
public int textOffset() throws IOException {
return parser.textOffset();
}
@Override
public Number numberValue() throws IOException {
return parser.numberValue();
}
@Override
public NumberType numberType() throws IOException {
return parser.numberType();
}
@Override
public short shortValue(boolean coerce) throws IOException {
return parser.shortValue(coerce);
}
@Override
public int intValue(boolean coerce) throws IOException {
return parser.intValue(coerce);
}
@Override
public long longValue(boolean coerce) throws IOException {
return parser.longValue(coerce);
}
@Override
public float floatValue(boolean coerce) throws IOException {
return parser.floatValue(coerce);
}
@Override
public double doubleValue(boolean coerce) throws IOException {
return parser.doubleValue();
}
@Override
public short shortValue() throws IOException {
return parser.shortValue();
}
@Override
public int intValue() throws IOException {
return parser.intValue();
}
@Override
public long longValue() throws IOException {
return parser.longValue();
}
@Override
public float floatValue() throws IOException {
return parser.floatValue();
}
@Override
public double doubleValue() throws IOException {
return parser.doubleValue();
}
@Override
public boolean isBooleanValue() throws IOException {
return parser.isBooleanValue();
}
@Override
public boolean booleanValue() throws IOException {
return parser.booleanValue();
}
@Override
public byte[] binaryValue() throws IOException {
return parser.binaryValue();
}
@Override
public XContentLocation getTokenLocation() {
return parser.getTokenLocation();
}
@Override
public <T> T namedObject(Class<T> categoryClass, String name, Object context) throws IOException {
return parser.namedObject(categoryClass, name, context);
}
@Override
public NamedXContentRegistry getXContentRegistry() {
return parser.getXContentRegistry();
}
@Override
public boolean isClosed() {
return closed;
}
@Override
public DeprecationHandler getDeprecationHandler() {
return parser.getDeprecationHandler();
}
@Override
public void close() throws IOException {
if (closed == false) {
closed = true;
while (true) {
if (nextToken() == null) {
return;
}
}
}
}
}

View File

@ -22,13 +22,12 @@ package org.elasticsearch.common.xcontent.json;
import com.fasterxml.jackson.core.JsonLocation;
import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.core.JsonToken;
import org.elasticsearch.core.internal.io.IOUtils;
import org.elasticsearch.common.xcontent.DeprecationHandler;
import org.elasticsearch.common.xcontent.NamedXContentRegistry;
import org.elasticsearch.common.xcontent.XContentLocation;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.common.xcontent.support.AbstractXContentParser;
import org.elasticsearch.core.internal.io.IOUtils;
import java.io.IOException;
import java.nio.CharBuffer;

View File

@ -20,6 +20,7 @@
package org.elasticsearch.common.xcontent;
import com.fasterxml.jackson.core.JsonParseException;
import org.elasticsearch.common.CheckedSupplier;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.xcontent.json.JsonXContent;
@ -327,4 +328,149 @@ public class XContentParserTests extends ESTestCase {
parser.list());
}
}
public void testSubParser() throws IOException {
XContentBuilder builder = XContentFactory.jsonBuilder();
int numberOfTokens;
numberOfTokens = generateRandomObjectForMarking(builder);
String content = Strings.toString(builder);
try (XContentParser parser = createParser(JsonXContent.jsonXContent, content)) {
assertEquals(XContentParser.Token.START_OBJECT, parser.nextToken());
assertEquals(XContentParser.Token.FIELD_NAME, parser.nextToken()); // first field
assertEquals("first_field", parser.currentName());
assertEquals(XContentParser.Token.VALUE_STRING, parser.nextToken()); // foo
assertEquals(XContentParser.Token.FIELD_NAME, parser.nextToken()); // marked field
assertEquals("marked_field", parser.currentName());
assertEquals(XContentParser.Token.START_OBJECT, parser.nextToken()); // {
XContentParser subParser = new XContentSubParser(parser);
try {
int tokensToSkip = randomInt(numberOfTokens - 1);
for (int i = 0; i < tokensToSkip; i++) {
// Simulate incomplete parsing
assertNotNull(subParser.nextToken());
}
if (randomBoolean()) {
// And sometimes skipping children
subParser.skipChildren();
}
} finally {
assertFalse(subParser.isClosed());
subParser.close();
assertTrue(subParser.isClosed());
}
assertEquals(XContentParser.Token.FIELD_NAME, parser.nextToken()); // last field
assertEquals("last_field", parser.currentName());
assertEquals(XContentParser.Token.VALUE_STRING, parser.nextToken());
assertEquals(XContentParser.Token.END_OBJECT, parser.nextToken());
assertNull(parser.nextToken());
}
}
public void testCreateSubParserAtAWrongPlace() throws IOException {
XContentBuilder builder = XContentFactory.jsonBuilder();
generateRandomObjectForMarking(builder);
String content = Strings.toString(builder);
try (XContentParser parser = createParser(JsonXContent.jsonXContent, content)) {
assertEquals(XContentParser.Token.START_OBJECT, parser.nextToken());
assertEquals(XContentParser.Token.FIELD_NAME, parser.nextToken()); // first field
assertEquals("first_field", parser.currentName());
IllegalStateException exception = expectThrows(IllegalStateException.class, () -> new XContentSubParser(parser));
assertEquals("The sub parser has to be created on the start of an object", exception.getMessage());
}
}
public void testCreateRootSubParser() throws IOException {
XContentBuilder builder = XContentFactory.jsonBuilder();
int numberOfTokens = generateRandomObjectForMarking(builder);
String content = Strings.toString(builder);
try (XContentParser parser = createParser(JsonXContent.jsonXContent, content)) {
assertEquals(XContentParser.Token.START_OBJECT, parser.nextToken());
try (XContentParser subParser = new XContentSubParser(parser)) {
int tokensToSkip = randomInt(numberOfTokens + 3);
for (int i = 0; i < tokensToSkip; i++) {
// Simulate incomplete parsing
assertNotNull(subParser.nextToken());
}
}
assertNull(parser.nextToken());
}
}
/**
* Generates a random object {"first_field": "foo", "marked_field": {...random...}, "last_field": "bar}
*
* Returns the number of tokens in the marked field
*/
private int generateRandomObjectForMarking(XContentBuilder builder) throws IOException {
builder.startObject()
.field("first_field", "foo")
.field("marked_field");
int numberOfTokens = generateRandomObject(builder, 0);
builder.field("last_field", "bar").endObject();
return numberOfTokens;
}
private int generateRandomObject(XContentBuilder builder, int level) throws IOException {
int tokens = 2;
builder.startObject();
int numberOfElements = randomInt(5);
for (int i = 0; i < numberOfElements; i++) {
builder.field(randomAlphaOfLength(10) + "_" + i);
tokens += generateRandomValue(builder, level + 1);
}
builder.endObject();
return tokens;
}
private int generateRandomValue(XContentBuilder builder, int level) throws IOException {
@SuppressWarnings("unchecked") CheckedSupplier<Integer, IOException> fieldGenerator = randomFrom(
() -> {
builder.value(randomInt());
return 1;
},
() -> {
builder.value(randomAlphaOfLength(10));
return 1;
},
() -> {
builder.value(randomDouble());
return 1;
},
() -> {
if (level < 3) {
// don't need to go too deep
return generateRandomObject(builder, level + 1);
} else {
builder.value(0);
return 1;
}
},
() -> {
if (level < 5) { // don't need to go too deep
return generateRandomArray(builder, level);
} else {
builder.value(0);
return 1;
}
}
);
return fieldGenerator.get();
}
private int generateRandomArray(XContentBuilder builder, int level) throws IOException {
int tokens = 2;
int arraySize = randomInt(3);
builder.startArray();
for (int i = 0; i < arraySize; i++) {
tokens += generateRandomValue(builder, level + 1);
}
builder.endArray();
return tokens;
}
}

View File

@ -27,6 +27,7 @@ import org.elasticsearch.common.geo.builders.GeometryCollectionBuilder;
import org.elasticsearch.common.geo.builders.ShapeBuilder;
import org.elasticsearch.common.unit.DistanceUnit;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.common.xcontent.XContentSubParser;
import org.elasticsearch.index.mapper.GeoShapeFieldMapper;
import org.locationtech.jts.geom.Coordinate;
@ -55,66 +56,59 @@ abstract class GeoJsonParser {
String malformedException = null;
XContentParser.Token token;
try {
while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
try (XContentParser subParser = new XContentSubParser(parser)) {
while ((token = subParser.nextToken()) != XContentParser.Token.END_OBJECT) {
if (token == XContentParser.Token.FIELD_NAME) {
String fieldName = parser.currentName();
String fieldName = subParser.currentName();
if (ShapeParser.FIELD_TYPE.match(fieldName, parser.getDeprecationHandler())) {
parser.nextToken();
final GeoShapeType type = GeoShapeType.forName(parser.text());
if (ShapeParser.FIELD_TYPE.match(fieldName, subParser.getDeprecationHandler())) {
subParser.nextToken();
final GeoShapeType type = GeoShapeType.forName(subParser.text());
if (shapeType != null && shapeType.equals(type) == false) {
malformedException = ShapeParser.FIELD_TYPE + " already parsed as ["
+ shapeType + "] cannot redefine as [" + type + "]";
} else {
shapeType = type;
}
} else if (ShapeParser.FIELD_COORDINATES.match(fieldName, parser.getDeprecationHandler())) {
parser.nextToken();
CoordinateNode tempNode = parseCoordinates(parser, ignoreZValue.value());
} else if (ShapeParser.FIELD_COORDINATES.match(fieldName, subParser.getDeprecationHandler())) {
subParser.nextToken();
CoordinateNode tempNode = parseCoordinates(subParser, ignoreZValue.value());
if (coordinateNode != null && tempNode.numDimensions() != coordinateNode.numDimensions()) {
throw new ElasticsearchParseException("Exception parsing coordinates: " +
"number of dimensions do not match");
}
coordinateNode = tempNode;
} else if (ShapeParser.FIELD_GEOMETRIES.match(fieldName, parser.getDeprecationHandler())) {
} else if (ShapeParser.FIELD_GEOMETRIES.match(fieldName, subParser.getDeprecationHandler())) {
if (shapeType == null) {
shapeType = GeoShapeType.GEOMETRYCOLLECTION;
} else if (shapeType.equals(GeoShapeType.GEOMETRYCOLLECTION) == false) {
malformedException = "cannot have [" + ShapeParser.FIELD_GEOMETRIES + "] with type set to ["
+ shapeType + "]";
}
parser.nextToken();
geometryCollections = parseGeometries(parser, shapeMapper);
} else if (CircleBuilder.FIELD_RADIUS.match(fieldName, parser.getDeprecationHandler())) {
subParser.nextToken();
geometryCollections = parseGeometries(subParser, shapeMapper);
} else if (CircleBuilder.FIELD_RADIUS.match(fieldName, subParser.getDeprecationHandler())) {
if (shapeType == null) {
shapeType = GeoShapeType.CIRCLE;
} else if (shapeType != null && shapeType.equals(GeoShapeType.CIRCLE) == false) {
malformedException = "cannot have [" + CircleBuilder.FIELD_RADIUS + "] with type set to ["
+ shapeType + "]";
}
parser.nextToken();
radius = DistanceUnit.Distance.parseDistance(parser.text());
} else if (ShapeParser.FIELD_ORIENTATION.match(fieldName, parser.getDeprecationHandler())) {
subParser.nextToken();
radius = DistanceUnit.Distance.parseDistance(subParser.text());
} else if (ShapeParser.FIELD_ORIENTATION.match(fieldName, subParser.getDeprecationHandler())) {
if (shapeType != null
&& (shapeType.equals(GeoShapeType.POLYGON) || shapeType.equals(GeoShapeType.MULTIPOLYGON)) == false) {
malformedException = "cannot have [" + ShapeParser.FIELD_ORIENTATION + "] with type set to [" + shapeType + "]";
}
parser.nextToken();
requestedOrientation = ShapeBuilder.Orientation.fromString(parser.text());
subParser.nextToken();
requestedOrientation = ShapeBuilder.Orientation.fromString(subParser.text());
} else {
parser.nextToken();
parser.skipChildren();
subParser.nextToken();
subParser.skipChildren();
}
}
}
} catch (Exception ex) {
// Skip all other fields until the end of the object
while (parser.currentToken() != XContentParser.Token.END_OBJECT && parser.currentToken() != null) {
parser.nextToken();
parser.skipChildren();
}
throw ex;
}
if (malformedException != null) {

View File

@ -1390,4 +1390,31 @@ public class GeoJsonShapeParserTests extends BaseGeoParsingTestCase {
assertNull(parser.nextToken());
}
}
public void testParseInvalidGeometryCollectionShapes() throws IOException {
// single dimensions point
XContentBuilder invalidPoints = XContentFactory.jsonBuilder()
.startObject()
.startObject("foo")
.field("type", "geometrycollection")
.startArray("geometries")
.startObject()
.field("type", "polygon")
.startArray("coordinates")
.startArray().value("46.6022226498514").value("24.7237442867977").endArray()
.startArray().value("46.6031857243798").value("24.722968774929").endArray()
.endArray() // coordinates
.endObject()
.endArray() // geometries
.endObject()
.endObject();
try (XContentParser parser = createParser(invalidPoints)) {
parser.nextToken(); // foo
parser.nextToken(); // start object
parser.nextToken(); // start object
ElasticsearchGeoAssertions.assertValidException(parser, ElasticsearchParseException.class);
assertEquals(XContentParser.Token.END_OBJECT, parser.nextToken()); // end of the document
assertNull(parser.nextToken()); // no more elements afterwards
}
}
}