[ML-DataFrame] validate group name to not contain invalid characters (#42292)

disallows of creating groupBy field with '[', ']', '>' in the name to be consistent with aggregations
This commit is contained in:
Hendrik Muhs 2019-05-22 08:13:32 +02:00
parent 3493f3b637
commit ad24231c1a
2 changed files with 40 additions and 0 deletions

View File

@ -20,6 +20,7 @@ import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.search.aggregations.AggregatorFactories;
import org.elasticsearch.xpack.core.dataframe.DataFrameField;
import org.elasticsearch.xpack.core.dataframe.DataFrameMessages;
import org.elasticsearch.xpack.core.dataframe.utils.ExceptionsHelper;
@ -29,6 +30,7 @@ import java.util.LinkedHashMap;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.regex.Matcher;
import static org.elasticsearch.common.xcontent.XContentParserUtils.ensureExpectedToken;
@ -135,6 +137,7 @@ public class GroupConfig implements Writeable, ToXContentObject {
private static Map<String, SingleGroupSource> parseGroupConfig(final XContentParser parser,
boolean lenient) throws IOException {
Matcher validAggMatcher = AggregatorFactories.VALID_AGG_NAME.matcher("");
LinkedHashMap<String, SingleGroupSource> groups = new LinkedHashMap<>();
// be parsing friendly, whether the token needs to be advanced or not (similar to what ObjectParser does)
@ -150,6 +153,11 @@ public class GroupConfig implements Writeable, ToXContentObject {
ensureExpectedToken(XContentParser.Token.FIELD_NAME, token, parser::getTokenLocation);
String destinationFieldName = parser.currentName();
if (validAggMatcher.reset(destinationFieldName).matches() == false) {
throw new ParsingException(parser.getTokenLocation(), "Invalid group name [" + destinationFieldName
+ "]. Group names can contain any character except '[', ']', and '>'");
}
token = parser.nextToken();
ensureExpectedToken(XContentParser.Token.START_OBJECT, token, parser::getTokenLocation);
token = parser.nextToken();

View File

@ -6,6 +6,7 @@
package org.elasticsearch.xpack.core.dataframe.transforms.pivot;
import org.elasticsearch.common.ParsingException;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.io.stream.Writeable.Reader;
import org.elasticsearch.common.xcontent.ToXContent;
@ -27,6 +28,9 @@ import java.util.Set;
public class GroupConfigTests extends AbstractSerializingTestCase<GroupConfig> {
// array of illegal characters, see {@link AggregatorFactories#VALID_AGG_NAME}
private static final char[] ILLEGAL_FIELD_NAME_CHARACTERS = {'[', ']', '>'};
public static GroupConfig randomGroupConfig() {
Map<String, Object> source = new LinkedHashMap<>();
Map<String, SingleGroupSource> groups = new LinkedHashMap<>();
@ -88,6 +92,34 @@ public class GroupConfigTests extends AbstractSerializingTestCase<GroupConfig> {
}
}
public void testInvalidGroupByNames() throws IOException {
String invalidName = randomAlphaOfLengthBetween(0, 5)
+ ILLEGAL_FIELD_NAME_CHARACTERS[randomIntBetween(0, ILLEGAL_FIELD_NAME_CHARACTERS.length - 1)]
+ randomAlphaOfLengthBetween(0, 5);
XContentBuilder source = JsonXContent.contentBuilder()
.startObject()
.startObject(invalidName)
.startObject("terms")
.field("field", "user")
.endObject()
.endObject()
.endObject();
// lenient, passes but reports invalid
try (XContentParser parser = createParser(source)) {
GroupConfig groupConfig = GroupConfig.fromXContent(parser, true);
assertFalse(groupConfig.isValid());
}
// strict throws
try (XContentParser parser = createParser(source)) {
Exception e = expectThrows(ParsingException.class, () -> GroupConfig.fromXContent(parser, false));
assertTrue(e.getMessage().startsWith("Invalid group name"));
}
}
private static Map<String, Object> getSource(SingleGroupSource groupSource) {
try (XContentBuilder xContentBuilder = XContentFactory.jsonBuilder()) {
XContentBuilder content = groupSource.toXContent(xContentBuilder, ToXContent.EMPTY_PARAMS);