[ML][Data Frame] adding dest.index and id validations (#43053) (#43109)

* [ML][Data Frame] adding dest.index and id validations

* adjusting message format

* Adjusting id validity pattern

* Update DataFrameStrings.java
This commit is contained in:
Benjamin Trent 2019-06-11 15:55:18 -05:00 committed by GitHub
parent e384bf0276
commit 7ff3d86cf0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 140 additions and 18 deletions

View File

@ -65,6 +65,10 @@ public class DataFrameMessages {
public static final String FAILED_TO_PARSE_TRANSFORM_CHECKPOINTS =
"Failed to parse transform checkpoints for [{0}]";
public static final String ID_TOO_LONG = "The id cannot contain more than {0} characters.";
public static final String INVALID_ID = "Invalid {0}; ''{1}'' can contain lowercase alphanumeric (a-z and 0-9), hyphens or " +
"underscores; must start and end with alphanumeric";
private DataFrameMessages() {
}

View File

@ -15,12 +15,18 @@ import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.xcontent.ToXContentObject;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.indices.InvalidIndexNameException;
import org.elasticsearch.xpack.core.dataframe.DataFrameField;
import org.elasticsearch.xpack.core.dataframe.transforms.DataFrameTransformConfig;
import org.elasticsearch.xpack.core.dataframe.utils.DataFrameStrings;
import org.elasticsearch.xpack.core.dataframe.DataFrameMessages;
import java.io.IOException;
import java.util.Locale;
import java.util.Objects;
import static org.elasticsearch.action.ValidateActions.addValidationError;
import static org.elasticsearch.cluster.metadata.MetaDataCreateIndexService.validateIndexOrAliasName;
public class PutDataFrameTransformAction extends Action<AcknowledgedResponse> {
@ -67,6 +73,25 @@ public class PutDataFrameTransformAction extends Action<AcknowledgedResponse> {
for(String failure : config.getPivotConfig().aggFieldValidation()) {
validationException = addValidationError(failure, validationException);
}
String destIndex = config.getDestination().getIndex();
try {
validateIndexOrAliasName(destIndex, InvalidIndexNameException::new);
if (!destIndex.toLowerCase(Locale.ROOT).equals(destIndex)) {
validationException = addValidationError("dest.index [" + destIndex +"] must be lowercase", validationException);
}
} catch (InvalidIndexNameException ex) {
validationException = addValidationError(ex.getMessage(), validationException);
}
if (DataFrameStrings.isValidId(config.getId()) == false) {
validationException = addValidationError(
DataFrameMessages.getMessage(DataFrameMessages.INVALID_ID, DataFrameField.ID.getPreferredName(), config.getId()),
validationException);
}
if (DataFrameStrings.hasValidLengthForId(config.getId()) == false) {
validationException = addValidationError(
DataFrameMessages.getMessage(DataFrameMessages.ID_TOO_LONG, DataFrameStrings.ID_LENGTH_LIMIT),
validationException);
}
return validationException;
}

View File

@ -0,0 +1,46 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.core.dataframe.utils;
import org.elasticsearch.cluster.metadata.MetaData;
import java.util.regex.Pattern;
/**
* Yet Another String utilities class.
*/
public final class DataFrameStrings {
/**
* Valid user id pattern.
* Matches a string that contains characters, digits, hyphens, underscores or dots.
* The string may start and end only in characters or digits.
* Note that '.' is allowed but not documented.
*/
private static final Pattern VALID_ID_CHAR_PATTERN = Pattern.compile("[a-zA-Z0-9](?:[a-zA-Z0-9_\\-\\.]*[a-zA-Z0-9])?");
public static final int ID_LENGTH_LIMIT = 64;
private DataFrameStrings() {
}
public static boolean isValidId(String id) {
return id != null && VALID_ID_CHAR_PATTERN.matcher(id).matches() && !MetaData.ALL.equals(id);
}
/**
* Checks if the given {@code id} has a valid length.
* We keep IDs in a length shorter or equal than {@link #ID_LENGTH_LIMIT}
* in order to avoid unfriendly errors when storing docs with
* more than 512 bytes.
*
* @param id the id
* @return {@code true} if the id has a valid length
*/
public static boolean hasValidLengthForId(String id) {
return id.length() <= ID_LENGTH_LIMIT;
}
}

View File

@ -153,7 +153,7 @@ public class TransportPutDataFrameTransformAction
final String[] concreteDest =
indexNameExpressionResolver.concreteIndexNames(clusterState, IndicesOptions.lenientExpandOpen(), destIndex);
if (concreteDest.length > 1 || Regex.isSimpleMatchPattern(destIndex)) {
if (concreteDest.length > 1) {
listener.onFailure(new ElasticsearchStatusException(
DataFrameMessages.getMessage(DataFrameMessages.REST_PUT_DATA_FRAME_DEST_SINGLE_INDEX, destIndex),
RestStatus.BAD_REQUEST

View File

@ -208,23 +208,6 @@ setup:
}
}
---
"Test transform where dest is a simple index pattern":
- do:
catch: /Destination index .* should refer to a single index/
data_frame.put_data_frame_transform:
transform_id: "airline-transform"
body: >
{
"source": {
"index": ["airline-data*"]
},
"dest": { "index": "destination*" },
"pivot": {
"group_by": { "airline": {"terms": {"field": "airline"}}},
"aggs": {"avg_response": {"avg": {"field": "responsetime"}}}
}
}
---
"Test alias scenarios":
- do:
indices.create:
@ -364,3 +347,67 @@ setup:
"aggs": {"airline.responsetime": {"avg": {"field": "responsetime"}}}
}
}
---
"Test invalid data frame id":
- do:
catch: /can contain lowercase alphanumeric \(a-z and 0-9\), hyphens or underscores; must start and end with alphanumeric/
data_frame.put_data_frame_transform:
transform_id: "!@#$%^&*(duplicate-field-transform"
body: >
{
"source": {
"index": "source-index"
},
"dest": { "index": "dest-index" },
"pivot": {
"group_by": { "airline.id": {"terms": {"field": "airline"}}},
"aggs": {"airline.response": {"avg": {"field": "responsetime"}}}
}
}
- do:
catch: /The id cannot contain more than 64 character/
data_frame.put_data_frame_transform:
transform_id: "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
body: >
{
"source": {
"index": "source-index"
},
"dest": { "index": "dest-index" },
"pivot": {
"group_by": { "airline.id": {"terms": {"field": "airline"}}},
"aggs": {"airline.response": {"avg": {"field": "responsetime"}}}
}
}
---
"Test invalid destination index name":
- do:
catch: /dest\.index \[DeStInAtIoN\] must be lowercase/
data_frame.put_data_frame_transform:
transform_id: "airline-transform"
body: >
{
"source": {
"index": "airline-data"
},
"dest": { "index": "DeStInAtIoN" },
"pivot": {
"group_by": { "airline": {"terms": {"field": "airline"}}},
"aggs": {"avg_response": {"avg": {"field": "responsetime"}}}
}
}
- do:
catch: /Invalid index name \[destination#dest\], must not contain \'#\'/
data_frame.put_data_frame_transform:
transform_id: "airline-transform"
body: >
{
"source": {
"index": "airline-data"
},
"dest": { "index": "destination#dest" },
"pivot": {
"group_by": { "airline": {"terms": {"field": "airline"}}},
"aggs": {"avg_response": {"avg": {"field": "responsetime"}}}
}
}