* [ML][Data Frame] adding dest.index and id validations * adjusting message format * Adjusting id validity pattern * Update DataFrameStrings.java
This commit is contained in:
parent
e384bf0276
commit
7ff3d86cf0
|
@ -65,6 +65,10 @@ public class DataFrameMessages {
|
|||
public static final String FAILED_TO_PARSE_TRANSFORM_CHECKPOINTS =
|
||||
"Failed to parse transform checkpoints for [{0}]";
|
||||
|
||||
|
||||
public static final String ID_TOO_LONG = "The id cannot contain more than {0} characters.";
|
||||
public static final String INVALID_ID = "Invalid {0}; ''{1}'' can contain lowercase alphanumeric (a-z and 0-9), hyphens or " +
|
||||
"underscores; must start and end with alphanumeric";
|
||||
private DataFrameMessages() {
|
||||
}
|
||||
|
||||
|
|
|
@ -15,12 +15,18 @@ import org.elasticsearch.common.io.stream.StreamOutput;
|
|||
import org.elasticsearch.common.xcontent.ToXContentObject;
|
||||
import org.elasticsearch.common.xcontent.XContentBuilder;
|
||||
import org.elasticsearch.common.xcontent.XContentParser;
|
||||
import org.elasticsearch.indices.InvalidIndexNameException;
|
||||
import org.elasticsearch.xpack.core.dataframe.DataFrameField;
|
||||
import org.elasticsearch.xpack.core.dataframe.transforms.DataFrameTransformConfig;
|
||||
import org.elasticsearch.xpack.core.dataframe.utils.DataFrameStrings;
|
||||
import org.elasticsearch.xpack.core.dataframe.DataFrameMessages;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Locale;
|
||||
import java.util.Objects;
|
||||
|
||||
import static org.elasticsearch.action.ValidateActions.addValidationError;
|
||||
import static org.elasticsearch.cluster.metadata.MetaDataCreateIndexService.validateIndexOrAliasName;
|
||||
|
||||
public class PutDataFrameTransformAction extends Action<AcknowledgedResponse> {
|
||||
|
||||
|
@ -67,6 +73,25 @@ public class PutDataFrameTransformAction extends Action<AcknowledgedResponse> {
|
|||
for(String failure : config.getPivotConfig().aggFieldValidation()) {
|
||||
validationException = addValidationError(failure, validationException);
|
||||
}
|
||||
String destIndex = config.getDestination().getIndex();
|
||||
try {
|
||||
validateIndexOrAliasName(destIndex, InvalidIndexNameException::new);
|
||||
if (!destIndex.toLowerCase(Locale.ROOT).equals(destIndex)) {
|
||||
validationException = addValidationError("dest.index [" + destIndex +"] must be lowercase", validationException);
|
||||
}
|
||||
} catch (InvalidIndexNameException ex) {
|
||||
validationException = addValidationError(ex.getMessage(), validationException);
|
||||
}
|
||||
if (DataFrameStrings.isValidId(config.getId()) == false) {
|
||||
validationException = addValidationError(
|
||||
DataFrameMessages.getMessage(DataFrameMessages.INVALID_ID, DataFrameField.ID.getPreferredName(), config.getId()),
|
||||
validationException);
|
||||
}
|
||||
if (DataFrameStrings.hasValidLengthForId(config.getId()) == false) {
|
||||
validationException = addValidationError(
|
||||
DataFrameMessages.getMessage(DataFrameMessages.ID_TOO_LONG, DataFrameStrings.ID_LENGTH_LIMIT),
|
||||
validationException);
|
||||
}
|
||||
return validationException;
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,46 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License;
|
||||
* you may not use this file except in compliance with the Elastic License.
|
||||
*/
|
||||
package org.elasticsearch.xpack.core.dataframe.utils;
|
||||
|
||||
import org.elasticsearch.cluster.metadata.MetaData;
|
||||
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/**
|
||||
* Yet Another String utilities class.
|
||||
*/
|
||||
public final class DataFrameStrings {
|
||||
|
||||
/**
|
||||
* Valid user id pattern.
|
||||
* Matches a string that contains characters, digits, hyphens, underscores or dots.
|
||||
* The string may start and end only in characters or digits.
|
||||
* Note that '.' is allowed but not documented.
|
||||
*/
|
||||
private static final Pattern VALID_ID_CHAR_PATTERN = Pattern.compile("[a-zA-Z0-9](?:[a-zA-Z0-9_\\-\\.]*[a-zA-Z0-9])?");
|
||||
|
||||
public static final int ID_LENGTH_LIMIT = 64;
|
||||
|
||||
private DataFrameStrings() {
|
||||
}
|
||||
|
||||
public static boolean isValidId(String id) {
|
||||
return id != null && VALID_ID_CHAR_PATTERN.matcher(id).matches() && !MetaData.ALL.equals(id);
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks if the given {@code id} has a valid length.
|
||||
* We keep IDs in a length shorter or equal than {@link #ID_LENGTH_LIMIT}
|
||||
* in order to avoid unfriendly errors when storing docs with
|
||||
* more than 512 bytes.
|
||||
*
|
||||
* @param id the id
|
||||
* @return {@code true} if the id has a valid length
|
||||
*/
|
||||
public static boolean hasValidLengthForId(String id) {
|
||||
return id.length() <= ID_LENGTH_LIMIT;
|
||||
}
|
||||
}
|
|
@ -153,7 +153,7 @@ public class TransportPutDataFrameTransformAction
|
|||
final String[] concreteDest =
|
||||
indexNameExpressionResolver.concreteIndexNames(clusterState, IndicesOptions.lenientExpandOpen(), destIndex);
|
||||
|
||||
if (concreteDest.length > 1 || Regex.isSimpleMatchPattern(destIndex)) {
|
||||
if (concreteDest.length > 1) {
|
||||
listener.onFailure(new ElasticsearchStatusException(
|
||||
DataFrameMessages.getMessage(DataFrameMessages.REST_PUT_DATA_FRAME_DEST_SINGLE_INDEX, destIndex),
|
||||
RestStatus.BAD_REQUEST
|
||||
|
|
|
@ -208,23 +208,6 @@ setup:
|
|||
}
|
||||
}
|
||||
---
|
||||
"Test transform where dest is a simple index pattern":
|
||||
- do:
|
||||
catch: /Destination index .* should refer to a single index/
|
||||
data_frame.put_data_frame_transform:
|
||||
transform_id: "airline-transform"
|
||||
body: >
|
||||
{
|
||||
"source": {
|
||||
"index": ["airline-data*"]
|
||||
},
|
||||
"dest": { "index": "destination*" },
|
||||
"pivot": {
|
||||
"group_by": { "airline": {"terms": {"field": "airline"}}},
|
||||
"aggs": {"avg_response": {"avg": {"field": "responsetime"}}}
|
||||
}
|
||||
}
|
||||
---
|
||||
"Test alias scenarios":
|
||||
- do:
|
||||
indices.create:
|
||||
|
@ -364,3 +347,67 @@ setup:
|
|||
"aggs": {"airline.responsetime": {"avg": {"field": "responsetime"}}}
|
||||
}
|
||||
}
|
||||
---
|
||||
"Test invalid data frame id":
|
||||
- do:
|
||||
catch: /can contain lowercase alphanumeric \(a-z and 0-9\), hyphens or underscores; must start and end with alphanumeric/
|
||||
data_frame.put_data_frame_transform:
|
||||
transform_id: "!@#$%^&*(duplicate-field-transform"
|
||||
body: >
|
||||
{
|
||||
"source": {
|
||||
"index": "source-index"
|
||||
},
|
||||
"dest": { "index": "dest-index" },
|
||||
"pivot": {
|
||||
"group_by": { "airline.id": {"terms": {"field": "airline"}}},
|
||||
"aggs": {"airline.response": {"avg": {"field": "responsetime"}}}
|
||||
}
|
||||
}
|
||||
- do:
|
||||
catch: /The id cannot contain more than 64 character/
|
||||
data_frame.put_data_frame_transform:
|
||||
transform_id: "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
|
||||
body: >
|
||||
{
|
||||
"source": {
|
||||
"index": "source-index"
|
||||
},
|
||||
"dest": { "index": "dest-index" },
|
||||
"pivot": {
|
||||
"group_by": { "airline.id": {"terms": {"field": "airline"}}},
|
||||
"aggs": {"airline.response": {"avg": {"field": "responsetime"}}}
|
||||
}
|
||||
}
|
||||
---
|
||||
"Test invalid destination index name":
|
||||
- do:
|
||||
catch: /dest\.index \[DeStInAtIoN\] must be lowercase/
|
||||
data_frame.put_data_frame_transform:
|
||||
transform_id: "airline-transform"
|
||||
body: >
|
||||
{
|
||||
"source": {
|
||||
"index": "airline-data"
|
||||
},
|
||||
"dest": { "index": "DeStInAtIoN" },
|
||||
"pivot": {
|
||||
"group_by": { "airline": {"terms": {"field": "airline"}}},
|
||||
"aggs": {"avg_response": {"avg": {"field": "responsetime"}}}
|
||||
}
|
||||
}
|
||||
- do:
|
||||
catch: /Invalid index name \[destination#dest\], must not contain \'#\'/
|
||||
data_frame.put_data_frame_transform:
|
||||
transform_id: "airline-transform"
|
||||
body: >
|
||||
{
|
||||
"source": {
|
||||
"index": "airline-data"
|
||||
},
|
||||
"dest": { "index": "destination#dest" },
|
||||
"pivot": {
|
||||
"group_by": { "airline": {"terms": {"field": "airline"}}},
|
||||
"aggs": {"avg_response": {"avg": {"field": "responsetime"}}}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue