[7.x] Verify that the field is aggregatable before attempting cardinality aggregation (#53874) (#54004)

This commit is contained in:
Przemysław Witek 2020-03-23 19:36:33 +01:00 committed by GitHub
parent ce31997ab2
commit 88c5d520b3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 25 additions and 2 deletions

View File

@ -57,6 +57,7 @@ public class ClassificationIT extends MlNativeDataFrameAnalyticsIntegTestCase {
private static final String BOOLEAN_FIELD = "boolean-field";
private static final String NUMERICAL_FIELD = "numerical-field";
private static final String DISCRETE_NUMERICAL_FIELD = "discrete-numerical-field";
private static final String TEXT_FIELD = "text-field";
private static final String KEYWORD_FIELD = "keyword-field";
private static final String NESTED_FIELD = "outer-field.inner-field";
private static final String ALIAS_TO_KEYWORD_FIELD = "alias-to-keyword-field";
@ -261,6 +262,14 @@ public class ClassificationIT extends MlNativeDataFrameAnalyticsIntegTestCase {
assertThat(e.getMessage(), startsWith("invalid types [double] for required field [numerical-field];"));
}
/**
 * Runs the shared training-percent-fifty scenario with {@code TEXT_FIELD} in the dependent-variable
 * position and expects it to be rejected with an {@link ElasticsearchStatusException} whose message
 * reports the text field as non-aggregatable.
 */
public void testWithOnlyTrainingRowsAndTrainingPercentIsFifty_DependentVariableIsText() throws Exception {
    // Only the beginning of the message is pinned; anything after this prefix is not checked.
    final String expectedMessagePrefix = "field [text-field] of type [text] is non-aggregatable";

    ElasticsearchStatusException failure =
        expectThrows(
            ElasticsearchStatusException.class,
            () -> testWithOnlyTrainingRowsAndTrainingPercentIsFifty(
                "classification_training_percent_is_50_text",
                TEXT_FIELD,
                KEYWORD_FIELD_VALUES,
                null));

    assertThat(failure.getMessage(), startsWith(expectedMessagePrefix));
}
public void testWithOnlyTrainingRowsAndTrainingPercentIsFifty_DependentVariableIsBoolean() throws Exception {
testWithOnlyTrainingRowsAndTrainingPercentIsFifty(
"classification_training_percent_is_50_boolean", BOOLEAN_FIELD, BOOLEAN_FIELD_VALUES, "boolean");
@ -517,6 +526,7 @@ public class ClassificationIT extends MlNativeDataFrameAnalyticsIntegTestCase {
BOOLEAN_FIELD, "type=boolean",
NUMERICAL_FIELD, "type=double",
DISCRETE_NUMERICAL_FIELD, "type=integer",
TEXT_FIELD, "type=text",
KEYWORD_FIELD, "type=keyword",
NESTED_FIELD, "type=keyword",
ALIAS_TO_KEYWORD_FIELD, "type=alias,path=" + KEYWORD_FIELD,
@ -532,6 +542,7 @@ public class ClassificationIT extends MlNativeDataFrameAnalyticsIntegTestCase {
BOOLEAN_FIELD, BOOLEAN_FIELD_VALUES.get(i % BOOLEAN_FIELD_VALUES.size()),
NUMERICAL_FIELD, NUMERICAL_FIELD_VALUES.get(i % NUMERICAL_FIELD_VALUES.size()),
DISCRETE_NUMERICAL_FIELD, DISCRETE_NUMERICAL_FIELD_VALUES.get(i % DISCRETE_NUMERICAL_FIELD_VALUES.size()),
TEXT_FIELD, KEYWORD_FIELD_VALUES.get(i % KEYWORD_FIELD_VALUES.size()),
KEYWORD_FIELD, KEYWORD_FIELD_VALUES.get(i % KEYWORD_FIELD_VALUES.size()),
NESTED_FIELD, KEYWORD_FIELD_VALUES.get(i % KEYWORD_FIELD_VALUES.size()));
IndexRequest indexRequest = new IndexRequest(sourceIndex).source(source.toArray());
@ -550,6 +561,9 @@ public class ClassificationIT extends MlNativeDataFrameAnalyticsIntegTestCase {
Arrays.asList(
DISCRETE_NUMERICAL_FIELD, DISCRETE_NUMERICAL_FIELD_VALUES.get(i % DISCRETE_NUMERICAL_FIELD_VALUES.size())));
}
if (TEXT_FIELD.equals(dependentVariable) == false) {
source.addAll(Arrays.asList(TEXT_FIELD, KEYWORD_FIELD_VALUES.get(i % KEYWORD_FIELD_VALUES.size())));
}
if (KEYWORD_FIELD.equals(dependentVariable) == false) {
source.addAll(Arrays.asList(KEYWORD_FIELD, KEYWORD_FIELD_VALUES.get(i % KEYWORD_FIELD_VALUES.size())));
}

View File

@ -10,6 +10,7 @@ import org.elasticsearch.ResourceNotFoundException;
import org.elasticsearch.action.ActionListener;
import org.elasticsearch.action.admin.indices.settings.get.GetSettingsRequest;
import org.elasticsearch.action.admin.indices.settings.get.GetSettingsResponse;
import org.elasticsearch.action.fieldcaps.FieldCapabilities;
import org.elasticsearch.action.fieldcaps.FieldCapabilitiesAction;
import org.elasticsearch.action.fieldcaps.FieldCapabilitiesRequest;
import org.elasticsearch.action.fieldcaps.FieldCapabilitiesResponse;
@ -78,7 +79,7 @@ public class ExtractedFieldsDetectorFactory {
ActionListener<FieldCapabilitiesResponse> fieldCapabilitiesHandler = ActionListener.wrap(
fieldCapabilitiesResponse -> {
fieldCapsResponseHolder.set(fieldCapabilitiesResponse);
getCardinalitiesForFieldsWithConstraints(index, config, fieldCardinalitiesHandler);
getCardinalitiesForFieldsWithConstraints(index, config, fieldCapabilitiesResponse, fieldCardinalitiesHandler);
},
listener::onFailure
);
@ -96,7 +97,9 @@ public class ExtractedFieldsDetectorFactory {
getDocValueFieldsLimit(index, docValueFieldsLimitListener);
}
private void getCardinalitiesForFieldsWithConstraints(String[] index, DataFrameAnalyticsConfig config,
private void getCardinalitiesForFieldsWithConstraints(String[] index,
DataFrameAnalyticsConfig config,
FieldCapabilitiesResponse fieldCapabilitiesResponse,
ActionListener<Map<String, Long>> listener) {
List<FieldCardinalityConstraint> fieldCardinalityConstraints = config.getAnalysis().getFieldCardinalityConstraints();
if (fieldCardinalityConstraints.isEmpty()) {
@ -111,6 +114,12 @@ public class ExtractedFieldsDetectorFactory {
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder().size(0).query(config.getSource().getParsedQuery());
for (FieldCardinalityConstraint constraint : fieldCardinalityConstraints) {
for (FieldCapabilities fieldCaps : fieldCapabilitiesResponse.getField(constraint.getField()).values()) {
if (fieldCaps.isAggregatable() == false) {
throw ExceptionsHelper.badRequestException("field [{}] of type [{}] is non-aggregatable",
fieldCaps.getName(), fieldCaps.getType());
}
}
searchSourceBuilder.aggregation(
AggregationBuilders.cardinality(constraint.getField())
.field(constraint.getField())