diff --git a/docs/reference/ml/df-analytics/apis/explain-dfanalytics.asciidoc b/docs/reference/ml/df-analytics/apis/explain-dfanalytics.asciidoc
index f2f570cadaf..097a0a0e0ed 100644
--- a/docs/reference/ml/df-analytics/apis/explain-dfanalytics.asciidoc
+++ b/docs/reference/ml/df-analytics/apis/explain-dfanalytics.asciidoc
@@ -44,9 +44,10 @@ The following explanations are provided:
 
 * which fields are included or not in the analysis and why,
 * how much memory is estimated to be required. The estimate can be used when
-  deciding the appropriate value for `model_memory_limit` setting later on,
+  deciding the appropriate value for `model_memory_limit` setting later on.
 
-about either an existing {dfanalytics-job} or one that has not been created yet.
+If you have object fields or fields that are excluded via source filtering,
+they are not included in the explanation.
 
 
 [[ml-explain-dfanalytics-path-params]]
diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/extractor/ExtractedFieldsDetector.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/extractor/ExtractedFieldsDetector.java
index 632efd6ab85..4f2df98a311 100644
--- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/extractor/ExtractedFieldsDetector.java
+++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/extractor/ExtractedFieldsDetector.java
@@ -15,6 +15,7 @@ import org.elasticsearch.common.collect.Tuple;
 import org.elasticsearch.common.regex.Regex;
 import org.elasticsearch.index.IndexSettings;
 import org.elasticsearch.index.mapper.BooleanFieldMapper;
+import org.elasticsearch.index.mapper.ObjectMapper;
 import org.elasticsearch.search.fetch.subphase.FetchSourceContext;
 import org.elasticsearch.xpack.core.ml.dataframe.DataFrameAnalyticsConfig;
 import org.elasticsearch.xpack.core.ml.dataframe.analyses.DataFrameAnalysis;
@@ -40,6 +41,7 @@ import java.util.Objects;
 import java.util.Set;
 import java.util.TreeSet;
 import java.util.stream.Collectors;
+import java.util.stream.Stream;
 
 public class ExtractedFieldsDetector {
 
@@ -82,6 +84,7 @@ public class ExtractedFieldsDetector {
         Set<String> fields = new TreeSet<>(fieldCapabilitiesResponse.get().keySet());
         fields.removeAll(IGNORE_FIELDS);
         removeFieldsUnderResultsField(fields);
+        removeObjects(fields);
         applySourceFiltering(fields);
         FetchSourceContext analyzedFields = config.getAnalyzedFields();
 
@@ -112,6 +115,17 @@ public class ExtractedFieldsDetector {
         fields.removeIf(field -> field.startsWith(resultsField + "."));
     }
 
+    private void removeObjects(Set<String> fields) {
+        Iterator<String> fieldsIterator = fields.iterator();
+        while (fieldsIterator.hasNext()) {
+            String field = fieldsIterator.next();
+            Set<String> types = getMappingTypes(field);
+            if (isObject(types)) {
+                fieldsIterator.remove();
+            }
+        }
+    }
+
     private void applySourceFiltering(Set<String> fields) {
         Iterator<String> fieldsIterator = fields.iterator();
         while (fieldsIterator.hasNext()) {
@@ -178,6 +192,9 @@ public class ExtractedFieldsDetector {
         if (analyzedFields == null) {
             return;
         }
+
+        checkIncludesExcludesAreNotObjects(analyzedFields);
+
         String includes = analyzedFields.includes().length == 0 ? "*" : Strings.arrayToCommaDelimitedString(analyzedFields.includes());
         String excludes = Strings.arrayToCommaDelimitedString(analyzedFields.excludes());
 
@@ -205,6 +222,16 @@ public class ExtractedFieldsDetector {
         }
     }
 
+    private void checkIncludesExcludesAreNotObjects(FetchSourceContext analyzedFields) {
+        List<String> objectFields = Stream.concat(Arrays.stream(analyzedFields.includes()), Arrays.stream(analyzedFields.excludes()))
+            .filter(field -> isObject(getMappingTypes(field)))
+            .collect(Collectors.toList());
+        if (objectFields.isEmpty() == false) {
+            throw ExceptionsHelper.badRequestException("{} must not include or exclude object fields: {}",
+                DataFrameAnalyticsConfig.ANALYZED_FIELDS.getPreferredName(), objectFields);
+        }
+    }
+
     private void applyIncludesExcludes(Set<String> fields, Set<String> includes, Set<String> excludes,
                                        Set<FieldSelection> fieldSelection) {
         Iterator<String> fieldsIterator = fields.iterator();
@@ -394,4 +421,8 @@ public class ExtractedFieldsDetector {
     private static boolean isBoolean(Set<String> types) {
         return types.size() == 1 && types.contains(BooleanFieldMapper.CONTENT_TYPE);
     }
+
+    private boolean isObject(Set<String> types) {
+        return types.size() == 1 && types.contains(ObjectMapper.CONTENT_TYPE);
+    }
 }
diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/dataframe/extractor/ExtractedFieldsDetectorTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/dataframe/extractor/ExtractedFieldsDetectorTests.java
index 47776b85ae8..983ebce303b 100644
--- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/dataframe/extractor/ExtractedFieldsDetectorTests.java
+++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/dataframe/extractor/ExtractedFieldsDetectorTests.java
@@ -861,6 +861,49 @@ public class ExtractedFieldsDetectorTests extends ESTestCase {
             FieldSelection.included("field_22", Collections.singleton("float"), false, FieldSelection.FeatureType.NUMERICAL));
     }
 
+    public void testDetect_GivenObjectFields() {
+        FieldCapabilitiesResponse fieldCapabilities = new MockFieldCapsResponseBuilder()
+            .addAggregatableField("float_field", "float")
+            .addNonAggregatableField("object_field_1", "object")
+            .addNonAggregatableField("object_field_2", "object").build();
+
+        ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
+            SOURCE_INDEX, buildOutlierDetectionConfig(), 100, fieldCapabilities, Collections.emptyMap());
+        Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();
+
+        List<ExtractedField> allFields = fieldExtraction.v1().getAllFields();
+        assertThat(allFields, hasSize(1));
+        assertThat(allFields.get(0).getName(), equalTo("float_field"));
+    }
+
+    public void testDetect_GivenAnalyzedFieldIncludesObjectField() {
+        FieldCapabilitiesResponse fieldCapabilities = new MockFieldCapsResponseBuilder()
+            .addAggregatableField("float_field", "float")
+            .addNonAggregatableField("object_field", "object").build();
+
+        analyzedFields = new FetchSourceContext(true, new String[] { "float_field", "object_field" }, null);
+
+        ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
+            SOURCE_INDEX, buildOutlierDetectionConfig(), 100, fieldCapabilities, Collections.emptyMap());
+        ElasticsearchStatusException e = expectThrows(ElasticsearchStatusException.class, extractedFieldsDetector::detect);
+
+        assertThat(e.getMessage(), equalTo("analyzed_fields must not include or exclude object fields: [object_field]"));
+    }
+
+    public void testDetect_GivenAnalyzedFieldExcludesObjectField() {
+        FieldCapabilitiesResponse fieldCapabilities = new MockFieldCapsResponseBuilder()
+            .addAggregatableField("float_field", "float")
+            .addNonAggregatableField("object_field", "object").build();
+
+        analyzedFields = new FetchSourceContext(true, null, new String[] { "object_field" });
+
+        ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
+            SOURCE_INDEX, buildOutlierDetectionConfig(), 100, fieldCapabilities, Collections.emptyMap());
+        ElasticsearchStatusException e = expectThrows(ElasticsearchStatusException.class, extractedFieldsDetector::detect);
+
+        assertThat(e.getMessage(), equalTo("analyzed_fields must not include or exclude object fields: [object_field]"));
+    }
+
     private DataFrameAnalyticsConfig buildOutlierDetectionConfig() {
         return new DataFrameAnalyticsConfig.Builder()
             .setId("foo")