From da67c2f7f8b04809dd642918383cf3bee82d9b72 Mon Sep 17 00:00:00 2001 From: Gordon Brown Date: Tue, 12 Mar 2019 09:06:31 -0600 Subject: [PATCH] Deprecation check for indices with very large numbers of fields (#39869) Indices with very large numbers of fields (>1024 by default) that do not have index.query.default_field set will experience query failures in 7.0 for Simple Query String and Multi-Match queries. This deprecation check issues a warning for indices of that size that do not have index.query.default_field set. This also adds a deprecation check for index templates with field counts that would trigger these query failures as well. --- .../deprecation/ClusterDeprecationChecks.java | 40 +++++++ .../xpack/deprecation/DeprecationChecks.java | 7 +- .../deprecation/IndexDeprecationChecks.java | 66 ++++++++++- .../ClusterDeprecationChecksTests.java | 82 +++++++++++++ .../IndexDeprecationChecksTests.java | 110 ++++++++++++++++++ 5 files changed, 302 insertions(+), 3 deletions(-) diff --git a/x-pack/plugin/deprecation/src/main/java/org/elasticsearch/xpack/deprecation/ClusterDeprecationChecks.java b/x-pack/plugin/deprecation/src/main/java/org/elasticsearch/xpack/deprecation/ClusterDeprecationChecks.java index 38f78b7156e..1a71f094fc1 100644 --- a/x-pack/plugin/deprecation/src/main/java/org/elasticsearch/xpack/deprecation/ClusterDeprecationChecks.java +++ b/x-pack/plugin/deprecation/src/main/java/org/elasticsearch/xpack/deprecation/ClusterDeprecationChecks.java @@ -6,17 +6,26 @@ package org.elasticsearch.xpack.deprecation; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; import org.elasticsearch.cluster.ClusterState; +import org.elasticsearch.cluster.metadata.MappingMetaData; +import org.elasticsearch.index.IndexSettings; import org.elasticsearch.ingest.IngestService; import org.elasticsearch.ingest.PipelineConfiguration; import org.elasticsearch.xpack.core.deprecation.DeprecationIssue; +import java.util.ArrayList; import java.util.List; import java.util.Map; import java.util.Objects; +import java.util.concurrent.atomic.AtomicInteger; import java.util.stream.Collectors; +import static org.elasticsearch.search.SearchModule.INDICES_MAX_CLAUSE_COUNT_SETTING; + public class ClusterDeprecationChecks { + private static final Logger logger = LogManager.getLogger(ClusterDeprecationChecks.class); @SuppressWarnings("unchecked") static DeprecationIssue checkUserAgentPipelines(ClusterState state) { @@ -48,4 +57,35 @@ public class ClusterDeprecationChecks { return null; } + + static DeprecationIssue checkTemplatesWithTooManyFields(ClusterState state) { + Integer maxClauseCount = INDICES_MAX_CLAUSE_COUNT_SETTING.get(state.getMetaData().settings()); + List templatesOverLimit = new ArrayList<>(); + state.getMetaData().getTemplates().forEach((templateCursor) -> { + AtomicInteger maxFields = new AtomicInteger(0); + String templateName = templateCursor.key; + boolean defaultFieldSet = templateCursor.value.settings().get(IndexSettings.DEFAULT_FIELD_SETTING.getKey()) != null; + templateCursor.value.getMappings().forEach((mappingCursor) -> { + MappingMetaData mappingMetaData = new MappingMetaData(mappingCursor.value); + if (mappingMetaData != null && defaultFieldSet == false) { + maxFields.set(IndexDeprecationChecks.countFieldsRecursively(mappingMetaData.type(), mappingMetaData.sourceAsMap())); + } + if (maxFields.get() > maxClauseCount) { + templatesOverLimit.add(templateName); + } + }); + }); + + if (templatesOverLimit.isEmpty() == false) { + return new DeprecationIssue(DeprecationIssue.Level.WARNING, + "Fields in index template exceed automatic field expansion limit", + "https://www.elastic.co/guide/en/elasticsearch/reference/7.0/breaking-changes-7.0.html" + + "#_limiting_the_number_of_auto_expanded_fields", + "Index templates " + templatesOverLimit + " have a number of fields which exceeds the automatic field expansion " + + "limit of [" + maxClauseCount + "] and does not have [" + IndexSettings.DEFAULT_FIELD_SETTING.getKey() + "] set, " + + "which may cause queries which use automatic field expansion, such as query_string, simple_query_string, and " + + "multi_match to fail if fields are not explicitly specified in the query."); + } + return null; + } } diff --git a/x-pack/plugin/deprecation/src/main/java/org/elasticsearch/xpack/deprecation/DeprecationChecks.java b/x-pack/plugin/deprecation/src/main/java/org/elasticsearch/xpack/deprecation/DeprecationChecks.java index e1711a84529..b70c7c4fa32 100644 --- a/x-pack/plugin/deprecation/src/main/java/org/elasticsearch/xpack/deprecation/DeprecationChecks.java +++ b/x-pack/plugin/deprecation/src/main/java/org/elasticsearch/xpack/deprecation/DeprecationChecks.java @@ -33,7 +33,8 @@ public class DeprecationChecks { static List> CLUSTER_SETTINGS_CHECKS = Collections.unmodifiableList(Arrays.asList( - ClusterDeprecationChecks::checkUserAgentPipelines + ClusterDeprecationChecks::checkUserAgentPipelines, + ClusterDeprecationChecks::checkTemplatesWithTooManyFields )); @@ -44,7 +45,9 @@ public class DeprecationChecks { static List> INDEX_SETTINGS_CHECKS = Collections.unmodifiableList(Arrays.asList( - IndexDeprecationChecks::oldIndicesCheck)); + IndexDeprecationChecks::oldIndicesCheck, + IndexDeprecationChecks::tooManyFieldsCheck + )); static List> ML_SETTINGS_CHECKS = Collections.unmodifiableList(Arrays.asList( diff --git a/x-pack/plugin/deprecation/src/main/java/org/elasticsearch/xpack/deprecation/IndexDeprecationChecks.java b/x-pack/plugin/deprecation/src/main/java/org/elasticsearch/xpack/deprecation/IndexDeprecationChecks.java index 7defb80ccaa..a0cdbaee3b4 100644 --- a/x-pack/plugin/deprecation/src/main/java/org/elasticsearch/xpack/deprecation/IndexDeprecationChecks.java +++ b/x-pack/plugin/deprecation/src/main/java/org/elasticsearch/xpack/deprecation/IndexDeprecationChecks.java @@ -10,11 +10,13 @@ import com.carrotsearch.hppc.cursors.ObjectCursor; import org.elasticsearch.Version; import org.elasticsearch.cluster.metadata.IndexMetaData; import org.elasticsearch.cluster.metadata.MappingMetaData; +import org.elasticsearch.index.IndexSettings; import org.elasticsearch.xpack.core.deprecation.DeprecationIssue; import java.util.ArrayList; import java.util.List; import java.util.Map; +import java.util.concurrent.atomic.AtomicInteger; import java.util.function.BiConsumer; import java.util.function.Function; @@ -40,7 +42,7 @@ public class IndexDeprecationChecks { * @return a list of issues found in fields */ @SuppressWarnings("unchecked") - private static List findInPropertiesRecursively(String type, Map parentMap, + static List findInPropertiesRecursively(String type, Map parentMap, Function, Boolean> predicate) { List issues = new ArrayList<>(); Map properties = (Map) parentMap.get("properties"); @@ -84,4 +86,66 @@ public class IndexDeprecationChecks { } return null; } + + static DeprecationIssue tooManyFieldsCheck(IndexMetaData indexMetaData) { + if (indexMetaData.getSettings().get(IndexSettings.DEFAULT_FIELD_SETTING.getKey()) == null) { + AtomicInteger fieldCount = new AtomicInteger(0); + + fieldLevelMappingIssue(indexMetaData, ((mappingMetaData, sourceAsMap) -> { + fieldCount.addAndGet(countFieldsRecursively(mappingMetaData.type(), sourceAsMap)); + })); + + // We can't get to the setting `indices.query.bool.max_clause_count` from here, so just check the default of that setting. + // It's also much better practice to set `index.query.default_field` than `indices.query.bool.max_clause_count` - there's a + // reason we introduced the limit. + if (fieldCount.get() > 1024) { + return new DeprecationIssue(DeprecationIssue.Level.WARNING, + "Number of fields exceeds automatic field expansion limit", + "https://www.elastic.co/guide/en/elasticsearch/reference/7.0/breaking-changes-7.0.html" + + "#_limiting_the_number_of_auto_expanded_fields", + "This index has [" + fieldCount.get() + "] fields, which exceeds the automatic field expansion limit of 1024 " + + "and does not have [" + IndexSettings.DEFAULT_FIELD_SETTING.getKey() + "] set, which may cause queries which use " + + "automatic field expansion, such as query_string, simple_query_string, and multi_match to fail if fields are not " + + "explicitly specified in the query."); + } + } + return null; + } + + /* Counts the number of fields in a mapping, designed to count the as closely as possible to + * org.elasticsearch.index.search.QueryParserHelper#checkForTooManyFields + */ + @SuppressWarnings("unchecked") + static int countFieldsRecursively(String type, Map parentMap) { + int fields = 0; + Map properties = (Map) parentMap.get("properties"); + if (properties == null) { + return fields; + } + for (Map.Entry entry : properties.entrySet()) { + Map valueMap = (Map) entry.getValue(); + if (valueMap.containsKey("type") + && (valueMap.get("type").equals("object") && valueMap.containsKey("properties") == false) == false) { + fields++; + } + + Map values = (Map) valueMap.get("fields"); + if (values != null) { + for (Map.Entry multifieldEntry : values.entrySet()) { + Map multifieldValueMap = (Map) multifieldEntry.getValue(); + if (multifieldValueMap.containsKey("type")) { + fields++; + } + if (multifieldValueMap.containsKey("properties")) { + fields += countFieldsRecursively(type, multifieldValueMap); + } + } + } + if (valueMap.containsKey("properties")) { + fields += countFieldsRecursively(type, valueMap); + } + } + + return fields; + } } diff --git a/x-pack/plugin/deprecation/src/test/java/org/elasticsearch/xpack/deprecation/ClusterDeprecationChecksTests.java b/x-pack/plugin/deprecation/src/test/java/org/elasticsearch/xpack/deprecation/ClusterDeprecationChecksTests.java index 7f31067b6bc..990958e766c 100644 --- a/x-pack/plugin/deprecation/src/test/java/org/elasticsearch/xpack/deprecation/ClusterDeprecationChecksTests.java +++ b/x-pack/plugin/deprecation/src/test/java/org/elasticsearch/xpack/deprecation/ClusterDeprecationChecksTests.java @@ -8,16 +8,26 @@ package org.elasticsearch.xpack.deprecation; import org.elasticsearch.action.ingest.PutPipelineRequest; import org.elasticsearch.cluster.ClusterName; import org.elasticsearch.cluster.ClusterState; +import org.elasticsearch.cluster.metadata.IndexTemplateMetaData; +import org.elasticsearch.cluster.metadata.MetaData; +import org.elasticsearch.common.Strings; import org.elasticsearch.common.bytes.BytesArray; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentType; +import org.elasticsearch.index.IndexSettings; import org.elasticsearch.ingest.IngestService; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.xpack.core.deprecation.DeprecationIssue; +import java.io.IOException; +import java.util.Collections; import java.util.List; import static java.util.Collections.singletonList; +import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder; import static org.elasticsearch.xpack.deprecation.DeprecationChecks.CLUSTER_SETTINGS_CHECKS; +import static org.elasticsearch.xpack.deprecation.IndexDeprecationChecksTests.addRandomFields; public class ClusterDeprecationChecksTests extends ESTestCase { @@ -73,4 +83,76 @@ public class ClusterDeprecationChecksTests extends ESTestCase { "Ingest pipelines [ecs_false, ecs_true] uses the [ecs] option which needs to be removed to work in 8.0"); assertEquals(singletonList(expected), issues); } + + public void testTemplateWithTooManyFields() throws IOException { + String tooManyFieldsTemplate = randomAlphaOfLength(5); + String tooManyFieldsWithDefaultFieldsTemplate = randomAlphaOfLength(6); + String goodTemplateName = randomAlphaOfLength(7); + + // A template with too many fields + int tooHighFieldCount = randomIntBetween(1025, 10_000); // 10_000 is arbitrary + XContentBuilder badMappingBuilder = jsonBuilder(); + badMappingBuilder.startObject(); + { + badMappingBuilder.startObject("_doc"); + { + badMappingBuilder.startObject("properties"); + { + addRandomFields(tooHighFieldCount, badMappingBuilder); + } + badMappingBuilder.endObject(); + } + badMappingBuilder.endObject(); + } + badMappingBuilder.endObject(); + + // A template with an OK number of fields + int okFieldCount = randomIntBetween(1, 1024); + XContentBuilder goodMappingBuilder = jsonBuilder(); + goodMappingBuilder.startObject(); + { + goodMappingBuilder.startObject("_doc"); + { + goodMappingBuilder.startObject("properties"); + { + addRandomFields(okFieldCount, goodMappingBuilder); + } + goodMappingBuilder.endObject(); + } + goodMappingBuilder.endObject(); + } + goodMappingBuilder.endObject(); + + final ClusterState state = ClusterState.builder(new ClusterName(randomAlphaOfLength(5))) + .metaData(MetaData.builder() + .put(IndexTemplateMetaData.builder(tooManyFieldsTemplate) + .patterns(Collections.singletonList(randomAlphaOfLength(5))) + .putMapping("_doc", Strings.toString(badMappingBuilder)) + .build()) + .put(IndexTemplateMetaData.builder(tooManyFieldsWithDefaultFieldsTemplate) + .patterns(Collections.singletonList(randomAlphaOfLength(5))) + .putMapping("_doc", Strings.toString(badMappingBuilder)) + .settings(Settings.builder() + .put(IndexSettings.DEFAULT_FIELD_SETTING.getKey(), + Collections.singletonList(randomAlphaOfLength(5)).toString())) + .build()) + .put(IndexTemplateMetaData.builder(goodTemplateName) + .patterns(Collections.singletonList(randomAlphaOfLength(5))) + .putMapping("_doc", Strings.toString(goodMappingBuilder)) + .build()) + .build()) + .build(); + + List issues = DeprecationChecks.filterChecks(CLUSTER_SETTINGS_CHECKS, c -> c.apply(state)); + + DeprecationIssue expected = new DeprecationIssue(DeprecationIssue.Level.WARNING, + "Fields in index template exceed automatic field expansion limit", + "https://www.elastic.co/guide/en/elasticsearch/reference/7.0/breaking-changes-7.0.html" + + "#_limiting_the_number_of_auto_expanded_fields", + "Index templates " + Collections.singletonList(tooManyFieldsTemplate) + " have a number of fields which exceeds the " + + "automatic field expansion limit of [1024] and does not have [" + IndexSettings.DEFAULT_FIELD_SETTING.getKey() + "] set, " + + "which may cause queries which use automatic field expansion, such as query_string, simple_query_string, and multi_match " + + "to fail if fields are not explicitly specified in the query."); + assertEquals(singletonList(expected), issues); + } } diff --git a/x-pack/plugin/deprecation/src/test/java/org/elasticsearch/xpack/deprecation/IndexDeprecationChecksTests.java b/x-pack/plugin/deprecation/src/test/java/org/elasticsearch/xpack/deprecation/IndexDeprecationChecksTests.java index b0f5a556ac6..9eb27cb127c 100644 --- a/x-pack/plugin/deprecation/src/test/java/org/elasticsearch/xpack/deprecation/IndexDeprecationChecksTests.java +++ b/x-pack/plugin/deprecation/src/test/java/org/elasticsearch/xpack/deprecation/IndexDeprecationChecksTests.java @@ -8,13 +8,20 @@ package org.elasticsearch.xpack.deprecation; import org.elasticsearch.Version; import org.elasticsearch.cluster.metadata.IndexMetaData; +import org.elasticsearch.common.Strings; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.index.IndexSettings; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.test.VersionUtils; import org.elasticsearch.xpack.core.deprecation.DeprecationIssue; +import java.io.IOException; +import java.util.ArrayList; import java.util.List; +import java.util.concurrent.atomic.AtomicInteger; import static java.util.Collections.singletonList; +import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder; import static org.elasticsearch.xpack.deprecation.DeprecationChecks.INDEX_SETTINGS_CHECKS; public class IndexDeprecationChecksTests extends ESTestCase { @@ -34,4 +41,107 @@ public class IndexDeprecationChecksTests extends ESTestCase { List issues = DeprecationChecks.filterChecks(INDEX_SETTINGS_CHECKS, c -> c.apply(indexMetaData)); assertEquals(singletonList(expected), issues); } + + public void testTooManyFieldsCheck() throws IOException { + String simpleMapping = "{\n" + + " \"properties\": {\n" + + " \"some_field\": {\n" + + " \"type\": \"text\"\n" + + " },\n" + + " \"other_field\": {\n" + + " \"type\": \"text\",\n" + + " \"properties\": {\n" + + " \"raw\": {\"type\": \"keyword\"}\n" + + " }\n" + + " }\n" + + " }\n" + + "}"; + + IndexMetaData simpleIndex = IndexMetaData.builder(randomAlphaOfLengthBetween(5, 10)) + .settings(settings(Version.V_7_0_0)) + .numberOfShards(randomIntBetween(1, 100)) + .numberOfReplicas(randomIntBetween(1, 100)) + .putMapping("_doc", simpleMapping) + .build(); + List noIssues = DeprecationChecks.filterChecks(INDEX_SETTINGS_CHECKS, c -> c.apply(simpleIndex)); + assertEquals(0, noIssues.size()); + + // Test that it catches having too many fields + int fieldCount = randomIntBetween(1025, 10_000); // 10_000 is arbitrary + + XContentBuilder mappingBuilder = jsonBuilder(); + mappingBuilder.startObject(); + { + mappingBuilder.startObject("properties"); + { + addRandomFields(fieldCount, mappingBuilder); + } + mappingBuilder.endObject(); + } + mappingBuilder.endObject(); + + IndexMetaData tooManyFieldsIndex = IndexMetaData.builder(randomAlphaOfLengthBetween(5, 10)) + .settings(settings(Version.V_7_0_0)) + .numberOfShards(randomIntBetween(1, 100)) + .numberOfReplicas(randomIntBetween(1, 100)) + .putMapping("_doc", Strings.toString(mappingBuilder)) + .build(); + DeprecationIssue expected = new DeprecationIssue(DeprecationIssue.Level.WARNING, + "Number of fields exceeds automatic field expansion limit", + "https://www.elastic.co/guide/en/elasticsearch/reference/7.0/breaking-changes-7.0.html" + + "#_limiting_the_number_of_auto_expanded_fields", + "This index has [" + fieldCount + "] fields, which exceeds the automatic field expansion limit of 1024 " + + "and does not have [" + IndexSettings.DEFAULT_FIELD_SETTING.getKey() + "] set, which may cause queries which use " + + "automatic field expansion, such as query_string, simple_query_string, and multi_match to fail if fields are not " + + "explicitly specified in the query."); + List issues = DeprecationChecks.filterChecks(INDEX_SETTINGS_CHECKS, c -> c.apply(tooManyFieldsIndex)); + assertEquals(singletonList(expected), issues); + + // Check that it's okay to have too many fields as long as `index.query.default_field` is set + IndexMetaData tooManyFieldsOk = IndexMetaData.builder(randomAlphaOfLengthBetween(5, 10)) + .settings(settings(Version.V_7_0_0) + .put(IndexSettings.DEFAULT_FIELD_SETTING.getKey(), randomAlphaOfLength(5))) + .numberOfShards(randomIntBetween(1, 100)) + .numberOfReplicas(randomIntBetween(1, 100)) + .putMapping("_doc", Strings.toString(mappingBuilder)) + .build(); + List withDefaultFieldIssues = + DeprecationChecks.filterChecks(INDEX_SETTINGS_CHECKS, c -> c.apply(tooManyFieldsOk)); + assertEquals(0, withDefaultFieldIssues.size()); + } + + static void addRandomFields(final int fieldLimit, + XContentBuilder mappingBuilder) throws IOException { + AtomicInteger fieldCount = new AtomicInteger(0); + List existingFieldNames = new ArrayList<>(); + while (fieldCount.get() < fieldLimit) { + addRandomField(existingFieldNames, fieldLimit, mappingBuilder, fieldCount); + } + } + + private static void addRandomField(List existingFieldNames, final int fieldLimit, + XContentBuilder mappingBuilder, AtomicInteger fieldCount) throws IOException { + if (fieldCount.get() > fieldLimit) { + return; + } + String newField = randomValueOtherThanMany(existingFieldNames::contains, () -> randomAlphaOfLengthBetween(2, 20)); + existingFieldNames.add(newField); + mappingBuilder.startObject(newField); + { + if (rarely()) { + mappingBuilder.startObject("properties"); + { + int subfields = randomIntBetween(1, 10); + while (existingFieldNames.size() < subfields) { + addRandomField(existingFieldNames, fieldLimit, mappingBuilder, fieldCount); + } + } + mappingBuilder.endObject(); + } else { + mappingBuilder.field("type", randomFrom("array", "binary", "range", "boolean", "date", "ip", "keyword", "text")); + fieldCount.incrementAndGet(); + } + } + mappingBuilder.endObject(); + } }