Deprecation check for indices with very large numbers of fields (#39869)

Indices with very large numbers of fields (>1024 by default) that do not
have index.query.default_field set will experience query failures in 7.0
for Simple Query String and Multi-Match queries. This deprecation check
issues a warning for indices of that size that do not have
index.query.default_field set.

This also adds a deprecation check for index templates with field counts
that would trigger these query failures as well.
This commit is contained in:
Gordon Brown 2019-03-12 09:06:31 -06:00 committed by GitHub
parent aa967edd73
commit da67c2f7f8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 302 additions and 3 deletions

View File

@ -6,17 +6,26 @@
package org.elasticsearch.xpack.deprecation; package org.elasticsearch.xpack.deprecation;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.cluster.ClusterState; import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.metadata.MappingMetaData;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.ingest.IngestService; import org.elasticsearch.ingest.IngestService;
import org.elasticsearch.ingest.PipelineConfiguration; import org.elasticsearch.ingest.PipelineConfiguration;
import org.elasticsearch.xpack.core.deprecation.DeprecationIssue; import org.elasticsearch.xpack.core.deprecation.DeprecationIssue;
import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Objects; import java.util.Objects;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import static org.elasticsearch.search.SearchModule.INDICES_MAX_CLAUSE_COUNT_SETTING;
public class ClusterDeprecationChecks { public class ClusterDeprecationChecks {
private static final Logger logger = LogManager.getLogger(ClusterDeprecationChecks.class);
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
static DeprecationIssue checkUserAgentPipelines(ClusterState state) { static DeprecationIssue checkUserAgentPipelines(ClusterState state) {
@ -48,4 +57,35 @@ public class ClusterDeprecationChecks {
return null; return null;
} }
static DeprecationIssue checkTemplatesWithTooManyFields(ClusterState state) {
Integer maxClauseCount = INDICES_MAX_CLAUSE_COUNT_SETTING.get(state.getMetaData().settings());
List<String> templatesOverLimit = new ArrayList<>();
state.getMetaData().getTemplates().forEach((templateCursor) -> {
AtomicInteger maxFields = new AtomicInteger(0);
String templateName = templateCursor.key;
boolean defaultFieldSet = templateCursor.value.settings().get(IndexSettings.DEFAULT_FIELD_SETTING.getKey()) != null;
templateCursor.value.getMappings().forEach((mappingCursor) -> {
MappingMetaData mappingMetaData = new MappingMetaData(mappingCursor.value);
if (mappingMetaData != null && defaultFieldSet == false) {
maxFields.set(IndexDeprecationChecks.countFieldsRecursively(mappingMetaData.type(), mappingMetaData.sourceAsMap()));
}
if (maxFields.get() > maxClauseCount) {
templatesOverLimit.add(templateName);
}
});
});
if (templatesOverLimit.isEmpty() == false) {
return new DeprecationIssue(DeprecationIssue.Level.WARNING,
"Fields in index template exceed automatic field expansion limit",
"https://www.elastic.co/guide/en/elasticsearch/reference/7.0/breaking-changes-7.0.html" +
"#_limiting_the_number_of_auto_expanded_fields",
"Index templates " + templatesOverLimit + " have a number of fields which exceeds the automatic field expansion " +
"limit of [" + maxClauseCount + "] and does not have [" + IndexSettings.DEFAULT_FIELD_SETTING.getKey() + "] set, " +
"which may cause queries which use automatic field expansion, such as query_string, simple_query_string, and " +
"multi_match to fail if fields are not explicitly specified in the query.");
}
return null;
}
} }

View File

@ -33,7 +33,8 @@ public class DeprecationChecks {
static List<Function<ClusterState, DeprecationIssue>> CLUSTER_SETTINGS_CHECKS = static List<Function<ClusterState, DeprecationIssue>> CLUSTER_SETTINGS_CHECKS =
Collections.unmodifiableList(Arrays.asList( Collections.unmodifiableList(Arrays.asList(
ClusterDeprecationChecks::checkUserAgentPipelines ClusterDeprecationChecks::checkUserAgentPipelines,
ClusterDeprecationChecks::checkTemplatesWithTooManyFields
)); ));
@ -44,7 +45,9 @@ public class DeprecationChecks {
static List<Function<IndexMetaData, DeprecationIssue>> INDEX_SETTINGS_CHECKS = static List<Function<IndexMetaData, DeprecationIssue>> INDEX_SETTINGS_CHECKS =
Collections.unmodifiableList(Arrays.asList( Collections.unmodifiableList(Arrays.asList(
IndexDeprecationChecks::oldIndicesCheck)); IndexDeprecationChecks::oldIndicesCheck,
IndexDeprecationChecks::tooManyFieldsCheck
));
static List<BiFunction<DatafeedConfig, NamedXContentRegistry, DeprecationIssue>> ML_SETTINGS_CHECKS = static List<BiFunction<DatafeedConfig, NamedXContentRegistry, DeprecationIssue>> ML_SETTINGS_CHECKS =
Collections.unmodifiableList(Arrays.asList( Collections.unmodifiableList(Arrays.asList(

View File

@ -10,11 +10,13 @@ import com.carrotsearch.hppc.cursors.ObjectCursor;
import org.elasticsearch.Version; import org.elasticsearch.Version;
import org.elasticsearch.cluster.metadata.IndexMetaData; import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.cluster.metadata.MappingMetaData; import org.elasticsearch.cluster.metadata.MappingMetaData;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.xpack.core.deprecation.DeprecationIssue; import org.elasticsearch.xpack.core.deprecation.DeprecationIssue;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.BiConsumer; import java.util.function.BiConsumer;
import java.util.function.Function; import java.util.function.Function;
@ -40,7 +42,7 @@ public class IndexDeprecationChecks {
* @return a list of issues found in fields * @return a list of issues found in fields
*/ */
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
private static List<String> findInPropertiesRecursively(String type, Map<String, Object> parentMap, static List<String> findInPropertiesRecursively(String type, Map<String, Object> parentMap,
Function<Map<?,?>, Boolean> predicate) { Function<Map<?,?>, Boolean> predicate) {
List<String> issues = new ArrayList<>(); List<String> issues = new ArrayList<>();
Map<?, ?> properties = (Map<?, ?>) parentMap.get("properties"); Map<?, ?> properties = (Map<?, ?>) parentMap.get("properties");
@ -84,4 +86,66 @@ public class IndexDeprecationChecks {
} }
return null; return null;
} }
static DeprecationIssue tooManyFieldsCheck(IndexMetaData indexMetaData) {
if (indexMetaData.getSettings().get(IndexSettings.DEFAULT_FIELD_SETTING.getKey()) == null) {
AtomicInteger fieldCount = new AtomicInteger(0);
fieldLevelMappingIssue(indexMetaData, ((mappingMetaData, sourceAsMap) -> {
fieldCount.addAndGet(countFieldsRecursively(mappingMetaData.type(), sourceAsMap));
}));
// We can't get to the setting `indices.query.bool.max_clause_count` from here, so just check the default of that setting.
// It's also much better practice to set `index.query.default_field` than `indices.query.bool.max_clause_count` - there's a
// reason we introduced the limit.
if (fieldCount.get() > 1024) {
return new DeprecationIssue(DeprecationIssue.Level.WARNING,
"Number of fields exceeds automatic field expansion limit",
"https://www.elastic.co/guide/en/elasticsearch/reference/7.0/breaking-changes-7.0.html" +
"#_limiting_the_number_of_auto_expanded_fields",
"This index has [" + fieldCount.get() + "] fields, which exceeds the automatic field expansion limit of 1024 " +
"and does not have [" + IndexSettings.DEFAULT_FIELD_SETTING.getKey() + "] set, which may cause queries which use " +
"automatic field expansion, such as query_string, simple_query_string, and multi_match to fail if fields are not " +
"explicitly specified in the query.");
}
}
return null;
}
/* Counts the number of fields in a mapping, designed to count the as closely as possible to
* org.elasticsearch.index.search.QueryParserHelper#checkForTooManyFields
*/
@SuppressWarnings("unchecked")
static int countFieldsRecursively(String type, Map<String, Object> parentMap) {
int fields = 0;
Map<?, ?> properties = (Map<?, ?>) parentMap.get("properties");
if (properties == null) {
return fields;
}
for (Map.Entry<?, ?> entry : properties.entrySet()) {
Map<String, Object> valueMap = (Map<String, Object>) entry.getValue();
if (valueMap.containsKey("type")
&& (valueMap.get("type").equals("object") && valueMap.containsKey("properties") == false) == false) {
fields++;
}
Map<?, ?> values = (Map<?, ?>) valueMap.get("fields");
if (values != null) {
for (Map.Entry<?, ?> multifieldEntry : values.entrySet()) {
Map<String, Object> multifieldValueMap = (Map<String, Object>) multifieldEntry.getValue();
if (multifieldValueMap.containsKey("type")) {
fields++;
}
if (multifieldValueMap.containsKey("properties")) {
fields += countFieldsRecursively(type, multifieldValueMap);
}
}
}
if (valueMap.containsKey("properties")) {
fields += countFieldsRecursively(type, valueMap);
}
}
return fields;
}
} }

View File

@ -8,16 +8,26 @@ package org.elasticsearch.xpack.deprecation;
import org.elasticsearch.action.ingest.PutPipelineRequest; import org.elasticsearch.action.ingest.PutPipelineRequest;
import org.elasticsearch.cluster.ClusterName; import org.elasticsearch.cluster.ClusterName;
import org.elasticsearch.cluster.ClusterState; import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.metadata.IndexTemplateMetaData;
import org.elasticsearch.cluster.metadata.MetaData;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.bytes.BytesArray; import org.elasticsearch.common.bytes.BytesArray;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentType; import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.ingest.IngestService; import org.elasticsearch.ingest.IngestService;
import org.elasticsearch.test.ESTestCase; import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.xpack.core.deprecation.DeprecationIssue; import org.elasticsearch.xpack.core.deprecation.DeprecationIssue;
import java.io.IOException;
import java.util.Collections;
import java.util.List; import java.util.List;
import static java.util.Collections.singletonList; import static java.util.Collections.singletonList;
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
import static org.elasticsearch.xpack.deprecation.DeprecationChecks.CLUSTER_SETTINGS_CHECKS; import static org.elasticsearch.xpack.deprecation.DeprecationChecks.CLUSTER_SETTINGS_CHECKS;
import static org.elasticsearch.xpack.deprecation.IndexDeprecationChecksTests.addRandomFields;
public class ClusterDeprecationChecksTests extends ESTestCase { public class ClusterDeprecationChecksTests extends ESTestCase {
@ -73,4 +83,76 @@ public class ClusterDeprecationChecksTests extends ESTestCase {
"Ingest pipelines [ecs_false, ecs_true] uses the [ecs] option which needs to be removed to work in 8.0"); "Ingest pipelines [ecs_false, ecs_true] uses the [ecs] option which needs to be removed to work in 8.0");
assertEquals(singletonList(expected), issues); assertEquals(singletonList(expected), issues);
} }
public void testTemplateWithTooManyFields() throws IOException {
String tooManyFieldsTemplate = randomAlphaOfLength(5);
String tooManyFieldsWithDefaultFieldsTemplate = randomAlphaOfLength(6);
String goodTemplateName = randomAlphaOfLength(7);
// A template with too many fields
int tooHighFieldCount = randomIntBetween(1025, 10_000); // 10_000 is arbitrary
XContentBuilder badMappingBuilder = jsonBuilder();
badMappingBuilder.startObject();
{
badMappingBuilder.startObject("_doc");
{
badMappingBuilder.startObject("properties");
{
addRandomFields(tooHighFieldCount, badMappingBuilder);
}
badMappingBuilder.endObject();
}
badMappingBuilder.endObject();
}
badMappingBuilder.endObject();
// A template with an OK number of fields
int okFieldCount = randomIntBetween(1, 1024);
XContentBuilder goodMappingBuilder = jsonBuilder();
goodMappingBuilder.startObject();
{
goodMappingBuilder.startObject("_doc");
{
goodMappingBuilder.startObject("properties");
{
addRandomFields(okFieldCount, goodMappingBuilder);
}
goodMappingBuilder.endObject();
}
goodMappingBuilder.endObject();
}
goodMappingBuilder.endObject();
final ClusterState state = ClusterState.builder(new ClusterName(randomAlphaOfLength(5)))
.metaData(MetaData.builder()
.put(IndexTemplateMetaData.builder(tooManyFieldsTemplate)
.patterns(Collections.singletonList(randomAlphaOfLength(5)))
.putMapping("_doc", Strings.toString(badMappingBuilder))
.build())
.put(IndexTemplateMetaData.builder(tooManyFieldsWithDefaultFieldsTemplate)
.patterns(Collections.singletonList(randomAlphaOfLength(5)))
.putMapping("_doc", Strings.toString(badMappingBuilder))
.settings(Settings.builder()
.put(IndexSettings.DEFAULT_FIELD_SETTING.getKey(),
Collections.singletonList(randomAlphaOfLength(5)).toString()))
.build())
.put(IndexTemplateMetaData.builder(goodTemplateName)
.patterns(Collections.singletonList(randomAlphaOfLength(5)))
.putMapping("_doc", Strings.toString(goodMappingBuilder))
.build())
.build())
.build();
List<DeprecationIssue> issues = DeprecationChecks.filterChecks(CLUSTER_SETTINGS_CHECKS, c -> c.apply(state));
DeprecationIssue expected = new DeprecationIssue(DeprecationIssue.Level.WARNING,
"Fields in index template exceed automatic field expansion limit",
"https://www.elastic.co/guide/en/elasticsearch/reference/7.0/breaking-changes-7.0.html" +
"#_limiting_the_number_of_auto_expanded_fields",
"Index templates " + Collections.singletonList(tooManyFieldsTemplate) + " have a number of fields which exceeds the " +
"automatic field expansion limit of [1024] and does not have [" + IndexSettings.DEFAULT_FIELD_SETTING.getKey() + "] set, " +
"which may cause queries which use automatic field expansion, such as query_string, simple_query_string, and multi_match " +
"to fail if fields are not explicitly specified in the query.");
assertEquals(singletonList(expected), issues);
}
} }

View File

@ -8,13 +8,20 @@ package org.elasticsearch.xpack.deprecation;
import org.elasticsearch.Version; import org.elasticsearch.Version;
import org.elasticsearch.cluster.metadata.IndexMetaData; import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.test.ESTestCase; import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.test.VersionUtils; import org.elasticsearch.test.VersionUtils;
import org.elasticsearch.xpack.core.deprecation.DeprecationIssue; import org.elasticsearch.xpack.core.deprecation.DeprecationIssue;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;
import static java.util.Collections.singletonList; import static java.util.Collections.singletonList;
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
import static org.elasticsearch.xpack.deprecation.DeprecationChecks.INDEX_SETTINGS_CHECKS; import static org.elasticsearch.xpack.deprecation.DeprecationChecks.INDEX_SETTINGS_CHECKS;
public class IndexDeprecationChecksTests extends ESTestCase { public class IndexDeprecationChecksTests extends ESTestCase {
@ -34,4 +41,107 @@ public class IndexDeprecationChecksTests extends ESTestCase {
List<DeprecationIssue> issues = DeprecationChecks.filterChecks(INDEX_SETTINGS_CHECKS, c -> c.apply(indexMetaData)); List<DeprecationIssue> issues = DeprecationChecks.filterChecks(INDEX_SETTINGS_CHECKS, c -> c.apply(indexMetaData));
assertEquals(singletonList(expected), issues); assertEquals(singletonList(expected), issues);
} }
public void testTooManyFieldsCheck() throws IOException {
String simpleMapping = "{\n" +
" \"properties\": {\n" +
" \"some_field\": {\n" +
" \"type\": \"text\"\n" +
" },\n" +
" \"other_field\": {\n" +
" \"type\": \"text\",\n" +
" \"properties\": {\n" +
" \"raw\": {\"type\": \"keyword\"}\n" +
" }\n" +
" }\n" +
" }\n" +
"}";
IndexMetaData simpleIndex = IndexMetaData.builder(randomAlphaOfLengthBetween(5, 10))
.settings(settings(Version.V_7_0_0))
.numberOfShards(randomIntBetween(1, 100))
.numberOfReplicas(randomIntBetween(1, 100))
.putMapping("_doc", simpleMapping)
.build();
List<DeprecationIssue> noIssues = DeprecationChecks.filterChecks(INDEX_SETTINGS_CHECKS, c -> c.apply(simpleIndex));
assertEquals(0, noIssues.size());
// Test that it catches having too many fields
int fieldCount = randomIntBetween(1025, 10_000); // 10_000 is arbitrary
XContentBuilder mappingBuilder = jsonBuilder();
mappingBuilder.startObject();
{
mappingBuilder.startObject("properties");
{
addRandomFields(fieldCount, mappingBuilder);
}
mappingBuilder.endObject();
}
mappingBuilder.endObject();
IndexMetaData tooManyFieldsIndex = IndexMetaData.builder(randomAlphaOfLengthBetween(5, 10))
.settings(settings(Version.V_7_0_0))
.numberOfShards(randomIntBetween(1, 100))
.numberOfReplicas(randomIntBetween(1, 100))
.putMapping("_doc", Strings.toString(mappingBuilder))
.build();
DeprecationIssue expected = new DeprecationIssue(DeprecationIssue.Level.WARNING,
"Number of fields exceeds automatic field expansion limit",
"https://www.elastic.co/guide/en/elasticsearch/reference/7.0/breaking-changes-7.0.html" +
"#_limiting_the_number_of_auto_expanded_fields",
"This index has [" + fieldCount + "] fields, which exceeds the automatic field expansion limit of 1024 " +
"and does not have [" + IndexSettings.DEFAULT_FIELD_SETTING.getKey() + "] set, which may cause queries which use " +
"automatic field expansion, such as query_string, simple_query_string, and multi_match to fail if fields are not " +
"explicitly specified in the query.");
List<DeprecationIssue> issues = DeprecationChecks.filterChecks(INDEX_SETTINGS_CHECKS, c -> c.apply(tooManyFieldsIndex));
assertEquals(singletonList(expected), issues);
// Check that it's okay to have too many fields as long as `index.query.default_field` is set
IndexMetaData tooManyFieldsOk = IndexMetaData.builder(randomAlphaOfLengthBetween(5, 10))
.settings(settings(Version.V_7_0_0)
.put(IndexSettings.DEFAULT_FIELD_SETTING.getKey(), randomAlphaOfLength(5)))
.numberOfShards(randomIntBetween(1, 100))
.numberOfReplicas(randomIntBetween(1, 100))
.putMapping("_doc", Strings.toString(mappingBuilder))
.build();
List<DeprecationIssue> withDefaultFieldIssues =
DeprecationChecks.filterChecks(INDEX_SETTINGS_CHECKS, c -> c.apply(tooManyFieldsOk));
assertEquals(0, withDefaultFieldIssues.size());
}
static void addRandomFields(final int fieldLimit,
XContentBuilder mappingBuilder) throws IOException {
AtomicInteger fieldCount = new AtomicInteger(0);
List<String> existingFieldNames = new ArrayList<>();
while (fieldCount.get() < fieldLimit) {
addRandomField(existingFieldNames, fieldLimit, mappingBuilder, fieldCount);
}
}
private static void addRandomField(List<String> existingFieldNames, final int fieldLimit,
XContentBuilder mappingBuilder, AtomicInteger fieldCount) throws IOException {
if (fieldCount.get() > fieldLimit) {
return;
}
String newField = randomValueOtherThanMany(existingFieldNames::contains, () -> randomAlphaOfLengthBetween(2, 20));
existingFieldNames.add(newField);
mappingBuilder.startObject(newField);
{
if (rarely()) {
mappingBuilder.startObject("properties");
{
int subfields = randomIntBetween(1, 10);
while (existingFieldNames.size() < subfields) {
addRandomField(existingFieldNames, fieldLimit, mappingBuilder, fieldCount);
}
}
mappingBuilder.endObject();
} else {
mappingBuilder.field("type", randomFrom("array", "binary", "range", "boolean", "date", "ip", "keyword", "text"));
fieldCount.incrementAndGet();
}
}
mappingBuilder.endObject();
}
} }