From d39c18f7fe77d886a00672613ce50119821030d1 Mon Sep 17 00:00:00 2001 From: Ankit Jain Date: Fri, 15 Apr 2022 05:06:01 +0530 Subject: [PATCH] Excluding system indices from max shard limit validator (#2894) * Excluding system indices from max shard limit validator Signed-off-by: Ankit Jain * Fixing spotless check violations Signed-off-by: Ankit Jain * Fixing NPE due to null isHidden Signed-off-by: Ankit Jain * Adding unit tests for shard opening scenario Signed-off-by: Ankit Jain * Addressing review comments Signed-off-by: Ankit Jain --- .../metadata/MetadataCreateIndexService.java | 17 +-- .../indices/ShardLimitValidator.java | 14 ++- .../org/opensearch/indices/SystemIndices.java | 25 +++++ .../main/java/org/opensearch/node/Node.java | 2 +- .../opensearch/snapshots/RestoreService.java | 6 +- .../MetadataRolloverServiceTests.java | 10 +- .../indices/ShardLimitValidatorTests.java | 106 +++++++++++++++++- .../indices/cluster/ClusterStateChanges.java | 5 +- .../snapshots/SnapshotResiliencyTests.java | 5 +- 9 files changed, 160 insertions(+), 30 deletions(-) diff --git a/server/src/main/java/org/opensearch/cluster/metadata/MetadataCreateIndexService.java b/server/src/main/java/org/opensearch/cluster/metadata/MetadataCreateIndexService.java index 64198dce89c..7f2be879f36 100644 --- a/server/src/main/java/org/opensearch/cluster/metadata/MetadataCreateIndexService.java +++ b/server/src/main/java/org/opensearch/cluster/metadata/MetadataCreateIndexService.java @@ -88,7 +88,6 @@ import org.opensearch.indices.IndexCreationException; import org.opensearch.indices.IndicesService; import org.opensearch.indices.InvalidIndexNameException; import org.opensearch.indices.ShardLimitValidator; -import org.opensearch.indices.SystemIndexDescriptor; import org.opensearch.indices.SystemIndices; import org.opensearch.threadpool.ThreadPool; @@ -214,17 +213,9 @@ public class MetadataCreateIndexService { * @param isHidden Whether or not this is a hidden index */ public boolean validateDotIndex(String index, @Nullable Boolean isHidden) { - boolean isSystem = false; if (index.charAt(0) == '.') { - SystemIndexDescriptor matchingDescriptor = systemIndices.findMatchingDescriptor(index); - if (matchingDescriptor != null) { - logger.trace( - "index [{}] is a system index because it matches index pattern [{}] with description [{}]", - index, - matchingDescriptor.getIndexPattern(), - matchingDescriptor.getDescription() - ); - isSystem = true; + if (systemIndices.validateSystemIndex(index)) { + return true; } else if (isHidden) { logger.trace("index [{}] is a hidden index", index); } else { @@ -237,7 +228,7 @@ public class MetadataCreateIndexService { } } - return isSystem; + return false; } /** @@ -884,7 +875,7 @@ public class MetadataCreateIndexService { * We can not validate settings until we have applied templates, otherwise we do not know the actual settings * that will be used to create this index. */ - shardLimitValidator.validateShardLimit(indexSettings, currentState); + shardLimitValidator.validateShardLimit(request.index(), indexSettings, currentState); if (IndexSettings.INDEX_SOFT_DELETES_SETTING.get(indexSettings) == false && IndexMetadata.SETTING_INDEX_VERSION_CREATED.get(indexSettings).onOrAfter(Version.V_2_0_0)) { throw new IllegalArgumentException( diff --git a/server/src/main/java/org/opensearch/indices/ShardLimitValidator.java b/server/src/main/java/org/opensearch/indices/ShardLimitValidator.java index 3ed0dbee59e..7e4376e8ea8 100644 --- a/server/src/main/java/org/opensearch/indices/ShardLimitValidator.java +++ b/server/src/main/java/org/opensearch/indices/ShardLimitValidator.java @@ -63,10 +63,12 @@ public class ShardLimitValidator { Setting.Property.NodeScope ); protected final AtomicInteger shardLimitPerNode = new AtomicInteger(); + private final SystemIndices systemIndices; - public ShardLimitValidator(final Settings settings, ClusterService clusterService) { + public ShardLimitValidator(final Settings settings, ClusterService clusterService, SystemIndices systemIndices) { this.shardLimitPerNode.set(SETTING_CLUSTER_MAX_SHARDS_PER_NODE.get(settings)); clusterService.getClusterSettings().addSettingsUpdateConsumer(SETTING_CLUSTER_MAX_SHARDS_PER_NODE, this::setShardLimitPerNode); + this.systemIndices = systemIndices; } private void setShardLimitPerNode(int newValue) { @@ -84,11 +86,17 @@ public class ShardLimitValidator { /** * Checks whether an index can be created without going over the cluster shard limit. * + * @param indexName the name of the index being created * @param settings the settings of the index to be created * @param state the current cluster state * @throws ValidationException if creating this index would put the cluster over the cluster shard limit */ - public void validateShardLimit(final Settings settings, final ClusterState state) { + public void validateShardLimit(final String indexName, final Settings settings, final ClusterState state) { + // Validate shard limit only for non system indices as it is not hard limit anyways + if (systemIndices.validateSystemIndex(indexName)) { + return; + } + final int numberOfShards = INDEX_NUMBER_OF_SHARDS_SETTING.get(settings); final int numberOfReplicas = IndexMetadata.INDEX_NUMBER_OF_REPLICAS_SETTING.get(settings); final int shardsToCreate = numberOfShards * (1 + numberOfReplicas); @@ -111,6 +119,8 @@ public class ShardLimitValidator { */ public void validateShardLimit(ClusterState currentState, Index[] indicesToOpen) { int shardsToOpen = Arrays.stream(indicesToOpen) + // Validate shard limit only for non system indices as it is not hard limit anyways + .filter(index -> !systemIndices.validateSystemIndex(index.getName())) .filter(index -> currentState.metadata().index(index).getState().equals(IndexMetadata.State.CLOSE)) .mapToInt(index -> getTotalShardCount(currentState, index)) .sum(); diff --git a/server/src/main/java/org/opensearch/indices/SystemIndices.java b/server/src/main/java/org/opensearch/indices/SystemIndices.java index fc34645b432..04229155467 100644 --- a/server/src/main/java/org/opensearch/indices/SystemIndices.java +++ b/server/src/main/java/org/opensearch/indices/SystemIndices.java @@ -32,6 +32,8 @@ package org.opensearch.indices; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; import org.apache.lucene.util.automaton.Automata; import org.apache.lucene.util.automaton.Automaton; import org.apache.lucene.util.automaton.CharacterRunAutomaton; @@ -63,6 +65,8 @@ import static org.opensearch.tasks.TaskResultsService.TASK_INDEX; * to reduce the locations within the code that need to deal with {@link SystemIndexDescriptor}s. */ public class SystemIndices { + private static final Logger logger = LogManager.getLogger(SystemIndices.class); + private static final Map> SERVER_SYSTEM_INDEX_DESCRIPTORS = singletonMap( TaskResultsService.class.getName(), singletonList(new SystemIndexDescriptor(TASK_INDEX + "*", "Task Result Index")) @@ -135,6 +139,27 @@ public class SystemIndices { } } + /** + * Validates (if this index has a dot-prefixed name) and it is system index. + * @param index The name of the index in question + */ + public boolean validateSystemIndex(String index) { + if (index.charAt(0) == '.') { + SystemIndexDescriptor matchingDescriptor = findMatchingDescriptor(index); + if (matchingDescriptor != null) { + logger.trace( + "index [{}] is a system index because it matches index pattern [{}] with description [{}]", + index, + matchingDescriptor.getIndexPattern(), + matchingDescriptor.getDescription() + ); + return true; + } + } + + return false; + } + private static CharacterRunAutomaton buildCharacterRunAutomaton(Collection descriptors) { Optional automaton = descriptors.stream() .map(descriptor -> Regex.simpleMatchToAutomaton(descriptor.getIndexPattern())) diff --git a/server/src/main/java/org/opensearch/node/Node.java b/server/src/main/java/org/opensearch/node/Node.java index 8ede6fdf766..46400e5c8d2 100644 --- a/server/src/main/java/org/opensearch/node/Node.java +++ b/server/src/main/java/org/opensearch/node/Node.java @@ -635,7 +635,7 @@ public class Node implements Closeable { final AliasValidator aliasValidator = new AliasValidator(); - final ShardLimitValidator shardLimitValidator = new ShardLimitValidator(settings, clusterService); + final ShardLimitValidator shardLimitValidator = new ShardLimitValidator(settings, clusterService, systemIndices); final MetadataCreateIndexService metadataCreateIndexService = new MetadataCreateIndexService( settings, clusterService, diff --git a/server/src/main/java/org/opensearch/snapshots/RestoreService.java b/server/src/main/java/org/opensearch/snapshots/RestoreService.java index ad5cfe6e443..e1b143b5f52 100644 --- a/server/src/main/java/org/opensearch/snapshots/RestoreService.java +++ b/server/src/main/java/org/opensearch/snapshots/RestoreService.java @@ -384,7 +384,11 @@ public class RestoreService implements ClusterStateApplier { .put(snapshotIndexMetadata.getSettings()) .put(IndexMetadata.SETTING_INDEX_UUID, UUIDs.randomBase64UUID()) ); - shardLimitValidator.validateShardLimit(snapshotIndexMetadata.getSettings(), currentState); + shardLimitValidator.validateShardLimit( + renamedIndexName, + snapshotIndexMetadata.getSettings(), + currentState + ); if (!request.includeAliases() && !snapshotIndexMetadata.getAliases().isEmpty()) { // Remove all aliases - they shouldn't be restored indexMdBuilder.removeAllAliases(); diff --git a/server/src/test/java/org/opensearch/action/admin/indices/rollover/MetadataRolloverServiceTests.java b/server/src/test/java/org/opensearch/action/admin/indices/rollover/MetadataRolloverServiceTests.java index fd052308ed8..afe35538ada 100644 --- a/server/src/test/java/org/opensearch/action/admin/indices/rollover/MetadataRolloverServiceTests.java +++ b/server/src/test/java/org/opensearch/action/admin/indices/rollover/MetadataRolloverServiceTests.java @@ -603,7 +603,8 @@ public class MetadataRolloverServiceTests extends OpenSearchTestCase { IndexNameExpressionResolver mockIndexNameExpressionResolver = mock(IndexNameExpressionResolver.class); when(mockIndexNameExpressionResolver.resolveDateMathExpression(any())).then(returnsFirstArg()); - ShardLimitValidator shardLimitValidator = new ShardLimitValidator(Settings.EMPTY, clusterService); + final SystemIndices systemIndices = new SystemIndices(emptyMap()); + ShardLimitValidator shardLimitValidator = new ShardLimitValidator(Settings.EMPTY, clusterService, systemIndices); MetadataCreateIndexService createIndexService = new MetadataCreateIndexService( Settings.EMPTY, clusterService, @@ -615,7 +616,7 @@ public class MetadataRolloverServiceTests extends OpenSearchTestCase { IndexScopedSettings.DEFAULT_SCOPED_SETTINGS, testThreadPool, null, - new SystemIndices(emptyMap()), + systemIndices, false ); MetadataIndexAliasesService indexAliasesService = new MetadataIndexAliasesService( @@ -739,7 +740,8 @@ public class MetadataRolloverServiceTests extends OpenSearchTestCase { IndexNameExpressionResolver mockIndexNameExpressionResolver = mock(IndexNameExpressionResolver.class); when(mockIndexNameExpressionResolver.resolveDateMathExpression(any())).then(returnsFirstArg()); - ShardLimitValidator shardLimitValidator = new ShardLimitValidator(Settings.EMPTY, clusterService); + final SystemIndices systemIndices = new SystemIndices(emptyMap()); + ShardLimitValidator shardLimitValidator = new ShardLimitValidator(Settings.EMPTY, clusterService, systemIndices); MetadataCreateIndexService createIndexService = new MetadataCreateIndexService( Settings.EMPTY, clusterService, @@ -751,7 +753,7 @@ public class MetadataRolloverServiceTests extends OpenSearchTestCase { IndexScopedSettings.DEFAULT_SCOPED_SETTINGS, testThreadPool, null, - new SystemIndices(emptyMap()), + systemIndices, false ); MetadataIndexAliasesService indexAliasesService = new MetadataIndexAliasesService( diff --git a/server/src/test/java/org/opensearch/indices/ShardLimitValidatorTests.java b/server/src/test/java/org/opensearch/indices/ShardLimitValidatorTests.java index 7e9c971cae1..a61ca13df02 100644 --- a/server/src/test/java/org/opensearch/indices/ShardLimitValidatorTests.java +++ b/server/src/test/java/org/opensearch/indices/ShardLimitValidatorTests.java @@ -52,6 +52,8 @@ import java.util.Arrays; import java.util.Optional; import java.util.stream.Collectors; +import static java.util.Collections.emptyMap; +import static org.opensearch.cluster.metadata.IndexMetadata.*; import static org.opensearch.cluster.metadata.MetadataIndexStateServiceTests.addClosedIndex; import static org.opensearch.cluster.metadata.MetadataIndexStateServiceTests.addOpenedIndex; import static org.opensearch.cluster.shards.ShardCounts.forDataNodeCount; @@ -104,7 +106,54 @@ public class ShardLimitValidatorTests extends OpenSearchTestCase { assertFalse(errorMessage.isPresent()); } - public void testValidateShardLimit() { + /** + * This test validates that system index creation succeeds + * even though it exceeds the cluster max shard limit + */ + public void testSystemIndexCreationSucceeds() { + final ShardLimitValidator shardLimitValidator = createTestShardLimitService(1); + final Settings settings = Settings.builder() + .put(SETTING_VERSION_CREATED, Version.CURRENT) + .put(SETTING_NUMBER_OF_SHARDS, 1) + .put(SETTING_NUMBER_OF_REPLICAS, 1) + .build(); + final ClusterState state = createClusterForShardLimitTest(1, 1, 0); + shardLimitValidator.validateShardLimit(".tasks", settings, state); + } + + /** + * This test validates that non-system index creation + * fails when it exceeds the cluster max shard limit + */ + public void testNonSystemIndexCreationFails() { + final ShardLimitValidator shardLimitValidator = createTestShardLimitService(1); + final Settings settings = Settings.builder() + .put(SETTING_VERSION_CREATED, Version.CURRENT) + .put(SETTING_NUMBER_OF_SHARDS, 1) + .put(SETTING_NUMBER_OF_REPLICAS, 1) + .build(); + final ClusterState state = createClusterForShardLimitTest(1, 1, 0); + final ValidationException exception = expectThrows( + ValidationException.class, + () -> shardLimitValidator.validateShardLimit("abc", settings, state) + ); + assertEquals( + "Validation Failed: 1: this action would add [" + + 2 + + "] total shards, but this cluster currently has [" + + 1 + + "]/[" + + 1 + + "] maximum shards open;", + exception.getMessage() + ); + } + + /** + * This test validates that non-system index opening + * fails when it exceeds the cluster max shard limit + */ + public void testNonSystemIndexOpeningFails() { int nodesInCluster = randomIntBetween(2, 90); ShardCounts counts = forDataNodeCount(nodesInCluster); ClusterState state = createClusterForShardLimitTest( @@ -140,6 +189,33 @@ public class ShardLimitValidatorTests extends OpenSearchTestCase { ); } + /** + * This test validates that system index opening succeeds + * even when it exceeds the cluster max shard limit + */ + public void testSystemIndexOpeningSucceeds() { + int nodesInCluster = randomIntBetween(2, 90); + ShardCounts counts = forDataNodeCount(nodesInCluster); + ClusterState state = createClusterForShardLimitTest( + nodesInCluster, + randomAlphaOfLengthBetween(5, 15), + counts.getFirstIndexShards(), + counts.getFirstIndexReplicas(), + ".tasks", // Adding closed system index to cluster state + counts.getFailingIndexShards(), + counts.getFailingIndexReplicas() + ); + + Index[] indices = Arrays.stream(state.metadata().indices().values().toArray(IndexMetadata.class)) + .map(IndexMetadata::getIndex) + .collect(Collectors.toList()) + .toArray(new Index[2]); + + // Shard limit validation succeeds without any issues as system index is being opened + ShardLimitValidator shardLimitValidator = createTestShardLimitService(counts.getShardsPerNode()); + shardLimitValidator.validateShardLimit(state, indices); + } + public static ClusterState createClusterForShardLimitTest(int nodesInCluster, int shardsInIndex, int replicas) { ImmutableOpenMap.Builder dataNodes = ImmutableOpenMap.builder(); for (int i = 0; i < nodesInCluster; i++) { @@ -165,8 +241,10 @@ public class ShardLimitValidatorTests extends OpenSearchTestCase { public static ClusterState createClusterForShardLimitTest( int nodesInCluster, + String openIndexName, int openIndexShards, int openIndexReplicas, + String closeIndexName, int closedIndexShards, int closedIndexReplicas ) { @@ -178,8 +256,8 @@ public class ShardLimitValidatorTests extends OpenSearchTestCase { when(nodes.getDataNodes()).thenReturn(dataNodes.build()); ClusterState state = ClusterState.builder(ClusterName.DEFAULT).build(); - state = addOpenedIndex(randomAlphaOfLengthBetween(5, 15), openIndexShards, openIndexReplicas, state); - state = addClosedIndex(randomAlphaOfLengthBetween(5, 15), closedIndexShards, closedIndexReplicas, state); + state = addOpenedIndex(openIndexName, openIndexShards, openIndexReplicas, state); + state = addClosedIndex(closeIndexName, closedIndexShards, closedIndexReplicas, state); final Metadata.Builder metadata = Metadata.builder(state.metadata()); if (randomBoolean()) { @@ -190,6 +268,24 @@ public class ShardLimitValidatorTests extends OpenSearchTestCase { return ClusterState.builder(state).metadata(metadata).nodes(nodes).build(); } + public static ClusterState createClusterForShardLimitTest( + int nodesInCluster, + int openIndexShards, + int openIndexReplicas, + int closedIndexShards, + int closedIndexReplicas + ) { + return createClusterForShardLimitTest( + nodesInCluster, + randomAlphaOfLengthBetween(5, 15), + openIndexShards, + openIndexReplicas, + randomAlphaOfLengthBetween(5, 15), + closedIndexShards, + closedIndexReplicas + ); + } + /** * Creates a {@link ShardLimitValidator} for testing with the given setting and a mocked cluster service. * @@ -204,7 +300,7 @@ public class ShardLimitValidatorTests extends OpenSearchTestCase { new ClusterSettings(limitOnlySettings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS) ); - return new ShardLimitValidator(limitOnlySettings, clusterService); + return new ShardLimitValidator(limitOnlySettings, clusterService, new SystemIndices(emptyMap())); } /** @@ -217,6 +313,6 @@ public class ShardLimitValidatorTests extends OpenSearchTestCase { public static ShardLimitValidator createTestShardLimitService(int maxShardsPerNode, ClusterService clusterService) { Settings limitOnlySettings = Settings.builder().put(SETTING_CLUSTER_MAX_SHARDS_PER_NODE.getKey(), maxShardsPerNode).build(); - return new ShardLimitValidator(limitOnlySettings, clusterService); + return new ShardLimitValidator(limitOnlySettings, clusterService, new SystemIndices(emptyMap())); } } diff --git a/server/src/test/java/org/opensearch/indices/cluster/ClusterStateChanges.java b/server/src/test/java/org/opensearch/indices/cluster/ClusterStateChanges.java index 99ec043cc78..a7d9ba0bf3d 100644 --- a/server/src/test/java/org/opensearch/indices/cluster/ClusterStateChanges.java +++ b/server/src/test/java/org/opensearch/indices/cluster/ClusterStateChanges.java @@ -259,7 +259,8 @@ public class ClusterStateChanges { null, actionFilters ); - ShardLimitValidator shardLimitValidator = new ShardLimitValidator(SETTINGS, clusterService); + final SystemIndices systemIndices = new SystemIndices(emptyMap()); + ShardLimitValidator shardLimitValidator = new ShardLimitValidator(SETTINGS, clusterService, systemIndices); MetadataIndexStateService indexStateService = new MetadataIndexStateService( clusterService, allocationService, @@ -290,7 +291,7 @@ public class ClusterStateChanges { IndexScopedSettings.DEFAULT_SCOPED_SETTINGS, threadPool, xContentRegistry, - new SystemIndices(emptyMap()), + systemIndices, true ); diff --git a/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java b/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java index 26e19e532b6..a896aab0f70 100644 --- a/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java +++ b/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java @@ -1863,7 +1863,8 @@ public class SnapshotResiliencyTests extends OpenSearchTestCase { RetentionLeaseSyncer.EMPTY ); Map actions = new HashMap<>(); - final ShardLimitValidator shardLimitValidator = new ShardLimitValidator(settings, clusterService); + final SystemIndices systemIndices = new SystemIndices(emptyMap()); + final ShardLimitValidator shardLimitValidator = new ShardLimitValidator(settings, clusterService, systemIndices); final MetadataCreateIndexService metadataCreateIndexService = new MetadataCreateIndexService( settings, clusterService, @@ -1875,7 +1876,7 @@ public class SnapshotResiliencyTests extends OpenSearchTestCase { indexScopedSettings, threadPool, namedXContentRegistry, - new SystemIndices(emptyMap()), + systemIndices, false ); actions.put(