diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ilm/DataTierMigrationRoutedStep.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ilm/DataTierMigrationRoutedStep.java index 7d451c042c0..1120a72c872 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ilm/DataTierMigrationRoutedStep.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ilm/DataTierMigrationRoutedStep.java @@ -13,6 +13,7 @@ import org.elasticsearch.cluster.metadata.IndexMetadata; import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.cluster.node.DiscoveryNodeRole; import org.elasticsearch.cluster.routing.allocation.decider.AllocationDeciders; +import org.elasticsearch.common.Strings; import org.elasticsearch.common.settings.ClusterSettings; import org.elasticsearch.common.settings.Setting; import org.elasticsearch.common.settings.Settings; @@ -25,8 +26,9 @@ import java.util.Locale; import java.util.Set; import static org.elasticsearch.cluster.node.DiscoveryNodeRole.DATA_ROLE; -import static org.elasticsearch.xpack.cluster.routing.allocation.DataTierAllocationDecider.INDEX_ROUTING_INCLUDE_SETTING; +import static org.elasticsearch.xpack.cluster.routing.allocation.DataTierAllocationDecider.INDEX_ROUTING_PREFER_SETTING; import static org.elasticsearch.xpack.core.ilm.AllocationRoutedStep.getPendingAllocations; +import static org.elasticsearch.xpack.core.ilm.step.info.AllocationInfo.waitingForActiveShardsAllocationInfo; /** * Checks whether all shards have been correctly routed in response to updating the allocation rules for an index in order @@ -70,11 +72,23 @@ public class DataTierMigrationRoutedStep extends ClusterStateWaitStep { logger.debug("[{}] lifecycle action for index [{}] executed but index no longer exists", getKey().getAction(), index.getName()); return new Result(false, null); } - String destinationTier = INDEX_ROUTING_INCLUDE_SETTING.get(idxMeta.getSettings()); + String destinationTier = INDEX_ROUTING_PREFER_SETTING.get(idxMeta.getSettings()); if (ActiveShardCount.ALL.enoughShardsActive(clusterState, index.getName()) == false) { - logger.debug("[{}] migration of index [{}] to the [{}] tier cannot progress, as not all shards are active", + if (Strings.isEmpty(destinationTier)) { + logger.debug("[{}] lifecycle action for index [{}] cannot make progress because not all shards are active", + getKey().getAction(), index.getName()); + } else { + logger.debug("[{}] migration of index [{}] to the [{}] tier cannot progress, as not all shards are active", getKey().getAction(), index.getName(), destinationTier); - return new Result(false, AllocationInfo.waitingForActiveShardsAllocationInfo(idxMeta.getNumberOfReplicas())); + } + return new Result(false, waitingForActiveShardsAllocationInfo(idxMeta.getNumberOfReplicas())); + } + + if (Strings.isEmpty(destinationTier)) { + logger.debug("index [{}] has no data tier routing setting configured and all its shards are active. considering the [{}] " + + "step condition met and continuing to the next step", index.getName(), getKey().getName()); + // the user removed the tier routing setting and all the shards are active so we'll cary on + return new Result(true, null); } int allocationPendingAllShards = getPendingAllocations(index, ALLOCATION_DECIDERS, clusterState); diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ilm/DataTierMigrationRoutedStepTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ilm/DataTierMigrationRoutedStepTests.java index 13ae30adba4..6518706544a 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ilm/DataTierMigrationRoutedStepTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ilm/DataTierMigrationRoutedStepTests.java @@ -30,7 +30,7 @@ import java.util.Collections; import java.util.Set; import static java.util.Collections.emptyMap; -import static org.elasticsearch.xpack.cluster.routing.allocation.DataTierAllocationDecider.INDEX_ROUTING_INCLUDE_SETTING; +import static org.elasticsearch.xpack.cluster.routing.allocation.DataTierAllocationDecider.INDEX_ROUTING_PREFER; import static org.elasticsearch.xpack.core.ilm.step.info.AllocationInfo.waitingForActiveShardsAllocationInfo; import static org.hamcrest.Matchers.is; import static org.hamcrest.Matchers.nullValue; @@ -95,7 +95,7 @@ public class DataTierMigrationRoutedStepTests extends AbstractStepTestCase roles) { return new DiscoveryNode(nodeId, buildNewFakeTransportAddress(), emptyMap(), roles, Version.CURRENT); } diff --git a/x-pack/plugin/ilm/src/internalClusterTest/java/org/elasticsearch/xpack/ilm/DataTiersMigrationsTests.java b/x-pack/plugin/ilm/src/internalClusterTest/java/org/elasticsearch/xpack/ilm/DataTiersMigrationsTests.java index 55429decaa3..1469a1a631e 100644 --- a/x-pack/plugin/ilm/src/internalClusterTest/java/org/elasticsearch/xpack/ilm/DataTiersMigrationsTests.java +++ b/x-pack/plugin/ilm/src/internalClusterTest/java/org/elasticsearch/xpack/ilm/DataTiersMigrationsTests.java @@ -6,11 +6,16 @@ package org.elasticsearch.xpack.ilm; +import org.elasticsearch.action.admin.cluster.allocation.ClusterAllocationExplainRequest; +import org.elasticsearch.action.admin.cluster.allocation.ClusterAllocationExplainResponse; import org.elasticsearch.action.admin.indices.create.CreateIndexResponse; +import org.elasticsearch.action.admin.indices.settings.put.UpdateSettingsRequest; +import org.elasticsearch.cluster.routing.ShardRoutingState; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.unit.TimeValue; import org.elasticsearch.plugins.Plugin; import org.elasticsearch.test.ESIntegTestCase; +import org.elasticsearch.xpack.cluster.routing.allocation.DataTierAllocationDecider; import org.elasticsearch.xpack.core.DataTier; import org.elasticsearch.xpack.core.LocalStateCompositeXPackPlugin; import org.elasticsearch.xpack.core.XPackSettings; @@ -29,6 +34,7 @@ import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.Locale; +import java.util.concurrent.TimeUnit; import static org.elasticsearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_REPLICAS; import static org.elasticsearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_SHARDS; @@ -153,4 +159,91 @@ public class DataTiersMigrationsTests extends ESIntegTestCase { assertThat(indexLifecycleExplainResponse.getStep(), is("complete")); }); } + + public void testUserOptsOutOfTierMigration() throws Exception { + internalCluster().startMasterOnlyNodes(1, Settings.EMPTY); + logger.info("starting hot data node"); + internalCluster().startNode(hotNode(Settings.EMPTY)); + + Phase hotPhase = new Phase("hot", TimeValue.ZERO, Collections.emptyMap()); + Phase warmPhase = new Phase("warm", TimeValue.ZERO, Collections.emptyMap()); + Phase coldPhase = new Phase("cold", TimeValue.ZERO, Collections.emptyMap()); + LifecyclePolicy lifecyclePolicy = new LifecyclePolicy( + policy, org.elasticsearch.common.collect.Map.of("hot", hotPhase, "warm", warmPhase, "cold", coldPhase) + ); + PutLifecycleAction.Request putLifecycleRequest = new PutLifecycleAction.Request(lifecyclePolicy); + PutLifecycleAction.Response putLifecycleResponse = client().execute(PutLifecycleAction.INSTANCE, putLifecycleRequest).get(); + assertAcked(putLifecycleResponse); + + Settings settings = Settings.builder().put(indexSettings()).put(SETTING_NUMBER_OF_SHARDS, 1) + .put(SETTING_NUMBER_OF_REPLICAS, 1).put(LifecycleSettings.LIFECYCLE_NAME, policy).build(); + CreateIndexResponse res = client().admin().indices().prepareCreate(managedIndex).setSettings(settings).get(); + assertTrue(res.isAcknowledged()); + + assertBusy(() -> { + ExplainLifecycleRequest explainRequest = new ExplainLifecycleRequest().indices(managedIndex); + ExplainLifecycleResponse explainResponse = client().execute(ExplainLifecycleAction.INSTANCE, + explainRequest).get(); + + IndexLifecycleExplainResponse indexLifecycleExplainResponse = explainResponse.getIndexResponses().get(managedIndex); + assertThat(indexLifecycleExplainResponse.getPhase(), is("warm")); + assertThat(indexLifecycleExplainResponse.getStep(), is(DataTierMigrationRoutedStep.NAME)); + }); + + Settings removeTierRoutingSetting = Settings.builder().putNull(DataTierAllocationDecider.INDEX_ROUTING_PREFER).build(); + UpdateSettingsRequest updateSettingsRequest = new UpdateSettingsRequest(managedIndex).settings(removeTierRoutingSetting); + assertAcked(client().admin().indices().updateSettings(updateSettingsRequest).actionGet()); + + assertBusy(() -> { + ExplainLifecycleRequest explainRequest = new ExplainLifecycleRequest().indices(managedIndex); + ExplainLifecycleResponse explainResponse = client().execute(ExplainLifecycleAction.INSTANCE, + explainRequest).get(); + + IndexLifecycleExplainResponse indexLifecycleExplainResponse = explainResponse.getIndexResponses().get(managedIndex); + assertThat(indexLifecycleExplainResponse.getPhase(), is("warm")); + assertThat(indexLifecycleExplainResponse.getStep(), is(DataTierMigrationRoutedStep.NAME)); + assertReplicaIsUnassigned(); + }, 30, TimeUnit.SECONDS); + + internalCluster().startNode(coldNode(Settings.EMPTY)); + + // the index should successfully allocate + ensureGreen(managedIndex); + + // the index is successfully allocated but the migrate action from the cold phase re-configured the tier migration setting to the + // cold tier so ILM is stuck in `check-migration` in the cold phase this time + // we have 2 options to resume the ILM execution: + // 1. start another cold node so both the primary and replica can relocate to the cold nodes + // 2. remove the tier routing setting from the index again (we're doing this below) + assertBusy(() -> { + ExplainLifecycleRequest explainRequest = new ExplainLifecycleRequest().indices(managedIndex); + ExplainLifecycleResponse explainResponse = client().execute(ExplainLifecycleAction.INSTANCE, + explainRequest).get(); + + IndexLifecycleExplainResponse indexLifecycleExplainResponse = explainResponse.getIndexResponses().get(managedIndex); + assertThat(indexLifecycleExplainResponse.getPhase(), is("cold")); + assertThat(indexLifecycleExplainResponse.getStep(), is(DataTierMigrationRoutedStep.NAME)); + }); + + // remove the tier routing setting again + assertAcked(client().admin().indices().updateSettings(updateSettingsRequest).actionGet()); + + // wait for lifecycle to complete in the cold phase + assertBusy(() -> { + ExplainLifecycleRequest explainRequest = new ExplainLifecycleRequest().indices(managedIndex); + ExplainLifecycleResponse explainResponse = client().execute(ExplainLifecycleAction.INSTANCE, + explainRequest).get(); + + IndexLifecycleExplainResponse indexLifecycleExplainResponse = explainResponse.getIndexResponses().get(managedIndex); + assertThat(indexLifecycleExplainResponse.getPhase(), is("cold")); + assertThat(indexLifecycleExplainResponse.getStep(), is("complete")); + }, 30, TimeUnit.SECONDS); + } + + private void assertReplicaIsUnassigned() { + ClusterAllocationExplainRequest explainReplicaShard = + new ClusterAllocationExplainRequest().setIndex(managedIndex).setPrimary(false).setShard(0); + ClusterAllocationExplainResponse response = client().admin().cluster().allocationExplain(explainReplicaShard).actionGet(); + assertThat(response.getExplanation().getShardState(), is(ShardRoutingState.UNASSIGNED)); + } }