[7.x] Narrow period of Shrink action in which ILM prevents stopping (#43254) (#43393)

* Narrow period of Shrink action in which ILM prevents stopping

Prior to this change, we would prevent stopping of ILM if the index was
anywhere in the shrink action. This commit changes
`IndexLifecycleService` to allow stopping when in any of the innocuous
steps during shrink. This changes ILM only to prevent stopping if
absolutely necessary.

Resolves #43253

* Rename variable for ignore actions -> ignore steps

* Fix comment

* Factor test out to test *all* stoppable steps
This commit is contained in:
Lee Hinman 2019-06-19 16:37:41 -06:00 committed by GitHub
parent 6b084e55c5
commit c2bf628a6d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 75 additions and 20 deletions

View File

@ -29,7 +29,7 @@ import org.elasticsearch.xpack.core.indexlifecycle.LifecycleExecutionState;
import org.elasticsearch.xpack.core.indexlifecycle.LifecyclePolicy;
import org.elasticsearch.xpack.core.indexlifecycle.LifecycleSettings;
import org.elasticsearch.xpack.core.indexlifecycle.OperationMode;
import org.elasticsearch.xpack.core.indexlifecycle.ShrinkAction;
import org.elasticsearch.xpack.core.indexlifecycle.ShrinkStep;
import org.elasticsearch.xpack.core.indexlifecycle.Step.StepKey;
import org.elasticsearch.xpack.core.scheduler.SchedulerEngine;
@ -45,7 +45,7 @@ import java.util.function.LongSupplier;
public class IndexLifecycleService
implements ClusterStateListener, ClusterStateApplier, SchedulerEngine.Listener, Closeable, LocalNodeMasterListener {
private static final Logger logger = LogManager.getLogger(IndexLifecycleService.class);
private static final Set<String> IGNORE_ACTIONS_MAINTENANCE_REQUESTED = Collections.singleton(ShrinkAction.NAME);
private static final Set<String> IGNORE_STEPS_MAINTENANCE_REQUESTED = Collections.singleton(ShrinkStep.NAME);
private volatile boolean isMaster = false;
private volatile TimeValue pollInterval;
@ -115,15 +115,15 @@ public class IndexLifecycleService
StepKey stepKey = IndexLifecycleRunner.getCurrentStepKey(lifecycleState);
if (OperationMode.STOPPING == currentMode) {
if (stepKey != null && IGNORE_ACTIONS_MAINTENANCE_REQUESTED.contains(stepKey.getAction())) {
logger.info("waiting to stop ILM because index [{}] with policy [{}] is currently in action [{}]",
idxMeta.getIndex().getName(), policyName, stepKey.getAction());
if (stepKey != null && IGNORE_STEPS_MAINTENANCE_REQUESTED.contains(stepKey.getName())) {
logger.info("waiting to stop ILM because index [{}] with policy [{}] is currently in step [{}]",
idxMeta.getIndex().getName(), policyName, stepKey.getName());
lifecycleRunner.maybeRunAsyncAction(clusterState, idxMeta, policyName, stepKey);
// ILM is trying to stop, but this index is in a Shrink action (or other dangerous action) so we can't stop
// ILM is trying to stop, but this index is in a Shrink step (or other dangerous step) so we can't stop
safeToStop = false;
} else {
logger.info("skipping policy execution for index [{}] with policy [{}] because ILM is stopping",
idxMeta.getIndex().getName(), policyName);
logger.info("skipping policy execution of step [{}] for index [{}] with policy [{}] because ILM is stopping",
stepKey == null ? "n/a" : stepKey.getName(), idxMeta.getIndex().getName(), policyName);
}
} else {
lifecycleRunner.maybeRunAsyncAction(clusterState, idxMeta, policyName, stepKey);
@ -249,19 +249,19 @@ public class IndexLifecycleService
StepKey stepKey = IndexLifecycleRunner.getCurrentStepKey(lifecycleState);
if (OperationMode.STOPPING == currentMode) {
if (stepKey != null && IGNORE_ACTIONS_MAINTENANCE_REQUESTED.contains(stepKey.getAction())) {
logger.info("waiting to stop ILM because index [{}] with policy [{}] is currently in action [{}]",
idxMeta.getIndex().getName(), policyName, stepKey.getAction());
if (stepKey != null && IGNORE_STEPS_MAINTENANCE_REQUESTED.contains(stepKey.getName())) {
logger.info("waiting to stop ILM because index [{}] with policy [{}] is currently in step [{}]",
idxMeta.getIndex().getName(), policyName, stepKey.getName());
if (fromClusterStateChange) {
lifecycleRunner.runPolicyAfterStateChange(policyName, idxMeta);
} else {
lifecycleRunner.runPeriodicStep(policyName, idxMeta);
}
// ILM is trying to stop, but this index is in a Shrink action (or other dangerous action) so we can't stop
// ILM is trying to stop, but this index is in a Shrink step (or other dangerous step) so we can't stop
safeToStop = false;
} else {
logger.info("skipping policy execution for index [{}] with policy [{}] because ILM is stopping",
idxMeta.getIndex().getName(), policyName);
logger.info("skipping policy execution of step [{}] for index [{}] with policy [{}] because ILM is stopping",
stepKey == null ? "n/a" : stepKey.getName(), idxMeta.getIndex().getName(), policyName);
}
} else {
if (fromClusterStateChange) {

View File

@ -37,6 +37,7 @@ import org.elasticsearch.xpack.core.indexlifecycle.MockAction;
import org.elasticsearch.xpack.core.indexlifecycle.OperationMode;
import org.elasticsearch.xpack.core.indexlifecycle.Phase;
import org.elasticsearch.xpack.core.indexlifecycle.ShrinkAction;
import org.elasticsearch.xpack.core.indexlifecycle.ShrinkStep;
import org.elasticsearch.xpack.core.indexlifecycle.Step;
import org.elasticsearch.xpack.core.scheduler.SchedulerEngine;
import org.junit.After;
@ -58,6 +59,7 @@ import static org.elasticsearch.xpack.core.indexlifecycle.LifecyclePolicyTestsUt
import static org.hamcrest.Matchers.equalTo;
import static org.mockito.Matchers.any;
import static org.mockito.Matchers.anyString;
import static org.mockito.Matchers.eq;
import static org.mockito.Mockito.doAnswer;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;
@ -148,7 +150,7 @@ public class IndexLifecycleServiceTests extends ESTestCase {
}
public void testRequestedStopOnShrink() {
Step.StepKey mockShrinkStep = new Step.StepKey(randomAlphaOfLength(4), ShrinkAction.NAME, randomAlphaOfLength(5));
Step.StepKey mockShrinkStep = new Step.StepKey(randomAlphaOfLength(4), ShrinkAction.NAME, ShrinkStep.NAME);
String policyName = randomAlphaOfLengthBetween(1, 20);
IndexLifecycleRunnerTests.MockClusterStateActionStep mockStep =
new IndexLifecycleRunnerTests.MockClusterStateActionStep(mockShrinkStep, randomStepKey());
@ -180,14 +182,67 @@ public class IndexLifecycleServiceTests extends ESTestCase {
.build();
ClusterChangedEvent event = new ClusterChangedEvent("_source", currentState, ClusterState.EMPTY_STATE);
SetOnce<Boolean> executedShrink = new SetOnce<>();
SetOnce<Boolean> changedOperationMode = new SetOnce<>();
doAnswer(invocationOnMock -> {
executedShrink.set(true);
changedOperationMode.set(true);
return null;
}).when(clusterService).submitStateUpdateTask(anyString(), any(ExecuteStepsUpdateTask.class));
}).when(clusterService).submitStateUpdateTask(eq("ilm_operation_mode_update"), any(OperationModeUpdateTask.class));
indexLifecycleService.applyClusterState(event);
indexLifecycleService.triggerPolicies(currentState, true);
assertTrue(executedShrink.get());
assertNull(changedOperationMode.get());
}
public void testRequestedStopInShrinkActionButNotShrinkStep() {
// test all the shrink action steps that ILM can be stopped during (basically all of them minus the actual shrink)
ShrinkAction action = new ShrinkAction(1);
action.toSteps(mock(Client.class), "warm", randomStepKey()).stream()
.map(sk -> sk.getKey().getName())
.filter(name -> name.equals(ShrinkStep.NAME) == false)
.forEach(this::verifyCanStopWithStep);
}
// Check that ILM can stop when in the shrink action on the provided step
private void verifyCanStopWithStep(String stoppableStep) {
Step.StepKey mockShrinkStep = new Step.StepKey(randomAlphaOfLength(4), ShrinkAction.NAME, stoppableStep);
String policyName = randomAlphaOfLengthBetween(1, 20);
IndexLifecycleRunnerTests.MockClusterStateActionStep mockStep =
new IndexLifecycleRunnerTests.MockClusterStateActionStep(mockShrinkStep, randomStepKey());
MockAction mockAction = new MockAction(Collections.singletonList(mockStep));
Phase phase = new Phase("phase", TimeValue.ZERO, Collections.singletonMap("action", mockAction));
LifecyclePolicy policy = newTestLifecyclePolicy(policyName, Collections.singletonMap(phase.getName(), phase));
SortedMap<String, LifecyclePolicyMetadata> policyMap = new TreeMap<>();
policyMap.put(policyName, new LifecyclePolicyMetadata(policy, Collections.emptyMap(),
randomNonNegativeLong(), randomNonNegativeLong()));
Index index = new Index(randomAlphaOfLengthBetween(1, 20), randomAlphaOfLengthBetween(1, 20));
LifecycleExecutionState.Builder lifecycleState = LifecycleExecutionState.builder();
lifecycleState.setPhase(mockShrinkStep.getPhase());
lifecycleState.setAction(mockShrinkStep.getAction());
lifecycleState.setStep(mockShrinkStep.getName());
IndexMetaData indexMetadata = IndexMetaData.builder(index.getName())
.settings(settings(Version.CURRENT).put(LifecycleSettings.LIFECYCLE_NAME_SETTING.getKey(), policyName))
.putCustom(ILM_CUSTOM_METADATA_KEY, lifecycleState.build().asMap())
.numberOfShards(randomIntBetween(1, 5)).numberOfReplicas(randomIntBetween(0, 5)).build();
ImmutableOpenMap.Builder<String, IndexMetaData> indices = ImmutableOpenMap.<String, IndexMetaData> builder()
.fPut(index.getName(), indexMetadata);
MetaData metaData = MetaData.builder()
.putCustom(IndexLifecycleMetadata.TYPE, new IndexLifecycleMetadata(policyMap, OperationMode.STOPPING))
.indices(indices.build())
.persistentSettings(settings(Version.CURRENT).build())
.build();
ClusterState currentState = ClusterState.builder(ClusterName.DEFAULT)
.metaData(metaData)
.nodes(DiscoveryNodes.builder().localNodeId(nodeId).masterNodeId(nodeId).add(masterNode).build())
.build();
ClusterChangedEvent event = new ClusterChangedEvent("_source", currentState, ClusterState.EMPTY_STATE);
SetOnce<Boolean> changedOperationMode = new SetOnce<>();
doAnswer(invocationOnMock -> {
changedOperationMode.set(true);
return null;
}).when(clusterService).submitStateUpdateTask(eq("ilm_operation_mode_update"), any(OperationModeUpdateTask.class));
indexLifecycleService.applyClusterState(event);
indexLifecycleService.triggerPolicies(currentState, true);
assertTrue(changedOperationMode.get());
}
public void testRequestedStopOnSafeAction() {
@ -236,7 +291,7 @@ public class IndexLifecycleServiceTests extends ESTestCase {
assertThat(task.getOperationMode(), equalTo(OperationMode.STOPPED));
moveToMaintenance.set(true);
return null;
}).when(clusterService).submitStateUpdateTask(anyString(), any(OperationModeUpdateTask.class));
}).when(clusterService).submitStateUpdateTask(eq("ilm_operation_mode_update"), any(OperationModeUpdateTask.class));
indexLifecycleService.applyClusterState(event);
indexLifecycleService.triggerPolicies(currentState, randomBoolean());