Allow ILM to stop if indices have nonexistent policies (#40820)
Prior to this PR, there is a bug in ILM which does not allow ILM to stop if one or more indices have an index.lifecycle.name which refers to a policy that does not exist - the operation_mode will be stuck as STOPPING until either the policy is created or the nonexistent policy is removed from those indices. This change allows ILM to stop in this case and makes the logging more clear as to why ILM is not stopping.
This commit is contained in:
parent
65d25186d3
commit
5347dec55e
|
@ -812,6 +812,28 @@ public class TimeSeriesLifecycleActionsIT extends ESRestTestCase {
|
|||
});
|
||||
}
|
||||
|
||||
public void testCanStopILMWithPolicyUsingNonexistentPolicy() throws Exception {
|
||||
createIndexWithSettings(index, Settings.builder().put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1)
|
||||
.put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 0)
|
||||
.put(LifecycleSettings.LIFECYCLE_NAME_SETTING.getKey(), randomAlphaOfLengthBetween(5,15)));
|
||||
|
||||
Request stopILMRequest = new Request("POST", "_ilm/stop");
|
||||
assertOK(client().performRequest(stopILMRequest));
|
||||
|
||||
Request statusRequest = new Request("GET", "_ilm/status");
|
||||
assertBusy(() -> {
|
||||
Response statusResponse = client().performRequest(statusRequest);
|
||||
assertOK(statusResponse);
|
||||
Map<String, Object> statusResponseMap = entityAsMap(statusResponse);
|
||||
String status = (String) statusResponseMap.get("operation_mode");
|
||||
assertEquals("STOPPED", status);
|
||||
});
|
||||
|
||||
// Re-start ILM so that subsequent tests don't fail
|
||||
Request startILMReqest = new Request("POST", "_ilm/start");
|
||||
assertOK(client().performRequest(startILMReqest));
|
||||
}
|
||||
|
||||
private void createFullPolicy(TimeValue hotTime) throws IOException {
|
||||
Map<String, LifecycleAction> hotActions = new HashMap<>();
|
||||
hotActions.put(SetPriorityAction.NAME, new SetPriorityAction(100));
|
||||
|
|
|
@ -43,7 +43,7 @@ import java.util.function.LongSupplier;
|
|||
* A service which runs the {@link LifecyclePolicy}s associated with indexes.
|
||||
*/
|
||||
public class IndexLifecycleService
|
||||
implements ClusterStateListener, ClusterStateApplier, SchedulerEngine.Listener, Closeable, LocalNodeMasterListener {
|
||||
implements ClusterStateListener, ClusterStateApplier, SchedulerEngine.Listener, Closeable, LocalNodeMasterListener {
|
||||
private static final Logger logger = LogManager.getLogger(IndexLifecycleService.class);
|
||||
private static final Set<String> IGNORE_ACTIONS_MAINTENANCE_REQUESTED = Collections.singleton(ShrinkAction.NAME);
|
||||
private volatile boolean isMaster = false;
|
||||
|
@ -111,18 +111,26 @@ public class IndexLifecycleService
|
|||
IndexMetaData idxMeta = cursor.value;
|
||||
String policyName = LifecycleSettings.LIFECYCLE_NAME_SETTING.get(idxMeta.getSettings());
|
||||
if (Strings.isNullOrEmpty(policyName) == false) {
|
||||
StepKey stepKey = IndexLifecycleRunner.getCurrentStepKey(LifecycleExecutionState.fromIndexMetadata(idxMeta));
|
||||
if (OperationMode.STOPPING == currentMode &&
|
||||
stepKey != null &&
|
||||
IGNORE_ACTIONS_MAINTENANCE_REQUESTED.contains(stepKey.getAction()) == false) {
|
||||
logger.info("skipping policy [{}] for index [{}]. stopping Index Lifecycle execution",
|
||||
policyName, idxMeta.getIndex().getName());
|
||||
continue;
|
||||
final LifecycleExecutionState lifecycleState = LifecycleExecutionState.fromIndexMetadata(idxMeta);
|
||||
StepKey stepKey = IndexLifecycleRunner.getCurrentStepKey(lifecycleState);
|
||||
|
||||
if (OperationMode.STOPPING == currentMode) {
|
||||
if (stepKey != null && IGNORE_ACTIONS_MAINTENANCE_REQUESTED.contains(stepKey.getAction())) {
|
||||
logger.info("waiting to stop ILM because index [{}] with policy [{}] is currently in action [{}]",
|
||||
idxMeta.getIndex().getName(), policyName, stepKey.getAction());
|
||||
lifecycleRunner.maybeRunAsyncAction(clusterState, idxMeta, policyName, stepKey);
|
||||
// ILM is trying to stop, but this index is in a Shrink action (or other dangerous action) so we can't stop
|
||||
safeToStop = false;
|
||||
} else {
|
||||
logger.info("skipping policy execution for index [{}] with policy [{}] because ILM is stopping",
|
||||
idxMeta.getIndex().getName(), policyName);
|
||||
}
|
||||
} else {
|
||||
lifecycleRunner.maybeRunAsyncAction(clusterState, idxMeta, policyName, stepKey);
|
||||
}
|
||||
lifecycleRunner.maybeRunAsyncAction(clusterState, idxMeta, policyName, stepKey);
|
||||
safeToStop = false; // proven false!
|
||||
}
|
||||
}
|
||||
|
||||
if (safeToStop && OperationMode.STOPPING == currentMode) {
|
||||
submitOperationModeUpdate(OperationMode.STOPPED);
|
||||
}
|
||||
|
@ -184,7 +192,7 @@ public class IndexLifecycleService
|
|||
@Override
|
||||
public void applyClusterState(ClusterChangedEvent event) {
|
||||
if (event.localNodeMaster()) { // only act if we are master, otherwise
|
||||
// keep idle until elected
|
||||
// keep idle until elected
|
||||
if (event.state().metaData().custom(IndexLifecycleMetadata.TYPE) != null) {
|
||||
policyRegistry.update(event.state());
|
||||
}
|
||||
|
@ -237,21 +245,34 @@ public class IndexLifecycleService
|
|||
IndexMetaData idxMeta = cursor.value;
|
||||
String policyName = LifecycleSettings.LIFECYCLE_NAME_SETTING.get(idxMeta.getSettings());
|
||||
if (Strings.isNullOrEmpty(policyName) == false) {
|
||||
StepKey stepKey = IndexLifecycleRunner.getCurrentStepKey(LifecycleExecutionState.fromIndexMetadata(idxMeta));
|
||||
if (OperationMode.STOPPING == currentMode && stepKey != null
|
||||
&& IGNORE_ACTIONS_MAINTENANCE_REQUESTED.contains(stepKey.getAction()) == false) {
|
||||
logger.info("skipping policy [" + policyName + "] for index [" + idxMeta.getIndex().getName()
|
||||
+ "]. stopping Index Lifecycle execution");
|
||||
continue;
|
||||
}
|
||||
if (fromClusterStateChange) {
|
||||
lifecycleRunner.runPolicyAfterStateChange(policyName, idxMeta);
|
||||
final LifecycleExecutionState lifecycleState = LifecycleExecutionState.fromIndexMetadata(idxMeta);
|
||||
StepKey stepKey = IndexLifecycleRunner.getCurrentStepKey(lifecycleState);
|
||||
|
||||
if (OperationMode.STOPPING == currentMode) {
|
||||
if (stepKey != null && IGNORE_ACTIONS_MAINTENANCE_REQUESTED.contains(stepKey.getAction())) {
|
||||
logger.info("waiting to stop ILM because index [{}] with policy [{}] is currently in action [{}]",
|
||||
idxMeta.getIndex().getName(), policyName, stepKey.getAction());
|
||||
if (fromClusterStateChange) {
|
||||
lifecycleRunner.runPolicyAfterStateChange(policyName, idxMeta);
|
||||
} else {
|
||||
lifecycleRunner.runPeriodicStep(policyName, idxMeta);
|
||||
}
|
||||
// ILM is trying to stop, but this index is in a Shrink action (or other dangerous action) so we can't stop
|
||||
safeToStop = false;
|
||||
} else {
|
||||
logger.info("skipping policy execution for index [{}] with policy [{}] because ILM is stopping",
|
||||
idxMeta.getIndex().getName(), policyName);
|
||||
}
|
||||
} else {
|
||||
lifecycleRunner.runPeriodicStep(policyName, idxMeta);
|
||||
if (fromClusterStateChange) {
|
||||
lifecycleRunner.runPolicyAfterStateChange(policyName, idxMeta);
|
||||
} else {
|
||||
lifecycleRunner.runPeriodicStep(policyName, idxMeta);
|
||||
}
|
||||
}
|
||||
safeToStop = false; // proven false!
|
||||
}
|
||||
}
|
||||
|
||||
if (safeToStop && OperationMode.STOPPING == currentMode) {
|
||||
submitOperationModeUpdate(OperationMode.STOPPED);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue