From bd3a70db4e7b092dd97130a58b7dcc979a3a232a Mon Sep 17 00:00:00 2001 From: Andrei Dan Date: Sat, 15 Feb 2020 18:42:05 +0000 Subject: [PATCH] ILM fix the init step to actually be retryable (#52076) (#52375) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We marked the `init` ILM step as retryable but our test used `waitUntil` without an assert so we didn’t catch the fact that we were not actually able to retry this step as our ILM state didn’t contain any information about the policy execution (as we were in the process of initialising it). This commit manually sets the current step to `init` when we’re moving the ilm policy into the ERROR step (this enables us to successfully move to the error step and later retry the step) * ShrunkenIndexCheckStep: Use correct logger (cherry picked from commit f78d4b3d91345a2a8fc0f48b90dd66c9959bd7ff) Signed-off-by: Andrei Dan --- .../core/ilm/InitializePolicyContextStep.java | 34 +++++++++++-------- .../core/ilm/InitializePolicyException.java | 20 +++++++++++ .../core/ilm/ShrunkenIndexCheckStep.java | 2 +- .../ilm/InitializePolicyContextStepTests.java | 16 ++++----- .../ilm/TimeSeriesLifecycleActionsIT.java | 30 ++++++++-------- .../xpack/ilm/IndexLifecycleTransition.java | 13 +++++-- 6 files changed, 75 insertions(+), 40 deletions(-) create mode 100644 x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ilm/InitializePolicyException.java diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ilm/InitializePolicyContextStep.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ilm/InitializePolicyContextStep.java index 194dcaa2e1e..536d72613cc 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ilm/InitializePolicyContextStep.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ilm/InitializePolicyContextStep.java @@ -38,22 +38,26 @@ public final class InitializePolicyContextStep extends ClusterStateActionStep { return clusterState; } - LifecycleExecutionState lifecycleState = LifecycleExecutionState - .fromIndexMetadata(indexMetaData); - - if (lifecycleState.getLifecycleDate() != null) { - return clusterState; - } - IndexMetaData.Builder indexMetadataBuilder = IndexMetaData.builder(indexMetaData); - if (shouldParseIndexName(indexMetaData.getSettings())) { - long parsedOriginationDate = parseIndexNameAndExtractDate(index.getName()); - indexMetadataBuilder.settingsVersion(indexMetaData.getSettingsVersion() + 1) - .settings(Settings.builder() - .put(indexMetaData.getSettings()) - .put(LifecycleSettings.LIFECYCLE_ORIGINATION_DATE, parsedOriginationDate) - .build() - ); + LifecycleExecutionState lifecycleState; + try { + lifecycleState = LifecycleExecutionState.fromIndexMetadata(indexMetaData); + if (lifecycleState.getLifecycleDate() != null) { + return clusterState; + } + + if (shouldParseIndexName(indexMetaData.getSettings())) { + long parsedOriginationDate = parseIndexNameAndExtractDate(index.getName()); + indexMetadataBuilder.settingsVersion(indexMetaData.getSettingsVersion() + 1) + .settings(Settings.builder() + .put(indexMetaData.getSettings()) + .put(LifecycleSettings.LIFECYCLE_ORIGINATION_DATE, parsedOriginationDate) + .build() + ); + } + } catch (Exception e) { + String policy = indexMetaData.getSettings().get(LifecycleSettings.LIFECYCLE_NAME); + throw new InitializePolicyException(policy, index.getName(), e); } ClusterState.Builder newClusterStateBuilder = ClusterState.builder(clusterState); diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ilm/InitializePolicyException.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ilm/InitializePolicyException.java new file mode 100644 index 00000000000..8feda10abdb --- /dev/null +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ilm/InitializePolicyException.java @@ -0,0 +1,20 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +package org.elasticsearch.xpack.core.ilm; + +import org.elasticsearch.ElasticsearchException; + +import java.util.Locale; + +/** + * Exception thrown when a problem is encountered while initialising an ILM policy for an index. + */ +public class InitializePolicyException extends ElasticsearchException { + + public InitializePolicyException(String policy, String index, Throwable cause) { + super(String.format(Locale.ROOT, "unable to initialize policy [%s] for index [%s]", policy, index), cause); + } +} diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ilm/ShrunkenIndexCheckStep.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ilm/ShrunkenIndexCheckStep.java index 6b6a70ca501..715352dce2b 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ilm/ShrunkenIndexCheckStep.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ilm/ShrunkenIndexCheckStep.java @@ -25,7 +25,7 @@ import java.util.Objects; */ public class ShrunkenIndexCheckStep extends ClusterStateWaitStep { public static final String NAME = "is-shrunken-index"; - private static final Logger logger = LogManager.getLogger(InitializePolicyContextStep.class); + private static final Logger logger = LogManager.getLogger(ShrunkenIndexCheckStep.class); private String shrunkIndexPrefix; public ShrunkenIndexCheckStep(StepKey key, StepKey nextStepKey, String shrunkIndexPrefix) { diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ilm/InitializePolicyContextStepTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ilm/InitializePolicyContextStepTests.java index 404240810f2..482f3e41837 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ilm/InitializePolicyContextStepTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ilm/InitializePolicyContextStepTests.java @@ -33,14 +33,14 @@ public class InitializePolicyContextStepTests extends AbstractStepTestCase { + assertTrue(waitUntil(() -> { try { return client().performRequest(moveToStepRequest).getStatusLine().getStatusCode() == 200; } catch (IOException e) { return false; } - }, 30, TimeUnit.SECONDS); + }, 30, TimeUnit.SECONDS)); // Similar to above, using {@link #waitUntil} as we want to make sure the `attempt-rollover` step started failing and is being // retried (which means ILM moves back and forth between the `attempt-rollover` step and the `error` step) - waitUntil(() -> { + assertTrue("ILM did not start retrying the attempt-rollover step", waitUntil(() -> { try { Map explainIndexResponse = explainIndex(index); - String step = (String) explainIndexResponse.get("step"); + String failedStep = (String) explainIndexResponse.get("failed_step"); Integer retryCount = (Integer) explainIndexResponse.get(FAILED_STEP_RETRY_COUNT_FIELD); - return step != null && step.equals("attempt-rollover") && retryCount != null && retryCount >= 1; + return failedStep != null && failedStep.equals("attempt-rollover") && retryCount != null && retryCount >= 1; } catch (IOException e) { return false; } - }, 30, TimeUnit.SECONDS); + }, 30, TimeUnit.SECONDS)); deleteIndex(rolledIndex); @@ -1246,16 +1246,17 @@ public class TimeSeriesLifecycleActionsIT extends ESRestTestCase { "}"); client().performRequest(moveToStepRequest); - waitUntil(() -> { + assertTrue("ILM did not start retrying the update-rollover-lifecycle-date step", waitUntil(() -> { try { Map explainIndexResponse = explainIndex(index); - String step = (String) explainIndexResponse.get("step"); + String failedStep = (String) explainIndexResponse.get("failed_step"); Integer retryCount = (Integer) explainIndexResponse.get(FAILED_STEP_RETRY_COUNT_FIELD); - return step != null && step.equals(UpdateRolloverLifecycleDateStep.NAME) && retryCount != null && retryCount >= 1; + return failedStep != null && failedStep.equals(UpdateRolloverLifecycleDateStep.NAME) && retryCount != null + && retryCount >= 1; } catch (IOException e) { return false; } - }); + }, 30, TimeUnit.SECONDS)); index(client(), index, "1", "foo", "bar"); Request refreshIndex = new Request("POST", "/" + index + "/_refresh"); @@ -1441,16 +1442,17 @@ public class TimeSeriesLifecycleActionsIT extends ESRestTestCase { assertOK(client().performRequest(startReq)); // Wait until an error has occurred. - waitUntil(() -> { + assertTrue("ILM did not start retrying the init step", waitUntil(() -> { try { Map explainIndexResponse = explainIndex(index); - String step = (String) explainIndexResponse.get("step"); + String failedStep = (String) explainIndexResponse.get("failed_step"); Integer retryCount = (Integer) explainIndexResponse.get(FAILED_STEP_RETRY_COUNT_FIELD); - return step != null && step.equals(InitializePolicyContextStep.KEY.getAction()) && retryCount != null && retryCount >= 1; + return failedStep != null && failedStep.equals(InitializePolicyContextStep.KEY.getAction()) && retryCount != null + && retryCount >= 1; } catch (IOException e) { return false; } - }, 30, TimeUnit.SECONDS); + }, 30, TimeUnit.SECONDS)); // Turn origination date parsing back off updateIndexSettings(index, Settings.builder() diff --git a/x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/ilm/IndexLifecycleTransition.java b/x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/ilm/IndexLifecycleTransition.java index 6b38515b10e..95b441861bf 100644 --- a/x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/ilm/IndexLifecycleTransition.java +++ b/x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/ilm/IndexLifecycleTransition.java @@ -24,6 +24,7 @@ import org.elasticsearch.index.Index; import org.elasticsearch.xpack.core.ilm.ErrorStep; import org.elasticsearch.xpack.core.ilm.IndexLifecycleMetadata; import org.elasticsearch.xpack.core.ilm.InitializePolicyContextStep; +import org.elasticsearch.xpack.core.ilm.InitializePolicyException; import org.elasticsearch.xpack.core.ilm.LifecycleExecutionState; import org.elasticsearch.xpack.core.ilm.LifecyclePolicyMetadata; import org.elasticsearch.xpack.core.ilm.LifecycleSettings; @@ -133,8 +134,16 @@ public final class IndexLifecycleTransition { ElasticsearchException.generateThrowableXContent(causeXContentBuilder, STACKTRACE_PARAMS, cause); causeXContentBuilder.endObject(); LifecycleExecutionState currentState = LifecycleExecutionState.fromIndexMetadata(idxMeta); - Step.StepKey currentStep = Objects.requireNonNull(LifecycleExecutionState.getCurrentStepKey(currentState), - "unable to move to an error step where there is no current step, state: " + currentState); + Step.StepKey currentStep; + // if an error is encountered while initialising the policy the lifecycle execution state will not yet contain any step information + // as we haven't yet initialised the policy, so we'll manually set the current step to be the "initialize policy" step so we can + // record the error (and later retry the init policy step) + if (cause instanceof InitializePolicyException) { + currentStep = InitializePolicyContextStep.KEY; + } else { + currentStep = Objects.requireNonNull(LifecycleExecutionState.getCurrentStepKey(currentState), + "unable to move to an error step where there is no current step, state: " + currentState); + } LifecycleExecutionState nextStepState = updateExecutionStateToStep(policyMetadata, currentState, new Step.StepKey(currentStep.getPhase(), currentStep.getAction(), ErrorStep.NAME), nowSupplier, false);