We marked the `init` ILM step as retryable but our test used `waitUntil` without an assert so we didn’t catch the fact that we were not actually able to retry this step as our ILM state didn’t contain any information about the policy execution (as we were in the process of initialising it). This commit manually sets the current step to `init` when we’re moving the ilm policy into the ERROR step (this enables us to successfully move to the error step and later retry the step) * ShrunkenIndexCheckStep: Use correct logger (cherry picked from commit f78d4b3d91345a2a8fc0f48b90dd66c9959bd7ff) Signed-off-by: Andrei Dan <andrei.dan@elastic.co>
This commit is contained in:
parent
0656a33d62
commit
bd3a70db4e
|
@ -38,22 +38,26 @@ public final class InitializePolicyContextStep extends ClusterStateActionStep {
|
|||
return clusterState;
|
||||
}
|
||||
|
||||
LifecycleExecutionState lifecycleState = LifecycleExecutionState
|
||||
.fromIndexMetadata(indexMetaData);
|
||||
|
||||
if (lifecycleState.getLifecycleDate() != null) {
|
||||
return clusterState;
|
||||
}
|
||||
|
||||
IndexMetaData.Builder indexMetadataBuilder = IndexMetaData.builder(indexMetaData);
|
||||
if (shouldParseIndexName(indexMetaData.getSettings())) {
|
||||
long parsedOriginationDate = parseIndexNameAndExtractDate(index.getName());
|
||||
indexMetadataBuilder.settingsVersion(indexMetaData.getSettingsVersion() + 1)
|
||||
.settings(Settings.builder()
|
||||
.put(indexMetaData.getSettings())
|
||||
.put(LifecycleSettings.LIFECYCLE_ORIGINATION_DATE, parsedOriginationDate)
|
||||
.build()
|
||||
);
|
||||
LifecycleExecutionState lifecycleState;
|
||||
try {
|
||||
lifecycleState = LifecycleExecutionState.fromIndexMetadata(indexMetaData);
|
||||
if (lifecycleState.getLifecycleDate() != null) {
|
||||
return clusterState;
|
||||
}
|
||||
|
||||
if (shouldParseIndexName(indexMetaData.getSettings())) {
|
||||
long parsedOriginationDate = parseIndexNameAndExtractDate(index.getName());
|
||||
indexMetadataBuilder.settingsVersion(indexMetaData.getSettingsVersion() + 1)
|
||||
.settings(Settings.builder()
|
||||
.put(indexMetaData.getSettings())
|
||||
.put(LifecycleSettings.LIFECYCLE_ORIGINATION_DATE, parsedOriginationDate)
|
||||
.build()
|
||||
);
|
||||
}
|
||||
} catch (Exception e) {
|
||||
String policy = indexMetaData.getSettings().get(LifecycleSettings.LIFECYCLE_NAME);
|
||||
throw new InitializePolicyException(policy, index.getName(), e);
|
||||
}
|
||||
|
||||
ClusterState.Builder newClusterStateBuilder = ClusterState.builder(clusterState);
|
||||
|
|
|
@ -0,0 +1,20 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License;
|
||||
* you may not use this file except in compliance with the Elastic License.
|
||||
*/
|
||||
package org.elasticsearch.xpack.core.ilm;
|
||||
|
||||
import org.elasticsearch.ElasticsearchException;
|
||||
|
||||
import java.util.Locale;
|
||||
|
||||
/**
|
||||
* Exception thrown when a problem is encountered while initialising an ILM policy for an index.
|
||||
*/
|
||||
public class InitializePolicyException extends ElasticsearchException {
|
||||
|
||||
public InitializePolicyException(String policy, String index, Throwable cause) {
|
||||
super(String.format(Locale.ROOT, "unable to initialize policy [%s] for index [%s]", policy, index), cause);
|
||||
}
|
||||
}
|
|
@ -25,7 +25,7 @@ import java.util.Objects;
|
|||
*/
|
||||
public class ShrunkenIndexCheckStep extends ClusterStateWaitStep {
|
||||
public static final String NAME = "is-shrunken-index";
|
||||
private static final Logger logger = LogManager.getLogger(InitializePolicyContextStep.class);
|
||||
private static final Logger logger = LogManager.getLogger(ShrunkenIndexCheckStep.class);
|
||||
private String shrunkIndexPrefix;
|
||||
|
||||
public ShrunkenIndexCheckStep(StepKey key, StepKey nextStepKey, String shrunkIndexPrefix) {
|
||||
|
|
|
@ -33,14 +33,14 @@ public class InitializePolicyContextStepTests extends AbstractStepTestCase<Initi
|
|||
StepKey nextKey = instance.getNextStepKey();
|
||||
|
||||
switch (between(0, 1)) {
|
||||
case 0:
|
||||
key = new StepKey(key.getPhase(), key.getAction(), key.getName() + randomAlphaOfLength(5));
|
||||
break;
|
||||
case 1:
|
||||
nextKey = new StepKey(key.getPhase(), key.getAction(), key.getName() + randomAlphaOfLength(5));
|
||||
break;
|
||||
default:
|
||||
throw new AssertionError("Illegal randomisation branch");
|
||||
case 0:
|
||||
key = new StepKey(key.getPhase(), key.getAction(), key.getName() + randomAlphaOfLength(5));
|
||||
break;
|
||||
case 1:
|
||||
nextKey = new StepKey(key.getPhase(), key.getAction(), key.getName() + randomAlphaOfLength(5));
|
||||
break;
|
||||
default:
|
||||
throw new AssertionError("Illegal randomisation branch");
|
||||
}
|
||||
|
||||
return new InitializePolicyContextStep(key, nextKey);
|
||||
|
|
|
@ -1185,26 +1185,26 @@ public class TimeSeriesLifecycleActionsIT extends ESRestTestCase {
|
|||
// {@link org.elasticsearch.xpack.core.ilm.ErrorStep} in order to retry the failing step. As {@link #assertBusy}
|
||||
// increases the wait time between calls exponentially, we might miss the window where the policy is on
|
||||
// {@link WaitForRolloverReadyStep} and the move to `attempt-rollover` request will not be successful.
|
||||
waitUntil(() -> {
|
||||
assertTrue(waitUntil(() -> {
|
||||
try {
|
||||
return client().performRequest(moveToStepRequest).getStatusLine().getStatusCode() == 200;
|
||||
} catch (IOException e) {
|
||||
return false;
|
||||
}
|
||||
}, 30, TimeUnit.SECONDS);
|
||||
}, 30, TimeUnit.SECONDS));
|
||||
|
||||
// Similar to above, using {@link #waitUntil} as we want to make sure the `attempt-rollover` step started failing and is being
|
||||
// retried (which means ILM moves back and forth between the `attempt-rollover` step and the `error` step)
|
||||
waitUntil(() -> {
|
||||
assertTrue("ILM did not start retrying the attempt-rollover step", waitUntil(() -> {
|
||||
try {
|
||||
Map<String, Object> explainIndexResponse = explainIndex(index);
|
||||
String step = (String) explainIndexResponse.get("step");
|
||||
String failedStep = (String) explainIndexResponse.get("failed_step");
|
||||
Integer retryCount = (Integer) explainIndexResponse.get(FAILED_STEP_RETRY_COUNT_FIELD);
|
||||
return step != null && step.equals("attempt-rollover") && retryCount != null && retryCount >= 1;
|
||||
return failedStep != null && failedStep.equals("attempt-rollover") && retryCount != null && retryCount >= 1;
|
||||
} catch (IOException e) {
|
||||
return false;
|
||||
}
|
||||
}, 30, TimeUnit.SECONDS);
|
||||
}, 30, TimeUnit.SECONDS));
|
||||
|
||||
deleteIndex(rolledIndex);
|
||||
|
||||
|
@ -1246,16 +1246,17 @@ public class TimeSeriesLifecycleActionsIT extends ESRestTestCase {
|
|||
"}");
|
||||
client().performRequest(moveToStepRequest);
|
||||
|
||||
waitUntil(() -> {
|
||||
assertTrue("ILM did not start retrying the update-rollover-lifecycle-date step", waitUntil(() -> {
|
||||
try {
|
||||
Map<String, Object> explainIndexResponse = explainIndex(index);
|
||||
String step = (String) explainIndexResponse.get("step");
|
||||
String failedStep = (String) explainIndexResponse.get("failed_step");
|
||||
Integer retryCount = (Integer) explainIndexResponse.get(FAILED_STEP_RETRY_COUNT_FIELD);
|
||||
return step != null && step.equals(UpdateRolloverLifecycleDateStep.NAME) && retryCount != null && retryCount >= 1;
|
||||
return failedStep != null && failedStep.equals(UpdateRolloverLifecycleDateStep.NAME) && retryCount != null
|
||||
&& retryCount >= 1;
|
||||
} catch (IOException e) {
|
||||
return false;
|
||||
}
|
||||
});
|
||||
}, 30, TimeUnit.SECONDS));
|
||||
|
||||
index(client(), index, "1", "foo", "bar");
|
||||
Request refreshIndex = new Request("POST", "/" + index + "/_refresh");
|
||||
|
@ -1441,16 +1442,17 @@ public class TimeSeriesLifecycleActionsIT extends ESRestTestCase {
|
|||
assertOK(client().performRequest(startReq));
|
||||
|
||||
// Wait until an error has occurred.
|
||||
waitUntil(() -> {
|
||||
assertTrue("ILM did not start retrying the init step", waitUntil(() -> {
|
||||
try {
|
||||
Map<String, Object> explainIndexResponse = explainIndex(index);
|
||||
String step = (String) explainIndexResponse.get("step");
|
||||
String failedStep = (String) explainIndexResponse.get("failed_step");
|
||||
Integer retryCount = (Integer) explainIndexResponse.get(FAILED_STEP_RETRY_COUNT_FIELD);
|
||||
return step != null && step.equals(InitializePolicyContextStep.KEY.getAction()) && retryCount != null && retryCount >= 1;
|
||||
return failedStep != null && failedStep.equals(InitializePolicyContextStep.KEY.getAction()) && retryCount != null
|
||||
&& retryCount >= 1;
|
||||
} catch (IOException e) {
|
||||
return false;
|
||||
}
|
||||
}, 30, TimeUnit.SECONDS);
|
||||
}, 30, TimeUnit.SECONDS));
|
||||
|
||||
// Turn origination date parsing back off
|
||||
updateIndexSettings(index, Settings.builder()
|
||||
|
|
|
@ -24,6 +24,7 @@ import org.elasticsearch.index.Index;
|
|||
import org.elasticsearch.xpack.core.ilm.ErrorStep;
|
||||
import org.elasticsearch.xpack.core.ilm.IndexLifecycleMetadata;
|
||||
import org.elasticsearch.xpack.core.ilm.InitializePolicyContextStep;
|
||||
import org.elasticsearch.xpack.core.ilm.InitializePolicyException;
|
||||
import org.elasticsearch.xpack.core.ilm.LifecycleExecutionState;
|
||||
import org.elasticsearch.xpack.core.ilm.LifecyclePolicyMetadata;
|
||||
import org.elasticsearch.xpack.core.ilm.LifecycleSettings;
|
||||
|
@ -133,8 +134,16 @@ public final class IndexLifecycleTransition {
|
|||
ElasticsearchException.generateThrowableXContent(causeXContentBuilder, STACKTRACE_PARAMS, cause);
|
||||
causeXContentBuilder.endObject();
|
||||
LifecycleExecutionState currentState = LifecycleExecutionState.fromIndexMetadata(idxMeta);
|
||||
Step.StepKey currentStep = Objects.requireNonNull(LifecycleExecutionState.getCurrentStepKey(currentState),
|
||||
"unable to move to an error step where there is no current step, state: " + currentState);
|
||||
Step.StepKey currentStep;
|
||||
// if an error is encountered while initialising the policy the lifecycle execution state will not yet contain any step information
|
||||
// as we haven't yet initialised the policy, so we'll manually set the current step to be the "initialize policy" step so we can
|
||||
// record the error (and later retry the init policy step)
|
||||
if (cause instanceof InitializePolicyException) {
|
||||
currentStep = InitializePolicyContextStep.KEY;
|
||||
} else {
|
||||
currentStep = Objects.requireNonNull(LifecycleExecutionState.getCurrentStepKey(currentState),
|
||||
"unable to move to an error step where there is no current step, state: " + currentState);
|
||||
}
|
||||
LifecycleExecutionState nextStepState = updateExecutionStateToStep(policyMetadata, currentState,
|
||||
new Step.StepKey(currentStep.getPhase(), currentStep.getAction(), ErrorStep.NAME), nowSupplier, false);
|
||||
|
||||
|
|
Loading…
Reference in New Issue