Remove custom metadata tool (#50813)

Adds a command-line tool to remove broken custom metadata from the cluster state.

Relates to #48701
This commit is contained in:
Yannick Welsch 2020-01-14 18:33:53 +01:00
parent a290762df1
commit 4b0581f182
4 changed files with 280 additions and 3 deletions

View File

@ -20,7 +20,7 @@ bin/elasticsearch-node repurpose|unsafe-bootstrap|detach-cluster|override-versio
[float]
=== Description
This tool has five modes:
This tool has a number of modes:
* `elasticsearch-node repurpose` can be used to delete unwanted data from a
node if it used to be a <<data-node,data node>> or a
@ -28,8 +28,12 @@ This tool has five modes:
or other of these roles.
* `elasticsearch-node remove-settings` can be used to remove persistent settings
from the cluster state in case where it contains incompatible settings that
prevent the cluster from forming.
from the cluster state in case where it contains incompatible settings that
prevent the cluster from forming.
* `elasticsearch-node remove-customs` can be used to remove custom metadata
from the cluster state in cases where it contains broken metadata that
prevents the cluster state from being loaded.
* `elasticsearch-node unsafe-bootstrap` can be used to perform _unsafe cluster
bootstrapping_. It forces one of the nodes to form a brand-new cluster on
@ -100,6 +104,24 @@ The intended use is:
* Repeat for all other master-eligible nodes
* Start the nodes
[float]
==== Removing custom metadata from the cluster state
There may be situations where a node contains custom metadata, typically
provided by plugins, that prevents the node from starting up and loading
the cluster state from disk.
The `elasticsearch-node remove-customs` tool allows you to forcefully remove
the problematic custom metadata. The tool takes as parameters a list of the
names of the custom metadata that should be removed, and also supports
wildcard patterns.
The intended use is:
* Stop the node
* Run `elasticsearch-node remove-customs name-of-custom-to-remove` on the node
* Repeat for all other master-eligible nodes
* Start the nodes
[float]
==== Recovering data after a disaster
@ -411,6 +433,33 @@ You can also use wildcards to remove multiple settings, for example using
node$ ./bin/elasticsearch-node remove-settings xpack.monitoring.*
----
[float]
==== Removing custom metadata from the cluster state
If the on-disk cluster state contains custom metadata that prevents the node
from starting up and loading the cluster state, you can run the following
command to remove this custom metadata.
[source,txt]
----
node$ ./bin/elasticsearch-node remove-customs snapshot_lifecycle
WARNING: Elasticsearch MUST be stopped before running this tool.
The following customs will be removed:
snapshot_lifecycle
You should only run this tool if you have broken custom metadata in the
cluster state that prevents the cluster state from being loaded.
This tool can cause data loss and its use should be your last resort.
Do you want to proceed?
Confirm [y/N] y
Customs were successfully removed from the cluster state
----
[float]
==== Unsafe cluster bootstrapping

View File

@ -42,6 +42,7 @@ public class NodeToolCli extends MultiCommand {
subcommands.put("detach-cluster", new DetachClusterCommand());
subcommands.put("override-version", new OverrideNodeVersionCommand());
subcommands.put("remove-settings", new RemoveSettingsCommand());
subcommands.put("remove-customs", new RemoveCustomsCommand());
}
public static void main(String[] args) throws Exception {

View File

@ -0,0 +1,102 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.cluster.coordination;
import com.carrotsearch.hppc.cursors.ObjectCursor;
import joptsimple.OptionSet;
import joptsimple.OptionSpec;
import org.elasticsearch.cli.ExitCodes;
import org.elasticsearch.cli.Terminal;
import org.elasticsearch.cli.UserException;
import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.metadata.MetaData;
import org.elasticsearch.common.collect.Tuple;
import org.elasticsearch.common.regex.Regex;
import org.elasticsearch.env.Environment;
import org.elasticsearch.gateway.PersistedClusterStateService;
import java.io.IOException;
import java.nio.file.Path;
import java.util.List;
/**
 * Node tool sub-command that strips selected custom metadata entries from the persisted cluster state.
 * <p>
 * Every non-option argument is used as a simple match pattern (wildcards allowed) against the keys of the
 * customs found in the loaded metadata. All matching customs are removed from a copy of the metadata and,
 * once the user has confirmed, the resulting cluster state is written back together with the loaded term.
 */
public class RemoveCustomsCommand extends ElasticsearchNodeCommand {

    /** Printed once the modified cluster state has been written. */
    static final String CUSTOMS_REMOVED_MSG = "Customs were successfully removed from the cluster state";

    /** Warning shown to the user before asking for confirmation. */
    static final String CONFIRMATION_MSG =
        DELIMITER +
            "\n" +
            "You should only run this tool if you have broken custom metadata in the\n" +
            "cluster state that prevents the cluster state from being loaded.\n" +
            "This tool can cause data loss and its use should be your last resort.\n" +
            "\n" +
            "Do you want to proceed?\n";

    /** Non-option arguments: the names (or patterns) of the customs to remove. */
    private final OptionSpec<String> arguments;

    public RemoveCustomsCommand() {
        super("Removes custom metadata from the cluster state");
        this.arguments = parser.nonOptions("custom metadata names");
    }

    /**
     * Loads the persisted cluster state, removes every custom whose key matches one of the supplied patterns,
     * asks the user for confirmation and then writes the updated state.
     *
     * @throws UserException if no pattern was supplied, or if one of the patterns matches no custom
     * @throws IOException   declared for failures while reading or writing the persisted state
     */
    @Override
    protected void processNodePaths(Terminal terminal, Path[] dataPaths, int nodeLockId, OptionSet options, Environment env)
        throws IOException, UserException {
        final List<String> patterns = arguments.values(options);
        if (patterns.isEmpty()) {
            throw new UserException(ExitCodes.USAGE, "Must supply at least one custom metadata name to remove");
        }

        final PersistedClusterStateService stateService = createPersistedClusterStateService(env.settings(), dataPaths);

        terminal.println(Terminal.Verbosity.VERBOSE, "Loading cluster state");
        final Tuple<Long, ClusterState> termAndState = loadTermAndClusterState(stateService, env);
        final ClusterState currentState = termAndState.v2();
        final MetaData currentMetaData = currentState.metaData();
        terminal.println(Terminal.Verbosity.VERBOSE, "custom metadata names: " + currentMetaData.customs().keys());

        // Removals are collected in a builder seeded from the loaded metadata; matching always runs against the loaded keys.
        final MetaData.Builder prunedMetaData = MetaData.builder(currentMetaData);
        for (String pattern : patterns) {
            if (removeMatchingCustoms(terminal, pattern, currentMetaData, prunedMetaData) == false) {
                throw new UserException(ExitCodes.USAGE,
                    "No custom metadata matching [" + pattern + "] were found on this node");
            }
        }

        final ClusterState updatedState = ClusterState.builder(currentState).metaData(prunedMetaData.build()).build();
        terminal.println(Terminal.Verbosity.VERBOSE,
            "[old cluster state = " + currentState + ", new cluster state = " + updatedState + "]");

        confirm(terminal, CONFIRMATION_MSG);

        try (PersistedClusterStateService.Writer stateWriter = stateService.createWriter()) {
            stateWriter.writeFullStateAndCommit(termAndState.v1(), updatedState);
        }

        terminal.println(CUSTOMS_REMOVED_MSG);
    }

    /**
     * Removes from {@code target} every custom of {@code source} whose key matches {@code pattern}, printing a
     * header before the first match and then each matching key.
     *
     * @return {@code true} if at least one custom key matched the pattern
     */
    private static boolean removeMatchingCustoms(Terminal terminal, String pattern, MetaData source, MetaData.Builder target) {
        boolean anyMatch = false;
        for (ObjectCursor<String> keyCursor : source.customs().keys()) {
            final String key = keyCursor.value;
            if (Regex.simpleMatch(pattern, key) == false) {
                continue;
            }
            target.removeCustom(key);
            if (anyMatch == false) {
                terminal.println("The following customs will be removed:");
                anyMatch = true;
            }
            terminal.println(key);
        }
        return anyMatch;
    }
}

View File

@ -0,0 +1,125 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.cluster.coordination;
import joptsimple.OptionSet;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.cli.MockTerminal;
import org.elasticsearch.cli.UserException;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.env.TestEnvironment;
import org.elasticsearch.test.ESIntegTestCase;
import static org.hamcrest.Matchers.containsString;
/**
 * Integration tests for {@link RemoveCustomsCommand}. Each test starts a single node, stops it again and then
 * runs the command against that node's data path.
 */
@ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST, numDataNodes = 0, autoManageMasterNodes = false)
public class RemoveCustomsCommandIT extends ESIntegTestCase {

    /** Answering anything other than "y"/"Y" at the confirmation prompt must abort the command. */
    public void testRemoveCustomsAbortedByUser() throws Exception {
        internalCluster().setBootstrapMasterNodeIndex(0);
        final String nodeName = internalCluster().startNode();
        final Settings dataPathSettings = internalCluster().dataPathSettings(nodeName);
        final Environment environment = stopNodeAndBuildEnvironment(dataPathSettings);

        expectThrows(
            () -> removeCustoms(environment, true, "index-graveyard"),
            ElasticsearchNodeCommand.ABORTED_BY_USER_MSG);
    }

    /** Removing the index graveyard, by exact name or by wildcard, leaves no tombstones after a restart. */
    public void testRemoveCustomsSuccessful() throws Exception {
        internalCluster().setBootstrapMasterNodeIndex(0);
        final String nodeName = internalCluster().startNode();
        createAndDeleteTestIndex();
        final Settings dataPathSettings = internalCluster().dataPathSettings(nodeName);
        final Environment environment = stopNodeAndBuildEnvironment(dataPathSettings);

        final String pattern = randomBoolean() ? "index-graveyard" : "index-*";
        final MockTerminal terminal = removeCustoms(environment, false, pattern);
        final String output = terminal.getOutput();
        assertThat(output, containsString(RemoveCustomsCommand.CUSTOMS_REMOVED_MSG));
        assertThat(output, containsString("The following customs will be removed:"));
        assertThat(output, containsString("index-graveyard"));

        internalCluster().startNode(dataPathSettings);
        assertEquals(0, tombstoneCount());
    }

    /** A name that matches no custom must be rejected with a {@link UserException}. */
    public void testCustomDoesNotMatch() throws Exception {
        internalCluster().setBootstrapMasterNodeIndex(0);
        final String nodeName = internalCluster().startNode();
        createAndDeleteTestIndex();
        final Settings dataPathSettings = internalCluster().dataPathSettings(nodeName);
        final Environment environment = stopNodeAndBuildEnvironment(dataPathSettings);

        final UserException failure = expectThrows(UserException.class,
            () -> removeCustoms(environment, false, "index-greveyard-with-typos"));
        assertThat(failure.getMessage(),
            containsString("No custom metadata matching [index-greveyard-with-typos] were found on this node"));
    }

    /** Creates and deletes the index "test" and checks that exactly one tombstone is then present. */
    private void createAndDeleteTestIndex() {
        createIndex("test");
        client().admin().indices().prepareDelete("test").get();
        assertEquals(1, tombstoneCount());
    }

    /** Number of tombstones in the index graveyard of the current cluster state. */
    private int tombstoneCount() {
        return client().admin().cluster().prepareState().get().getState().metaData().indexGraveyard().getTombstones().size();
    }

    /**
     * Waits for the one-node cluster to be stable, stops the node and returns an environment built from the
     * cluster's default settings combined with the given data path settings.
     */
    private Environment stopNodeAndBuildEnvironment(Settings dataPathSettings) throws Exception {
        ensureStableCluster(1);
        internalCluster().stopRandomDataNode();
        final Settings environmentSettings = Settings.builder()
            .put(internalCluster().getDefaultSettings())
            .put(dataPathSettings)
            .build();
        return TestEnvironment.newEnvironment(environmentSettings);
    }

    /**
     * Runs {@code command} with a mock terminal whose queued answer either confirms ("y"/"Y") or, when
     * {@code abort} is set, is a random single letter other than "y"/"Y". Whatever the outcome, the
     * stop-warning must have been printed.
     */
    private MockTerminal executeCommand(ElasticsearchNodeCommand command, Environment environment, boolean abort, String... args)
        throws Exception {
        final MockTerminal mockTerminal = new MockTerminal();
        final OptionSet parsedOptions = command.getParser().parse(args);

        final String answer = abort
            ? randomValueOtherThanMany(candidate -> candidate.equalsIgnoreCase("y"), () -> randomAlphaOfLength(1))
            : (randomBoolean() ? "y" : "Y");
        mockTerminal.addTextInput(answer);

        try {
            command.execute(mockTerminal, parsedOptions, environment);
        } finally {
            assertThat(mockTerminal.getOutput(), containsString(ElasticsearchNodeCommand.STOP_WARNING_MSG));
        }
        return mockTerminal;
    }

    /** Runs {@link RemoveCustomsCommand} and checks that both the confirmation and the success message were printed. */
    private MockTerminal removeCustoms(Environment environment, boolean abort, String... args) throws Exception {
        final MockTerminal mockTerminal = executeCommand(new RemoveCustomsCommand(), environment, abort, args);
        final String output = mockTerminal.getOutput();
        assertThat(output, containsString(RemoveCustomsCommand.CONFIRMATION_MSG));
        assertThat(output, containsString(RemoveCustomsCommand.CUSTOMS_REMOVED_MSG));
        return mockTerminal;
    }

    /** Expects {@code runnable} to throw an {@link ElasticsearchException} whose message contains {@code message}. */
    private void expectThrows(ThrowingRunnable runnable, String message) {
        final ElasticsearchException thrown = expectThrows(ElasticsearchException.class, runnable);
        assertThat(thrown.getMessage(), containsString(message));
    }
}