Omit writing index metadata for non-replicated closed indices on data-only node (#47285)

Fixes a bug related to how "closed replicated indices" (introduced in 7.2) interact with the index
metadata storage mechanism, which has special handling for closed indices (but incorrectly
handles replicated closed indices). On non-master-eligible data nodes, it's possible for the
node's manifest file (which tracks the relevant metadata state that the node should persist) to
become out of sync with what's actually stored on disk, leading to an inconsistency that is then
detected at startup, refusing for the node to start up.

Closes #47276
This commit is contained in:
Yannick Welsch 2019-09-30 13:42:46 +02:00
parent d9a7bcef21
commit 467596871a
3 changed files with 72 additions and 83 deletions

View File

@ -139,7 +139,7 @@ public class IncrementalClusterStateWriter {
private Map<Index, Long> writeIndicesMetadata(AtomicClusterStateWriter writer, ClusterState newState, ClusterState previousState)
throws WriteStateException {
Map<Index, Long> previouslyWrittenIndices = previousManifest.getIndexGenerations();
Set<Index> relevantIndices = getRelevantIndices(newState, previousState, previouslyWrittenIndices.keySet());
Set<Index> relevantIndices = getRelevantIndices(newState);
Map<Index, Long> newIndices = new HashMap<>();
@ -207,8 +207,7 @@ public class IncrementalClusterStateWriter {
return actions;
}
private static Set<Index> getRelevantIndicesOnDataOnlyNode(ClusterState state, ClusterState previousState, Set<Index>
previouslyWrittenIndices) {
private static Set<Index> getRelevantIndicesOnDataOnlyNode(ClusterState state) {
RoutingNode newRoutingNode = state.getRoutingNodes().node(state.nodes().getLocalNodeId());
if (newRoutingNode == null) {
throw new IllegalStateException("cluster state does not contain this node - cannot write index meta state");
@ -217,20 +216,6 @@ public class IncrementalClusterStateWriter {
for (ShardRouting routing : newRoutingNode) {
indices.add(routing.index());
}
// we have to check the meta data also: closed indices will not appear in the routing table, but we must still write the state if
// we have it written on disk previously
for (IndexMetaData indexMetaData : state.metaData()) {
boolean isOrWasClosed = indexMetaData.getState().equals(IndexMetaData.State.CLOSE);
// if the index is open we might still have to write the state if it just transitioned from closed to open
// so we have to check for that as well.
IndexMetaData previousMetaData = previousState.metaData().index(indexMetaData.getIndex());
if (previousMetaData != null) {
isOrWasClosed = isOrWasClosed || previousMetaData.getState().equals(IndexMetaData.State.CLOSE);
}
if (previouslyWrittenIndices.contains(indexMetaData.getIndex()) && isOrWasClosed) {
indices.add(indexMetaData.getIndex());
}
}
return indices;
}
@ -244,20 +229,14 @@ public class IncrementalClusterStateWriter {
}
// exposed for tests
static Set<Index> getRelevantIndices(ClusterState state, ClusterState previousState, Set<Index> previouslyWrittenIndices) {
Set<Index> relevantIndices;
if (isDataOnlyNode(state)) {
relevantIndices = getRelevantIndicesOnDataOnlyNode(state, previousState, previouslyWrittenIndices);
} else if (state.nodes().getLocalNode().isMasterNode()) {
relevantIndices = getRelevantIndicesForMasterEligibleNode(state);
static Set<Index> getRelevantIndices(ClusterState state) {
if (state.nodes().getLocalNode().isMasterNode()) {
return getRelevantIndicesForMasterEligibleNode(state);
} else if (state.nodes().getLocalNode().isDataNode()) {
return getRelevantIndicesOnDataOnlyNode(state);
} else {
relevantIndices = Collections.emptySet();
return Collections.emptySet();
}
return relevantIndices;
}
private static boolean isDataOnlyNode(ClusterState state) {
return state.nodes().getLocalNode().isMasterNode() == false && state.nodes().getLocalNode().isDataNode();
}
/**

View File

@ -30,6 +30,7 @@ import org.elasticsearch.cluster.ESAllocationTestCase;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.cluster.metadata.Manifest;
import org.elasticsearch.cluster.metadata.MetaData;
import org.elasticsearch.cluster.metadata.MetaDataIndexStateService;
import org.elasticsearch.cluster.node.DiscoveryNode;
import org.elasticsearch.cluster.node.DiscoveryNodeRole;
import org.elasticsearch.cluster.node.DiscoveryNodes;
@ -73,17 +74,6 @@ import static org.mockito.Mockito.when;
public class IncrementalClusterStateWriterTests extends ESAllocationTestCase {
private ClusterState noIndexClusterState(boolean masterEligible) {
MetaData metaData = MetaData.builder().build();
RoutingTable routingTable = RoutingTable.builder().build();
return ClusterState.builder(org.elasticsearch.cluster.ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY))
.metaData(metaData)
.routingTable(routingTable)
.nodes(generateDiscoveryNodes(masterEligible))
.build();
}
private ClusterState clusterStateWithUnassignedIndex(IndexMetaData indexMetaData, boolean masterEligible) {
MetaData metaData = MetaData.builder()
.put(indexMetaData, false)
@ -119,7 +109,7 @@ public class IncrementalClusterStateWriterTests extends ESAllocationTestCase {
.metaData(metaDataNewClusterState).version(oldClusterState.getVersion() + 1).build();
}
private ClusterState clusterStateWithClosedIndex(IndexMetaData indexMetaData, boolean masterEligible) {
private ClusterState clusterStateWithNonReplicatedClosedIndex(IndexMetaData indexMetaData, boolean masterEligible) {
ClusterState oldClusterState = clusterStateWithAssignedIndex(indexMetaData, masterEligible);
MetaData metaDataNewClusterState = MetaData.builder()
@ -128,23 +118,41 @@ public class IncrementalClusterStateWriterTests extends ESAllocationTestCase {
.version(oldClusterState.metaData().version() + 1)
.build();
RoutingTable routingTable = RoutingTable.builder()
.addAsNew(metaDataNewClusterState.index("test"))
.addAsRecovery(metaDataNewClusterState.index("test"))
.build();
return ClusterState.builder(oldClusterState).routingTable(routingTable)
.metaData(metaDataNewClusterState).version(oldClusterState.getVersion() + 1).build();
}
private ClusterState clusterStateWithJustOpenedIndex(IndexMetaData indexMetaData, boolean masterEligible) {
ClusterState oldClusterState = clusterStateWithClosedIndex(indexMetaData, masterEligible);
private ClusterState clusterStateWithReplicatedClosedIndex(IndexMetaData indexMetaData, boolean masterEligible, boolean assigned) {
ClusterState oldClusterState = clusterStateWithAssignedIndex(indexMetaData, masterEligible);
MetaData metaDataNewClusterState = MetaData.builder()
.put(IndexMetaData.builder("test").settings(settings(Version.CURRENT)).state(IndexMetaData.State.OPEN)
.put(IndexMetaData.builder("test").settings(settings(Version.CURRENT)
.put(MetaDataIndexStateService.VERIFIED_BEFORE_CLOSE_SETTING.getKey(), true))
.state(IndexMetaData.State.CLOSE)
.numberOfShards(5).numberOfReplicas(2))
.version(oldClusterState.metaData().version() + 1)
.build();
RoutingTable routingTable = RoutingTable.builder()
.addAsRecovery(metaDataNewClusterState.index("test"))
.build();
return ClusterState.builder(oldClusterState)
oldClusterState = ClusterState.builder(oldClusterState).routingTable(routingTable)
.metaData(metaDataNewClusterState).build();
if (assigned) {
AllocationService strategy = createAllocationService(Settings.builder()
.put("cluster.routing.allocation.node_concurrent_recoveries", 100)
.put(ClusterRebalanceAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE_SETTING.getKey(), "always")
.put("cluster.routing.allocation.cluster_concurrent_rebalance", 100)
.put("cluster.routing.allocation.node_initial_primaries_recoveries", 100)
.build());
routingTable = strategy.reroute(oldClusterState, "reroute").routingTable();
}
return ClusterState.builder(oldClusterState).routingTable(routingTable)
.metaData(metaDataNewClusterState).version(oldClusterState.getVersion() + 1).build();
}
@ -154,14 +162,6 @@ public class IncrementalClusterStateWriterTests extends ESAllocationTestCase {
.add(newNode("master_node", MASTER_DATA_ROLES)).localNodeId("node1").masterNodeId(masterEligible ? "node1" : "master_node");
}
private Set<Index> randomPrevWrittenIndices(IndexMetaData indexMetaData) {
if (randomBoolean()) {
return Collections.singleton(indexMetaData.getIndex());
} else {
return Collections.emptySet();
}
}
private IndexMetaData createIndexMetaData(String name) {
return IndexMetaData.builder(name).
settings(settings(Version.CURRENT)).
@ -172,56 +172,41 @@ public class IncrementalClusterStateWriterTests extends ESAllocationTestCase {
public void testGetRelevantIndicesWithUnassignedShardsOnMasterEligibleNode() {
IndexMetaData indexMetaData = createIndexMetaData("test");
Set<Index> indices = IncrementalClusterStateWriter.getRelevantIndices(
clusterStateWithUnassignedIndex(indexMetaData, true),
noIndexClusterState(true),
randomPrevWrittenIndices(indexMetaData));
Set<Index> indices = IncrementalClusterStateWriter.getRelevantIndices(clusterStateWithUnassignedIndex(indexMetaData, true));
assertThat(indices.size(), equalTo(1));
}
public void testGetRelevantIndicesWithUnassignedShardsOnDataOnlyNode() {
IndexMetaData indexMetaData = createIndexMetaData("test");
Set<Index> indices = IncrementalClusterStateWriter.getRelevantIndices(
clusterStateWithUnassignedIndex(indexMetaData, false),
noIndexClusterState(false),
randomPrevWrittenIndices(indexMetaData));
Set<Index> indices = IncrementalClusterStateWriter.getRelevantIndices(clusterStateWithUnassignedIndex(indexMetaData, false));
assertThat(indices.size(), equalTo(0));
}
public void testGetRelevantIndicesWithAssignedShards() {
IndexMetaData indexMetaData = createIndexMetaData("test");
boolean masterEligible = randomBoolean();
Set<Index> indices = IncrementalClusterStateWriter.getRelevantIndices(
clusterStateWithAssignedIndex(indexMetaData, masterEligible),
clusterStateWithUnassignedIndex(indexMetaData, masterEligible),
randomPrevWrittenIndices(indexMetaData));
Set<Index> indices = IncrementalClusterStateWriter.getRelevantIndices(clusterStateWithAssignedIndex(indexMetaData, masterEligible));
assertThat(indices.size(), equalTo(1));
}
public void testGetRelevantIndicesForClosedPrevWrittenIndexOnDataOnlyNode() {
public void testGetRelevantIndicesForNonReplicatedClosedIndexOnDataOnlyNode() {
IndexMetaData indexMetaData = createIndexMetaData("test");
Set<Index> indices = IncrementalClusterStateWriter.getRelevantIndices(
clusterStateWithClosedIndex(indexMetaData, false),
clusterStateWithAssignedIndex(indexMetaData, false),
Collections.singleton(indexMetaData.getIndex()));
assertThat(indices.size(), equalTo(1));
}
public void testGetRelevantIndicesForClosedPrevNotWrittenIndexOnDataOnlyNode() {
IndexMetaData indexMetaData = createIndexMetaData("test");
Set<Index> indices = IncrementalClusterStateWriter.getRelevantIndices(
clusterStateWithJustOpenedIndex(indexMetaData, false),
clusterStateWithClosedIndex(indexMetaData, false),
Collections.emptySet());
clusterStateWithNonReplicatedClosedIndex(indexMetaData, false));
assertThat(indices.size(), equalTo(0));
}
public void testGetRelevantIndicesForWasClosedPrevWrittenIndexOnDataOnlyNode() {
public void testGetRelevantIndicesForReplicatedClosedButUnassignedIndexOnDataOnlyNode() {
IndexMetaData indexMetaData = createIndexMetaData("test");
Set<Index> indices = IncrementalClusterStateWriter.getRelevantIndices(
clusterStateWithJustOpenedIndex(indexMetaData, false),
clusterStateWithClosedIndex(indexMetaData, false),
Collections.singleton(indexMetaData.getIndex()));
clusterStateWithReplicatedClosedIndex(indexMetaData, false, false));
assertThat(indices.size(), equalTo(0));
}
public void testGetRelevantIndicesForReplicatedClosedAndAssignedIndexOnDataOnlyNode() {
IndexMetaData indexMetaData = createIndexMetaData("test");
Set<Index> indices = IncrementalClusterStateWriter.getRelevantIndices(
clusterStateWithReplicatedClosedIndex(indexMetaData, false, true));
assertThat(indices.size(), equalTo(1));
}

View File

@ -435,6 +435,31 @@ public class CloseIndexIT extends ESIntegTestCase {
}
}
/**
* Test for https://github.com/elastic/elasticsearch/issues/47276 which checks that the persisted metadata on a data node does not
* become inconsistent when using replicated closed indices.
*/
public void testRelocatedClosedIndexIssue() throws Exception {
final String indexName = "closed-index";
final List<String> dataNodes = internalCluster().startDataOnlyNodes(2);
// allocate shard to first data node
createIndex(indexName, Settings.builder()
.put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1)
.put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 0)
.put("index.routing.allocation.include._name", dataNodes.get(0))
.build());
indexRandom(randomBoolean(), randomBoolean(), randomBoolean(), IntStream.range(0, randomIntBetween(0, 50))
.mapToObj(n -> client().prepareIndex(indexName, "_doc").setSource("num", n)).collect(toList()));
assertAcked(client().admin().indices().prepareClose(indexName).setWaitForActiveShards(ActiveShardCount.ALL));
// move single shard to second node
client().admin().indices().prepareUpdateSettings(indexName).setSettings(Settings.builder()
.put("index.routing.allocation.include._name", dataNodes.get(1))).get();
ensureGreen(indexName);
internalCluster().fullRestart();
assertIndexIsClosed(indexName);
ensureGreen(indexName);
}
public void testResyncPropagatePrimaryTerm() throws Exception {
internalCluster().ensureAtLeastNumDataNodes(3);
final String indexName = "closed_indices_promotion";