From e5c1ab0ca68f0badddd9e59807a3bf8d24faea3f Mon Sep 17 00:00:00 2001 From: kimchy Date: Thu, 14 Apr 2011 13:43:01 +0300 Subject: [PATCH] add another reroute method to do no assignment but still elect new primaries on failed nodes --- .../routing/allocation/ShardsAllocation.java | 26 +++ .../allocation/FailedNodeRoutingTests.java | 163 ++++++++++++++++++ 2 files changed, 189 insertions(+) create mode 100644 modules/elasticsearch/src/test/java/org/elasticsearch/cluster/routing/allocation/FailedNodeRoutingTests.java diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/cluster/routing/allocation/ShardsAllocation.java b/modules/elasticsearch/src/main/java/org/elasticsearch/cluster/routing/allocation/ShardsAllocation.java index cbfa434b2bb..ab88aa2ebd1 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/cluster/routing/allocation/ShardsAllocation.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/cluster/routing/allocation/ShardsAllocation.java @@ -102,6 +102,32 @@ public class ShardsAllocation extends AbstractComponent { return new RoutingAllocation.Result(true, new RoutingTable.Builder().updateNodes(routingNodes).build().validateRaiseException(clusterState.metaData()), allocation.explanation()); } + /** + * Handles a reroute *without* any reassignment of unassigned shards or rebalancing. It does + * make sure to handle removed nodes, but only moves their shards to UNASSIGNED; it does not reassign + * them.
+ */ + public RoutingAllocation.Result rerouteWithNoReassign(ClusterState clusterState) { + RoutingNodes routingNodes = clusterState.routingNodes(); + RoutingAllocation allocation = new RoutingAllocation(routingNodes, clusterState.nodes()); + Iterable dataNodes = allocation.nodes().dataNodes().values(); + boolean changed = false; + // first, clear from the shards any node id they used to belong to that is now dead + changed |= deassociateDeadNodes(allocation.routingNodes(), dataNodes); + + // create a sorted list of nodes, from those with the least number of shards to those with the most + applyNewNodes(allocation.routingNodes(), dataNodes); + + // elect primaries *before* allocating unassigned, so backups of primaries that failed + // will be moved to primary state and not wait for primaries to be allocated and recovered (*from gateway*) + changed |= electPrimaries(allocation.routingNodes()); + + if (!changed) { + return new RoutingAllocation.Result(false, clusterState.routingTable(), allocation.explanation()); + } + return new RoutingAllocation.Result(true, new RoutingTable.Builder().updateNodes(routingNodes).build().validateRaiseException(clusterState.metaData()), allocation.explanation()); + } + private boolean reroute(RoutingAllocation allocation) { Iterable dataNodes = allocation.nodes().dataNodes().values(); diff --git a/modules/elasticsearch/src/test/java/org/elasticsearch/cluster/routing/allocation/FailedNodeRoutingTests.java b/modules/elasticsearch/src/test/java/org/elasticsearch/cluster/routing/allocation/FailedNodeRoutingTests.java new file mode 100644 index 00000000000..4f81d7c04ef --- /dev/null +++ b/modules/elasticsearch/src/test/java/org/elasticsearch/cluster/routing/allocation/FailedNodeRoutingTests.java @@ -0,0 +1,163 @@ +/* + * Licensed to Elastic Search and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership.
Elastic Search licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.cluster.routing.allocation; + +import org.elasticsearch.cluster.ClusterState; +import org.elasticsearch.cluster.metadata.MetaData; +import org.elasticsearch.cluster.routing.RoutingNode; +import org.elasticsearch.cluster.routing.RoutingNodes; +import org.elasticsearch.cluster.routing.RoutingTable; +import org.elasticsearch.common.logging.ESLogger; +import org.elasticsearch.common.logging.Loggers; +import org.testng.annotations.Test; + +import static org.elasticsearch.cluster.ClusterState.*; +import static org.elasticsearch.cluster.metadata.IndexMetaData.*; +import static org.elasticsearch.cluster.metadata.MetaData.*; +import static org.elasticsearch.cluster.node.DiscoveryNodes.*; +import static org.elasticsearch.cluster.routing.RoutingBuilders.*; +import static org.elasticsearch.cluster.routing.ShardRoutingState.*; +import static org.elasticsearch.cluster.routing.allocation.RoutingAllocationTests.*; +import static org.elasticsearch.common.settings.ImmutableSettings.*; +import static org.hamcrest.MatcherAssert.*; +import static org.hamcrest.Matchers.*; + +public class FailedNodeRoutingTests { + + private final ESLogger logger = Loggers.getLogger(FailedNodeRoutingTests.class); + + @Test public void simpleFailedNodeTest() { + ShardsAllocation strategy = new ShardsAllocation(settingsBuilder().put("cluster.routing.allocation.allow_rebalance", 
ClusterRebalanceNodeAllocation.ClusterRebalanceType.ALWAYS.toString()).build()); + + MetaData metaData = newMetaDataBuilder() + .put(newIndexMetaDataBuilder("test1").numberOfShards(1).numberOfReplicas(1)) + .put(newIndexMetaDataBuilder("test2").numberOfShards(1).numberOfReplicas(1)) + .build(); + + RoutingTable routingTable = routingTable() + .add(indexRoutingTable("test1").initializeEmpty(metaData.index("test1"))) + .add(indexRoutingTable("test2").initializeEmpty(metaData.index("test2"))) + .build(); + + ClusterState clusterState = newClusterStateBuilder().metaData(metaData).routingTable(routingTable).build(); + + logger.info("start 4 nodes"); + clusterState = newClusterStateBuilder().state(clusterState).nodes(newNodesBuilder().put(newNode("node1")).put(newNode("node2")).put(newNode("node3")).put(newNode("node4"))).build(); + RoutingTable prevRoutingTable = routingTable; + routingTable = strategy.reroute(clusterState).routingTable(); + clusterState = newClusterStateBuilder().state(clusterState).routingTable(routingTable).build(); + + logger.info("start all the primary shards, replicas will start initializing"); + RoutingNodes routingNodes = clusterState.routingNodes(); + prevRoutingTable = routingTable; + routingTable = strategy.applyStartedShards(clusterState, routingNodes.shardsWithState(INITIALIZING)).routingTable(); + clusterState = newClusterStateBuilder().state(clusterState).routingTable(routingTable).build(); + routingNodes = clusterState.routingNodes(); + + logger.info("start the replica shards"); + routingNodes = clusterState.routingNodes(); + prevRoutingTable = routingTable; + routingTable = strategy.applyStartedShards(clusterState, routingNodes.shardsWithState(INITIALIZING)).routingTable(); + clusterState = newClusterStateBuilder().state(clusterState).routingTable(routingTable).build(); + routingNodes = clusterState.routingNodes(); + + assertThat(routingNodes.node("node1").numberOfShardsWithState(STARTED), equalTo(1)); + 
assertThat(routingNodes.node("node2").numberOfShardsWithState(STARTED), equalTo(1)); + assertThat(routingNodes.node("node3").numberOfShardsWithState(STARTED), equalTo(1)); + assertThat(routingNodes.node("node4").numberOfShardsWithState(STARTED), equalTo(1)); + + + logger.info("remove 2 nodes where primaries are allocated, reroute"); + + clusterState = newClusterStateBuilder().state(clusterState).nodes(newNodesBuilder().putAll(clusterState.nodes()) + .remove(routingTable.index("test1").shard(0).primaryShard().currentNodeId()) + .remove(routingTable.index("test2").shard(0).primaryShard().currentNodeId()) + ) + .build(); + prevRoutingTable = routingTable; + routingTable = strategy.reroute(clusterState).routingTable(); + clusterState = newClusterStateBuilder().state(clusterState).routingTable(routingTable).build(); + routingNodes = clusterState.routingNodes(); + + for (RoutingNode routingNode : routingNodes) { + assertThat(routingNode.numberOfShardsWithState(STARTED), equalTo(1)); + assertThat(routingNode.numberOfShardsWithState(INITIALIZING), equalTo(1)); + } + } + + @Test public void simpleFailedNodeTestNoReassign() { + ShardsAllocation strategy = new ShardsAllocation(settingsBuilder().put("cluster.routing.allocation.allow_rebalance", ClusterRebalanceNodeAllocation.ClusterRebalanceType.ALWAYS.toString()).build()); + + MetaData metaData = newMetaDataBuilder() + .put(newIndexMetaDataBuilder("test1").numberOfShards(1).numberOfReplicas(1)) + .put(newIndexMetaDataBuilder("test2").numberOfShards(1).numberOfReplicas(1)) + .build(); + + RoutingTable routingTable = routingTable() + .add(indexRoutingTable("test1").initializeEmpty(metaData.index("test1"))) + .add(indexRoutingTable("test2").initializeEmpty(metaData.index("test2"))) + .build(); + + ClusterState clusterState = newClusterStateBuilder().metaData(metaData).routingTable(routingTable).build(); + + logger.info("start 4 nodes"); + clusterState = 
newClusterStateBuilder().state(clusterState).nodes(newNodesBuilder().put(newNode("node1")).put(newNode("node2")).put(newNode("node3")).put(newNode("node4"))).build(); + RoutingTable prevRoutingTable = routingTable; + routingTable = strategy.reroute(clusterState).routingTable(); + clusterState = newClusterStateBuilder().state(clusterState).routingTable(routingTable).build(); + + logger.info("start all the primary shards, replicas will start initializing"); + RoutingNodes routingNodes = clusterState.routingNodes(); + prevRoutingTable = routingTable; + routingTable = strategy.applyStartedShards(clusterState, routingNodes.shardsWithState(INITIALIZING)).routingTable(); + clusterState = newClusterStateBuilder().state(clusterState).routingTable(routingTable).build(); + routingNodes = clusterState.routingNodes(); + + logger.info("start the replica shards"); + routingNodes = clusterState.routingNodes(); + prevRoutingTable = routingTable; + routingTable = strategy.applyStartedShards(clusterState, routingNodes.shardsWithState(INITIALIZING)).routingTable(); + clusterState = newClusterStateBuilder().state(clusterState).routingTable(routingTable).build(); + routingNodes = clusterState.routingNodes(); + + assertThat(routingNodes.node("node1").numberOfShardsWithState(STARTED), equalTo(1)); + assertThat(routingNodes.node("node2").numberOfShardsWithState(STARTED), equalTo(1)); + assertThat(routingNodes.node("node3").numberOfShardsWithState(STARTED), equalTo(1)); + assertThat(routingNodes.node("node4").numberOfShardsWithState(STARTED), equalTo(1)); + + + logger.info("remove 2 nodes where primaries are allocated, reroute"); + + clusterState = newClusterStateBuilder().state(clusterState).nodes(newNodesBuilder().putAll(clusterState.nodes()) + .remove(routingTable.index("test1").shard(0).primaryShard().currentNodeId()) + .remove(routingTable.index("test2").shard(0).primaryShard().currentNodeId()) + ) + .build(); + prevRoutingTable = routingTable; + routingTable = 
strategy.rerouteWithNoReassign(clusterState).routingTable(); + clusterState = newClusterStateBuilder().state(clusterState).routingTable(routingTable).build(); + routingNodes = clusterState.routingNodes(); + + for (RoutingNode routingNode : routingNodes) { + assertThat(routingNode.numberOfShardsWithState(STARTED), equalTo(1)); + } + assertThat(routingNodes.unassigned().size(), equalTo(2)); + } +} \ No newline at end of file