add another reroute method to do no assignment but still elect new primaries on failed nodes

kimchy 2011-04-14 13:43:01 +03:00
parent 25eba4b60d
commit e5c1ab0ca6
2 changed files with 189 additions and 0 deletions

@@ -102,6 +102,32 @@ public class ShardsAllocation extends AbstractComponent {
        return new RoutingAllocation.Result(true, new RoutingTable.Builder().updateNodes(routingNodes).build().validateRaiseException(clusterState.metaData()), allocation.explanation());
    }
    /**
     * Handles reroute *without* any reassignment of unassigned shards or rebalancing. It does
     * make sure to handle removed nodes, but only moves their shards to UNASSIGNED; it does not
     * reassign them.
     */
    public RoutingAllocation.Result rerouteWithNoReassign(ClusterState clusterState) {
        RoutingNodes routingNodes = clusterState.routingNodes();
        RoutingAllocation allocation = new RoutingAllocation(routingNodes, clusterState.nodes());
        Iterable<DiscoveryNode> dataNodes = allocation.nodes().dataNodes().values();
        boolean changed = false;

        // first, clear from the shards any node id they used to belong to that is now dead
        changed |= deassociateDeadNodes(allocation.routingNodes(), dataNodes);

        // make sure any newly added data nodes are represented in the routing nodes
        applyNewNodes(allocation.routingNodes(), dataNodes);

        // elect new primaries, so replicas of primaries that failed are moved to primary state
        // and do not wait for the failed primaries to be allocated and recovered (*from gateway*)
        changed |= electPrimaries(allocation.routingNodes());

        if (!changed) {
            return new RoutingAllocation.Result(false, clusterState.routingTable(), allocation.explanation());
        }
        return new RoutingAllocation.Result(true, new RoutingTable.Builder().updateNodes(routingNodes).build().validateRaiseException(clusterState.metaData()), allocation.explanation());
    }
    private boolean reroute(RoutingAllocation allocation) {
        Iterable<DiscoveryNode> dataNodes = allocation.nodes().dataNodes().values();
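
For reference, a minimal sketch (not part of this commit) of how a caller might drive the new method once the dead nodes have already been removed from the cluster state. The class and method names in the sketch are hypothetical; only ShardsAllocation, RoutingAllocation.Result, and the ClusterState builder calls come from the code in this change.

import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.routing.allocation.RoutingAllocation;
import org.elasticsearch.cluster.routing.allocation.ShardsAllocation;

import static org.elasticsearch.cluster.ClusterState.newClusterStateBuilder;

// hypothetical caller sketch, not part of this commit
public class NoReassignRerouteSketch {

    private final ShardsAllocation shardsAllocation;

    public NoReassignRerouteSketch(ShardsAllocation shardsAllocation) {
        this.shardsAllocation = shardsAllocation;
    }

    // called with a cluster state from which the dead nodes were already removed;
    // new primaries are elected, but unassigned shards are left for a later full reroute
    public ClusterState onNodesRemoved(ClusterState stateWithoutDeadNodes) {
        RoutingAllocation.Result result = shardsAllocation.rerouteWithNoReassign(stateWithoutDeadNodes);
        return newClusterStateBuilder()
                .state(stateWithoutDeadNodes)
                .routingTable(result.routingTable())
                .build();
    }
}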

@@ -0,0 +1,163 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.cluster.routing.allocation;
import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.metadata.MetaData;
import org.elasticsearch.cluster.routing.RoutingNode;
import org.elasticsearch.cluster.routing.RoutingNodes;
import org.elasticsearch.cluster.routing.RoutingTable;
import org.elasticsearch.common.logging.ESLogger;
import org.elasticsearch.common.logging.Loggers;
import org.testng.annotations.Test;
import static org.elasticsearch.cluster.ClusterState.*;
import static org.elasticsearch.cluster.metadata.IndexMetaData.*;
import static org.elasticsearch.cluster.metadata.MetaData.*;
import static org.elasticsearch.cluster.node.DiscoveryNodes.*;
import static org.elasticsearch.cluster.routing.RoutingBuilders.*;
import static org.elasticsearch.cluster.routing.ShardRoutingState.*;
import static org.elasticsearch.cluster.routing.allocation.RoutingAllocationTests.*;
import static org.elasticsearch.common.settings.ImmutableSettings.*;
import static org.hamcrest.MatcherAssert.*;
import static org.hamcrest.Matchers.*;
public class FailedNodeRoutingTests {

    private final ESLogger logger = Loggers.getLogger(FailedNodeRoutingTests.class);

    @Test public void simpleFailedNodeTest() {
        ShardsAllocation strategy = new ShardsAllocation(settingsBuilder().put("cluster.routing.allocation.allow_rebalance", ClusterRebalanceNodeAllocation.ClusterRebalanceType.ALWAYS.toString()).build());

        MetaData metaData = newMetaDataBuilder()
                .put(newIndexMetaDataBuilder("test1").numberOfShards(1).numberOfReplicas(1))
                .put(newIndexMetaDataBuilder("test2").numberOfShards(1).numberOfReplicas(1))
                .build();

        RoutingTable routingTable = routingTable()
                .add(indexRoutingTable("test1").initializeEmpty(metaData.index("test1")))
                .add(indexRoutingTable("test2").initializeEmpty(metaData.index("test2")))
                .build();

        ClusterState clusterState = newClusterStateBuilder().metaData(metaData).routingTable(routingTable).build();

        logger.info("start 4 nodes");
        clusterState = newClusterStateBuilder().state(clusterState).nodes(newNodesBuilder().put(newNode("node1")).put(newNode("node2")).put(newNode("node3")).put(newNode("node4"))).build();
        RoutingTable prevRoutingTable = routingTable;
        routingTable = strategy.reroute(clusterState).routingTable();
        clusterState = newClusterStateBuilder().state(clusterState).routingTable(routingTable).build();

        logger.info("start all the primary shards, replicas will start initializing");
        RoutingNodes routingNodes = clusterState.routingNodes();
        prevRoutingTable = routingTable;
        routingTable = strategy.applyStartedShards(clusterState, routingNodes.shardsWithState(INITIALIZING)).routingTable();
        clusterState = newClusterStateBuilder().state(clusterState).routingTable(routingTable).build();
        routingNodes = clusterState.routingNodes();

        logger.info("start the replica shards");
        routingNodes = clusterState.routingNodes();
        prevRoutingTable = routingTable;
        routingTable = strategy.applyStartedShards(clusterState, routingNodes.shardsWithState(INITIALIZING)).routingTable();
        clusterState = newClusterStateBuilder().state(clusterState).routingTable(routingTable).build();
        routingNodes = clusterState.routingNodes();

        assertThat(routingNodes.node("node1").numberOfShardsWithState(STARTED), equalTo(1));
        assertThat(routingNodes.node("node2").numberOfShardsWithState(STARTED), equalTo(1));
        assertThat(routingNodes.node("node3").numberOfShardsWithState(STARTED), equalTo(1));
        assertThat(routingNodes.node("node4").numberOfShardsWithState(STARTED), equalTo(1));

        logger.info("remove 2 nodes where primaries are allocated, reroute");
        clusterState = newClusterStateBuilder().state(clusterState).nodes(newNodesBuilder().putAll(clusterState.nodes())
                .remove(routingTable.index("test1").shard(0).primaryShard().currentNodeId())
                .remove(routingTable.index("test2").shard(0).primaryShard().currentNodeId())
        )
                .build();
        prevRoutingTable = routingTable;
        routingTable = strategy.reroute(clusterState).routingTable();
        clusterState = newClusterStateBuilder().state(clusterState).routingTable(routingTable).build();
        routingNodes = clusterState.routingNodes();
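
        // a full reroute: each surviving node keeps its started copy (now elected primary)
        // and receives a newly assigned replica, which starts initializing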
        for (RoutingNode routingNode : routingNodes) {
            assertThat(routingNode.numberOfShardsWithState(STARTED), equalTo(1));
            assertThat(routingNode.numberOfShardsWithState(INITIALIZING), equalTo(1));
        }
    }

    @Test public void simpleFailedNodeTestNoReassign() {
        ShardsAllocation strategy = new ShardsAllocation(settingsBuilder().put("cluster.routing.allocation.allow_rebalance", ClusterRebalanceNodeAllocation.ClusterRebalanceType.ALWAYS.toString()).build());

        MetaData metaData = newMetaDataBuilder()
                .put(newIndexMetaDataBuilder("test1").numberOfShards(1).numberOfReplicas(1))
                .put(newIndexMetaDataBuilder("test2").numberOfShards(1).numberOfReplicas(1))
                .build();

        RoutingTable routingTable = routingTable()
                .add(indexRoutingTable("test1").initializeEmpty(metaData.index("test1")))
                .add(indexRoutingTable("test2").initializeEmpty(metaData.index("test2")))
                .build();

        ClusterState clusterState = newClusterStateBuilder().metaData(metaData).routingTable(routingTable).build();

        logger.info("start 4 nodes");
        clusterState = newClusterStateBuilder().state(clusterState).nodes(newNodesBuilder().put(newNode("node1")).put(newNode("node2")).put(newNode("node3")).put(newNode("node4"))).build();
        RoutingTable prevRoutingTable = routingTable;
        routingTable = strategy.reroute(clusterState).routingTable();
        clusterState = newClusterStateBuilder().state(clusterState).routingTable(routingTable).build();

        logger.info("start all the primary shards, replicas will start initializing");
        RoutingNodes routingNodes = clusterState.routingNodes();
        prevRoutingTable = routingTable;
        routingTable = strategy.applyStartedShards(clusterState, routingNodes.shardsWithState(INITIALIZING)).routingTable();
        clusterState = newClusterStateBuilder().state(clusterState).routingTable(routingTable).build();
        routingNodes = clusterState.routingNodes();

        logger.info("start the replica shards");
        routingNodes = clusterState.routingNodes();
        prevRoutingTable = routingTable;
        routingTable = strategy.applyStartedShards(clusterState, routingNodes.shardsWithState(INITIALIZING)).routingTable();
        clusterState = newClusterStateBuilder().state(clusterState).routingTable(routingTable).build();
        routingNodes = clusterState.routingNodes();

        assertThat(routingNodes.node("node1").numberOfShardsWithState(STARTED), equalTo(1));
        assertThat(routingNodes.node("node2").numberOfShardsWithState(STARTED), equalTo(1));
        assertThat(routingNodes.node("node3").numberOfShardsWithState(STARTED), equalTo(1));
        assertThat(routingNodes.node("node4").numberOfShardsWithState(STARTED), equalTo(1));

        logger.info("remove 2 nodes where primaries are allocated, reroute");
        clusterState = newClusterStateBuilder().state(clusterState).nodes(newNodesBuilder().putAll(clusterState.nodes())
                .remove(routingTable.index("test1").shard(0).primaryShard().currentNodeId())
                .remove(routingTable.index("test2").shard(0).primaryShard().currentNodeId())
        )
                .build();
        prevRoutingTable = routingTable;
        routingTable = strategy.rerouteWithNoReassign(clusterState).routingTable();
        clusterState = newClusterStateBuilder().state(clusterState).routingTable(routingTable).build();
        routingNodes = clusterState.routingNodes();
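
        // with no reassignment, each surviving node only holds its started copy (now elected
        // primary); the two shard copies that were on the removed nodes stay unassigned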
        for (RoutingNode routingNode : routingNodes) {
            assertThat(routingNode.numberOfShardsWithState(STARTED), equalTo(1));
        }
        assertThat(routingNodes.unassigned().size(), equalTo(2));
    }
}