Index Allocation: allow to specify maximum total number of shards per node, closes #1650.

This commit is contained in:
Shay Banon 2012-01-30 01:43:18 +02:00
parent 8e6b171205
commit 70c334ec01
4 changed files with 312 additions and 0 deletions

View File

@ -47,6 +47,7 @@ public class AllocationDeciders extends AllocationDecider {
.add(new ConcurrentRebalanceAllocationDecider(settings, nodeSettingsService))
.add(new DisableAllocationDecider(settings, nodeSettingsService))
.add(new AwarenessAllocationDecider(settings, nodeSettingsService))
.add(new ShardsLimitAllocationDecider(settings))
.build()
);
}

View File

@ -38,6 +38,11 @@ public class AllocationDecidersModule extends AbstractModule {
this.settings = settings;
}
public AllocationDecidersModule add(Class<? extends AllocationDecider> allocationDecider) {
this.allocations.add(allocationDecider);
return this;
}
@Override
protected void configure() {
Multibinder<AllocationDecider> allocationMultibinder = Multibinder.newSetBinder(binder(), AllocationDecider.class);
@ -50,6 +55,7 @@ public class AllocationDecidersModule extends AbstractModule {
allocationMultibinder.addBinding().to(ConcurrentRebalanceAllocationDecider.class);
allocationMultibinder.addBinding().to(DisableAllocationDecider.class);
allocationMultibinder.addBinding().to(AwarenessAllocationDecider.class);
allocationMultibinder.addBinding().to(ShardsLimitAllocationDecider.class);
for (Class<? extends AllocationDecider> allocation : allocations) {
allocationMultibinder.addBinding().to(allocation);
}

View File

@ -0,0 +1,103 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.cluster.routing.allocation.decider;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.cluster.routing.MutableShardRouting;
import org.elasticsearch.cluster.routing.RoutingNode;
import org.elasticsearch.cluster.routing.ShardRouting;
import org.elasticsearch.cluster.routing.allocation.RoutingAllocation;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.settings.Settings;
import java.util.List;
/**
*
*/
public class ShardsLimitAllocationDecider extends AllocationDecider {
public static final String INDEX_TOTAL_SHARDS_PER_NODE = "index.routing.allocation.total_shards_per_node";
static {
IndexMetaData.addDynamicSettings(
INDEX_TOTAL_SHARDS_PER_NODE
);
}
@Inject
public ShardsLimitAllocationDecider(Settings settings) {
super(settings);
}
@Override
public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) {
IndexMetaData indexMd = allocation.routingNodes().metaData().index(shardRouting.index());
int totalShardsPerNode = indexMd.settings().getAsInt(INDEX_TOTAL_SHARDS_PER_NODE, -1);
if (totalShardsPerNode <= 0) {
return Decision.YES;
}
int nodeCount = 0;
List<MutableShardRouting> shards = node.shards();
for (int i = 0; i < shards.size(); i++) {
MutableShardRouting nodeShard = shards.get(i);
if (!nodeShard.index().equals(shardRouting.index())) {
continue;
}
// don't count relocating shards...
if (nodeShard.relocating()) {
continue;
}
nodeCount++;
}
if (nodeCount >= totalShardsPerNode) {
return Decision.NO;
}
return Decision.YES;
}
@Override
public boolean canRemain(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) {
IndexMetaData indexMd = allocation.routingNodes().metaData().index(shardRouting.index());
int totalShardsPerNode = indexMd.settings().getAsInt(INDEX_TOTAL_SHARDS_PER_NODE, -1);
if (totalShardsPerNode <= 0) {
return true;
}
int nodeCount = 0;
List<MutableShardRouting> shards = node.shards();
for (int i = 0; i < shards.size(); i++) {
MutableShardRouting nodeShard = shards.get(i);
if (!nodeShard.index().equals(shardRouting.index())) {
continue;
}
// don't count relocating shards...
if (nodeShard.relocating()) {
continue;
}
nodeCount++;
}
if (nodeCount > totalShardsPerNode) {
return false;
}
return true;
}
}

View File

@ -0,0 +1,202 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.test.unit.cluster.routing.allocation;
import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.cluster.metadata.MetaData;
import org.elasticsearch.cluster.routing.MutableShardRouting;
import org.elasticsearch.cluster.routing.RoutingNodes;
import org.elasticsearch.cluster.routing.RoutingTable;
import org.elasticsearch.cluster.routing.ShardRoutingState;
import org.elasticsearch.cluster.routing.allocation.AllocationService;
import org.elasticsearch.cluster.routing.allocation.decider.ShardsLimitAllocationDecider;
import org.elasticsearch.common.logging.ESLogger;
import org.elasticsearch.common.logging.Loggers;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.testng.annotations.Test;
import static org.elasticsearch.cluster.ClusterState.newClusterStateBuilder;
import static org.elasticsearch.cluster.metadata.IndexMetaData.newIndexMetaDataBuilder;
import static org.elasticsearch.cluster.metadata.MetaData.newMetaDataBuilder;
import static org.elasticsearch.cluster.node.DiscoveryNodes.newNodesBuilder;
import static org.elasticsearch.cluster.routing.RoutingBuilders.indexRoutingTable;
import static org.elasticsearch.cluster.routing.RoutingBuilders.routingTable;
import static org.elasticsearch.cluster.routing.ShardRoutingState.*;
import static org.elasticsearch.common.settings.ImmutableSettings.settingsBuilder;
import static org.elasticsearch.test.unit.cluster.routing.allocation.RoutingAllocationTests.newNode;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.equalTo;
/**
*/
@Test
public class ShardsLimitAllocationTests {
private final ESLogger logger = Loggers.getLogger(ShardsLimitAllocationTests.class);
@Test
public void indexLevelShardsLimitAllocate() {
AllocationService strategy = new AllocationService(settingsBuilder().put("cluster.routing.allocation.concurrent_recoveries", 10).build());
logger.info("Building initial routing table");
MetaData metaData = newMetaDataBuilder()
.put(newIndexMetaDataBuilder("test").settings(ImmutableSettings.settingsBuilder()
.put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 4)
.put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 1)
.put(ShardsLimitAllocationDecider.INDEX_TOTAL_SHARDS_PER_NODE, 2)))
.build();
RoutingTable routingTable = routingTable()
.add(indexRoutingTable("test").initializeEmpty(metaData.index("test")))
.build();
ClusterState clusterState = newClusterStateBuilder().metaData(metaData).routingTable(routingTable).build();
logger.info("Adding two nodes and performing rerouting");
clusterState = newClusterStateBuilder().state(clusterState).nodes(newNodesBuilder().put(newNode("node1")).put(newNode("node2"))).build();
routingTable = strategy.reroute(clusterState).routingTable();
clusterState = newClusterStateBuilder().state(clusterState).routingTable(routingTable).build();
assertThat(clusterState.readOnlyRoutingNodes().node("node1").numberOfShardsWithState(ShardRoutingState.INITIALIZING), equalTo(2));
assertThat(clusterState.readOnlyRoutingNodes().node("node2").numberOfShardsWithState(ShardRoutingState.INITIALIZING), equalTo(2));
logger.info("Start the primary shards");
RoutingNodes routingNodes = clusterState.routingNodes();
routingTable = strategy.applyStartedShards(clusterState, routingNodes.shardsWithState(INITIALIZING)).routingTable();
clusterState = newClusterStateBuilder().state(clusterState).routingTable(routingTable).build();
assertThat(clusterState.readOnlyRoutingNodes().node("node1").numberOfShardsWithState(ShardRoutingState.STARTED), equalTo(2));
assertThat(clusterState.readOnlyRoutingNodes().node("node1").numberOfShardsWithState(ShardRoutingState.INITIALIZING), equalTo(0));
assertThat(clusterState.readOnlyRoutingNodes().node("node2").numberOfShardsWithState(ShardRoutingState.STARTED), equalTo(2));
assertThat(clusterState.readOnlyRoutingNodes().node("node2").numberOfShardsWithState(ShardRoutingState.INITIALIZING), equalTo(0));
assertThat(clusterState.readOnlyRoutingNodes().unassigned().size(), equalTo(4));
logger.info("Do another reroute, make sure its still not allocated");
routingNodes = clusterState.routingNodes();
routingTable = strategy.applyStartedShards(clusterState, routingNodes.shardsWithState(INITIALIZING)).routingTable();
clusterState = newClusterStateBuilder().state(clusterState).routingTable(routingTable).build();
}
@Test
public void indexLevelShardsLimitRemain() {
AllocationService strategy = new AllocationService(settingsBuilder()
.put("cluster.routing.allocation.concurrent_recoveries", 10)
.put("cluster.routing.allocation.node_initial_primaries_recoveries", 10)
.put("cluster.routing.allocation.allow_rebalance", "always")
.put("cluster.routing.allocation.cluster_concurrent_rebalance", -1)
.build());
logger.info("Building initial routing table");
MetaData metaData = newMetaDataBuilder()
.put(newIndexMetaDataBuilder("test").settings(ImmutableSettings.settingsBuilder()
.put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 5)
.put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 0)
))
.build();
RoutingTable routingTable = routingTable()
.add(indexRoutingTable("test").initializeEmpty(metaData.index("test")))
.build();
ClusterState clusterState = newClusterStateBuilder().metaData(metaData).routingTable(routingTable).build();
logger.info("Adding one node and reroute");
clusterState = newClusterStateBuilder().state(clusterState).nodes(newNodesBuilder().put(newNode("node1"))).build();
routingTable = strategy.reroute(clusterState).routingTable();
clusterState = newClusterStateBuilder().state(clusterState).routingTable(routingTable).build();
logger.info("Start the primary shards");
RoutingNodes routingNodes = clusterState.routingNodes();
routingTable = strategy.applyStartedShards(clusterState, routingNodes.shardsWithState(INITIALIZING)).routingTable();
clusterState = newClusterStateBuilder().state(clusterState).routingTable(routingTable).build();
assertThat(clusterState.readOnlyRoutingNodes().numberOfShardsOfType(STARTED), equalTo(5));
logger.info("add another index with 5 shards");
metaData = newMetaDataBuilder().metaData(metaData)
.put(newIndexMetaDataBuilder("test1").settings(ImmutableSettings.settingsBuilder()
.put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 5)
.put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 0)
))
.build();
routingTable = routingTable().routingTable(routingTable)
.add(indexRoutingTable("test1").initializeEmpty(metaData.index("test1")))
.build();
clusterState = newClusterStateBuilder().state(clusterState).metaData(metaData).routingTable(routingTable).build();
logger.info("Add another one node and reroute");
clusterState = newClusterStateBuilder().state(clusterState).nodes(newNodesBuilder().putAll(clusterState.nodes()).put(newNode("node2"))).build();
routingTable = strategy.reroute(clusterState).routingTable();
clusterState = newClusterStateBuilder().state(clusterState).routingTable(routingTable).build();
routingNodes = clusterState.routingNodes();
routingTable = strategy.applyStartedShards(clusterState, routingNodes.shardsWithState(INITIALIZING)).routingTable();
clusterState = newClusterStateBuilder().state(clusterState).routingTable(routingTable).build();
assertThat(clusterState.readOnlyRoutingNodes().numberOfShardsOfType(STARTED), equalTo(10));
for (MutableShardRouting shardRouting : clusterState.readOnlyRoutingNodes().node("node1")) {
assertThat(shardRouting.index(), equalTo("test"));
}
for (MutableShardRouting shardRouting : clusterState.readOnlyRoutingNodes().node("node2")) {
assertThat(shardRouting.index(), equalTo("test1"));
}
logger.info("update " + ShardsLimitAllocationDecider.INDEX_TOTAL_SHARDS_PER_NODE + " for test, see that things move");
metaData = newMetaDataBuilder().metaData(metaData)
.put(newIndexMetaDataBuilder("test").settings(ImmutableSettings.settingsBuilder()
.put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 5)
.put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 0)
.put(ShardsLimitAllocationDecider.INDEX_TOTAL_SHARDS_PER_NODE, 3)
))
.build();
clusterState = newClusterStateBuilder().state(clusterState).metaData(metaData).build();
logger.info("reroute after setting");
routingTable = strategy.reroute(clusterState).routingTable();
clusterState = newClusterStateBuilder().state(clusterState).routingTable(routingTable).build();
assertThat(clusterState.readOnlyRoutingNodes().node("node1").numberOfShardsWithState(STARTED), equalTo(3));
assertThat(clusterState.readOnlyRoutingNodes().node("node1").numberOfShardsWithState(RELOCATING), equalTo(2));
logger.info("start the moving shards, a shard from test1 should move back to node1");
routingNodes = clusterState.routingNodes();
routingTable = strategy.applyStartedShards(clusterState, routingNodes.shardsWithState(INITIALIZING)).routingTable();
clusterState = newClusterStateBuilder().state(clusterState).routingTable(routingTable).build();
assertThat(clusterState.readOnlyRoutingNodes().node("node1").numberOfShardsWithState(STARTED), equalTo(3));
assertThat(clusterState.readOnlyRoutingNodes().node("node1").numberOfShardsWithState(RELOCATING), equalTo(0));
assertThat(clusterState.readOnlyRoutingNodes().node("node1").numberOfShardsWithState(INITIALIZING), equalTo(2));
assertThat(clusterState.readOnlyRoutingNodes().node("node2").numberOfShardsWithState(STARTED), equalTo(5));
assertThat(clusterState.readOnlyRoutingNodes().node("node2").numberOfShardsWithState(RELOCATING), equalTo(2));
logger.info("finalize movement, another shard will move");
routingNodes = clusterState.routingNodes();
routingTable = strategy.applyStartedShards(clusterState, routingNodes.shardsWithState(INITIALIZING)).routingTable();
clusterState = newClusterStateBuilder().state(clusterState).routingTable(routingTable).build();
assertThat(clusterState.readOnlyRoutingNodes().node("node1").numberOfShardsWithState(STARTED), equalTo(5));
assertThat(clusterState.readOnlyRoutingNodes().node("node2").numberOfShardsWithState(STARTED), equalTo(5));
}
}