Index Allocation: allow specifying the maximum total number of shards per node (per index), closes #1650.

2012-01-30 01:43:18 +02:00 · 2012-01-30 01:43:18 +02:00 · 70c334ec01
parent 8e6b171205
commit 70c334ec01
4 changed files with 312 additions and 0 deletions
--- a/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/AllocationDeciders.java
+++ b/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/AllocationDeciders.java
@ -47,6 +47,7 @@ public class AllocationDeciders extends AllocationDecider {
                .add(new ConcurrentRebalanceAllocationDecider(settings, nodeSettingsService))
                .add(new DisableAllocationDecider(settings, nodeSettingsService))
                .add(new AwarenessAllocationDecider(settings, nodeSettingsService))
+                .add(new ShardsLimitAllocationDecider(settings))
                .build()
        );
    }
--- a/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/AllocationDecidersModule.java
+++ b/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/AllocationDecidersModule.java
@ -38,6 +38,11 @@ public class AllocationDecidersModule extends AbstractModule {
        this.settings = settings;
    }

+    /**
+     * Adds an extra allocation decider class to this module.
+     * <p>
+     * Classes added here are bound into the {@link AllocationDecider} multibinder in
+     * {@link #configure()}, after the built-in deciders.
+     *
+     * @param allocationDecider the decider implementation class to bind
+     * @return this module, so calls can be chained
+     */
+    public AllocationDecidersModule add(Class<? extends AllocationDecider> allocationDecider) {
+        allocations.add(allocationDecider);
+        return this;
+    }
+
    @Override
    protected void configure() {
        Multibinder<AllocationDecider> allocationMultibinder = Multibinder.newSetBinder(binder(), AllocationDecider.class);
@ -50,6 +55,7 @@ public class AllocationDecidersModule extends AbstractModule {
        allocationMultibinder.addBinding().to(ConcurrentRebalanceAllocationDecider.class);
        allocationMultibinder.addBinding().to(DisableAllocationDecider.class);
        allocationMultibinder.addBinding().to(AwarenessAllocationDecider.class);
+        allocationMultibinder.addBinding().to(ShardsLimitAllocationDecider.class);
        for (Class<? extends AllocationDecider> allocation : allocations) {
            allocationMultibinder.addBinding().to(allocation);
        }
--- a/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/ShardsLimitAllocationDecider.java
+++ b/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/ShardsLimitAllocationDecider.java
@ -0,0 +1,103 @@
+/*
+ * Licensed to ElasticSearch and Shay Banon under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. ElasticSearch licenses this
+ * file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.cluster.routing.allocation.decider;
+
+import org.elasticsearch.cluster.metadata.IndexMetaData;
+import org.elasticsearch.cluster.routing.MutableShardRouting;
+import org.elasticsearch.cluster.routing.RoutingNode;
+import org.elasticsearch.cluster.routing.ShardRouting;
+import org.elasticsearch.cluster.routing.allocation.RoutingAllocation;
+import org.elasticsearch.common.inject.Inject;
+import org.elasticsearch.common.settings.Settings;
+
+import java.util.List;
+
+/**
+ * Allocation decider that limits how many shards of a single index may live on one node.
+ * <p>
+ * The limit is read per index from the {@link #INDEX_TOTAL_SHARDS_PER_NODE} setting. When the
+ * setting is absent (it then defaults to -1), zero, or negative, no limit is enforced for that
+ * index. Shards of the index that are currently relocating are not counted towards the limit.
+ */
+public class ShardsLimitAllocationDecider extends AllocationDecider {
+
+    /**
+     * Index-level setting holding the maximum number of shards of that index allowed on a single node.
+     */
+    public static final String INDEX_TOTAL_SHARDS_PER_NODE = "index.routing.allocation.total_shards_per_node";
+
+    static {
+        // NOTE(review): presumably this makes the setting updatable on an existing index -
+        // confirm against IndexMetaData.addDynamicSettings.
+        IndexMetaData.addDynamicSettings(
+                INDEX_TOTAL_SHARDS_PER_NODE
+        );
+    }
+
+    @Inject
+    public ShardsLimitAllocationDecider(Settings settings) {
+        super(settings);
+    }
+
+    /**
+     * Decides whether the given shard may be allocated on the given node.
+     *
+     * @param shardRouting the shard being considered; its index selects the limit and the shards counted
+     * @param node         the candidate node
+     * @param allocation   the current allocation, used to look up the index metadata
+     * @return {@link Decision#NO} when the node already holds at least the configured number of
+     *         non-relocating shards of the same index, {@link Decision#YES} otherwise (including
+     *         when no positive limit is configured)
+     */
+    @Override
+    public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) {
+        int totalShardsPerNode = totalShardsPerNode(shardRouting, allocation);
+        if (totalShardsPerNode <= 0) {
+            return Decision.YES;
+        }
+        // ">=": the node is refused as soon as it has reached the limit
+        if (countIndexShards(shardRouting.index(), node) >= totalShardsPerNode) {
+            return Decision.NO;
+        }
+        return Decision.YES;
+    }
+
+    /**
+     * Decides whether the given shard may stay on the given node.
+     *
+     * @param shardRouting the shard being considered; its index selects the limit and the shards counted
+     * @param node         the node to inspect
+     * @param allocation   the current allocation, used to look up the index metadata
+     * @return {@code false} when the node holds more non-relocating shards of the same index than
+     *         the configured limit, {@code true} otherwise (including when no positive limit is
+     *         configured)
+     */
+    @Override
+    public boolean canRemain(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) {
+        int totalShardsPerNode = totalShardsPerNode(shardRouting, allocation);
+        if (totalShardsPerNode <= 0) {
+            return true;
+        }
+        // ">": a node that holds exactly the limit is still acceptable
+        return countIndexShards(shardRouting.index(), node) <= totalShardsPerNode;
+    }
+
+    /**
+     * Reads the configured per-node shard limit for the index of the given shard, or -1 when unset.
+     */
+    private static int totalShardsPerNode(ShardRouting shardRouting, RoutingAllocation allocation) {
+        IndexMetaData indexMd = allocation.routingNodes().metaData().index(shardRouting.index());
+        return indexMd.settings().getAsInt(INDEX_TOTAL_SHARDS_PER_NODE, -1);
+    }
+
+    /**
+     * Counts the shards on the node that belong to the given index and are not relocating.
+     */
+    private static int countIndexShards(String index, RoutingNode node) {
+        int nodeCount = 0;
+        List<MutableShardRouting> shards = node.shards();
+        for (int i = 0; i < shards.size(); i++) {
+            MutableShardRouting nodeShard = shards.get(i);
+            // only shards of the same index count, and relocating shards are skipped
+            if (nodeShard.index().equals(index) && !nodeShard.relocating()) {
+                nodeCount++;
+            }
+        }
+        return nodeCount;
+    }
+}
--- a/src/test/java/org/elasticsearch/test/unit/cluster/routing/allocation/ShardsLimitAllocationTests.java
+++ b/src/test/java/org/elasticsearch/test/unit/cluster/routing/allocation/ShardsLimitAllocationTests.java
@ -0,0 +1,202 @@
+/*
+ * Licensed to ElasticSearch and Shay Banon under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. ElasticSearch licenses this
+ * file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.test.unit.cluster.routing.allocation;
+
+import org.elasticsearch.cluster.ClusterState;
+import org.elasticsearch.cluster.metadata.IndexMetaData;
+import org.elasticsearch.cluster.metadata.MetaData;
+import org.elasticsearch.cluster.routing.MutableShardRouting;
+import org.elasticsearch.cluster.routing.RoutingNodes;
+import org.elasticsearch.cluster.routing.RoutingTable;
+import org.elasticsearch.cluster.routing.ShardRoutingState;
+import org.elasticsearch.cluster.routing.allocation.AllocationService;
+import org.elasticsearch.cluster.routing.allocation.decider.ShardsLimitAllocationDecider;
+import org.elasticsearch.common.logging.ESLogger;
+import org.elasticsearch.common.logging.Loggers;
+import org.elasticsearch.common.settings.ImmutableSettings;
+import org.testng.annotations.Test;
+
+import static org.elasticsearch.cluster.ClusterState.newClusterStateBuilder;
+import static org.elasticsearch.cluster.metadata.IndexMetaData.newIndexMetaDataBuilder;
+import static org.elasticsearch.cluster.metadata.MetaData.newMetaDataBuilder;
+import static org.elasticsearch.cluster.node.DiscoveryNodes.newNodesBuilder;
+import static org.elasticsearch.cluster.routing.RoutingBuilders.indexRoutingTable;
+import static org.elasticsearch.cluster.routing.RoutingBuilders.routingTable;
+import static org.elasticsearch.cluster.routing.ShardRoutingState.*;
+import static org.elasticsearch.common.settings.ImmutableSettings.settingsBuilder;
+import static org.elasticsearch.test.unit.cluster.routing.allocation.RoutingAllocationTests.newNode;
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.Matchers.equalTo;
+
+/**
+ * Tests the index-level {@link ShardsLimitAllocationDecider#INDEX_TOTAL_SHARDS_PER_NODE} limit by
+ * driving an {@link AllocationService} through reroutes and shard starts and checking where
+ * shards end up.
+ */
+@Test
+public class ShardsLimitAllocationTests {
+
+    private final ESLogger logger = Loggers.getLogger(ShardsLimitAllocationTests.class);
+
+    /**
+     * With 4 shards, 1 replica and a limit of 2 shards per node on two nodes, only 4 of the 8
+     * shard copies can be allocated; the remaining 4 must stay unassigned, also after a further reroute.
+     */
+    @Test
+    public void indexLevelShardsLimitAllocate() {
+        AllocationService strategy = new AllocationService(settingsBuilder().put("cluster.routing.allocation.concurrent_recoveries", 10).build());
+
+        logger.info("Building initial routing table");
+
+        MetaData metaData = newMetaDataBuilder()
+                .put(newIndexMetaDataBuilder("test").settings(ImmutableSettings.settingsBuilder()
+                        .put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 4)
+                        .put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 1)
+                        .put(ShardsLimitAllocationDecider.INDEX_TOTAL_SHARDS_PER_NODE, 2)))
+                .build();
+
+        RoutingTable routingTable = routingTable()
+                .add(indexRoutingTable("test").initializeEmpty(metaData.index("test")))
+                .build();
+
+        ClusterState clusterState = newClusterStateBuilder().metaData(metaData).routingTable(routingTable).build();
+        logger.info("Adding two nodes and performing rerouting");
+        clusterState = newClusterStateBuilder().state(clusterState).nodes(newNodesBuilder().put(newNode("node1")).put(newNode("node2"))).build();
+        routingTable = strategy.reroute(clusterState).routingTable();
+        clusterState = newClusterStateBuilder().state(clusterState).routingTable(routingTable).build();
+
+        assertThat(clusterState.readOnlyRoutingNodes().node("node1").numberOfShardsWithState(ShardRoutingState.INITIALIZING), equalTo(2));
+        assertThat(clusterState.readOnlyRoutingNodes().node("node2").numberOfShardsWithState(ShardRoutingState.INITIALIZING), equalTo(2));
+
+        logger.info("Start the primary shards");
+        RoutingNodes routingNodes = clusterState.routingNodes();
+        routingTable = strategy.applyStartedShards(clusterState, routingNodes.shardsWithState(INITIALIZING)).routingTable();
+        clusterState = newClusterStateBuilder().state(clusterState).routingTable(routingTable).build();
+
+        assertThat(clusterState.readOnlyRoutingNodes().node("node1").numberOfShardsWithState(ShardRoutingState.STARTED), equalTo(2));
+        assertThat(clusterState.readOnlyRoutingNodes().node("node1").numberOfShardsWithState(ShardRoutingState.INITIALIZING), equalTo(0));
+        assertThat(clusterState.readOnlyRoutingNodes().node("node2").numberOfShardsWithState(ShardRoutingState.STARTED), equalTo(2));
+        assertThat(clusterState.readOnlyRoutingNodes().node("node2").numberOfShardsWithState(ShardRoutingState.INITIALIZING), equalTo(0));
+        assertThat(clusterState.readOnlyRoutingNodes().unassigned().size(), equalTo(4));
+
+        logger.info("Do another reroute, make sure its still not allocated");
+        // Actually reroute here: no shard is INITIALIZING at this point, so applying "started"
+        // shards would have nothing to apply and the step would verify nothing.
+        routingTable = strategy.reroute(clusterState).routingTable();
+        clusterState = newClusterStateBuilder().state(clusterState).routingTable(routingTable).build();
+
+        assertThat(clusterState.readOnlyRoutingNodes().node("node1").numberOfShardsWithState(ShardRoutingState.STARTED), equalTo(2));
+        assertThat(clusterState.readOnlyRoutingNodes().node("node1").numberOfShardsWithState(ShardRoutingState.INITIALIZING), equalTo(0));
+        assertThat(clusterState.readOnlyRoutingNodes().node("node2").numberOfShardsWithState(ShardRoutingState.STARTED), equalTo(2));
+        assertThat(clusterState.readOnlyRoutingNodes().node("node2").numberOfShardsWithState(ShardRoutingState.INITIALIZING), equalTo(0));
+        assertThat(clusterState.readOnlyRoutingNodes().unassigned().size(), equalTo(4));
+    }
+
+    /**
+     * Starts with index "test" fully on node1 and index "test1" fully on node2, then sets a limit
+     * of 3 shards per node on "test" and checks that shards relocate until both nodes hold 5
+     * started shards.
+     */
+    @Test
+    public void indexLevelShardsLimitRemain() {
+        AllocationService strategy = new AllocationService(settingsBuilder()
+                .put("cluster.routing.allocation.concurrent_recoveries", 10)
+                .put("cluster.routing.allocation.node_initial_primaries_recoveries", 10)
+                .put("cluster.routing.allocation.allow_rebalance", "always")
+                .put("cluster.routing.allocation.cluster_concurrent_rebalance", -1)
+                .build());
+
+        logger.info("Building initial routing table");
+
+        MetaData metaData = newMetaDataBuilder()
+                .put(newIndexMetaDataBuilder("test").settings(ImmutableSettings.settingsBuilder()
+                        .put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 5)
+                        .put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 0)
+                ))
+                .build();
+
+        RoutingTable routingTable = routingTable()
+                .add(indexRoutingTable("test").initializeEmpty(metaData.index("test")))
+                .build();
+
+        ClusterState clusterState = newClusterStateBuilder().metaData(metaData).routingTable(routingTable).build();
+        logger.info("Adding one node and reroute");
+        clusterState = newClusterStateBuilder().state(clusterState).nodes(newNodesBuilder().put(newNode("node1"))).build();
+        routingTable = strategy.reroute(clusterState).routingTable();
+        clusterState = newClusterStateBuilder().state(clusterState).routingTable(routingTable).build();
+
+        logger.info("Start the primary shards");
+        RoutingNodes routingNodes = clusterState.routingNodes();
+        routingTable = strategy.applyStartedShards(clusterState, routingNodes.shardsWithState(INITIALIZING)).routingTable();
+        clusterState = newClusterStateBuilder().state(clusterState).routingTable(routingTable).build();
+
+        assertThat(clusterState.readOnlyRoutingNodes().numberOfShardsOfType(STARTED), equalTo(5));
+
+        logger.info("add another index with 5 shards");
+        metaData = newMetaDataBuilder().metaData(metaData)
+                .put(newIndexMetaDataBuilder("test1").settings(ImmutableSettings.settingsBuilder()
+                        .put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 5)
+                        .put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 0)
+                ))
+                .build();
+        routingTable = routingTable().routingTable(routingTable)
+                .add(indexRoutingTable("test1").initializeEmpty(metaData.index("test1")))
+                .build();
+
+        clusterState = newClusterStateBuilder().state(clusterState).metaData(metaData).routingTable(routingTable).build();
+
+        logger.info("Add another one node and reroute");
+        clusterState = newClusterStateBuilder().state(clusterState).nodes(newNodesBuilder().putAll(clusterState.nodes()).put(newNode("node2"))).build();
+        routingTable = strategy.reroute(clusterState).routingTable();
+        clusterState = newClusterStateBuilder().state(clusterState).routingTable(routingTable).build();
+
+        routingNodes = clusterState.routingNodes();
+        routingTable = strategy.applyStartedShards(clusterState, routingNodes.shardsWithState(INITIALIZING)).routingTable();
+        clusterState = newClusterStateBuilder().state(clusterState).routingTable(routingTable).build();
+
+        assertThat(clusterState.readOnlyRoutingNodes().numberOfShardsOfType(STARTED), equalTo(10));
+
+        for (MutableShardRouting shardRouting : clusterState.readOnlyRoutingNodes().node("node1")) {
+            assertThat(shardRouting.index(), equalTo("test"));
+        }
+        for (MutableShardRouting shardRouting : clusterState.readOnlyRoutingNodes().node("node2")) {
+            assertThat(shardRouting.index(), equalTo("test1"));
+        }
+
+        logger.info("update " + ShardsLimitAllocationDecider.INDEX_TOTAL_SHARDS_PER_NODE + " for test, see that things move");
+        metaData = newMetaDataBuilder().metaData(metaData)
+                .put(newIndexMetaDataBuilder("test").settings(ImmutableSettings.settingsBuilder()
+                        .put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 5)
+                        .put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 0)
+                        .put(ShardsLimitAllocationDecider.INDEX_TOTAL_SHARDS_PER_NODE, 3)
+                ))
+                .build();
+
+
+        clusterState = newClusterStateBuilder().state(clusterState).metaData(metaData).build();
+
+        logger.info("reroute after setting");
+        routingTable = strategy.reroute(clusterState).routingTable();
+        clusterState = newClusterStateBuilder().state(clusterState).routingTable(routingTable).build();
+
+        assertThat(clusterState.readOnlyRoutingNodes().node("node1").numberOfShardsWithState(STARTED), equalTo(3));
+        assertThat(clusterState.readOnlyRoutingNodes().node("node1").numberOfShardsWithState(RELOCATING), equalTo(2));
+
+        logger.info("start the moving shards, a shard from test1 should move back to node1");
+        routingNodes = clusterState.routingNodes();
+        routingTable = strategy.applyStartedShards(clusterState, routingNodes.shardsWithState(INITIALIZING)).routingTable();
+        clusterState = newClusterStateBuilder().state(clusterState).routingTable(routingTable).build();
+
+        assertThat(clusterState.readOnlyRoutingNodes().node("node1").numberOfShardsWithState(STARTED), equalTo(3));
+        assertThat(clusterState.readOnlyRoutingNodes().node("node1").numberOfShardsWithState(RELOCATING), equalTo(0));
+        assertThat(clusterState.readOnlyRoutingNodes().node("node1").numberOfShardsWithState(INITIALIZING), equalTo(2));
+        assertThat(clusterState.readOnlyRoutingNodes().node("node2").numberOfShardsWithState(STARTED), equalTo(5));
+        assertThat(clusterState.readOnlyRoutingNodes().node("node2").numberOfShardsWithState(RELOCATING), equalTo(2));
+
+        logger.info("finalize movement, another shard will move");
+        routingNodes = clusterState.routingNodes();
+        routingTable = strategy.applyStartedShards(clusterState, routingNodes.shardsWithState(INITIALIZING)).routingTable();
+        clusterState = newClusterStateBuilder().state(clusterState).routingTable(routingTable).build();
+
+        assertThat(clusterState.readOnlyRoutingNodes().node("node1").numberOfShardsWithState(STARTED), equalTo(5));
+        assertThat(clusterState.readOnlyRoutingNodes().node("node2").numberOfShardsWithState(STARTED), equalTo(5));
+    }
+}