From e636db1d7f4bee02cefaa7435167787c315ff6a0 Mon Sep 17 00:00:00 2001 From: Lee Hinman Date: Wed, 27 Apr 2016 10:43:08 -0600 Subject: [PATCH] Reorganize final decision, explanation, and store copy calculation, add tests --- ...ansportClusterAllocationExplainAction.java | 58 +++--- .../ClusterAllocationExplanationTests.java | 188 ++++++++++++------ 2 files changed, 154 insertions(+), 92 deletions(-) diff --git a/core/src/main/java/org/elasticsearch/action/admin/cluster/allocation/TransportClusterAllocationExplainAction.java b/core/src/main/java/org/elasticsearch/action/admin/cluster/allocation/TransportClusterAllocationExplainAction.java index 89ff941794d..2c00a033d0c 100644 --- a/core/src/main/java/org/elasticsearch/action/admin/cluster/allocation/TransportClusterAllocationExplainAction.java +++ b/core/src/main/java/org/elasticsearch/action/admin/cluster/allocation/TransportClusterAllocationExplainAction.java @@ -141,32 +141,20 @@ public class TransportClusterAllocationExplainAction IndicesShardStoresResponse.StoreStatus storeStatus, String assignedNodeId, Set activeAllocationIds) { - ClusterAllocationExplanation.FinalDecision finalDecision; - ClusterAllocationExplanation.StoreCopy storeCopy; - String finalExplanation; + final ClusterAllocationExplanation.FinalDecision finalDecision; + final ClusterAllocationExplanation.StoreCopy storeCopy; + final String finalExplanation; - if (node.getId().equals(assignedNodeId)) { - finalDecision = ClusterAllocationExplanation.FinalDecision.ALREADY_ASSIGNED; - finalExplanation = "the shard is already assigned to this node"; - } else if (nodeDecision.type() == Decision.Type.NO) { - finalDecision = ClusterAllocationExplanation.FinalDecision.NO; - finalExplanation = "the shard cannot be assigned because one or more allocation decider returns a 'NO' decision"; + if (storeStatus == null) { + // No copies of the data + storeCopy = ClusterAllocationExplanation.StoreCopy.NONE; } else { - finalDecision = ClusterAllocationExplanation.FinalDecision.YES; - finalExplanation = "the shard can be assigned"; - } - - if (storeStatus != null) { final Throwable storeErr = storeStatus.getStoreException(); - // The store error only influences the decision if the shard is primary and has not been allocated before - if (storeErr != null && shard.primary() && shard.allocatedPostIndexCreate(indexMetaData) == false) { - finalDecision = ClusterAllocationExplanation.FinalDecision.NO; + if (storeErr != null) { if (ExceptionsHelper.unwrapCause(storeErr) instanceof CorruptIndexException) { storeCopy = ClusterAllocationExplanation.StoreCopy.CORRUPT; - finalExplanation = "the copy of data in the shard store is corrupt"; } else { storeCopy = ClusterAllocationExplanation.StoreCopy.IO_ERROR; - finalExplanation = "there was an IO error reading from data in the shard store"; } } else if (activeAllocationIds.isEmpty()) { // The ids are only empty if dealing with a legacy index @@ -174,18 +162,36 @@ public class TransportClusterAllocationExplainAction storeCopy = ClusterAllocationExplanation.StoreCopy.UNKNOWN; } else if (activeAllocationIds.contains(storeStatus.getAllocationId())) { storeCopy = ClusterAllocationExplanation.StoreCopy.AVAILABLE; - if (finalDecision == ClusterAllocationExplanation.FinalDecision.YES) { - finalExplanation = "the shard can be assigned and the node contains a valid copy of the shard data"; - } } else { // Otherwise, this is a stale copy of the data (allocation ids don't match) storeCopy = ClusterAllocationExplanation.StoreCopy.STALE; - finalExplanation = "the copy of the shard is stale, allocation ids do not match"; - finalDecision = ClusterAllocationExplanation.FinalDecision.NO; } + } + + if (node.getId().equals(assignedNodeId)) { + finalDecision = ClusterAllocationExplanation.FinalDecision.ALREADY_ASSIGNED; + finalExplanation = "the shard is already assigned to this node"; + } else if (shard.primary() && shard.unassigned() && storeCopy == ClusterAllocationExplanation.StoreCopy.STALE) { + finalExplanation = "the copy of the shard is stale, allocation ids do not match"; + finalDecision = ClusterAllocationExplanation.FinalDecision.NO; + } else if (shard.primary() && shard.unassigned() && storeCopy == ClusterAllocationExplanation.StoreCopy.CORRUPT) { + finalExplanation = "the copy of the shard is corrupt"; + finalDecision = ClusterAllocationExplanation.FinalDecision.NO; + } else if (shard.primary() && shard.unassigned() && storeCopy == ClusterAllocationExplanation.StoreCopy.IO_ERROR) { + finalExplanation = "the copy of the shard cannot be read"; + finalDecision = ClusterAllocationExplanation.FinalDecision.NO; } else { - // No copies of the data, so deciders are what influence the decision and explanation - storeCopy = ClusterAllocationExplanation.StoreCopy.NONE; + if (nodeDecision.type() == Decision.Type.NO) { + finalDecision = ClusterAllocationExplanation.FinalDecision.NO; + finalExplanation = "the shard cannot be assigned because one or more allocation decider returns a 'NO' decision"; + } else { + finalDecision = ClusterAllocationExplanation.FinalDecision.YES; + if (storeCopy == ClusterAllocationExplanation.StoreCopy.AVAILABLE) { + finalExplanation = "the shard can be assigned and the node contains a valid copy of the shard data"; + } else { + finalExplanation = "the shard can be assigned"; + } + } } return new NodeExplanation(node, nodeDecision, nodeWeight, storeStatus, finalDecision, finalExplanation, storeCopy); } diff --git a/core/src/test/java/org/elasticsearch/action/admin/cluster/allocation/ClusterAllocationExplanationTests.java b/core/src/test/java/org/elasticsearch/action/admin/cluster/allocation/ClusterAllocationExplanationTests.java index 14a3b8fc96e..9346739f107 100644 --- a/core/src/test/java/org/elasticsearch/action/admin/cluster/allocation/ClusterAllocationExplanationTests.java +++ b/core/src/test/java/org/elasticsearch/action/admin/cluster/allocation/ClusterAllocationExplanationTests.java @@ -19,6 +19,7 @@ package org.elasticsearch.action.admin.cluster.allocation; +import org.apache.lucene.index.CorruptIndexException; import org.elasticsearch.ElasticsearchException; import org.elasticsearch.ExceptionsHelper; import org.elasticsearch.Version; @@ -39,6 +40,7 @@ import org.elasticsearch.index.Index; import org.elasticsearch.index.shard.ShardId; import org.elasticsearch.test.ESTestCase; +import java.io.IOException; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; @@ -54,46 +56,115 @@ import static java.util.Collections.emptySet; */ public final class ClusterAllocationExplanationTests extends ESTestCase { - private NodeExplanation makeNodeExplanation(String idxName, boolean isAssigned) { - Index i = new Index(idxName, "uuid"); - ShardRouting shard = ShardRouting.newUnassigned(i, 0, null, false, new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, "foo")); - IndexMetaData indexMetaData = IndexMetaData.builder(idxName) - .settings(Settings.builder() - .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT) - .put(IndexMetaData.SETTING_INDEX_UUID, "uuid")) - .numberOfShards(1) - .numberOfReplicas(0) - .build(); - DiscoveryNode node = new DiscoveryNode("node-0", DummyTransportAddress.INSTANCE, emptyMap(), emptySet(), Version.CURRENT); - Decision.Multi d = new Decision.Multi(); - d.add(Decision.single(Decision.Type.NO, "no label", "because I said no")); - d.add(Decision.single(Decision.Type.YES, "yes label", "yes please")); - d.add(Decision.single(Decision.Type.THROTTLE, "throttle label", "wait a sec")); + private Index i = new Index("foo", "uuid"); + private ShardRouting primaryShard = ShardRouting.newUnassigned(i, 0, null, true, + new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, "foo")); + private ShardRouting replicaShard = ShardRouting.newUnassigned(i, 0, null, false, + new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, "foo")); + private IndexMetaData indexMetaData = IndexMetaData.builder("foo") + .settings(Settings.builder() + .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT) + .put(IndexMetaData.SETTING_INDEX_UUID, "uuid")) + .numberOfShards(1) + .numberOfReplicas(1) + .build(); + private DiscoveryNode node = new DiscoveryNode("node-0", DummyTransportAddress.INSTANCE, emptyMap(), emptySet(), Version.CURRENT); + private static Decision.Multi yesDecision = new Decision.Multi(); + private static Decision.Multi noDecision = new Decision.Multi(); + + static { + yesDecision.add(Decision.single(Decision.Type.YES, "yes label", "yes please")); + noDecision.add(Decision.single(Decision.Type.NO, "no label", "no thanks")); + } + + + private NodeExplanation makeNodeExplanation(boolean primary, boolean isAssigned, boolean hasErr, boolean hasActiveId) { Float nodeWeight = randomFloat(); + Exception e = hasErr ? new ElasticsearchException("stuff's broke, yo") : null; IndicesShardStoresResponse.StoreStatus storeStatus = new IndicesShardStoresResponse.StoreStatus(node, 42, "eggplant", - IndicesShardStoresResponse.StoreStatus.AllocationStatus.PRIMARY, new ElasticsearchException("stuff's broke, yo")); - String assignedNodeId = "node-0"; - Set activeAllocationIds = new HashSet<>(); + IndicesShardStoresResponse.StoreStatus.AllocationStatus.PRIMARY, e); + String assignedNodeId; if (isAssigned) { + assignedNodeId = "node-0"; + } else { + assignedNodeId = "node-9"; + } + Set activeAllocationIds = new HashSet<>(); + if (hasActiveId) { activeAllocationIds.add("eggplant"); } - return TransportClusterAllocationExplainAction.calculateNodeExplanation(shard, indexMetaData, node, d, nodeWeight, - storeStatus, assignedNodeId, activeAllocationIds); + return TransportClusterAllocationExplainAction.calculateNodeExplanation(primary ? primaryShard : replicaShard, + indexMetaData, node, noDecision, nodeWeight, storeStatus, assignedNodeId, activeAllocationIds); } - - public void testDecisionAndExplanation() { - NodeExplanation ne = makeNodeExplanation("foo", true); - assertEquals("the shard is already assigned to this node", ne.getFinalExplanation()); - assertEquals(ClusterAllocationExplanation.FinalDecision.ALREADY_ASSIGNED, ne.getFinalDecision()); - assertEquals(ClusterAllocationExplanation.StoreCopy.AVAILABLE, ne.getStoreCopy()); - ne = makeNodeExplanation("foo", false); - assertEquals("the shard is already assigned to this node", ne.getFinalExplanation()); - assertEquals(ClusterAllocationExplanation.FinalDecision.ALREADY_ASSIGNED, ne.getFinalDecision()); - assertEquals(ClusterAllocationExplanation.StoreCopy.UNKNOWN, ne.getStoreCopy()); + private void assertExplanations(NodeExplanation ne, String finalExplanation, ClusterAllocationExplanation.FinalDecision finalDecision, + ClusterAllocationExplanation.StoreCopy storeCopy) { + assertEquals(finalExplanation, ne.getFinalExplanation()); + assertEquals(finalDecision, ne.getFinalDecision()); + assertEquals(storeCopy, ne.getStoreCopy()); } + public void testDecisionAndExplanation() { + Exception e = new IOException("stuff's broke, yo"); + Exception corruptE = new CorruptIndexException("stuff's corrupt, yo", ""); + Float nodeWeight = randomFloat(); + Set activeAllocationIds = new HashSet<>(); + activeAllocationIds.add("eggplant"); + + IndicesShardStoresResponse.StoreStatus storeStatus = new IndicesShardStoresResponse.StoreStatus(node, 42, "eggplant", + IndicesShardStoresResponse.StoreStatus.AllocationStatus.PRIMARY, e); + NodeExplanation ne = TransportClusterAllocationExplainAction.calculateNodeExplanation(primaryShard, indexMetaData, node, + yesDecision, nodeWeight, storeStatus, "", activeAllocationIds); + assertExplanations(ne, "the copy of the shard cannot be read", + ClusterAllocationExplanation.FinalDecision.NO, ClusterAllocationExplanation.StoreCopy.IO_ERROR); + + ne = TransportClusterAllocationExplainAction.calculateNodeExplanation(primaryShard, indexMetaData, node, yesDecision, nodeWeight, + null, "", activeAllocationIds); + assertExplanations(ne, "the shard can be assigned", + ClusterAllocationExplanation.FinalDecision.YES, ClusterAllocationExplanation.StoreCopy.NONE); + + ne = TransportClusterAllocationExplainAction.calculateNodeExplanation(primaryShard, indexMetaData, node, noDecision, nodeWeight, + null, "", activeAllocationIds); + assertExplanations(ne, "the shard cannot be assigned because one or more allocation decider returns a 'NO' decision", + ClusterAllocationExplanation.FinalDecision.NO, ClusterAllocationExplanation.StoreCopy.NONE); + + storeStatus = new IndicesShardStoresResponse.StoreStatus(node, 42, "eggplant", + IndicesShardStoresResponse.StoreStatus.AllocationStatus.PRIMARY, null); + ne = TransportClusterAllocationExplainAction.calculateNodeExplanation(primaryShard, indexMetaData, node, noDecision, nodeWeight, + storeStatus, "", activeAllocationIds); + assertExplanations(ne, "the shard cannot be assigned because one or more allocation decider returns a 'NO' decision", + ClusterAllocationExplanation.FinalDecision.NO, ClusterAllocationExplanation.StoreCopy.AVAILABLE); + + storeStatus = new IndicesShardStoresResponse.StoreStatus(node, 42, "eggplant", + IndicesShardStoresResponse.StoreStatus.AllocationStatus.PRIMARY, corruptE); + ne = TransportClusterAllocationExplainAction.calculateNodeExplanation(primaryShard, indexMetaData, node, yesDecision, nodeWeight, + storeStatus, "", activeAllocationIds); + assertExplanations(ne, "the copy of the shard is corrupt", + ClusterAllocationExplanation.FinalDecision.NO, ClusterAllocationExplanation.StoreCopy.CORRUPT); + + storeStatus = new IndicesShardStoresResponse.StoreStatus(node, 42, "banana", + IndicesShardStoresResponse.StoreStatus.AllocationStatus.PRIMARY, null); + ne = TransportClusterAllocationExplainAction.calculateNodeExplanation(primaryShard, indexMetaData, node, yesDecision, nodeWeight, + storeStatus, "", activeAllocationIds); + assertExplanations(ne, "the copy of the shard is stale, allocation ids do not match", + ClusterAllocationExplanation.FinalDecision.NO, ClusterAllocationExplanation.StoreCopy.STALE); + + storeStatus = new IndicesShardStoresResponse.StoreStatus(node, 42, "eggplant", + IndicesShardStoresResponse.StoreStatus.AllocationStatus.PRIMARY, null); + ne = TransportClusterAllocationExplainAction.calculateNodeExplanation(primaryShard, indexMetaData, node, yesDecision, nodeWeight, + storeStatus, "node-0", activeAllocationIds); + assertExplanations(ne, "the shard is already assigned to this node", + ClusterAllocationExplanation.FinalDecision.ALREADY_ASSIGNED, ClusterAllocationExplanation.StoreCopy.AVAILABLE); + + storeStatus = new IndicesShardStoresResponse.StoreStatus(node, 42, "eggplant", + IndicesShardStoresResponse.StoreStatus.AllocationStatus.PRIMARY, null); + ne = TransportClusterAllocationExplainAction.calculateNodeExplanation(primaryShard, indexMetaData, node, yesDecision, nodeWeight, + storeStatus, "", activeAllocationIds); + assertExplanations(ne, "the shard can be assigned and the node contains a valid copy of the shard data", + ClusterAllocationExplanation.FinalDecision.YES, ClusterAllocationExplanation.StoreCopy.AVAILABLE); +} + public void testDecisionEquality() { Decision.Multi d = new Decision.Multi(); Decision.Multi d2 = new Decision.Multi(); @@ -108,21 +179,16 @@ public final class ClusterAllocationExplanationTests extends ESTestCase { public void testExplanationSerialization() throws Exception { ShardId shard = new ShardId("test", "uuid", 0); - Map nodeToDecisions = new HashMap<>(); - Map nodeToWeight = new HashMap<>(); - for (int i = randomIntBetween(2, 5); i > 0; i--) { - DiscoveryNode dn = new DiscoveryNode("node-" + i, DummyTransportAddress.INSTANCE, emptyMap(), emptySet(), Version.CURRENT); - Decision.Multi d = new Decision.Multi(); - d.add(Decision.single(Decision.Type.NO, "no label", "because I said no")); - d.add(Decision.single(Decision.Type.YES, "yes label", "yes please")); - d.add(Decision.single(Decision.Type.THROTTLE, "throttle label", "wait a sec")); - nodeToDecisions.put(dn, d); - nodeToWeight.put(dn, randomFloat()); - } - long remainingDelay = randomIntBetween(0, 500); Map nodeExplanations = new HashMap<>(1); - NodeExplanation ne = makeNodeExplanation("bar", true); + Float nodeWeight = randomFloat(); + Set activeAllocationIds = new HashSet<>(); + activeAllocationIds.add("eggplant"); + + IndicesShardStoresResponse.StoreStatus storeStatus = new IndicesShardStoresResponse.StoreStatus(node, 42, "eggplant", + IndicesShardStoresResponse.StoreStatus.AllocationStatus.PRIMARY, null); + NodeExplanation ne = TransportClusterAllocationExplainAction.calculateNodeExplanation(primaryShard, indexMetaData, node, + yesDecision, nodeWeight, storeStatus, "", activeAllocationIds); nodeExplanations.put(ne.getNode(), ne); ClusterAllocationExplanation cae = new ClusterAllocationExplanation(shard, true, "assignedNode", remainingDelay, null, nodeExplanations); @@ -142,23 +208,13 @@ public final class ClusterAllocationExplanationTests extends ESTestCase { IndicesShardStoresResponse.StoreStatus status = explanation.getStoreStatus(); assertNotNull(explanation.getStoreStatus()); assertNotNull(explanation.getDecision()); - assertNotNull(explanation.getWeight()); + assertEquals(nodeWeight, explanation.getWeight()); } } - public void testStaleShardExplanation() throws Exception { + public void testExplanationToXContent() throws Exception { + ShardId shardId = new ShardId("foo", "uuid", 0); long remainingDelay = 42; - Index i = new Index("test", "uuid"); - ShardId shardId = new ShardId(i, 0); - ShardRouting shard = ShardRouting.newUnassigned(i, 0, null, false, new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, "foo")); - IndexMetaData indexMetaData = IndexMetaData.builder("test") - .settings(Settings.builder() - .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT) - .put(IndexMetaData.SETTING_INDEX_UUID, "uuid")) - .numberOfShards(1) - .numberOfReplicas(0) - .build(); - DiscoveryNode node = new DiscoveryNode("node-0", DummyTransportAddress.INSTANCE, emptyMap(), emptySet(), Version.CURRENT); Decision.Multi d = new Decision.Multi(); d.add(Decision.single(Decision.Type.NO, "no label", "because I said no")); d.add(Decision.single(Decision.Type.YES, "yes label", "yes please")); @@ -168,22 +224,22 @@ public final class ClusterAllocationExplanationTests extends ESTestCase { allocationIds.add("bar"); IndicesShardStoresResponse.StoreStatus storeStatus = new IndicesShardStoresResponse.StoreStatus(node, 42, "eggplant", IndicesShardStoresResponse.StoreStatus.AllocationStatus.PRIMARY, new ElasticsearchException("stuff's broke, yo")); - NodeExplanation ne = TransportClusterAllocationExplainAction.calculateNodeExplanation(shard, indexMetaData, node, d, nodeWeight, - storeStatus, "node-0", allocationIds); + NodeExplanation ne = TransportClusterAllocationExplainAction.calculateNodeExplanation(primaryShard, indexMetaData, node, + d, nodeWeight, storeStatus, "node-0", allocationIds); Map nodeExplanations = new HashMap<>(1); nodeExplanations.put(ne.getNode(), ne); ClusterAllocationExplanation cae = new ClusterAllocationExplanation(shardId, true, "assignedNode", remainingDelay, null, nodeExplanations); XContentBuilder builder = XContentFactory.jsonBuilder(); cae.toXContent(builder, ToXContent.EMPTY_PARAMS); - assertEquals("{\"shard\":{\"index\":\"test\",\"index_uuid\":\"uuid\",\"id\":0,\"primary\":true},\"assigned\":true," + - "\"assigned_node_id\":\"assignedNode\",\"nodes\":{\"node-0\":{\"node_name\":\"\",\"node_attributes" + - "\":{},\"store\":{\"shard_copy\":\"STALE\",\"store_exception\":\"ElasticsearchException[stuff's br" + - "oke, yo]\"},\"final_decision\":\"NO\",\"final_explanation\":\"the copy of the shard is stale, all" + - "ocation ids do not match\",\"weight\":1.5,\"decisions\":[{\"decider\":\"no label\",\"decision\":" + - "\"NO\",\"explanation\":\"because I said no\"},{\"decider\":\"yes label\",\"decision\":\"YES\",\"e" + - "xplanation\":\"yes please\"},{\"decider\":\"throttle label\",\"decision\":\"THROTTLE\",\"explanat" + - "ion\":\"wait a sec\"}]}}}", + assertEquals("{\"shard\":{\"index\":\"foo\",\"index_uuid\":\"uuid\",\"id\":0,\"primary\":true},\"assigned\":true," + + "\"assigned_node_id\":\"assignedNode\",\"nodes\":{\"node-0\":{\"node_name\":\"\",\"node_attribute" + + "s\":{},\"store\":{\"shard_copy\":\"IO_ERROR\",\"store_exception\":\"ElasticsearchException[stuff" + + "'s broke, yo]\"},\"final_decision\":\"ALREADY_ASSIGNED\",\"final_explanation\":\"the shard is al" + + "ready assigned to this node\",\"weight\":1.5,\"decisions\":[{\"decider\":\"no label\",\"decision" + + "\":\"NO\",\"explanation\":\"because I said no\"},{\"decider\":\"yes label\",\"decision\":\"YES\"" + + ",\"explanation\":\"yes please\"},{\"decider\":\"throttle label\",\"decision\":\"THROTTLE\",\"exp" + + "lanation\":\"wait a sec\"}]}}}", builder.string()); } }