Reorganize final decision, explanation, and store copy calculation, add tests

This commit is contained in:
Lee Hinman 2016-04-27 10:43:08 -06:00
parent 1c0fc0a914
commit e636db1d7f
2 changed files with 154 additions and 92 deletions

View File

@ -141,32 +141,20 @@ public class TransportClusterAllocationExplainAction
IndicesShardStoresResponse.StoreStatus storeStatus,
String assignedNodeId,
Set<String> activeAllocationIds) {
ClusterAllocationExplanation.FinalDecision finalDecision;
ClusterAllocationExplanation.StoreCopy storeCopy;
String finalExplanation;
final ClusterAllocationExplanation.FinalDecision finalDecision;
final ClusterAllocationExplanation.StoreCopy storeCopy;
final String finalExplanation;
if (node.getId().equals(assignedNodeId)) {
finalDecision = ClusterAllocationExplanation.FinalDecision.ALREADY_ASSIGNED;
finalExplanation = "the shard is already assigned to this node";
} else if (nodeDecision.type() == Decision.Type.NO) {
finalDecision = ClusterAllocationExplanation.FinalDecision.NO;
finalExplanation = "the shard cannot be assigned because one or more allocation decider returns a 'NO' decision";
if (storeStatus == null) {
// No copies of the data
storeCopy = ClusterAllocationExplanation.StoreCopy.NONE;
} else {
finalDecision = ClusterAllocationExplanation.FinalDecision.YES;
finalExplanation = "the shard can be assigned";
}
if (storeStatus != null) {
final Throwable storeErr = storeStatus.getStoreException();
// The store error only influences the decision if the shard is primary and has not been allocated before
if (storeErr != null && shard.primary() && shard.allocatedPostIndexCreate(indexMetaData) == false) {
finalDecision = ClusterAllocationExplanation.FinalDecision.NO;
if (storeErr != null) {
if (ExceptionsHelper.unwrapCause(storeErr) instanceof CorruptIndexException) {
storeCopy = ClusterAllocationExplanation.StoreCopy.CORRUPT;
finalExplanation = "the copy of data in the shard store is corrupt";
} else {
storeCopy = ClusterAllocationExplanation.StoreCopy.IO_ERROR;
finalExplanation = "there was an IO error reading from data in the shard store";
}
} else if (activeAllocationIds.isEmpty()) {
// The ids are only empty if dealing with a legacy index
@ -174,18 +162,36 @@ public class TransportClusterAllocationExplainAction
storeCopy = ClusterAllocationExplanation.StoreCopy.UNKNOWN;
} else if (activeAllocationIds.contains(storeStatus.getAllocationId())) {
storeCopy = ClusterAllocationExplanation.StoreCopy.AVAILABLE;
if (finalDecision == ClusterAllocationExplanation.FinalDecision.YES) {
finalExplanation = "the shard can be assigned and the node contains a valid copy of the shard data";
}
} else {
// Otherwise, this is a stale copy of the data (allocation ids don't match)
storeCopy = ClusterAllocationExplanation.StoreCopy.STALE;
finalExplanation = "the copy of the shard is stale, allocation ids do not match";
finalDecision = ClusterAllocationExplanation.FinalDecision.NO;
}
}
if (node.getId().equals(assignedNodeId)) {
finalDecision = ClusterAllocationExplanation.FinalDecision.ALREADY_ASSIGNED;
finalExplanation = "the shard is already assigned to this node";
} else if (shard.primary() && shard.unassigned() && storeCopy == ClusterAllocationExplanation.StoreCopy.STALE) {
finalExplanation = "the copy of the shard is stale, allocation ids do not match";
finalDecision = ClusterAllocationExplanation.FinalDecision.NO;
} else if (shard.primary() && shard.unassigned() && storeCopy == ClusterAllocationExplanation.StoreCopy.CORRUPT) {
finalExplanation = "the copy of the shard is corrupt";
finalDecision = ClusterAllocationExplanation.FinalDecision.NO;
} else if (shard.primary() && shard.unassigned() && storeCopy == ClusterAllocationExplanation.StoreCopy.IO_ERROR) {
finalExplanation = "the copy of the shard cannot be read";
finalDecision = ClusterAllocationExplanation.FinalDecision.NO;
} else {
// No copies of the data, so deciders are what influence the decision and explanation
storeCopy = ClusterAllocationExplanation.StoreCopy.NONE;
if (nodeDecision.type() == Decision.Type.NO) {
finalDecision = ClusterAllocationExplanation.FinalDecision.NO;
finalExplanation = "the shard cannot be assigned because one or more allocation decider returns a 'NO' decision";
} else {
finalDecision = ClusterAllocationExplanation.FinalDecision.YES;
if (storeCopy == ClusterAllocationExplanation.StoreCopy.AVAILABLE) {
finalExplanation = "the shard can be assigned and the node contains a valid copy of the shard data";
} else {
finalExplanation = "the shard can be assigned";
}
}
}
return new NodeExplanation(node, nodeDecision, nodeWeight, storeStatus, finalDecision, finalExplanation, storeCopy);
}

View File

@ -19,6 +19,7 @@
package org.elasticsearch.action.admin.cluster.allocation;
import org.apache.lucene.index.CorruptIndexException;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.ExceptionsHelper;
import org.elasticsearch.Version;
@ -39,6 +40,7 @@ import org.elasticsearch.index.Index;
import org.elasticsearch.index.shard.ShardId;
import org.elasticsearch.test.ESTestCase;
import java.io.IOException;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
@ -54,46 +56,115 @@ import static java.util.Collections.emptySet;
*/
public final class ClusterAllocationExplanationTests extends ESTestCase {
private NodeExplanation makeNodeExplanation(String idxName, boolean isAssigned) {
Index i = new Index(idxName, "uuid");
ShardRouting shard = ShardRouting.newUnassigned(i, 0, null, false, new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, "foo"));
IndexMetaData indexMetaData = IndexMetaData.builder(idxName)
.settings(Settings.builder()
.put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
.put(IndexMetaData.SETTING_INDEX_UUID, "uuid"))
.numberOfShards(1)
.numberOfReplicas(0)
.build();
DiscoveryNode node = new DiscoveryNode("node-0", DummyTransportAddress.INSTANCE, emptyMap(), emptySet(), Version.CURRENT);
Decision.Multi d = new Decision.Multi();
d.add(Decision.single(Decision.Type.NO, "no label", "because I said no"));
d.add(Decision.single(Decision.Type.YES, "yes label", "yes please"));
d.add(Decision.single(Decision.Type.THROTTLE, "throttle label", "wait a sec"));
// Shared, read-only fixtures used by the tests in this class. None of them is ever reassigned,
// so every reference is declared final.

/** Index ("foo" / "uuid") that both shard routings below belong to. */
private final Index i = new Index("foo", "uuid");

/** Unassigned routing for shard 0; the {@code true} argument is presumably the "primary" flag -- confirm against ShardRouting. */
private final ShardRouting primaryShard = ShardRouting.newUnassigned(i, 0, null, true,
        new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, "foo"));

/** Same as {@link #primaryShard} but created with {@code false} in the same position (presumably a replica). */
private final ShardRouting replicaShard = ShardRouting.newUnassigned(i, 0, null, false,
        new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, "foo"));

/** Metadata for index "foo": current version, uuid "uuid", one shard and one replica. */
private final IndexMetaData indexMetaData = IndexMetaData.builder("foo")
        .settings(Settings.builder()
                .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
                .put(IndexMetaData.SETTING_INDEX_UUID, "uuid"))
        .numberOfShards(1)
        .numberOfReplicas(1)
        .build();

/** The single node ("node-0") every explanation in these tests is computed for. */
private final DiscoveryNode node = new DiscoveryNode("node-0", DummyTransportAddress.INSTANCE, emptyMap(), emptySet(), Version.CURRENT);

/** Multi-decision holding exactly one YES decision; populated once in the static initializer. */
private static final Decision.Multi yesDecision = new Decision.Multi();

/** Multi-decision holding exactly one NO decision; populated once in the static initializer. */
private static final Decision.Multi noDecision = new Decision.Multi();

static {
    yesDecision.add(Decision.single(Decision.Type.YES, "yes label", "yes please"));
    noDecision.add(Decision.single(Decision.Type.NO, "no label", "no thanks"));
}
private NodeExplanation makeNodeExplanation(boolean primary, boolean isAssigned, boolean hasErr, boolean hasActiveId) {
Float nodeWeight = randomFloat();
Exception e = hasErr ? new ElasticsearchException("stuff's broke, yo") : null;
IndicesShardStoresResponse.StoreStatus storeStatus = new IndicesShardStoresResponse.StoreStatus(node, 42, "eggplant",
IndicesShardStoresResponse.StoreStatus.AllocationStatus.PRIMARY, new ElasticsearchException("stuff's broke, yo"));
String assignedNodeId = "node-0";
Set<String> activeAllocationIds = new HashSet<>();
IndicesShardStoresResponse.StoreStatus.AllocationStatus.PRIMARY, e);
String assignedNodeId;
if (isAssigned) {
assignedNodeId = "node-0";
} else {
assignedNodeId = "node-9";
}
Set<String> activeAllocationIds = new HashSet<>();
if (hasActiveId) {
activeAllocationIds.add("eggplant");
}
return TransportClusterAllocationExplainAction.calculateNodeExplanation(shard, indexMetaData, node, d, nodeWeight,
storeStatus, assignedNodeId, activeAllocationIds);
return TransportClusterAllocationExplainAction.calculateNodeExplanation(primary ? primaryShard : replicaShard,
indexMetaData, node, noDecision, nodeWeight, storeStatus, assignedNodeId, activeAllocationIds);
}
public void testDecisionAndExplanation() {
NodeExplanation ne = makeNodeExplanation("foo", true);
assertEquals("the shard is already assigned to this node", ne.getFinalExplanation());
assertEquals(ClusterAllocationExplanation.FinalDecision.ALREADY_ASSIGNED, ne.getFinalDecision());
assertEquals(ClusterAllocationExplanation.StoreCopy.AVAILABLE, ne.getStoreCopy());
ne = makeNodeExplanation("foo", false);
assertEquals("the shard is already assigned to this node", ne.getFinalExplanation());
assertEquals(ClusterAllocationExplanation.FinalDecision.ALREADY_ASSIGNED, ne.getFinalDecision());
assertEquals(ClusterAllocationExplanation.StoreCopy.UNKNOWN, ne.getStoreCopy());
/**
 * Checks the three computed outcomes of a {@link NodeExplanation} against the expected values.
 *
 * @param ne               the explanation under test
 * @param finalExplanation expected value of {@code ne.getFinalExplanation()}
 * @param finalDecision    expected value of {@code ne.getFinalDecision()}
 * @param storeCopy        expected value of {@code ne.getStoreCopy()}
 */
private void assertExplanations(NodeExplanation ne, String finalExplanation, ClusterAllocationExplanation.FinalDecision finalDecision,
                                ClusterAllocationExplanation.StoreCopy storeCopy) {
    // Checked in the same order as before: explanation text, then decision, then store copy.
    final String actualExplanation = ne.getFinalExplanation();
    assertEquals(finalExplanation, actualExplanation);

    final ClusterAllocationExplanation.FinalDecision actualDecision = ne.getFinalDecision();
    assertEquals(finalDecision, actualDecision);

    final ClusterAllocationExplanation.StoreCopy actualCopy = ne.getStoreCopy();
    assertEquals(storeCopy, actualCopy);
}
/**
 * Walks through the combinations of decider outcome, store status and assigned node id,
 * and verifies the final explanation, final decision and store copy computed for each.
 */
public void testDecisionAndExplanation() {
    final Exception ioFailure = new IOException("stuff's broke, yo");
    final Exception corruption = new CorruptIndexException("stuff's corrupt, yo", "");
    final Float weight = randomFloat();
    final Set<String> activeIds = new HashSet<>();
    activeIds.add("eggplant");
    final String unassigned = "";

    // Store copy that failed with a plain IOException.
    NodeExplanation explanation = explainForPrimaryShard(yesDecision, weight, primaryStoreStatus("eggplant", ioFailure),
            unassigned, activeIds);
    assertExplanations(explanation, "the copy of the shard cannot be read",
            ClusterAllocationExplanation.FinalDecision.NO, ClusterAllocationExplanation.StoreCopy.IO_ERROR);

    // No store status at all, deciders say YES.
    explanation = explainForPrimaryShard(yesDecision, weight, null, unassigned, activeIds);
    assertExplanations(explanation, "the shard can be assigned",
            ClusterAllocationExplanation.FinalDecision.YES, ClusterAllocationExplanation.StoreCopy.NONE);

    // No store status at all, deciders say NO.
    explanation = explainForPrimaryShard(noDecision, weight, null, unassigned, activeIds);
    assertExplanations(explanation, "the shard cannot be assigned because one or more allocation decider returns a 'NO' decision",
            ClusterAllocationExplanation.FinalDecision.NO, ClusterAllocationExplanation.StoreCopy.NONE);

    // Healthy copy with a matching allocation id, but deciders say NO.
    explanation = explainForPrimaryShard(noDecision, weight, primaryStoreStatus("eggplant", null), unassigned, activeIds);
    assertExplanations(explanation, "the shard cannot be assigned because one or more allocation decider returns a 'NO' decision",
            ClusterAllocationExplanation.FinalDecision.NO, ClusterAllocationExplanation.StoreCopy.AVAILABLE);

    // Store copy that failed with a CorruptIndexException.
    explanation = explainForPrimaryShard(yesDecision, weight, primaryStoreStatus("eggplant", corruption), unassigned, activeIds);
    assertExplanations(explanation, "the copy of the shard is corrupt",
            ClusterAllocationExplanation.FinalDecision.NO, ClusterAllocationExplanation.StoreCopy.CORRUPT);

    // Allocation id "banana" is not among the active ids.
    explanation = explainForPrimaryShard(yesDecision, weight, primaryStoreStatus("banana", null), unassigned, activeIds);
    assertExplanations(explanation, "the copy of the shard is stale, allocation ids do not match",
            ClusterAllocationExplanation.FinalDecision.NO, ClusterAllocationExplanation.StoreCopy.STALE);

    // Assigned node id equals the id of the node being explained.
    explanation = explainForPrimaryShard(yesDecision, weight, primaryStoreStatus("eggplant", null), "node-0", activeIds);
    assertExplanations(explanation, "the shard is already assigned to this node",
            ClusterAllocationExplanation.FinalDecision.ALREADY_ASSIGNED, ClusterAllocationExplanation.StoreCopy.AVAILABLE);

    // Healthy copy with a matching allocation id, deciders say YES.
    explanation = explainForPrimaryShard(yesDecision, weight, primaryStoreStatus("eggplant", null), unassigned, activeIds);
    assertExplanations(explanation, "the shard can be assigned and the node contains a valid copy of the shard data",
            ClusterAllocationExplanation.FinalDecision.YES, ClusterAllocationExplanation.StoreCopy.AVAILABLE);
}

/** Builds a store status for the shared test node with version 42 and PRIMARY allocation status. */
private IndicesShardStoresResponse.StoreStatus primaryStoreStatus(String allocationId, Exception storeException) {
    return new IndicesShardStoresResponse.StoreStatus(node, 42, allocationId,
            IndicesShardStoresResponse.StoreStatus.AllocationStatus.PRIMARY, storeException);
}

/** Computes the node explanation for the shared primary shard, index metadata and node. */
private NodeExplanation explainForPrimaryShard(Decision.Multi decision, Float weight, IndicesShardStoresResponse.StoreStatus status,
                                               String assignedNodeId, Set<String> activeIds) {
    return TransportClusterAllocationExplainAction.calculateNodeExplanation(primaryShard, indexMetaData, node, decision, weight,
            status, assignedNodeId, activeIds);
}
public void testDecisionEquality() {
Decision.Multi d = new Decision.Multi();
Decision.Multi d2 = new Decision.Multi();
@ -108,21 +179,16 @@ public final class ClusterAllocationExplanationTests extends ESTestCase {
public void testExplanationSerialization() throws Exception {
ShardId shard = new ShardId("test", "uuid", 0);
Map<DiscoveryNode, Decision> nodeToDecisions = new HashMap<>();
Map<DiscoveryNode, Float> nodeToWeight = new HashMap<>();
for (int i = randomIntBetween(2, 5); i > 0; i--) {
DiscoveryNode dn = new DiscoveryNode("node-" + i, DummyTransportAddress.INSTANCE, emptyMap(), emptySet(), Version.CURRENT);
Decision.Multi d = new Decision.Multi();
d.add(Decision.single(Decision.Type.NO, "no label", "because I said no"));
d.add(Decision.single(Decision.Type.YES, "yes label", "yes please"));
d.add(Decision.single(Decision.Type.THROTTLE, "throttle label", "wait a sec"));
nodeToDecisions.put(dn, d);
nodeToWeight.put(dn, randomFloat());
}
long remainingDelay = randomIntBetween(0, 500);
Map<DiscoveryNode, NodeExplanation> nodeExplanations = new HashMap<>(1);
NodeExplanation ne = makeNodeExplanation("bar", true);
Float nodeWeight = randomFloat();
Set<String> activeAllocationIds = new HashSet<>();
activeAllocationIds.add("eggplant");
IndicesShardStoresResponse.StoreStatus storeStatus = new IndicesShardStoresResponse.StoreStatus(node, 42, "eggplant",
IndicesShardStoresResponse.StoreStatus.AllocationStatus.PRIMARY, null);
NodeExplanation ne = TransportClusterAllocationExplainAction.calculateNodeExplanation(primaryShard, indexMetaData, node,
yesDecision, nodeWeight, storeStatus, "", activeAllocationIds);
nodeExplanations.put(ne.getNode(), ne);
ClusterAllocationExplanation cae = new ClusterAllocationExplanation(shard, true,
"assignedNode", remainingDelay, null, nodeExplanations);
@ -142,23 +208,13 @@ public final class ClusterAllocationExplanationTests extends ESTestCase {
IndicesShardStoresResponse.StoreStatus status = explanation.getStoreStatus();
assertNotNull(explanation.getStoreStatus());
assertNotNull(explanation.getDecision());
assertNotNull(explanation.getWeight());
assertEquals(nodeWeight, explanation.getWeight());
}
}
public void testStaleShardExplanation() throws Exception {
public void testExplanationToXContent() throws Exception {
ShardId shardId = new ShardId("foo", "uuid", 0);
long remainingDelay = 42;
Index i = new Index("test", "uuid");
ShardId shardId = new ShardId(i, 0);
ShardRouting shard = ShardRouting.newUnassigned(i, 0, null, false, new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, "foo"));
IndexMetaData indexMetaData = IndexMetaData.builder("test")
.settings(Settings.builder()
.put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
.put(IndexMetaData.SETTING_INDEX_UUID, "uuid"))
.numberOfShards(1)
.numberOfReplicas(0)
.build();
DiscoveryNode node = new DiscoveryNode("node-0", DummyTransportAddress.INSTANCE, emptyMap(), emptySet(), Version.CURRENT);
Decision.Multi d = new Decision.Multi();
d.add(Decision.single(Decision.Type.NO, "no label", "because I said no"));
d.add(Decision.single(Decision.Type.YES, "yes label", "yes please"));
@ -168,22 +224,22 @@ public final class ClusterAllocationExplanationTests extends ESTestCase {
allocationIds.add("bar");
IndicesShardStoresResponse.StoreStatus storeStatus = new IndicesShardStoresResponse.StoreStatus(node, 42, "eggplant",
IndicesShardStoresResponse.StoreStatus.AllocationStatus.PRIMARY, new ElasticsearchException("stuff's broke, yo"));
NodeExplanation ne = TransportClusterAllocationExplainAction.calculateNodeExplanation(shard, indexMetaData, node, d, nodeWeight,
storeStatus, "node-0", allocationIds);
NodeExplanation ne = TransportClusterAllocationExplainAction.calculateNodeExplanation(primaryShard, indexMetaData, node,
d, nodeWeight, storeStatus, "node-0", allocationIds);
Map<DiscoveryNode, NodeExplanation> nodeExplanations = new HashMap<>(1);
nodeExplanations.put(ne.getNode(), ne);
ClusterAllocationExplanation cae = new ClusterAllocationExplanation(shardId, true,
"assignedNode", remainingDelay, null, nodeExplanations);
XContentBuilder builder = XContentFactory.jsonBuilder();
cae.toXContent(builder, ToXContent.EMPTY_PARAMS);
assertEquals("{\"shard\":{\"index\":\"test\",\"index_uuid\":\"uuid\",\"id\":0,\"primary\":true},\"assigned\":true," +
"\"assigned_node_id\":\"assignedNode\",\"nodes\":{\"node-0\":{\"node_name\":\"\",\"node_attributes" +
"\":{},\"store\":{\"shard_copy\":\"STALE\",\"store_exception\":\"ElasticsearchException[stuff's br" +
"oke, yo]\"},\"final_decision\":\"NO\",\"final_explanation\":\"the copy of the shard is stale, all" +
"ocation ids do not match\",\"weight\":1.5,\"decisions\":[{\"decider\":\"no label\",\"decision\":" +
"\"NO\",\"explanation\":\"because I said no\"},{\"decider\":\"yes label\",\"decision\":\"YES\",\"e" +
"xplanation\":\"yes please\"},{\"decider\":\"throttle label\",\"decision\":\"THROTTLE\",\"explanat" +
"ion\":\"wait a sec\"}]}}}",
assertEquals("{\"shard\":{\"index\":\"foo\",\"index_uuid\":\"uuid\",\"id\":0,\"primary\":true},\"assigned\":true," +
"\"assigned_node_id\":\"assignedNode\",\"nodes\":{\"node-0\":{\"node_name\":\"\",\"node_attribute" +
"s\":{},\"store\":{\"shard_copy\":\"IO_ERROR\",\"store_exception\":\"ElasticsearchException[stuff" +
"'s broke, yo]\"},\"final_decision\":\"ALREADY_ASSIGNED\",\"final_explanation\":\"the shard is al" +
"ready assigned to this node\",\"weight\":1.5,\"decisions\":[{\"decider\":\"no label\",\"decision" +
"\":\"NO\",\"explanation\":\"because I said no\"},{\"decider\":\"yes label\",\"decision\":\"YES\"" +
",\"explanation\":\"yes please\"},{\"decider\":\"throttle label\",\"decision\":\"THROTTLE\",\"exp" +
"lanation\":\"wait a sec\"}]}}}",
builder.string());
}
}