Improve some logging around master election and cluster state

Tweaks done while debugging http://build-us-00.elastic.co/job/es_core_master_window-2008/2477/

Closes #14481
This commit is contained in:
Boaz Leskes 2015-11-03 16:14:15 +00:00
parent c4b68801bf
commit 8eff4e211a
5 changed files with 39 additions and 25 deletions

View File

@ -21,7 +21,6 @@ package org.elasticsearch.cluster;
import com.carrotsearch.hppc.cursors.ObjectCursor; import com.carrotsearch.hppc.cursors.ObjectCursor;
import com.carrotsearch.hppc.cursors.ObjectObjectCursor; import com.carrotsearch.hppc.cursors.ObjectObjectCursor;
import org.elasticsearch.cluster.DiffableUtils.KeyedReader; import org.elasticsearch.cluster.DiffableUtils.KeyedReader;
import org.elasticsearch.cluster.block.ClusterBlock; import org.elasticsearch.cluster.block.ClusterBlock;
import org.elasticsearch.cluster.block.ClusterBlocks; import org.elasticsearch.cluster.block.ClusterBlocks;
@ -31,12 +30,7 @@ import org.elasticsearch.cluster.metadata.MappingMetaData;
import org.elasticsearch.cluster.metadata.MetaData; import org.elasticsearch.cluster.metadata.MetaData;
import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.cluster.node.DiscoveryNode;
import org.elasticsearch.cluster.node.DiscoveryNodes; import org.elasticsearch.cluster.node.DiscoveryNodes;
import org.elasticsearch.cluster.routing.IndexRoutingTable; import org.elasticsearch.cluster.routing.*;
import org.elasticsearch.cluster.routing.IndexShardRoutingTable;
import org.elasticsearch.cluster.routing.RoutingNode;
import org.elasticsearch.cluster.routing.RoutingNodes;
import org.elasticsearch.cluster.routing.RoutingTable;
import org.elasticsearch.cluster.routing.ShardRouting;
import org.elasticsearch.cluster.routing.allocation.RoutingAllocation; import org.elasticsearch.cluster.routing.allocation.RoutingAllocation;
import org.elasticsearch.cluster.service.InternalClusterService; import org.elasticsearch.cluster.service.InternalClusterService;
import org.elasticsearch.common.Nullable; import org.elasticsearch.common.Nullable;
@ -57,11 +51,7 @@ import org.elasticsearch.discovery.local.LocalDiscovery;
import org.elasticsearch.discovery.zen.publish.PublishClusterStateAction; import org.elasticsearch.discovery.zen.publish.PublishClusterStateAction;
import java.io.IOException; import java.io.IOException;
import java.util.EnumSet; import java.util.*;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
/** /**
* Represents the current state of the cluster. * Represents the current state of the cluster.
@ -283,6 +273,7 @@ public class ClusterState implements ToXContent, Diffable<ClusterState> {
sb.append("state uuid: ").append(stateUUID).append("\n"); sb.append("state uuid: ").append(stateUUID).append("\n");
sb.append("from_diff: ").append(wasReadFromDiff).append("\n"); sb.append("from_diff: ").append(wasReadFromDiff).append("\n");
sb.append("meta data version: ").append(metaData.version()).append("\n"); sb.append("meta data version: ").append(metaData.version()).append("\n");
sb.append(blocks().prettyPrint());
sb.append(nodes().prettyPrint()); sb.append(nodes().prettyPrint());
sb.append(routingTable().prettyPrint()); sb.append(routingTable().prettyPrint());
sb.append(getRoutingNodes().prettyPrint()); sb.append(getRoutingNodes().prettyPrint());

View File

@ -20,7 +20,6 @@
package org.elasticsearch.cluster.block; package org.elasticsearch.cluster.block;
import com.carrotsearch.hppc.cursors.ObjectObjectCursor; import com.carrotsearch.hppc.cursors.ObjectObjectCursor;
import org.elasticsearch.cluster.AbstractDiffable; import org.elasticsearch.cluster.AbstractDiffable;
import org.elasticsearch.cluster.metadata.IndexMetaData; import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.cluster.metadata.MetaDataIndexStateService; import org.elasticsearch.cluster.metadata.MetaDataIndexStateService;
@ -199,6 +198,28 @@ public class ClusterBlocks extends AbstractDiffable<ClusterBlocks> {
return new ClusterBlockException(unmodifiableSet(blocks.collect(toSet()))); return new ClusterBlockException(unmodifiableSet(blocks.collect(toSet())));
} }
/**
 * Renders the global and per-index blocks as multi-line text for logging and debugging.
 *
 * Layout: a {@code blocks:} header, then an optional {@code _global_:} section, then one
 * section per index key, with every block on its own line.
 *
 * @return the rendered text, or an empty string when there are neither global nor
 *         index-level blocks; any non-empty result ends with a line break so that the
 *         caller can append the next section directly
 */
public String prettyPrint() {
    if (global.isEmpty() && indices().isEmpty()) {
        return "";
    }
    StringBuilder sb = new StringBuilder();
    sb.append("blocks: \n");
    if (global.isEmpty() == false) {
        sb.append(" _global_:\n");
        for (ClusterBlock block : global) {
            // Terminate each entry: without the line break several blocks run together on one line
            // and the following section header gets glued onto the last block.
            // NOTE(review): assumes ClusterBlock#toString() does not already end with a newline - confirm.
            sb.append(" ").append(block).append("\n");
        }
    }
    for (ObjectObjectCursor<String, Set<ClusterBlock>> entry : indices()) {
        sb.append(" ").append(entry.key).append(":\n");
        for (ClusterBlock block : entry.value) {
            sb.append(" ").append(block).append("\n");
        }
    }
    // No extra trailing line break needed: every header and every block line above is already terminated.
    return sb.toString();
}
@Override @Override
public void writeTo(StreamOutput out) throws IOException { public void writeTo(StreamOutput out) throws IOException {
writeBlockSet(global, out); writeBlockSet(global, out);

View File

@ -86,7 +86,7 @@ public class NodeJoinController extends AbstractComponent {
@Override @Override
void onClose() { void onClose() {
if (electionContext.compareAndSet(this, null)) { if (electionContext.compareAndSet(this, null)) {
stopAccumulatingJoins(); stopAccumulatingJoins("election closed");
} else { } else {
assert false : "failed to remove current election context"; assert false : "failed to remove current election context";
} }
@ -156,7 +156,7 @@ public class NodeJoinController extends AbstractComponent {
/** /**
* Accumulates any future incoming join request. Pending join requests will be processed in the final steps of becoming a * Accumulates any future incoming join request. Pending join requests will be processed in the final steps of becoming a
* master or when {@link #stopAccumulatingJoins()} is called. * master or when {@link #stopAccumulatingJoins(String)} is called.
*/ */
public void startAccumulatingJoins() { public void startAccumulatingJoins() {
logger.trace("starting to accumulate joins"); logger.trace("starting to accumulate joins");
@ -166,14 +166,14 @@ public class NodeJoinController extends AbstractComponent {
} }
/** Stopped accumulating joins. All pending joins will be processed. Future joins will be processed immediately */ /** Stopped accumulating joins. All pending joins will be processed. Future joins will be processed immediately */
public void stopAccumulatingJoins() { public void stopAccumulatingJoins(String reason) {
logger.trace("stopping join accumulation"); logger.trace("stopping join accumulation ([{}])", reason);
assert electionContext.get() == null : "stopAccumulatingJoins() called, but there is an ongoing election context"; assert electionContext.get() == null : "stopAccumulatingJoins() called, but there is an ongoing election context";
boolean b = accumulateJoins.getAndSet(false); boolean b = accumulateJoins.getAndSet(false);
assert b : "stopAccumulatingJoins() called but not accumulating"; assert b : "stopAccumulatingJoins() called but not accumulating";
synchronized (pendingJoinRequests) { synchronized (pendingJoinRequests) {
if (pendingJoinRequests.size() > 0) { if (pendingJoinRequests.size() > 0) {
processJoins("stopping to accumulate joins"); processJoins("pending joins after accumulation stop [" + reason + "]");
} }
} }
} }
@ -210,7 +210,7 @@ public class NodeJoinController extends AbstractComponent {
return; return;
} }
int pendingMasterJoins=0; int pendingMasterJoins = 0;
synchronized (pendingJoinRequests) { synchronized (pendingJoinRequests) {
for (DiscoveryNode node : pendingJoinRequests.keySet()) { for (DiscoveryNode node : pendingJoinRequests.keySet()) {
if (node.isMasterNode()) { if (node.isMasterNode()) {
@ -219,7 +219,9 @@ public class NodeJoinController extends AbstractComponent {
} }
} }
if (pendingMasterJoins < context.requiredMasterJoins) { if (pendingMasterJoins < context.requiredMasterJoins) {
if (context.pendingSetAsMasterTask.get() == false) {
logger.trace("not enough joins for election. Got [{}], required [{}]", pendingMasterJoins, context.requiredMasterJoins); logger.trace("not enough joins for election. Got [{}], required [{}]", pendingMasterJoins, context.requiredMasterJoins);
}
return; return;
} }
if (context.pendingSetAsMasterTask.getAndSet(true)) { if (context.pendingSetAsMasterTask.getAndSet(true)) {

View File

@ -44,7 +44,6 @@ import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.discovery.Discovery; import org.elasticsearch.discovery.Discovery;
import org.elasticsearch.discovery.DiscoverySettings; import org.elasticsearch.discovery.DiscoverySettings;
import org.elasticsearch.discovery.DiscoveryStats; import org.elasticsearch.discovery.DiscoveryStats;
import org.elasticsearch.discovery.zen.publish.PendingClusterStateStats;
import org.elasticsearch.discovery.InitialStateDiscoveryListener; import org.elasticsearch.discovery.InitialStateDiscoveryListener;
import org.elasticsearch.discovery.zen.elect.ElectMasterService; import org.elasticsearch.discovery.zen.elect.ElectMasterService;
import org.elasticsearch.discovery.zen.fd.MasterFaultDetection; import org.elasticsearch.discovery.zen.fd.MasterFaultDetection;
@ -53,6 +52,7 @@ import org.elasticsearch.discovery.zen.membership.MembershipAction;
import org.elasticsearch.discovery.zen.ping.PingContextProvider; import org.elasticsearch.discovery.zen.ping.PingContextProvider;
import org.elasticsearch.discovery.zen.ping.ZenPing; import org.elasticsearch.discovery.zen.ping.ZenPing;
import org.elasticsearch.discovery.zen.ping.ZenPingService; import org.elasticsearch.discovery.zen.ping.ZenPingService;
import org.elasticsearch.discovery.zen.publish.PendingClusterStateStats;
import org.elasticsearch.discovery.zen.publish.PublishClusterStateAction; import org.elasticsearch.discovery.zen.publish.PublishClusterStateAction;
import org.elasticsearch.node.service.NodeService; import org.elasticsearch.node.service.NodeService;
import org.elasticsearch.node.settings.NodeSettingsService; import org.elasticsearch.node.settings.NodeSettingsService;
@ -401,7 +401,7 @@ public class ZenDiscovery extends AbstractLifecycleComponent<Discovery> implemen
); );
} else { } else {
// process any incoming joins (they will fail because we are not the master) // process any incoming joins (they will fail because we are not the master)
nodeJoinController.stopAccumulatingJoins(); nodeJoinController.stopAccumulatingJoins("not master");
// send join request // send join request
final boolean success = joinElectedMaster(masterNode); final boolean success = joinElectedMaster(masterNode);

View File

@ -86,7 +86,7 @@ public class NodeJoinControllerTests extends ESTestCase {
nodes.add(node); nodes.add(node);
pendingJoins.add(joinNodeAsync(node)); pendingJoins.add(joinNodeAsync(node));
} }
nodeJoinController.stopAccumulatingJoins(); nodeJoinController.stopAccumulatingJoins("test");
for (int i = randomInt(5); i > 0; i--) { for (int i = randomInt(5); i > 0; i--) {
DiscoveryNode node = newNode(nodeId++); DiscoveryNode node = newNode(nodeId++);
nodes.add(node); nodes.add(node);
@ -119,7 +119,7 @@ public class NodeJoinControllerTests extends ESTestCase {
pendingJoins.add(future); pendingJoins.add(future);
assertThat(future.isDone(), equalTo(false)); assertThat(future.isDone(), equalTo(false));
} }
nodeJoinController.stopAccumulatingJoins(); nodeJoinController.stopAccumulatingJoins("test");
for (Future<Void> future : pendingJoins) { for (Future<Void> future : pendingJoins) {
try { try {
future.get(); future.get();
@ -284,7 +284,7 @@ public class NodeJoinControllerTests extends ESTestCase {
logger.debug("--> testing accumulation stopped"); logger.debug("--> testing accumulation stopped");
nodeJoinController.startAccumulatingJoins(); nodeJoinController.startAccumulatingJoins();
nodeJoinController.stopAccumulatingJoins(); nodeJoinController.stopAccumulatingJoins("test");
} }