Log warning if minimum_master_nodes is set to less than a quorum of master-eligible nodes (#15625)

The setting minimum_master_nodes is crucial to avoid split brains in a cluster. In order to avoid data loss, it should always be configured to at least a quorum (majority) of master-eligible nodes.

This commit adds a warning to the logs on the master node if the value is set to less than quorum of master-eligible nodes.
This commit is contained in:
Yannick Welsch 2016-05-25 16:49:08 +02:00
parent bdee8c2632
commit 1c59c7e349
4 changed files with 32 additions and 2 deletions

View File

@ -33,6 +33,7 @@ import org.elasticsearch.common.component.AbstractComponent;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.discovery.DiscoverySettings;
import org.elasticsearch.discovery.zen.elect.ElectMasterService;
import org.elasticsearch.discovery.zen.membership.MembershipAction;
import java.util.ArrayList;
@ -53,6 +54,7 @@ public class NodeJoinController extends AbstractComponent {
final ClusterService clusterService;
final RoutingService routingService;
final ElectMasterService electMaster;
final DiscoverySettings discoverySettings;
final AtomicBoolean accumulateJoins = new AtomicBoolean(false);
@ -62,10 +64,11 @@ public class NodeJoinController extends AbstractComponent {
protected final Map<DiscoveryNode, List<MembershipAction.JoinCallback>> pendingJoinRequests = new HashMap<>();
public NodeJoinController(ClusterService clusterService, RoutingService routingService, DiscoverySettings discoverySettings, Settings settings) {
public NodeJoinController(ClusterService clusterService, RoutingService routingService, ElectMasterService electMaster, DiscoverySettings discoverySettings, Settings settings) {
super(settings);
this.clusterService = clusterService;
this.routingService = routingService;
this.electMaster = electMaster;
this.discoverySettings = discoverySettings;
}
@ -449,6 +452,8 @@ public class NodeJoinController extends AbstractComponent {
logger.error("unexpected error during [{}]", e, source);
}
}
NodeJoinController.this.electMaster.logMinimumMasterNodesWarningIfNecessary(oldState, newState);
}
}
}

View File

@ -215,7 +215,7 @@ public class ZenDiscovery extends AbstractLifecycleComponent<Discovery> implemen
nodesFD.setLocalNode(clusterService.localNode());
joinThreadControl.start();
pingService.start();
this.nodeJoinController = new NodeJoinController(clusterService, routingService, discoverySettings, settings);
this.nodeJoinController = new NodeJoinController(clusterService, routingService, electMaster, discoverySettings, settings);
}
@Override
@ -617,6 +617,7 @@ public class ZenDiscovery extends AbstractLifecycleComponent<Discovery> implemen
@Override
public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
electMaster.logMinimumMasterNodesWarningIfNecessary(oldState, newState);
}
});
}

View File

@ -22,6 +22,7 @@ package org.elasticsearch.discovery.zen.elect;
import com.carrotsearch.hppc.ObjectContainer;
import org.apache.lucene.util.CollectionUtil;
import org.elasticsearch.Version;
import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.node.DiscoveryNode;
import org.elasticsearch.common.component.AbstractComponent;
import org.elasticsearch.common.inject.Inject;
@ -80,6 +81,27 @@ public class ElectMasterService extends AbstractComponent {
return count >= minimumMasterNodes;
}
public boolean hasTooManyMasterNodes(Iterable<DiscoveryNode> nodes) {
int count = 0;
for (DiscoveryNode node : nodes) {
if (node.isMasterNode()) {
count++;
}
}
return count > 1 && minimumMasterNodes <= count / 2;
}
public void logMinimumMasterNodesWarningIfNecessary(ClusterState oldState, ClusterState newState) {
// check if min_master_nodes setting is too low and log warning
if (hasTooManyMasterNodes(oldState.nodes()) == false && hasTooManyMasterNodes(newState.nodes())) {
logger.warn("value for setting \""
+ ElectMasterService.DISCOVERY_ZEN_MINIMUM_MASTER_NODES_SETTING.getKey()
+ "\" is too low. This can result in data loss! Please set it to at least a quorum of master-eligible nodes "
+ "(current value: [{}], total number of master-eligible nodes used for publishing in this round: [{}])",
minimumMasterNodes(), newState.getNodes().getMasterNodes().size());
}
}
/**
* Returns the given nodes sorted by likelihood of being elected as master, most likely first.
* Non-master nodes are not removed but are rather put in the end

View File

@ -39,6 +39,7 @@ import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.util.concurrent.AbstractRunnable;
import org.elasticsearch.common.util.concurrent.BaseFuture;
import org.elasticsearch.discovery.DiscoverySettings;
import org.elasticsearch.discovery.zen.elect.ElectMasterService;
import org.elasticsearch.discovery.zen.membership.MembershipAction;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.test.junit.annotations.TestLogging;
@ -99,6 +100,7 @@ public class NodeJoinControllerTests extends ESTestCase {
setState(clusterService, ClusterState.builder(clusterService.state()).nodes(
DiscoveryNodes.builder(initialNodes).masterNodeId(localNode.getId())));
nodeJoinController = new NodeJoinController(clusterService, new NoopRoutingService(Settings.EMPTY),
new ElectMasterService(Settings.EMPTY, Version.CURRENT),
new DiscoverySettings(Settings.EMPTY, new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS)),
Settings.EMPTY);
}