Include pings from client nodes in master election

We currently have a `discovery.zen.master_election.filter_client` setting that control whether their ping responses are ignored for master election (which is the current default). With the push to treat client nodes as normal nodes (and promote the transport/rest clients for client work), this should be changed. This commit remove this setting and it's companion `discovery.zen.master_election.filter_data` setting (currently defaulting to  false) in favor of singe `discovery.zen.master_election.ignore_non_master_pings` setting with more intuitive name (defaulting to false).

Resolves #17325
Closes #17329
This commit is contained in:
Boaz Leskes 2016-03-24 17:10:45 +01:00
parent d0413f0f0e
commit 6dd164d0bd
5 changed files with 81 additions and 41 deletions

View File

@ -344,9 +344,8 @@ public final class ClusterSettings extends AbstractScopedSettings {
ZenDiscovery.JOIN_RETRY_DELAY_SETTING,
ZenDiscovery.MAX_PINGS_FROM_ANOTHER_MASTER_SETTING,
ZenDiscovery.SEND_LEAVE_REQUEST_SETTING,
ZenDiscovery.MASTER_ELECTION_FILTER_CLIENT_SETTING,
ZenDiscovery.MASTER_ELECTION_WAIT_FOR_JOINS_TIMEOUT_SETTING,
ZenDiscovery.MASTER_ELECTION_FILTER_DATA_SETTING,
ZenDiscovery.MASTER_ELECTION_IGNORE_NON_MASTER_PINGS_SETTING,
UnicastZenPing.DISCOVERY_ZEN_PING_UNICAST_HOSTS_SETTING,
UnicastZenPing.DISCOVERY_ZEN_PING_UNICAST_CONCURRENT_CONNECTS_SETTING,
SearchService.DEFAULT_KEEPALIVE_SETTING,

View File

@ -71,6 +71,7 @@ import org.elasticsearch.transport.TransportService;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
@ -78,6 +79,7 @@ import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.atomic.AtomicReference;
import java.util.stream.Collectors;
import static org.elasticsearch.common.unit.TimeValue.timeValueSeconds;
@ -100,14 +102,12 @@ public class ZenDiscovery extends AbstractLifecycleComponent<Discovery> implemen
Setting.intSetting("discovery.zen.max_pings_from_another_master", 3, 1, Property.NodeScope);
public final static Setting<Boolean> SEND_LEAVE_REQUEST_SETTING =
Setting.boolSetting("discovery.zen.send_leave_request", true, Property.NodeScope);
public final static Setting<Boolean> MASTER_ELECTION_FILTER_CLIENT_SETTING =
Setting.boolSetting("discovery.zen.master_election.filter_client", true, Property.NodeScope);
public final static Setting<TimeValue> MASTER_ELECTION_WAIT_FOR_JOINS_TIMEOUT_SETTING =
Setting.timeSetting("discovery.zen.master_election.wait_for_joins_timeout",
settings -> TimeValue.timeValueMillis(JOIN_TIMEOUT_SETTING.get(settings).millis() / 2).toString(), TimeValue.timeValueMillis(0),
Property.NodeScope);
public final static Setting<Boolean> MASTER_ELECTION_FILTER_DATA_SETTING =
Setting.boolSetting("discovery.zen.master_election.filter_data", false, Property.NodeScope);
public final static Setting<Boolean> MASTER_ELECTION_IGNORE_NON_MASTER_PINGS_SETTING =
Setting.boolSetting("discovery.zen.master_election.ignore_non_master_pings", false, Property.NodeScope);
public static final String DISCOVERY_REJOIN_ACTION_NAME = "internal:discovery/zen/rejoin";
@ -138,8 +138,7 @@ public class ZenDiscovery extends AbstractLifecycleComponent<Discovery> implemen
private final ElectMasterService electMaster;
private final boolean masterElectionFilterClientNodes;
private final boolean masterElectionFilterDataNodes;
private final boolean masterElectionIgnoreNonMasters;
private final TimeValue masterElectionWaitForJoinsTimeout;
private final JoinThreadControl joinThreadControl;
@ -169,11 +168,11 @@ public class ZenDiscovery extends AbstractLifecycleComponent<Discovery> implemen
this.maxPingsFromAnotherMaster = MAX_PINGS_FROM_ANOTHER_MASTER_SETTING.get(settings);
this.sendLeaveRequest = SEND_LEAVE_REQUEST_SETTING.get(settings);
this.masterElectionFilterClientNodes = MASTER_ELECTION_FILTER_CLIENT_SETTING.get(settings);
this.masterElectionFilterDataNodes = MASTER_ELECTION_FILTER_DATA_SETTING.get(settings);
this.masterElectionIgnoreNonMasters = MASTER_ELECTION_IGNORE_NON_MASTER_PINGS_SETTING.get(settings);
this.masterElectionWaitForJoinsTimeout = MASTER_ELECTION_WAIT_FOR_JOINS_TIMEOUT_SETTING.get(settings);
logger.debug("using ping_timeout [{}], join.timeout [{}], master_election.filter_client [{}], master_election.filter_data [{}]", this.pingTimeout, joinTimeout, masterElectionFilterClientNodes, masterElectionFilterDataNodes);
logger.debug("using ping_timeout [{}], join.timeout [{}], master_election.ignore_non_master [{}]",
this.pingTimeout, joinTimeout, masterElectionIgnoreNonMasters);
clusterSettings.addSettingsUpdateConsumer(ElectMasterService.DISCOVERY_ZEN_MINIMUM_MASTER_NODES_SETTING, this::handleMinimumMasterNodesChanged, (value) -> {
final ClusterState clusterState = clusterService.state();
@ -846,30 +845,8 @@ public class ZenDiscovery extends AbstractLifecycleComponent<Discovery> implemen
}
// filter responses
List<ZenPing.PingResponse> pingResponses = new ArrayList<>();
for (ZenPing.PingResponse pingResponse : fullPingResponses) {
DiscoveryNode node = pingResponse.node();
if (masterElectionFilterClientNodes && (node.clientNode() || (!node.masterNode() && !node.dataNode()))) {
// filter out the client node, which is a client node, or also one that is not data and not master (effectively, client)
} else if (masterElectionFilterDataNodes && (!node.masterNode() && node.dataNode())) {
// filter out data node that is not also master
} else {
pingResponses.add(pingResponse);
}
}
if (logger.isDebugEnabled()) {
StringBuilder sb = new StringBuilder();
if (pingResponses.isEmpty()) {
sb.append(" {none}");
} else {
for (ZenPing.PingResponse pingResponse : pingResponses) {
sb.append("\n\t--> ").append(pingResponse);
}
}
logger.debug("filtered ping responses: (filter_client[{}], filter_data[{}]){}", masterElectionFilterClientNodes,
masterElectionFilterDataNodes, sb);
}
final List<ZenPing.PingResponse> pingResponses;
pingResponses = filterPingResponses(fullPingResponses, masterElectionIgnoreNonMasters, logger);
final DiscoveryNode localNode = clusterService.localNode();
List<DiscoveryNode> pingMasters = new ArrayList<>();
@ -925,6 +902,28 @@ public class ZenDiscovery extends AbstractLifecycleComponent<Discovery> implemen
}
}
static List<ZenPing.PingResponse> filterPingResponses(ZenPing.PingResponse[] fullPingResponses, boolean masterElectionIgnoreNonMasters, ESLogger logger) {
List<ZenPing.PingResponse> pingResponses;
if (masterElectionIgnoreNonMasters) {
pingResponses = Arrays.stream(fullPingResponses).filter(ping -> ping.node().isMasterNode()).collect(Collectors.toList());
} else {
pingResponses = Arrays.asList(fullPingResponses);
}
if (logger.isDebugEnabled()) {
StringBuilder sb = new StringBuilder();
if (pingResponses.isEmpty()) {
sb.append(" {none}");
} else {
for (ZenPing.PingResponse pingResponse : pingResponses) {
sb.append("\n\t--> ").append(pingResponse);
}
}
logger.debug("filtered ping responses: (ignore_non_masters [{}]){}", masterElectionIgnoreNonMasters, sb);
}
return pingResponses;
}
protected ClusterState rejoin(ClusterState clusterState, String reason) {
// *** called from within an cluster state update task *** //

View File

@ -25,10 +25,19 @@ import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.node.DiscoveryNode;
import org.elasticsearch.cluster.node.DiscoveryNodes;
import org.elasticsearch.common.transport.DummyTransportAddress;
import org.elasticsearch.discovery.zen.ping.ZenPing;
import org.elasticsearch.test.ESTestCase;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import static org.elasticsearch.discovery.zen.ZenDiscovery.shouldIgnoreOrRejectNewClusterState;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.equalTo;
/**
*/
@ -89,4 +98,34 @@ public class ZenDiscoveryUnitTests extends ESTestCase {
}
assertFalse("should not ignore, because current state doesn't have a master", shouldIgnoreOrRejectNewClusterState(logger, currentState.build(), newState.build()));
}
public void testFilterNonMasterPingResponse() {
ArrayList<ZenPing.PingResponse> responses = new ArrayList<>();
ArrayList<DiscoveryNode> masterNodes = new ArrayList<>();
ArrayList<DiscoveryNode> allNodes = new ArrayList<>();
for (int i = randomIntBetween(10, 20); i >= 0; i--) {
Map<String, String> attrs = new HashMap<>();
for (String attr : randomSubsetOf(
Arrays.asList(DiscoveryNode.INGEST_ATTR, DiscoveryNode.DATA_ATTR, DiscoveryNode.MASTER_ATTR))) {
attrs.put(attr, randomBoolean() + "");
}
DiscoveryNode node = new DiscoveryNode("node_" + i, "id_" + i, DummyTransportAddress.INSTANCE, attrs, Version.CURRENT);
responses.add(new ZenPing.PingResponse(node, randomBoolean() ? null : node, new ClusterName("test"), randomBoolean()));
allNodes.add(node);
if (node.isMasterNode()) {
masterNodes.add(node);
}
}
boolean ignore = randomBoolean();
List<ZenPing.PingResponse> filtered = ZenDiscovery.filterPingResponses(
responses.toArray(new ZenPing.PingResponse[responses.size()]), ignore, logger);
final List<DiscoveryNode> filteredNodes = filtered.stream().map(ZenPing.PingResponse::node).collect(Collectors.toList());
if (ignore) {
assertThat(filteredNodes, equalTo(masterNodes));
} else {
assertThat(filteredNodes, equalTo(allNodes));
}
}
}

View File

@ -82,7 +82,7 @@ The `index.analysis.analyzer.default_index` analyzer is not supported anymore.
If you wish to change the analyzer to use for indexing, change the
`index.analysis.analyzer.default` analyzer instead.
==== Ping timeout settings
==== Ping settings
Previously, there were three settings for the ping timeout:
`discovery.zen.initial_ping_timeout`, `discovery.zen.ping.timeout` and
@ -90,6 +90,11 @@ Previously, there were three settings for the ping timeout:
setting key for the ping timeout is now `discovery.zen.ping_timeout`. The
default value for ping timeouts remains at three seconds.
`discovery.zen.master_election.filter_client` and `discovery.zen.master_election.filter_data` have
been removed in favor of the new `discovery.zen.master_election.ignore_non_master_pings`. This setting control how ping responses
are interpreted during master election and should be used with care and only in extreme cases. See documentation for details.
==== Recovery settings
Recovery settings deprecated in 1.x have been removed:

View File

@ -56,11 +56,9 @@ serves as a protection against (partial) network failures where node may unjustl
think that the master has failed. In this case the node will simply hear from
other nodes about the currently active master.
If `discovery.zen.master_election.filter_client` is `true`, pings from client nodes (nodes where `node.client` is
`true`, or both `node.data` and `node.master` are `false`) are ignored during master election; the default value is
`true`. If `discovery.zen.master_election.filter_data` is `true`, pings from non-master-eligible data nodes (nodes
where `node.data` is `true` and `node.master` is `false`) are ignored during master election; the default value is
`false`. Pings from master-eligible nodes are always observed during master election.
If `discovery.zen.master_election.ignore_non_master_pings` is `true`, pings from nodes that are not master
eligible (nodes where `node.master` is `false`) are ignored during master election; the default value is
`false`.
Nodes can be excluded from becoming a master by setting `node.master` to
`false`. Note, once a node is a client node (`node.client` set to