diff --git a/core/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java b/core/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java index 14e79586617..5f587cc270d 100644 --- a/core/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java +++ b/core/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java @@ -344,9 +344,8 @@ public final class ClusterSettings extends AbstractScopedSettings { ZenDiscovery.JOIN_RETRY_DELAY_SETTING, ZenDiscovery.MAX_PINGS_FROM_ANOTHER_MASTER_SETTING, ZenDiscovery.SEND_LEAVE_REQUEST_SETTING, - ZenDiscovery.MASTER_ELECTION_FILTER_CLIENT_SETTING, ZenDiscovery.MASTER_ELECTION_WAIT_FOR_JOINS_TIMEOUT_SETTING, - ZenDiscovery.MASTER_ELECTION_FILTER_DATA_SETTING, + ZenDiscovery.MASTER_ELECTION_IGNORE_NON_MASTER_PINGS_SETTING, UnicastZenPing.DISCOVERY_ZEN_PING_UNICAST_HOSTS_SETTING, UnicastZenPing.DISCOVERY_ZEN_PING_UNICAST_CONCURRENT_CONNECTS_SETTING, SearchService.DEFAULT_KEEPALIVE_SETTING, diff --git a/core/src/main/java/org/elasticsearch/discovery/zen/ZenDiscovery.java b/core/src/main/java/org/elasticsearch/discovery/zen/ZenDiscovery.java index 07cd3853cb6..6e0f17812ce 100644 --- a/core/src/main/java/org/elasticsearch/discovery/zen/ZenDiscovery.java +++ b/core/src/main/java/org/elasticsearch/discovery/zen/ZenDiscovery.java @@ -71,6 +71,7 @@ import org.elasticsearch.transport.TransportService; import java.io.IOException; import java.util.ArrayList; +import java.util.Arrays; import java.util.HashSet; import java.util.List; import java.util.Set; @@ -78,6 +79,7 @@ import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.AtomicReference; +import java.util.stream.Collectors; import static org.elasticsearch.common.unit.TimeValue.timeValueSeconds; @@ -100,14 +102,12 @@ public class ZenDiscovery extends AbstractLifecycleComponent implemen Setting.intSetting("discovery.zen.max_pings_from_another_master", 3, 1, Property.NodeScope); public final static Setting SEND_LEAVE_REQUEST_SETTING = Setting.boolSetting("discovery.zen.send_leave_request", true, Property.NodeScope); - public final static Setting MASTER_ELECTION_FILTER_CLIENT_SETTING = - Setting.boolSetting("discovery.zen.master_election.filter_client", true, Property.NodeScope); public final static Setting MASTER_ELECTION_WAIT_FOR_JOINS_TIMEOUT_SETTING = Setting.timeSetting("discovery.zen.master_election.wait_for_joins_timeout", settings -> TimeValue.timeValueMillis(JOIN_TIMEOUT_SETTING.get(settings).millis() / 2).toString(), TimeValue.timeValueMillis(0), Property.NodeScope); - public final static Setting MASTER_ELECTION_FILTER_DATA_SETTING = - Setting.boolSetting("discovery.zen.master_election.filter_data", false, Property.NodeScope); + public final static Setting MASTER_ELECTION_IGNORE_NON_MASTER_PINGS_SETTING = + Setting.boolSetting("discovery.zen.master_election.ignore_non_master_pings", false, Property.NodeScope); public static final String DISCOVERY_REJOIN_ACTION_NAME = "internal:discovery/zen/rejoin"; @@ -138,8 +138,7 @@ public class ZenDiscovery extends AbstractLifecycleComponent implemen private final ElectMasterService electMaster; - private final boolean masterElectionFilterClientNodes; - private final boolean masterElectionFilterDataNodes; + private final boolean masterElectionIgnoreNonMasters; private final TimeValue masterElectionWaitForJoinsTimeout; private final JoinThreadControl joinThreadControl; @@ -169,11 +168,11 @@ public class ZenDiscovery extends AbstractLifecycleComponent implemen this.maxPingsFromAnotherMaster = MAX_PINGS_FROM_ANOTHER_MASTER_SETTING.get(settings); this.sendLeaveRequest = SEND_LEAVE_REQUEST_SETTING.get(settings); - this.masterElectionFilterClientNodes = MASTER_ELECTION_FILTER_CLIENT_SETTING.get(settings); - this.masterElectionFilterDataNodes = MASTER_ELECTION_FILTER_DATA_SETTING.get(settings); + this.masterElectionIgnoreNonMasters = MASTER_ELECTION_IGNORE_NON_MASTER_PINGS_SETTING.get(settings); this.masterElectionWaitForJoinsTimeout = MASTER_ELECTION_WAIT_FOR_JOINS_TIMEOUT_SETTING.get(settings); - logger.debug("using ping_timeout [{}], join.timeout [{}], master_election.filter_client [{}], master_election.filter_data [{}]", this.pingTimeout, joinTimeout, masterElectionFilterClientNodes, masterElectionFilterDataNodes); + logger.debug("using ping_timeout [{}], join.timeout [{}], master_election.ignore_non_master [{}]", + this.pingTimeout, joinTimeout, masterElectionIgnoreNonMasters); clusterSettings.addSettingsUpdateConsumer(ElectMasterService.DISCOVERY_ZEN_MINIMUM_MASTER_NODES_SETTING, this::handleMinimumMasterNodesChanged, (value) -> { final ClusterState clusterState = clusterService.state(); @@ -846,30 +845,8 @@ public class ZenDiscovery extends AbstractLifecycleComponent implemen } // filter responses - List pingResponses = new ArrayList<>(); - for (ZenPing.PingResponse pingResponse : fullPingResponses) { - DiscoveryNode node = pingResponse.node(); - if (masterElectionFilterClientNodes && (node.clientNode() || (!node.masterNode() && !node.dataNode()))) { - // filter out the client node, which is a client node, or also one that is not data and not master (effectively, client) - } else if (masterElectionFilterDataNodes && (!node.masterNode() && node.dataNode())) { - // filter out data node that is not also master - } else { - pingResponses.add(pingResponse); - } - } - - if (logger.isDebugEnabled()) { - StringBuilder sb = new StringBuilder(); - if (pingResponses.isEmpty()) { - sb.append(" {none}"); - } else { - for (ZenPing.PingResponse pingResponse : pingResponses) { - sb.append("\n\t--> ").append(pingResponse); - } - } - logger.debug("filtered ping responses: (filter_client[{}], filter_data[{}]){}", masterElectionFilterClientNodes, - masterElectionFilterDataNodes, sb); - } + final List pingResponses; + pingResponses = filterPingResponses(fullPingResponses, masterElectionIgnoreNonMasters, logger); final DiscoveryNode localNode = clusterService.localNode(); List pingMasters = new ArrayList<>(); @@ -925,6 +902,28 @@ public class ZenDiscovery extends AbstractLifecycleComponent implemen } } + static List filterPingResponses(ZenPing.PingResponse[] fullPingResponses, boolean masterElectionIgnoreNonMasters, ESLogger logger) { + List pingResponses; + if (masterElectionIgnoreNonMasters) { + pingResponses = Arrays.stream(fullPingResponses).filter(ping -> ping.node().isMasterNode()).collect(Collectors.toList()); + } else { + pingResponses = Arrays.asList(fullPingResponses); + } + + if (logger.isDebugEnabled()) { + StringBuilder sb = new StringBuilder(); + if (pingResponses.isEmpty()) { + sb.append(" {none}"); + } else { + for (ZenPing.PingResponse pingResponse : pingResponses) { + sb.append("\n\t--> ").append(pingResponse); + } + } + logger.debug("filtered ping responses: (ignore_non_masters [{}]){}", masterElectionIgnoreNonMasters, sb); + } + return pingResponses; + } + protected ClusterState rejoin(ClusterState clusterState, String reason) { // *** called from within an cluster state update task *** // diff --git a/core/src/test/java/org/elasticsearch/discovery/zen/ZenDiscoveryUnitTests.java b/core/src/test/java/org/elasticsearch/discovery/zen/ZenDiscoveryUnitTests.java index e1d6beecaa9..468d01aaecb 100644 --- a/core/src/test/java/org/elasticsearch/discovery/zen/ZenDiscoveryUnitTests.java +++ b/core/src/test/java/org/elasticsearch/discovery/zen/ZenDiscoveryUnitTests.java @@ -25,10 +25,19 @@ import org.elasticsearch.cluster.ClusterState; import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.cluster.node.DiscoveryNodes; import org.elasticsearch.common.transport.DummyTransportAddress; +import org.elasticsearch.discovery.zen.ping.ZenPing; import org.elasticsearch.test.ESTestCase; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + import static org.elasticsearch.discovery.zen.ZenDiscovery.shouldIgnoreOrRejectNewClusterState; import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.equalTo; /** */ @@ -89,4 +98,34 @@ public class ZenDiscoveryUnitTests extends ESTestCase { } assertFalse("should not ignore, because current state doesn't have a master", shouldIgnoreOrRejectNewClusterState(logger, currentState.build(), newState.build())); } + + public void testFilterNonMasterPingResponse() { + ArrayList responses = new ArrayList<>(); + ArrayList masterNodes = new ArrayList<>(); + ArrayList allNodes = new ArrayList<>(); + for (int i = randomIntBetween(10, 20); i >= 0; i--) { + Map attrs = new HashMap<>(); + for (String attr : randomSubsetOf( + Arrays.asList(DiscoveryNode.INGEST_ATTR, DiscoveryNode.DATA_ATTR, DiscoveryNode.MASTER_ATTR))) { + attrs.put(attr, randomBoolean() + ""); + } + + DiscoveryNode node = new DiscoveryNode("node_" + i, "id_" + i, DummyTransportAddress.INSTANCE, attrs, Version.CURRENT); + responses.add(new ZenPing.PingResponse(node, randomBoolean() ? null : node, new ClusterName("test"), randomBoolean())); + allNodes.add(node); + if (node.isMasterNode()) { + masterNodes.add(node); + } + } + + boolean ignore = randomBoolean(); + List filtered = ZenDiscovery.filterPingResponses( + responses.toArray(new ZenPing.PingResponse[responses.size()]), ignore, logger); + final List filteredNodes = filtered.stream().map(ZenPing.PingResponse::node).collect(Collectors.toList()); + if (ignore) { + assertThat(filteredNodes, equalTo(masterNodes)); + } else { + assertThat(filteredNodes, equalTo(allNodes)); + } + } } diff --git a/docs/reference/migration/migrate_5_0/settings.asciidoc b/docs/reference/migration/migrate_5_0/settings.asciidoc index f9b104e2fcf..55b0bcc3cca 100644 --- a/docs/reference/migration/migrate_5_0/settings.asciidoc +++ b/docs/reference/migration/migrate_5_0/settings.asciidoc @@ -82,7 +82,7 @@ The `index.analysis.analyzer.default_index` analyzer is not supported anymore. If you wish to change the analyzer to use for indexing, change the `index.analysis.analyzer.default` analyzer instead. -==== Ping timeout settings +==== Ping settings Previously, there were three settings for the ping timeout: `discovery.zen.initial_ping_timeout`, `discovery.zen.ping.timeout` and @@ -90,6 +90,11 @@ Previously, there were three settings for the ping timeout: setting key for the ping timeout is now `discovery.zen.ping_timeout`. The default value for ping timeouts remains at three seconds. + +`discovery.zen.master_election.filter_client` and `discovery.zen.master_election.filter_data` have +been removed in favor of the new `discovery.zen.master_election.ignore_non_master_pings`. This setting control how ping responses +are interpreted during master election and should be used with care and only in extreme cases. See documentation for details. + ==== Recovery settings Recovery settings deprecated in 1.x have been removed: diff --git a/docs/reference/modules/discovery/zen.asciidoc b/docs/reference/modules/discovery/zen.asciidoc index 4c750fd0c19..59076fd3874 100644 --- a/docs/reference/modules/discovery/zen.asciidoc +++ b/docs/reference/modules/discovery/zen.asciidoc @@ -56,11 +56,9 @@ serves as a protection against (partial) network failures where node may unjustl think that the master has failed. In this case the node will simply hear from other nodes about the currently active master. -If `discovery.zen.master_election.filter_client` is `true`, pings from client nodes (nodes where `node.client` is -`true`, or both `node.data` and `node.master` are `false`) are ignored during master election; the default value is -`true`. If `discovery.zen.master_election.filter_data` is `true`, pings from non-master-eligible data nodes (nodes -where `node.data` is `true` and `node.master` is `false`) are ignored during master election; the default value is -`false`. Pings from master-eligible nodes are always observed during master election. +If `discovery.zen.master_election.ignore_non_master_pings` is `true`, pings from nodes that are not master +eligible (nodes where `node.master` is `false`) are ignored during master election; the default value is +`false`. Nodes can be excluded from becoming a master by setting `node.master` to `false`. Note, once a node is a client node (`node.client` set to