diff --git a/pom.xml b/pom.xml index 3f1d74ab3a6..d07a71cb471 100644 --- a/pom.xml +++ b/pom.xml @@ -184,7 +184,7 @@ 0.8.13 true - + @@ -485,7 +485,8 @@ ${tests.failfast} false - . + . + ${tests.bwc} ${tests.bwc.path} @@ -539,15 +540,15 @@ 1.7 - validate - - run - - - - Using ${java.runtime.name} ${java.runtime.version} ${java.vendor} - - + validate + + run + + + + Using ${java.runtime.name} ${java.runtime.version} ${java.vendor} + + invalid-patterns @@ -575,7 +576,9 @@ - The following files contain tabs or nocommits:${line.separator}${validate.patternsFound} + The following files contain tabs or + nocommits:${line.separator}${validate.patternsFound} + @@ -583,7 +586,8 @@ tests test - ${skipTests} + ${skipTests} + false @@ -597,7 +601,7 @@ - + @@ -710,7 +714,7 @@ org.elasticsearch.common.compress - com.github.mustachejava + com.github.mustachejava org.elasticsearch.common.mustache @@ -1221,6 +1225,11 @@ jdk-unsafe jdk-deprecated + + + org/elasticsearch/test/disruption/LongGCDisruption.class + + test-signatures.txt all-signatures.txt @@ -1345,219 +1354,220 @@ - - - default - - true - - - - - com.carrotsearch.randomizedtesting - junit4-maven-plugin - - ${tests.jvm.argline} - - - - com.mycila - license-maven-plugin - 2.5 - -
dev-tools/elasticsearch_license_header.txt
- - dev-tools/license_header_definition.xml - - - src/main/java/org/elasticsearch/**/*.java - src/test/java/org/elasticsearch/**/*.java - - - src/main/java/org/elasticsearch/common/inject/** - - src/main/java/org/elasticsearch/common/geo/GeoHashUtils.java - src/main/java/org/elasticsearch/common/lucene/search/XBooleanFilter.java - src/main/java/org/elasticsearch/common/lucene/search/XFilteredQuery.java - src/main/java/org/apache/lucene/queryparser/XSimpleQueryParser.java - src/main/java/org/apache/lucene/**/X*.java - - src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/tdigest/TDigestState.java - src/test/java/org/elasticsearch/search/aggregations/metrics/GroupTree.java - -
- - - compile - - check - - - -
-
-
-
- - - dev - - true - - - - - de.thetaphi - forbiddenapis - 1.5.1 - - - check-forbidden-apis - none - - - check-forbidden-test-apis - none - - - - - - - - - license - - - license.generation - true - - - - - - - coverage - - - tests.coverage - true - - - - - - org.jacoco - org.jacoco.agent - runtime - 0.6.4.201312101107 - test - - - - - - org.jacoco - jacoco-maven-plugin - 0.6.4.201312101107 - - - default-prepare-agent - - prepare-agent - - - - default-report - prepare-package - - report - - - - default-check - - check - - - - - - jsr166e/** - org/apache/lucene/** - - - - - - - - static - - - tests.static - true - - - - - - org.codehaus.mojo - findbugs-maven-plugin - 2.5.3 - - - - - - - org.apache.maven.plugins - maven-jxr-plugin - 2.3 - - - org.apache.maven.plugins - maven-pmd-plugin - 3.0.1 - - - ${basedir}/dev-tools/pmd/custom.xml - - 1.7 - - **/jsr166e/** - **/org/apache/lucene/** - **/org/apache/elasticsearch/common/Base64.java - - - - - org.codehaus.mojo - findbugs-maven-plugin - 2.5.3 - - true - target/site - true - 2048 - 1800000 - org.elasticsearch.- - - - - org.apache.maven.plugins - maven-project-info-reports-plugin - 2.7 - - - - index - - - - - - - + + + default + + true + + + + + com.carrotsearch.randomizedtesting + junit4-maven-plugin + + ${tests.jvm.argline} + + + + com.mycila + license-maven-plugin + 2.5 + +
dev-tools/elasticsearch_license_header.txt
+ + dev-tools/license_header_definition.xml + + + src/main/java/org/elasticsearch/**/*.java + src/test/java/org/elasticsearch/**/*.java + + + src/main/java/org/elasticsearch/common/inject/** + + src/main/java/org/elasticsearch/common/geo/GeoHashUtils.java + src/main/java/org/elasticsearch/common/lucene/search/XBooleanFilter.java + src/main/java/org/elasticsearch/common/lucene/search/XFilteredQuery.java + src/main/java/org/apache/lucene/queryparser/XSimpleQueryParser.java + src/main/java/org/apache/lucene/**/X*.java + + src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/tdigest/TDigestState.java + + src/test/java/org/elasticsearch/search/aggregations/metrics/GroupTree.java + +
+ + + compile + + check + + + +
+
+
+
+ + + dev + + true + + + + + de.thetaphi + forbiddenapis + 1.5.1 + + + check-forbidden-apis + none + + + check-forbidden-test-apis + none + + + + + + + + + license + + + license.generation + true + + + + + + + coverage + + + tests.coverage + true + + + + + + org.jacoco + org.jacoco.agent + runtime + 0.6.4.201312101107 + test + + + + + + org.jacoco + jacoco-maven-plugin + 0.6.4.201312101107 + + + default-prepare-agent + + prepare-agent + + + + default-report + prepare-package + + report + + + + default-check + + check + + + + + + jsr166e/** + org/apache/lucene/** + + + + + + + + static + + + tests.static + true + + + + + + org.codehaus.mojo + findbugs-maven-plugin + 2.5.3 + + + + + + + org.apache.maven.plugins + maven-jxr-plugin + 2.3 + + + org.apache.maven.plugins + maven-pmd-plugin + 3.0.1 + + + ${basedir}/dev-tools/pmd/custom.xml + + 1.7 + + **/jsr166e/** + **/org/apache/lucene/** + **/org/apache/elasticsearch/common/Base64.java + + + + + org.codehaus.mojo + findbugs-maven-plugin + 2.5.3 + + true + target/site + true + 2048 + 1800000 + org.elasticsearch.- + + + + org.apache.maven.plugins + maven-project-info-reports-plugin + 2.7 + + + + index + + + + + + +
diff --git a/src/main/java/org/elasticsearch/action/admin/cluster/settings/TransportClusterUpdateSettingsAction.java b/src/main/java/org/elasticsearch/action/admin/cluster/settings/TransportClusterUpdateSettingsAction.java index fa77ae88478..edcf8334640 100644 --- a/src/main/java/org/elasticsearch/action/admin/cluster/settings/TransportClusterUpdateSettingsAction.java +++ b/src/main/java/org/elasticsearch/action/admin/cluster/settings/TransportClusterUpdateSettingsAction.java @@ -137,6 +137,12 @@ public class TransportClusterUpdateSettingsAction extends TransportMasterNodeOpe return new ClusterUpdateSettingsResponse(updateSettingsAcked && acknowledged, transientUpdates.build(), persistentUpdates.build()); } + @Override + public void onNoLongerMaster(String source) { + logger.debug("failed to preform reroute after cluster settings were updated - current node is no longer a master"); + listener.onResponse(new ClusterUpdateSettingsResponse(updateSettingsAcked, transientUpdates.build(), persistentUpdates.build())); + } + @Override public void onFailure(String source, Throwable t) { //if the reroute fails we only log diff --git a/src/main/java/org/elasticsearch/action/admin/indices/recovery/TransportRecoveryAction.java b/src/main/java/org/elasticsearch/action/admin/indices/recovery/TransportRecoveryAction.java index c0e9a65de34..2e54d5cf181 100644 --- a/src/main/java/org/elasticsearch/action/admin/indices/recovery/TransportRecoveryAction.java +++ b/src/main/java/org/elasticsearch/action/admin/indices/recovery/TransportRecoveryAction.java @@ -173,12 +173,12 @@ public class TransportRecoveryAction extends @Override protected ClusterBlockException checkGlobalBlock(ClusterState state, RecoveryRequest request) { - return state.blocks().globalBlockedException(ClusterBlockLevel.METADATA); + return state.blocks().globalBlockedException(ClusterBlockLevel.READ); } @Override protected ClusterBlockException checkRequestBlock(ClusterState state, RecoveryRequest request, String[] 
concreteIndices) { - return state.blocks().indicesBlockedException(ClusterBlockLevel.METADATA, concreteIndices); + return state.blocks().indicesBlockedException(ClusterBlockLevel.READ, concreteIndices); } static class ShardRecoveryRequest extends BroadcastShardOperationRequest { diff --git a/src/main/java/org/elasticsearch/action/bench/BenchmarkService.java b/src/main/java/org/elasticsearch/action/bench/BenchmarkService.java index 0ebfd47593e..5868aa12b5a 100644 --- a/src/main/java/org/elasticsearch/action/bench/BenchmarkService.java +++ b/src/main/java/org/elasticsearch/action/bench/BenchmarkService.java @@ -66,11 +66,11 @@ public class BenchmarkService extends AbstractLifecycleComponent listener) { @@ -171,8 +174,8 @@ public class BenchmarkService extends AbstractLifecycleComponent listener) { @@ -228,7 +231,7 @@ public class BenchmarkService extends AbstractLifecycleComponent builder = new ImmutableList.Builder(); for (BenchmarkMetaData.Entry e : bmd.entries()) { if (benchmarkId == null || match(e)) { - e = process(e) ; + e = process(e); instances.add(e); } // Don't keep finished benchmarks around in cluster state @@ -741,7 +745,7 @@ public class BenchmarkService extends AbstractLifecycleComponent implements TimeoutClusterStateUpdateTask { + public abstract class BenchmarkStateChangeAction extends TimeoutClusterStateUpdateTask { protected final R request; public BenchmarkStateChangeAction(R request) { diff --git a/src/main/java/org/elasticsearch/cluster/AckedClusterStateUpdateTask.java b/src/main/java/org/elasticsearch/cluster/AckedClusterStateUpdateTask.java index 7cdee753873..087bd1c6ad6 100644 --- a/src/main/java/org/elasticsearch/cluster/AckedClusterStateUpdateTask.java +++ b/src/main/java/org/elasticsearch/cluster/AckedClusterStateUpdateTask.java @@ -28,7 +28,7 @@ import org.elasticsearch.common.unit.TimeValue; * An extension interface to {@link ClusterStateUpdateTask} that allows to be notified when * all the nodes have acknowledged a cluster state update 
request */ -public abstract class AckedClusterStateUpdateTask implements TimeoutClusterStateUpdateTask { +public abstract class AckedClusterStateUpdateTask extends TimeoutClusterStateUpdateTask { private final ActionListener listener; private final AckedRequest request; @@ -40,6 +40,7 @@ public abstract class AckedClusterStateUpdateTask implements TimeoutCl /** * Called to determine which nodes the acknowledgement is expected from + * * @param discoveryNode a node * @return true if the node is expected to send ack back, false otherwise */ @@ -50,6 +51,7 @@ public abstract class AckedClusterStateUpdateTask implements TimeoutCl /** * Called once all the nodes have acknowledged the cluster state update request. Must be * very lightweight execution, since it gets executed on the cluster service thread. + * * @param t optional error that might have been thrown */ public void onAllNodesAcked(@Nullable Throwable t) { diff --git a/src/main/java/org/elasticsearch/cluster/ClusterService.java b/src/main/java/org/elasticsearch/cluster/ClusterService.java index 6204599f57d..080fce84a36 100644 --- a/src/main/java/org/elasticsearch/cluster/ClusterService.java +++ b/src/main/java/org/elasticsearch/cluster/ClusterService.java @@ -110,4 +110,5 @@ public interface ClusterService extends LifecycleComponent { * Returns the tasks that are pending. 
*/ List pendingTasks(); + } diff --git a/src/main/java/org/elasticsearch/cluster/ClusterState.java b/src/main/java/org/elasticsearch/cluster/ClusterState.java index ecb041a233e..d208d6a20a7 100644 --- a/src/main/java/org/elasticsearch/cluster/ClusterState.java +++ b/src/main/java/org/elasticsearch/cluster/ClusterState.java @@ -115,6 +115,8 @@ public class ClusterState implements ToXContent { } + public static final long UNKNOWN_VERSION = -1; + private final long version; private final RoutingTable routingTable; diff --git a/src/main/java/org/elasticsearch/cluster/ClusterStateNonMasterUpdateTask.java b/src/main/java/org/elasticsearch/cluster/ClusterStateNonMasterUpdateTask.java new file mode 100644 index 00000000000..48afbb8f1fe --- /dev/null +++ b/src/main/java/org/elasticsearch/cluster/ClusterStateNonMasterUpdateTask.java @@ -0,0 +1,32 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.cluster; + +/** + * Base class for cluster state update tasks that should be executed + * even if the current node is not a master. 
+ */ +public abstract class ClusterStateNonMasterUpdateTask extends ClusterStateUpdateTask { + + @Override + public boolean runOnlyOnMaster() { + return false; + } +} diff --git a/src/main/java/org/elasticsearch/cluster/ClusterStateUpdateTask.java b/src/main/java/org/elasticsearch/cluster/ClusterStateUpdateTask.java index 490a556ab12..921b6d149ee 100644 --- a/src/main/java/org/elasticsearch/cluster/ClusterStateUpdateTask.java +++ b/src/main/java/org/elasticsearch/cluster/ClusterStateUpdateTask.java @@ -19,19 +19,37 @@ package org.elasticsearch.cluster; +import org.elasticsearch.common.Nullable; +import org.elasticsearch.common.util.concurrent.EsRejectedExecutionException; + /** * A task that can update the cluster state. */ -public interface ClusterStateUpdateTask { +abstract public class ClusterStateUpdateTask { /** * Update the cluster state based on the current state. Return the *same instance* if no state * should be changed. */ - ClusterState execute(ClusterState currentState) throws Exception; + abstract public ClusterState execute(ClusterState currentState) throws Exception; /** * A callback called when execute fails. */ - void onFailure(String source, Throwable t); + abstract public void onFailure(String source, @Nullable Throwable t); + + + /** + * indicates whether this task should only run if current node is master + */ + public boolean runOnlyOnMaster() { + return true; + } + + /** + * called when the task was rejected because the local node is no longer master + */ + public void onNoLongerMaster(String source) { + onFailure(source, new EsRejectedExecutionException("no longer master. 
source: [" + source + "]")); + } } diff --git a/src/main/java/org/elasticsearch/cluster/ProcessedClusterStateNonMasterUpdateTask.java b/src/main/java/org/elasticsearch/cluster/ProcessedClusterStateNonMasterUpdateTask.java new file mode 100644 index 00000000000..4af05b43581 --- /dev/null +++ b/src/main/java/org/elasticsearch/cluster/ProcessedClusterStateNonMasterUpdateTask.java @@ -0,0 +1,31 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.elasticsearch.cluster; + +/** + * A combination between {@link org.elasticsearch.cluster.ProcessedClusterStateUpdateTask} and + * {@link org.elasticsearch.cluster.ClusterStateNonMasterUpdateTask} to allow easy creation of anonymous classes + */ +abstract public class ProcessedClusterStateNonMasterUpdateTask extends ProcessedClusterStateUpdateTask { + + @Override + public boolean runOnlyOnMaster() { + return false; + } +} diff --git a/src/main/java/org/elasticsearch/cluster/ProcessedClusterStateUpdateTask.java b/src/main/java/org/elasticsearch/cluster/ProcessedClusterStateUpdateTask.java index 72074965f95..2d703ed2621 100644 --- a/src/main/java/org/elasticsearch/cluster/ProcessedClusterStateUpdateTask.java +++ b/src/main/java/org/elasticsearch/cluster/ProcessedClusterStateUpdateTask.java @@ -23,11 +23,11 @@ package org.elasticsearch.cluster; * An extension interface to {@link ClusterStateUpdateTask} that allows to be notified when * the cluster state update has been processed. */ -public interface ProcessedClusterStateUpdateTask extends ClusterStateUpdateTask { +public abstract class ProcessedClusterStateUpdateTask extends ClusterStateUpdateTask { /** * Called when the result of the {@link #execute(ClusterState)} have been processed * properly by all listeners. 
*/ - void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState); + public abstract void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState); } diff --git a/src/main/java/org/elasticsearch/cluster/TimeoutClusterStateUpdateTask.java b/src/main/java/org/elasticsearch/cluster/TimeoutClusterStateUpdateTask.java index 1083e1ddcbe..1ae767c6560 100644 --- a/src/main/java/org/elasticsearch/cluster/TimeoutClusterStateUpdateTask.java +++ b/src/main/java/org/elasticsearch/cluster/TimeoutClusterStateUpdateTask.java @@ -25,11 +25,11 @@ import org.elasticsearch.common.unit.TimeValue; * An extension interface to {@link org.elasticsearch.cluster.ClusterStateUpdateTask} that allows to associate * a timeout. */ -public interface TimeoutClusterStateUpdateTask extends ProcessedClusterStateUpdateTask { +abstract public class TimeoutClusterStateUpdateTask extends ProcessedClusterStateUpdateTask { /** * If the cluster state update task wasn't processed by the provided timeout, call * {@link #onFailure(String, Throwable)} */ - TimeValue timeout(); + abstract public TimeValue timeout(); } diff --git a/src/main/java/org/elasticsearch/cluster/block/ClusterBlocks.java b/src/main/java/org/elasticsearch/cluster/block/ClusterBlocks.java index 957bd406263..bb7d332de4f 100644 --- a/src/main/java/org/elasticsearch/cluster/block/ClusterBlocks.java +++ b/src/main/java/org/elasticsearch/cluster/block/ClusterBlocks.java @@ -108,6 +108,19 @@ public class ClusterBlocks { return global.contains(block); } + public boolean hasGlobalBlock(int blockId) { + for (ClusterBlock clusterBlock : global) { + if (clusterBlock.id() == blockId) { + return true; + } + } + return false; + } + + public boolean hasGlobalBlock(ClusterBlockLevel level) { + return global(level).size() > 0; + } + /** * Is there a global block with the provided status? 
*/ diff --git a/src/main/java/org/elasticsearch/cluster/routing/RoutingService.java b/src/main/java/org/elasticsearch/cluster/routing/RoutingService.java index b33804de564..555b8b3ef1b 100644 --- a/src/main/java/org/elasticsearch/cluster/routing/RoutingService.java +++ b/src/main/java/org/elasticsearch/cluster/routing/RoutingService.java @@ -149,10 +149,15 @@ public class RoutingService extends AbstractLifecycleComponent i return ClusterState.builder(currentState).routingResult(routingResult).build(); } + @Override + public void onNoLongerMaster(String source) { + // no biggie + } + @Override public void onFailure(String source, Throwable t) { - ClusterState state = clusterService.state(); - logger.error("unexpected failure during [{}], current state:\n{}", t, source, state.prettyPrint()); + ClusterState state = clusterService.state(); + logger.error("unexpected failure during [{}], current state:\n{}", t, source, state.prettyPrint()); } }); routingTableDirty = false; diff --git a/src/main/java/org/elasticsearch/cluster/service/InternalClusterService.java b/src/main/java/org/elasticsearch/cluster/service/InternalClusterService.java index fad94ba1944..c5fe004f8b9 100644 --- a/src/main/java/org/elasticsearch/cluster/service/InternalClusterService.java +++ b/src/main/java/org/elasticsearch/cluster/service/InternalClusterService.java @@ -84,7 +84,7 @@ public class InternalClusterService extends AbstractLifecycleComponent { - final ClusterBlock NO_MASTER_BLOCK = new ClusterBlock(2, "no master", true, true, RestStatus.SERVICE_UNAVAILABLE, ClusterBlockLevel.ALL); - DiscoveryNode localNode(); void addListener(InitialStateDiscoveryListener listener); diff --git a/src/main/java/org/elasticsearch/discovery/DiscoveryService.java b/src/main/java/org/elasticsearch/discovery/DiscoveryService.java index 0108db12a19..f73f2bbb593 100644 --- a/src/main/java/org/elasticsearch/discovery/DiscoveryService.java +++ b/src/main/java/org/elasticsearch/discovery/DiscoveryService.java @@ -22,6 
+22,7 @@ package org.elasticsearch.discovery; import org.elasticsearch.ElasticsearchException; import org.elasticsearch.ElasticsearchTimeoutException; import org.elasticsearch.cluster.ClusterState; +import org.elasticsearch.cluster.block.ClusterBlock; import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.common.Strings; import org.elasticsearch.common.component.AbstractLifecycleComponent; @@ -38,6 +39,8 @@ import java.util.concurrent.TimeUnit; */ public class DiscoveryService extends AbstractLifecycleComponent { + public static final String SETTING_INITIAL_STATE_TIMEOUT = "discovery.initial_state_timeout"; + private static class InitialStateListener implements InitialStateDiscoveryListener { private final CountDownLatch latch = new CountDownLatch(1); @@ -60,12 +63,18 @@ public class DiscoveryService extends AbstractLifecycleComponent implem private final TransportService transportService; private final ClusterService clusterService; + private final DiscoveryService discoveryService; private final DiscoveryNodeService discoveryNodeService; private AllocationService allocationService; private final ClusterName clusterName; @@ -77,7 +78,7 @@ public class LocalDiscovery extends AbstractLifecycleComponent implem @Inject public LocalDiscovery(Settings settings, ClusterName clusterName, TransportService transportService, ClusterService clusterService, - DiscoveryNodeService discoveryNodeService, Version version, DiscoverySettings discoverySettings) { + DiscoveryNodeService discoveryNodeService, Version version, DiscoverySettings discoverySettings, DiscoveryService discoveryService) { super(settings); this.clusterName = clusterName; this.clusterService = clusterService; @@ -85,6 +86,7 @@ public class LocalDiscovery extends AbstractLifecycleComponent implem this.discoveryNodeService = discoveryNodeService; this.version = version; this.discoverySettings = discoverySettings; + this.discoveryService = discoveryService; } @Override @@ -123,7 +125,7 @@ 
public class LocalDiscovery extends AbstractLifecycleComponent implem // we are the first master (and the master) master = true; final LocalDiscovery master = firstMaster; - clusterService.submitStateUpdateTask("local-disco-initial_connect(master)", new ProcessedClusterStateUpdateTask() { + clusterService.submitStateUpdateTask("local-disco-initial_connect(master)", new ProcessedClusterStateNonMasterUpdateTask() { @Override public ClusterState execute(ClusterState currentState) { DiscoveryNodes.Builder nodesBuilder = DiscoveryNodes.builder(); @@ -132,7 +134,7 @@ public class LocalDiscovery extends AbstractLifecycleComponent implem } nodesBuilder.localNodeId(master.localNode().id()).masterNodeId(master.localNode().id()); // remove the NO_MASTER block in this case - ClusterBlocks.Builder blocks = ClusterBlocks.builder().blocks(currentState.blocks()).removeGlobalBlock(Discovery.NO_MASTER_BLOCK); + ClusterBlocks.Builder blocks = ClusterBlocks.builder().blocks(currentState.blocks()).removeGlobalBlock(discoverySettings.getNoMasterBlock()); return ClusterState.builder(currentState).nodes(nodesBuilder).blocks(blocks).build(); } @@ -149,7 +151,7 @@ public class LocalDiscovery extends AbstractLifecycleComponent implem } else if (firstMaster != null) { // update as fast as we can the local node state with the new metadata (so we create indices for example) final ClusterState masterState = firstMaster.clusterService.state(); - clusterService.submitStateUpdateTask("local-disco(detected_master)", new ClusterStateUpdateTask() { + clusterService.submitStateUpdateTask("local-disco(detected_master)", new ClusterStateNonMasterUpdateTask() { @Override public ClusterState execute(ClusterState currentState) { // make sure we have the local node id set, we might need it as a result of the new metadata @@ -165,7 +167,7 @@ public class LocalDiscovery extends AbstractLifecycleComponent implem // tell the master to send the fact that we are here final LocalDiscovery master = firstMaster; - 
firstMaster.clusterService.submitStateUpdateTask("local-disco-receive(from node[" + localNode + "])", new ProcessedClusterStateUpdateTask() { + firstMaster.clusterService.submitStateUpdateTask("local-disco-receive(from node[" + localNode + "])", new ProcessedClusterStateNonMasterUpdateTask() { @Override public ClusterState execute(ClusterState currentState) { DiscoveryNodes.Builder nodesBuilder = DiscoveryNodes.builder(); @@ -225,7 +227,7 @@ public class LocalDiscovery extends AbstractLifecycleComponent implem } final LocalDiscovery master = firstMaster; - master.clusterService.submitStateUpdateTask("local-disco-update", new ClusterStateUpdateTask() { + master.clusterService.submitStateUpdateTask("local-disco-update", new ClusterStateNonMasterUpdateTask() { @Override public ClusterState execute(ClusterState currentState) { DiscoveryNodes newNodes = currentState.nodes().removeDeadMembers(newMembers, master.localNode.id()); @@ -305,13 +307,22 @@ public class LocalDiscovery extends AbstractLifecycleComponent implem nodeSpecificClusterState.status(ClusterState.ClusterStateStatus.RECEIVED); // ignore cluster state messages that do not include "me", not in the game yet... 
if (nodeSpecificClusterState.nodes().localNode() != null) { - discovery.clusterService.submitStateUpdateTask("local-disco-receive(from master)", new ProcessedClusterStateUpdateTask() { + assert nodeSpecificClusterState.nodes().masterNode() != null : "received a cluster state without a master"; + assert !nodeSpecificClusterState.blocks().hasGlobalBlock(discoveryService.getNoMasterBlock()) : "received a cluster state with a master block"; + + discovery.clusterService.submitStateUpdateTask("local-disco-receive(from master)", new ProcessedClusterStateNonMasterUpdateTask() { @Override public ClusterState execute(ClusterState currentState) { if (nodeSpecificClusterState.version() < currentState.version() && Objects.equal(nodeSpecificClusterState.nodes().masterNodeId(), currentState.nodes().masterNodeId())) { return currentState; } + if (currentState.blocks().hasGlobalBlock(discoveryService.getNoMasterBlock())) { + // it's a fresh update from the master as we transition from a state of not having a master to having one + logger.debug("got first state from fresh master [{}]", nodeSpecificClusterState.nodes().masterNodeId()); + return nodeSpecificClusterState; + } + ClusterState.Builder builder = ClusterState.builder(nodeSpecificClusterState); // if the routing table did not change, use the original one if (nodeSpecificClusterState.routingTable().version() == currentState.routingTable().version()) { diff --git a/src/main/java/org/elasticsearch/discovery/zen/ZenDiscovery.java b/src/main/java/org/elasticsearch/discovery/zen/ZenDiscovery.java index b1149cbbf55..d7c8c0ccafc 100644 --- a/src/main/java/org/elasticsearch/discovery/zen/ZenDiscovery.java +++ b/src/main/java/org/elasticsearch/discovery/zen/ZenDiscovery.java @@ -22,9 +22,7 @@ package org.elasticsearch.discovery.zen; import com.google.common.base.Objects; import com.google.common.collect.Lists; import com.google.common.collect.Sets; -import org.elasticsearch.ElasticsearchException; -import 
org.elasticsearch.ElasticsearchIllegalStateException; -import org.elasticsearch.Version; +import org.elasticsearch.*; import org.elasticsearch.cluster.*; import org.elasticsearch.cluster.block.ClusterBlocks; import org.elasticsearch.cluster.metadata.IndexMetaData; @@ -32,10 +30,10 @@ import org.elasticsearch.cluster.metadata.MetaData; import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.cluster.node.DiscoveryNodeService; import org.elasticsearch.cluster.node.DiscoveryNodes; -import org.elasticsearch.cluster.routing.RoutingTable; import org.elasticsearch.cluster.routing.allocation.AllocationService; import org.elasticsearch.cluster.routing.allocation.RoutingAllocation; import org.elasticsearch.common.Priority; +import org.elasticsearch.common.collect.Tuple; import org.elasticsearch.common.component.AbstractLifecycleComponent; import org.elasticsearch.common.component.Lifecycle; import org.elasticsearch.common.inject.Inject; @@ -45,6 +43,7 @@ import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.unit.TimeValue; import org.elasticsearch.common.util.concurrent.ConcurrentCollections; +import org.elasticsearch.common.util.concurrent.EsRejectedExecutionException; import org.elasticsearch.discovery.Discovery; import org.elasticsearch.discovery.DiscoveryService; import org.elasticsearch.discovery.DiscoverySettings; @@ -56,19 +55,20 @@ import org.elasticsearch.discovery.zen.membership.MembershipAction; import org.elasticsearch.discovery.zen.ping.ZenPing; import org.elasticsearch.discovery.zen.ping.ZenPingService; import org.elasticsearch.discovery.zen.publish.PublishClusterStateAction; -import org.elasticsearch.gateway.GatewayService; import org.elasticsearch.node.service.NodeService; import org.elasticsearch.node.settings.NodeSettingsService; import org.elasticsearch.threadpool.ThreadPool; import org.elasticsearch.transport.*; import java.io.IOException; +import 
java.util.ArrayList; import java.util.List; import java.util.Map; import java.util.Set; import java.util.concurrent.BlockingQueue; import java.util.concurrent.CopyOnWriteArrayList; import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicInteger; import static com.google.common.collect.Lists.newArrayList; import static org.elasticsearch.common.unit.TimeValue.timeValueSeconds; @@ -78,6 +78,16 @@ import static org.elasticsearch.common.unit.TimeValue.timeValueSeconds; */ public class ZenDiscovery extends AbstractLifecycleComponent implements Discovery, DiscoveryNodesProvider { + public final static String SETTING_REJOIN_ON_MASTER_GONE = "discovery.zen.rejoin_on_master_gone"; + public final static String SETTING_PING_TIMEOUT = "discovery.zen.ping.timeout"; + public final static String SETTING_JOIN_TIMEOUT = "discovery.zen.join_timeout"; + public final static String SETTING_JOIN_RETRY_ATTEMPTS = "discovery.zen.join_retry_attempts"; + public final static String SETTING_JOIN_RETRY_DELAY = "discovery.zen.join_retry_delay"; + public final static String SETTING_MAX_PINGS_FROM_ANOTHER_MASTER = "discovery.zen.max_pings_from_another_master"; + public final static String SETTING_SEND_LEAVE_REQUEST = "discovery.zen.send_leave_request"; + public final static String SETTING_MASTER_ELECTION_FILTER_CLIENT = "discovery.zen.master_election.filter_client"; + public final static String SETTING_MASTER_ELECTION_FILTER_DATA = "discovery.zen.master_election.filter_data"; + public static final String DISCOVERY_REJOIN_ACTION_NAME = "internal:discovery/zen/rejoin"; private final ThreadPool threadPool; @@ -86,6 +96,7 @@ public class ZenDiscovery extends AbstractLifecycleComponent implemen private AllocationService allocationService; private final ClusterName clusterName; private final DiscoveryNodeService discoveryNodeService; + private final DiscoverySettings discoverySettings; private final ZenPingService pingService; private final MasterFaultDetection 
masterFD; private final NodesFaultDetection nodesFD; @@ -97,6 +108,14 @@ public class ZenDiscovery extends AbstractLifecycleComponent implemen private final TimeValue pingTimeout; private final TimeValue joinTimeout; + /** how many retry attempts to perform if join request failed with an retriable error */ + private final int joinRetryAttempts; + /** how long to wait before performing another join attempt after a join request failed with an retriable error */ + private final TimeValue joinRetryDelay; + + /** how many pings from *another* master to tolerate before forcing a rejoin on other or local master */ + private final int maxPingsFromAnotherMaster; + // a flag that should be used only for testing private final boolean sendLeaveRequest; @@ -118,41 +137,61 @@ public class ZenDiscovery extends AbstractLifecycleComponent implemen private final AtomicBoolean initialStateSent = new AtomicBoolean(); + private volatile boolean rejoinOnMasterGone; @Nullable private NodeService nodeService; + private final BlockingQueue> processJoinRequests = ConcurrentCollections.newBlockingQueue(); + @Inject public ZenDiscovery(Settings settings, ClusterName clusterName, ThreadPool threadPool, TransportService transportService, ClusterService clusterService, NodeSettingsService nodeSettingsService, - DiscoveryNodeService discoveryNodeService, ZenPingService pingService, Version version, DiscoverySettings discoverySettings) { + DiscoveryNodeService discoveryNodeService, ZenPingService pingService, ElectMasterService electMasterService, Version version, + DiscoverySettings discoverySettings) { super(settings); this.clusterName = clusterName; this.threadPool = threadPool; this.clusterService = clusterService; this.transportService = transportService; this.discoveryNodeService = discoveryNodeService; + this.discoverySettings = discoverySettings; this.pingService = pingService; this.version = version; + this.electMaster = electMasterService; - // also support direct discovery.zen settings, 
for cases when it gets extended - this.pingTimeout = settings.getAsTime("discovery.zen.ping.timeout", settings.getAsTime("discovery.zen.ping_timeout", componentSettings.getAsTime("ping_timeout", componentSettings.getAsTime("initial_ping_timeout", timeValueSeconds(3))))); - this.joinTimeout = settings.getAsTime("discovery.zen.join_timeout", TimeValue.timeValueMillis(pingTimeout.millis() * 20)); - this.sendLeaveRequest = componentSettings.getAsBoolean("send_leave_request", true); + // keep using componentSettings for BWC, in case this class gets extended. + TimeValue pingTimeout = componentSettings.getAsTime("initial_ping_timeout", timeValueSeconds(3)); + pingTimeout = componentSettings.getAsTime("ping_timeout", pingTimeout); + pingTimeout = settings.getAsTime("discovery.zen.ping_timeout", pingTimeout); + this.pingTimeout = pingTimeout = settings.getAsTime(SETTING_PING_TIMEOUT, pingTimeout); // keep the local in sync with the field: it seeds the join timeout default and the debug log below - this.masterElectionFilterClientNodes = settings.getAsBoolean("discovery.zen.master_election.filter_client", true); - this.masterElectionFilterDataNodes = settings.getAsBoolean("discovery.zen.master_election.filter_data", false); + this.joinTimeout = settings.getAsTime(SETTING_JOIN_TIMEOUT, TimeValue.timeValueMillis(pingTimeout.millis() * 20)); + this.joinRetryAttempts = settings.getAsInt(SETTING_JOIN_RETRY_ATTEMPTS, 3); + this.joinRetryDelay = settings.getAsTime(SETTING_JOIN_RETRY_DELAY, TimeValue.timeValueMillis(100)); + this.maxPingsFromAnotherMaster = settings.getAsInt(SETTING_MAX_PINGS_FROM_ANOTHER_MASTER, 3); + this.sendLeaveRequest = settings.getAsBoolean(SETTING_SEND_LEAVE_REQUEST, true); + + this.masterElectionFilterClientNodes = settings.getAsBoolean(SETTING_MASTER_ELECTION_FILTER_CLIENT, true); + this.masterElectionFilterDataNodes = settings.getAsBoolean(SETTING_MASTER_ELECTION_FILTER_DATA, false); + this.rejoinOnMasterGone = settings.getAsBoolean(SETTING_REJOIN_ON_MASTER_GONE, true); + + if (this.joinRetryAttempts < 1) { + throw new ElasticsearchIllegalArgumentException("'" + 
SETTING_JOIN_RETRY_ATTEMPTS + "' must be a positive number. got [" + this.joinRetryAttempts + "]"); + } + if (this.maxPingsFromAnotherMaster < 1) { + throw new ElasticsearchIllegalArgumentException("'" + SETTING_MAX_PINGS_FROM_ANOTHER_MASTER + "' must be a positive number. got [" + this.maxPingsFromAnotherMaster + "]"); + } logger.debug("using ping.timeout [{}], join.timeout [{}], master_election.filter_client [{}], master_election.filter_data [{}]", pingTimeout, joinTimeout, masterElectionFilterClientNodes, masterElectionFilterDataNodes); - this.electMaster = new ElectMasterService(settings); nodeSettingsService.addListener(new ApplySettings()); - this.masterFD = new MasterFaultDetection(settings, threadPool, transportService, this); + this.masterFD = new MasterFaultDetection(settings, threadPool, transportService, this, clusterName); this.masterFD.addListener(new MasterNodeFailureListener()); - this.nodesFD = new NodesFaultDetection(settings, threadPool, transportService); - this.nodesFD.addListener(new NodeFailureListener()); + this.nodesFD = new NodesFaultDetection(settings, threadPool, transportService, clusterName); + this.nodesFD.addListener(new NodeFaultDetectionListener()); this.publishClusterState = new PublishClusterStateAction(settings, transportService, this, new NewClusterStateListener(), discoverySettings, clusterName); this.pingService.setNodesProvider(this); @@ -178,7 +217,7 @@ public class ZenDiscovery extends AbstractLifecycleComponent implemen final String nodeId = DiscoveryService.generateNodeId(settings); localNode = new DiscoveryNode(settings.get("name"), nodeId, transportService.boundAddress().publishAddress(), nodeAttributes, version); latestDiscoNodes = new DiscoveryNodes.Builder().put(localNode).localNodeId(localNode.id()).build(); - nodesFD.updateNodes(latestDiscoNodes); + nodesFD.updateNodes(latestDiscoNodes, ClusterState.UNKNOWN_VERSION); pingService.start(); // do the join on a different thread, the DiscoveryService waits 
for 30s anyhow till it is discovered @@ -272,7 +311,7 @@ public class ZenDiscovery extends AbstractLifecycleComponent implemen throw new ElasticsearchIllegalStateException("Shouldn't publish state when not master"); } latestDiscoNodes = clusterState.nodes(); - nodesFD.updateNodes(clusterState.nodes()); + nodesFD.updateNodes(clusterState.nodes(), clusterState.version()); publishClusterState.publish(clusterState, ackListener); } @@ -295,6 +334,15 @@ public class ZenDiscovery extends AbstractLifecycleComponent implemen }); } + + /** + * Returns true if there is currently a background thread active for (re)joining the cluster. + * Used for testing. + */ + public boolean joiningCluster() { + return currentJoinThread != null; + } + private void innerJoinCluster() { boolean retry = true; while (retry) { @@ -311,18 +359,24 @@ public class ZenDiscovery extends AbstractLifecycleComponent implemen if (localNode.equals(masterNode)) { this.master = true; nodesFD.start(); // start the nodes FD - clusterService.submitStateUpdateTask("zen-disco-join (elected_as_master)", Priority.URGENT, new ProcessedClusterStateUpdateTask() { + clusterService.submitStateUpdateTask("zen-disco-join (elected_as_master)", Priority.URGENT, new ProcessedClusterStateNonMasterUpdateTask() { @Override public ClusterState execute(ClusterState currentState) { - DiscoveryNodes.Builder builder = new DiscoveryNodes.Builder() + // Take into account the previously known nodes; if they happen not to be available + // then fault detection will remove these nodes. + DiscoveryNodes.Builder builder = new DiscoveryNodes.Builder(latestDiscoNodes) .localNodeId(localNode.id()) .masterNodeId(localNode.id()) // put our local node .put(localNode); // update the fact that we are the master... 
latestDiscoNodes = builder.build(); - ClusterBlocks clusterBlocks = ClusterBlocks.builder().blocks(currentState.blocks()).removeGlobalBlock(NO_MASTER_BLOCK).build(); - return ClusterState.builder(currentState).nodes(latestDiscoNodes).blocks(clusterBlocks).build(); + ClusterBlocks clusterBlocks = ClusterBlocks.builder().blocks(currentState.blocks()).removeGlobalBlock(discoverySettings.getNoMasterBlock()).build(); + currentState = ClusterState.builder(currentState).nodes(latestDiscoNodes).blocks(clusterBlocks).build(); + + // eagerly run reroute to remove dead nodes from routing table + RoutingAllocation.Result result = allocationService.reroute(currentState); + return ClusterState.builder(currentState).routingResult(result).build(); } @Override @@ -337,30 +391,18 @@ public class ZenDiscovery extends AbstractLifecycleComponent implemen }); } else { this.master = false; - try { - // first, make sure we can connect to the master - transportService.connectToNode(masterNode); - } catch (Exception e) { - logger.warn("failed to connect to master [{}], retrying...", e, masterNode); - retry = true; - continue; - } // send join request - try { - membership.sendJoinRequestBlocking(masterNode, localNode, joinTimeout); - } catch (Exception e) { - if (e instanceof ElasticsearchException) { - logger.info("failed to send join request to master [{}], reason [{}]", masterNode, ((ElasticsearchException) e).getDetailedMessage()); - } else { - logger.info("failed to send join request to master [{}], reason [{}]", masterNode, e.getMessage()); - } - if (logger.isTraceEnabled()) { - logger.trace("detailed failed reason", e); - } - // failed to send the join request, retry + retry = !joinElectedMaster(masterNode); + if (retry) { + continue; + } + + if (latestDiscoNodes.masterNode() == null) { + logger.debug("no master node is set, despite of join request completing. 
retrying pings"); retry = true; continue; } + masterFD.start(masterNode, "initial_join"); // no need to submit the received cluster state, we will get it from the master when it publishes // the fact that we joined @@ -368,6 +410,52 @@ public class ZenDiscovery extends AbstractLifecycleComponent implemen } } + /** + * Join a newly elected master. + * + * @return true if successful + */ + private boolean joinElectedMaster(DiscoveryNode masterNode) { + try { + // first, make sure we can connect to the master + transportService.connectToNode(masterNode); + } catch (Exception e) { + logger.warn("failed to connect to master [{}], retrying...", e, masterNode); + return false; + } + int joinAttempt = 0; // we retry on illegal state if the master is not yet ready + while (true) { + try { + logger.trace("joining master {}", masterNode); + membership.sendJoinRequestBlocking(masterNode, localNode, joinTimeout); + return true; + } catch (Throwable t) { + Throwable unwrap = ExceptionsHelper.unwrapCause(t); + if (unwrap instanceof ElasticsearchIllegalStateException) { + if (++joinAttempt == this.joinRetryAttempts) { + logger.info("failed to send join request to master [{}], reason [{}], tried [{}] times", masterNode, ExceptionsHelper.detailedMessage(t), joinAttempt); + return false; + } else { + logger.trace("master {} failed with [{}]. retrying... 
(attempts done: [{}])", masterNode, ExceptionsHelper.detailedMessage(t), joinAttempt); + } + } else { + if (logger.isTraceEnabled()) { + logger.trace("failed to send join request to master [{}]", t, masterNode); + } else { + logger.info("failed to send join request to master [{}], reason [{}]", masterNode, ExceptionsHelper.detailedMessage(t)); + } + return false; + } + } + + try { + Thread.sleep(this.joinRetryDelay.millis()); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + } + } + private void handleLeaveRequest(final DiscoveryNode node) { if (lifecycleState() != Lifecycle.State.STARTED) { // not started, ignore a node failure @@ -389,6 +477,11 @@ public class ZenDiscovery extends AbstractLifecycleComponent implemen return ClusterState.builder(currentState).routingResult(routingResult).build(); } + @Override + public void onNoLongerMaster(String source) { + // ignoring (already logged) + } + @Override public void onFailure(String source, Throwable t) { logger.error("unexpected failure during [{}]", t, source); @@ -424,6 +517,11 @@ public class ZenDiscovery extends AbstractLifecycleComponent implemen return ClusterState.builder(currentState).routingResult(routingResult).build(); } + @Override + public void onNoLongerMaster(String source) { + // already logged + } + @Override public void onFailure(String source, Throwable t) { logger.error("unexpected failure during [{}]", t, source); @@ -457,6 +555,12 @@ public class ZenDiscovery extends AbstractLifecycleComponent implemen return currentState; } + + @Override + public void onNoLongerMaster(String source) { + // ignoring (already logged) + } + @Override public void onFailure(String source, Throwable t) { logger.error("unexpected failure during [{}]", t, source); @@ -481,7 +585,7 @@ public class ZenDiscovery extends AbstractLifecycleComponent implemen logger.info("master_left [{}], reason [{}]", masterNode, reason); - clusterService.submitStateUpdateTask("zen-disco-master_failed (" + 
masterNode + ")", Priority.IMMEDIATE, new ProcessedClusterStateUpdateTask() { + clusterService.submitStateUpdateTask("zen-disco-master_failed (" + masterNode + ")", Priority.IMMEDIATE, new ProcessedClusterStateNonMasterUpdateTask() { @Override public ClusterState execute(ClusterState currentState) { if (!masterNode.id().equals(currentState.nodes().masterNodeId())) { @@ -493,6 +597,16 @@ public class ZenDiscovery extends AbstractLifecycleComponent implemen // make sure the old master node, which has failed, is not part of the nodes we publish .remove(masterNode.id()) .masterNodeId(null).build(); + latestDiscoNodes = discoveryNodes; + + // flush any pending cluster states from old master, so it will not be set as master again + ArrayList pendingNewClusterStates = new ArrayList<>(); + processNewClusterStates.drainTo(pendingNewClusterStates); + logger.trace("removed [{}] pending cluster states", pendingNewClusterStates.size()); + + if (rejoinOnMasterGone) { + return rejoin(ClusterState.builder(currentState).nodes(discoveryNodes).build(), "master left (reason = " + reason + ")"); + } if (!electMaster.hasEnoughMasterNodes(discoveryNodes)) { return rejoin(ClusterState.builder(currentState).nodes(discoveryNodes).build(), "not enough master nodes after master left (reason = " + reason + ")"); @@ -561,29 +675,7 @@ public class ZenDiscovery extends AbstractLifecycleComponent implemen clusterService.submitStateUpdateTask("zen-disco-master_receive_cluster_state_from_another_master [" + newState.nodes().masterNode() + "]", Priority.URGENT, new ProcessedClusterStateUpdateTask() { @Override public ClusterState execute(ClusterState currentState) { - if (newState.version() > currentState.version()) { - logger.warn("received cluster state from [{}] which is also master but with a newer cluster_state, rejoining to cluster...", newState.nodes().masterNode()); - return rejoin(currentState, "zen-disco-master_receive_cluster_state_from_another_master [" + newState.nodes().masterNode() + 
"]"); - } else { - logger.warn("received cluster state from [{}] which is also master but with an older cluster_state, telling [{}] to rejoin the cluster", newState.nodes().masterNode(), newState.nodes().masterNode()); - - try { - // make sure we're connected to this node (connect to node does nothing if we're already connected) - // since the network connections are asymmetric, it may be that we received a state but have disconnected from the node - // in the past (after a master failure, for example) - transportService.connectToNode(newState.nodes().masterNode()); - transportService.sendRequest(newState.nodes().masterNode(), DISCOVERY_REJOIN_ACTION_NAME, new RejoinClusterRequest(currentState.nodes().localNodeId()), new EmptyTransportResponseHandler(ThreadPool.Names.SAME) { - @Override - public void handleException(TransportException exp) { - logger.warn("failed to send rejoin request to [{}]", exp, newState.nodes().masterNode()); - } - }); - } catch (Exception e) { - logger.warn("failed to send rejoin request to [{}]", e, newState.nodes().masterNode()); - } - - return currentState; - } + return handleAnotherMaster(currentState, newState.nodes().masterNode(), newState.version(), "via a new cluster state"); } @Override @@ -610,7 +702,11 @@ public class ZenDiscovery extends AbstractLifecycleComponent implemen final ProcessClusterState processClusterState = new ProcessClusterState(newClusterState, newStateProcessed); processNewClusterStates.add(processClusterState); - clusterService.submitStateUpdateTask("zen-disco-receive(from master [" + newClusterState.nodes().masterNode() + "])", Priority.URGENT, new ProcessedClusterStateUpdateTask() { + + assert newClusterState.nodes().masterNode() != null : "received a cluster state without a master"; + assert !newClusterState.blocks().hasGlobalBlock(discoverySettings.getNoMasterBlock()) : "received a cluster state with a master block"; + + clusterService.submitStateUpdateTask("zen-disco-receive(from master [" + 
newClusterState.nodes().masterNode() + "])", Priority.URGENT, new ProcessedClusterStateNonMasterUpdateTask() { @Override public ClusterState execute(ClusterState currentState) { // we already processed it in a previous event @@ -642,6 +738,11 @@ public class ZenDiscovery extends AbstractLifecycleComponent implemen // we are going to use it for sure, poll (remove) it potentialState = processNewClusterStates.poll(); + if (potentialState == null) { + // might happen if the queue is drained + break; + } + potentialState.processed = true; if (potentialState.clusterState.version() > stateToProcess.clusterState.version()) { @@ -670,7 +771,16 @@ public class ZenDiscovery extends AbstractLifecycleComponent implemen masterFD.restart(latestDiscoNodes.masterNode(), "new cluster state received and we are monitoring the wrong master [" + masterFD.masterNode() + "]"); } + if (currentState.blocks().hasGlobalBlock(discoverySettings.getNoMasterBlock())) { + // its a fresh update from the master as we transition from a start of not having a master to having one + logger.debug("got first state from fresh master [{}]", updatedState.nodes().masterNodeId()); + return updatedState; + } + + + // some optimizations to make sure we keep old objects where possible ClusterState.Builder builder = ClusterState.builder(updatedState); + // if the routing table did not change, use the original one if (updatedState.routingTable().version() == currentState.routingTable().version()) { builder.routingTable(currentState.routingTable()); @@ -726,37 +836,75 @@ public class ZenDiscovery extends AbstractLifecycleComponent implemen // validate the join request, will throw a failure if it fails, which will get back to the // node calling the join request membership.sendValidateJoinRequestBlocking(node, joinTimeout); - + processJoinRequests.add(new Tuple<>(node, callback)); clusterService.submitStateUpdateTask("zen-disco-receive(join from node[" + node + "])", Priority.IMMEDIATE, new 
ProcessedClusterStateUpdateTask() { + + private final List> drainedTasks = new ArrayList<>(); + @Override public ClusterState execute(ClusterState currentState) { - if (currentState.nodes().nodeExists(node.id())) { - // the node already exists in the cluster - logger.info("received a join request for an existing node [{}]", node); - // still send a new cluster state, so it will be re published and possibly update the other node - return ClusterState.builder(currentState).build(); + processJoinRequests.drainTo(drainedTasks); + if (drainedTasks.isEmpty()) { + return currentState; } - DiscoveryNodes.Builder builder = DiscoveryNodes.builder(currentState.nodes()); - for (DiscoveryNode existingNode : currentState.nodes()) { - if (node.address().equals(existingNode.address())) { - builder.remove(existingNode.id()); - logger.warn("received join request from node [{}], but found existing node {} with same address, removing existing node", node, existingNode); + + boolean modified = false; + DiscoveryNodes.Builder nodesBuilder = DiscoveryNodes.builder(currentState.nodes()); + for (Tuple task : drainedTasks) { + DiscoveryNode node = task.v1(); + if (currentState.nodes().nodeExists(node.id())) { + logger.debug("received a join request for an existing node [{}]", node); + } else { + modified = true; + nodesBuilder.put(node); + for (DiscoveryNode existingNode : currentState.nodes()) { + if (node.address().equals(existingNode.address())) { + nodesBuilder.remove(existingNode.id()); + logger.warn("received join request from node [{}], but found existing node {} with same address, removing existing node", node, existingNode); + } + } + } + } + + ClusterState.Builder stateBuilder = ClusterState.builder(currentState); + if (modified) { + latestDiscoNodes = nodesBuilder.build(); + stateBuilder.nodes(latestDiscoNodes); + } + return stateBuilder.build(); + } + + @Override + public void onNoLongerMaster(String source) { + Exception e = new EsRejectedExecutionException("no longer master. 
source: [" + source + "]"); + innerOnFailure(e); + } + + void innerOnFailure(Throwable t) { + for (Tuple drainedTask : drainedTasks) { + try { + drainedTask.v2().onFailure(t); + } catch (Exception e) { + logger.error("error during task failure", e); } } - latestDiscoNodes = builder.build(); - // add the new node now (will update latestDiscoNodes on publish) - return ClusterState.builder(currentState).nodes(latestDiscoNodes.newNode(node)).build(); } @Override public void onFailure(String source, Throwable t) { logger.error("unexpected failure during [{}]", t, source); - callback.onFailure(t); + innerOnFailure(t); } @Override public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) { - callback.onSuccess(); + for (Tuple drainedTask : drainedTasks) { + try { + drainedTask.v2().onSuccess(); + } catch (Exception e) { + logger.error("unexpected error during [{}]", e, source); + } + } } }); } @@ -807,35 +955,36 @@ public class ZenDiscovery extends AbstractLifecycleComponent implemen List pingMasters = newArrayList(); for (ZenPing.PingResponse pingResponse : pingResponses) { if (pingResponse.master() != null) { - pingMasters.add(pingResponse.master()); + // We can't include the local node in pingMasters list, otherwise we may up electing ourselves without + // any check / verifications from other nodes in ZenDiscover#innerJoinCluster() + if (!localNode.equals(pingResponse.master())) { + pingMasters.add(pingResponse.master()); + } } } Set possibleMasterNodes = Sets.newHashSet(); - possibleMasterNodes.add(localNode); + if (localNode.masterNode()) { + possibleMasterNodes.add(localNode); + } for (ZenPing.PingResponse pingResponse : pingResponses) { possibleMasterNodes.add(pingResponse.target()); } - // if we don't have enough master nodes, we bail, even if we get a response that indicates - // there is a master by other node, we don't see enough... 
- if (!electMaster.hasEnoughMasterNodes(possibleMasterNodes)) { - logger.trace("not enough master nodes [{}]", possibleMasterNodes); - return null; - } if (pingMasters.isEmpty()) { - // lets tie break between discovered nodes - DiscoveryNode electedMaster = electMaster.electMaster(possibleMasterNodes); - if (localNode.equals(electedMaster)) { - return localNode; + // if we don't have enough master nodes, we bail, because there are not enough master to elect from + if (electMaster.hasEnoughMasterNodes(possibleMasterNodes)) { + return electMaster.electMaster(possibleMasterNodes); + } else { + logger.trace("not enough master nodes [{}]", possibleMasterNodes); + return null; } } else { - DiscoveryNode electedMaster = electMaster.electMaster(pingMasters); - if (electedMaster != null) { - return electedMaster; - } + + assert !pingMasters.contains(localNode) : "local node should never be elected as master when other nodes indicate an active master"; + // lets tie break between discovered nodes + return electMaster.electMaster(pingMasters); } - return null; } private ClusterState rejoin(ClusterState clusterState, String reason) { @@ -845,28 +994,45 @@ public class ZenDiscovery extends AbstractLifecycleComponent implemen master = false; ClusterBlocks clusterBlocks = ClusterBlocks.builder().blocks(clusterState.blocks()) - .addGlobalBlock(NO_MASTER_BLOCK) - .addGlobalBlock(GatewayService.STATE_NOT_RECOVERED_BLOCK) + .addGlobalBlock(discoverySettings.getNoMasterBlock()) .build(); - // clear the routing table, we have no master, so we need to recreate the routing when we reform the cluster - RoutingTable routingTable = RoutingTable.builder().build(); - // we also clean the metadata, since we are going to recover it if we become master - MetaData metaData = MetaData.builder().build(); - // clean the nodes, we are now not connected to anybody, since we try and reform the cluster - latestDiscoNodes = new DiscoveryNodes.Builder().put(localNode).localNodeId(localNode.id()).build(); 
+ latestDiscoNodes = new DiscoveryNodes.Builder(latestDiscoNodes).masterNodeId(null).build(); asyncJoinCluster(); return ClusterState.builder(clusterState) .blocks(clusterBlocks) .nodes(latestDiscoNodes) - .routingTable(routingTable) - .metaData(metaData) .build(); } + private ClusterState handleAnotherMaster(ClusterState localClusterState, final DiscoveryNode otherMaster, long otherClusterStateVersion, String reason) { + assert master : "handleAnotherMaster called but current node is not a master"; + if (otherClusterStateVersion > localClusterState.version()) { + return rejoin(localClusterState, "zen-disco-discovered another master with a new cluster_state [" + otherMaster + "][" + reason + "]"); + } else { + logger.warn("discovered [{}] which is also master but with an older cluster_state, telling [{}] to rejoin the cluster ([{}])", otherMaster, otherMaster, reason); + try { + // make sure we're connected to this node (connect to node does nothing if we're already connected) + // since the network connections are asymmetric, it may be that we received a state but have disconnected from the node + // in the past (after a master failure, for example) + transportService.connectToNode(otherMaster); + transportService.sendRequest(otherMaster, DISCOVERY_REJOIN_ACTION_NAME, new RejoinClusterRequest(localClusterState.nodes().localNodeId()), new EmptyTransportResponseHandler(ThreadPool.Names.SAME) { + + @Override + public void handleException(TransportException exp) { + logger.warn("failed to send rejoin request to [{}]", exp, otherMaster); + } + }); + } catch (Exception e) { + logger.warn("failed to send rejoin request to [{}]", e, otherMaster); + } + return localClusterState; + } + } + private void sendInitialStateEventIfNeeded() { if (initialStateSent.compareAndSet(false, true)) { for (InitialStateDiscoveryListener listener : initialStateListeners) { @@ -895,12 +1061,48 @@ public class ZenDiscovery extends AbstractLifecycleComponent implemen } } - private class 
NodeFailureListener implements NodesFaultDetection.Listener { + private class NodeFaultDetectionListener extends NodesFaultDetection.Listener { + + private final AtomicInteger pingsWhileMaster = new AtomicInteger(0); @Override public void onNodeFailure(DiscoveryNode node, String reason) { handleNodeFailure(node, reason); } + + @Override + public void onPingReceived(final NodesFaultDetection.PingRequest pingRequest) { + // if we are master, we don't expect any fault detection from another node. If we get it + // means we potentially have two masters in the cluster. + if (!master) { + pingsWhileMaster.set(0); + return; + } + + // nodes pre 1.4.0 do not send this information + if (pingRequest.masterNode() == null) { + return; + } + + if (pingsWhileMaster.incrementAndGet() < maxPingsFromAnotherMaster) { + logger.trace("got a ping from another master {}. current ping count: [{}]", pingRequest.masterNode(), pingsWhileMaster.get()); + return; + } + logger.debug("got a ping from another master {}. resolving who should rejoin. 
current ping count: [{}]", pingRequest.masterNode(), pingsWhileMaster.get()); + clusterService.submitStateUpdateTask("ping from another master", Priority.URGENT, new ClusterStateUpdateTask() { + + @Override + public ClusterState execute(ClusterState currentState) throws Exception { + pingsWhileMaster.set(0); + return handleAnotherMaster(currentState, pingRequest.masterNode(), pingRequest.clusterStateVersion(), "node fd ping"); + } + + @Override + public void onFailure(String source, Throwable t) { + logger.debug("unexpected error during cluster state update task after pings from another master", t); + } + }); + } } private class MasterNodeFailureListener implements MasterFaultDetection.Listener { @@ -922,6 +1124,10 @@ public class ZenDiscovery extends AbstractLifecycleComponent implemen } } + boolean isRejoinOnMasterGone() { + return rejoinOnMasterGone; + } + static class RejoinClusterRequest extends TransportRequest { private String fromNodeId; @@ -955,7 +1161,7 @@ public class ZenDiscovery extends AbstractLifecycleComponent implemen @Override public void messageReceived(final RejoinClusterRequest request, final TransportChannel channel) throws Exception { - clusterService.submitStateUpdateTask("received a request to rejoin the cluster from [" + request.fromNodeId + "]", Priority.URGENT, new ClusterStateUpdateTask() { + clusterService.submitStateUpdateTask("received a request to rejoin the cluster from [" + request.fromNodeId + "]", Priority.URGENT, new ClusterStateNonMasterUpdateTask() { @Override public ClusterState execute(ClusterState currentState) { try { @@ -966,6 +1172,11 @@ public class ZenDiscovery extends AbstractLifecycleComponent implemen return rejoin(currentState, "received a request to rejoin the cluster from [" + request.fromNodeId + "]"); } + @Override + public void onNoLongerMaster(String source) { + // already logged + } + @Override public void onFailure(String source, Throwable t) { logger.error("unexpected failure during [{}]", t, source); @@ 
-989,6 +1200,12 @@ public class ZenDiscovery extends AbstractLifecycleComponent implemen ZenDiscovery.this.electMaster.minimumMasterNodes(), minimumMasterNodes); handleMinimumMasterNodesChanged(minimumMasterNodes); } + + boolean rejoinOnMasterGone = settings.getAsBoolean(SETTING_REJOIN_ON_MASTER_GONE, ZenDiscovery.this.rejoinOnMasterGone); + if (rejoinOnMasterGone != ZenDiscovery.this.rejoinOnMasterGone) { + logger.info("updating {} from [{}] to [{}]", SETTING_REJOIN_ON_MASTER_GONE, ZenDiscovery.this.rejoinOnMasterGone, rejoinOnMasterGone); + ZenDiscovery.this.rejoinOnMasterGone = rejoinOnMasterGone; + } } } } diff --git a/src/main/java/org/elasticsearch/discovery/zen/ZenDiscoveryModule.java b/src/main/java/org/elasticsearch/discovery/zen/ZenDiscoveryModule.java index e67c4e2af39..33987662bfa 100644 --- a/src/main/java/org/elasticsearch/discovery/zen/ZenDiscoveryModule.java +++ b/src/main/java/org/elasticsearch/discovery/zen/ZenDiscoveryModule.java @@ -23,6 +23,7 @@ import com.google.common.collect.Lists; import org.elasticsearch.common.inject.AbstractModule; import org.elasticsearch.common.inject.multibindings.Multibinder; import org.elasticsearch.discovery.Discovery; +import org.elasticsearch.discovery.zen.elect.ElectMasterService; import org.elasticsearch.discovery.zen.ping.ZenPingService; import org.elasticsearch.discovery.zen.ping.unicast.UnicastHostsProvider; @@ -44,6 +45,7 @@ public class ZenDiscoveryModule extends AbstractModule { @Override protected void configure() { + bind(ElectMasterService.class).asEagerSingleton(); bind(ZenPingService.class).asEagerSingleton(); Multibinder unicastHostsProviderMultibinder = Multibinder.newSetBinder(binder(), UnicastHostsProvider.class); for (Class unicastHostProvider : unicastHostProviders) { diff --git a/src/main/java/org/elasticsearch/discovery/zen/elect/ElectMasterService.java b/src/main/java/org/elasticsearch/discovery/zen/elect/ElectMasterService.java index bcfa1dc2f02..9ba26387ec5 100644 --- 
a/src/main/java/org/elasticsearch/discovery/zen/elect/ElectMasterService.java +++ b/src/main/java/org/elasticsearch/discovery/zen/elect/ElectMasterService.java @@ -24,12 +24,10 @@ import com.google.common.collect.Lists; import org.apache.lucene.util.CollectionUtil; import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.common.component.AbstractComponent; +import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.settings.Settings; -import java.util.Arrays; -import java.util.Comparator; -import java.util.Iterator; -import java.util.List; +import java.util.*; /** * @@ -42,6 +40,7 @@ public class ElectMasterService extends AbstractComponent { private volatile int minimumMasterNodes; + @Inject public ElectMasterService(Settings settings) { super(settings); this.minimumMasterNodes = settings.getAsInt(DISCOVERY_ZEN_MINIMUM_MASTER_NODES, -1); @@ -69,6 +68,18 @@ public class ElectMasterService extends AbstractComponent { return count >= minimumMasterNodes; } + /** + * Returns the given nodes sorted by likelihood of being elected as master, most likely first. + * Non-master nodes are not removed but are instead placed at the end. + * @param nodes the nodes to sort; they are copied into a new list before sorting + * @return a new list holding the given nodes, ordered by the node comparator + */ + public List sortByMasterLikelihood(Iterable nodes) { + ArrayList sortedNodes = Lists.newArrayList(nodes); + CollectionUtil.introSort(sortedNodes, nodeComparator); + return sortedNodes; + } + /** * Returns a list of the next possible masters. 
*/ @@ -120,6 +131,12 @@ public class ElectMasterService extends AbstractComponent { @Override public int compare(DiscoveryNode o1, DiscoveryNode o2) { + if (o1.masterNode() && !o2.masterNode()) { + return -1; + } + if (!o1.masterNode() && o2.masterNode()) { + return 1; + } return o1.id().compareTo(o2.id()); } } diff --git a/src/main/java/org/elasticsearch/discovery/zen/fd/FaultDetection.java b/src/main/java/org/elasticsearch/discovery/zen/fd/FaultDetection.java new file mode 100644 index 00000000000..d3e644f2166 --- /dev/null +++ b/src/main/java/org/elasticsearch/discovery/zen/fd/FaultDetection.java @@ -0,0 +1,95 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.elasticsearch.discovery.zen.fd; + +import org.elasticsearch.cluster.ClusterName; +import org.elasticsearch.cluster.node.DiscoveryNode; +import org.elasticsearch.common.component.AbstractComponent; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.unit.TimeValue; +import org.elasticsearch.threadpool.ThreadPool; +import org.elasticsearch.transport.TransportConnectionListener; +import org.elasticsearch.transport.TransportService; + +import static org.elasticsearch.common.unit.TimeValue.timeValueSeconds; + +/** + * A base class for {@link org.elasticsearch.discovery.zen.fd.MasterFaultDetection} & {@link org.elasticsearch.discovery.zen.fd.NodesFaultDetection}, + * making sure both use the same setting. + */ +public abstract class FaultDetection extends AbstractComponent { + + public static final String SETTING_CONNECT_ON_NETWORK_DISCONNECT = "discovery.zen.fd.connect_on_network_disconnect"; + public static final String SETTING_PING_INTERVAL = "discovery.zen.fd.ping_interval"; + public static final String SETTING_PING_TIMEOUT = "discovery.zen.fd.ping_timeout"; + public static final String SETTING_PING_RETRIES = "discovery.zen.fd.ping_retries"; + public static final String SETTING_REGISTER_CONNECTION_LISTENER = "discovery.zen.fd.register_connection_listener"; + + protected final ThreadPool threadPool; + protected final ClusterName clusterName; + protected final TransportService transportService; + + // used mainly for testing, should always be true + protected final boolean registerConnectionListener; + protected final FDConnectionListener connectionListener; + protected final boolean connectOnNetworkDisconnect; + + protected final TimeValue pingInterval; + protected final TimeValue pingRetryTimeout; + protected final int pingRetryCount; + + public FaultDetection(Settings settings, ThreadPool threadPool, TransportService transportService, ClusterName clusterName) { + super(settings); + this.threadPool = threadPool; + 
this.transportService = transportService; + this.clusterName = clusterName; + + this.connectOnNetworkDisconnect = settings.getAsBoolean(SETTING_CONNECT_ON_NETWORK_DISCONNECT, false); + this.pingInterval = settings.getAsTime(SETTING_PING_INTERVAL, timeValueSeconds(1)); + this.pingRetryTimeout = settings.getAsTime(SETTING_PING_TIMEOUT, timeValueSeconds(30)); + this.pingRetryCount = settings.getAsInt(SETTING_PING_RETRIES, 3); + this.registerConnectionListener = settings.getAsBoolean(SETTING_REGISTER_CONNECTION_LISTENER, true); + + this.connectionListener = new FDConnectionListener(); + if (registerConnectionListener) { + transportService.addConnectionListener(connectionListener); + } + } + + public void close() { + transportService.removeConnectionListener(connectionListener); + } + + /** + * This method will be called when the {@link org.elasticsearch.transport.TransportService} raised a node disconnected event + */ + abstract void handleTransportDisconnect(DiscoveryNode node); + + private class FDConnectionListener implements TransportConnectionListener { + @Override + public void onNodeConnected(DiscoveryNode node) { + } + + @Override + public void onNodeDisconnected(DiscoveryNode node) { + handleTransportDisconnect(node); + } + } + +} diff --git a/src/main/java/org/elasticsearch/discovery/zen/fd/MasterFaultDetection.java b/src/main/java/org/elasticsearch/discovery/zen/fd/MasterFaultDetection.java index 26fd2b00e94..49709b7905b 100644 --- a/src/main/java/org/elasticsearch/discovery/zen/fd/MasterFaultDetection.java +++ b/src/main/java/org/elasticsearch/discovery/zen/fd/MasterFaultDetection.java @@ -20,9 +20,10 @@ package org.elasticsearch.discovery.zen.fd; import org.elasticsearch.ElasticsearchIllegalStateException; +import org.elasticsearch.Version; +import org.elasticsearch.cluster.ClusterName; import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.cluster.node.DiscoveryNodes; -import org.elasticsearch.common.component.AbstractComponent; 
import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.settings.Settings; @@ -35,13 +36,12 @@ import java.io.IOException; import java.util.concurrent.CopyOnWriteArrayList; import java.util.concurrent.atomic.AtomicBoolean; -import static org.elasticsearch.common.unit.TimeValue.timeValueSeconds; import static org.elasticsearch.transport.TransportRequestOptions.options; /** * A fault detection that pings the master periodically to see if its alive. */ -public class MasterFaultDetection extends AbstractComponent { +public class MasterFaultDetection extends FaultDetection { public static final String MASTER_PING_ACTION_NAME = "internal:discovery/zen/fd/master_ping"; @@ -52,29 +52,10 @@ public class MasterFaultDetection extends AbstractComponent { void onDisconnectedFromMaster(); } - private final ThreadPool threadPool; - - private final TransportService transportService; - private final DiscoveryNodesProvider nodesProvider; private final CopyOnWriteArrayList listeners = new CopyOnWriteArrayList<>(); - - private final boolean connectOnNetworkDisconnect; - - private final TimeValue pingInterval; - - private final TimeValue pingRetryTimeout; - - private final int pingRetryCount; - - // used mainly for testing, should always be true - private final boolean registerConnectionListener; - - - private final FDConnectionListener connectionListener; - private volatile MasterPinger masterPinger; private final Object masterNodeMutex = new Object(); @@ -85,25 +66,13 @@ public class MasterFaultDetection extends AbstractComponent { private final AtomicBoolean notifiedMasterFailure = new AtomicBoolean(); - public MasterFaultDetection(Settings settings, ThreadPool threadPool, TransportService transportService, DiscoveryNodesProvider nodesProvider) { - super(settings); - this.threadPool = threadPool; - this.transportService = transportService; + public MasterFaultDetection(Settings settings, 
ThreadPool threadPool, TransportService transportService, + DiscoveryNodesProvider nodesProvider, ClusterName clusterName) { + super(settings, threadPool, transportService, clusterName); this.nodesProvider = nodesProvider; - this.connectOnNetworkDisconnect = componentSettings.getAsBoolean("connect_on_network_disconnect", true); - this.pingInterval = componentSettings.getAsTime("ping_interval", timeValueSeconds(1)); - this.pingRetryTimeout = componentSettings.getAsTime("ping_timeout", timeValueSeconds(30)); - this.pingRetryCount = componentSettings.getAsInt("ping_retries", 3); - this.registerConnectionListener = componentSettings.getAsBoolean("register_connection_listener", true); - logger.debug("[master] uses ping_interval [{}], ping_timeout [{}], ping_retries [{}]", pingInterval, pingRetryTimeout, pingRetryCount); - this.connectionListener = new FDConnectionListener(); - if (registerConnectionListener) { - transportService.addConnectionListener(connectionListener); - } - transportService.registerHandler(MASTER_PING_ACTION_NAME, new MasterPingRequestHandler()); } @@ -155,7 +124,8 @@ public class MasterFaultDetection extends AbstractComponent { masterPinger.stop(); } this.masterPinger = new MasterPinger(); - // start the ping process + + // we start pinging slightly later to allow the chosen master to complete it's own master election threadPool.schedule(pingInterval, ThreadPool.Names.SAME, masterPinger); } @@ -181,13 +151,14 @@ public class MasterFaultDetection extends AbstractComponent { } public void close() { + super.close(); stop("closing"); this.listeners.clear(); - transportService.removeConnectionListener(connectionListener); transportService.removeHandler(MASTER_PING_ACTION_NAME); } - private void handleTransportDisconnect(DiscoveryNode node) { + @Override + protected void handleTransportDisconnect(DiscoveryNode node) { synchronized (masterNodeMutex) { if (!node.equals(this.masterNode)) { return; @@ -200,7 +171,8 @@ public class MasterFaultDetection extends 
AbstractComponent { masterPinger.stop(); } this.masterPinger = new MasterPinger(); - threadPool.schedule(pingInterval, ThreadPool.Names.SAME, masterPinger); + // we use schedule with a 0 time value to run the pinger on the pool as it will run on later + threadPool.schedule(TimeValue.timeValueMillis(0), ThreadPool.Names.SAME, masterPinger); } catch (Exception e) { logger.trace("[master] [{}] transport disconnected (with verified connect)", masterNode); notifyMasterFailure(masterNode, "transport disconnected (with verified connect)"); @@ -237,17 +209,6 @@ public class MasterFaultDetection extends AbstractComponent { } } - private class FDConnectionListener implements TransportConnectionListener { - @Override - public void onNodeConnected(DiscoveryNode node) { - } - - @Override - public void onNodeDisconnected(DiscoveryNode node) { - handleTransportDisconnect(node); - } - } - private class MasterPinger implements Runnable { private volatile boolean running = true; @@ -268,8 +229,10 @@ public class MasterFaultDetection extends AbstractComponent { threadPool.schedule(pingInterval, ThreadPool.Names.SAME, MasterPinger.this); return; } - transportService.sendRequest(masterToPing, MASTER_PING_ACTION_NAME, new MasterPingRequest(nodesProvider.nodes().localNode().id(), masterToPing.id()), options().withType(TransportRequestOptions.Type.PING).withTimeout(pingRetryTimeout), - new BaseTransportResponseHandler() { + final MasterPingRequest request = new MasterPingRequest(nodesProvider.nodes().localNode().id(), masterToPing.id(), clusterName); + final TransportRequestOptions options = options().withType(TransportRequestOptions.Type.PING).withTimeout(pingRetryTimeout); + transportService.sendRequest(masterToPing, MASTER_PING_ACTION_NAME, request, options, new BaseTransportResponseHandler() { + @Override public MasterPingResponseResponse newInstance() { return new MasterPingResponseResponse(); @@ -326,7 +289,7 @@ public class MasterFaultDetection extends AbstractComponent { 
notifyMasterFailure(masterToPing, "failed to ping, tried [" + pingRetryCount + "] times, each with maximum [" + pingRetryTimeout + "] timeout"); } else { // resend the request, not reschedule, rely on send timeout - transportService.sendRequest(masterToPing, MASTER_PING_ACTION_NAME, new MasterPingRequest(nodesProvider.nodes().localNode().id(), masterToPing.id()), options().withType(TransportRequestOptions.Type.PING).withTimeout(pingRetryTimeout), this); + transportService.sendRequest(masterToPing, MASTER_PING_ACTION_NAME, request, options, this); } } } @@ -349,6 +312,14 @@ public class MasterFaultDetection extends AbstractComponent { } static class NotMasterException extends ElasticsearchIllegalStateException { + + NotMasterException(String msg) { + super(msg); + } + + NotMasterException() { + } + @Override public Throwable fillInStackTrace() { return null; @@ -377,6 +348,13 @@ public class MasterFaultDetection extends AbstractComponent { if (!request.masterNodeId.equals(nodes.localNodeId())) { throw new NotMasterException(); } + + // ping from nodes of version < 1.4.0 will have the clustername set to null + if (request.clusterName != null && !request.clusterName.equals(clusterName)) { + logger.trace("master fault detection ping request is targeted for a different [{}] cluster then us [{}]", request.clusterName, clusterName); + throw new NotMasterException("master fault detection ping request is targeted for a different [" + request.clusterName + "] cluster then us [" + clusterName + "]"); + } + // if we are no longer master, fail... 
if (!nodes.localNodeMaster()) { throw new NoLongerMasterException(); @@ -400,13 +378,15 @@ public class MasterFaultDetection extends AbstractComponent { private String nodeId; private String masterNodeId; + private ClusterName clusterName; private MasterPingRequest() { } - private MasterPingRequest(String nodeId, String masterNodeId) { + private MasterPingRequest(String nodeId, String masterNodeId, ClusterName clusterName) { this.nodeId = nodeId; this.masterNodeId = masterNodeId; + this.clusterName = clusterName; } @Override @@ -414,6 +394,9 @@ public class MasterFaultDetection extends AbstractComponent { super.readFrom(in); nodeId = in.readString(); masterNodeId = in.readString(); + if (in.getVersion().onOrAfter(Version.V_1_4_0)) { + clusterName = ClusterName.readClusterName(in); + } } @Override @@ -421,6 +404,9 @@ public class MasterFaultDetection extends AbstractComponent { super.writeTo(out); out.writeString(nodeId); out.writeString(masterNodeId); + if (out.getVersion().onOrAfter(Version.V_1_4_0)) { + clusterName.writeTo(out); + } } } diff --git a/src/main/java/org/elasticsearch/discovery/zen/fd/NodesFaultDetection.java b/src/main/java/org/elasticsearch/discovery/zen/fd/NodesFaultDetection.java index 6f4e403610c..90012099116 100644 --- a/src/main/java/org/elasticsearch/discovery/zen/fd/NodesFaultDetection.java +++ b/src/main/java/org/elasticsearch/discovery/zen/fd/NodesFaultDetection.java @@ -20,9 +20,11 @@ package org.elasticsearch.discovery.zen.fd; import org.elasticsearch.ElasticsearchIllegalStateException; +import org.elasticsearch.Version; +import org.elasticsearch.cluster.ClusterName; +import org.elasticsearch.cluster.ClusterState; import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.cluster.node.DiscoveryNodes; -import org.elasticsearch.common.component.AbstractComponent; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import 
org.elasticsearch.common.settings.Settings; @@ -35,68 +37,40 @@ import java.util.concurrent.ConcurrentMap; import java.util.concurrent.CopyOnWriteArrayList; import static org.elasticsearch.cluster.node.DiscoveryNodes.EMPTY_NODES; -import static org.elasticsearch.common.unit.TimeValue.timeValueSeconds; import static org.elasticsearch.common.util.concurrent.ConcurrentCollections.newConcurrentMap; import static org.elasticsearch.transport.TransportRequestOptions.options; /** * A fault detection of multiple nodes. */ -public class NodesFaultDetection extends AbstractComponent { +public class NodesFaultDetection extends FaultDetection { public static final String PING_ACTION_NAME = "internal:discovery/zen/fd/ping"; + + public abstract static class Listener { - public static interface Listener { + public void onNodeFailure(DiscoveryNode node, String reason) {} + + public void onPingReceived(PingRequest pingRequest) {} - void onNodeFailure(DiscoveryNode node, String reason); } - private final ThreadPool threadPool; - - private final TransportService transportService; - - - private final boolean connectOnNetworkDisconnect; - - private final TimeValue pingInterval; - - private final TimeValue pingRetryTimeout; - - private final int pingRetryCount; - - // used mainly for testing, should always be true - private final boolean registerConnectionListener; - - private final CopyOnWriteArrayList listeners = new CopyOnWriteArrayList<>(); private final ConcurrentMap nodesFD = newConcurrentMap(); - private final FDConnectionListener connectionListener; - private volatile DiscoveryNodes latestNodes = EMPTY_NODES; + private volatile long clusterStateVersion = ClusterState.UNKNOWN_VERSION; + private volatile boolean running = false; - public NodesFaultDetection(Settings settings, ThreadPool threadPool, TransportService transportService) { - super(settings); - this.threadPool = threadPool; - this.transportService = transportService; - - this.connectOnNetworkDisconnect = 
componentSettings.getAsBoolean("connect_on_network_disconnect", true); - this.pingInterval = componentSettings.getAsTime("ping_interval", timeValueSeconds(1)); - this.pingRetryTimeout = componentSettings.getAsTime("ping_timeout", timeValueSeconds(30)); - this.pingRetryCount = componentSettings.getAsInt("ping_retries", 3); - this.registerConnectionListener = componentSettings.getAsBoolean("register_connection_listener", true); + public NodesFaultDetection(Settings settings, ThreadPool threadPool, TransportService transportService, ClusterName clusterName) { + super(settings, threadPool, transportService, clusterName); logger.debug("[node ] uses ping_interval [{}], ping_timeout [{}], ping_retries [{}]", pingInterval, pingRetryTimeout, pingRetryCount); transportService.registerHandler(PING_ACTION_NAME, new PingRequestHandler()); - - this.connectionListener = new FDConnectionListener(); - if (registerConnectionListener) { - transportService.addConnectionListener(connectionListener); - } } public void addListener(Listener listener) { @@ -107,9 +81,10 @@ public class NodesFaultDetection extends AbstractComponent { listeners.remove(listener); } - public void updateNodes(DiscoveryNodes nodes) { + public void updateNodes(DiscoveryNodes nodes, long clusterStateVersion) { DiscoveryNodes prevNodes = latestNodes; this.latestNodes = nodes; + this.clusterStateVersion = clusterStateVersion; if (!running) { return; } @@ -121,7 +96,8 @@ public class NodesFaultDetection extends AbstractComponent { } if (!nodesFD.containsKey(newNode)) { nodesFD.put(newNode, new NodeFD()); - threadPool.schedule(pingInterval, ThreadPool.Names.SAME, new SendPingRequest(newNode)); + // we use schedule with a 0 time value to run the pinger on the pool as it will run on later + threadPool.schedule(TimeValue.timeValueMillis(0), ThreadPool.Names.SAME, new SendPingRequest(newNode)); } } for (DiscoveryNode removedNode : delta.removedNodes()) { @@ -146,12 +122,13 @@ public class NodesFaultDetection extends 
AbstractComponent { } public void close() { + super.close(); stop(); transportService.removeHandler(PING_ACTION_NAME); - transportService.removeConnectionListener(connectionListener); } - private void handleTransportDisconnect(DiscoveryNode node) { + @Override + protected void handleTransportDisconnect(DiscoveryNode node) { if (!latestNodes.nodeExists(node.id())) { return; } @@ -167,7 +144,8 @@ public class NodesFaultDetection extends AbstractComponent { try { transportService.connectToNode(node); nodesFD.put(node, new NodeFD()); - threadPool.schedule(pingInterval, ThreadPool.Names.SAME, new SendPingRequest(node)); + // we use schedule with a 0 time value to run the pinger on the pool as it will run on later + threadPool.schedule(TimeValue.timeValueMillis(0), ThreadPool.Names.SAME, new SendPingRequest(node)); } catch (Exception e) { logger.trace("[node ] [{}] transport disconnected (with verified connect)", node); notifyNodeFailure(node, "transport disconnected (with verified connect)"); @@ -189,6 +167,19 @@ public class NodesFaultDetection extends AbstractComponent { }); } + private void notifyPingReceived(final PingRequest pingRequest) { + threadPool.generic().execute(new Runnable() { + + @Override + public void run() { + for (Listener listener : listeners) { + listener.onPingReceived(pingRequest); + } + } + + }); + } + private class SendPingRequest implements Runnable { private final DiscoveryNode node; @@ -202,8 +193,9 @@ public class NodesFaultDetection extends AbstractComponent { if (!running) { return; } - transportService.sendRequest(node, PING_ACTION_NAME, new PingRequest(node.id()), options().withType(TransportRequestOptions.Type.PING).withTimeout(pingRetryTimeout), - new BaseTransportResponseHandler() { + final PingRequest pingRequest = new PingRequest(node.id(), clusterName, latestNodes.localNode(), clusterStateVersion); + final TransportRequestOptions options = options().withType(TransportRequestOptions.Type.PING).withTimeout(pingRetryTimeout); + 
transportService.sendRequest(node, PING_ACTION_NAME, pingRequest, options, new BaseTransportResponseHandler() { @Override public PingResponse newInstance() { return new PingResponse(); @@ -250,8 +242,7 @@ public class NodesFaultDetection extends AbstractComponent { } } else { // resend the request, not reschedule, rely on send timeout - transportService.sendRequest(node, PING_ACTION_NAME, new PingRequest(node.id()), - options().withType(TransportRequestOptions.Type.PING).withTimeout(pingRetryTimeout), this); + transportService.sendRequest(node, PING_ACTION_NAME, pingRequest, options, this); } } } @@ -270,18 +261,6 @@ public class NodesFaultDetection extends AbstractComponent { volatile boolean running = true; } - private class FDConnectionListener implements TransportConnectionListener { - @Override - public void onNodeConnected(DiscoveryNode node) { - } - - @Override - public void onNodeDisconnected(DiscoveryNode node) { - handleTransportDisconnect(node); - } - } - - class PingRequestHandler extends BaseTransportRequestHandler { @Override @@ -296,6 +275,15 @@ public class NodesFaultDetection extends AbstractComponent { if (!latestNodes.localNodeId().equals(request.nodeId)) { throw new ElasticsearchIllegalStateException("Got pinged as node [" + request.nodeId + "], but I am node [" + latestNodes.localNodeId() + "]"); } + + // PingRequest will have clusterName set to null if it came from a node of version <1.4.0 + if (request.clusterName != null && !request.clusterName.equals(clusterName)) { + // Don't introduce new exception for bwc reasons + throw new ElasticsearchIllegalStateException("Got pinged with cluster name [" + request.clusterName + "], but I'm part of cluster [" + clusterName + "]"); + } + + notifyPingReceived(request); + channel.sendResponse(new PingResponse()); } @@ -306,28 +294,63 @@ public class NodesFaultDetection extends AbstractComponent { } - static class PingRequest extends TransportRequest { + public static class PingRequest extends 
TransportRequest { // the (assumed) node id we are pinging private String nodeId; + private ClusterName clusterName; + + private DiscoveryNode masterNode; + + private long clusterStateVersion = ClusterState.UNKNOWN_VERSION; + PingRequest() { } - PingRequest(String nodeId) { + PingRequest(String nodeId, ClusterName clusterName, DiscoveryNode masterNode, long clusterStateVersion) { this.nodeId = nodeId; + this.clusterName = clusterName; + this.masterNode = masterNode; + this.clusterStateVersion = clusterStateVersion; + } + + public String nodeId() { + return nodeId; + } + + public ClusterName clusterName() { + return clusterName; + } + + public DiscoveryNode masterNode() { + return masterNode; + } + + public long clusterStateVersion() { + return clusterStateVersion; } @Override public void readFrom(StreamInput in) throws IOException { super.readFrom(in); nodeId = in.readString(); + if (in.getVersion().onOrAfter(Version.V_1_4_0)) { + clusterName = ClusterName.readClusterName(in); + masterNode = DiscoveryNode.readNode(in); + clusterStateVersion = in.readLong(); + } } @Override public void writeTo(StreamOutput out) throws IOException { super.writeTo(out); out.writeString(nodeId); + if (out.getVersion().onOrAfter(Version.V_1_4_0)) { + clusterName.writeTo(out); + masterNode.writeTo(out); + out.writeLong(clusterStateVersion); + } } } diff --git a/src/main/java/org/elasticsearch/discovery/zen/ping/ZenPingService.java b/src/main/java/org/elasticsearch/discovery/zen/ping/ZenPingService.java index 53ee9248eac..39f710f7acd 100644 --- a/src/main/java/org/elasticsearch/discovery/zen/ping/ZenPingService.java +++ b/src/main/java/org/elasticsearch/discovery/zen/ping/ZenPingService.java @@ -34,6 +34,7 @@ import org.elasticsearch.common.unit.TimeValue; import org.elasticsearch.common.util.concurrent.ConcurrentCollections; import org.elasticsearch.common.util.concurrent.EsRejectedExecutionException; import org.elasticsearch.discovery.zen.DiscoveryNodesProvider; +import 
org.elasticsearch.discovery.zen.elect.ElectMasterService; import org.elasticsearch.discovery.zen.ping.multicast.MulticastZenPing; import org.elasticsearch.discovery.zen.ping.unicast.UnicastHostsProvider; import org.elasticsearch.discovery.zen.ping.unicast.UnicastZenPing; @@ -55,20 +56,20 @@ public class ZenPingService extends AbstractLifecycleComponent implemen // here for backward comp. with discovery plugins public ZenPingService(Settings settings, ThreadPool threadPool, TransportService transportService, ClusterName clusterName, NetworkService networkService, - @Nullable Set unicastHostsProviders) { - this(settings, threadPool, transportService, clusterName, networkService, Version.CURRENT, unicastHostsProviders); + ElectMasterService electMasterService, @Nullable Set unicastHostsProviders) { + this(settings, threadPool, transportService, clusterName, networkService, Version.CURRENT, electMasterService, unicastHostsProviders); } @Inject public ZenPingService(Settings settings, ThreadPool threadPool, TransportService transportService, ClusterName clusterName, NetworkService networkService, - Version version, @Nullable Set unicastHostsProviders) { + Version version, ElectMasterService electMasterService, @Nullable Set unicastHostsProviders) { super(settings); ImmutableList.Builder zenPingsBuilder = ImmutableList.builder(); if (componentSettings.getAsBoolean("multicast.enabled", true)) { zenPingsBuilder.add(new MulticastZenPing(settings, threadPool, transportService, clusterName, networkService, version)); } // always add the unicast hosts, so it will be able to receive unicast requests even when working in multicast - zenPingsBuilder.add(new UnicastZenPing(settings, threadPool, transportService, clusterName, version, unicastHostsProviders)); + zenPingsBuilder.add(new UnicastZenPing(settings, threadPool, transportService, clusterName, version, electMasterService, unicastHostsProviders)); this.zenPings = zenPingsBuilder.build(); } diff --git 
a/src/main/java/org/elasticsearch/discovery/zen/ping/unicast/UnicastZenPing.java b/src/main/java/org/elasticsearch/discovery/zen/ping/unicast/UnicastZenPing.java index 25a43ead8ef..123f2d7fc7f 100644 --- a/src/main/java/org/elasticsearch/discovery/zen/ping/unicast/UnicastZenPing.java +++ b/src/main/java/org/elasticsearch/discovery/zen/ping/unicast/UnicastZenPing.java @@ -19,8 +19,12 @@ package org.elasticsearch.discovery.zen.ping.unicast; +import com.carrotsearch.hppc.cursors.ObjectCursor; import com.google.common.collect.Lists; -import org.elasticsearch.*; +import org.elasticsearch.ElasticsearchException; +import org.elasticsearch.ElasticsearchIllegalArgumentException; +import org.elasticsearch.ElasticsearchIllegalStateException; +import org.elasticsearch.Version; import org.elasticsearch.cluster.ClusterName; import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.cluster.node.DiscoveryNodes; @@ -35,6 +39,7 @@ import org.elasticsearch.common.util.concurrent.ConcurrentCollections; import org.elasticsearch.common.util.concurrent.EsExecutors; import org.elasticsearch.common.util.concurrent.EsRejectedExecutionException; import org.elasticsearch.discovery.zen.DiscoveryNodesProvider; +import org.elasticsearch.discovery.zen.elect.ElectMasterService; import org.elasticsearch.discovery.zen.ping.ZenPing; import org.elasticsearch.threadpool.ThreadPool; import org.elasticsearch.transport.*; @@ -62,10 +67,11 @@ public class UnicastZenPing extends AbstractLifecycleComponent implemen private final ThreadPool threadPool; private final TransportService transportService; private final ClusterName clusterName; + private final ElectMasterService electMasterService; private final int concurrentConnects; - private final DiscoveryNode[] nodes; + private final DiscoveryNode[] configuredTargetNodes; private volatile DiscoveryNodesProvider nodesProvider; @@ -73,16 +79,18 @@ public class UnicastZenPing extends AbstractLifecycleComponent implemen private final Map> 
receivedResponses = newConcurrentMap(); - // a list of temporal responses a node will return for a request (holds requests from other nodes) + // a list of temporal responses a node will return for a request (holds requests from other configuredTargetNodes) private final Queue temporalResponses = ConcurrentCollections.newQueue(); private final CopyOnWriteArrayList hostsProviders = new CopyOnWriteArrayList<>(); - public UnicastZenPing(Settings settings, ThreadPool threadPool, TransportService transportService, ClusterName clusterName, Version version, @Nullable Set unicastHostsProviders) { + public UnicastZenPing(Settings settings, ThreadPool threadPool, TransportService transportService, ClusterName clusterName, + Version version, ElectMasterService electMasterService, @Nullable Set unicastHostsProviders) { super(settings); this.threadPool = threadPool; this.transportService = transportService; this.clusterName = clusterName; + this.electMasterService = electMasterService; if (unicastHostsProviders != null) { for (UnicastHostsProvider unicastHostsProvider : unicastHostsProviders) { @@ -99,20 +107,20 @@ public class UnicastZenPing extends AbstractLifecycleComponent implemen List hosts = Lists.newArrayList(hostArr); logger.debug("using initial hosts {}, with concurrent_connects [{}]", hosts, concurrentConnects); - List nodes = Lists.newArrayList(); + List configuredTargetNodes = Lists.newArrayList(); int idCounter = 0; for (String host : hosts) { try { TransportAddress[] addresses = transportService.addressesFromString(host); // we only limit to 1 addresses, makes no sense to ping 100 ports for (int i = 0; (i < addresses.length && i < LIMIT_PORTS_COUNT); i++) { - nodes.add(new DiscoveryNode("#zen_unicast_" + (++idCounter) + "#", addresses[i], version.minimumCompatibilityVersion())); + configuredTargetNodes.add(new DiscoveryNode("#zen_unicast_" + (++idCounter) + "#", addresses[i], version.minimumCompatibilityVersion())); } } catch (Exception e) { throw new 
ElasticsearchIllegalArgumentException("Failed to resolve address for [" + host + "]", e); } } - this.nodes = nodes.toArray(new DiscoveryNode[nodes.size()]); + this.configuredTargetNodes = configuredTargetNodes.toArray(new DiscoveryNode[configuredTargetNodes.size()]); transportService.registerHandler(ACTION_NAME, new UnicastPingRequestHandler()); } @@ -143,6 +151,13 @@ public class UnicastZenPing extends AbstractLifecycleComponent implemen this.nodesProvider = nodesProvider; } + /** + * Clears the list of cached ping responses. + */ + public void clearTemporalReponses() { + temporalResponses.clear(); + } + public PingResponse[] pingAndWait(TimeValue timeout) { final AtomicReference response = new AtomicReference<>(); final CountDownLatch latch = new CountDownLatch(1); @@ -237,18 +252,30 @@ public class UnicastZenPing extends AbstractLifecycleComponent implemen DiscoveryNodes discoNodes = nodesProvider.nodes(); pingRequest.pingResponse = new PingResponse(discoNodes.localNode(), discoNodes.masterNode(), clusterName); - HashSet nodesToPing = new HashSet<>(Arrays.asList(nodes)); + HashSet nodesToPingSet = new HashSet<>(); for (PingResponse temporalResponse : temporalResponses) { // Only send pings to nodes that have the same cluster name. 
if (clusterName.equals(temporalResponse.clusterName())) { - nodesToPing.add(temporalResponse.target()); + nodesToPingSet.add(temporalResponse.target()); } } for (UnicastHostsProvider provider : hostsProviders) { - nodesToPing.addAll(provider.buildDynamicNodes()); + nodesToPingSet.addAll(provider.buildDynamicNodes()); } + // add all possible master nodes that were active in the last known cluster configuration + for (ObjectCursor masterNode : discoNodes.getMasterNodes().values()) { + nodesToPingSet.add(masterNode.value); + } + + // sort the nodes by likelihood of being an active master + List sortedNodesToPing = electMasterService.sortByMasterLikelihood(nodesToPingSet); + + // new add the the unicast targets first + ArrayList nodesToPing = Lists.newArrayList(configuredTargetNodes); + nodesToPing.addAll(sortedNodesToPing); + final CountDownLatch latch = new CountDownLatch(nodesToPing.size()); for (final DiscoveryNode node : nodesToPing) { // make sure we are connected diff --git a/src/main/java/org/elasticsearch/discovery/zen/publish/PublishClusterStateAction.java b/src/main/java/org/elasticsearch/discovery/zen/publish/PublishClusterStateAction.java index d716a336a05..1e46bbb0171 100644 --- a/src/main/java/org/elasticsearch/discovery/zen/publish/PublishClusterStateAction.java +++ b/src/main/java/org/elasticsearch/discovery/zen/publish/PublishClusterStateAction.java @@ -40,6 +40,7 @@ import org.elasticsearch.threadpool.ThreadPool; import org.elasticsearch.transport.*; import java.util.Map; +import java.util.concurrent.atomic.AtomicBoolean; /** * @@ -85,12 +86,15 @@ public class PublishClusterStateAction extends AbstractComponent { publish(clusterState, new AckClusterStatePublishResponseHandler(clusterState.nodes().size() - 1, ackListener)); } - private void publish(ClusterState clusterState, final ClusterStatePublishResponseHandler publishResponseHandler) { + private void publish(final ClusterState clusterState, final ClusterStatePublishResponseHandler 
publishResponseHandler) { DiscoveryNode localNode = nodesProvider.nodes().localNode(); Map serializedStates = Maps.newHashMap(); + final AtomicBoolean timedOutWaitingForNodes = new AtomicBoolean(false); + final TimeValue publishTimeout = discoverySettings.getPublishTimeout(); + for (final DiscoveryNode node : clusterState.nodes()) { if (node.equals(localNode)) { continue; @@ -125,28 +129,30 @@ public class PublishClusterStateAction extends AbstractComponent { @Override public void handleResponse(TransportResponse.Empty response) { + if (timedOutWaitingForNodes.get()) { + logger.debug("node {} responded for cluster state [{}] (took longer than [{}])", node, clusterState.version(), publishTimeout); + } publishResponseHandler.onResponse(node); } @Override public void handleException(TransportException exp) { - logger.debug("failed to send cluster state to [{}]", exp, node); + logger.debug("failed to send cluster state to {}", exp, node); publishResponseHandler.onFailure(node, exp); } }); } catch (Throwable t) { - logger.debug("error sending cluster state to [{}]", t, node); + logger.debug("error sending cluster state to {}", t, node); publishResponseHandler.onFailure(node, t); } } - TimeValue publishTimeout = discoverySettings.getPublishTimeout(); if (publishTimeout.millis() > 0) { // only wait if the publish timeout is configured... 
try { - boolean awaited = publishResponseHandler.awaitAllNodes(publishTimeout); - if (!awaited) { - logger.debug("awaiting all nodes to process published state {} timed out, timeout {}", clusterState.version(), publishTimeout); + timedOutWaitingForNodes.set(!publishResponseHandler.awaitAllNodes(publishTimeout)); + if (timedOutWaitingForNodes.get()) { + logger.debug("timed out waiting for all nodes to process published state [{}] (timeout [{}])", clusterState.version(), publishTimeout); } } catch (InterruptedException e) { // ignore & restore interrupt diff --git a/src/main/java/org/elasticsearch/gateway/GatewayService.java b/src/main/java/org/elasticsearch/gateway/GatewayService.java index 5f5eaa8e3e5..827a6559bf9 100644 --- a/src/main/java/org/elasticsearch/gateway/GatewayService.java +++ b/src/main/java/org/elasticsearch/gateway/GatewayService.java @@ -35,7 +35,6 @@ import org.elasticsearch.common.component.AbstractLifecycleComponent; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.unit.TimeValue; -import org.elasticsearch.discovery.Discovery; import org.elasticsearch.discovery.DiscoveryService; import org.elasticsearch.rest.RestStatus; import org.elasticsearch.threadpool.ThreadPool; @@ -134,12 +133,6 @@ public class GatewayService extends AbstractLifecycleComponent i if (lifecycle.stoppedOrClosed()) { return; } - if (event.state().blocks().hasGlobalBlock(Discovery.NO_MASTER_BLOCK)) { - // we need to clear those flags, since we might need to recover again in case we disconnect - // from the cluster and then reconnect - recovered.set(false); - scheduledRecovery.set(false); - } if (event.localNodeMaster() && event.state().blocks().hasGlobalBlock(STATE_NOT_RECOVERED_BLOCK)) { checkStateMeetsSettingsAndMaybeRecover(event.state(), true); } @@ -147,7 +140,7 @@ public class GatewayService extends AbstractLifecycleComponent i protected void 
checkStateMeetsSettingsAndMaybeRecover(ClusterState state, boolean asyncRecovery) { DiscoveryNodes nodes = state.nodes(); - if (state.blocks().hasGlobalBlock(Discovery.NO_MASTER_BLOCK)) { + if (state.blocks().hasGlobalBlock(discoveryService.getNoMasterBlock())) { logger.debug("not recovering from gateway, no master elected yet"); } else if (recoverAfterNodes != -1 && (nodes.masterAndDataNodes().size()) < recoverAfterNodes) { logger.debug("not recovering from gateway, nodes_size (data+master) [" + nodes.masterAndDataNodes().size() + "] < recover_after_nodes [" + recoverAfterNodes + "]"); diff --git a/src/main/java/org/elasticsearch/indices/store/IndicesStore.java b/src/main/java/org/elasticsearch/indices/store/IndicesStore.java index 02420d0e3d5..ecf5e6b6b22 100644 --- a/src/main/java/org/elasticsearch/indices/store/IndicesStore.java +++ b/src/main/java/org/elasticsearch/indices/store/IndicesStore.java @@ -307,7 +307,7 @@ public class IndicesStore extends AbstractComponent implements ClusterStateListe return; } - clusterService.submitStateUpdateTask("indices_store", new ClusterStateUpdateTask() { + clusterService.submitStateUpdateTask("indices_store", new ClusterStateNonMasterUpdateTask() { @Override public ClusterState execute(ClusterState currentState) throws Exception { if (clusterState.getVersion() != currentState.getVersion()) { diff --git a/src/main/java/org/elasticsearch/transport/TransportService.java b/src/main/java/org/elasticsearch/transport/TransportService.java index e922f1b4932..e2e6f502e89 100644 --- a/src/main/java/org/elasticsearch/transport/TransportService.java +++ b/src/main/java/org/elasticsearch/transport/TransportService.java @@ -245,6 +245,10 @@ public class TransportService extends AbstractLifecycleComponent implements Transport { private final ThreadPool threadPool; + private final ThreadPoolExecutor workers; private final Version version; private volatile TransportServiceAdapter transportServiceAdapter; private volatile 
BoundTransportAddress boundAddress; @@ -58,13 +62,20 @@ public class LocalTransport extends AbstractLifecycleComponent implem private static final AtomicLong transportAddressIdGenerator = new AtomicLong(); private final ConcurrentMap connectedNodes = newConcurrentMap(); - public static final String TRANSPORT_LOCAL_ADDRESS = "transport.local_address"; + public static final String TRANSPORT_LOCAL_ADDRESS = "transport.local.address"; + public static final String TRANSPORT_LOCAL_WORKERS = "transport.local.workers"; + public static final String TRANSPORT_LOCAL_QUEUE = "transport.local.queue"; @Inject public LocalTransport(Settings settings, ThreadPool threadPool, Version version) { super(settings); this.threadPool = threadPool; this.version = version; + + int workerCount = this.settings.getAsInt(TRANSPORT_LOCAL_WORKERS, EsExecutors.boundedNumberOfProcessors(settings)); + int queueSize = this.settings.getAsInt(TRANSPORT_LOCAL_QUEUE, -1); + logger.debug("creating [{}] workers, queue_size [{}]", workerCount, queueSize); + this.workers = EsExecutors.newFixed(workerCount, queueSize, EsExecutors.daemonThreadFactory(this.settings, "local_transport")); } @Override @@ -106,6 +117,13 @@ public class LocalTransport extends AbstractLifecycleComponent implem @Override protected void doClose() throws ElasticsearchException { + workers.shutdown(); + try { + workers.awaitTermination(10, TimeUnit.SECONDS); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + workers.shutdownNow(); } @Override @@ -185,7 +203,7 @@ public class LocalTransport extends AbstractLifecycleComponent implem transportServiceAdapter.sent(data.length); - threadPool.generic().execute(new Runnable() { + targetTransport.workers().execute(new Runnable() { @Override public void run() { targetTransport.messageReceived(data, action, LocalTransport.this, version, requestId); @@ -193,8 +211,8 @@ public class LocalTransport extends AbstractLifecycleComponent implem }); } - ThreadPool threadPool() { 
- return this.threadPool; + ThreadPoolExecutor workers() { + return this.workers; } protected void messageReceived(byte[] data, String action, LocalTransport sourceTransport, Version version, @Nullable final Long sendRequestId) { diff --git a/src/main/java/org/elasticsearch/transport/local/LocalTransportChannel.java b/src/main/java/org/elasticsearch/transport/local/LocalTransportChannel.java index f4d5e83053a..f316e9ba69d 100644 --- a/src/main/java/org/elasticsearch/transport/local/LocalTransportChannel.java +++ b/src/main/java/org/elasticsearch/transport/local/LocalTransportChannel.java @@ -72,7 +72,7 @@ public class LocalTransportChannel implements TransportChannel { response.writeTo(stream); stream.close(); final byte[] data = bStream.bytes().toBytes(); - targetTransport.threadPool().generic().execute(new Runnable() { + targetTransport.workers().execute(new Runnable() { @Override public void run() { targetTransport.messageReceived(data, action, sourceTransport, version, null); @@ -98,7 +98,7 @@ public class LocalTransportChannel implements TransportChannel { too.close(); } final byte[] data = stream.bytes().toBytes(); - targetTransport.threadPool().generic().execute(new Runnable() { + targetTransport.workers().execute(new Runnable() { @Override public void run() { targetTransport.messageReceived(data, action, sourceTransport, version, null); diff --git a/src/main/java/org/elasticsearch/tribe/TribeService.java b/src/main/java/org/elasticsearch/tribe/TribeService.java index e706e400658..a335f47b53c 100644 --- a/src/main/java/org/elasticsearch/tribe/TribeService.java +++ b/src/main/java/org/elasticsearch/tribe/TribeService.java @@ -23,7 +23,6 @@ import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import org.elasticsearch.ElasticsearchException; -import org.elasticsearch.ElasticsearchIllegalStateException; import org.elasticsearch.action.support.master.TransportMasterNodeReadOperationAction; 
import org.elasticsearch.cluster.*; import org.elasticsearch.cluster.block.ClusterBlock; @@ -43,7 +42,7 @@ import org.elasticsearch.common.regex.Regex; import org.elasticsearch.common.settings.ImmutableSettings; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.util.concurrent.ConcurrentCollections; -import org.elasticsearch.discovery.Discovery; +import org.elasticsearch.discovery.DiscoveryService; import org.elasticsearch.gateway.GatewayService; import org.elasticsearch.node.NodeBuilder; import org.elasticsearch.node.internal.InternalNode; @@ -53,7 +52,6 @@ import java.util.EnumSet; import java.util.List; import java.util.Map; import java.util.Set; -import java.util.concurrent.CountDownLatch; /** * The tribe service holds a list of node clients connected to a list of tribe members, and uses their @@ -121,7 +119,7 @@ public class TribeService extends AbstractLifecycleComponent { private final List nodes = Lists.newCopyOnWriteArrayList(); @Inject - public TribeService(Settings settings, ClusterService clusterService) { + public TribeService(Settings settings, ClusterService clusterService, DiscoveryService discoveryService) { super(settings); this.clusterService = clusterService; Map nodesSettings = Maps.newHashMap(settings.getGroups("tribe", true)); @@ -143,7 +141,7 @@ public class TribeService extends AbstractLifecycleComponent { if (!nodes.isEmpty()) { // remove the initial election / recovery blocks since we are not going to have a // master elected in this single tribe node local "cluster" - clusterService.removeInitialStateBlock(Discovery.NO_MASTER_BLOCK); + clusterService.removeInitialStateBlock(discoveryService.getNoMasterBlock()); clusterService.removeInitialStateBlock(GatewayService.STATE_NOT_RECOVERED_BLOCK); if (settings.getAsBoolean("tribe.blocks.write", false)) { clusterService.addInitialStateBlock(TRIBE_WRITE_BLOCK); @@ -222,7 +220,7 @@ public class TribeService extends AbstractLifecycleComponent { @Override public 
void clusterChanged(final ClusterChangedEvent event) { logger.debug("[{}] received cluster event, [{}]", tribeName, event.source()); - clusterService.submitStateUpdateTask("cluster event from " + tribeName + ", " + event.source(), new ClusterStateUpdateTask() { + clusterService.submitStateUpdateTask("cluster event from " + tribeName + ", " + event.source(), new ClusterStateNonMasterUpdateTask() { @Override public ClusterState execute(ClusterState currentState) throws Exception { ClusterState tribeState = event.state(); diff --git a/src/test/java/org/elasticsearch/cluster/ClusterServiceTests.java b/src/test/java/org/elasticsearch/cluster/ClusterServiceTests.java index dde9eedc4e1..1d0a2038615 100644 --- a/src/test/java/org/elasticsearch/cluster/ClusterServiceTests.java +++ b/src/test/java/org/elasticsearch/cluster/ClusterServiceTests.java @@ -19,6 +19,7 @@ package org.elasticsearch.cluster; import com.google.common.base.Predicate; +import com.google.common.util.concurrent.ListenableFuture; import org.elasticsearch.ElasticsearchException; import org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse; import org.elasticsearch.action.admin.cluster.tasks.PendingClusterTasksResponse; @@ -256,6 +257,58 @@ public class ClusterServiceTests extends ElasticsearchIntegrationTest { assertThat(processedLatch.await(1, TimeUnit.SECONDS), equalTo(true)); } + @Test + public void testMasterAwareExecution() throws Exception { + Settings settings = settingsBuilder() + .put("discovery.type", "local") + .build(); + + ListenableFuture master = internalCluster().startNodeAsync(settings); + ListenableFuture nonMaster = internalCluster().startNodeAsync(settingsBuilder().put(settings).put("node.master", false).build()); + master.get(); + ensureGreen(); // make sure we have a cluster + + ClusterService clusterService = internalCluster().getInstance(ClusterService.class, nonMaster.get()); + + final boolean[] taskFailed = {false}; + final CountDownLatch latch1 = new 
CountDownLatch(1); + clusterService.submitStateUpdateTask("test", new ClusterStateUpdateTask() { + @Override + public ClusterState execute(ClusterState currentState) throws Exception { + latch1.countDown(); + return currentState; + } + + @Override + public void onFailure(String source, Throwable t) { + taskFailed[0] = true; + latch1.countDown(); + } + }); + + latch1.await(); + assertTrue("cluster state update task was executed on a non-master", taskFailed[0]); + + taskFailed[0] = true; + final CountDownLatch latch2 = new CountDownLatch(1); + clusterService.submitStateUpdateTask("test", new ClusterStateNonMasterUpdateTask() { + @Override + public ClusterState execute(ClusterState currentState) throws Exception { + taskFailed[0] = false; + latch2.countDown(); + return currentState; + } + + @Override + public void onFailure(String source, Throwable t) { + taskFailed[0] = true; + latch2.countDown(); + } + }); + latch2.await(); + assertFalse("non-master cluster state update task was not executed", taskFailed[0]); + } + @Test public void testAckedUpdateTaskNoAckExpected() throws Exception { Settings settings = settingsBuilder() @@ -655,7 +708,7 @@ public class ClusterServiceTests extends ElasticsearchIntegrationTest { } } - private static class BlockingTask implements ClusterStateUpdateTask { + private static class BlockingTask extends ClusterStateUpdateTask { private final CountDownLatch latch = new CountDownLatch(1); @Override @@ -674,7 +727,7 @@ public class ClusterServiceTests extends ElasticsearchIntegrationTest { } - private static class PrioritiezedTask implements ClusterStateUpdateTask { + private static class PrioritiezedTask extends ClusterStateUpdateTask { private final Priority priority; private final CountDownLatch latch; diff --git a/src/test/java/org/elasticsearch/cluster/MinimumMasterNodesTests.java b/src/test/java/org/elasticsearch/cluster/MinimumMasterNodesTests.java index 3fe477cc989..5e63990fe04 100644 --- 
a/src/test/java/org/elasticsearch/cluster/MinimumMasterNodesTests.java +++ b/src/test/java/org/elasticsearch/cluster/MinimumMasterNodesTests.java @@ -25,7 +25,7 @@ import org.elasticsearch.action.admin.cluster.health.ClusterHealthStatus; import org.elasticsearch.client.Client; import org.elasticsearch.common.Priority; import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.discovery.Discovery; +import org.elasticsearch.discovery.DiscoverySettings; import org.elasticsearch.discovery.zen.elect.ElectMasterService; import org.elasticsearch.index.query.QueryBuilders; import org.elasticsearch.test.ElasticsearchIntegrationTest; @@ -60,7 +60,7 @@ public class MinimumMasterNodesTests extends ElasticsearchIntegrationTest { logger.info("--> should be blocked, no master..."); ClusterState state = client().admin().cluster().prepareState().setLocal(true).execute().actionGet().getState(); - assertThat(state.blocks().hasGlobalBlock(Discovery.NO_MASTER_BLOCK), equalTo(true)); + assertThat(state.blocks().hasGlobalBlock(DiscoverySettings.NO_MASTER_BLOCK_ID), equalTo(true)); assertThat(state.nodes().size(), equalTo(1)); // verify that we still see the local node in the cluster state logger.info("--> start second node, cluster should be formed"); @@ -70,9 +70,9 @@ public class MinimumMasterNodesTests extends ElasticsearchIntegrationTest { assertThat(clusterHealthResponse.isTimedOut(), equalTo(false)); state = client().admin().cluster().prepareState().setLocal(true).execute().actionGet().getState(); - assertThat(state.blocks().hasGlobalBlock(Discovery.NO_MASTER_BLOCK), equalTo(false)); + assertThat(state.blocks().hasGlobalBlock(DiscoverySettings.NO_MASTER_BLOCK_ID), equalTo(false)); state = client().admin().cluster().prepareState().setLocal(true).execute().actionGet().getState(); - assertThat(state.blocks().hasGlobalBlock(Discovery.NO_MASTER_BLOCK), equalTo(false)); + assertThat(state.blocks().hasGlobalBlock(DiscoverySettings.NO_MASTER_BLOCK_ID), equalTo(false)); 
state = client().admin().cluster().prepareState().execute().actionGet().getState(); assertThat(state.nodes().size(), equalTo(2)); @@ -98,11 +98,11 @@ public class MinimumMasterNodesTests extends ElasticsearchIntegrationTest { awaitBusy(new Predicate() { public boolean apply(Object obj) { ClusterState state = client().admin().cluster().prepareState().setLocal(true).execute().actionGet().getState(); - return state.blocks().hasGlobalBlock(Discovery.NO_MASTER_BLOCK); + return state.blocks().hasGlobalBlock(DiscoverySettings.NO_MASTER_BLOCK_ID); } }); state = client().admin().cluster().prepareState().setLocal(true).execute().actionGet().getState(); - assertThat(state.blocks().hasGlobalBlock(Discovery.NO_MASTER_BLOCK), equalTo(true)); + assertThat(state.blocks().hasGlobalBlock(DiscoverySettings.NO_MASTER_BLOCK_ID), equalTo(true)); assertThat(state.nodes().size(), equalTo(1)); // verify that we still see the local node in the cluster state logger.info("--> starting the previous master node again..."); @@ -112,9 +112,9 @@ public class MinimumMasterNodesTests extends ElasticsearchIntegrationTest { assertThat(clusterHealthResponse.isTimedOut(), equalTo(false)); state = client().admin().cluster().prepareState().setLocal(true).execute().actionGet().getState(); - assertThat(state.blocks().hasGlobalBlock(Discovery.NO_MASTER_BLOCK), equalTo(false)); + assertThat(state.blocks().hasGlobalBlock(DiscoverySettings.NO_MASTER_BLOCK_ID), equalTo(false)); state = client().admin().cluster().prepareState().setLocal(true).execute().actionGet().getState(); - assertThat(state.blocks().hasGlobalBlock(Discovery.NO_MASTER_BLOCK), equalTo(false)); + assertThat(state.blocks().hasGlobalBlock(DiscoverySettings.NO_MASTER_BLOCK_ID), equalTo(false)); state = client().admin().cluster().prepareState().execute().actionGet().getState(); assertThat(state.nodes().size(), equalTo(2)); @@ -135,7 +135,7 @@ public class MinimumMasterNodesTests extends ElasticsearchIntegrationTest { assertThat(awaitBusy(new 
Predicate() { public boolean apply(Object obj) { ClusterState state = client().admin().cluster().prepareState().setLocal(true).execute().actionGet().getState(); - return state.blocks().hasGlobalBlock(Discovery.NO_MASTER_BLOCK); + return state.blocks().hasGlobalBlock(DiscoverySettings.NO_MASTER_BLOCK_ID); } }), equalTo(true)); @@ -146,9 +146,9 @@ public class MinimumMasterNodesTests extends ElasticsearchIntegrationTest { assertThat(clusterHealthResponse.isTimedOut(), equalTo(false)); state = client().admin().cluster().prepareState().setLocal(true).execute().actionGet().getState(); - assertThat(state.blocks().hasGlobalBlock(Discovery.NO_MASTER_BLOCK), equalTo(false)); + assertThat(state.blocks().hasGlobalBlock(DiscoverySettings.NO_MASTER_BLOCK_ID), equalTo(false)); state = client().admin().cluster().prepareState().setLocal(true).execute().actionGet().getState(); - assertThat(state.blocks().hasGlobalBlock(Discovery.NO_MASTER_BLOCK), equalTo(false)); + assertThat(state.blocks().hasGlobalBlock(DiscoverySettings.NO_MASTER_BLOCK_ID), equalTo(false)); state = client().admin().cluster().prepareState().execute().actionGet().getState(); assertThat(state.nodes().size(), equalTo(2)); @@ -183,21 +183,21 @@ public class MinimumMasterNodesTests extends ElasticsearchIntegrationTest { awaitBusy(new Predicate() { public boolean apply(Object obj) { ClusterState state = client().admin().cluster().prepareState().setLocal(true).execute().actionGet().getState(); - return state.blocks().hasGlobalBlock(Discovery.NO_MASTER_BLOCK); + return state.blocks().hasGlobalBlock(DiscoverySettings.NO_MASTER_BLOCK_ID); } }); awaitBusy(new Predicate() { public boolean apply(Object obj) { ClusterState state = client().admin().cluster().prepareState().setLocal(true).execute().actionGet().getState(); - return state.blocks().hasGlobalBlock(Discovery.NO_MASTER_BLOCK); + return state.blocks().hasGlobalBlock(DiscoverySettings.NO_MASTER_BLOCK_ID); } }); state = 
client().admin().cluster().prepareState().setLocal(true).execute().actionGet().getState(); - assertThat(state.blocks().hasGlobalBlock(Discovery.NO_MASTER_BLOCK), equalTo(true)); + assertThat(state.blocks().hasGlobalBlock(DiscoverySettings.NO_MASTER_BLOCK_ID), equalTo(true)); state = client().admin().cluster().prepareState().setLocal(true).execute().actionGet().getState(); - assertThat(state.blocks().hasGlobalBlock(Discovery.NO_MASTER_BLOCK), equalTo(true)); + assertThat(state.blocks().hasGlobalBlock(DiscoverySettings.NO_MASTER_BLOCK_ID), equalTo(true)); logger.info("--> start two more nodes"); internalCluster().startNode(settings); @@ -298,9 +298,9 @@ public class MinimumMasterNodesTests extends ElasticsearchIntegrationTest { boolean success = true; for (Client client : internalCluster()) { ClusterState state = client.admin().cluster().prepareState().setLocal(true).execute().actionGet().getState(); - success &= state.blocks().hasGlobalBlock(Discovery.NO_MASTER_BLOCK); + success &= state.blocks().hasGlobalBlock(DiscoverySettings.NO_MASTER_BLOCK_ID); if (logger.isDebugEnabled()) { - logger.debug("Checking for NO_MASTER_BLOCK on client: {} NO_MASTER_BLOCK: [{}]", client, state.blocks().hasGlobalBlock(Discovery.NO_MASTER_BLOCK)); + logger.debug("Checking for NO_MASTER_BLOCK on client: {} NO_MASTER_BLOCK: [{}]", client, state.blocks().hasGlobalBlock(DiscoverySettings.NO_MASTER_BLOCK_ID)); } } return success; diff --git a/src/test/java/org/elasticsearch/cluster/NoMasterNodeTests.java b/src/test/java/org/elasticsearch/cluster/NoMasterNodeTests.java index a9464abc7cd..95dbbf652ab 100644 --- a/src/test/java/org/elasticsearch/cluster/NoMasterNodeTests.java +++ b/src/test/java/org/elasticsearch/cluster/NoMasterNodeTests.java @@ -19,14 +19,20 @@ package org.elasticsearch.cluster; +import com.google.common.base.Predicate; import org.elasticsearch.action.ActionRequestBuilder; +import org.elasticsearch.action.admin.cluster.state.ClusterStateResponse; import 
org.elasticsearch.action.bulk.BulkRequestBuilder; +import org.elasticsearch.action.count.CountResponse; +import org.elasticsearch.action.get.GetResponse; import org.elasticsearch.action.percolate.PercolateSourceBuilder; +import org.elasticsearch.action.search.SearchResponse; import org.elasticsearch.cluster.block.ClusterBlockException; +import org.elasticsearch.cluster.metadata.IndexMetaData; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.unit.TimeValue; import org.elasticsearch.common.xcontent.XContentFactory; -import org.elasticsearch.discovery.Discovery; +import org.elasticsearch.discovery.DiscoverySettings; import org.elasticsearch.discovery.MasterNotDiscoveredException; import org.elasticsearch.rest.RestStatus; import org.elasticsearch.script.ScriptService; @@ -40,7 +46,7 @@ import java.util.HashMap; import static org.elasticsearch.action.percolate.PercolateSourceBuilder.docBuilder; import static org.elasticsearch.common.settings.ImmutableSettings.settingsBuilder; import static org.elasticsearch.test.ElasticsearchIntegrationTest.Scope; -import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertThrows; +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.*; import static org.hamcrest.Matchers.*; /** @@ -61,6 +67,7 @@ public class NoMasterNodeTests extends ElasticsearchIntegrationTest { .put("discovery.zen.minimum_master_nodes", 2) .put("discovery.zen.ping_timeout", "200ms") .put("discovery.initial_state_timeout", "500ms") + .put(DiscoverySettings.NO_MASTER_BLOCK, "all") .build(); TimeValue timeout = TimeValue.timeValueMillis(200); @@ -75,7 +82,7 @@ public class NoMasterNodeTests extends ElasticsearchIntegrationTest { @Override public void run() { ClusterState state = client().admin().cluster().prepareState().setLocal(true).execute().actionGet().getState(); - assertTrue(state.blocks().hasGlobalBlock(Discovery.NO_MASTER_BLOCK)); + 
assertTrue(state.blocks().hasGlobalBlock(DiscoverySettings.NO_MASTER_BLOCK_ID)); } }); @@ -128,7 +135,7 @@ public class NoMasterNodeTests extends ElasticsearchIntegrationTest { ClusterBlockException.class, RestStatus.SERVICE_UNAVAILABLE ); - checkWriteAction(autoCreateIndex, timeout, + checkWriteAction(false, timeout, client().prepareUpdate("test", "type1", "1").setScript("test script", ScriptService.ScriptType.INLINE).setTimeout(timeout)); @@ -136,7 +143,7 @@ public class NoMasterNodeTests extends ElasticsearchIntegrationTest { client().prepareUpdate("no_index", "type1", "1").setScript("test script", ScriptService.ScriptType.INLINE).setTimeout(timeout)); - checkWriteAction(autoCreateIndex, timeout, + checkWriteAction(false, timeout, client().prepareIndex("test", "type1", "1").setSource(XContentFactory.jsonBuilder().startObject().endObject()).setTimeout(timeout)); checkWriteAction(autoCreateIndex, timeout, @@ -145,9 +152,7 @@ public class NoMasterNodeTests extends ElasticsearchIntegrationTest { BulkRequestBuilder bulkRequestBuilder = client().prepareBulk(); bulkRequestBuilder.add(client().prepareIndex("test", "type1", "1").setSource(XContentFactory.jsonBuilder().startObject().endObject())); bulkRequestBuilder.add(client().prepareIndex("test", "type1", "2").setSource(XContentFactory.jsonBuilder().startObject().endObject())); - // today, we clear the metadata on when there is no master, so it will go through the auto create logic and - // add it... 
(if autoCreate is set to true) - checkBulkAction(autoCreateIndex, bulkRequestBuilder); + checkBulkAction(false, bulkRequestBuilder); bulkRequestBuilder = client().prepareBulk(); bulkRequestBuilder.add(client().prepareIndex("no_index", "type1", "1").setSource(XContentFactory.jsonBuilder().startObject().endObject())); @@ -203,4 +208,75 @@ public class NoMasterNodeTests extends ElasticsearchIntegrationTest { } } } + + @Test + public void testNoMasterActions_writeMasterBlock() throws Exception { + Settings settings = settingsBuilder() + .put("discovery.type", "zen") + .put("action.auto_create_index", false) + .put("discovery.zen.minimum_master_nodes", 2) + .put("discovery.zen.ping_timeout", "200ms") + .put("discovery.initial_state_timeout", "500ms") + .put(DiscoverySettings.NO_MASTER_BLOCK, "write") + .build(); + + internalCluster().startNode(settings); + // start a second node, create an index, and then shut it down so we have no master block + internalCluster().startNode(settings); + prepareCreate("test1").setSettings(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1).get(); + prepareCreate("test2").setSettings(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 2, IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 0).get(); + client().admin().cluster().prepareHealth("_all").setWaitForGreenStatus().get(); + client().prepareIndex("test1", "type1", "1").setSource("field", "value1").get(); + client().prepareIndex("test2", "type1", "1").setSource("field", "value1").get(); + refresh(); + + ensureSearchable("test1", "test2"); + + ClusterStateResponse clusterState = client().admin().cluster().prepareState().get(); + logger.info("Cluster state:\n" + clusterState.getState().prettyPrint()); + + internalCluster().stopRandomDataNode(); + assertThat(awaitBusy(new Predicate() { + public boolean apply(Object o) { + ClusterState state = client().admin().cluster().prepareState().setLocal(true).get().getState(); + return state.blocks().hasGlobalBlock(DiscoverySettings.NO_MASTER_BLOCK_ID); + } + }), 
equalTo(true)); + + + GetResponse getResponse = client().prepareGet("test1", "type1", "1").get(); + assertExists(getResponse); + + CountResponse countResponse = client().prepareCount("test1").get(); + assertHitCount(countResponse, 1l); + + SearchResponse searchResponse = client().prepareSearch("test1").get(); + assertHitCount(searchResponse, 1l); + + countResponse = client().prepareCount("test2").get(); + assertThat(countResponse.getTotalShards(), equalTo(2)); + assertThat(countResponse.getSuccessfulShards(), equalTo(1)); + + TimeValue timeout = TimeValue.timeValueMillis(200); + long now = System.currentTimeMillis(); + try { + client().prepareUpdate("test1", "type1", "1").setDoc("field", "value2").setTimeout(timeout).get(); + fail("Expected ClusterBlockException"); + } catch (ClusterBlockException e) { + assertThat(System.currentTimeMillis() - now, greaterThan(timeout.millis() - 50)); + assertThat(e.status(), equalTo(RestStatus.SERVICE_UNAVAILABLE)); + } + + now = System.currentTimeMillis(); + try { + client().prepareIndex("test1", "type1", "1").setSource(XContentFactory.jsonBuilder().startObject().endObject()).setTimeout(timeout).get(); + fail("Expected ClusterBlockException"); + } catch (ClusterBlockException e) { + assertThat(System.currentTimeMillis() - now, greaterThan(timeout.millis() - 50)); + assertThat(e.status(), equalTo(RestStatus.SERVICE_UNAVAILABLE)); + } + + internalCluster().startNode(settings); + client().admin().cluster().prepareHealth().setWaitForGreenStatus().setWaitForNodes("2").get(); + } } diff --git a/src/test/java/org/elasticsearch/discovery/ClusterDiscoveryConfiguration.java b/src/test/java/org/elasticsearch/discovery/ClusterDiscoveryConfiguration.java new file mode 100644 index 00000000000..ded26ddc305 --- /dev/null +++ b/src/test/java/org/elasticsearch/discovery/ClusterDiscoveryConfiguration.java @@ -0,0 +1,141 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. 
See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.discovery;
+
+import com.carrotsearch.randomizedtesting.RandomizedTest;
+import com.google.common.primitives.Ints;
+import org.elasticsearch.common.settings.ImmutableSettings;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.test.ElasticsearchIntegrationTest;
+import org.elasticsearch.test.InternalTestCluster;
+import org.elasticsearch.test.SettingsSource;
+import org.elasticsearch.transport.local.LocalTransport;
+
+import java.util.HashSet;
+import java.util.Set;
+import java.util.concurrent.atomic.AtomicInteger;
+
+/**
+ * {@link SettingsSource} that hands the same settings to every node and to transport clients:
+ * {@link #DEFAULT_SETTINGS} with the caller-supplied extra settings applied after them.
+ */
+public class ClusterDiscoveryConfiguration extends SettingsSource {
+
+    /** Baseline for every configuration: local gateway and zen discovery. */
+    public static final Settings DEFAULT_SETTINGS = ImmutableSettings.settingsBuilder()
+            .put("gateway.type", "local")
+            .put("discovery.type", "zen")
+            .build();
+
+    final int numOfNodes;
+
+    final Settings baseSettings;
+
+    /**
+     * @param numOfNodes number of nodes this configuration is meant for
+     */
+    public ClusterDiscoveryConfiguration(int numOfNodes) {
+        this(numOfNodes, ImmutableSettings.EMPTY);
+    }
+
+    /**
+     * @param numOfNodes    number of nodes this configuration is meant for
+     * @param extraSettings settings put on top of {@link #DEFAULT_SETTINGS}
+     */
+    public ClusterDiscoveryConfiguration(int numOfNodes, Settings extraSettings) {
+        this.numOfNodes = numOfNodes;
+        this.baseSettings = ImmutableSettings.builder().put(DEFAULT_SETTINGS).put(extraSettings).build();
+    }
+
+    @Override
+    public Settings node(int nodeOrdinal) {
+        return baseSettings;
+    }
+
+    @Override
+    public Settings transportClient() {
+        return baseSettings;
+    }
+
+    /**
+     * Variant that disables multicast pinging and gives every node an explicit unicast host list.
+     * Hosts are identified by node ordinal; each instance reserves its own base port for the
+     * non-local transport mode.
+     */
+    public static class UnicastZen extends ClusterDiscoveryConfiguration {
+
+        private static final AtomicInteger portRangeCounter = new AtomicInteger();
+
+        private final int[] unicastHostOrdinals;
+        private final int basePort;
+
+        /** Uses every node as a unicast host. */
+        public UnicastZen(int numOfNodes) {
+            this(numOfNodes, numOfNodes);
+        }
+
+        /** Uses every node as a unicast host, with extra settings. */
+        public UnicastZen(int numOfNodes, Settings extraSettings) {
+            this(numOfNodes, numOfNodes, extraSettings);
+        }
+
+        /** Uses {@code numOfUnicastHosts} randomly chosen nodes as unicast hosts. */
+        public UnicastZen(int numOfNodes, int numOfUnicastHosts) {
+            this(numOfNodes, numOfUnicastHosts, ImmutableSettings.EMPTY);
+        }
+
+        /**
+         * @param numOfNodes        number of nodes this configuration is meant for
+         * @param numOfUnicastHosts how many distinct node ordinals to list as unicast hosts; when equal to
+         *                          {@code numOfNodes} all ordinals are used, otherwise they are sampled randomly
+         * @param extraSettings     settings put on top of {@link #DEFAULT_SETTINGS}
+         * @throws IllegalArgumentException if {@code numOfUnicastHosts} is negative or larger than {@code numOfNodes}
+         */
+        public UnicastZen(int numOfNodes, int numOfUnicastHosts, Settings extraSettings) {
+            super(numOfNodes, extraSettings);
+            if (numOfUnicastHosts < 0 || numOfUnicastHosts > numOfNodes) {
+                // only numOfNodes distinct ordinals exist, so the sampling loop below could never finish
+                throw new IllegalArgumentException("numOfUnicastHosts [" + numOfUnicastHosts
+                        + "] must be between 0 and numOfNodes [" + numOfNodes + "]");
+            }
+            if (numOfUnicastHosts == numOfNodes) {
+                unicastHostOrdinals = new int[numOfNodes];
+                for (int i = 0; i < numOfNodes; i++) {
+                    unicastHostOrdinals[i] = i;
+                }
+            } else {
+                // draw random ordinals until we have the requested number of distinct ones
+                Set<Integer> ordinals = new HashSet<>(numOfUnicastHosts);
+                while (ordinals.size() != numOfUnicastHosts) {
+                    ordinals.add(RandomizedTest.randomInt(numOfNodes - 1));
+                }
+                unicastHostOrdinals = Ints.toArray(ordinals);
+            }
+            this.basePort = calcBasePort();
+        }
+
+        /** Uses exactly the given node ordinals as unicast hosts. */
+        public UnicastZen(int numOfNodes, int[] unicastHostOrdinals) {
+            this(numOfNodes, ImmutableSettings.EMPTY, unicastHostOrdinals);
+        }
+
+        /** Uses exactly the given node ordinals as unicast hosts, with extra settings. */
+        public UnicastZen(int numOfNodes, Settings extraSettings, int[] unicastHostOrdinals) {
+            super(numOfNodes, extraSettings);
+            this.unicastHostOrdinals = unicastHostOrdinals;
+            this.basePort = calcBasePort();
+        }
+
+        /**
+         * Reserves a fresh base port: a 1000-port band per child JVM, then a 100-port slice per instance.
+         */
+        private static int calcBasePort() {
+            // NOTE(review): for CHILD_JVM_ID % 60 >= 56 the result is above 65535, and more than 9 instances
+            // in one JVM spill into the next JVM's band - confirm this cannot happen in practice
+            return 10000 +
+                    1000 * (ElasticsearchIntegrationTest.CHILD_JVM_ID % 60) + // up to 60 jvms
+                    100 * portRangeCounter.incrementAndGet(); // up to 100 nodes
+        }
+
+
+        @Override
+        public Settings node(int nodeOrdinal) {
+            ImmutableSettings.Builder builder = ImmutableSettings.builder()
+                    .put("discovery.zen.ping.multicast.enabled", false);
+
+            String[] unicastHosts = new String[unicastHostOrdinals.length];
+            String mode = baseSettings.get("node.mode", InternalTestCluster.NODE_MODE);
+            if ("local".equals(mode)) {
+                // local transport: nodes are addressed by a name derived from their ordinal
+                builder.put(LocalTransport.TRANSPORT_LOCAL_ADDRESS, "node_" + nodeOrdinal);
+                for (int i = 0; i < unicastHosts.length; i++) {
+                    unicastHosts[i] = "node_" + unicastHostOrdinals[i];
+                }
+            } else {
+                // we need to pin the node port & host so we'd know where to point things
+                builder.put("transport.tcp.port", basePort + nodeOrdinal);
+                builder.put("transport.host", "localhost");
+                for (int i = 0; i < unicastHosts.length; i++) {
+                    unicastHosts[i] = "localhost:" + (basePort + unicastHostOrdinals[i]);
+                }
+            }
+            builder.putArray("discovery.zen.ping.unicast.hosts", unicastHosts);
+            return builder.put(super.node(nodeOrdinal)).build();
+        }
+    }
+}
diff --git a/src/test/java/org/elasticsearch/discovery/DiscoveryWithNetworkFailuresTests.java b/src/test/java/org/elasticsearch/discovery/DiscoveryWithNetworkFailuresTests.java
deleted file mode 100644
index d2987f77ad0..00000000000
--- a/src/test/java/org/elasticsearch/discovery/DiscoveryWithNetworkFailuresTests.java
+++ /dev/null
@@ -1,145 +0,0 @@
-/*
- * Licensed to Elasticsearch under one or more contributor
- * license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright
- * ownership. Elasticsearch licenses this file to you under
- * the Apache License, Version 2.0 (the "License"); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */ - -package org.elasticsearch.discovery; - -import com.google.common.base.Predicate; -import org.apache.lucene.util.LuceneTestCase; -import org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse; -import org.elasticsearch.client.Client; -import org.elasticsearch.cluster.ClusterState; -import org.elasticsearch.cluster.node.DiscoveryNode; -import org.elasticsearch.cluster.node.DiscoveryNodes; -import org.elasticsearch.common.Priority; -import org.elasticsearch.common.settings.ImmutableSettings; -import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.test.ElasticsearchIntegrationTest; -import org.elasticsearch.test.transport.MockTransportService; -import org.elasticsearch.transport.TransportModule; -import org.elasticsearch.transport.TransportService; -import org.junit.Test; - -import java.util.Arrays; -import java.util.List; - -import static org.elasticsearch.test.ElasticsearchIntegrationTest.ClusterScope; -import static org.elasticsearch.test.ElasticsearchIntegrationTest.Scope; -import static org.hamcrest.Matchers.*; - -/** - */ -@ClusterScope(scope= Scope.SUITE, numDataNodes =0) -public class DiscoveryWithNetworkFailuresTests extends ElasticsearchIntegrationTest { - - @Test - @LuceneTestCase.AwaitsFix(bugUrl = "https://github.com/elasticsearch/elasticsearch/issues/2488") - public void failWithMinimumMasterNodesConfigured() throws Exception { - final Settings settings = ImmutableSettings.settingsBuilder() - .put("discovery.zen.minimum_master_nodes", 2) - .put("discovery.zen.fd.ping_timeout", "1s") // <-- for hitting simulated network failures quickly - .put(TransportModule.TRANSPORT_SERVICE_TYPE_KEY, MockTransportService.class.getName()) - .build(); - Listnodes = internalCluster().startNodesAsync(3, settings).get(); - - // Wait until a green status has been reaches and 3 nodes are part of the cluster - List nodesList = Arrays.asList(nodes.toArray(new String[3])); - ClusterHealthResponse clusterHealthResponse = 
client().admin().cluster().prepareHealth() - .setWaitForEvents(Priority.LANGUID) - .setWaitForNodes("3") - .get(); - assertThat(clusterHealthResponse.isTimedOut(), is(false)); - - // Figure out what is the elected master node - DiscoveryNode masterDiscoNode = null; - for (String node : nodesList) { - ClusterState state = internalCluster().client(node).admin().cluster().prepareState().setLocal(true).execute().actionGet().getState(); - assertThat(state.nodes().size(), equalTo(3)); - if (masterDiscoNode == null) { - masterDiscoNode = state.nodes().masterNode(); - } else { - assertThat(state.nodes().masterNode(), equalTo(masterDiscoNode)); - } - } - assert masterDiscoNode != null; - logger.info("---> legit elected master node=" + masterDiscoNode); - final Client masterClient = internalCluster().masterClient(); - - // Everything is stable now, it is now time to simulate evil... - - // Pick a node that isn't the elected master. - String unluckyNode = null; - for (String node : nodesList) { - if (!node.equals(masterDiscoNode.getName())) { - unluckyNode = node; - } - } - assert unluckyNode != null; - - // Simulate a network issue between the unlucky node and elected master node in both directions. - addFailToSendNoConnectRule(masterDiscoNode.getName(), unluckyNode); - addFailToSendNoConnectRule(unluckyNode, masterDiscoNode.getName()); - try { - // Wait until elected master has removed that the unlucky node... - awaitBusy(new Predicate() { - @Override - public boolean apply(Object input) { - return masterClient.admin().cluster().prepareState().setLocal(true).get().getState().nodes().size() == 2; - } - }); - - // The unlucky node must report *no* master node, since it can't connect to master and in fact it should - // continuously ping until network failures have been resolved. 
- Client isolatedNodeClient = internalCluster().client(unluckyNode); - ClusterState localClusterState = isolatedNodeClient.admin().cluster().prepareState().setLocal(true).get().getState(); - DiscoveryNodes localDiscoveryNodes = localClusterState.nodes(); - assertThat(localDiscoveryNodes.masterNode(), nullValue()); - } finally { - // stop simulating network failures, from this point on the unlucky node is able to rejoin - // We also need to do this even if assertions fail, since otherwise the test framework can't work properly - clearNoConnectRule(masterDiscoNode.getName(), unluckyNode); - clearNoConnectRule(unluckyNode, masterDiscoNode.getName()); - } - - // Wait until the master node sees all 3 nodes again. - clusterHealthResponse = masterClient.admin().cluster().prepareHealth() - .setWaitForEvents(Priority.LANGUID) - .setWaitForNodes("3") - .get(); - assertThat(clusterHealthResponse.isTimedOut(), is(false)); - - for (String node : nodesList) { - ClusterState state = internalCluster().client(node).admin().cluster().prepareState().setLocal(true).execute().actionGet().getState(); - assertThat(state.nodes().size(), equalTo(3)); - // The elected master shouldn't have changed, since the unlucky node never could have elected himself as - // master since m_m_n of 2 could never be satisfied. 
- assertThat(state.nodes().masterNode(), equalTo(masterDiscoNode)); - } - } - - private void addFailToSendNoConnectRule(String fromNode, String toNode) { - TransportService mockTransportService = internalCluster().getInstance(TransportService.class, fromNode); - ((MockTransportService) mockTransportService).addFailToSendNoConnectRule(internalCluster().getInstance(Discovery.class, toNode).localNode()); - } - - private void clearNoConnectRule(String fromNode, String toNode) { - TransportService mockTransportService = internalCluster().getInstance(TransportService.class, fromNode); - ((MockTransportService) mockTransportService).clearRule(internalCluster().getInstance(Discovery.class, toNode).localNode()); - } - -} diff --git a/src/test/java/org/elasticsearch/discovery/DiscoveryWithServiceDisruptions.java b/src/test/java/org/elasticsearch/discovery/DiscoveryWithServiceDisruptions.java new file mode 100644 index 00000000000..82abe2eccb1 --- /dev/null +++ b/src/test/java/org/elasticsearch/discovery/DiscoveryWithServiceDisruptions.java @@ -0,0 +1,863 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.discovery; + +import com.google.common.base.Predicate; +import org.apache.lucene.util.LuceneTestCase; +import org.elasticsearch.ElasticsearchException; +import org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse; +import org.elasticsearch.action.admin.cluster.state.ClusterStateResponse; +import org.elasticsearch.action.get.GetResponse; +import org.elasticsearch.action.index.IndexResponse; +import org.elasticsearch.client.Client; +import org.elasticsearch.cluster.ClusterService; +import org.elasticsearch.cluster.ClusterState; +import org.elasticsearch.cluster.block.ClusterBlock; +import org.elasticsearch.cluster.block.ClusterBlockLevel; +import org.elasticsearch.cluster.metadata.IndexMetaData; +import org.elasticsearch.cluster.node.DiscoveryNode; +import org.elasticsearch.cluster.node.DiscoveryNodes; +import org.elasticsearch.cluster.routing.operation.hash.djb.DjbHashFunction; +import org.elasticsearch.common.Nullable; +import org.elasticsearch.common.Priority; +import org.elasticsearch.common.Strings; +import org.elasticsearch.common.settings.ImmutableSettings; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.unit.TimeValue; +import org.elasticsearch.discovery.zen.ZenDiscovery; +import org.elasticsearch.discovery.zen.elect.ElectMasterService; +import org.elasticsearch.discovery.zen.fd.FaultDetection; +import org.elasticsearch.discovery.zen.membership.MembershipAction; +import org.elasticsearch.discovery.zen.ping.ZenPing; +import org.elasticsearch.discovery.zen.ping.ZenPingService; +import org.elasticsearch.discovery.zen.ping.unicast.UnicastZenPing; +import org.elasticsearch.discovery.zen.publish.PublishClusterStateAction; +import org.elasticsearch.test.ElasticsearchIntegrationTest; +import org.elasticsearch.test.InternalTestCluster; +import org.elasticsearch.test.disruption.*; +import org.elasticsearch.test.junit.annotations.TestLogging; +import 
org.elasticsearch.test.transport.MockTransportService; +import org.elasticsearch.transport.*; +import org.junit.Before; +import org.junit.Test; + +import java.io.IOException; +import java.util.*; +import java.util.concurrent.*; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicReference; + +import static org.elasticsearch.test.ElasticsearchIntegrationTest.ClusterScope; +import static org.elasticsearch.test.ElasticsearchIntegrationTest.Scope; +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; +import static org.hamcrest.Matchers.*; + +/** + */ +@LuceneTestCase.Slow +@TestLogging("discovery.zen:TRACE") +@ClusterScope(scope = Scope.TEST, numDataNodes = 0, transportClientRatio = 0) +public class DiscoveryWithServiceDisruptions extends ElasticsearchIntegrationTest { + + private static final TimeValue DISRUPTION_HEALING_OVERHEAD = TimeValue.timeValueSeconds(40); // we use 30s as timeout in many places. 
+
+    /** Settings source for the current test; reset before each test and created on first cluster start. */
+    private ClusterDiscoveryConfiguration discoveryConfig;
+
+
+    @Override
+    protected Settings nodeSettings(int nodeOrdinal) {
+        return discoveryConfig.node(nodeOrdinal);
+    }
+
+    @Before
+    public void clearConfig() {
+        discoveryConfig = null;
+    }
+
+    @Override
+    protected int numberOfShards() {
+        return 3;
+    }
+
+    @Override
+    protected int numberOfReplicas() {
+        return 1;
+    }
+
+    /** Starts a cluster with the default (majority) minimum master nodes. */
+    private List<String> startCluster(int numberOfNodes) throws ExecutionException, InterruptedException {
+        return startCluster(numberOfNodes, -1);
+    }
+
+    /**
+     * Starts a cluster using a randomly chosen discovery flavour (multicast or unicast).
+     *
+     * @param minimumMasterNode minimum master nodes to configure; a negative value means "majority of the nodes"
+     * @return the value returned by starting the nodes (the callers use the elements as node names)
+     */
+    private List<String> startCluster(int numberOfNodes, int minimumMasterNode) throws ExecutionException, InterruptedException {
+        if (randomBoolean()) {
+            return startMulticastCluster(numberOfNodes, minimumMasterNode);
+        } else {
+            return startUnicastCluster(numberOfNodes, null, minimumMasterNode);
+        }
+    }
+
+    static final Settings DEFAULT_SETTINGS = ImmutableSettings.builder()
+            .put(FaultDetection.SETTING_PING_TIMEOUT, "1s") // for hitting simulated network failures quickly
+            .put(FaultDetection.SETTING_PING_RETRIES, "1") // for hitting simulated network failures quickly
+            .put("discovery.zen.join_timeout", "10s")  // long enough to induce failures, but not so long that the test times out
+            .put(DiscoverySettings.PUBLISH_TIMEOUT, "1s") // <-- for hitting simulated network failures quickly
+            .put("http.enabled", false) // just to make test quicker
+            .put("gateway.local.list_timeout", "10s") // long enough to induce failures, but not so long that the test times out
+            .put(TransportModule.TRANSPORT_SERVICE_TYPE_KEY, MockTransportService.class.getName())
+            .build();
+
+    /**
+     * Builds the per-node settings shared by both discovery flavours: {@link #DEFAULT_SETTINGS} plus
+     * the minimum master nodes, which defaults to a majority when a negative value is passed.
+     */
+    private static Settings discoveryNodeSettings(int numberOfNodes, int minimumMasterNode) {
+        if (minimumMasterNode < 0) {
+            minimumMasterNode = numberOfNodes / 2 + 1;
+        }
+        // TODO: Rarely use default settings for some of these
+        return ImmutableSettings.builder()
+                .put(DEFAULT_SETTINGS)
+                .put(ElectMasterService.DISCOVERY_ZEN_MINIMUM_MASTER_NODES, minimumMasterNode)
+                .build();
+    }
+
+    private List<String> startMulticastCluster(int numberOfNodes, int minimumMasterNode) throws ExecutionException, InterruptedException {
+        Settings settings = discoveryNodeSettings(numberOfNodes, minimumMasterNode);
+
+        // an already installed configuration is kept as is
+        if (discoveryConfig == null) {
+            discoveryConfig = new ClusterDiscoveryConfiguration(numberOfNodes, settings);
+        }
+        List<String> nodes = internalCluster().startNodesAsync(numberOfNodes).get();
+        ensureStableCluster(numberOfNodes);
+
+        return nodes;
+    }
+
+    /**
+     * @param unicastHostsOrdinals ordinals of the nodes to list as unicast hosts, or {@code null} to let
+     *                             the configuration use its default
+     */
+    private List<String> startUnicastCluster(int numberOfNodes, @Nullable int[] unicastHostsOrdinals, int minimumMasterNode) throws ExecutionException, InterruptedException {
+        Settings nodeSettings = discoveryNodeSettings(numberOfNodes, minimumMasterNode);
+
+        // an already installed configuration is kept as is
+        if (discoveryConfig == null) {
+            if (unicastHostsOrdinals == null) {
+                discoveryConfig = new ClusterDiscoveryConfiguration.UnicastZen(numberOfNodes, nodeSettings);
+            } else {
+                discoveryConfig = new ClusterDiscoveryConfiguration.UnicastZen(numberOfNodes, nodeSettings, unicastHostsOrdinals);
+            }
+        }
+        List<String> nodes = internalCluster().startNodesAsync(numberOfNodes).get();
+        ensureStableCluster(numberOfNodes);
+
+        // TODO: this is a temporary solution so that nodes will not base their reaction to a partition based on previous successful results
+        for (ZenPingService pingService : internalCluster().getInstances(ZenPingService.class)) {
+            for (ZenPing zenPing : pingService.zenPings()) {
+                if (zenPing instanceof UnicastZenPing) {
+                    ((UnicastZenPing) zenPing).clearTemporalReponses();
+                }
+            }
+        }
+
+        return nodes;
+    }
+
+
+    /**
+     * Test that no split brain occurs under partial network partition. 
See https://github.com/elasticsearch/elasticsearch/issues/2488
+     *
+     * @throws Exception
+     */
+    @Test
+    public void failWithMinimumMasterNodesConfigured() throws Exception {
+
+        List<String> nodes = startCluster(3);
+
+        // Figure out what is the elected master node
+        final String masterNode = internalCluster().getMasterName();
+        logger.info("---> legit elected master node=" + masterNode);
+
+        // Pick a node that isn't the elected master.
+        Set<String> nonMasters = new HashSet<>(nodes);
+        nonMasters.remove(masterNode);
+        final String unluckyNode = randomFrom(nonMasters.toArray(Strings.EMPTY_ARRAY));
+
+
+        // Simulate a network issue between the unlucky node and elected master node in both directions.
+
+        NetworkDisconnectPartition networkDisconnect = new NetworkDisconnectPartition(masterNode, unluckyNode, getRandom());
+        setDisruptionScheme(networkDisconnect);
+        networkDisconnect.startDisrupting();
+
+        // Wait until the elected master has removed the unlucky node...
+        ensureStableCluster(2, masterNode);
+
+        // The unlucky node must report *no* master node, since it can't connect to master and in fact it should
+        // continuously ping until network failures have been resolved. However,
+        // it may take a bit before the node detects it has been cut off from the elected master
+        assertNoMaster(unluckyNode);
+
+        networkDisconnect.stopDisrupting();
+
+        // Wait until the master node sees all 3 nodes again.
+        ensureStableCluster(3);
+
+        // The elected master shouldn't have changed, since the unlucky node never could have elected himself as
+        // master since m_m_n of 2 could never be satisfied.
+        assertMaster(masterNode, nodes);
+    }
+
+    /**
+     * Verify that the proper block is applied when nodes lose their master
+     */
+    @Test
+    @TestLogging(value = "cluster.service:TRACE,indices.recovery:TRACE")
+    public void testVerifyApiBlocksDuringPartition() throws Exception {
+        startCluster(3);
+
+        // Makes sure that the get request can be executed on each node locally:
+        assertAcked(prepareCreate("test").setSettings(ImmutableSettings.builder()
+                .put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1)
+                .put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 2)
+        ));
+
+        // Everything is stable now, it is now time to simulate evil...
+        // but first make sure we have no initializing shards and all is green
+        // (waiting for green here, because indexing / search in a yellow index is fine as long as no other nodes go down)
+        ensureGreen("test");
+
+        NetworkPartition networkPartition = addRandomPartition();
+
+        final String isolatedNode = networkPartition.getMinoritySide().get(0);
+        final String nonIsolatedNode = networkPartition.getMajoritySide().get(0);
+
+        // Simulate a network issue between the unlucky node and the rest of the cluster.
+        networkPartition.startDisrupting();
+
+
+        // The unlucky node must report *no* master node, since it can't connect to master and in fact it should
+        // continuously ping until network failures have been resolved. However,
+        // it may take a bit before the node detects it has been cut off from the elected master
+        logger.info("waiting for isolated node [{}] to have no master", isolatedNode);
+        assertNoMaster(isolatedNode, DiscoverySettings.NO_MASTER_BLOCK_WRITES, TimeValue.timeValueSeconds(10));
+
+
+        logger.info("wait until elected master has been removed and a new 2 node cluster was from (via [{}])", isolatedNode);
+        ensureStableCluster(2, nonIsolatedNode);
+
+        // every node on the majority side must have a master and no global blocks
+        for (String node : networkPartition.getMajoritySide()) {
+            ClusterState nodeState = getNodeClusterState(node);
+            boolean hasMaster = nodeState.nodes().getMasterNode() != null;
+            boolean hasGlobalBlocks = !nodeState.blocks().global().isEmpty();
+            if (!hasMaster || hasGlobalBlocks) {
+                fail("node [" + node + "] has no master or has blocks, despite of being on the right side of the partition. State dump:\n"
+                        + nodeState.prettyPrint());
+            }
+        }
+
+
+        networkPartition.stopDisrupting();
+
+        // Wait until the master node sees all 3 nodes again.
+        ensureStableCluster(3, new TimeValue(DISRUPTION_HEALING_OVERHEAD.millis() + networkPartition.expectedTimeToHeal().millis()));
+
+        logger.info("Verify no master block with {} set to {}", DiscoverySettings.NO_MASTER_BLOCK, "all");
+        client().admin().cluster().prepareUpdateSettings()
+                .setTransientSettings(ImmutableSettings.builder().put(DiscoverySettings.NO_MASTER_BLOCK, "all"))
+                .get();
+
+        networkPartition.startDisrupting();
+
+
+        // The unlucky node must report *no* master node, since it can't connect to master and in fact it should
+        // continuously ping until network failures have been resolved. However,
+        // it may take a bit before the node detects it has been cut off from the elected master
+        logger.info("waiting for isolated node [{}] to have no master", isolatedNode);
+        assertNoMaster(isolatedNode, DiscoverySettings.NO_MASTER_BLOCK_ALL, TimeValue.timeValueSeconds(10));
+
+        // make sure we have stable cluster & cross partition recoveries are canceled by the removal of the missing node
+        // the unresponsive partition causes recoveries to only time out after 15m (default) and these will cause
+        // the test to fail due to unfreed resources
+        ensureStableCluster(2, nonIsolatedNode);
+
+    }
+
+    /**
+     * This test isolates the master from rest of the cluster, waits for a new master to be elected, restores the partition
+     * and verifies that all nodes agree on the new cluster state
+     */
+    @Test
+    @TestLogging("discovery.zen:TRACE,action:TRACE,cluster.service:TRACE,indices.recovery:TRACE,indices.cluster:TRACE")
+    public void testIsolateMasterAndVerifyClusterStateConsensus() throws Exception {
+        final List<String> nodes = startCluster(3);
+
+        assertAcked(prepareCreate("test")
+                .setSettings(ImmutableSettings.builder()
+                                .put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1 + randomInt(2))
+                                .put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, randomInt(2))
+                ));
+
+        ensureGreen();
+        String isolatedNode = internalCluster().getMasterName();
+        NetworkPartition networkPartition = addRandomIsolation(isolatedNode);
+        networkPartition.startDisrupting();
+
+        String nonIsolatedNode = networkPartition.getMajoritySide().get(0);
+
+        // make sure cluster reforms
+        ensureStableCluster(2, nonIsolatedNode);
+
+        // make sure the isolated node picks up on things.
+        assertNoMaster(isolatedNode, TimeValue.timeValueSeconds(40));
+
+        // restore isolation
+        networkPartition.stopDisrupting();
+
+        ensureStableCluster(3, new TimeValue(DISRUPTION_HEALING_OVERHEAD.millis() + networkPartition.expectedTimeToHeal().millis()));
+
+        logger.info("issue a reroute");
+        // trigger a reroute now, instead of waiting for the background reroute of RerouteService
+        assertAcked(client().admin().cluster().prepareReroute());
+        // and wait for it to finish and for the cluster to stabilize
+        ensureGreen("test");
+
+        // verify all cluster states are the same; the first node's state is the reference
+        ClusterState state = null;
+        for (String node : nodes) {
+            ClusterState nodeState = getNodeClusterState(node);
+            if (state == null) {
+                state = nodeState;
+                continue;
+            }
+            // assert nodes are identical
+            try {
+                assertEquals("unequal versions", state.version(), nodeState.version());
+                assertEquals("unequal node count", state.nodes().size(), nodeState.nodes().size());
+                assertEquals("different masters ", state.nodes().masterNodeId(), nodeState.nodes().masterNodeId());
+                assertEquals("different meta data version", state.metaData().version(), nodeState.metaData().version());
+                if (!state.routingTable().prettyPrint().equals(nodeState.routingTable().prettyPrint())) {
+                    fail("different routing");
+                }
+            } catch (AssertionError t) {
+                // re-fail with both full cluster states attached to ease debugging
+                fail("failed comparing cluster state: " + t.getMessage() + "\n" +
+                        "--- cluster state of node [" + nodes.get(0) + "]: ---\n" + state.prettyPrint() +
+                        "\n--- cluster state [" + node + "]: ---\n" + nodeState.prettyPrint());
+            }
+
+        }
+    }
+
+    /**
+     * Test that we do not lose documents whose indexing request was successful, under a randomly selected disruption scheme.
+     * We also collect & report the type of indexing failures that occur. 
+     */
+    @Test
+    @LuceneTestCase.AwaitsFix(bugUrl = "needs some more work to stabilize")
+    @TestLogging("action.index:TRACE,action.get:TRACE,discovery:TRACE,cluster.service:TRACE,indices.recovery:TRACE,indices.cluster:TRACE")
+    public void testAckedIndexing() throws Exception {
+        final List<String> nodes = startCluster(3);
+
+        assertAcked(prepareCreate("test")
+                .setSettings(ImmutableSettings.builder()
+                                .put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1 + randomInt(2))
+                                .put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, randomInt(2))
+                ));
+        ensureGreen();
+
+        ServiceDisruptionScheme disruptionScheme = addRandomDisruptionScheme();
+        logger.info("disruption scheme [{}] added", disruptionScheme);
+
+        final ConcurrentHashMap<String, String> ackedDocs = new ConcurrentHashMap<>(); // id -> node sent.
+
+        final AtomicBoolean stop = new AtomicBoolean(false);
+        List<Thread> indexers = new ArrayList<>(nodes.size());
+        List<Semaphore> semaphores = new ArrayList<>(nodes.size());
+        final AtomicInteger idGenerator = new AtomicInteger(0);
+        // replaced before every indexing round; indexers count it down once per attempted doc
+        final AtomicReference<CountDownLatch> countDownLatchRef = new AtomicReference<>();
+        final List<Exception> exceptedExceptions = Collections.synchronizedList(new ArrayList<Exception>());
+
+        logger.info("starting indexers");
+        try {
+            // one indexer thread per node; each indexes one doc per permit released on its semaphore
+            for (final String node : nodes) {
+                final Semaphore semaphore = new Semaphore(0);
+                semaphores.add(semaphore);
+                final Client client = client(node);
+                final String name = "indexer_" + indexers.size();
+                final int numPrimaries = getNumShards("test").numPrimaries;
+                Thread thread = new Thread(new Runnable() {
+                    @Override
+                    public void run() {
+                        while (!stop.get()) {
+                            String id = null;
+                            try {
+                                if (!semaphore.tryAcquire(10, TimeUnit.SECONDS)) {
+                                    continue;
+                                }
+                                logger.info("[{}] Acquired semaphore and it has {} permits left", name, semaphore.availablePermits());
+                                try {
+                                    id = Integer.toString(idGenerator.incrementAndGet());
+                                    int shard = ((InternalTestCluster) cluster()).getInstance(DjbHashFunction.class).hash(id) % numPrimaries;
+                                    logger.trace("[{}] indexing id [{}] through node [{}] targeting shard [{}]", name, id, node, shard);
+                                    IndexResponse response = client.prepareIndex("test", "type", id).setSource("{}").setTimeout("1s").get();
+                                    assertThat(response.getVersion(), equalTo(1L));
+                                    ackedDocs.put(id, node);
+                                    logger.trace("[{}] indexed id [{}] through node [{}]", name, id, node);
+                                } catch (ElasticsearchException e) {
+                                    // indexing failures are expected during disruption; they are collected and reported at the end
+                                    exceptedExceptions.add(e);
+                                    logger.trace("[{}] failed id [{}] through node [{}]", e, name, id, node);
+                                } finally {
+                                    countDownLatchRef.get().countDown();
+                                    logger.trace("[{}] decreased counter : {}", name, countDownLatchRef.get().getCount());
+                                }
+                            } catch (InterruptedException e) {
+                                // fine - semaphore interrupt
+                            } catch (Throwable t) {
+                                logger.info("unexpected exception in background thread of [{}]", t, node);
+                            }
+                        }
+                    }
+                });
+
+                thread.setName(name);
+                thread.setDaemon(true);
+                thread.start();
+                indexers.add(thread);
+            }
+
+            int docsPerIndexer = randomInt(3);
+            logger.info("indexing " + docsPerIndexer + " docs per indexer before partition");
+            countDownLatchRef.set(new CountDownLatch(docsPerIndexer * indexers.size()));
+            for (Semaphore semaphore : semaphores) {
+                semaphore.release(docsPerIndexer);
+            }
+            assertTrue(countDownLatchRef.get().await(1, TimeUnit.MINUTES));
+
+            for (int iter = 1 + randomInt(2); iter > 0; iter--) {
+                logger.info("starting disruptions & indexing (iteration [{}])", iter);
+                disruptionScheme.startDisrupting();
+
+                docsPerIndexer = 1 + randomInt(5);
+                logger.info("indexing " + docsPerIndexer + " docs per indexer during partition");
+                countDownLatchRef.set(new CountDownLatch(docsPerIndexer * indexers.size()));
+                // use the test's seeded random so the release order is reproducible from the test seed
+                Collections.shuffle(semaphores, getRandom());
+                for (Semaphore semaphore : semaphores) {
+                    assertThat(semaphore.availablePermits(), equalTo(0));
+                    semaphore.release(docsPerIndexer);
+                }
+                assertTrue(countDownLatchRef.get().await(60000 + disruptionScheme.expectedTimeToHeal().millis() * (docsPerIndexer * indexers.size()), TimeUnit.MILLISECONDS));
+
+                logger.info("stopping disruption");
+                disruptionScheme.stopDisrupting();
+                ensureStableCluster(3, TimeValue.timeValueMillis(disruptionScheme.expectedTimeToHeal().millis() + DISRUPTION_HEALING_OVERHEAD.millis()));
+                ensureGreen("test");
+
+                logger.info("validating successful docs");
+                for (String node : nodes) {
+                    try {
+                        logger.debug("validating through node [{}]", node);
+                        for (String id : ackedDocs.keySet()) {
+                            assertTrue("doc [" + id + "] indexed via node [" + ackedDocs.get(id) + "] not found",
+                                    client(node).prepareGet("test", "type", id).setPreference("_local").get().isExists());
+                        }
+                    } catch (AssertionError e) {
+                        throw new AssertionError(e.getMessage() + " (checked via node [" + node + "]", e);
+                    }
+                }
+
+                logger.info("done validating (iteration [{}])", iter);
+            }
+        } finally {
+            // indexers may still be appending here, and a synchronizedList must be locked by the caller while iterating
+            synchronized (exceptedExceptions) {
+                if (exceptedExceptions.size() > 0) {
+                    StringBuilder sb = new StringBuilder("Indexing exceptions during disruption:");
+                    for (Exception e : exceptedExceptions) {
+                        sb.append("\n").append(e.getMessage());
+                    }
+                    logger.debug(sb.toString());
+                }
+            }
+            logger.info("shutting down indexers");
+            stop.set(true);
+            for (Thread indexer : indexers) {
+                indexer.interrupt();
+                indexer.join(60000);
+            }
+        }
+    }
+
+    /**
+     * Test that cluster recovers from a long GC on master that causes other nodes to elect a new one
+     */
+    @Test
+    @TestLogging("discovery.zen:TRACE,action:TRACE,cluster.service:TRACE,indices.recovery:TRACE,indices.cluster:TRACE")
+    public void testMasterNodeGCs() throws Exception {
+        // TODO: on mac OS multicast threads are shared between nodes and therefore we can't simulate GC and stop pinging for just one node
+        // find a way to block thread creation in the generic thread pool to avoid this.
+        List<String> nodes = startUnicastCluster(3, null, -1);
+
+        String oldMasterNode = internalCluster().getMasterName();
+        // a very long GC, but it's OK as we remove the disruption when it has had an effect
+        SingleNodeDisruption masterNodeDisruption = new LongGCDisruption(oldMasterNode, getRandom(), 100, 200, 30000, 60000);
+        internalCluster().setDisruptionScheme(masterNodeDisruption);
+        masterNodeDisruption.startDisrupting();
+
+        Set<String> oldNonMasterNodesSet = new HashSet<>(nodes);
+        oldNonMasterNodesSet.remove(oldMasterNode);
+
+        List<String> oldNonMasterNodes = new ArrayList<>(oldNonMasterNodesSet);
+
+        logger.info("waiting for nodes to de-elect master [{}]", oldMasterNode);
+        for (String node : oldNonMasterNodesSet) {
+            assertDifferentMaster(node, oldMasterNode);
+        }
+
+        logger.info("waiting for nodes to elect a new master");
+        ensureStableCluster(2, oldNonMasterNodes.get(0));
+
+        logger.info("waiting for any pinging to stop");
+        for (final String node : oldNonMasterNodes) {
+            assertTrue("node [" + node + "] is still joining master", awaitBusy(new Predicate<Object>() {
+                @Override
+                public boolean apply(Object input) {
+                    return !((ZenDiscovery) internalCluster().getInstance(Discovery.class, node)).joiningCluster();
+                }
+            }, 30, TimeUnit.SECONDS));
+        }
+
+        // restore GC
+        masterNodeDisruption.stopDisrupting();
+        ensureStableCluster(3, new TimeValue(DISRUPTION_HEALING_OVERHEAD.millis() + masterNodeDisruption.expectedTimeToHeal().millis()),
+                oldNonMasterNodes.get(0));
+
+        // make sure all nodes agree on master
+        String newMaster = internalCluster().getMasterName();
+        assertThat(newMaster, not(equalTo(oldMasterNode)));
+        assertMaster(newMaster, nodes);
+    }
+
+    /**
+     * Test that a document which is indexed on the majority side of a partition, is available from the minority side,
+     * once the partition is healed
+     *
+     * @throws Exception
+     */
+    @Test
+    @TestLogging("discovery.zen:TRACE,action:TRACE,cluster.service:TRACE,indices.recovery:TRACE,indices.cluster:TRACE")
+    public void testRejoinDocumentExistsInAllShardCopies() throws Exception {
+        List<String> nodes = startCluster(3);
+
+        assertAcked(prepareCreate("test")
+                .setSettings(ImmutableSettings.builder()
+                                .put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1)
+                                .put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 2)
+                )
+                .get());
+        ensureGreen("test");
+
+        // shuffle a copy so the isolated / non-isolated nodes are picked at random
+        nodes = new ArrayList<>(nodes);
+        Collections.shuffle(nodes, getRandom());
+        String isolatedNode = nodes.get(0);
+        String notIsolatedNode = nodes.get(1);
+
+        ServiceDisruptionScheme scheme = addRandomIsolation(isolatedNode);
+        scheme.startDisrupting();
+        ensureStableCluster(2, notIsolatedNode);
+        assertFalse(client(notIsolatedNode).admin().cluster().prepareHealth("test").setWaitForYellowStatus().get().isTimedOut());
+
+
+        IndexResponse indexResponse = internalCluster().client(notIsolatedNode).prepareIndex("test", "type").setSource("field", "value").get();
+        assertThat(indexResponse.getVersion(), equalTo(1L));
+
+        logger.info("Verifying if document exists via node[" + notIsolatedNode + "]");
+        GetResponse getResponse = internalCluster().client(notIsolatedNode).prepareGet("test", "type", indexResponse.getId())
+                .setPreference("_local")
+                .get();
+        assertThat(getResponse.isExists(), is(true));
+        assertThat(getResponse.getVersion(), equalTo(1L));
+        assertThat(getResponse.getId(), equalTo(indexResponse.getId()));
+
+        scheme.stopDisrupting();
+
+        ensureStableCluster(3);
+        ensureGreen("test");
+
+        for (String node : nodes) {
+            logger.info("Verifying if document exists after isolating node[" + isolatedNode + "] via node[" + node + "]");
+            getResponse = internalCluster().client(node).prepareGet("test", "type", indexResponse.getId())
+                    .setPreference("_local")
+                    .get();
+            assertThat(getResponse.isExists(), is(true));
+            assertThat(getResponse.getVersion(), equalTo(1L));
+            assertThat(getResponse.getId(), equalTo(indexResponse.getId()));
+        }
+    }
+
+    /**
+     * A 4 node cluster with m_m_n set to 3 and each node has one unicast endpoint. 
One node partitions from the master node. + * The temporal unicast responses is empty. When partition is solved the one ping response contains a master node. + * The rejoining node should take this master node and connect. + */ + @Test + @TestLogging("discovery.zen:TRACE,action:TRACE") + public void unicastSinglePingResponseContainsMaster() throws Exception { + List nodes = startUnicastCluster(4, new int[]{0}, -1); + // Figure out what is the elected master node + final String masterNode = internalCluster().getMasterName(); + logger.info("---> legit elected master node=" + masterNode); + List otherNodes = new ArrayList<>(nodes); + otherNodes.remove(masterNode); + otherNodes.remove(nodes.get(0)); // <-- Don't isolate the node that is in the unicast endpoint for all the other nodes. + final String isolatedNode = otherNodes.get(0); + + // Forcefully clean temporal response lists on all nodes. Otherwise the node in the unicast host list + // includes all the other nodes that have pinged it and the issue doesn't manifest + for (ZenPingService pingService : internalCluster().getInstances(ZenPingService.class)) { + for (ZenPing zenPing : pingService.zenPings()) { + ((UnicastZenPing) zenPing).clearTemporalReponses(); + } + } + + // Simulate a network issue between the unlucky node and elected master node in both directions. + NetworkDisconnectPartition networkDisconnect = new NetworkDisconnectPartition(masterNode, isolatedNode, getRandom()); + setDisruptionScheme(networkDisconnect); + networkDisconnect.startDisrupting(); + // Wait until elected master has removed that the unlucky node... + ensureStableCluster(3, masterNode); + + // The isolate master node must report no master, so it starts with pinging + assertNoMaster(isolatedNode); + networkDisconnect.stopDisrupting(); + // Wait until the master node sees all 4 nodes again. 
+ ensureStableCluster(4); + // The elected master shouldn't have changed, since the isolated node never could have elected himself as + // master since m_m_n of 3 could never be satisfied. + assertMaster(masterNode, nodes); + } + + @Test + @TestLogging("discovery.zen:TRACE,action:TRACE") + public void isolatedUnicastNodes() throws Exception { + List nodes = startUnicastCluster(3, new int[]{0}, -1); + // Figure out what is the elected master node + final String unicastTarget = nodes.get(0); + + Set unicastTargetSide = new HashSet<>(); + unicastTargetSide.add(unicastTarget); + + Set restOfClusterSide = new HashSet<>(); + restOfClusterSide.addAll(nodes); + restOfClusterSide.remove(unicastTarget); + + // Forcefully clean temporal response lists on all nodes. Otherwise the node in the unicast host list + // includes all the other nodes that have pinged it and the issue doesn't manifest + for (ZenPingService pingService : internalCluster().getInstances(ZenPingService.class)) { + for (ZenPing zenPing : pingService.zenPings()) { + ((UnicastZenPing) zenPing).clearTemporalReponses(); + } + } + + // Simulate a network issue between the unicast target node and the rest of the cluster + NetworkDisconnectPartition networkDisconnect = new NetworkDisconnectPartition(unicastTargetSide, restOfClusterSide, getRandom()); + setDisruptionScheme(networkDisconnect); + networkDisconnect.startDisrupting(); + // Wait until elected master has removed that the unlucky node... + ensureStableCluster(2, nodes.get(1)); + + // The isolate master node must report no master, so it starts with pinging + assertNoMaster(unicastTarget); + networkDisconnect.stopDisrupting(); + // Wait until the master node sees all 3 nodes again. 
+ ensureStableCluster(3); + } + + + /** Test cluster join with issues in cluster state publishing * */ + @Test + @TestLogging("discovery.zen:TRACE,action:TRACE") + public void testClusterJoinDespiteOfPublishingIssues() throws Exception { + List nodes = startCluster(2, 1); + + String masterNode = internalCluster().getMasterName(); + String nonMasterNode; + if (masterNode.equals(nodes.get(0))) { + nonMasterNode = nodes.get(1); + } else { + nonMasterNode = nodes.get(0); + } + + DiscoveryNodes discoveryNodes = internalCluster().getInstance(ClusterService.class, nonMasterNode).state().nodes(); + + logger.info("blocking requests from non master [{}] to master [{}]", nonMasterNode, masterNode); + MockTransportService nonMasterTransportService = (MockTransportService) internalCluster().getInstance(TransportService.class, nonMasterNode); + nonMasterTransportService.addFailToSendNoConnectRule(discoveryNodes.masterNode()); + + assertNoMaster(nonMasterNode); + + logger.info("blocking cluster state publishing from master [{}] to non master [{}]", masterNode, nonMasterNode); + MockTransportService masterTransportService = (MockTransportService) internalCluster().getInstance(TransportService.class, masterNode); + masterTransportService.addFailToSendNoConnectRule(discoveryNodes.localNode(), PublishClusterStateAction.ACTION_NAME); + + logger.info("allowing requests from non master [{}] to master [{}], waiting for two join request", nonMasterNode, masterNode); + final CountDownLatch countDownLatch = new CountDownLatch(2); + nonMasterTransportService.addDelegate(discoveryNodes.masterNode(), new MockTransportService.DelegateTransport(nonMasterTransportService.original()) { + @Override + public void sendRequest(DiscoveryNode node, long requestId, String action, TransportRequest request, TransportRequestOptions options) throws IOException, TransportException { + if (action.equals(MembershipAction.DISCOVERY_JOIN_ACTION_NAME)) { + countDownLatch.countDown(); + } + super.sendRequest(node, 
requestId, action, request, options); + } + }); + + countDownLatch.await(); + + logger.info("waiting for cluster to reform"); + masterTransportService.clearRule(discoveryNodes.localNode()); + nonMasterTransportService.clearRule(discoveryNodes.masterNode()); + + ensureStableCluster(2); + } + + + protected NetworkPartition addRandomPartition() { + NetworkPartition partition; + if (randomBoolean()) { + partition = new NetworkUnresponsivePartition(getRandom()); + } else { + partition = new NetworkDisconnectPartition(getRandom()); + } + + setDisruptionScheme(partition); + + return partition; + } + + protected NetworkPartition addRandomIsolation(String isolatedNode) { + Set side1 = new HashSet<>(); + Set side2 = new HashSet<>(Arrays.asList(internalCluster().getNodeNames())); + side1.add(isolatedNode); + side2.remove(isolatedNode); + + NetworkPartition partition; + if (randomBoolean()) { + partition = new NetworkUnresponsivePartition(side1, side2, getRandom()); + } else { + partition = new NetworkDisconnectPartition(side1, side2, getRandom()); + } + + internalCluster().setDisruptionScheme(partition); + + return partition; + } + + private ServiceDisruptionScheme addRandomDisruptionScheme() { + List list = Arrays.asList( + new NetworkUnresponsivePartition(getRandom()), + new NetworkDelaysPartition(getRandom()), + new NetworkDisconnectPartition(getRandom()), + new SlowClusterStateProcessing(getRandom()) + ); + Collections.shuffle(list); + setDisruptionScheme(list.get(0)); + return list.get(0); + } + + private void ensureStableCluster(int nodeCount) { + ensureStableCluster(nodeCount, TimeValue.timeValueSeconds(30), null); + } + + private void ensureStableCluster(int nodeCount, TimeValue timeValue) { + ensureStableCluster(nodeCount, timeValue, null); + } + + private void ensureStableCluster(int nodeCount, @Nullable String viaNode) { + ensureStableCluster(nodeCount, TimeValue.timeValueSeconds(30), viaNode); + } + + private void ensureStableCluster(int nodeCount, TimeValue 
timeValue, @Nullable String viaNode) { + if (viaNode == null) { + viaNode = randomFrom(internalCluster().getNodeNames()); + } + logger.debug("ensuring cluster is stable with [{}] nodes. access node: [{}]. timeout: [{}]", nodeCount, viaNode, timeValue); + ClusterHealthResponse clusterHealthResponse = client(viaNode).admin().cluster().prepareHealth() + .setWaitForEvents(Priority.LANGUID) + .setWaitForNodes(Integer.toString(nodeCount)) + .setTimeout(timeValue) + .setWaitForRelocatingShards(0) + .get(); + if (clusterHealthResponse.isTimedOut()) { + ClusterStateResponse stateResponse = client(viaNode).admin().cluster().prepareState().get(); + fail("failed to reach a stable cluster of [" + nodeCount + "] nodes. Tried via [" + viaNode + "]. last cluster state:\n" + + stateResponse.getState().prettyPrint()); + } + assertThat(clusterHealthResponse.isTimedOut(), is(false)); + } + + private ClusterState getNodeClusterState(String node) { + return client(node).admin().cluster().prepareState().setLocal(true).get().getState(); + } + + private void assertNoMaster(final String node) throws Exception { + assertNoMaster(node, null, TimeValue.timeValueSeconds(10)); + } + + private void assertNoMaster(final String node, TimeValue maxWaitTime) throws Exception { + assertNoMaster(node, null, maxWaitTime); + } + + private void assertNoMaster(final String node, @Nullable final ClusterBlock expectedBlocks, TimeValue maxWaitTime) throws Exception { + assertBusy(new Runnable() { + @Override + public void run() { + ClusterState state = getNodeClusterState(node); + assertNull("node [" + node + "] still has [" + state.nodes().masterNode() + "] as master", state.nodes().masterNode()); + if (expectedBlocks != null) { + for (ClusterBlockLevel level : expectedBlocks.levels()) { + assertTrue("node [" + node + "] does have level [" + level + "] in it's blocks", state.getBlocks().hasGlobalBlock(level)); + } + } + } + }, maxWaitTime.getMillis(), TimeUnit.MILLISECONDS); + } + + private void 
assertDifferentMaster(final String node, final String oldMasterNode) throws Exception { + assertBusy(new Runnable() { + @Override + public void run() { + ClusterState state = getNodeClusterState(node); + String masterNode = null; + if (state.nodes().masterNode() != null) { + masterNode = state.nodes().masterNode().name(); + } + logger.trace("[{}] master is [{}]", node, state.nodes().masterNode()); + assertThat("node [" + node + "] still has [" + masterNode + "] as master", + oldMasterNode, not(equalTo(masterNode))); + } + }, 10, TimeUnit.SECONDS); + } + + private void assertMaster(String masterNode, List nodes) { + for (String node : nodes) { + ClusterState state = getNodeClusterState(node); + String failMsgSuffix = "cluster_state:\n" + state.prettyPrint(); + assertThat("wrong node count on [" + node + "]. " + failMsgSuffix, state.nodes().size(), equalTo(nodes.size())); + assertThat("wrong master on node [" + node + "]. " + failMsgSuffix, state.nodes().masterNode().name(), equalTo(masterNode)); + } + } +} diff --git a/src/test/java/org/elasticsearch/discovery/ZenFaultDetectionTests.java b/src/test/java/org/elasticsearch/discovery/ZenFaultDetectionTests.java new file mode 100644 index 00000000000..082148921e6 --- /dev/null +++ b/src/test/java/org/elasticsearch/discovery/ZenFaultDetectionTests.java @@ -0,0 +1,219 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.discovery; + +import com.google.common.collect.ImmutableMap; +import org.elasticsearch.Version; +import org.elasticsearch.cluster.ClusterName; +import org.elasticsearch.cluster.node.DiscoveryNode; +import org.elasticsearch.cluster.node.DiscoveryNodes; +import org.elasticsearch.common.settings.ImmutableSettings; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.discovery.zen.DiscoveryNodesProvider; +import org.elasticsearch.discovery.zen.fd.FaultDetection; +import org.elasticsearch.discovery.zen.fd.MasterFaultDetection; +import org.elasticsearch.discovery.zen.fd.NodesFaultDetection; +import org.elasticsearch.node.service.NodeService; +import org.elasticsearch.test.ElasticsearchTestCase; +import org.elasticsearch.test.transport.MockTransportService; +import org.elasticsearch.threadpool.ThreadPool; +import org.elasticsearch.transport.TransportConnectionListener; +import org.elasticsearch.transport.local.LocalTransport; +import org.hamcrest.Matcher; +import org.hamcrest.Matchers; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; + +import static org.hamcrest.Matchers.equalTo; + +public class ZenFaultDetectionTests extends ElasticsearchTestCase { + + protected ThreadPool threadPool; + + protected static final Version version0 = Version.fromId(/*0*/99); + protected DiscoveryNode nodeA; + protected MockTransportService serviceA; + + protected static final Version version1 = 
Version.fromId(199); + protected DiscoveryNode nodeB; + protected MockTransportService serviceB; + + @Before + public void setUp() throws Exception { + super.setUp(); + threadPool = new ThreadPool(getClass().getName()); + serviceA = build(ImmutableSettings.builder().put("name", "TS_A").build(), version0); + nodeA = new DiscoveryNode("TS_A", "TS_A", serviceA.boundAddress().publishAddress(), ImmutableMap.of(), version0); + serviceB = build(ImmutableSettings.builder().put("name", "TS_B").build(), version1); + nodeB = new DiscoveryNode("TS_B", "TS_B", serviceB.boundAddress().publishAddress(), ImmutableMap.of(), version1); + + // wait till all nodes are properly connected and the event has been sent, so tests in this class + // will not get this callback called on the connections done in this setup + final CountDownLatch latch = new CountDownLatch(4); + TransportConnectionListener waitForConnection = new TransportConnectionListener() { + @Override + public void onNodeConnected(DiscoveryNode node) { + latch.countDown(); + } + + @Override + public void onNodeDisconnected(DiscoveryNode node) { + fail("disconnect should not be called " + node); + } + }; + serviceA.addConnectionListener(waitForConnection); + serviceB.addConnectionListener(waitForConnection); + + serviceA.connectToNode(nodeB); + serviceA.connectToNode(nodeA); + serviceB.connectToNode(nodeA); + serviceB.connectToNode(nodeB); + + assertThat("failed to wait for all nodes to connect", latch.await(5, TimeUnit.SECONDS), equalTo(true)); + serviceA.removeConnectionListener(waitForConnection); + serviceB.removeConnectionListener(waitForConnection); + } + + @After + public void tearDown() throws Exception { + super.tearDown(); + serviceA.close(); + serviceB.close(); + threadPool.shutdown(); + } + + protected MockTransportService build(Settings settings, Version version) { + MockTransportService transportService = new MockTransportService(ImmutableSettings.EMPTY, new LocalTransport(settings, threadPool, version), 
threadPool); + transportService.start(); + return transportService; + } + + private DiscoveryNodes buildNodesForA(boolean master) { + DiscoveryNodes.Builder builder = DiscoveryNodes.builder(); + builder.put(nodeA); + builder.put(nodeB); + builder.localNodeId(nodeA.id()); + builder.masterNodeId(master ? nodeA.id() : nodeB.id()); + return builder.build(); + } + + private DiscoveryNodes buildNodesForB(boolean master) { + DiscoveryNodes.Builder builder = DiscoveryNodes.builder(); + builder.put(nodeA); + builder.put(nodeB); + builder.localNodeId(nodeB.id()); + builder.masterNodeId(master ? nodeB.id() : nodeA.id()); + return builder.build(); + } + + @Test + public void testNodesFaultDetectionConnectOnDisconnect() throws InterruptedException { + ImmutableSettings.Builder settings = ImmutableSettings.builder(); + boolean shouldRetry = randomBoolean(); + // make sure we don't ping + settings.put(FaultDetection.SETTING_CONNECT_ON_NETWORK_DISCONNECT, shouldRetry) + .put(FaultDetection.SETTING_PING_INTERVAL, "5m"); + NodesFaultDetection nodesFD = new NodesFaultDetection(settings.build(), threadPool, serviceA, new ClusterName("test")); + nodesFD.start(); + nodesFD.updateNodes(buildNodesForA(true), -1); + final String[] failureReason = new String[1]; + final DiscoveryNode[] failureNode = new DiscoveryNode[1]; + final CountDownLatch notified = new CountDownLatch(1); + nodesFD.addListener(new NodesFaultDetection.Listener() { + @Override + public void onNodeFailure(DiscoveryNode node, String reason) { + failureNode[0] = node; + failureReason[0] = reason; + notified.countDown(); + } + }); + // will raise a disconnect on A + serviceB.stop(); + notified.await(30, TimeUnit.SECONDS); + + assertEquals(nodeB, failureNode[0]); + Matcher matcher = Matchers.containsString("verified"); + if (!shouldRetry) { + matcher = Matchers.not(matcher); + } + + assertThat(failureReason[0], matcher); + } + + @Test + public void testMasterFaultDetectionConnectOnDisconnect() throws InterruptedException { + 
+ ImmutableSettings.Builder settings = ImmutableSettings.builder(); + boolean shouldRetry = randomBoolean(); + // make sure we don't ping + settings.put(FaultDetection.SETTING_CONNECT_ON_NETWORK_DISCONNECT, shouldRetry) + .put(FaultDetection.SETTING_PING_INTERVAL, "5m"); + ClusterName clusterName = new ClusterName(randomAsciiOfLengthBetween(3, 20)); + final DiscoveryNodes nodes = buildNodesForA(false); + MasterFaultDetection masterFD = new MasterFaultDetection(settings.build(), threadPool, serviceA, + new DiscoveryNodesProvider() { + @Override + public DiscoveryNodes nodes() { + return nodes; + } + + @Override + public NodeService nodeService() { + return null; + } + }, + clusterName + ); + masterFD.start(nodeB, "test"); + + final String[] failureReason = new String[1]; + final DiscoveryNode[] failureNode = new DiscoveryNode[1]; + final CountDownLatch notified = new CountDownLatch(1); + masterFD.addListener(new MasterFaultDetection.Listener() { + + @Override + public void onMasterFailure(DiscoveryNode masterNode, String reason) { + failureNode[0] = masterNode; + failureReason[0] = reason; + notified.countDown(); + } + + @Override + public void onDisconnectedFromMaster() { + + } + }); + // will raise a disconnect on A + serviceB.stop(); + notified.await(30, TimeUnit.SECONDS); + + assertEquals(nodeB, failureNode[0]); + Matcher matcher = Matchers.containsString("verified"); + if (!shouldRetry) { + matcher = Matchers.not(matcher); + } + + assertThat(failureReason[0], matcher); + } +} \ No newline at end of file diff --git a/src/test/java/org/elasticsearch/discovery/ZenUnicastDiscoveryTests.java b/src/test/java/org/elasticsearch/discovery/ZenUnicastDiscoveryTests.java index 984b24b3782..c36834d7cf9 100644 --- a/src/test/java/org/elasticsearch/discovery/ZenUnicastDiscoveryTests.java +++ b/src/test/java/org/elasticsearch/discovery/ZenUnicastDiscoveryTests.java @@ -26,7 +26,6 @@ import org.elasticsearch.common.settings.Settings; import 
org.elasticsearch.test.ElasticsearchIntegrationTest; import org.elasticsearch.test.ElasticsearchIntegrationTest.ClusterScope; import org.elasticsearch.test.ElasticsearchIntegrationTest.Scope; -import org.elasticsearch.transport.local.LocalTransport; import org.junit.Before; import org.junit.Test; @@ -38,47 +37,24 @@ import static org.hamcrest.Matchers.equalTo; @ClusterScope(scope = Scope.TEST, numDataNodes = 0) public class ZenUnicastDiscoveryTests extends ElasticsearchIntegrationTest { - private static int currentNumNodes = -1; - - static int currentBaseHttpPort = -1; - static int currentNumOfUnicastHosts = -1; - - @Before - public void setUP() throws Exception { - ElasticsearchIntegrationTest.beforeClass(); - currentNumNodes = randomIntBetween(3, 5); - currentNumOfUnicastHosts = randomIntBetween(1, currentNumNodes); - currentBaseHttpPort = 25000 + randomInt(100); - } + private ClusterDiscoveryConfiguration discoveryConfig; @Override protected Settings nodeSettings(int nodeOrdinal) { - ImmutableSettings.Builder builder = ImmutableSettings.settingsBuilder() - .put(super.nodeSettings(nodeOrdinal)) - .put("discovery.type", "zen") - .put("discovery.zen.ping.multicast.enabled", false) - .put("http.enabled", false); // just to make test quicker + return discoveryConfig.node(nodeOrdinal); + } - - String[] unicastHosts = new String[currentNumOfUnicastHosts]; - if (internalCluster().getDefaultSettings().get("node.mode").equals("local")) { - builder.put(LocalTransport.TRANSPORT_LOCAL_ADDRESS, "unicast_test_" + nodeOrdinal); - for (int i = 0; i < unicastHosts.length; i++) { - unicastHosts[i] = "unicast_test_" + i; - } - } else { - // we need to pin the node ports so we'd know where to point things - builder.put("transport.tcp.port", currentBaseHttpPort + nodeOrdinal); - for (int i = 0; i < unicastHosts.length; i++) { - unicastHosts[i] = "localhost:" + (currentBaseHttpPort + i); - } - } - builder.putArray("discovery.zen.ping.unicast.hosts", unicastHosts); - return 
builder.build(); + @Before + public void clearConfig() { + discoveryConfig = null; } @Test public void testNormalClusterForming() throws ExecutionException, InterruptedException { + int currentNumNodes = randomIntBetween(3, 5); + int currentNumOfUnicastHosts = randomIntBetween(1, currentNumNodes); + discoveryConfig = new ClusterDiscoveryConfiguration.UnicastZen(currentNumNodes, currentNumOfUnicastHosts); + internalCluster().startNodesAsync(currentNumNodes).get(); if (client().admin().cluster().prepareHealth().setWaitForNodes("" + currentNumNodes).get().isTimedOut()) { @@ -92,9 +68,12 @@ public class ZenUnicastDiscoveryTests extends ElasticsearchIntegrationTest { // test fails, because 2 nodes elect themselves as master and the health request times out b/c waiting_for_nodes=N // can't be satisfied. public void testMinimumMasterNodes() throws Exception { + int currentNumNodes = randomIntBetween(3, 5); + int currentNumOfUnicastHosts = randomIntBetween(1, currentNumNodes); final Settings settings = ImmutableSettings.settingsBuilder().put("discovery.zen.minimum_master_nodes", currentNumNodes / 2 + 1).build(); + discoveryConfig = new ClusterDiscoveryConfiguration.UnicastZen(currentNumNodes, currentNumOfUnicastHosts, settings); - List nodes = internalCluster().startNodesAsync(currentNumNodes, settings).get(); + List nodes = internalCluster().startNodesAsync(currentNumNodes).get(); ensureGreen(); diff --git a/src/test/java/org/elasticsearch/discovery/zen/ElectMasterServiceTest.java b/src/test/java/org/elasticsearch/discovery/zen/ElectMasterServiceTest.java new file mode 100644 index 00000000000..df8f67c536f --- /dev/null +++ b/src/test/java/org/elasticsearch/discovery/zen/ElectMasterServiceTest.java @@ -0,0 +1,105 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. 
Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.discovery.zen; + +import org.elasticsearch.Version; +import org.elasticsearch.cluster.node.DiscoveryNode; +import org.elasticsearch.common.settings.ImmutableSettings; +import org.elasticsearch.common.transport.DummyTransportAddress; +import org.elasticsearch.discovery.zen.elect.ElectMasterService; +import org.elasticsearch.test.ElasticsearchTestCase; +import org.junit.Test; + +import java.util.*; + +public class ElectMasterServiceTest extends ElasticsearchTestCase { + + ElectMasterService electMasterService() { + return new ElectMasterService(ImmutableSettings.EMPTY); + } + + List generateRandomNodes() { + int count = scaledRandomIntBetween(1, 100); + ArrayList nodes = new ArrayList<>(count); + + Map master = new HashMap<>(); + master.put("master", "true"); + Map nonMaster = new HashMap<>(); + nonMaster.put("master", "false"); + + for (int i = 0; i < count; i++) { + Map attributes = randomBoolean() ? 
master : nonMaster; + DiscoveryNode node = new DiscoveryNode("n_" + i, "n_" + i, DummyTransportAddress.INSTANCE, attributes, Version.CURRENT); + nodes.add(node); + } + + Collections.shuffle(nodes, getRandom()); + return nodes; + } + + @Test + public void sortByMasterLikelihood() { + List nodes = generateRandomNodes(); + List sortedNodes = electMasterService().sortByMasterLikelihood(nodes); + assertEquals(nodes.size(), sortedNodes.size()); + DiscoveryNode prevNode = sortedNodes.get(0); + for (int i = 1; i < sortedNodes.size(); i++) { + DiscoveryNode node = sortedNodes.get(i); + if (!prevNode.masterNode()) { + assertFalse(node.masterNode()); + } else if (node.masterNode()) { + assertTrue(prevNode.id().compareTo(node.id()) < 0); + } + prevNode = node; + } + + } + + @Test + public void electMaster() { + List nodes = generateRandomNodes(); + ElectMasterService service = electMasterService(); + int min_master_nodes = randomIntBetween(0, nodes.size()); + service.minimumMasterNodes(min_master_nodes); + + int master_nodes = 0; + for (DiscoveryNode node : nodes) { + if (node.masterNode()) { + master_nodes++; + } + } + DiscoveryNode master = null; + if (service.hasEnoughMasterNodes(nodes)) { + master = service.electMaster(nodes); + } + + if (master_nodes == 0) { + assertNull(master); + } else if (min_master_nodes > 0 && master_nodes < min_master_nodes) { + assertNull(master); + } else { + for (DiscoveryNode node : nodes) { + if (node.masterNode()) { + assertTrue(master.id().compareTo(node.id()) <= 0); + } + } + } + } +} diff --git a/src/test/java/org/elasticsearch/discovery/zen/ZenDiscoveryRejoinOnMaster.java b/src/test/java/org/elasticsearch/discovery/zen/ZenDiscoveryRejoinOnMaster.java new file mode 100644 index 00000000000..1ee31505d5e --- /dev/null +++ b/src/test/java/org/elasticsearch/discovery/zen/ZenDiscoveryRejoinOnMaster.java @@ -0,0 +1,102 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.discovery.zen; + +import org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse; +import org.elasticsearch.action.admin.indices.recovery.RecoveryResponse; +import org.elasticsearch.common.Priority; +import org.elasticsearch.common.settings.ImmutableSettings; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.discovery.Discovery; +import org.elasticsearch.discovery.zen.fd.FaultDetection; +import org.elasticsearch.test.ElasticsearchIntegrationTest; +import org.junit.Test; + +import static org.hamcrest.Matchers.*; + +/** + */ +@ElasticsearchIntegrationTest.ClusterScope(scope = ElasticsearchIntegrationTest.Scope.TEST, numDataNodes = 0, numClientNodes = 0) +public class ZenDiscoveryRejoinOnMaster extends ElasticsearchIntegrationTest { + + @Test + public void testChangeRejoinOnMasterOptionIsDynamic() throws Exception { + Settings nodeSettings = ImmutableSettings.settingsBuilder() + .put("discovery.type", "zen") // <-- To override the local setting if set externally + .build(); + String nodeName = internalCluster().startNode(nodeSettings); + ZenDiscovery zenDiscovery = (ZenDiscovery) internalCluster().getInstance(Discovery.class, nodeName); + assertThat(zenDiscovery.isRejoinOnMasterGone(), is(true)); + 
+ client().admin().cluster().prepareUpdateSettings() + .setTransientSettings(ImmutableSettings.builder().put(ZenDiscovery.SETTING_REJOIN_ON_MASTER_GONE, false)) + .get(); + + assertThat(zenDiscovery.isRejoinOnMasterGone(), is(false)); + } + + @Test + public void testNoShardRelocationsOccurWhenElectedMasterNodeFails() throws Exception { + Settings defaultSettings = ImmutableSettings.builder() + .put(FaultDetection.SETTING_PING_TIMEOUT, "1s") + .put(FaultDetection.SETTING_PING_RETRIES, "1") + .put("discovery.type", "zen") + .build(); + + Settings masterNodeSettings = ImmutableSettings.builder() + .put("node.data", false) + .put(defaultSettings) + .build(); + internalCluster().startNodesAsync(2, masterNodeSettings).get(); + Settings dateNodeSettings = ImmutableSettings.builder() + .put("node.master", false) + .put(defaultSettings) + .build(); + internalCluster().startNodesAsync(2, dateNodeSettings).get(); + ClusterHealthResponse clusterHealthResponse = client().admin().cluster().prepareHealth() + .setWaitForEvents(Priority.LANGUID) + .setWaitForNodes("4") + .setWaitForRelocatingShards(0) + .get(); + assertThat(clusterHealthResponse.isTimedOut(), is(false)); + + createIndex("test"); + ensureSearchable("test"); + RecoveryResponse r = client().admin().indices().prepareRecoveries("test").get(); + int numRecoveriesBeforeNewMaster = r.shardResponses().get("test").size(); + + final String oldMaster = internalCluster().getMasterName(); + internalCluster().stopCurrentMasterNode(); + assertBusy(new Runnable() { + @Override + public void run() { + String current = internalCluster().getMasterName(); + assertThat(current, notNullValue()); + assertThat(current, not(equalTo(oldMaster))); + } + }); + ensureSearchable("test"); + + r = client().admin().indices().prepareRecoveries("test").get(); + int numRecoveriesAfterNewMaster = r.shardResponses().get("test").size(); + assertThat(numRecoveriesAfterNewMaster, equalTo(numRecoveriesBeforeNewMaster)); + } + +} diff --git 
a/src/test/java/org/elasticsearch/discovery/zen/ping/unicast/UnicastZenPingTests.java b/src/test/java/org/elasticsearch/discovery/zen/ping/unicast/UnicastZenPingTests.java index 8f18cb11d38..7ecc23b68ef 100644 --- a/src/test/java/org/elasticsearch/discovery/zen/ping/unicast/UnicastZenPingTests.java +++ b/src/test/java/org/elasticsearch/discovery/zen/ping/unicast/UnicastZenPingTests.java @@ -30,6 +30,7 @@ import org.elasticsearch.common.transport.InetSocketTransportAddress; import org.elasticsearch.common.unit.TimeValue; import org.elasticsearch.common.util.BigArrays; import org.elasticsearch.discovery.zen.DiscoveryNodesProvider; +import org.elasticsearch.discovery.zen.elect.ElectMasterService; import org.elasticsearch.discovery.zen.ping.ZenPing; import org.elasticsearch.node.service.NodeService; import org.elasticsearch.test.ElasticsearchTestCase; @@ -55,6 +56,7 @@ public class UnicastZenPingTests extends ElasticsearchTestCase { ThreadPool threadPool = new ThreadPool(getClass().getName()); ClusterName clusterName = new ClusterName("test"); NetworkService networkService = new NetworkService(settings); + ElectMasterService electMasterService = new ElectMasterService(settings); NettyTransport transportA = new NettyTransport(settings, threadPool, networkService, BigArrays.NON_RECYCLING_INSTANCE, Version.CURRENT); final TransportService transportServiceA = new TransportService(transportA, threadPool).start(); @@ -73,7 +75,7 @@ public class UnicastZenPingTests extends ElasticsearchTestCase { addressB.address().getAddress().getHostAddress() + ":" + addressB.address().getPort()) .build(); - UnicastZenPing zenPingA = new UnicastZenPing(hostsSettings, threadPool, transportServiceA, clusterName, Version.CURRENT, null); + UnicastZenPing zenPingA = new UnicastZenPing(hostsSettings, threadPool, transportServiceA, clusterName, Version.CURRENT, electMasterService, null); zenPingA.setNodesProvider(new DiscoveryNodesProvider() { @Override public DiscoveryNodes nodes() { @@ -87,7 
+89,7 @@ public class UnicastZenPingTests extends ElasticsearchTestCase { }); zenPingA.start(); - UnicastZenPing zenPingB = new UnicastZenPing(hostsSettings, threadPool, transportServiceB, clusterName, Version.CURRENT, null); + UnicastZenPing zenPingB = new UnicastZenPing(hostsSettings, threadPool, transportServiceB, clusterName, Version.CURRENT, electMasterService, null); zenPingB.setNodesProvider(new DiscoveryNodesProvider() { @Override public DiscoveryNodes nodes() { diff --git a/src/test/java/org/elasticsearch/index/TransportIndexFailuresTest.java b/src/test/java/org/elasticsearch/index/TransportIndexFailuresTest.java index f8fe46cae1f..c7c20b790dd 100644 --- a/src/test/java/org/elasticsearch/index/TransportIndexFailuresTest.java +++ b/src/test/java/org/elasticsearch/index/TransportIndexFailuresTest.java @@ -33,6 +33,7 @@ import org.elasticsearch.common.settings.ImmutableSettings; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.discovery.Discovery; import org.elasticsearch.discovery.DiscoverySettings; +import org.elasticsearch.discovery.zen.fd.FaultDetection; import org.elasticsearch.test.ElasticsearchIntegrationTest; import org.elasticsearch.test.junit.annotations.TestLogging; import org.elasticsearch.test.transport.MockTransportService; @@ -54,8 +55,8 @@ public class TransportIndexFailuresTest extends ElasticsearchIntegrationTest { private static final Settings nodeSettings = ImmutableSettings.settingsBuilder() .put("discovery.type", "zen") // <-- To override the local setting if set externally - .put("discovery.zen.fd.ping_timeout", "1s") // <-- for hitting simulated network failures quickly - .put("discovery.zen.fd.ping_retries", "1") // <-- for hitting simulated network failures quickly + .put(FaultDetection.SETTING_PING_TIMEOUT, "1s") // <-- for hitting simulated network failures quickly + .put(FaultDetection.SETTING_PING_RETRIES, "1") // <-- for hitting simulated network failures quickly .put(DiscoverySettings.PUBLISH_TIMEOUT, 
"1s") // <-- for hitting simulated network failures quickly .put("discovery.zen.minimum_master_nodes", 1) .put(TransportModule.TRANSPORT_SERVICE_TYPE_KEY, MockTransportService.class.getName()) diff --git a/src/test/java/org/elasticsearch/recovery/FullRollingRestartTests.java b/src/test/java/org/elasticsearch/recovery/FullRollingRestartTests.java index 26bf890f85b..94121d71a63 100644 --- a/src/test/java/org/elasticsearch/recovery/FullRollingRestartTests.java +++ b/src/test/java/org/elasticsearch/recovery/FullRollingRestartTests.java @@ -30,7 +30,7 @@ import org.elasticsearch.test.junit.annotations.TestLogging; import org.junit.Test; import static org.elasticsearch.index.query.QueryBuilders.matchAllQuery; -import static org.elasticsearch.test.ElasticsearchIntegrationTest.*; +import static org.elasticsearch.test.ElasticsearchIntegrationTest.Scope; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount; /** @@ -54,7 +54,7 @@ public class FullRollingRestartTests extends ElasticsearchIntegrationTest { @Test @Slow - @TestLogging("indices.cluster:TRACE,cluster.service:TRACE") + @TestLogging("indices.cluster:TRACE,cluster.service:TRACE,action.search:TRACE,indices.recovery:TRACE") public void testFullRollingRestart() throws Exception { internalCluster().startNode(); createIndex("test"); diff --git a/src/test/java/org/elasticsearch/recovery/RecoveryWhileUnderLoadTests.java b/src/test/java/org/elasticsearch/recovery/RecoveryWhileUnderLoadTests.java index ca2f8a5b050..ff4512b4113 100644 --- a/src/test/java/org/elasticsearch/recovery/RecoveryWhileUnderLoadTests.java +++ b/src/test/java/org/elasticsearch/recovery/RecoveryWhileUnderLoadTests.java @@ -43,7 +43,6 @@ import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF import static org.elasticsearch.common.settings.ImmutableSettings.settingsBuilder; import static org.elasticsearch.index.query.QueryBuilders.matchAllQuery; import static 
org.elasticsearch.test.hamcrest.ElasticsearchAssertions.*; -import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoTimeout; import static org.hamcrest.Matchers.equalTo; public class RecoveryWhileUnderLoadTests extends ElasticsearchIntegrationTest { diff --git a/src/test/java/org/elasticsearch/test/BackgroundIndexer.java b/src/test/java/org/elasticsearch/test/BackgroundIndexer.java index 29184b89768..2cafcef5d9f 100644 --- a/src/test/java/org/elasticsearch/test/BackgroundIndexer.java +++ b/src/test/java/org/elasticsearch/test/BackgroundIndexer.java @@ -217,7 +217,7 @@ public class BackgroundIndexer implements AutoCloseable { setBudget(numOfDocs); } - /** Stop all background threads **/ + /** Stop all background threads * */ public void stop() throws InterruptedException { if (stop.get()) { return; diff --git a/src/test/java/org/elasticsearch/test/ElasticsearchIntegrationTest.java b/src/test/java/org/elasticsearch/test/ElasticsearchIntegrationTest.java index f7062a94994..9ed53f2fd51 100644 --- a/src/test/java/org/elasticsearch/test/ElasticsearchIntegrationTest.java +++ b/src/test/java/org/elasticsearch/test/ElasticsearchIntegrationTest.java @@ -97,6 +97,7 @@ import org.elasticsearch.rest.RestStatus; import org.elasticsearch.script.ScriptService; import org.elasticsearch.search.SearchService; import org.elasticsearch.test.client.RandomizingClient; +import org.elasticsearch.test.disruption.ServiceDisruptionScheme; import org.hamcrest.Matchers; import org.junit.*; @@ -581,6 +582,7 @@ public abstract class ElasticsearchIntegrationTest extends ElasticsearchTestCase boolean success = false; try { logger.info("[{}#{}]: cleaning up after test", getTestClass().getSimpleName(), getTestName()); + clearDisruptionScheme(); final Scope currentClusterScope = getCurrentClusterScope(); try { if (currentClusterScope != Scope.TEST) { @@ -644,6 +646,13 @@ public abstract class ElasticsearchIntegrationTest extends ElasticsearchTestCase } public static Client 
client() { + return client(null); + } + + public static Client client(@Nullable String node) { + if (node != null) { + return internalCluster().client(node); + } Client client = cluster().client(); if (frequently()) { client = new RandomizingClient(client, getRandom()); @@ -689,6 +698,15 @@ public abstract class ElasticsearchIntegrationTest extends ElasticsearchTestCase return between(minimumNumberOfReplicas(), maximumNumberOfReplicas()); } + + public void setDisruptionScheme(ServiceDisruptionScheme scheme) { + internalCluster().setDisruptionScheme(scheme); + } + + public void clearDisruptionScheme() { + internalCluster().clearDisruptionScheme(); + } + /** * Returns a settings object used in {@link #createIndex(String...)} and {@link #prepareCreate(String)} and friends. * This method can be overwritten by subclasses to set defaults for the indices that are created by the test. @@ -889,7 +907,7 @@ public abstract class ElasticsearchIntegrationTest extends ElasticsearchTestCase * It is useful to ensure that all action on the cluster have finished and all shards that were currently relocating * are now allocated and started. */ - public ClusterHealthStatus ensureGreen(String... indices) { + public ClusterHealthStatus ensureGreen(String... 
indices) { ClusterHealthResponse actionGet = client().admin().cluster() .health(Requests.clusterHealthRequest(indices).waitForGreenStatus().waitForEvents(Priority.LANGUID).waitForRelocatingShards(0)).actionGet(); if (actionGet.isTimedOut()) { diff --git a/src/test/java/org/elasticsearch/test/InternalTestCluster.java b/src/test/java/org/elasticsearch/test/InternalTestCluster.java index c28d4e812f5..56cf4b8851f 100644 --- a/src/test/java/org/elasticsearch/test/InternalTestCluster.java +++ b/src/test/java/org/elasticsearch/test/InternalTestCluster.java @@ -76,6 +76,7 @@ import org.elasticsearch.plugins.PluginsService; import org.elasticsearch.search.SearchService; import org.elasticsearch.test.cache.recycler.MockBigArraysModule; import org.elasticsearch.test.cache.recycler.MockPageCacheRecyclerModule; +import org.elasticsearch.test.disruption.ServiceDisruptionScheme; import org.elasticsearch.test.engine.MockEngineModule; import org.elasticsearch.test.store.MockFSIndexStoreModule; import org.elasticsearch.test.transport.AssertingLocalTransport; @@ -106,6 +107,7 @@ import static org.elasticsearch.node.NodeBuilder.nodeBuilder; import static org.elasticsearch.test.ElasticsearchTestCase.assertBusy; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoTimeout; import static org.hamcrest.Matchers.equalTo; +import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertThat; /** @@ -150,7 +152,7 @@ public final class InternalTestCluster extends TestCluster { static final boolean DEFAULT_ENABLE_RANDOM_BENCH_NODES = true; - static final String NODE_MODE = nodeMode(); + public static final String NODE_MODE = nodeMode(); /* sorted map to make traverse order reproducible, concurrent since we do checks on it not within a sync block */ private final NavigableMap nodes = new TreeMap<>(); @@ -187,6 +189,7 @@ public final class InternalTestCluster extends TestCluster { */ private final String nodePrefix; + private ServiceDisruptionScheme 
activeDisruptionScheme; public InternalTestCluster(long clusterSeed, int minNumDataNodes, int maxNumDataNodes, String clusterName, int numClientNodes, boolean enableRandomBenchNodes, int jvmOrdinal, String nodePrefix) { @@ -222,7 +225,7 @@ public final class InternalTestCluster extends TestCluster { this.numSharedClientNodes = numClientNodes; } } - assert this.numSharedClientNodes >=0; + assert this.numSharedClientNodes >= 0; this.enableRandomBenchNodes = enableRandomBenchNodes; @@ -251,7 +254,7 @@ public final class InternalTestCluster extends TestCluster { if (numOfDataPaths > 0) { StringBuilder dataPath = new StringBuilder(); for (int i = 0; i < numOfDataPaths; i++) { - dataPath.append(new File("data/d"+i).getAbsolutePath()).append(','); + dataPath.append(new File("data/d" + i).getAbsolutePath()).append(','); } builder.put("path.data", dataPath.toString()); } @@ -275,7 +278,7 @@ public final class InternalTestCluster extends TestCluster { public static String nodeMode() { Builder builder = ImmutableSettings.builder(); - if (Strings.isEmpty(System.getProperty("es.node.mode"))&& Strings.isEmpty(System.getProperty("es.node.local"))) { + if (Strings.isEmpty(System.getProperty("es.node.mode")) && Strings.isEmpty(System.getProperty("es.node.local"))) { return "local"; // default if nothing is specified } if (Strings.hasLength(System.getProperty("es.node.mode"))) { @@ -296,6 +299,10 @@ public final class InternalTestCluster extends TestCluster { return clusterName; } + public String[] getNodeNames() { + return nodes.keySet().toArray(Strings.EMPTY_ARRAY); + } + private static boolean isLocalTransportConfigured() { if ("local".equals(System.getProperty("es.node.mode", "network"))) { return true; @@ -328,7 +335,7 @@ public final class InternalTestCluster extends TestCluster { //.put("index.store.type", random.nextInt(10) == 0 ? 
MockRamIndexStoreModule.class.getName() : MockFSIndexStoreModule.class.getName()) // decrease the routing schedule so new nodes will be added quickly - some random value between 30 and 80 ms .put("cluster.routing.schedule", (30 + random.nextInt(50)) + "ms") - // default to non gateway + // default to non gateway .put("gateway.type", "none") .put(SETTING_CLUSTER_NODE_SEED, seed); if (ENABLE_MOCK_MODULES && usually(random)) { @@ -352,7 +359,7 @@ public final class InternalTestCluster extends TestCluster { builder.put(SearchService.KEEPALIVE_INTERVAL_KEY, TimeValue.timeValueSeconds(10 + random.nextInt(5 * 60))); } if (random.nextBoolean()) { // sometimes set a - builder.put(SearchService.DEFAUTL_KEEPALIVE_KEY, TimeValue.timeValueSeconds(100 + random.nextInt(5*60))); + builder.put(SearchService.DEFAUTL_KEEPALIVE_KEY, TimeValue.timeValueSeconds(100 + random.nextInt(5 * 60))); } if (random.nextBoolean()) { // change threadpool types to make sure we don't have components that rely on the type of thread pools @@ -493,6 +500,7 @@ public final class InternalTestCluster extends TestCluster { while (limit.hasNext()) { NodeAndClient next = limit.next(); nodesToRemove.add(next); + removeDistruptionSchemeFromNode(next); next.close(); } for (NodeAndClient toRemove : nodesToRemove) { @@ -667,6 +675,10 @@ public final class InternalTestCluster extends TestCluster { @Override public void close() { if (this.open.compareAndSet(true, false)) { + if (activeDisruptionScheme != null) { + activeDisruptionScheme.testClusterClosed(); + activeDisruptionScheme = null; + } IOUtils.closeWhileHandlingException(nodes.values()); nodes.clear(); executor.shutdownNow(); @@ -777,7 +789,6 @@ public final class InternalTestCluster extends TestCluster { public static final String TRANSPORT_CLIENT_PREFIX = "transport_client_"; static class TransportClientFactory { - private static TransportClientFactory NO_SNIFF_CLIENT_FACTORY = new TransportClientFactory(false, ImmutableSettings.EMPTY); private static 
TransportClientFactory SNIFF_CLIENT_FACTORY = new TransportClientFactory(true, ImmutableSettings.EMPTY); @@ -831,10 +842,6 @@ public final class InternalTestCluster extends TestCluster { } private synchronized void reset(boolean wipeData) throws IOException { - randomlyResetClients(); - if (wipeData) { - wipeDataDirectories(); - } // clear all rules for mock transport services for (NodeAndClient nodeAndClient : nodes.values()) { TransportService transportService = nodeAndClient.node.injector().getInstance(TransportService.class); @@ -842,6 +849,10 @@ public final class InternalTestCluster extends TestCluster { ((MockTransportService) transportService).clearAllRules(); } } + randomlyResetClients(); + if (wipeData) { + wipeDataDirectories(); + } if (nextNodeId.get() == sharedNodesSeeds.length && nodes.size() == sharedNodesSeeds.length) { logger.debug("Cluster hasn't changed - moving out - nodes: [{}] nextNodeId: [{}] numSharedNodes: [{}]", nodes.keySet(), nextNodeId.get(), sharedNodesSeeds.length); return; @@ -1030,6 +1041,7 @@ public final class InternalTestCluster extends TestCluster { NodeAndClient nodeAndClient = getRandomNodeAndClient(new DataNodePredicate()); if (nodeAndClient != null) { logger.info("Closing random node [{}] ", nodeAndClient.name); + removeDistruptionSchemeFromNode(nodeAndClient); nodes.remove(nodeAndClient.name); nodeAndClient.close(); } @@ -1049,6 +1061,7 @@ public final class InternalTestCluster extends TestCluster { }); if (nodeAndClient != null) { logger.info("Closing filtered random node [{}] ", nodeAndClient.name); + removeDistruptionSchemeFromNode(nodeAndClient); nodes.remove(nodeAndClient.name); nodeAndClient.close(); } @@ -1063,6 +1076,7 @@ public final class InternalTestCluster extends TestCluster { String masterNodeName = getMasterName(); assert nodes.containsKey(masterNodeName); logger.info("Closing master node [{}] ", masterNodeName); + removeDistruptionSchemeFromNode(nodes.get(masterNodeName)); NodeAndClient remove = 
nodes.remove(masterNodeName); remove.close(); } @@ -1074,6 +1088,7 @@ public final class InternalTestCluster extends TestCluster { NodeAndClient nodeAndClient = getRandomNodeAndClient(Predicates.not(new MasterNodePredicate(getMasterName()))); if (nodeAndClient != null) { logger.info("Closing random non master node [{}] current master [{}] ", nodeAndClient.name, getMasterName()); + removeDistruptionSchemeFromNode(nodeAndClient); nodes.remove(nodeAndClient.name); nodeAndClient.close(); } @@ -1127,6 +1142,9 @@ public final class InternalTestCluster extends TestCluster { if (!callback.doRestart(nodeAndClient.name)) { logger.info("Closing node [{}] during restart", nodeAndClient.name); toRemove.add(nodeAndClient); + if (activeDisruptionScheme != null) { + activeDisruptionScheme.removeFromNode(nodeAndClient.name, this); + } nodeAndClient.close(); } } @@ -1141,18 +1159,33 @@ public final class InternalTestCluster extends TestCluster { for (NodeAndClient nodeAndClient : nodes.values()) { callback.doAfterNodes(numNodesRestarted++, nodeAndClient.nodeClient()); logger.info("Restarting node [{}] ", nodeAndClient.name); + if (activeDisruptionScheme != null) { + activeDisruptionScheme.removeFromNode(nodeAndClient.name, this); + } nodeAndClient.restart(callback); + if (activeDisruptionScheme != null) { + activeDisruptionScheme.applyToNode(nodeAndClient.name, this); + } } } else { int numNodesRestarted = 0; for (NodeAndClient nodeAndClient : nodes.values()) { callback.doAfterNodes(numNodesRestarted++, nodeAndClient.nodeClient()); logger.info("Stopping node [{}] ", nodeAndClient.name); + if (activeDisruptionScheme != null) { + activeDisruptionScheme.removeFromNode(nodeAndClient.name, this); + } nodeAndClient.node.close(); } for (NodeAndClient nodeAndClient : nodes.values()) { logger.info("Starting node [{}] ", nodeAndClient.name); + if (activeDisruptionScheme != null) { + activeDisruptionScheme.removeFromNode(nodeAndClient.name, this); + } nodeAndClient.restart(callback); + if 
(activeDisruptionScheme != null) { + activeDisruptionScheme.applyToNode(nodeAndClient.name, this); + } } } } @@ -1193,7 +1226,10 @@ public final class InternalTestCluster extends TestCluster { } - private String getMasterName() { + /** + * get the name of the current master node + */ + public String getMasterName() { try { ClusterState state = client().admin().cluster().prepareState().execute().actionGet().getState(); return state.nodes().masterNode().name(); @@ -1350,6 +1386,7 @@ public final class InternalTestCluster extends TestCluster { dataDirToClean.addAll(Arrays.asList(nodeEnv.nodeDataLocations())); } nodes.put(nodeAndClient.name, nodeAndClient); + applyDisruptionSchemeToNode(nodeAndClient); } public void closeNonSharedNodes(boolean wipeData) throws IOException { @@ -1371,6 +1408,48 @@ public final class InternalTestCluster extends TestCluster { return hasFilterCache; } + public void setDisruptionScheme(ServiceDisruptionScheme scheme) { + clearDisruptionScheme(); + scheme.applyToCluster(this); + activeDisruptionScheme = scheme; + } + + public void clearDisruptionScheme() { + if (activeDisruptionScheme != null) { + TimeValue expectedHealingTime = activeDisruptionScheme.expectedTimeToHeal(); + logger.info("Clearing active scheme {}, expected healing time {}", activeDisruptionScheme, expectedHealingTime); + activeDisruptionScheme.removeFromCluster(this); + // We don't what scheme is picked, certain schemes don't partition the cluster, but process slow, so we need + // to to sleep, cluster health alone doesn't verify if these schemes have been cleared. 
+ if (expectedHealingTime != null && expectedHealingTime.millis() > 0) { + try { + Thread.sleep(expectedHealingTime.millis()); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + } + assertFalse("cluster failed to form after disruption was healed", client().admin().cluster().prepareHealth() + .setWaitForNodes("" + nodes.size()) + .setWaitForRelocatingShards(0) + .get().isTimedOut()); + } + activeDisruptionScheme = null; + } + + private void applyDisruptionSchemeToNode(NodeAndClient nodeAndClient) { + if (activeDisruptionScheme != null) { + assert nodes.containsKey(nodeAndClient.name); + activeDisruptionScheme.applyToNode(nodeAndClient.name, this); + } + } + + private void removeDistruptionSchemeFromNode(NodeAndClient nodeAndClient) { + if (activeDisruptionScheme != null) { + assert nodes.containsKey(nodeAndClient.name); + activeDisruptionScheme.removeFromNode(nodeAndClient.name, this); + } + } + private synchronized Collection dataNodeAndClients() { return Collections2.filter(nodes.values(), new DataNodePredicate()); } diff --git a/src/test/java/org/elasticsearch/test/SettingsSource.java b/src/test/java/org/elasticsearch/test/SettingsSource.java index 8829885bf7b..6341d842d67 100644 --- a/src/test/java/org/elasticsearch/test/SettingsSource.java +++ b/src/test/java/org/elasticsearch/test/SettingsSource.java @@ -20,7 +20,7 @@ package org.elasticsearch.test; import org.elasticsearch.common.settings.Settings; -abstract class SettingsSource { +public abstract class SettingsSource { public static final SettingsSource EMPTY = new SettingsSource() { @Override @@ -35,7 +35,7 @@ abstract class SettingsSource { }; /** - * @return the settings for the node represented by the given ordinal, or {@code null} if there are no settings defined + * @return the settings for the node represented by the given ordinal, or {@code null} if there are no settings defined */ public abstract Settings node(int nodeOrdinal); diff --git 
a/src/test/java/org/elasticsearch/test/disruption/LongGCDisruption.java b/src/test/java/org/elasticsearch/test/disruption/LongGCDisruption.java new file mode 100644 index 00000000000..d2fa09cb7dd --- /dev/null +++ b/src/test/java/org/elasticsearch/test/disruption/LongGCDisruption.java @@ -0,0 +1,177 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.elasticsearch.test.disruption; + +import org.elasticsearch.common.unit.TimeValue; + +import java.util.HashSet; +import java.util.Random; +import java.util.Set; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.regex.Pattern; + +public class LongGCDisruption extends SingleNodeDisruption { + + volatile boolean disrupting; + volatile Thread worker; + + final long intervalBetweenDelaysMin; + final long intervalBetweenDelaysMax; + final long delayDurationMin; + final long delayDurationMax; + + + public LongGCDisruption(Random random) { + this(null, random); + } + + public LongGCDisruption(String disruptedNode, Random random) { + this(disruptedNode, random, 100, 200, 300, 20000); + } + + public LongGCDisruption(String disruptedNode, Random random, long intervalBetweenDelaysMin, + long intervalBetweenDelaysMax, long delayDurationMin, long delayDurationMax) { + this(random, intervalBetweenDelaysMin, intervalBetweenDelaysMax, delayDurationMin, delayDurationMax); + this.disruptedNode = disruptedNode; + } + + public LongGCDisruption(Random random, + long intervalBetweenDelaysMin, long intervalBetweenDelaysMax, long delayDurationMin, + long delayDurationMax) { + super(random); + this.intervalBetweenDelaysMin = intervalBetweenDelaysMin; + this.intervalBetweenDelaysMax = intervalBetweenDelaysMax; + this.delayDurationMin = delayDurationMin; + this.delayDurationMax = delayDurationMax; + } + + final static AtomicInteger thread_ids = new AtomicInteger(); + + @Override + public void startDisrupting() { + disrupting = true; + worker = new Thread(new BackgroundWorker(), "long_gc_simulation_" + thread_ids.incrementAndGet()); + worker.setDaemon(true); + worker.start(); + } + + @Override + public void stopDisrupting() { + if (worker == null) { + return; + } + logger.info("stopping long GCs on [{}]", disruptedNode); + disrupting = false; + worker.interrupt(); + try { + worker.join(2 * (intervalBetweenDelaysMax + delayDurationMax)); + } catch 
(InterruptedException e) { + logger.info("background thread failed to stop"); + } + worker = null; + } + + final static Pattern[] unsafeClasses = new Pattern[]{ + // logging has shared JVM locks - we may suspend a thread and block other nodes from doing their thing + Pattern.compile("Logger") + }; + + private boolean stopNodeThreads(String node, Set nodeThreads) { + Set allThreadsSet = Thread.getAllStackTraces().keySet(); + boolean stopped = false; + final String nodeThreadNamePart = "[" + node + "]"; + for (Thread thread : allThreadsSet) { + String name = thread.getName(); + if (name.contains(nodeThreadNamePart)) { + if (thread.isAlive() && nodeThreads.add(thread)) { + stopped = true; + thread.suspend(); + // double check the thread is not in a shared resource like logging. If so, let it go and come back.. + boolean safe = true; + safe: + for (StackTraceElement stackElement : thread.getStackTrace()) { + String className = stackElement.getClassName(); + for (Pattern unsafePattern : unsafeClasses) { + if (unsafePattern.matcher(className).find()) { + safe = false; + break safe; + } + } + } + if (!safe) { + thread.resume(); + nodeThreads.remove(thread); + } + } + } + } + return stopped; + } + + private void resumeThreads(Set threads) { + for (Thread thread : threads) { + thread.resume(); + } + } + + private void simulateLongGC(final TimeValue duration) throws InterruptedException { + final String disruptionNodeCopy = disruptedNode; + if (disruptionNodeCopy == null) { + return; + } + logger.info("node [{}] goes into GC for for [{}]", disruptionNodeCopy, duration); + final Set nodeThreads = new HashSet<>(); + try { + while (stopNodeThreads(disruptionNodeCopy, nodeThreads)) ; + if (!nodeThreads.isEmpty()) { + Thread.sleep(duration.millis()); + } + } finally { + logger.info("node [{}] resumes from GC", disruptionNodeCopy); + resumeThreads(nodeThreads); + } + } + + @Override + public TimeValue expectedTimeToHeal() { + return TimeValue.timeValueMillis(0); + } + + class 
BackgroundWorker implements Runnable { + + @Override + public void run() { + while (disrupting && disruptedNode != null) { + try { + TimeValue duration = new TimeValue(delayDurationMin + random.nextInt((int) (delayDurationMax - delayDurationMin))); + simulateLongGC(duration); + + duration = new TimeValue(intervalBetweenDelaysMin + random.nextInt((int) (intervalBetweenDelaysMax - intervalBetweenDelaysMin))); + if (disrupting && disruptedNode != null) { + Thread.sleep(duration.millis()); + } + } catch (InterruptedException e) { + } catch (Exception e) { + logger.error("error in background worker", e); + } + } + } + } + +} diff --git a/src/test/java/org/elasticsearch/test/disruption/NetworkDelaysPartition.java b/src/test/java/org/elasticsearch/test/disruption/NetworkDelaysPartition.java new file mode 100644 index 00000000000..9eb99302e46 --- /dev/null +++ b/src/test/java/org/elasticsearch/test/disruption/NetworkDelaysPartition.java @@ -0,0 +1,92 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.elasticsearch.test.disruption; + +import org.elasticsearch.cluster.node.DiscoveryNode; +import org.elasticsearch.common.unit.TimeValue; +import org.elasticsearch.test.transport.MockTransportService; + +import java.util.Random; +import java.util.Set; + +public class NetworkDelaysPartition extends NetworkPartition { + + static long DEFAULT_DELAY_MIN = 10000; + static long DEFAULT_DELAY_MAX = 90000; + + + final long delayMin; + final long delayMax; + + TimeValue duration; + + public NetworkDelaysPartition(Random random) { + this(random, DEFAULT_DELAY_MIN, DEFAULT_DELAY_MAX); + } + + public NetworkDelaysPartition(Random random, long delayMin, long delayMax) { + super(random); + this.delayMin = delayMin; + this.delayMax = delayMax; + } + + public NetworkDelaysPartition(String node1, String node2, Random random) { + this(node1, node2, DEFAULT_DELAY_MIN, DEFAULT_DELAY_MAX, random); + } + + public NetworkDelaysPartition(String node1, String node2, long delayMin, long delayMax, Random random) { + super(node1, node2, random); + this.delayMin = delayMin; + this.delayMax = delayMax; + } + + public NetworkDelaysPartition(Set nodesSideOne, Set nodesSideTwo, Random random) { + this(nodesSideOne, nodesSideTwo, DEFAULT_DELAY_MIN, DEFAULT_DELAY_MAX, random); + } + + public NetworkDelaysPartition(Set nodesSideOne, Set nodesSideTwo, long delayMin, long delayMax, Random random) { + super(nodesSideOne, nodesSideTwo, random); + this.delayMin = delayMin; + this.delayMax = delayMax; + + } + + @Override + public synchronized void startDisrupting() { + duration = new TimeValue(delayMin + random.nextInt((int) (delayMax - delayMin))); + super.startDisrupting(); + } + + @Override + void applyDisruption(DiscoveryNode node1, MockTransportService transportService1, + DiscoveryNode node2, MockTransportService transportService2) { + transportService1.addUnresponsiveRule(node1, duration); + transportService1.addUnresponsiveRule(node2, duration); + } + + @Override + protected String 
getPartitionDescription() { + return "network delays for [" + duration + "]"; + } + + @Override + public TimeValue expectedTimeToHeal() { + return TimeValue.timeValueMillis(delayMax); + } +} diff --git a/src/test/java/org/elasticsearch/test/disruption/NetworkDisconnectPartition.java b/src/test/java/org/elasticsearch/test/disruption/NetworkDisconnectPartition.java new file mode 100644 index 00000000000..8653b50f749 --- /dev/null +++ b/src/test/java/org/elasticsearch/test/disruption/NetworkDisconnectPartition.java @@ -0,0 +1,59 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.elasticsearch.test.disruption; + +import org.elasticsearch.cluster.node.DiscoveryNode; +import org.elasticsearch.common.unit.TimeValue; +import org.elasticsearch.test.transport.MockTransportService; + +import java.util.Random; +import java.util.Set; + +public class NetworkDisconnectPartition extends NetworkPartition { + + + public NetworkDisconnectPartition(Random random) { + super(random); + } + + public NetworkDisconnectPartition(String node1, String node2, Random random) { + super(node1, node2, random); + } + + public NetworkDisconnectPartition(Set nodesSideOne, Set nodesSideTwo, Random random) { + super(nodesSideOne, nodesSideTwo, random); + } + + @Override + protected String getPartitionDescription() { + return "disconnected"; + } + + @Override + void applyDisruption(DiscoveryNode node1, MockTransportService transportService1, + DiscoveryNode node2, MockTransportService transportService2) { + transportService1.addFailToSendNoConnectRule(node2); + transportService2.addFailToSendNoConnectRule(node1); + } + + @Override + public TimeValue expectedTimeToHeal() { + return TimeValue.timeValueSeconds(0); + } +} diff --git a/src/test/java/org/elasticsearch/test/disruption/NetworkPartition.java b/src/test/java/org/elasticsearch/test/disruption/NetworkPartition.java new file mode 100644 index 00000000000..8206fafef4e --- /dev/null +++ b/src/test/java/org/elasticsearch/test/disruption/NetworkPartition.java @@ -0,0 +1,202 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.test.disruption; + +import com.google.common.collect.ImmutableList; +import org.elasticsearch.cluster.node.DiscoveryNode; +import org.elasticsearch.common.logging.ESLogger; +import org.elasticsearch.common.logging.Loggers; +import org.elasticsearch.discovery.Discovery; +import org.elasticsearch.test.InternalTestCluster; +import org.elasticsearch.test.transport.MockTransportService; +import org.elasticsearch.transport.TransportService; + +import java.util.HashSet; +import java.util.List; +import java.util.Random; +import java.util.Set; + +public abstract class NetworkPartition implements ServiceDisruptionScheme { + + protected final ESLogger logger = Loggers.getLogger(getClass()); + + final Set nodesSideOne; + final Set nodesSideTwo; + volatile boolean autoExpand; + protected final Random random; + protected volatile InternalTestCluster cluster; + protected volatile boolean activeDisruption = false; + + + public NetworkPartition(Random random) { + this.random = new Random(random.nextLong()); + nodesSideOne = new HashSet<>(); + nodesSideTwo = new HashSet<>(); + autoExpand = true; + } + + public NetworkPartition(String node1, String node2, Random random) { + this(random); + nodesSideOne.add(node1); + nodesSideTwo.add(node2); + autoExpand = false; + } + + public NetworkPartition(Set nodesSideOne, Set nodesSideTwo, Random random) { + this(random); + this.nodesSideOne.addAll(nodesSideOne); + this.nodesSideTwo.addAll(nodesSideTwo); + autoExpand = false; + } + + + public List getNodesSideOne() { + return 
ImmutableList.copyOf(nodesSideOne); + } + + public List getNodesSideTwo() { + return ImmutableList.copyOf(nodesSideTwo); + } + + public List getMajoritySide() { + if (nodesSideOne.size() >= nodesSideTwo.size()) { + return getNodesSideOne(); + } else { + return getNodesSideTwo(); + } + } + + public List getMinoritySide() { + if (nodesSideOne.size() >= nodesSideTwo.size()) { + return getNodesSideTwo(); + } else { + return getNodesSideOne(); + } + } + + @Override + public void applyToCluster(InternalTestCluster cluster) { + this.cluster = cluster; + if (autoExpand) { + for (String node : cluster.getNodeNames()) { + applyToNode(node, cluster); + } + } + } + + @Override + public void removeFromCluster(InternalTestCluster cluster) { + stopDisrupting(); + } + + @Override + public synchronized void applyToNode(String node, InternalTestCluster cluster) { + if (!autoExpand || nodesSideOne.contains(node) || nodesSideTwo.contains(node)) { + return; + } + if (nodesSideOne.isEmpty()) { + nodesSideOne.add(node); + } else if (nodesSideTwo.isEmpty()) { + nodesSideTwo.add(node); + } else if (random.nextBoolean()) { + nodesSideOne.add(node); + } else { + nodesSideTwo.add(node); + } + } + + @Override + public synchronized void removeFromNode(String node, InternalTestCluster cluster) { + MockTransportService transportService = (MockTransportService) cluster.getInstance(TransportService.class, node); + DiscoveryNode discoveryNode = discoveryNode(node); + Set otherSideNodes; + if (nodesSideOne.contains(node)) { + otherSideNodes = nodesSideTwo; + } else if (nodesSideTwo.contains(node)) { + otherSideNodes = nodesSideOne; + } else { + return; + } + for (String node2 : otherSideNodes) { + MockTransportService transportService2 = (MockTransportService) cluster.getInstance(TransportService.class, node2); + DiscoveryNode discoveryNode2 = discoveryNode(node2); + removeDisruption(discoveryNode, transportService, discoveryNode2, transportService2); + } + } + + @Override + public synchronized void 
testClusterClosed() { + + } + + protected abstract String getPartitionDescription(); + + + protected DiscoveryNode discoveryNode(String node) { + return cluster.getInstance(Discovery.class, node).localNode(); + } + + @Override + public synchronized void startDisrupting() { + if (nodesSideOne.size() == 0 || nodesSideTwo.size() == 0) { + return; + } + logger.info("nodes {} will be partitioned from {}. partition type [{}]", nodesSideOne, nodesSideTwo, getPartitionDescription()); + activeDisruption = true; + for (String node1 : nodesSideOne) { + MockTransportService transportService1 = (MockTransportService) cluster.getInstance(TransportService.class, node1); + DiscoveryNode discoveryNode1 = discoveryNode(node1); + for (String node2 : nodesSideTwo) { + DiscoveryNode discoveryNode2 = discoveryNode(node2); + MockTransportService transportService2 = (MockTransportService) cluster.getInstance(TransportService.class, node2); + applyDisruption(discoveryNode1, transportService1, discoveryNode2, transportService2); + } + } + } + + + @Override + public synchronized void stopDisrupting() { + if (nodesSideOne.size() == 0 || nodesSideTwo.size() == 0 || !activeDisruption) { + return; + } + logger.info("restoring partition between nodes {} & nodes {}", nodesSideOne, nodesSideTwo); + for (String node1 : nodesSideOne) { + MockTransportService transportService1 = (MockTransportService) cluster.getInstance(TransportService.class, node1); + DiscoveryNode discoveryNode1 = discoveryNode(node1); + for (String node2 : nodesSideTwo) { + DiscoveryNode discoveryNode2 = discoveryNode(node2); + MockTransportService transportService2 = (MockTransportService) cluster.getInstance(TransportService.class, node2); + removeDisruption(discoveryNode1, transportService1, discoveryNode2, transportService2); + } + } + activeDisruption = false; + } + + abstract void applyDisruption(DiscoveryNode node1, MockTransportService transportService1, + DiscoveryNode node2, MockTransportService transportService2); + + 
+ protected void removeDisruption(DiscoveryNode node1, MockTransportService transportService1, + DiscoveryNode node2, MockTransportService transportService2) { + transportService1.clearRule(node2); + transportService2.clearRule(node1); + } + +} diff --git a/src/test/java/org/elasticsearch/test/disruption/NetworkUnresponsivePartition.java b/src/test/java/org/elasticsearch/test/disruption/NetworkUnresponsivePartition.java new file mode 100644 index 00000000000..1feb56c46c7 --- /dev/null +++ b/src/test/java/org/elasticsearch/test/disruption/NetworkUnresponsivePartition.java @@ -0,0 +1,58 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.elasticsearch.test.disruption; + +import org.elasticsearch.cluster.node.DiscoveryNode; +import org.elasticsearch.common.unit.TimeValue; +import org.elasticsearch.test.transport.MockTransportService; + +import java.util.Random; +import java.util.Set; + +public class NetworkUnresponsivePartition extends NetworkPartition { + + public NetworkUnresponsivePartition(Random random) { + super(random); + } + + public NetworkUnresponsivePartition(String node1, String node2, Random random) { + super(node1, node2, random); + } + + public NetworkUnresponsivePartition(Set nodesSideOne, Set nodesSideTwo, Random random) { + super(nodesSideOne, nodesSideTwo, random); + } + + @Override + protected String getPartitionDescription() { + return "unresponsive"; + } + + @Override + void applyDisruption(DiscoveryNode node1, MockTransportService transportService1, + DiscoveryNode node2, MockTransportService transportService2) { + transportService1.addUnresponsiveRule(node2); + transportService2.addUnresponsiveRule(node1); + } + + @Override + public TimeValue expectedTimeToHeal() { + return TimeValue.timeValueSeconds(0); + } +} diff --git a/src/test/java/org/elasticsearch/test/disruption/NoOpDisruptionScheme.java b/src/test/java/org/elasticsearch/test/disruption/NoOpDisruptionScheme.java new file mode 100644 index 00000000000..7b348b1afea --- /dev/null +++ b/src/test/java/org/elasticsearch/test/disruption/NoOpDisruptionScheme.java @@ -0,0 +1,66 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.test.disruption; + +import org.elasticsearch.common.unit.TimeValue; +import org.elasticsearch.test.InternalTestCluster; + +public class NoOpDisruptionScheme implements ServiceDisruptionScheme { + + @Override + public void applyToCluster(InternalTestCluster cluster) { + + } + + @Override + public void removeFromCluster(InternalTestCluster cluster) { + + } + + @Override + public void applyToNode(String node, InternalTestCluster cluster) { + + } + + @Override + public void removeFromNode(String node, InternalTestCluster cluster) { + + } + + @Override + public void startDisrupting() { + + } + + @Override + public void stopDisrupting() { + + } + + @Override + public void testClusterClosed() { + + } + + @Override + public TimeValue expectedTimeToHeal() { + return TimeValue.timeValueSeconds(0); + } +} diff --git a/src/test/java/org/elasticsearch/test/disruption/ServiceDisruptionScheme.java b/src/test/java/org/elasticsearch/test/disruption/ServiceDisruptionScheme.java new file mode 100644 index 00000000000..70774a82356 --- /dev/null +++ b/src/test/java/org/elasticsearch/test/disruption/ServiceDisruptionScheme.java @@ -0,0 +1,42 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.test.disruption; + +import org.elasticsearch.common.unit.TimeValue; +import org.elasticsearch.test.InternalTestCluster; + +public interface ServiceDisruptionScheme { + + public void applyToCluster(InternalTestCluster cluster); + + public void removeFromCluster(InternalTestCluster cluster); + + public void applyToNode(String node, InternalTestCluster cluster); + + public void removeFromNode(String node, InternalTestCluster cluster); + + public void startDisrupting(); + + public void stopDisrupting(); + + public void testClusterClosed(); + + public TimeValue expectedTimeToHeal(); + +} diff --git a/src/test/java/org/elasticsearch/test/disruption/SingleNodeDisruption.java b/src/test/java/org/elasticsearch/test/disruption/SingleNodeDisruption.java new file mode 100644 index 00000000000..3148254011e --- /dev/null +++ b/src/test/java/org/elasticsearch/test/disruption/SingleNodeDisruption.java @@ -0,0 +1,83 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.test.disruption; + +import org.elasticsearch.common.logging.ESLogger; +import org.elasticsearch.common.logging.Loggers; +import org.elasticsearch.test.InternalTestCluster; + +import java.util.Random; + +public abstract class SingleNodeDisruption implements ServiceDisruptionScheme { + + protected final ESLogger logger = Loggers.getLogger(getClass()); + + protected volatile String disruptedNode; + protected volatile InternalTestCluster cluster; + protected final Random random; + + + public SingleNodeDisruption(String disruptedNode, Random random) { + this(random); + this.disruptedNode = disruptedNode; + } + + public SingleNodeDisruption(Random random) { + this.random = new Random(random.nextLong()); + } + + @Override + public void applyToCluster(InternalTestCluster cluster) { + this.cluster = cluster; + if (disruptedNode == null) { + String[] nodes = cluster.getNodeNames(); + disruptedNode = nodes[random.nextInt(nodes.length)]; + } + } + + @Override + public void removeFromCluster(InternalTestCluster cluster) { + if (disruptedNode != null) { + removeFromNode(disruptedNode, cluster); + } + } + + @Override + public synchronized void applyToNode(String node, InternalTestCluster cluster) { + + } + + @Override + public synchronized void removeFromNode(String node, InternalTestCluster cluster) { + if (disruptedNode == null) { + return; + } + if (!node.equals(disruptedNode)) { + return; + } + stopDisrupting(); + disruptedNode = null; + } + + @Override + public synchronized void testClusterClosed() { + disruptedNode = 
null; + } + +} diff --git a/src/test/java/org/elasticsearch/test/disruption/SlowClusterStateProcessing.java b/src/test/java/org/elasticsearch/test/disruption/SlowClusterStateProcessing.java new file mode 100644 index 00000000000..46ae0afe54c --- /dev/null +++ b/src/test/java/org/elasticsearch/test/disruption/SlowClusterStateProcessing.java @@ -0,0 +1,153 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.elasticsearch.test.disruption; + +import org.elasticsearch.cluster.ClusterService; +import org.elasticsearch.cluster.ClusterState; +import org.elasticsearch.cluster.ClusterStateNonMasterUpdateTask; +import org.elasticsearch.common.Priority; +import org.elasticsearch.common.unit.TimeValue; + +import java.util.Random; +import java.util.concurrent.CountDownLatch; + +public class SlowClusterStateProcessing extends SingleNodeDisruption { + + volatile boolean disrupting; + volatile Thread worker; + + final long intervalBetweenDelaysMin; + final long intervalBetweenDelaysMax; + final long delayDurationMin; + final long delayDurationMax; + + + public SlowClusterStateProcessing(Random random) { + this(null, random); + } + + public SlowClusterStateProcessing(String disruptedNode, Random random) { + this(disruptedNode, random, 100, 200, 300, 20000); + } + + public SlowClusterStateProcessing(String disruptedNode, Random random, long intervalBetweenDelaysMin, + long intervalBetweenDelaysMax, long delayDurationMin, long delayDurationMax) { + this(random, intervalBetweenDelaysMin, intervalBetweenDelaysMax, delayDurationMin, delayDurationMax); + this.disruptedNode = disruptedNode; + } + + public SlowClusterStateProcessing(Random random, + long intervalBetweenDelaysMin, long intervalBetweenDelaysMax, long delayDurationMin, + long delayDurationMax) { + super(random); + this.intervalBetweenDelaysMin = intervalBetweenDelaysMin; + this.intervalBetweenDelaysMax = intervalBetweenDelaysMax; + this.delayDurationMin = delayDurationMin; + this.delayDurationMax = delayDurationMax; + } + + + @Override + public void startDisrupting() { + disrupting = true; + worker = new Thread(new BackgroundWorker()); + worker.setDaemon(true); + worker.start(); + } + + @Override + public void stopDisrupting() { + if (worker == null) { + return; + } + logger.info("stopping to slow down cluster state processing on [{}]", disruptedNode); + disrupting = false; + worker.interrupt(); + try { + 
worker.join(2 * (intervalBetweenDelaysMax + delayDurationMax)); + } catch (InterruptedException e) { + logger.info("background thread failed to stop"); + } + worker = null; + } + + + private boolean interruptClusterStateProcessing(final TimeValue duration) throws InterruptedException { + final String disruptionNodeCopy = disruptedNode; + if (disruptionNodeCopy == null) { + return false; + } + logger.info("delaying cluster state updates on node [{}] for [{}]", disruptionNodeCopy, duration); + final CountDownLatch countDownLatch = new CountDownLatch(1); + ClusterService clusterService = cluster.getInstance(ClusterService.class, disruptionNodeCopy); + if (clusterService == null) { + return false; + } + clusterService.submitStateUpdateTask("service_disruption_delay", Priority.IMMEDIATE, new ClusterStateNonMasterUpdateTask() { + + @Override + public ClusterState execute(ClusterState currentState) throws Exception { + Thread.sleep(duration.millis()); + countDownLatch.countDown(); + return currentState; + } + + @Override + public void onFailure(String source, Throwable t) { + countDownLatch.countDown(); + } + }); + try { + countDownLatch.await(); + } catch (InterruptedException e) { + // try to wait again, we really want the cluster state thread to be freed up when stopping disruption + countDownLatch.await(); + } + return true; + } + + @Override + public TimeValue expectedTimeToHeal() { + return TimeValue.timeValueMillis(0); + } + + class BackgroundWorker implements Runnable { + + @Override + public void run() { + while (disrupting && disruptedNode != null) { + try { + TimeValue duration = new TimeValue(delayDurationMin + random.nextInt((int) (delayDurationMax - delayDurationMin))); + if (!interruptClusterStateProcessing(duration)) { + continue; + } + + duration = new TimeValue(intervalBetweenDelaysMin + random.nextInt((int) (intervalBetweenDelaysMax - intervalBetweenDelaysMin))); + if (disrupting && disruptedNode != null) { + Thread.sleep(duration.millis()); + } + } 
catch (InterruptedException e) { + } catch (Exception e) { + logger.error("error in background worker", e); + } + } + } + } + +} diff --git a/src/test/java/org/elasticsearch/test/transport/MockTransportService.java b/src/test/java/org/elasticsearch/test/transport/MockTransportService.java index 14f0296121e..cf088bab476 100644 --- a/src/test/java/org/elasticsearch/test/transport/MockTransportService.java +++ b/src/test/java/org/elasticsearch/test/transport/MockTransportService.java @@ -24,14 +24,21 @@ import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.common.component.Lifecycle; import org.elasticsearch.common.component.LifecycleListener; import org.elasticsearch.common.inject.Inject; +import org.elasticsearch.common.io.stream.BytesStreamInput; +import org.elasticsearch.common.io.stream.BytesStreamOutput; +import org.elasticsearch.common.network.NetworkService; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.transport.BoundTransportAddress; import org.elasticsearch.common.transport.TransportAddress; +import org.elasticsearch.common.unit.TimeValue; +import org.elasticsearch.common.util.concurrent.AbstractRunnable; import org.elasticsearch.common.util.concurrent.ConcurrentCollections; import org.elasticsearch.threadpool.ThreadPool; import org.elasticsearch.transport.*; import java.io.IOException; +import java.util.Arrays; +import java.util.HashSet; import java.util.Set; import java.util.concurrent.ConcurrentMap; @@ -46,6 +53,7 @@ public class MockTransportService extends TransportService { public MockTransportService(Settings settings, Transport transport, ThreadPool threadPool) { super(settings, new LookupTestTransport(transport), threadPool); this.original = transport; + } /** @@ -92,12 +100,19 @@ public class MockTransportService extends TransportService { }); } + /** + * Adds a rule that will cause matching operations to throw ConnectTransportExceptions + */ + public void 
addFailToSendNoConnectRule(DiscoveryNode node, final String... blockedActions) { + addFailToSendNoConnectRule(node, new HashSet<>(Arrays.asList(blockedActions))); + } + /** * Adds a rule that will cause matching operations to throw ConnectTransportExceptions */ public void addFailToSendNoConnectRule(DiscoveryNode node, final Set blockedActions) { - ((LookupTestTransport) transport).transports.put(node.getAddress(), new DelegateTransport(original) { + addDelegate(node, new DelegateTransport(original) { @Override public void connectToNode(DiscoveryNode node) throws ConnectTransportException { original.connectToNode(node); @@ -124,7 +139,6 @@ public class MockTransportService extends TransportService { * and failing to connect once the rule was added. */ public void addUnresponsiveRule(DiscoveryNode node) { - // TODO add a parameter to delay the connect timeout? addDelegate(node, new DelegateTransport(original) { @Override public void connectToNode(DiscoveryNode node) throws ConnectTransportException { @@ -143,8 +157,101 @@ public class MockTransportService extends TransportService { }); } + /** + * Adds a rule that will cause ignores each send request, simulating an unresponsive node + * and failing to connect once the rule was added. + * + * @param duration the amount of time to delay sending and connecting. 
+ */ + public void addUnresponsiveRule(DiscoveryNode node, final TimeValue duration) { + final long startTime = System.currentTimeMillis(); + + addDelegate(node, new DelegateTransport(original) { + + TimeValue getDelay() { + return new TimeValue(duration.millis() - (System.currentTimeMillis() - startTime)); + } + + @Override + public void connectToNode(DiscoveryNode node) throws ConnectTransportException { + TimeValue delay = getDelay(); + if (delay.millis() <= 0) { + original.connectToNode(node); + return; + } + + // TODO: Replace with proper setting + TimeValue connectingTimeout = NetworkService.TcpSettings.TCP_DEFAULT_CONNECT_TIMEOUT; + try { + if (delay.millis() < connectingTimeout.millis()) { + Thread.sleep(delay.millis()); + original.connectToNode(node); + } else { + Thread.sleep(connectingTimeout.millis()); + throw new ConnectTransportException(node, "UNRESPONSIVE: simulated"); + } + } catch (InterruptedException e) { + throw new ConnectTransportException(node, "UNRESPONSIVE: interrupted while sleeping", e); + } + } + + @Override + public void connectToNodeLight(DiscoveryNode node) throws ConnectTransportException { + TimeValue delay = getDelay(); + if (delay.millis() <= 0) { + original.connectToNodeLight(node); + return; + } + + // TODO: Replace with proper setting + TimeValue connectingTimeout = NetworkService.TcpSettings.TCP_DEFAULT_CONNECT_TIMEOUT; + try { + if (delay.millis() < connectingTimeout.millis()) { + Thread.sleep(delay.millis()); + original.connectToNodeLight(node); + } else { + Thread.sleep(connectingTimeout.millis()); + throw new ConnectTransportException(node, "UNRESPONSIVE: simulated"); + } + } catch (InterruptedException e) { + throw new ConnectTransportException(node, "UNRESPONSIVE: interrupted while sleeping", e); + } + } + + @Override + public void sendRequest(final DiscoveryNode node, final long requestId, final String action, TransportRequest request, final TransportRequestOptions options) throws IOException, TransportException { + // 
delayed sending - even if larger then the request timeout to simulated a potential late response from target node + + TimeValue delay = getDelay(); + if (delay.millis() <= 0) { + original.sendRequest(node, requestId, action, request, options); + return; + } + + // poor mans request cloning... + TransportRequestHandler handler = MockTransportService.this.getHandler(action); + BytesStreamOutput bStream = new BytesStreamOutput(); + request.writeTo(bStream); + final TransportRequest clonedRequest = handler.newInstance(); + clonedRequest.readFrom(new BytesStreamInput(bStream.bytes())); + + threadPool.schedule(delay, ThreadPool.Names.GENERIC, new AbstractRunnable() { + @Override + public void run() { + try { + original.sendRequest(node, requestId, action, clonedRequest, options); + } catch (Throwable e) { + logger.debug("failed to send delayed request", e); + } + } + }); + } + }); + } + /** * Adds a new delegate transport that is used for communication with the given node. + * * @return true iff no other delegate was registered for this node before, otherwise false */ public boolean addDelegate(DiscoveryNode node, DelegateTransport transport) { @@ -209,12 +316,11 @@ public class MockTransportService extends TransportService { protected final Transport transport; + public DelegateTransport(Transport transport) { this.transport = transport; } - - @Override public void transportServiceAdapter(TransportServiceAdapter service) { transport.transportServiceAdapter(service);